From cc008f92397e1f02251098081f402a82528b118a Mon Sep 17 00:00:00 2001
From: Eugene
Date: Tue, 7 Jan 2025 16:03:16 +0100
Subject: [PATCH] feat: Save the number of successful, failed and total
 executions for Test Run (no-changelog) (#12131)

---
 .../cli/src/databases/entities/test-run.ee.ts |  19 ++
 .../1736172058779-AddStatsColumnsToTestRun.ts |  31 +++
 .../src/databases/migrations/mysqldb/index.ts |   2 +
 .../databases/migrations/postgresdb/index.ts  |   2 +
 .../src/databases/migrations/sqlite/index.ts  |   2 +
 .../repositories/test-run.repository.ee.ts    |  18 +-
 .../__tests__/test-runner.service.ee.test.ts  | 199 +++++++++++++++++-
 .../test-runner/test-runner.service.ee.ts     |  89 ++++----
 .../evaluation/test-runs.api.test.ts          |   2 +-
 9 files changed, 323 insertions(+), 41 deletions(-)
 create mode 100644 packages/cli/src/databases/migrations/common/1736172058779-AddStatsColumnsToTestRun.ts

diff --git a/packages/cli/src/databases/entities/test-run.ee.ts b/packages/cli/src/databases/entities/test-run.ee.ts
index ab5f041f11..39d8e16ddd 100644
--- a/packages/cli/src/databases/entities/test-run.ee.ts
+++ b/packages/cli/src/databases/entities/test-run.ee.ts
@@ -35,4 +35,23 @@ export class TestRun extends WithTimestampsAndStringId {
 
 	@Column(jsonColumnType, { nullable: true })
 	metrics: AggregatedTestRunMetrics;
+
+	/**
+	 * Total number of test cases matching the test definition's filter condition (the specified annotationTag)
+	 */
+	@Column('integer', { nullable: true })
+	totalCases: number;
+
+	/**
+	 * Number of test cases that passed (the evaluation workflow executed successfully)
+	 */
+	@Column('integer', { nullable: true })
+	passedCases: number;
+
+	/**
+	 * Number of test cases that failed
+	 * (an unexpected exception was thrown during execution, or the evaluation workflow finished with an error)
+	 */
+	@Column('integer', { nullable: true })
+	failedCases: number;
 }
diff --git a/packages/cli/src/databases/migrations/common/1736172058779-AddStatsColumnsToTestRun.ts b/packages/cli/src/databases/migrations/common/1736172058779-AddStatsColumnsToTestRun.ts
new file mode 100644
index 0000000000..8d6a5cdc20
--- /dev/null
+++ b/packages/cli/src/databases/migrations/common/1736172058779-AddStatsColumnsToTestRun.ts
@@ -0,0 +1,31 @@
+import type { MigrationContext, ReversibleMigration } from '@/databases/types';
+
+const columns = ['totalCases', 'passedCases', 'failedCases'] as const;
+
+export class AddStatsColumnsToTestRun1736172058779 implements ReversibleMigration {
+	async up({ escape, runQuery }: MigrationContext) {
+		const tableName = escape.tableName('test_run');
+		const columnNames = columns.map((name) => escape.columnName(name));
+
+		// Values must be NULL while a test run is new and non-negative integers otherwise.
+		// A run can be cancelled or interrupted by an unexpected error at any moment, so those two statuses allow NULL as well.
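+		// For illustration only (not part of the original patch): on Postgres, each iteration should produce a statement roughly like
+		// ALTER TABLE "test_run" ADD COLUMN "totalCases" INT CHECK(CASE WHEN status = 'new' THEN "totalCases" IS NULL ... END)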
+		for (const name of columnNames) {
+			await runQuery(`ALTER TABLE ${tableName} ADD COLUMN ${name} INT CHECK(
+				CASE
+					WHEN status = 'new' THEN ${name} IS NULL
+					WHEN status IN ('cancelled', 'error') THEN ${name} IS NULL OR ${name} >= 0
+					ELSE ${name} >= 0
+				END
+			)`);
+		}
+	}
+
+	async down({ escape, runQuery }: MigrationContext) {
+		const tableName = escape.tableName('test_run');
+		const columnNames = columns.map((name) => escape.columnName(name));
+
+		for (const name of columnNames) {
+			await runQuery(`ALTER TABLE ${tableName} DROP COLUMN ${name}`);
+		}
+	}
+}
diff --git a/packages/cli/src/databases/migrations/mysqldb/index.ts b/packages/cli/src/databases/migrations/mysqldb/index.ts
index 89df273472..b76409c0c1 100644
--- a/packages/cli/src/databases/migrations/mysqldb/index.ts
+++ b/packages/cli/src/databases/migrations/mysqldb/index.ts
@@ -76,6 +76,7 @@ import { CreateTestMetricTable1732271325258 } from '../common/1732271325258-Crea
 import { CreateTestRun1732549866705 } from '../common/1732549866705-CreateTestRunTable';
 import { AddMockedNodesColumnToTestDefinition1733133775640 } from '../common/1733133775640-AddMockedNodesColumnToTestDefinition';
 import { AddManagedColumnToCredentialsTable1734479635324 } from '../common/1734479635324-AddManagedColumnToCredentialsTable';
+import { AddStatsColumnsToTestRun1736172058779 } from '../common/1736172058779-AddStatsColumnsToTestRun';
 
 export const mysqlMigrations: Migration[] = [
 	InitialMigration1588157391238,
@@ -154,4 +155,5 @@ export const mysqlMigrations: Migration[] = [
 	AddMockedNodesColumnToTestDefinition1733133775640,
 	AddManagedColumnToCredentialsTable1734479635324,
 	AddProjectIcons1729607673469,
+	AddStatsColumnsToTestRun1736172058779,
 ];
diff --git a/packages/cli/src/databases/migrations/postgresdb/index.ts b/packages/cli/src/databases/migrations/postgresdb/index.ts
index d5d72282f4..7cf90bde5b 100644
--- a/packages/cli/src/databases/migrations/postgresdb/index.ts
+++ b/packages/cli/src/databases/migrations/postgresdb/index.ts
@@ -76,6 +76,7 @@ import { CreateTestMetricTable1732271325258 } from '../common/1732271325258-Crea
 import { CreateTestRun1732549866705 } from '../common/1732549866705-CreateTestRunTable';
 import { AddMockedNodesColumnToTestDefinition1733133775640 } from '../common/1733133775640-AddMockedNodesColumnToTestDefinition';
 import { AddManagedColumnToCredentialsTable1734479635324 } from '../common/1734479635324-AddManagedColumnToCredentialsTable';
+import { AddStatsColumnsToTestRun1736172058779 } from '../common/1736172058779-AddStatsColumnsToTestRun';
 
 export const postgresMigrations: Migration[] = [
 	InitialMigration1587669153312,
@@ -154,4 +155,5 @@ export const postgresMigrations: Migration[] = [
 	AddMockedNodesColumnToTestDefinition1733133775640,
 	AddManagedColumnToCredentialsTable1734479635324,
 	AddProjectIcons1729607673469,
+	AddStatsColumnsToTestRun1736172058779,
 ];
diff --git a/packages/cli/src/databases/migrations/sqlite/index.ts b/packages/cli/src/databases/migrations/sqlite/index.ts
index 7fec59baf2..363a6e47c3 100644
--- a/packages/cli/src/databases/migrations/sqlite/index.ts
+++ b/packages/cli/src/databases/migrations/sqlite/index.ts
@@ -73,6 +73,7 @@ import { CreateTestMetricTable1732271325258 } from '../common/1732271325258-Crea
 import { CreateTestRun1732549866705 } from '../common/1732549866705-CreateTestRunTable';
 import { AddMockedNodesColumnToTestDefinition1733133775640 } from '../common/1733133775640-AddMockedNodesColumnToTestDefinition';
 import { AddManagedColumnToCredentialsTable1734479635324 } from '../common/1734479635324-AddManagedColumnToCredentialsTable';
+import { AddStatsColumnsToTestRun1736172058779 } from '../common/1736172058779-AddStatsColumnsToTestRun';
 
 const sqliteMigrations: Migration[] = [
 	InitialMigration1588102412422,
@@ -148,6 +149,7 @@ const sqliteMigrations: Migration[] = [
 	AddMockedNodesColumnToTestDefinition1733133775640,
 	AddManagedColumnToCredentialsTable1734479635324,
 	AddProjectIcons1729607673469,
+	AddStatsColumnsToTestRun1736172058779,
 ];
 
 export { sqliteMigrations };
diff --git a/packages/cli/src/databases/repositories/test-run.repository.ee.ts b/packages/cli/src/databases/repositories/test-run.repository.ee.ts
index f0f235c551..037844734f 100644
--- a/packages/cli/src/databases/repositories/test-run.repository.ee.ts
+++ b/packages/cli/src/databases/repositories/test-run.repository.ee.ts
@@ -21,14 +21,28 @@ export class TestRunRepository extends Repository<TestRun> {
 		return await this.save(testRun);
 	}
 
-	async markAsRunning(id: string) {
-		return await this.update(id, { status: 'running', runAt: new Date() });
+	async markAsRunning(id: string, totalCases: number) {
+		return await this.update(id, {
+			status: 'running',
+			runAt: new Date(),
+			totalCases,
+			passedCases: 0,
+			failedCases: 0,
+		});
 	}
 
 	async markAsCompleted(id: string, metrics: AggregatedTestRunMetrics) {
 		return await this.update(id, { status: 'completed', completedAt: new Date(), metrics });
 	}
 
+	async incrementPassed(id: string) {
+		return await this.increment({ id }, 'passedCases', 1);
+	}
+
+	async incrementFailed(id: string) {
+		return await this.increment({ id }, 'failedCases', 1);
+	}
+
 	async getMany(testDefinitionId: string, options: ListQuery.Options) {
 		const findManyOptions: FindManyOptions<TestRun> = {
 			where: { testDefinition: { id: testDefinitionId } },
diff --git a/packages/cli/src/evaluation.ee/test-runner/__tests__/test-runner.service.ee.test.ts b/packages/cli/src/evaluation.ee/test-runner/__tests__/test-runner.service.ee.test.ts
index 06b9653374..cc3c3bc33b 100644
--- a/packages/cli/src/evaluation.ee/test-runner/__tests__/test-runner.service.ee.test.ts
+++ b/packages/cli/src/evaluation.ee/test-runner/__tests__/test-runner.service.ee.test.ts
@@ -2,7 +2,8 @@ import type { SelectQueryBuilder } from '@n8n/typeorm';
 import { stringify } from 'flatted';
 import { readFileSync } from 'fs';
 import { mock, mockDeep } from 'jest-mock-extended';
-import type { GenericValue, IRun } from 'n8n-workflow';
+import type { ErrorReporter } from 'n8n-core';
+import type { ExecutionError, GenericValue, IRun } from 'n8n-workflow';
 import path from 'path';
 
 import type { ActiveExecutions } from '@/active-executions';
@@ -90,6 +91,16 @@ function mockExecutionData() {
 	});
 }
 
+function mockErrorExecutionData() {
+	return mock<IRun>({
+		data: {
+			resultData: {
+				error: mock<ExecutionError>(),
+			},
+		},
+	});
+}
+
 function mockEvaluationExecutionData(metrics: Record<string, GenericValue>) {
 	return mock<IRun>({
 		data: {
@@ -110,6 +121,9 @@ function mockEvaluationExecutionData(metrics: Record<string, GenericValue>) {
 					},
 				],
 			},
+			// error is an optional prop, but jest-mock-extended will mock it by default,
+			// which affects the code logic, so we need to explicitly set it to undefined.
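+			// (the runner inspects resultData.error to decide whether a test case passed or failed)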
+			error: undefined,
 		},
 	},
 	});
 }
@@ -156,6 +170,8 @@ describe('TestRunnerService', () => {
 		testRunRepository.createTestRun.mockClear();
 		testRunRepository.markAsRunning.mockClear();
 		testRunRepository.markAsCompleted.mockClear();
+		testRunRepository.incrementFailed.mockClear();
+		testRunRepository.incrementPassed.mockClear();
 	});
 
 	test('should create an instance of TestRunnerService', async () => {
@@ -167,6 +183,7 @@ describe('TestRunnerService', () => {
 			testRunRepository,
 			testMetricRepository,
 			mockNodeTypes,
+			mock<ErrorReporter>(),
 		);
 
 		expect(testRunnerService).toBeInstanceOf(TestRunnerService);
@@ -181,6 +198,7 @@ describe('TestRunnerService', () => {
 			testRunRepository,
 			testMetricRepository,
 			mockNodeTypes,
+			mock<ErrorReporter>(),
 		);
 
 		workflowRepository.findById.calledWith('workflow-under-test-id').mockResolvedValueOnce({
@@ -218,6 +236,7 @@ describe('TestRunnerService', () => {
 			testRunRepository,
 			testMetricRepository,
 			mockNodeTypes,
+			mock<ErrorReporter>(),
 		);
 
 		workflowRepository.findById.calledWith('workflow-under-test-id').mockResolvedValueOnce({
@@ -298,12 +317,185 @@ describe('TestRunnerService', () => {
 		// Check Test Run status was updated correctly
 		expect(testRunRepository.createTestRun).toHaveBeenCalledTimes(1);
 		expect(testRunRepository.markAsRunning).toHaveBeenCalledTimes(1);
-		expect(testRunRepository.markAsRunning).toHaveBeenCalledWith('test-run-id');
+		expect(testRunRepository.markAsRunning).toHaveBeenCalledWith('test-run-id', expect.any(Number));
 		expect(testRunRepository.markAsCompleted).toHaveBeenCalledTimes(1);
 		expect(testRunRepository.markAsCompleted).toHaveBeenCalledWith('test-run-id', {
 			metric1: 0.75,
 			metric2: 0,
 		});
+
+		expect(testRunRepository.incrementPassed).toHaveBeenCalledTimes(2);
+		expect(testRunRepository.incrementFailed).not.toHaveBeenCalled();
+	});
+
+	test('should properly count passed and failed executions', async () => {
+		const testRunnerService = new TestRunnerService(
+			workflowRepository,
+			workflowRunner,
+			executionRepository,
+			activeExecutions,
+			testRunRepository,
+			testMetricRepository,
+			mockNodeTypes,
+			mock<ErrorReporter>(),
+		);
+
+		workflowRepository.findById.calledWith('workflow-under-test-id').mockResolvedValueOnce({
+			id: 'workflow-under-test-id',
+			...wfUnderTestJson,
+		});
+
+		workflowRepository.findById.calledWith('evaluation-workflow-id').mockResolvedValueOnce({
+			id: 'evaluation-workflow-id',
+			...wfEvaluationJson,
+		});
+
+		workflowRunner.run.mockResolvedValueOnce('some-execution-id');
+		workflowRunner.run.mockResolvedValueOnce('some-execution-id-2');
+		workflowRunner.run.mockResolvedValueOnce('some-execution-id-3');
+		workflowRunner.run.mockResolvedValueOnce('some-execution-id-4');
+
+		// Mock executions of workflow under test
+		activeExecutions.getPostExecutePromise
+			.calledWith('some-execution-id')
+			.mockResolvedValue(mockExecutionData());
+
+		activeExecutions.getPostExecutePromise
+			.calledWith('some-execution-id-3')
+			.mockResolvedValue(mockExecutionData());
+
+		// Mock executions of evaluation workflow
+		activeExecutions.getPostExecutePromise
+			.calledWith('some-execution-id-2')
+			.mockResolvedValue(mockEvaluationExecutionData({ metric1: 1, metric2: 0 }));
+
+		activeExecutions.getPostExecutePromise
+			.calledWith('some-execution-id-4')
+			.mockRejectedValue(new Error('Some error'));
+
+		await testRunnerService.runTest(
+			mock<User>(),
+			mock<TestDefinition>({
+				workflowId: 'workflow-under-test-id',
+				evaluationWorkflowId: 'evaluation-workflow-id',
+				mockedNodes: [],
+			}),
+		);
+
+		expect(testRunRepository.incrementPassed).toHaveBeenCalledTimes(1);
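+		// the evaluation for the fourth execution was mocked to reject, so that test case counts as failed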
+		expect(testRunRepository.incrementFailed).toHaveBeenCalledTimes(1);
+	});
+
+	test('should properly count failed test executions', async () => {
+		const testRunnerService = new TestRunnerService(
+			workflowRepository,
+			workflowRunner,
+			executionRepository,
+			activeExecutions,
+			testRunRepository,
+			testMetricRepository,
+			mockNodeTypes,
+			mock<ErrorReporter>(),
+		);
+
+		workflowRepository.findById.calledWith('workflow-under-test-id').mockResolvedValueOnce({
+			id: 'workflow-under-test-id',
+			...wfUnderTestJson,
+		});
+
+		workflowRepository.findById.calledWith('evaluation-workflow-id').mockResolvedValueOnce({
+			id: 'evaluation-workflow-id',
+			...wfEvaluationJson,
+		});
+
+		workflowRunner.run.mockResolvedValueOnce('some-execution-id');
+		workflowRunner.run.mockResolvedValueOnce('some-execution-id-2');
+		workflowRunner.run.mockResolvedValueOnce('some-execution-id-3');
+		workflowRunner.run.mockResolvedValueOnce('some-execution-id-4');
+
+		// Mock executions of workflow under test
+		activeExecutions.getPostExecutePromise
+			.calledWith('some-execution-id')
+			.mockResolvedValue(mockExecutionData());
+
+		activeExecutions.getPostExecutePromise
+			.calledWith('some-execution-id-3')
+			.mockResolvedValue(mockErrorExecutionData());
+
+		// Mock executions of evaluation workflow
+		activeExecutions.getPostExecutePromise
+			.calledWith('some-execution-id-2')
+			.mockResolvedValue(mockEvaluationExecutionData({ metric1: 1, metric2: 0 }));
+
+		await testRunnerService.runTest(
+			mock<User>(),
+			mock<TestDefinition>({
+				workflowId: 'workflow-under-test-id',
+				evaluationWorkflowId: 'evaluation-workflow-id',
+				mockedNodes: [],
+			}),
+		);
+
+		expect(testRunRepository.incrementPassed).toHaveBeenCalledTimes(1);
+		expect(testRunRepository.incrementFailed).toHaveBeenCalledTimes(1);
+	});
+
+	test('should properly count failed evaluations', async () => {
+		const testRunnerService = new TestRunnerService(
+			workflowRepository,
+			workflowRunner,
+			executionRepository,
+			activeExecutions,
+			testRunRepository,
+			testMetricRepository,
+			mockNodeTypes,
+			mock<ErrorReporter>(),
+		);
+
+		workflowRepository.findById.calledWith('workflow-under-test-id').mockResolvedValueOnce({
+			id: 'workflow-under-test-id',
+			...wfUnderTestJson,
+		});
+
+		workflowRepository.findById.calledWith('evaluation-workflow-id').mockResolvedValueOnce({
+			id: 'evaluation-workflow-id',
+			...wfEvaluationJson,
+		});
+
+		workflowRunner.run.mockResolvedValueOnce('some-execution-id');
+		workflowRunner.run.mockResolvedValueOnce('some-execution-id-2');
+		workflowRunner.run.mockResolvedValueOnce('some-execution-id-3');
+		workflowRunner.run.mockResolvedValueOnce('some-execution-id-4');
+
+		// Mock executions of workflow under test
+		activeExecutions.getPostExecutePromise
+			.calledWith('some-execution-id')
+			.mockResolvedValue(mockExecutionData());
+
+		activeExecutions.getPostExecutePromise
+			.calledWith('some-execution-id-3')
+			.mockResolvedValue(mockExecutionData());
+
+		// Mock executions of evaluation workflow
+		activeExecutions.getPostExecutePromise
+			.calledWith('some-execution-id-2')
+			.mockResolvedValue(mockEvaluationExecutionData({ metric1: 1, metric2: 0 }));
+
+		activeExecutions.getPostExecutePromise
+			.calledWith('some-execution-id-4')
+			.mockResolvedValue(mockErrorExecutionData());
+
+		await testRunnerService.runTest(
+			mock<User>(),
+			mock<TestDefinition>({
+				workflowId: 'workflow-under-test-id',
+				evaluationWorkflowId: 'evaluation-workflow-id',
+				mockedNodes: [],
+			}),
+		);
+
+		expect(testRunRepository.incrementPassed).toHaveBeenCalledTimes(1);
+		expect(testRunRepository.incrementFailed).toHaveBeenCalledTimes(1);
 	});
 
 	test('should specify correct start nodes when running workflow under test', async () => {
@@ -315,6 +507,7 @@ describe('TestRunnerService', () => {
 			testRunRepository,
 			testMetricRepository,
 			mockNodeTypes,
+			mock<ErrorReporter>(),
 		);
 
 		workflowRepository.findById.calledWith('workflow-under-test-id').mockResolvedValueOnce({
@@ -388,6 +581,7 @@ describe('TestRunnerService', () => {
 			testRunRepository,
 			testMetricRepository,
 			mockNodeTypes,
+			mock<ErrorReporter>(),
 		);
 
 		const startNodesData = (testRunnerService as any).getStartNodesData(
@@ -412,6 +606,7 @@ describe('TestRunnerService', () => {
 			testRunRepository,
 			testMetricRepository,
 			mockNodeTypes,
+			mock<ErrorReporter>(),
 		);
 
 		const startNodesData = (testRunnerService as any).getStartNodesData(
diff --git a/packages/cli/src/evaluation.ee/test-runner/test-runner.service.ee.ts b/packages/cli/src/evaluation.ee/test-runner/test-runner.service.ee.ts
index c1e6e33942..5a054a3527 100644
--- a/packages/cli/src/evaluation.ee/test-runner/test-runner.service.ee.ts
+++ b/packages/cli/src/evaluation.ee/test-runner/test-runner.service.ee.ts
@@ -1,5 +1,6 @@
 import { Service } from '@n8n/di';
 import { parse } from 'flatted';
+import { ErrorReporter } from 'n8n-core';
 import { NodeConnectionType, Workflow } from 'n8n-workflow';
 import type {
 	IDataObject,
@@ -45,6 +46,7 @@ export class TestRunnerService {
 		private readonly testRunRepository: TestRunRepository,
 		private readonly testMetricRepository: TestMetricRepository,
 		private readonly nodeTypes: NodeTypes,
+		private readonly errorReporter: ErrorReporter,
 	) {}
 
 	/**
@@ -230,51 +232,66 @@ export class TestRunnerService {
 
 		// 2. Run over all the test cases
 
-		await this.testRunRepository.markAsRunning(testRun.id);
+		await this.testRunRepository.markAsRunning(testRun.id, pastExecutions.length);
 
 		// Object to collect the results of the evaluation workflow executions
 		const metrics = new EvaluationMetrics(testMetricNames);
 
 		for (const { id: pastExecutionId } of pastExecutions) {
-			// Fetch past execution with data
-			const pastExecution = await this.executionRepository.findOne({
-				where: { id: pastExecutionId },
-				relations: ['executionData', 'metadata'],
-			});
-			assert(pastExecution, 'Execution not found');
+			try {
+				// Fetch past execution with data
+				const pastExecution = await this.executionRepository.findOne({
+					where: { id: pastExecutionId },
+					relations: ['executionData', 'metadata'],
+				});
+				assert(pastExecution, 'Execution not found');
 
-			const executionData = parse(pastExecution.executionData.data) as IRunExecutionData;
+				const executionData = parse(pastExecution.executionData.data) as IRunExecutionData;
 
-			// Run the test case and wait for it to finish
-			const testCaseExecution = await this.runTestCase(
-				workflow,
-				executionData,
-				pastExecution.executionData.workflowData,
-				test.mockedNodes,
-				user.id,
-			);
+				// Run the test case and wait for it to finish
+				const testCaseExecution = await this.runTestCase(
+					workflow,
+					executionData,
+					pastExecution.executionData.workflowData,
+					test.mockedNodes,
+					user.id,
+				);
 
-			// In case of a permission check issue, the test case execution will be undefined.
+				// In case of a permission check issue, the test case execution will be undefined.
+				// Skip it, increment the failed count, and continue with the next test case
+				if (!testCaseExecution) {
+					await this.testRunRepository.incrementFailed(testRun.id);
+					continue;
+				}
+
+				// Collect the results of the test case execution
+				const testCaseRunData = testCaseExecution.data.resultData.runData;
+
+				// Get the original runData from the test case execution data
+				const originalRunData = executionData.resultData.runData;
+
+				// Run the evaluation workflow with the original and new run data
+				const evalExecution = await this.runTestCaseEvaluation(
+					evaluationWorkflow,
+					originalRunData,
+					testCaseRunData,
+					testRun.id,
+				);
+				assert(evalExecution);
+
+				metrics.addResults(this.extractEvaluationResult(evalExecution));
+
+				if (evalExecution.data.resultData.error) {
+					await this.testRunRepository.incrementFailed(testRun.id);
+				} else {
+					await this.testRunRepository.incrementPassed(testRun.id);
+				}
+			} catch (e) {
+				// In case of an unexpected error, increment the failed count and continue with the next test case
+				await this.testRunRepository.incrementFailed(testRun.id);
+
+				this.errorReporter.error(e);
+			}
-
-			// Collect the results of the test case execution
-			const testCaseRunData = testCaseExecution.data.resultData.runData;
-
-			// Get the original runData from the test case execution data
-			const originalRunData = executionData.resultData.runData;
-
-			// Run the evaluation workflow with the original and new run data
-			const evalExecution = await this.runTestCaseEvaluation(
-				evaluationWorkflow,
-				originalRunData,
-				testCaseRunData,
-				testRun.id,
-			);
-			assert(evalExecution);
-			metrics.addResults(this.extractEvaluationResult(evalExecution));
 		}
 
 		const aggregatedMetrics = metrics.getAggregatedMetrics();
diff --git a/packages/cli/test/integration/evaluation/test-runs.api.test.ts b/packages/cli/test/integration/evaluation/test-runs.api.test.ts
index 8f01029ad2..3fcc321cc9 100644
--- a/packages/cli/test/integration/evaluation/test-runs.api.test.ts
+++ b/packages/cli/test/integration/evaluation/test-runs.api.test.ts
@@ -93,7 +93,7 @@ describe('GET /evaluation/test-definitions/:testDefinitionId/runs', () => {
 		const testRunRepository = Container.get(TestRunRepository);
 		const testRun1 = await testRunRepository.createTestRun(testDefinition.id);
 		// Mark as running just to make a slight delay between the runs
-		await testRunRepository.markAsRunning(testRun1.id);
+		await testRunRepository.markAsRunning(testRun1.id, 10);
 
 		const testRun2 = await testRunRepository.createTestRun(testDefinition.id);
 
 		// Fetch the first page