Merge branch 'n8n-io:master' into master

This commit is contained in:
Tristan Robert 2024-12-02 15:28:30 +01:00 committed by GitHub
commit d946f20c93
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 183 additions and 11 deletions

View file

@ -0,0 +1,72 @@
import { EvaluationMetrics } from '../evaluation-metrics.ee';
describe('EvaluationMetrics', () => {
	// Creates a collector that tracks exactly the given metric names
	const collectorFor = (...names: string[]) => new EvaluationMetrics(new Set(names));

	test('should aggregate metrics correctly', () => {
		const collector = collectorFor('metric1', 'metric2');

		collector.addResults({ metric1: 1, metric2: 0 });
		collector.addResults({ metric1: 0.5, metric2: 0.2 });

		// Every metric is averaged over the two reported results
		expect(collector.getAggregatedMetrics()).toEqual({ metric1: 0.75, metric2: 0.1 });
	});

	test('should aggregate only numbers', () => {
		const collector = collectorFor('metric1', 'metric2');

		collector.addResults({ metric1: 1, metric2: 0 });
		collector.addResults({ metric1: '0.5', metric2: 0.2 });
		collector.addResults({ metric1: 'not a number', metric2: [1, 2, 3] });

		// Strings (even numeric-looking ones) and arrays do not contribute to the average
		expect(collector.getAggregatedMetrics()).toEqual({ metric1: 1, metric2: 0.1 });
	});

	test('should handle missing values', () => {
		const collector = collectorFor('metric1', 'metric2');

		collector.addResults({ metric1: 1 });
		collector.addResults({ metric2: 0.2 });

		// A metric absent from a result is averaged only over the results that reported it
		expect(collector.getAggregatedMetrics()).toEqual({ metric1: 1, metric2: 0.2 });
	});

	test('should handle empty metrics', () => {
		const collector = collectorFor('metric1', 'metric2');

		// No results were added, so no metric gets an aggregated value
		expect(collector.getAggregatedMetrics()).toEqual({});
	});

	test('should handle empty testMetrics', () => {
		const collector = collectorFor();

		collector.addResults({ metric1: 1, metric2: 0 });
		collector.addResults({ metric1: 0.5, metric2: 0.2 });

		// Without any tracked metric names every reported value is discarded
		expect(collector.getAggregatedMetrics()).toEqual({});
	});

	test('should ignore non-relevant values', () => {
		const collector = collectorFor('metric1');

		collector.addResults({ metric1: 1, notRelevant: 0 });
		collector.addResults({ metric1: 0.5, notRelevant2: { foo: 'bar' } });

		// Keys that are not tracked never show up in the output
		expect(collector.getAggregatedMetrics()).toEqual({ metric1: 0.75 });
	});
});

View file

@ -57,6 +57,12 @@
"name": "success", "name": "success",
"value": true, "value": true,
"type": "boolean" "type": "boolean"
},
{
"id": "877d1bf8-31a7-4571-9293-a6837b51d22b",
"name": "metric1",
"value": 0.1,
"type": "number"
} }
] ]
}, },

View file

@ -2,15 +2,17 @@ import type { SelectQueryBuilder } from '@n8n/typeorm';
import { stringify } from 'flatted'; import { stringify } from 'flatted';
import { readFileSync } from 'fs'; import { readFileSync } from 'fs';
import { mock, mockDeep } from 'jest-mock-extended'; import { mock, mockDeep } from 'jest-mock-extended';
import type { IRun } from 'n8n-workflow'; import type { GenericValue, IRun } from 'n8n-workflow';
import path from 'path'; import path from 'path';
import type { ActiveExecutions } from '@/active-executions'; import type { ActiveExecutions } from '@/active-executions';
import type { ExecutionEntity } from '@/databases/entities/execution-entity'; import type { ExecutionEntity } from '@/databases/entities/execution-entity';
import type { TestDefinition } from '@/databases/entities/test-definition.ee'; import type { TestDefinition } from '@/databases/entities/test-definition.ee';
import type { TestMetric } from '@/databases/entities/test-metric.ee';
import type { TestRun } from '@/databases/entities/test-run.ee'; import type { TestRun } from '@/databases/entities/test-run.ee';
import type { User } from '@/databases/entities/user'; import type { User } from '@/databases/entities/user';
import type { ExecutionRepository } from '@/databases/repositories/execution.repository'; import type { ExecutionRepository } from '@/databases/repositories/execution.repository';
import type { TestMetricRepository } from '@/databases/repositories/test-metric.repository.ee';
import type { TestRunRepository } from '@/databases/repositories/test-run.repository.ee'; import type { TestRunRepository } from '@/databases/repositories/test-run.repository.ee';
import type { WorkflowRepository } from '@/databases/repositories/workflow.repository'; import type { WorkflowRepository } from '@/databases/repositories/workflow.repository';
import type { WorkflowRunner } from '@/workflow-runner'; import type { WorkflowRunner } from '@/workflow-runner';
@ -58,12 +60,38 @@ function mockExecutionData() {
}); });
} }
/**
 * Builds a mocked evaluation workflow run. The run reports 'lastNode' as the
 * last executed node, and that node's run data holds a single item on its
 * first main output whose JSON payload is the given `metrics` object.
 */
function mockEvaluationExecutionData(metrics: Record<string, GenericValue>) {
	// The only output item of the last node carries the metrics
	const outputItem = { json: metrics };
	const lastNodeTaskData = { data: { main: [[outputItem]] } };

	return mock<IRun>({
		data: {
			resultData: {
				lastNodeExecuted: 'lastNode',
				runData: { lastNode: [lastNodeTaskData] },
			},
		},
	});
}
describe('TestRunnerService', () => { describe('TestRunnerService', () => {
const executionRepository = mock<ExecutionRepository>(); const executionRepository = mock<ExecutionRepository>();
const workflowRepository = mock<WorkflowRepository>(); const workflowRepository = mock<WorkflowRepository>();
const workflowRunner = mock<WorkflowRunner>(); const workflowRunner = mock<WorkflowRunner>();
const activeExecutions = mock<ActiveExecutions>(); const activeExecutions = mock<ActiveExecutions>();
const testRunRepository = mock<TestRunRepository>(); const testRunRepository = mock<TestRunRepository>();
const testMetricRepository = mock<TestMetricRepository>();
beforeEach(() => { beforeEach(() => {
const executionsQbMock = mockDeep<SelectQueryBuilder<ExecutionEntity>>({ const executionsQbMock = mockDeep<SelectQueryBuilder<ExecutionEntity>>({
@ -80,6 +108,11 @@ describe('TestRunnerService', () => {
.mockResolvedValueOnce(executionMocks[1]); .mockResolvedValueOnce(executionMocks[1]);
testRunRepository.createTestRun.mockResolvedValue(mock<TestRun>({ id: 'test-run-id' })); testRunRepository.createTestRun.mockResolvedValue(mock<TestRun>({ id: 'test-run-id' }));
testMetricRepository.find.mockResolvedValue([
mock<TestMetric>({ name: 'metric1' }),
mock<TestMetric>({ name: 'metric2' }),
]);
}); });
afterEach(() => { afterEach(() => {
@ -97,6 +130,7 @@ describe('TestRunnerService', () => {
executionRepository, executionRepository,
activeExecutions, activeExecutions,
testRunRepository, testRunRepository,
testMetricRepository,
); );
expect(testRunnerService).toBeInstanceOf(TestRunnerService); expect(testRunnerService).toBeInstanceOf(TestRunnerService);
@ -109,6 +143,7 @@ describe('TestRunnerService', () => {
executionRepository, executionRepository,
activeExecutions, activeExecutions,
testRunRepository, testRunRepository,
testMetricRepository,
); );
workflowRepository.findById.calledWith('workflow-under-test-id').mockResolvedValueOnce({ workflowRepository.findById.calledWith('workflow-under-test-id').mockResolvedValueOnce({
@ -143,6 +178,7 @@ describe('TestRunnerService', () => {
executionRepository, executionRepository,
activeExecutions, activeExecutions,
testRunRepository, testRunRepository,
testMetricRepository,
); );
workflowRepository.findById.calledWith('workflow-under-test-id').mockResolvedValueOnce({ workflowRepository.findById.calledWith('workflow-under-test-id').mockResolvedValueOnce({
@ -166,17 +202,17 @@ describe('TestRunnerService', () => {
.mockResolvedValue(mockExecutionData()); .mockResolvedValue(mockExecutionData());
activeExecutions.getPostExecutePromise activeExecutions.getPostExecutePromise
.calledWith('some-execution-id-2') .calledWith('some-execution-id-3')
.mockResolvedValue(mockExecutionData()); .mockResolvedValue(mockExecutionData());
// Mock executions of evaluation workflow // Mock executions of evaluation workflow
activeExecutions.getPostExecutePromise activeExecutions.getPostExecutePromise
.calledWith('some-execution-id-3') .calledWith('some-execution-id-2')
.mockResolvedValue(mockExecutionData()); .mockResolvedValue(mockEvaluationExecutionData({ metric1: 1, metric2: 0 }));
activeExecutions.getPostExecutePromise activeExecutions.getPostExecutePromise
.calledWith('some-execution-id-4') .calledWith('some-execution-id-4')
.mockResolvedValue(mockExecutionData()); .mockResolvedValue(mockEvaluationExecutionData({ metric1: 0.5 }));
await testRunnerService.runTest( await testRunnerService.runTest(
mock<User>(), mock<User>(),
@ -225,7 +261,8 @@ describe('TestRunnerService', () => {
expect(testRunRepository.markAsRunning).toHaveBeenCalledWith('test-run-id'); expect(testRunRepository.markAsRunning).toHaveBeenCalledWith('test-run-id');
expect(testRunRepository.markAsCompleted).toHaveBeenCalledTimes(1); expect(testRunRepository.markAsCompleted).toHaveBeenCalledTimes(1);
expect(testRunRepository.markAsCompleted).toHaveBeenCalledWith('test-run-id', { expect(testRunRepository.markAsCompleted).toHaveBeenCalledWith('test-run-id', {
success: false, metric1: 0.75,
metric2: 0,
}); });
}); });
}); });

View file

@ -0,0 +1,32 @@
import type { IDataObject } from 'n8n-workflow';
/**
 * Collects numeric metric values reported by individual evaluation results
 * and aggregates them into one average per metric.
 *
 * Only the metric names given to the constructor are tracked; values reported
 * under any other name are ignored.
 */
export class EvaluationMetrics {
	/**
	 * Values recorded so far, keyed by metric name. The keys are fixed at
	 * construction time and are the single source of truth for which metrics
	 * are tracked.
	 */
	private readonly rawMetricsByName = new Map<string, number[]>();

	/**
	 * @param metricNames Names of the metrics to collect. The set is only read
	 * during construction, so mutating it afterwards has no effect on this
	 * instance.
	 */
	constructor(metricNames: Set<string>) {
		for (const metricName of metricNames) {
			this.rawMetricsByName.set(metricName, []);
		}
	}

	/**
	 * Records the values of all tracked metrics found in `result`.
	 *
	 * Entries are skipped when their key is not a tracked metric name or when
	 * their value is not a finite number (strings, arrays, objects, `NaN` and
	 * `±Infinity` are all ignored).
	 */
	addResults(result: IDataObject) {
		for (const [metricName, metricValue] of Object.entries(result)) {
			// NaN and ±Infinity are `typeof 'number'` but would poison the average
			if (typeof metricValue !== 'number' || !Number.isFinite(metricValue)) continue;

			// Look the bucket up directly so an untracked name can never
			// dereference a missing entry
			const recordedValues = this.rawMetricsByName.get(metricName);
			if (recordedValues === undefined) continue;

			recordedValues.push(metricValue);
		}
	}

	/**
	 * Computes the arithmetic mean of the recorded values of every metric.
	 *
	 * @returns An object mapping metric name to its average. Metrics without
	 * any recorded value are omitted.
	 */
	getAggregatedMetrics() {
		const aggregatedMetrics: Record<string, number> = {};

		for (const [metricName, metricValues] of this.rawMetricsByName.entries()) {
			if (metricValues.length > 0) {
				const metricSum = metricValues.reduce((acc, val) => acc + val, 0);
				aggregatedMetrics[metricName] = metricSum / metricValues.length;
			}
		}

		return aggregatedMetrics;
	}
}

View file

@ -15,11 +15,13 @@ import type { TestDefinition } from '@/databases/entities/test-definition.ee';
import type { User } from '@/databases/entities/user'; import type { User } from '@/databases/entities/user';
import type { WorkflowEntity } from '@/databases/entities/workflow-entity'; import type { WorkflowEntity } from '@/databases/entities/workflow-entity';
import { ExecutionRepository } from '@/databases/repositories/execution.repository'; import { ExecutionRepository } from '@/databases/repositories/execution.repository';
import { TestMetricRepository } from '@/databases/repositories/test-metric.repository.ee';
import { TestRunRepository } from '@/databases/repositories/test-run.repository.ee'; import { TestRunRepository } from '@/databases/repositories/test-run.repository.ee';
import { WorkflowRepository } from '@/databases/repositories/workflow.repository'; import { WorkflowRepository } from '@/databases/repositories/workflow.repository';
import { getRunData } from '@/workflow-execute-additional-data'; import { getRunData } from '@/workflow-execute-additional-data';
import { WorkflowRunner } from '@/workflow-runner'; import { WorkflowRunner } from '@/workflow-runner';
import { EvaluationMetrics } from './evaluation-metrics.ee';
import { createPinData, getPastExecutionStartNode } from './utils.ee'; import { createPinData, getPastExecutionStartNode } from './utils.ee';
/** /**
@ -40,6 +42,7 @@ export class TestRunnerService {
private readonly executionRepository: ExecutionRepository, private readonly executionRepository: ExecutionRepository,
private readonly activeExecutions: ActiveExecutions, private readonly activeExecutions: ActiveExecutions,
private readonly testRunRepository: TestRunRepository, private readonly testRunRepository: TestRunRepository,
private readonly testMetricRepository: TestMetricRepository,
) {} ) {}
/** /**
@ -113,6 +116,11 @@ export class TestRunnerService {
return await executePromise; return await executePromise;
} }
/**
* Evaluation result is the first item in the output of the last node
* executed in the evaluation workflow. Defaults to an empty object
* in case the node doesn't produce any output items.
*/
private extractEvaluationResult(execution: IRun): IDataObject { private extractEvaluationResult(execution: IRun): IDataObject {
const lastNodeExecuted = execution.data.resultData.lastNodeExecuted; const lastNodeExecuted = execution.data.resultData.lastNodeExecuted;
assert(lastNodeExecuted, 'Could not find the last node executed in evaluation workflow'); assert(lastNodeExecuted, 'Could not find the last node executed in evaluation workflow');
@ -124,6 +132,21 @@ export class TestRunnerService {
return mainConnectionData?.[0]?.json ?? {}; return mainConnectionData?.[0]?.json ?? {};
} }
/**
 * Loads the test metrics whose test definition has the given id and
 * returns the set of their names.
 */
private async getTestMetricNames(testDefinitionId: string) {
	const where = { testDefinition: { id: testDefinitionId } };
	const testMetrics = await this.testMetricRepository.find({ where });

	// Only the names are needed downstream; the Set also drops duplicates
	const metricNames = testMetrics.map(({ name }) => name);
	return new Set(metricNames);
}
/** /**
* Creates a new test run for the given test definition. * Creates a new test run for the given test definition.
*/ */
@ -152,11 +175,15 @@ export class TestRunnerService {
.andWhere('execution.workflowId = :workflowId', { workflowId: test.workflowId }) .andWhere('execution.workflowId = :workflowId', { workflowId: test.workflowId })
.getMany(); .getMany();
// Get the metrics to collect from the evaluation workflow
const testMetricNames = await this.getTestMetricNames(test.id);
// 2. Run over all the test cases // 2. Run over all the test cases
await this.testRunRepository.markAsRunning(testRun.id); await this.testRunRepository.markAsRunning(testRun.id);
const metrics = []; // Object to collect the results of the evaluation workflow executions
const metrics = new EvaluationMetrics(testMetricNames);
for (const { id: pastExecutionId } of pastExecutions) { for (const { id: pastExecutionId } of pastExecutions) {
// Fetch past execution with data // Fetch past execution with data
@ -192,12 +219,10 @@ export class TestRunnerService {
assert(evalExecution); assert(evalExecution);
// Extract the output of the last node executed in the evaluation workflow // Extract the output of the last node executed in the evaluation workflow
metrics.push(this.extractEvaluationResult(evalExecution)); metrics.addResults(this.extractEvaluationResult(evalExecution));
} }
// TODO: 3. Aggregate the results const aggregatedMetrics = metrics.getAggregatedMetrics();
// Now we just set success to true if all the test cases passed
const aggregatedMetrics = { success: metrics.every((metric) => metric.success) };
await this.testRunRepository.markAsCompleted(testRun.id, aggregatedMetrics); await this.testRunRepository.markAsCompleted(testRun.id, aggregatedMetrics);
} }