Mirror of https://github.com/n8n-io/n8n.git (synced 2024-12-25 04:34:06 -08:00)
refactor(Structured Output Parser Node): Sandbox JSON schema parsing (no-changelog) (#9239)
Signed-off-by: Oleg Ivaniv <me@olegivaniv.com>
This commit is contained in:
parent e17e767e70
commit f84abc0586
|
@ -8,21 +8,22 @@ import {
|
|||
NodeOperationError,
|
||||
NodeConnectionType,
|
||||
} from 'n8n-workflow';
|
||||
|
||||
import { parseSchema } from 'json-schema-to-zod';
|
||||
import { z } from 'zod';
|
||||
import type { JSONSchema7 } from 'json-schema';
|
||||
import { StructuredOutputParser } from 'langchain/output_parsers';
|
||||
import { OutputParserException } from '@langchain/core/output_parsers';
|
||||
import get from 'lodash/get';
|
||||
import { logWrapper } from '../../../utils/logWrapper';
|
||||
import { getSandboxContext } from 'n8n-nodes-base/dist/nodes/Code/Sandbox';
|
||||
import { JavaScriptSandbox } from 'n8n-nodes-base/dist/nodes/Code/JavaScriptSandbox';
|
||||
import { makeResolverFromLegacyOptions } from '@n8n/vm2';
|
||||
import { getConnectionHintNoticeField } from '../../../utils/sharedFields';
|
||||
import { logWrapper } from '../../../utils/logWrapper';
|
||||
|
||||
const STRUCTURED_OUTPUT_KEY = '__structured__output';
|
||||
const STRUCTURED_OUTPUT_OBJECT_KEY = '__structured__output__object';
|
||||
const STRUCTURED_OUTPUT_ARRAY_KEY = '__structured__output__array';
|
||||
|
||||
class N8nStructuredOutputParser<T extends z.ZodTypeAny> extends StructuredOutputParser<T> {
|
||||
export class N8nStructuredOutputParser<T extends z.ZodTypeAny> extends StructuredOutputParser<T> {
|
||||
async parse(text: string): Promise<z.infer<T>> {
|
||||
try {
|
||||
const parsed = (await super.parse(text)) as object;
|
||||
|
@ -39,26 +40,19 @@ class N8nStructuredOutputParser<T extends z.ZodTypeAny> extends StructuredOutput
|
|||
}
|
||||
}
|
||||
|
||||
static fromZedJsonSchema(
|
||||
schema: JSONSchema7,
|
||||
static async fromZedJsonSchema(
|
||||
sandboxedSchema: JavaScriptSandbox,
|
||||
nodeVersion: number,
|
||||
): StructuredOutputParser<z.ZodType<object, z.ZodTypeDef, object>> {
|
||||
// Make sure to remove the description from root schema
|
||||
const { description, ...restOfSchema } = schema;
|
||||
|
||||
const zodSchemaString = parseSchema(restOfSchema as JSONSchema7);
|
||||
|
||||
// TODO: This is obviously not great and should be replaced later!!!
|
||||
// eslint-disable-next-line @typescript-eslint/no-implied-eval
|
||||
const itemSchema = new Function('z', `return (${zodSchemaString})`)(z) as z.ZodSchema<object>;
|
||||
): Promise<StructuredOutputParser<z.ZodType<object, z.ZodTypeDef, object>>> {
|
||||
const zodSchema = (await sandboxedSchema.runCode()) as z.ZodSchema<object>;
|
||||
|
||||
let returnSchema: z.ZodSchema<object>;
|
||||
if (nodeVersion === 1) {
|
||||
returnSchema = z.object({
|
||||
[STRUCTURED_OUTPUT_KEY]: z
|
||||
.object({
|
||||
[STRUCTURED_OUTPUT_OBJECT_KEY]: itemSchema.optional(),
|
||||
[STRUCTURED_OUTPUT_ARRAY_KEY]: z.array(itemSchema).optional(),
|
||||
[STRUCTURED_OUTPUT_OBJECT_KEY]: zodSchema.optional(),
|
||||
[STRUCTURED_OUTPUT_ARRAY_KEY]: z.array(zodSchema).optional(),
|
||||
})
|
||||
.describe(
|
||||
`Wrapper around the output data. It can only contain ${STRUCTURED_OUTPUT_OBJECT_KEY} or ${STRUCTURED_OUTPUT_ARRAY_KEY} but never both.`,
|
||||
|
@ -80,7 +74,7 @@ class N8nStructuredOutputParser<T extends z.ZodTypeAny> extends StructuredOutput
|
|||
});
|
||||
} else {
|
||||
returnSchema = z.object({
|
||||
output: itemSchema.optional(),
|
||||
output: zodSchema.optional(),
|
||||
});
|
||||
}
|
||||
|
||||
|
@ -166,18 +160,64 @@ export class OutputParserStructured implements INodeType {
|
|||
if (itemSchema.type === undefined) {
|
||||
itemSchema = {
|
||||
type: 'object',
|
||||
properties: itemSchema.properties || (itemSchema as { [key: string]: JSONSchema7 }),
|
||||
properties: itemSchema.properties ?? (itemSchema as { [key: string]: JSONSchema7 }),
|
||||
};
|
||||
}
|
||||
} catch (error) {
|
||||
throw new NodeOperationError(this.getNode(), 'Error during parsing of JSON Schema.');
|
||||
}
|
||||
|
||||
const nodeVersion = this.getNode().typeVersion;
|
||||
const parser = N8nStructuredOutputParser.fromZedJsonSchema(itemSchema, nodeVersion);
|
||||
const vmResolver = makeResolverFromLegacyOptions({
|
||||
external: {
|
||||
modules: ['json-schema-to-zod', 'zod'],
|
||||
transitive: false,
|
||||
},
|
||||
resolve(moduleName, parentDirname) {
|
||||
if (moduleName === 'json-schema-to-zod') {
|
||||
return require.resolve(
|
||||
'@n8n/n8n-nodes-langchain/node_modules/json-schema-to-zod/dist/cjs/jsonSchemaToZod.js',
|
||||
{
|
||||
paths: [parentDirname],
|
||||
},
|
||||
);
|
||||
}
|
||||
if (moduleName === 'zod') {
|
||||
return require.resolve('@n8n/n8n-nodes-langchain/node_modules/zod.cjs', {
|
||||
paths: [parentDirname],
|
||||
});
|
||||
}
|
||||
return;
|
||||
},
|
||||
builtin: [],
|
||||
});
|
||||
const context = getSandboxContext.call(this, itemIndex);
|
||||
// Make sure to remove the description from root schema
|
||||
const { description, ...restOfSchema } = itemSchema;
|
||||
const sandboxedSchema = new JavaScriptSandbox(
|
||||
context,
|
||||
`
|
||||
const { z } = require('zod');
|
||||
const { parseSchema } = require('json-schema-to-zod');
|
||||
const zodSchema = parseSchema(${JSON.stringify(restOfSchema)});
|
||||
const itemSchema = new Function('z', 'return (' + zodSchema + ')')(z)
|
||||
return itemSchema
|
||||
`,
|
||||
itemIndex,
|
||||
this.helpers,
|
||||
{ resolver: vmResolver },
|
||||
);
|
||||
|
||||
return {
|
||||
response: logWrapper(parser, this),
|
||||
};
|
||||
const nodeVersion = this.getNode().typeVersion;
|
||||
try {
|
||||
const parser = await N8nStructuredOutputParser.fromZedJsonSchema(
|
||||
sandboxedSchema,
|
||||
nodeVersion,
|
||||
);
|
||||
return {
|
||||
response: logWrapper(parser, this),
|
||||
};
|
||||
} catch (error) {
|
||||
throw new NodeOperationError(this.getNode(), 'Error during parsing of JSON Schema.');
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,149 @@
|
|||
import type { IExecuteFunctions, IWorkflowDataProxyData } from 'n8n-workflow';
|
||||
import { mock } from 'jest-mock-extended';
|
||||
import { normalizeItems } from 'n8n-core';
|
||||
import type { z } from 'zod';
|
||||
import type { StructuredOutputParser } from 'langchain/output_parsers';
|
||||
import { OutputParserStructured } from '../OutputParserStructured.node';
|
||||
|
||||
describe('OutputParserStructured', () => {
|
||||
let outputParser: OutputParserStructured;
|
||||
const thisArg = mock<IExecuteFunctions>({
|
||||
helpers: { normalizeItems },
|
||||
});
|
||||
const workflowDataProxy = mock<IWorkflowDataProxyData>({ $input: mock() });
|
||||
thisArg.getWorkflowDataProxy.mockReturnValue(workflowDataProxy);
|
||||
thisArg.getNode.mockReturnValue({ typeVersion: 1.1 });
|
||||
thisArg.addInputData.mockReturnValue({ index: 0 });
|
||||
thisArg.addOutputData.mockReturnValue();
|
||||
|
||||
beforeEach(() => {
|
||||
outputParser = new OutputParserStructured();
|
||||
});
|
||||
|
||||
describe('supplyData', () => {
|
||||
it('should parse a valid JSON schema', async () => {
|
||||
const schema = `{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"name": {
|
||||
"type": "string"
|
||||
},
|
||||
"age": {
|
||||
"type": "number"
|
||||
}
|
||||
},
|
||||
"required": ["name", "age"]
|
||||
}`;
|
||||
thisArg.getNodeParameter.calledWith('jsonSchema', 0).mockReturnValueOnce(schema);
|
||||
const { response } = (await outputParser.supplyData.call(thisArg, 0)) as {
|
||||
response: StructuredOutputParser<z.ZodType<object, z.ZodTypeDef, object>>;
|
||||
};
|
||||
const outputObject = { output: { name: 'Mac', age: 27 } };
|
||||
const parsersOutput = await response.parse(`Here's the output!
|
||||
\`\`\`json
|
||||
${JSON.stringify(outputObject)}
|
||||
\`\`\`
|
||||
`);
|
||||
|
||||
expect(parsersOutput).toEqual(outputObject);
|
||||
});
|
||||
it('should handle missing required properties', async () => {
|
||||
const schema = `{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"name": {
|
||||
"type": "string"
|
||||
},
|
||||
"age": {
|
||||
"type": "number"
|
||||
}
|
||||
},
|
||||
"required": ["name", "age"]
|
||||
}`;
|
||||
thisArg.getNodeParameter.calledWith('jsonSchema', 0).mockReturnValueOnce(schema);
|
||||
const { response } = (await outputParser.supplyData.call(thisArg, 0)) as {
|
||||
response: StructuredOutputParser<z.ZodType<object, z.ZodTypeDef, object>>;
|
||||
};
|
||||
const outputObject = { output: { name: 'Mac' } };
|
||||
|
||||
await expect(
|
||||
response.parse(`Here's the output!
|
||||
\`\`\`json
|
||||
${JSON.stringify(outputObject)}
|
||||
\`\`\`
|
||||
`),
|
||||
).rejects.toThrow('Required');
|
||||
});
|
||||
|
||||
it('should throw on wrong type', async () => {
|
||||
const schema = `{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"name": {
|
||||
"type": "string"
|
||||
},
|
||||
"age": {
|
||||
"type": "number"
|
||||
}
|
||||
},
|
||||
"required": ["name", "age"]
|
||||
}`;
|
||||
thisArg.getNodeParameter.calledWith('jsonSchema', 0).mockReturnValueOnce(schema);
|
||||
const { response } = (await outputParser.supplyData.call(thisArg, 0)) as {
|
||||
response: StructuredOutputParser<z.ZodType<object, z.ZodTypeDef, object>>;
|
||||
};
|
||||
const outputObject = { output: { name: 'Mac', age: '27' } };
|
||||
|
||||
await expect(
|
||||
response.parse(`Here's the output!
|
||||
\`\`\`json
|
||||
${JSON.stringify(outputObject)}
|
||||
\`\`\`
|
||||
`),
|
||||
).rejects.toThrow('Expected number, received string');
|
||||
});
|
||||
|
||||
it('should parse array output', async () => {
|
||||
const schema = `{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"myArr": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"name": {
|
||||
"type": "string"
|
||||
},
|
||||
"age": {
|
||||
"type": "number"
|
||||
}
|
||||
},
|
||||
"required": ["name", "age"]
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": ["myArr"]
|
||||
}`;
|
||||
thisArg.getNodeParameter.calledWith('jsonSchema', 0).mockReturnValueOnce(schema);
|
||||
const { response } = (await outputParser.supplyData.call(thisArg, 0)) as {
|
||||
response: StructuredOutputParser<z.ZodType<object, z.ZodTypeDef, object>>;
|
||||
};
|
||||
const outputObject = {
|
||||
output: {
|
||||
myArr: [
|
||||
{ name: 'Mac', age: 27 },
|
||||
{ name: 'Alice', age: 25 },
|
||||
],
|
||||
},
|
||||
};
|
||||
const parsersOutput = await response.parse(`Here's the output!
|
||||
\`\`\`json
|
||||
${JSON.stringify(outputObject)}
|
||||
\`\`\`
|
||||
`);
|
||||
|
||||
expect(parsersOutput).toEqual(outputObject);
|
||||
});
|
||||
});
|
||||
});
|
|
@ -156,6 +156,7 @@
|
|||
"epub2": "3.0.2",
|
||||
"form-data": "4.0.0",
|
||||
"html-to-text": "9.0.5",
|
||||
"jest-mock-extended": "^3.0.4",
|
||||
"json-schema-to-zod": "2.0.14",
|
||||
"langchain": "0.1.25",
|
||||
"lodash": "4.17.21",
|
||||
|
|
190 pnpm-lock.yaml
File diff suppressed because it is too large
Load diff
Loading…
Reference in a new issue