refactor(Structured Output Parser Node): Sandbox JSON schema parsing (no-changelog) (#9239)

Signed-off-by: Oleg Ivaniv <me@olegivaniv.com>
This commit is contained in:
oleg 2024-04-29 13:59:55 +02:00 committed by GitHub
parent e17e767e70
commit f84abc0586
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 217 additions and 211 deletions

View file

@ -8,21 +8,22 @@ import {
NodeOperationError,
NodeConnectionType,
} from 'n8n-workflow';
import { parseSchema } from 'json-schema-to-zod';
import { z } from 'zod';
import type { JSONSchema7 } from 'json-schema';
import { StructuredOutputParser } from 'langchain/output_parsers';
import { OutputParserException } from '@langchain/core/output_parsers';
import get from 'lodash/get';
import { logWrapper } from '../../../utils/logWrapper';
import { getSandboxContext } from 'n8n-nodes-base/dist/nodes/Code/Sandbox';
import { JavaScriptSandbox } from 'n8n-nodes-base/dist/nodes/Code/JavaScriptSandbox';
import { makeResolverFromLegacyOptions } from '@n8n/vm2';
import { getConnectionHintNoticeField } from '../../../utils/sharedFields';
import { logWrapper } from '../../../utils/logWrapper';
// Keys of the wrapper schema that fromZedJsonSchema builds when nodeVersion === 1.
// Top-level key holding the wrapper object around the output data.
const STRUCTURED_OUTPUT_KEY = '__structured__output';
// Optional wrapper entry holding a single value that matches the item schema.
// Per the wrapper's description, it is never set together with the array entry.
const STRUCTURED_OUTPUT_OBJECT_KEY = '__structured__output__object';
// Optional wrapper entry holding an array of values that match the item schema.
const STRUCTURED_OUTPUT_ARRAY_KEY = '__structured__output__array';
class N8nStructuredOutputParser<T extends z.ZodTypeAny> extends StructuredOutputParser<T> {
export class N8nStructuredOutputParser<T extends z.ZodTypeAny> extends StructuredOutputParser<T> {
async parse(text: string): Promise<z.infer<T>> {
try {
const parsed = (await super.parse(text)) as object;
@ -39,26 +40,19 @@ class N8nStructuredOutputParser<T extends z.ZodTypeAny> extends StructuredOutput
}
}
static fromZedJsonSchema(
schema: JSONSchema7,
static async fromZedJsonSchema(
sandboxedSchema: JavaScriptSandbox,
nodeVersion: number,
): StructuredOutputParser<z.ZodType<object, z.ZodTypeDef, object>> {
// Make sure to remove the description from root schema
const { description, ...restOfSchema } = schema;
const zodSchemaString = parseSchema(restOfSchema as JSONSchema7);
// TODO: This is obviously not great and should be replaced later!!!
// eslint-disable-next-line @typescript-eslint/no-implied-eval
const itemSchema = new Function('z', `return (${zodSchemaString})`)(z) as z.ZodSchema<object>;
): Promise<StructuredOutputParser<z.ZodType<object, z.ZodTypeDef, object>>> {
const zodSchema = (await sandboxedSchema.runCode()) as z.ZodSchema<object>;
let returnSchema: z.ZodSchema<object>;
if (nodeVersion === 1) {
returnSchema = z.object({
[STRUCTURED_OUTPUT_KEY]: z
.object({
[STRUCTURED_OUTPUT_OBJECT_KEY]: itemSchema.optional(),
[STRUCTURED_OUTPUT_ARRAY_KEY]: z.array(itemSchema).optional(),
[STRUCTURED_OUTPUT_OBJECT_KEY]: zodSchema.optional(),
[STRUCTURED_OUTPUT_ARRAY_KEY]: z.array(zodSchema).optional(),
})
.describe(
`Wrapper around the output data. It can only contain ${STRUCTURED_OUTPUT_OBJECT_KEY} or ${STRUCTURED_OUTPUT_ARRAY_KEY} but never both.`,
@ -80,7 +74,7 @@ class N8nStructuredOutputParser<T extends z.ZodTypeAny> extends StructuredOutput
});
} else {
returnSchema = z.object({
output: itemSchema.optional(),
output: zodSchema.optional(),
});
}
@ -166,18 +160,64 @@ export class OutputParserStructured implements INodeType {
if (itemSchema.type === undefined) {
itemSchema = {
type: 'object',
properties: itemSchema.properties || (itemSchema as { [key: string]: JSONSchema7 }),
properties: itemSchema.properties ?? (itemSchema as { [key: string]: JSONSchema7 }),
};
}
} catch (error) {
throw new NodeOperationError(this.getNode(), 'Error during parsing of JSON Schema.');
}
const nodeVersion = this.getNode().typeVersion;
const parser = N8nStructuredOutputParser.fromZedJsonSchema(itemSchema, nodeVersion);
const vmResolver = makeResolverFromLegacyOptions({
external: {
modules: ['json-schema-to-zod', 'zod'],
transitive: false,
},
resolve(moduleName, parentDirname) {
if (moduleName === 'json-schema-to-zod') {
return require.resolve(
'@n8n/n8n-nodes-langchain/node_modules/json-schema-to-zod/dist/cjs/jsonSchemaToZod.js',
{
paths: [parentDirname],
},
);
}
if (moduleName === 'zod') {
return require.resolve('@n8n/n8n-nodes-langchain/node_modules/zod.cjs', {
paths: [parentDirname],
});
}
return;
},
builtin: [],
});
const context = getSandboxContext.call(this, itemIndex);
// Make sure to remove the description from root schema
const { description, ...restOfSchema } = itemSchema;
const sandboxedSchema = new JavaScriptSandbox(
context,
`
const { z } = require('zod');
const { parseSchema } = require('json-schema-to-zod');
const zodSchema = parseSchema(${JSON.stringify(restOfSchema)});
const itemSchema = new Function('z', 'return (' + zodSchema + ')')(z)
return itemSchema
`,
itemIndex,
this.helpers,
{ resolver: vmResolver },
);
return {
response: logWrapper(parser, this),
};
const nodeVersion = this.getNode().typeVersion;
try {
const parser = await N8nStructuredOutputParser.fromZedJsonSchema(
sandboxedSchema,
nodeVersion,
);
return {
response: logWrapper(parser, this),
};
} catch (error) {
throw new NodeOperationError(this.getNode(), 'Error during parsing of JSON Schema.');
}
}
}

View file

@ -0,0 +1,149 @@
import type { IExecuteFunctions, IWorkflowDataProxyData } from 'n8n-workflow';
import { mock } from 'jest-mock-extended';
import { normalizeItems } from 'n8n-core';
import type { z } from 'zod';
import type { StructuredOutputParser } from 'langchain/output_parsers';
import { OutputParserStructured } from '../OutputParserStructured.node';
/**
 * Tests for `OutputParserStructured.supplyData`.
 *
 * The node is invoked with a mocked `IExecuteFunctions` whose `getNode()` reports
 * `typeVersion: 1.1`; every payload used below is nested under an `output` key.
 * Each test feeds a JSON schema through the `jsonSchema` parameter, takes the
 * parser returned by `supplyData`, and runs it against a fenced ```json reply.
 */
describe('OutputParserStructured', () => {
	/** Shape of the `response` value that `supplyData` is expected to return. */
	type SuppliedParser = StructuredOutputParser<z.ZodType<object, z.ZodTypeDef, object>>;

	// Schema shared by the flat-object tests: `name` (string) and `age` (number),
	// both required. Kept as a raw string because that is what the
	// `jsonSchema` parameter stub returns.
	const PERSON_SCHEMA = `{
"type": "object",
"properties": {
"name": {
"type": "string"
},
"age": {
"type": "number"
}
},
"required": ["name", "age"]
}`;

	let outputParser: OutputParserStructured;

	const thisArg = mock<IExecuteFunctions>({
		helpers: { normalizeItems },
	});
	const workflowDataProxy = mock<IWorkflowDataProxyData>({ $input: mock() });
	thisArg.getWorkflowDataProxy.mockReturnValue(workflowDataProxy);
	thisArg.getNode.mockReturnValue({ typeVersion: 1.1 });
	thisArg.addInputData.mockReturnValue({ index: 0 });
	thisArg.addOutputData.mockReturnValue();

	/**
	 * Stubs the `jsonSchema` parameter for item 0 (once) and returns the parser
	 * that `supplyData` produces for it.
	 *
	 * @param schema - JSON schema source handed to the node as a string.
	 * @returns The `response` field of the `supplyData` result.
	 */
	const supplyParser = async (schema: string): Promise<SuppliedParser> => {
		thisArg.getNodeParameter.calledWith('jsonSchema', 0).mockReturnValueOnce(schema);
		const { response } = (await outputParser.supplyData.call(thisArg, 0)) as {
			response: SuppliedParser;
		};
		return response;
	};

	/**
	 * Builds the text handed to the parser: a short preamble followed by the
	 * payload serialized inside a ```json fenced block.
	 *
	 * @param payload - Object to serialize into the fenced block.
	 */
	const fencedReply = (payload: object): string => `Here's the output!
\`\`\`json
${JSON.stringify(payload)}
\`\`\`
`;

	beforeEach(() => {
		// Fresh node instance per test so no state carries over between cases.
		outputParser = new OutputParserStructured();
	});

	describe('supplyData', () => {
		it('should parse a valid JSON schema', async () => {
			const response = await supplyParser(PERSON_SCHEMA);
			const outputObject = { output: { name: 'Mac', age: 27 } };

			const parsersOutput = await response.parse(fencedReply(outputObject));

			expect(parsersOutput).toEqual(outputObject);
		});

		it('should handle missing required properties', async () => {
			const response = await supplyParser(PERSON_SCHEMA);
			// `age` is listed as required by the schema but is absent here.
			const outputObject = { output: { name: 'Mac' } };

			await expect(response.parse(fencedReply(outputObject))).rejects.toThrow('Required');
		});

		it('should throw on wrong type', async () => {
			const response = await supplyParser(PERSON_SCHEMA);
			// `age` is a string although the schema declares it as a number.
			const outputObject = { output: { name: 'Mac', age: '27' } };

			await expect(response.parse(fencedReply(outputObject))).rejects.toThrow(
				'Expected number, received string',
			);
		});

		it('should parse array output', async () => {
			const schema = `{
"type": "object",
"properties": {
"myArr": {
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {
"type": "string"
},
"age": {
"type": "number"
}
},
"required": ["name", "age"]
}
}
},
"required": ["myArr"]
}`;
			const response = await supplyParser(schema);
			const outputObject = {
				output: {
					myArr: [
						{ name: 'Mac', age: 27 },
						{ name: 'Alice', age: 25 },
					],
				},
			};

			const parsersOutput = await response.parse(fencedReply(outputObject));

			expect(parsersOutput).toEqual(outputObject);
		});
	});
});

View file

@ -156,6 +156,7 @@
"epub2": "3.0.2",
"form-data": "4.0.0",
"html-to-text": "9.0.5",
"jest-mock-extended": "^3.0.4",
"json-schema-to-zod": "2.0.14",
"langchain": "0.1.25",
"lodash": "4.17.21",

File diff suppressed because it is too large Load diff