fix(AI Agent Node): Fix output parsing and empty tool input handling in AI Agent node (#10970)

This commit is contained in:
oleg 2024-09-30 10:09:39 +02:00 committed by GitHub
parent 0ca9c076ca
commit 3a65bdc1f5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -1,27 +1,28 @@
import { BINARY_ENCODING, NodeConnectionType, NodeOperationError } from 'n8n-workflow';
import type { IExecuteFunctions, INodeExecutionData } from 'n8n-workflow';
import type { AgentAction, AgentFinish } from 'langchain/agents';
import { AgentExecutor, createToolCallingAgent } from 'langchain/agents';
import type { BaseChatMemory } from '@langchain/community/memory/chat_memory'; import type { BaseChatMemory } from '@langchain/community/memory/chat_memory';
import { HumanMessage } from '@langchain/core/messages';
import type { BaseMessage } from '@langchain/core/messages';
import type { BaseOutputParser, StructuredOutputParser } from '@langchain/core/output_parsers';
import type { BaseMessagePromptTemplateLike } from '@langchain/core/prompts'; import type { BaseMessagePromptTemplateLike } from '@langchain/core/prompts';
import { ChatPromptTemplate } from '@langchain/core/prompts'; import { ChatPromptTemplate } from '@langchain/core/prompts';
import { omit } from 'lodash'; import { RunnableSequence } from '@langchain/core/runnables';
import type { Tool } from '@langchain/core/tools'; import type { Tool } from '@langchain/core/tools';
import { DynamicStructuredTool } from '@langchain/core/tools'; import { DynamicStructuredTool } from '@langchain/core/tools';
import type { AgentAction, AgentFinish } from 'langchain/agents';
import { AgentExecutor, createToolCallingAgent } from 'langchain/agents';
import { OutputFixingParser } from 'langchain/output_parsers';
import { omit } from 'lodash';
import { BINARY_ENCODING, jsonParse, NodeConnectionType, NodeOperationError } from 'n8n-workflow';
import type { IExecuteFunctions, INodeExecutionData } from 'n8n-workflow';
import type { ZodObject } from 'zod'; import type { ZodObject } from 'zod';
import { z } from 'zod'; import { z } from 'zod';
import type { BaseOutputParser, StructuredOutputParser } from '@langchain/core/output_parsers';
import { OutputFixingParser } from 'langchain/output_parsers'; import { SYSTEM_MESSAGE } from './prompt';
import { HumanMessage } from '@langchain/core/messages';
import { RunnableSequence } from '@langchain/core/runnables';
import { import {
isChatInstance, isChatInstance,
getPromptInputByType, getPromptInputByType,
getOptionalOutputParsers, getOptionalOutputParsers,
getConnectedTools, getConnectedTools,
} from '../../../../../utils/helpers'; } from '../../../../../utils/helpers';
import { SYSTEM_MESSAGE } from './prompt';
function getOutputParserSchema(outputParser: BaseOutputParser): ZodObject<any, any, any, any> { function getOutputParserSchema(outputParser: BaseOutputParser): ZodObject<any, any, any, any> {
const parserType = outputParser.lc_namespace[outputParser.lc_namespace.length - 1]; const parserType = outputParser.lc_namespace[outputParser.lc_namespace.length - 1];
@ -74,6 +75,39 @@ async function extractBinaryMessages(ctx: IExecuteFunctions) {
content: [...binaryMessages], content: [...binaryMessages],
}); });
} }
/**
 * Normalises empty tool inputs found in the message logs of agent steps.
 *
 * Needed when the agent is composed with RunnableSequence.from in LangChain:
 * a tool call without arguments is reported with `input: ''` (empty string).
 * Some providers (Anthropic, for example) require the input to always be an
 * object and fail otherwise, so every empty-string input is swapped for `{}`.
 *
 * The steps are patched in place and the same reference is handed back.
 * Anything that is not an array of steps (i.e. an AgentFinish) is returned untouched.
 *
 * @param steps - The agent steps to fix
 * @returns The fixed agent steps
 */
function fixEmptyContentMessage(steps: AgentFinish | AgentAction[]) {
	if (Array.isArray(steps)) {
		steps.forEach((agentStep) => {
			// Only steps that actually carry a message log are of interest
			if (!('messageLog' in agentStep) || agentStep.messageLog === undefined) return;
			if (!Array.isArray(agentStep.messageLog)) return;

			agentStep.messageLog.forEach((logEntry: BaseMessage) => {
				// Plain string content cannot hold tool inputs, only the array form can
				if (!('content' in logEntry) || !Array.isArray(logEntry.content)) return;

				// eslint-disable-next-line @typescript-eslint/no-unsafe-member-access
				const contentParts = logEntry.content as Array<{ input?: string | object }>;
				contentParts.forEach((part) => {
					if (part.input === '') {
						part.input = {};
					}
				});
			});
		});
	}

	return steps;
}
export async function toolsAgentExecute(this: IExecuteFunctions): Promise<INodeExecutionData[][]> { export async function toolsAgentExecute(this: IExecuteFunctions): Promise<INodeExecutionData[][]> {
this.logger.debug('Executing Tools Agent'); this.logger.debug('Executing Tools Agent');
@ -156,6 +190,14 @@ export async function toolsAgentExecute(this: IExecuteFunctions): Promise<INodeE
// If the steps do not contain multiple outputs, return them as is // If the steps do not contain multiple outputs, return them as is
return agentFinishSteps; return agentFinishSteps;
} }
// Wraps the parsed output into the final agent step.
// With a memory connected, the values are serialised to a JSON string under `output`
// so that they can be saved in the memory as a string; otherwise they are passed through as is.
function handleParsedStepOutput(output: Record<string, unknown>) {
	const returnValues = memory ? { output: JSON.stringify(output) } : output;

	return { returnValues, log: 'Final response formatted' };
}
async function agentStepsParser( async function agentStepsParser(
steps: AgentFinish | AgentAction[], steps: AgentFinish | AgentAction[],
): Promise<AgentFinish | AgentAction[]> { ): Promise<AgentFinish | AgentAction[]> {
@ -168,24 +210,18 @@ export async function toolsAgentExecute(this: IExecuteFunctions): Promise<INodeE
unknown unknown
>; >;
return { return handleParsedStepOutput(returnValues);
returnValues,
log: 'Final response formatted',
};
} }
} }
// If the steps are an AgentFinish and the outputParser is defined it must mean that the LLM didn't use `format_final_response` tool so we will parse the output manually
// If the steps are an AgentFinish and the outputParser is defined it must mean that the LLM didn't use `format_final_response` tool so we will try to parse the output manually
if (outputParser && typeof steps === 'object' && (steps as AgentFinish).returnValues) { if (outputParser && typeof steps === 'object' && (steps as AgentFinish).returnValues) {
const finalResponse = (steps as AgentFinish).returnValues; const finalResponse = (steps as AgentFinish).returnValues;
const returnValues = (await outputParser.parse(finalResponse as unknown as string)) as Record< const returnValues = (await outputParser.parse(finalResponse as unknown as string)) as Record<
string, string,
unknown unknown
>; >;
return handleParsedStepOutput(returnValues);
return {
returnValues,
log: 'Final response formatted',
};
} }
return handleAgentFinishOutput(steps); return handleAgentFinishOutput(steps);
} }
@ -233,7 +269,7 @@ export async function toolsAgentExecute(this: IExecuteFunctions): Promise<INodeE
}); });
agent.streamRunnable = false; agent.streamRunnable = false;
const runnableAgent = RunnableSequence.from([agent, agentStepsParser]); const runnableAgent = RunnableSequence.from([agent, agentStepsParser, fixEmptyContentMessage]);
const executor = AgentExecutor.fromAgentAndTools({ const executor = AgentExecutor.fromAgentAndTools({
agent: runnableAgent, agent: runnableAgent,
@ -273,6 +309,13 @@ export async function toolsAgentExecute(this: IExecuteFunctions): Promise<INodeE
'IMPORTANT: Always call `format_final_response` to format your final response!', 'IMPORTANT: Always call `format_final_response` to format your final response!',
}); });
if (memory && outputParser) {
const parsedOutput = jsonParse<{ output: Record<string, unknown> }>(
response.output as string,
);
response.output = parsedOutput?.output ?? parsedOutput;
}
returnData.push({ returnData.push({
json: omit( json: omit(
response, response,