fix(AI Agent Node): Fix tools agent when using memory and Anthropic models (#10513)

This commit is contained in:
oleg 2024-09-04 14:45:14 +02:00 committed by GitHub
parent 8b5c333d3d
commit 746e7b89f7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -1,7 +1,7 @@
import { BINARY_ENCODING, NodeConnectionType, NodeOperationError } from 'n8n-workflow';
import type { IExecuteFunctions, INodeExecutionData } from 'n8n-workflow';
import type { AgentAction, AgentFinish, AgentStep } from 'langchain/agents';
import type { AgentAction, AgentFinish } from 'langchain/agents';
import { AgentExecutor, createToolCallingAgent } from 'langchain/agents';
import type { BaseChatMemory } from '@langchain/community/memory/chat_memory';
import type { BaseMessagePromptTemplateLike } from '@langchain/core/prompts';
@ -9,12 +9,12 @@ import { ChatPromptTemplate } from '@langchain/core/prompts';
import { omit } from 'lodash';
import type { Tool } from '@langchain/core/tools';
import { DynamicStructuredTool } from '@langchain/core/tools';
import { RunnableSequence } from '@langchain/core/runnables';
import type { ZodObject } from 'zod';
import { z } from 'zod';
import type { BaseOutputParser, StructuredOutputParser } from '@langchain/core/output_parsers';
import { OutputFixingParser } from 'langchain/output_parsers';
import { HumanMessage } from '@langchain/core/messages';
import { RunnableSequence } from '@langchain/core/runnables';
import {
isChatInstance,
getPromptInputByType,
@ -93,7 +93,69 @@ export async function toolsAgentExecute(this: IExecuteFunctions): Promise<INodeE
const tools = (await getConnectedTools(this, true, false)) as Array<DynamicStructuredTool | Tool>;
const outputParser = (await getOptionalOutputParsers(this))?.[0];
let structuredOutputParserTool: DynamicStructuredTool | undefined;
/**
* Ensures consistent handling of outputs regardless of the model used,
* providing a unified output format for further processing.
*
* This method is necessary to handle different output formats from various language models.
* Specifically, it checks if the agent step is the final step (contains returnValues) and determines
* if the output is a simple string (e.g., from OpenAI models) or an array of outputs (e.g., from Anthropic models).
*
* Examples:
* 1. Anthropic model output:
* ```json
* {
* "output": [
* {
* "index": 0,
* "type": "text",
* "text": "The result of the calculation is approximately 1001.8166..."
* }
* ]
* }
*```
* 2. OpenAI model output:
* ```json
* {
* "output": "The result of the calculation is approximately 1001.82..."
* }
* ```
*
* @param steps - The agent finish or agent action steps.
* @returns The modified agent finish steps or the original steps.
*/
function handleAgentFinishOutput(steps: AgentFinish | AgentAction[]) {
// Check if the steps contain multiple outputs
type AgentMultiOutputFinish = AgentFinish & {
returnValues: { output: Array<{ text: string; type: string; index: number }> };
};
const agentFinishSteps = steps as AgentMultiOutputFinish | AgentFinish;
if (agentFinishSteps.returnValues) {
const isMultiOutput = Array.isArray(agentFinishSteps.returnValues?.output);
if (isMultiOutput) {
// Define the type for each item in the multi-output array
type MultiOutputItem = { index: number; type: string; text: string };
const multiOutputSteps = agentFinishSteps.returnValues.output as MultiOutputItem[];
// Check if all items in the multi-output array are of type 'text'
const isTextOnly = (multiOutputSteps ?? []).every((output) => 'text' in output);
if (isTextOnly) {
// If all items are of type 'text', merge them into a single string
agentFinishSteps.returnValues.output = multiOutputSteps
.map((output) => output.text)
.join('\n')
.trim();
}
return agentFinishSteps;
}
}
// If the steps do not contain multiple outputs, return them as is
return agentFinishSteps;
}
async function agentStepsParser(
steps: AgentFinish | AgentAction[],
): Promise<AgentFinish | AgentAction[]> {
@ -112,7 +174,6 @@ export async function toolsAgentExecute(this: IExecuteFunctions): Promise<INodeE
};
}
}
// If the steps are an AgentFinish and the outputParser is defined it must mean that the LLM didn't use `format_final_response` tool so we will parse the output manually
if (outputParser && typeof steps === 'object' && (steps as AgentFinish).returnValues) {
const finalResponse = (steps as AgentFinish).returnValues;
@ -126,7 +187,7 @@ export async function toolsAgentExecute(this: IExecuteFunctions): Promise<INodeE
log: 'Final response formatted',
};
}
return steps;
return handleAgentFinishOutput(steps);
}
if (outputParser) {
@ -172,9 +233,7 @@ export async function toolsAgentExecute(this: IExecuteFunctions): Promise<INodeE
});
agent.streamRunnable = false;
const runnableAgent = RunnableSequence.from<{
steps: AgentStep[];
}>([agent, agentStepsParser]);
const runnableAgent = RunnableSequence.from([agent, agentStepsParser]);
const executor = AgentExecutor.fromAgentAndTools({
agent: runnableAgent,
@ -196,7 +255,7 @@ export async function toolsAgentExecute(this: IExecuteFunctions): Promise<INodeE
});
if (input === undefined) {
throw new NodeOperationError(this.getNode(), 'The text parameter is empty.');
throw new NodeOperationError(this.getNode(), 'The text parameter is empty.');
}
// OpenAI doesn't allow empty tools array so we will provide a more user-friendly error message