n8n/packages/@n8n/nodes-langchain/nodes/llms/N8nLlmTracing.ts

import { BaseCallbackHandler } from '@langchain/core/callbacks/base';
import { getModelNameForTiktoken } from '@langchain/core/language_models/base';
import { encodingForModel } from '@langchain/core/utils/tiktoken';
import type {
	Serialized,
	SerializedNotImplemented,
	SerializedSecret,
} from '@langchain/core/load/serializable';
import type { LLMResult } from '@langchain/core/outputs';
import type { IDataObject, IExecuteFunctions } from 'n8n-workflow';
import { NodeConnectionType } from 'n8n-workflow';
import { pick } from 'lodash';
import type { BaseMessage } from '@langchain/core/messages';
import type { SerializedFields } from '@langchain/core/dist/load/map_keys';
import { logAiEvent } from '../../utils/helpers';

/**
 * Signature of a function that turns the `llmOutput` field of an `LLMResult`
 * into token usage counts. An implementation can be passed to the
 * `N8nLlmTracing` constructor to replace the default parser.
 */
type TokensUsageParser = (llmOutput: LLMResult['llmOutput']) => {
	completionTokens: number;
	promptTokens: number;
	totalTokens: number;
};

/**
 * Details recorded for a single LLM run in `handleLLMStart` and read back
 * in `handleLLMEnd`.
 */
type RunDetail = {
	// Index returned by `addInputData` when the run's input was registered
	index: number;
	// Prompts (or messages) that were passed to the LLM for this run
	messages: BaseMessage[] | string[] | string;
	// Serialized LLM description: its `kwargs` for `constructor` types, otherwise the serialized object itself
	options: SerializedSecret | SerializedNotImplemented | SerializedFields;
};

// Model name used to select the tiktoken encoding for token-count estimates
const TIKTOKEN_ESTIMATE_MODEL = 'gpt-3.5-turbo';
/**
 * LangChain callback handler that records the input and output of LLM runs
 * on the n8n execution (via `addInputData` / `addOutputData`) and emits AI
 * events through `logAiEvent`, including token usage (reported by the model
 * or, failing that, estimated with tiktoken).
 */
export class N8nLlmTracing extends BaseCallbackHandler {
	name = 'N8nLlmTracing';

	executionFunctions: IExecuteFunctions;

	connectionType = NodeConnectionType.AiLanguageModel;

	/**
	 * Prompt token estimate of the most recently started run.
	 * Kept for backward compatibility; `handleLLMEnd` prefers the per-run value.
	 */
	promptTokensEstimate = 0;

	completionTokensEstimate = 0;

	/**
	 * A map to associate LLM run IDs to run details.
	 * Key: Unique identifier for each LLM run (run ID)
	 * Value: RunDetails object
	 *
	 */
	runsMap: Record<string, RunDetail> = {};

	/**
	 * Prompt token estimates keyed by run ID, so that runs which overlap in
	 * time do not overwrite each other's estimate.
	 */
	private readonly promptTokensEstimateByRun = new Map<string, number>();

	options = {
		// Default(OpenAI format) parser
		tokensUsageParser: (llmOutput: LLMResult['llmOutput']) => {
			const completionTokens = (llmOutput?.tokenUsage?.completionTokens as number) ?? 0;
			const promptTokens = (llmOutput?.tokenUsage?.promptTokens as number) ?? 0;

			return {
				completionTokens,
				promptTokens,
				totalTokens: completionTokens + promptTokens,
			};
		},
	};

	/**
	 * @param executionFunctions Execution context used to register input/output data and log AI events
	 * @param options Optional overrides; `tokensUsageParser` replaces the default parser
	 */
	constructor(
		executionFunctions: IExecuteFunctions,
		options?: { tokensUsageParser: TokensUsageParser },
	) {
		super();
		this.executionFunctions = executionFunctions;
		this.options = { ...this.options, ...options };
	}

	/**
	 * Estimates the number of tokens in the `text` of all given generations.
	 *
	 * @param generations Generations of an LLM result
	 * @returns Sum of the estimated token counts
	 */
	async estimateTokensFromGeneration(generations: LLMResult['generations']): Promise<number> {
		const messages = generations.flatMap((gen) => gen.map((g) => g.text));
		return await this.estimateTokensFromStringList(messages);
	}

	/**
	 * Estimates the total number of tokens in a list of strings using the
	 * tiktoken encoding selected by `TIKTOKEN_ESTIMATE_MODEL`.
	 *
	 * @param list Strings to encode
	 * @returns Sum of the encoded lengths (0 for an empty list)
	 */
	async estimateTokensFromStringList(list: string[]): Promise<number> {
		const embeddingModel = getModelNameForTiktoken(TIKTOKEN_ESTIMATE_MODEL);
		const encoder = await encodingForModel(embeddingModel);

		// `encode` is synchronous, so the lengths can be summed directly
		return list.reduce((total, text) => total + encoder.encode(text).length, 0);
	}

	/**
	 * Called when an LLM run finishes. Registers the generations and token
	 * usage as output data and logs an `ai-llm-generated-output` event.
	 *
	 * @param output Result of the LLM run; its `generations` are reduced to `text` and `generationInfo`
	 * @param runId ID of the run, used to look up the details saved in `handleLLMStart`
	 */
	async handleLLMEnd(output: LLMResult, runId: string): Promise<void> {
		// The fallback should never happen since handleLLMStart should always set the run details
		// but just in case, we set the index to the length of the runsMap
		const runDetails: Pick<RunDetail, 'index'> & Partial<RunDetail> = this.runsMap[runId] ?? {
			index: Object.keys(this.runsMap).length,
		};

		// Prefer the estimate recorded for this run; fall back to the last recorded one
		const promptTokensEstimate =
			this.promptTokensEstimateByRun.get(runId) ?? this.promptTokensEstimate;
		this.promptTokensEstimateByRun.delete(runId);

		output.generations = output.generations.map((gen) =>
			gen.map((g) => pick(g, ['text', 'generationInfo'])),
		);

		const tokenUsageEstimate = {
			completionTokens: 0,
			promptTokens: 0,
			totalTokens: 0,
		};
		const tokenUsage = this.options.tokensUsageParser(output.llmOutput);

		if (output.generations.length > 0) {
			tokenUsageEstimate.completionTokens = await this.estimateTokensFromGeneration(
				output.generations,
			);

			tokenUsageEstimate.promptTokens = promptTokensEstimate;
			tokenUsageEstimate.totalTokens = tokenUsageEstimate.completionTokens + promptTokensEstimate;
		}
		const response: {
			response: { generations: LLMResult['generations'] };
			tokenUsageEstimate?: typeof tokenUsageEstimate;
			tokenUsage?: typeof tokenUsage;
		} = {
			response: { generations: output.generations },
		};

		// If the LLM response contains actual tokens usage, otherwise fallback to the estimate
		if (tokenUsage.completionTokens > 0) {
			response.tokenUsage = tokenUsage;
		} else {
			response.tokenUsageEstimate = tokenUsageEstimate;
		}

		// The fallback run details carry no messages; treat that as an empty list
		// instead of failing on `undefined.map`
		const messages: RunDetail['messages'] = runDetails.messages ?? [];
		const parsedMessages =
			typeof messages === 'string'
				? messages
				: messages.map((message) => {
						if (typeof message === 'string') return message;
						if (typeof message?.toJSON === 'function') return message.toJSON();

						return message;
					});

		this.executionFunctions.addOutputData(this.connectionType, runDetails.index, [
			[{ json: { ...response } }],
		]);
		void logAiEvent(this.executionFunctions, 'ai-llm-generated-output', {
			messages: parsedMessages,
			options: runDetails.options,
			response,
		});
	}

	/**
	 * Called when an LLM run starts. Registers the prompts, their estimated
	 * token count and the LLM options as input data, and saves the run details.
	 *
	 * @param llm Serialized description of the LLM
	 * @param prompts Prompts passed to the LLM
	 * @param runId ID of the run
	 */
	async handleLLMStart(llm: Serialized, prompts: string[], runId: string): Promise<void> {
		const estimatedTokens = await this.estimateTokensFromStringList(prompts);

		const options = llm.type === 'constructor' ? llm.kwargs : llm;
		const { index } = this.executionFunctions.addInputData(this.connectionType, [
			[
				{
					json: {
						messages: prompts,
						estimatedTokens,
						options,
					},
				},
			],
		]);

		// Save the run details for later use when processing `handleLLMEnd` event
		this.runsMap[runId] = {
			index,
			options,
			messages: prompts,
		};
		this.promptTokensEstimateByRun.set(runId, estimatedTokens);
		this.promptTokensEstimate = estimatedTokens;
	}

	/**
	 * Called when an LLM run fails. Strips headers not starting with `x-`
	 * from the error (in place) and logs an `ai-llm-errored` event.
	 *
	 * @param error Error raised by the LLM run
	 * @param runId ID of the failed run
	 * @param parentRunId ID of the parent run, if any
	 */
	async handleLLMError(
		error: IDataObject | Error,
		runId: string,
		parentRunId?: string | undefined,
	): Promise<void> {
		// The run will not reach `handleLLMEnd`, so drop its prompt estimate here
		this.promptTokensEstimateByRun.delete(runId);

		// Filter out non-x- headers to avoid leaking sensitive information in logs
		if (
			error !== null &&
			typeof error === 'object' &&
			Object.prototype.hasOwnProperty.call(error, 'headers')
		) {
			const { headers } = error as { headers?: unknown };

			// Only a non-null object has keys that can be filtered
			if (headers !== null && typeof headers === 'object') {
				const headerMap = headers as Record<string, unknown>;

				for (const key of Object.keys(headerMap)) {
					if (!key.startsWith('x-')) {
						delete headerMap[key];
					}
				}
			}
		}

		// Errors without enumerable own properties are logged in their string form
		const hasNoOwnKeys = Object.keys(error ?? {}).length === 0;

		void logAiEvent(this.executionFunctions, 'ai-llm-errored', {
			error: hasNoOwnKeys ? String(error) : error,
			runId,
			parentRunId,
		});
	}
}
refactor: Implement LLM tracing callback to improve parsing of tokens usage stats (#9311) Signed-off-by: Oleg Ivaniv <me@olegivaniv.com> 2024-05-12 12:12:07 -07:00			`import { BaseCallbackHandler } from '@langchain/core/callbacks/base';`
			`import { getModelNameForTiktoken } from '@langchain/core/language_models/base';`
			`import { encodingForModel } from '@langchain/core/utils/tiktoken';`
			`import type {`
			`Serialized,`
			`SerializedNotImplemented,`
			`SerializedSecret,`
			`} from '@langchain/core/load/serializable';`
			`import type { LLMResult } from '@langchain/core/outputs';`
			`import type { IDataObject, IExecuteFunctions } from 'n8n-workflow';`
			`import { NodeConnectionType } from 'n8n-workflow';`
			`import { pick } from 'lodash';`
			`import type { BaseMessage } from '@langchain/core/messages';`
			`import type { SerializedFields } from '@langchain/core/dist/load/map_keys';`
			`import { logAiEvent } from '../../utils/helpers';`

			`type TokensUsageParser = (llmOutput: LLMResult['llmOutput']) => {`
			`completionTokens: number;`
			`promptTokens: number;`
			`totalTokens: number;`
			`};`

fix(editor): Fix error rendering and indexing of LLM sub-node outputs (#10688) 2024-09-11 07:17:13 -07:00			`type RunDetail = {`
refactor: Implement LLM tracing callback to improve parsing of tokens usage stats (#9311) Signed-off-by: Oleg Ivaniv <me@olegivaniv.com> 2024-05-12 12:12:07 -07:00			`index: number;`
			`messages: BaseMessage[] \| string[] \| string;`
			`options: SerializedSecret \| SerializedNotImplemented \| SerializedFields;`
			`};`

			`const TIKTOKEN_ESTIMATE_MODEL = 'gpt-3.5-turbo';`
			`export class N8nLlmTracing extends BaseCallbackHandler {`
			`name = 'N8nLlmTracing';`

			`executionFunctions: IExecuteFunctions;`

			`connectionType = NodeConnectionType.AiLanguageModel;`

			`promptTokensEstimate = 0;`

			`completionTokensEstimate = 0;`

fix(editor): Fix error rendering and indexing of LLM sub-node outputs (#10688) 2024-09-11 07:17:13 -07:00			`/**`
			`* A map to associate LLM run IDs to run details.`
			`* Key: Unique identifier for each LLM run (run ID)`
			`* Value: RunDetails object`
			`*`
			`*/`
			`runsMap: Record<string, RunDetail> = {};`
refactor: Implement LLM tracing callback to improve parsing of tokens usage stats (#9311) Signed-off-by: Oleg Ivaniv <me@olegivaniv.com> 2024-05-12 12:12:07 -07:00
			`options = {`
			`// Default(OpenAI format) parser`
			`tokensUsageParser: (llmOutput: LLMResult['llmOutput']) => {`
			`const completionTokens = (llmOutput?.tokenUsage?.completionTokens as number) ?? 0;`
			`const promptTokens = (llmOutput?.tokenUsage?.promptTokens as number) ?? 0;`

			`return {`
			`completionTokens,`
			`promptTokens,`
			`totalTokens: completionTokens + promptTokens,`
			`};`
			`},`
			`};`

			`constructor(`
			`executionFunctions: IExecuteFunctions,`
			`options?: { tokensUsageParser: TokensUsageParser },`
			`) {`
			`super();`
			`this.executionFunctions = executionFunctions;`
			`this.options = { ...this.options, ...options };`
			`}`

			`async estimateTokensFromGeneration(generations: LLMResult['generations']) {`
			`const messages = generations.flatMap((gen) => gen.map((g) => g.text));`
			`return await this.estimateTokensFromStringList(messages);`
			`}`

			`async estimateTokensFromStringList(list: string[]) {`
			`const embeddingModel = getModelNameForTiktoken(TIKTOKEN_ESTIMATE_MODEL);`
			`const encoder = await encodingForModel(embeddingModel);`

			`const encodedListLength = await Promise.all(`
			`list.map(async (text) => encoder.encode(text).length),`
			`);`

			`return encodedListLength.reduce((acc, curr) => acc + curr, 0);`
			`}`

fix(editor): Fix error rendering and indexing of LLM sub-node outputs (#10688) 2024-09-11 07:17:13 -07:00			`async handleLLMEnd(output: LLMResult, runId: string) {`
			`// The fallback should never happen since handleLLMStart should always set the run details`
			`// but just in case, we set the index to the length of the runsMap`
			`const runDetails = this.runsMap[runId] ?? { index: Object.keys(this.runsMap).length };`

refactor: Implement LLM tracing callback to improve parsing of tokens usage stats (#9311) Signed-off-by: Oleg Ivaniv <me@olegivaniv.com> 2024-05-12 12:12:07 -07:00			`output.generations = output.generations.map((gen) =>`
			`gen.map((g) => pick(g, ['text', 'generationInfo'])),`
			`);`

			`const tokenUsageEstimate = {`
			`completionTokens: 0,`
			`promptTokens: 0,`
			`totalTokens: 0,`
			`};`
			`const tokenUsage = this.options.tokensUsageParser(output.llmOutput);`

			`if (output.generations.length > 0) {`
			`tokenUsageEstimate.completionTokens = await this.estimateTokensFromGeneration(`
			`output.generations,`
			`);`

			`tokenUsageEstimate.promptTokens = this.promptTokensEstimate;`
			`tokenUsageEstimate.totalTokens =`
			`tokenUsageEstimate.completionTokens + this.promptTokensEstimate;`
			`}`
			`const response: {`
			`response: { generations: LLMResult['generations'] };`
			`tokenUsageEstimate?: typeof tokenUsageEstimate;`
			`tokenUsage?: typeof tokenUsage;`
			`} = {`
			`response: { generations: output.generations },`
			`};`

			`// If the LLM response contains actual tokens usage, otherwise fallback to the estimate`
			`if (tokenUsage.completionTokens > 0) {`
			`response.tokenUsage = tokenUsage;`
			`} else {`
			`response.tokenUsageEstimate = tokenUsageEstimate;`
			`}`

			`const parsedMessages =`
fix(editor): Fix error rendering and indexing of LLM sub-node outputs (#10688) 2024-09-11 07:17:13 -07:00			`typeof runDetails.messages === 'string'`
			`? runDetails.messages`
			`: runDetails.messages.map((message) => {`
refactor: Implement LLM tracing callback to improve parsing of tokens usage stats (#9311) Signed-off-by: Oleg Ivaniv <me@olegivaniv.com> 2024-05-12 12:12:07 -07:00			`if (typeof message === 'string') return message;`
			`if (typeof message?.toJSON === 'function') return message.toJSON();`

			`return message;`
			`});`

fix(editor): Fix error rendering and indexing of LLM sub-node outputs (#10688) 2024-09-11 07:17:13 -07:00			`this.executionFunctions.addOutputData(this.connectionType, runDetails.index, [`
refactor: Implement LLM tracing callback to improve parsing of tokens usage stats (#9311) Signed-off-by: Oleg Ivaniv <me@olegivaniv.com> 2024-05-12 12:12:07 -07:00			`[{ json: { ...response } }],`
			`]);`
refactor(core): Include AI events in log streaming relay (#10768) 2024-09-12 03:02:47 -07:00			`void logAiEvent(this.executionFunctions, 'ai-llm-generated-output', {`
refactor: Implement LLM tracing callback to improve parsing of tokens usage stats (#9311) Signed-off-by: Oleg Ivaniv <me@olegivaniv.com> 2024-05-12 12:12:07 -07:00			`messages: parsedMessages,`
fix(editor): Fix error rendering and indexing of LLM sub-node outputs (#10688) 2024-09-11 07:17:13 -07:00			`options: runDetails.options,`
refactor: Implement LLM tracing callback to improve parsing of tokens usage stats (#9311) Signed-off-by: Oleg Ivaniv <me@olegivaniv.com> 2024-05-12 12:12:07 -07:00			`response,`
			`});`
			`}`

fix(editor): Fix error rendering and indexing of LLM sub-node outputs (#10688) 2024-09-11 07:17:13 -07:00			`async handleLLMStart(llm: Serialized, prompts: string[], runId: string) {`
refactor: Implement LLM tracing callback to improve parsing of tokens usage stats (#9311) Signed-off-by: Oleg Ivaniv <me@olegivaniv.com> 2024-05-12 12:12:07 -07:00			`const estimatedTokens = await this.estimateTokensFromStringList(prompts);`

			`const options = llm.type === 'constructor' ? llm.kwargs : llm;`
fix(editor): Fix error rendering and indexing of LLM sub-node outputs (#10688) 2024-09-11 07:17:13 -07:00			`const { index } = this.executionFunctions.addInputData(this.connectionType, [`
refactor: Implement LLM tracing callback to improve parsing of tokens usage stats (#9311) Signed-off-by: Oleg Ivaniv <me@olegivaniv.com> 2024-05-12 12:12:07 -07:00			`[`
fix(editor): Fix error rendering and indexing of LLM sub-node outputs (#10688) 2024-09-11 07:17:13 -07:00			`{`
			`json: {`
			`messages: prompts,`
			`estimatedTokens,`
			`options,`
refactor: Implement LLM tracing callback to improve parsing of tokens usage stats (#9311) Signed-off-by: Oleg Ivaniv <me@olegivaniv.com> 2024-05-12 12:12:07 -07:00			`},`
fix(editor): Fix error rendering and indexing of LLM sub-node outputs (#10688) 2024-09-11 07:17:13 -07:00			`},`
refactor: Implement LLM tracing callback to improve parsing of tokens usage stats (#9311) Signed-off-by: Oleg Ivaniv <me@olegivaniv.com> 2024-05-12 12:12:07 -07:00			`],`
fix(editor): Fix error rendering and indexing of LLM sub-node outputs (#10688) 2024-09-11 07:17:13 -07:00			`]);`
refactor: Implement LLM tracing callback to improve parsing of tokens usage stats (#9311) Signed-off-by: Oleg Ivaniv <me@olegivaniv.com> 2024-05-12 12:12:07 -07:00
fix(editor): Fix error rendering and indexing of LLM sub-node outputs (#10688) 2024-09-11 07:17:13 -07:00			// Save the run details for later use when processing `handleLLMEnd` event
			`this.runsMap[runId] = {`
refactor: Implement LLM tracing callback to improve parsing of tokens usage stats (#9311) Signed-off-by: Oleg Ivaniv <me@olegivaniv.com> 2024-05-12 12:12:07 -07:00			`index,`
			`options,`
			`messages: prompts,`
			`};`
			`this.promptTokensEstimate = estimatedTokens;`
			`}`

			`async handleLLMError(`
			`error: IDataObject \| Error,`
			`runId: string,`
			`parentRunId?: string \| undefined,`
			`) {`
			`// Filter out non-x- headers to avoid leaking sensitive information in logs`
			`if (typeof error === 'object' && error?.hasOwnProperty('headers')) {`
			`const errorWithHeaders = error as { headers: Record<string, unknown> };`

			`Object.keys(errorWithHeaders.headers).forEach((key) => {`
			`if (!key.startsWith('x-')) {`
			`delete errorWithHeaders.headers[key];`
			`}`
			`});`
			`}`

refactor(core): Include AI events in log streaming relay (#10768) 2024-09-12 03:02:47 -07:00			`void logAiEvent(this.executionFunctions, 'ai-llm-errored', {`
refactor: Implement LLM tracing callback to improve parsing of tokens usage stats (#9311) Signed-off-by: Oleg Ivaniv <me@olegivaniv.com> 2024-05-12 12:12:07 -07:00			`error: Object.keys(error).length === 0 ? error.toString() : error,`
			`runId,`
			`parentRunId,`
			`});`
			`}`
			`}`