refactor: Implement LLM tracing callback to improve parsing of tokens usage stats (#9311)

Signed-off-by: Oleg Ivaniv <me@olegivaniv.com>

Parent: 244520547b
Commit: 359ade45bc
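Every LLM node diff below follows the same pattern: the node stops wrapping its LangChain model in logWrapper and instead passes an N8nLlmTracing callback handler to the model constructor, so input/output logging and token-usage parsing happen in the callback. A minimal sketch of that pattern, assembled from the Anthropic hunks that follow; identifiers such as credentials, modelName, tokensUsageParser and this come from the node's surrounding supplyData context and are assumed here rather than shown in full:

// Sketch only; not a complete node file.
const model = new ChatAnthropic({
	anthropicApiKey: credentials.apiKey as string, // node credentials (assumed context)
	modelName,
	callbacks: [new N8nLlmTracing(this, { tokensUsageParser })], // tracing replaces the logWrapper proxy
});

return {
	response: model, // previously: response: logWrapper(model, this)
};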
LmChatAnthropic node:

@@ -9,8 +9,9 @@ import {
 } from 'n8n-workflow';
 
 import { ChatAnthropic } from '@langchain/anthropic';
-import { logWrapper } from '../../../utils/logWrapper';
+import type { LLMResult } from '@langchain/core/outputs';
 import { getConnectionHintNoticeField } from '../../../utils/sharedFields';
+import { N8nLlmTracing } from '../N8nLlmTracing';
 
 const modelField: INodeProperties = {
 	displayName: 'Model',
@@ -166,6 +167,17 @@ export class LmChatAnthropic implements INodeType {
 			topP: number;
 		};
 
+		const tokensUsageParser = (llmOutput: LLMResult['llmOutput']) => {
+			const usage = (llmOutput?.usage as { input_tokens: number; output_tokens: number }) ?? {
+				input_tokens: 0,
+				output_tokens: 0,
+			};
+			return {
+				completionTokens: usage.output_tokens,
+				promptTokens: usage.input_tokens,
+				totalTokens: usage.input_tokens + usage.output_tokens,
+			};
+		};
 		const model = new ChatAnthropic({
 			anthropicApiKey: credentials.apiKey as string,
 			modelName,
@@ -173,10 +185,11 @@
 			temperature: options.temperature,
 			topK: options.topK,
 			topP: options.topP,
+			callbacks: [new N8nLlmTracing(this, { tokensUsageParser })],
 		});
 
 		return {
-			response: logWrapper(model, this),
+			response: model,
 		};
 	}
 }
LmChatOllama node:

@@ -9,9 +9,9 @@ import {
 
 import type { ChatOllamaInput } from '@langchain/community/chat_models/ollama';
 import { ChatOllama } from '@langchain/community/chat_models/ollama';
-import { logWrapper } from '../../../utils/logWrapper';
 import { getConnectionHintNoticeField } from '../../../utils/sharedFields';
 import { ollamaModel, ollamaOptions, ollamaDescription } from '../LMOllama/description';
+import { N8nLlmTracing } from '../N8nLlmTracing';
 
 export class LmChatOllama implements INodeType {
 	description: INodeTypeDescription = {
@@ -62,10 +62,11 @@ export class LmChatOllama implements INodeType {
 			baseUrl: credentials.baseUrl as string,
 			model: modelName,
 			format: options.format === 'default' ? undefined : options.format,
+			callbacks: [new N8nLlmTracing(this)],
 		});
 
 		return {
-			response: logWrapper(model, this),
+			response: model,
 		};
 	}
 }
LmChatOpenAi node:

@@ -8,8 +8,8 @@ import {
 } from 'n8n-workflow';
 
 import { ChatOpenAI, type ClientOptions } from '@langchain/openai';
-import { logWrapper } from '../../../utils/logWrapper';
 import { getConnectionHintNoticeField } from '../../../utils/sharedFields';
+import { N8nLlmTracing } from '../N8nLlmTracing';
 
 export class LmChatOpenAi implements INodeType {
 	description: INodeTypeDescription = {
@@ -247,6 +247,7 @@ export class LmChatOpenAi implements INodeType {
 			timeout: options.timeout ?? 60000,
 			maxRetries: options.maxRetries ?? 2,
 			configuration,
+			callbacks: [new N8nLlmTracing(this)],
 			modelKwargs: options.responseFormat
 				? {
 						response_format: { type: options.responseFormat },
@@ -255,7 +256,7 @@
 		});
 
 		return {
-			response: logWrapper(model, this),
+			response: model,
 		};
 	}
 }
LmCohere node:

@@ -8,8 +8,8 @@ import {
 } from 'n8n-workflow';
 
 import { Cohere } from '@langchain/cohere';
-import { logWrapper } from '../../../utils/logWrapper';
 import { getConnectionHintNoticeField } from '../../../utils/sharedFields';
+import { N8nLlmTracing } from '../N8nLlmTracing';
 
 export class LmCohere implements INodeType {
 	description: INodeTypeDescription = {
@@ -97,10 +97,11 @@ export class LmCohere implements INodeType {
 		const model = new Cohere({
 			apiKey: credentials.apiKey as string,
 			...options,
+			callbacks: [new N8nLlmTracing(this)],
 		});
 
 		return {
-			response: logWrapper(model, this),
+			response: model,
 		};
 	}
 }
LmOllama node:

@@ -8,8 +8,8 @@ import {
 } from 'n8n-workflow';
 
 import { Ollama } from '@langchain/community/llms/ollama';
-import { logWrapper } from '../../../utils/logWrapper';
 import { getConnectionHintNoticeField } from '../../../utils/sharedFields';
+import { N8nLlmTracing } from '../N8nLlmTracing';
 import { ollamaDescription, ollamaModel, ollamaOptions } from './description';
 
 export class LmOllama implements INodeType {
@@ -60,10 +60,11 @@ export class LmOllama implements INodeType {
 			baseUrl: credentials.baseUrl as string,
 			model: modelName,
 			...options,
+			callbacks: [new N8nLlmTracing(this)],
 		});
 
 		return {
-			response: logWrapper(model, this),
+			response: model,
 		};
 	}
 }
LmOpenAi node:

@@ -9,8 +9,8 @@ import type {
 } from 'n8n-workflow';
 
 import { OpenAI, type ClientOptions } from '@langchain/openai';
-import { logWrapper } from '../../../utils/logWrapper';
 import { getConnectionHintNoticeField } from '../../../utils/sharedFields';
+import { N8nLlmTracing } from '../N8nLlmTracing';
 
 type LmOpenAiOptions = {
 	baseURL?: string;
@@ -240,10 +240,11 @@ export class LmOpenAi implements INodeType {
 			configuration,
 			timeout: options.timeout ?? 60000,
 			maxRetries: options.maxRetries ?? 2,
+			callbacks: [new N8nLlmTracing(this)],
 		});
 
 		return {
-			response: logWrapper(model, this),
+			response: model,
 		};
 	}
 }
LmOpenHuggingFaceInference node:

@@ -8,8 +8,8 @@ import {
 } from 'n8n-workflow';
 
 import { HuggingFaceInference } from '@langchain/community/llms/hf';
-import { logWrapper } from '../../../utils/logWrapper';
 import { getConnectionHintNoticeField } from '../../../utils/sharedFields';
+import { N8nLlmTracing } from '../N8nLlmTracing';
 
 export class LmOpenHuggingFaceInference implements INodeType {
 	description: INodeTypeDescription = {
@@ -141,10 +141,11 @@ export class LmOpenHuggingFaceInference implements INodeType {
 			model: modelName,
 			apiKey: credentials.apiKey as string,
 			...options,
+			callbacks: [new N8nLlmTracing(this)],
 		});
 
 		return {
-			response: logWrapper(model, this),
+			response: model,
 		};
 	}
 }
LmChatAwsBedrock node:

@@ -7,12 +7,12 @@ import {
 	type SupplyData,
 } from 'n8n-workflow';
 import { BedrockChat } from '@langchain/community/chat_models/bedrock';
-import { logWrapper } from '../../../utils/logWrapper';
 import { getConnectionHintNoticeField } from '../../../utils/sharedFields';
 // Dependencies needed underneath the hood. We add them
 // here only to track where what dependency is used
 import '@aws-sdk/credential-provider-node';
 import '@aws-sdk/client-bedrock-runtime';
+import { N8nLlmTracing } from '../N8nLlmTracing';
 
 export class LmChatAwsBedrock implements INodeType {
 	description: INodeTypeDescription = {
@@ -152,10 +152,11 @@ export class LmChatAwsBedrock implements INodeType {
 				accessKeyId: credentials.accessKeyId as string,
 				sessionToken: credentials.sessionToken as string,
 			},
+			callbacks: [new N8nLlmTracing(this)],
 		});
 
 		return {
-			response: logWrapper(model, this),
+			response: model,
 		};
 	}
 }
LmChatAzureOpenAi node:

@@ -9,8 +9,8 @@ import {
 
 import type { ClientOptions } from '@langchain/openai';
 import { ChatOpenAI } from '@langchain/openai';
-import { logWrapper } from '../../../utils/logWrapper';
 import { getConnectionHintNoticeField } from '../../../utils/sharedFields';
+import { N8nLlmTracing } from '../N8nLlmTracing';
 
 export class LmChatAzureOpenAi implements INodeType {
 	description: INodeTypeDescription = {
@@ -160,10 +160,11 @@ export class LmChatAzureOpenAi implements INodeType {
 			timeout: options.timeout ?? 60000,
 			maxRetries: options.maxRetries ?? 2,
 			configuration,
+			callbacks: [new N8nLlmTracing(this)],
 		});
 
 		return {
-			response: logWrapper(model, this),
+			response: model,
 		};
 	}
 }
LmChatGoogleGemini node:

@@ -8,8 +8,8 @@ import {
 } from 'n8n-workflow';
 import { ChatGoogleGenerativeAI } from '@langchain/google-genai';
 import type { HarmBlockThreshold, HarmCategory, SafetySetting } from '@google/generative-ai';
-import { logWrapper } from '../../../utils/logWrapper';
 import { getConnectionHintNoticeField } from '../../../utils/sharedFields';
+import { N8nLlmTracing } from '../N8nLlmTracing';
 import { harmCategories, harmThresholds } from './options';
 
 export class LmChatGoogleGemini implements INodeType {
@@ -224,10 +224,11 @@ export class LmChatGoogleGemini implements INodeType {
 			temperature: options.temperature,
 			maxOutputTokens: options.maxOutputTokens,
 			safetySettings,
+			callbacks: [new N8nLlmTracing(this)],
 		});
 
 		return {
-			response: logWrapper(model, this),
+			response: model,
 		};
 	}
 }
LmChatGooglePalm node:

@@ -7,8 +7,8 @@ import {
 	type SupplyData,
 } from 'n8n-workflow';
 import { ChatGooglePaLM } from '@langchain/community/chat_models/googlepalm';
-import { logWrapper } from '../../../utils/logWrapper';
 import { getConnectionHintNoticeField } from '../../../utils/sharedFields';
+import { N8nLlmTracing } from '../N8nLlmTracing';
 
 export class LmChatGooglePalm implements INodeType {
 	description: INodeTypeDescription = {
@@ -156,10 +156,11 @@ export class LmChatGooglePalm implements INodeType {
 			apiKey: credentials.apiKey as string,
 			modelName,
 			...options,
+			callbacks: [new N8nLlmTracing(this)],
 		});
 
 		return {
-			response: logWrapper(model, this),
+			response: model,
 		};
 	}
 }
LmChatGroq node:

@@ -8,8 +8,8 @@ import {
 } from 'n8n-workflow';
 
 import { ChatGroq } from '@langchain/groq';
-import { logWrapper } from '../../../utils/logWrapper';
 import { getConnectionHintNoticeField } from '../../../utils/sharedFields';
+import { N8nLlmTracing } from '../N8nLlmTracing';
 
 export class LmChatGroq implements INodeType {
 	description: INodeTypeDescription = {
@@ -142,10 +142,11 @@ export class LmChatGroq implements INodeType {
 			modelName,
 			maxTokens: options.maxTokensToSample,
 			temperature: options.temperature,
+			callbacks: [new N8nLlmTracing(this)],
 		});
 
 		return {
-			response: logWrapper(model, this),
+			response: model,
 		};
 	}
 }
LmChatMistralCloud node:

@@ -9,8 +9,8 @@ import {
 
 import type { ChatMistralAIInput } from '@langchain/mistralai';
 import { ChatMistralAI } from '@langchain/mistralai';
-import { logWrapper } from '../../../utils/logWrapper';
 import { getConnectionHintNoticeField } from '../../../utils/sharedFields';
+import { N8nLlmTracing } from '../N8nLlmTracing';
 
 export class LmChatMistralCloud implements INodeType {
 	description: INodeTypeDescription = {
@@ -188,10 +188,11 @@ export class LmChatMistralCloud implements INodeType {
 			apiKey: credentials.apiKey as string,
 			modelName,
 			...options,
+			callbacks: [new N8nLlmTracing(this)],
 		});
 
 		return {
-			response: logWrapper(model, this),
+			response: model,
 		};
 	}
 }
LmGooglePalm node:

@@ -7,8 +7,8 @@ import {
 	type SupplyData,
 } from 'n8n-workflow';
 import { GooglePaLM } from '@langchain/community/llms/googlepalm';
-import { logWrapper } from '../../../utils/logWrapper';
 import { getConnectionHintNoticeField } from '../../../utils/sharedFields';
+import { N8nLlmTracing } from '../N8nLlmTracing';
 
 export class LmGooglePalm implements INodeType {
 	description: INodeTypeDescription = {
@@ -163,10 +163,11 @@ export class LmGooglePalm implements INodeType {
 			apiKey: credentials.apiKey as string,
 			modelName,
 			...options,
+			callbacks: [new N8nLlmTracing(this)],
 		});
 
 		return {
-			response: logWrapper(model, this),
+			response: model,
 		};
 	}
 }
packages/@n8n/nodes-langchain/nodes/llms/N8nLlmTracing.ts (new file, 193 lines):

@@ -0,0 +1,193 @@
+import { BaseCallbackHandler } from '@langchain/core/callbacks/base';
+import { getModelNameForTiktoken } from '@langchain/core/language_models/base';
+import { encodingForModel } from '@langchain/core/utils/tiktoken';
+import type {
+	Serialized,
+	SerializedNotImplemented,
+	SerializedSecret,
+} from '@langchain/core/load/serializable';
+import type { LLMResult } from '@langchain/core/outputs';
+import type { IDataObject, IExecuteFunctions } from 'n8n-workflow';
+import { NodeConnectionType } from 'n8n-workflow';
+import { pick } from 'lodash';
+import type { BaseMessage } from '@langchain/core/messages';
+import type { SerializedFields } from '@langchain/core/dist/load/map_keys';
+import { logAiEvent } from '../../utils/helpers';
+
+type TokensUsageParser = (llmOutput: LLMResult['llmOutput']) => {
+	completionTokens: number;
+	promptTokens: number;
+	totalTokens: number;
+};
+
+type LastInput = {
+	index: number;
+	messages: BaseMessage[] | string[] | string;
+	options: SerializedSecret | SerializedNotImplemented | SerializedFields;
+};
+
+const TIKTOKEN_ESTIMATE_MODEL = 'gpt-3.5-turbo';
+export class N8nLlmTracing extends BaseCallbackHandler {
+	name = 'N8nLlmTracing';
+
+	executionFunctions: IExecuteFunctions;
+
+	connectionType = NodeConnectionType.AiLanguageModel;
+
+	promptTokensEstimate = 0;
+
+	completionTokensEstimate = 0;
+
+	lastInput: LastInput = {
+		index: 0,
+		messages: [],
+		options: {},
+	};
+
+	options = {
+		// Default(OpenAI format) parser
+		tokensUsageParser: (llmOutput: LLMResult['llmOutput']) => {
+			const completionTokens = (llmOutput?.tokenUsage?.completionTokens as number) ?? 0;
+			const promptTokens = (llmOutput?.tokenUsage?.promptTokens as number) ?? 0;
+
+			return {
+				completionTokens,
+				promptTokens,
+				totalTokens: completionTokens + promptTokens,
+			};
+		},
+	};
+
+	constructor(
+		executionFunctions: IExecuteFunctions,
+		options?: { tokensUsageParser: TokensUsageParser },
+	) {
+		super();
+		this.executionFunctions = executionFunctions;
+		this.options = { ...this.options, ...options };
+	}
+
+	async estimateTokensFromGeneration(generations: LLMResult['generations']) {
+		const messages = generations.flatMap((gen) => gen.map((g) => g.text));
+		return await this.estimateTokensFromStringList(messages);
+	}
+
+	async estimateTokensFromStringList(list: string[]) {
+		const embeddingModel = getModelNameForTiktoken(TIKTOKEN_ESTIMATE_MODEL);
+		const encoder = await encodingForModel(embeddingModel);
+
+		const encodedListLength = await Promise.all(
+			list.map(async (text) => encoder.encode(text).length),
+		);
+
+		return encodedListLength.reduce((acc, curr) => acc + curr, 0);
+	}
+
+	async handleLLMEnd(output: LLMResult) {
+		output.generations = output.generations.map((gen) =>
+			gen.map((g) => pick(g, ['text', 'generationInfo'])),
+		);
+
+		const tokenUsageEstimate = {
+			completionTokens: 0,
+			promptTokens: 0,
+			totalTokens: 0,
+		};
+		const tokenUsage = this.options.tokensUsageParser(output.llmOutput);
+
+		if (output.generations.length > 0) {
+			tokenUsageEstimate.completionTokens = await this.estimateTokensFromGeneration(
+				output.generations,
+			);
+
+			tokenUsageEstimate.promptTokens = this.promptTokensEstimate;
+			tokenUsageEstimate.totalTokens =
+				tokenUsageEstimate.completionTokens + this.promptTokensEstimate;
+		}
+		const response: {
+			response: { generations: LLMResult['generations'] };
+			tokenUsageEstimate?: typeof tokenUsageEstimate;
+			tokenUsage?: typeof tokenUsage;
+		} = {
+			response: { generations: output.generations },
+		};
+
+		// If the LLM response contains actual tokens usage, otherwise fallback to the estimate
+		if (tokenUsage.completionTokens > 0) {
+			response.tokenUsage = tokenUsage;
+		} else {
+			response.tokenUsageEstimate = tokenUsageEstimate;
+		}
+
+		const parsedMessages =
+			typeof this.lastInput.messages === 'string'
+				? this.lastInput.messages
+				: this.lastInput.messages.map((message) => {
+						if (typeof message === 'string') return message;
+						if (typeof message?.toJSON === 'function') return message.toJSON();
+
+						return message;
+					});
+
+		this.executionFunctions.addOutputData(this.connectionType, this.lastInput.index, [
+			[{ json: { ...response } }],
+		]);
+		void logAiEvent(this.executionFunctions, 'n8n.ai.llm.generated', {
+			messages: parsedMessages,
+			options: this.lastInput.options,
+			response,
+		});
+	}
+
+	async handleLLMStart(llm: Serialized, prompts: string[]) {
+		const estimatedTokens = await this.estimateTokensFromStringList(prompts);
+
+		const options = llm.type === 'constructor' ? llm.kwargs : llm;
+		const { index } = this.executionFunctions.addInputData(
+			this.connectionType,
+			[
+				[
+					{
+						json: {
+							messages: prompts,
+							estimatedTokens,
+							options,
+						},
+					},
+				],
+			],
+			this.lastInput.index + 1,
+		);
+
+		// Save the last input for later use when processing `handleLLMEnd` event
+		this.lastInput = {
+			index,
+			options,
+			messages: prompts,
+		};
+		this.promptTokensEstimate = estimatedTokens;
+	}
+
+	async handleLLMError(
+		error: IDataObject | Error,
+		runId: string,
+		parentRunId?: string | undefined,
+	) {
+		// Filter out non-x- headers to avoid leaking sensitive information in logs
+		if (typeof error === 'object' && error?.hasOwnProperty('headers')) {
+			const errorWithHeaders = error as { headers: Record<string, unknown> };
+
+			Object.keys(errorWithHeaders.headers).forEach((key) => {
+				if (!key.startsWith('x-')) {
+					delete errorWithHeaders.headers[key];
+				}
+			});
+		}
+
+		void logAiEvent(this.executionFunctions, 'n8n.ai.llm.error', {
+			error: Object.keys(error).length === 0 ? error.toString() : error,
+			runId,
+			parentRunId,
+		});
+	}
+}
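For orientation, a small sketch of the two llmOutput shapes the parsers above read. The field names come from the code (tokenUsage.completionTokens/promptTokens for the default OpenAI-format parser, usage.input_tokens/output_tokens for the Anthropic-specific parser in LmChatAnthropic); the numbers are illustrative only:

// Illustrative values, not taken from the commit.
const openAiStyleLlmOutput = {
	tokenUsage: { completionTokens: 12, promptTokens: 34 },
};
const anthropicStyleLlmOutput = {
	usage: { input_tokens: 34, output_tokens: 12 },
};
// Either way, the parser returns the normalized shape
// { completionTokens, promptTokens, totalTokens } consumed by handleLLMEnd.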
utils/logWrapper:

@@ -4,20 +4,13 @@ import type { ConnectionTypes, IExecuteFunctions, INodeExecutionData } from 'n8n
 import { Tool } from '@langchain/core/tools';
 import type { BaseMessage } from '@langchain/core/messages';
 import type { InputValues, MemoryVariables, OutputValues } from '@langchain/core/memory';
-import type { ChatResult } from '@langchain/core/outputs';
 import { BaseChatMessageHistory } from '@langchain/core/chat_history';
-import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
-import type {
-	CallbackManagerForLLMRun,
-	BaseCallbackConfig,
-	Callbacks,
-} from '@langchain/core/callbacks/manager';
+import type { BaseCallbackConfig, Callbacks } from '@langchain/core/callbacks/manager';
 import { Embeddings } from '@langchain/core/embeddings';
 import { VectorStore } from '@langchain/core/vectorstores';
 import type { Document } from '@langchain/core/documents';
 import { TextSplitter } from 'langchain/text_splitter';
-import { BaseLLM } from '@langchain/core/language_models/llms';
 import { BaseChatMemory } from '@langchain/community/memory/chat_memory';
 import { BaseRetriever } from '@langchain/core/retrievers';
 import type { FormatInstructionsOptions } from '@langchain/core/output_parsers';
@@ -26,7 +19,7 @@ import { isObject } from 'lodash';
 import type { BaseDocumentLoader } from 'langchain/dist/document_loaders/base';
 import { N8nJsonLoader } from './N8nJsonLoader';
 import { N8nBinaryLoader } from './N8nBinaryLoader';
-import { isChatInstance, logAiEvent } from './helpers';
+import { logAiEvent } from './helpers';
 
 const errorsMap: { [key: string]: { message: string; description: string } } = {
 	'You exceeded your current quota, please check your plan and billing details.': {
@@ -115,9 +108,7 @@ export function callMethodSync<T>(
 export function logWrapper(
 	originalInstance:
 		| Tool
-		| BaseChatModel
 		| BaseChatMemory
-		| BaseLLM
 		| BaseChatMessageHistory
 		| BaseOutputParser
 		| BaseRetriever
@@ -229,56 +220,6 @@ export function logWrapper(
 				}
 			}
 
-			// ========== BaseChatModel ==========
-			if (originalInstance instanceof BaseLLM || isChatInstance(originalInstance)) {
-				if (prop === '_generate' && '_generate' in target) {
-					return async (
-						messages: BaseMessage[] & string[],
-						options: any,
-						runManager?: CallbackManagerForLLMRun,
-					): Promise<ChatResult> => {
-						connectionType = NodeConnectionType.AiLanguageModel;
-						const { index } = executeFunctions.addInputData(connectionType, [
-							[{ json: { messages, options } }],
-						]);
-						try {
-							const response = (await callMethodAsync.call(target, {
-								executeFunctions,
-								connectionType,
-								currentNodeRunIndex: index,
-								method: target[prop],
-								arguments: [
-									messages,
-									{ ...options, signal: executeFunctions.getExecutionCancelSignal() },
-									runManager,
-								],
-							})) as ChatResult;
-							const parsedMessages =
-								typeof messages === 'string'
-									? messages
-									: messages.map((message) => {
-											if (typeof message === 'string') return message;
-											if (typeof message?.toJSON === 'function') return message.toJSON();
-
-											return message;
-										});
-
-							void logAiEvent(executeFunctions, 'n8n.ai.llm.generated', {
-								messages: parsedMessages,
-								options,
-								response,
-							});
-							executeFunctions.addOutputData(connectionType, index, [[{ json: { response } }]]);
-							return response;
-						} catch (error) {
-							// Mute AbortError as they are expected
-							if (error?.name === 'AbortError') return { generations: [] };
-							throw error;
-						}
-					};
-				}
-			}
-
 			// ========== BaseOutputParser ==========
 			if (originalInstance instanceof BaseOutputParser) {
 				if (prop === 'getFormatInstructions' && 'getFormatInstructions' in target) {
WorkflowExecute:

@@ -140,6 +140,10 @@ export class WorkflowExecute {
 		return this.processRunExecutionData(workflow);
 	}
 
+	static isAbortError(e?: ExecutionBaseError) {
+		return e?.message === 'AbortError';
+	}
+
 	forceInputNodeExecution(workflow: Workflow): boolean {
 		return workflow.settings.executionOrder !== 'v1';
 	}
@@ -834,7 +838,6 @@
 			this.abortController.abort();
 			const fullRunData = this.getFullRunData(startedAt);
 			void this.executeHook('workflowExecuteAfter', [fullRunData]);
-			setTimeout(() => resolve(fullRunData), 10);
 		});
 
 	// eslint-disable-next-line complexity
@@ -1323,12 +1326,14 @@
 
 					// Add the execution data again so that it can get restarted
 					this.runExecutionData.executionData!.nodeExecutionStack.unshift(executionData);
-					await this.executeHook('nodeExecuteAfter', [
-						executionNode.name,
-						taskData,
-						this.runExecutionData,
-					]);
+					// Only execute the nodeExecuteAfter hook if the node did not get aborted
+					if (!WorkflowExecute.isAbortError(executionError)) {
+						await this.executeHook('nodeExecuteAfter', [
+							executionNode.name,
+							taskData,
+							this.runExecutionData,
+						]);
+					}
 
 					break;
 				}
@@ -1770,8 +1775,10 @@
 		}
 
 		this.moveNodeMetadata();
-		await this.executeHook('workflowExecuteAfter', [fullRunData, newStaticData]);
+		// Prevent from running the hook if the error is an abort error as it was already handled
+		if (!WorkflowExecute.isAbortError(executionError)) {
+			await this.executeHook('workflowExecuteAfter', [fullRunData, newStaticData]);
+		}
 
 		if (closeFunction) {
 			try {
AI run data Vue component (token usage display):

@@ -31,7 +31,7 @@
 				{{
 					$locale.baseText('runData.aiContentBlock.tokens', {
 						interpolate: {
-							count: consumedTokensSum?.totalTokens.toString()!,
+							count: formatTokenUsageCount(consumedTokensSum?.totalTokens ?? 0),
 						},
 					})
 				}}
@@ -42,7 +42,7 @@
 				{{
 					$locale.baseText('runData.aiContentBlock.tokens', {
 						interpolate: {
-							count: consumedTokensSum?.promptTokens.toString()!,
+							count: formatTokenUsageCount(consumedTokensSum?.promptTokens ?? 0),
 						},
 					})
 				}}
@@ -53,7 +53,7 @@
 				{{
 					$locale.baseText('runData.aiContentBlock.tokens', {
 						interpolate: {
-							count: consumedTokensSum?.completionTokens.toString()!,
+							count: formatTokenUsageCount(consumedTokensSum?.completionTokens ?? 0),
 						},
 					})
 				}}
@@ -75,12 +75,7 @@
 import type { IAiData, IAiDataContent } from '@/Interface';
 import { useNodeTypesStore } from '@/stores/nodeTypes.store';
 import { useWorkflowsStore } from '@/stores/workflows.store';
-import type {
-	IDataObject,
-	INodeExecutionData,
-	INodeTypeDescription,
-	NodeConnectionType,
-} from 'n8n-workflow';
+import type { INodeExecutionData, INodeTypeDescription, NodeConnectionType } from 'n8n-workflow';
 import { computed } from 'vue';
 import NodeIcon from '@/components/NodeIcon.vue';
 import AiRunContentBlock from './AiRunContentBlock.vue';
@@ -105,12 +100,13 @@ type TokenUsageData = {
 	promptTokens: number;
 	totalTokens: number;
 };
 
 const consumedTokensSum = computed(() => {
 	// eslint-disable-next-line @typescript-eslint/no-use-before-define
-	const consumedTokensSum1 = outputRun.value?.data?.reduce(
+	const tokenUsage = outputRun.value?.data?.reduce(
 		(acc: TokenUsageData, curr: INodeExecutionData) => {
-			const response = curr.json?.response as IDataObject;
-			const tokenUsageData = (response?.llmOutput as IDataObject)?.tokenUsage as TokenUsageData;
+			const tokenUsageData = (curr.json?.tokenUsage ??
+				curr.json?.tokenUsageEstimate) as TokenUsageData;
 
 			if (!tokenUsageData) return acc;
 
@@ -127,9 +123,16 @@ const consumedTokensSum = computed(() => {
 		},
 	);
 
-	return consumedTokensSum1;
+	return tokenUsage;
 });
 
+const usingTokensEstimates = computed(() => {
+	return outputRun.value?.data?.some((d) => d.json?.tokenUsageEstimate);
+});
+
+function formatTokenUsageCount(count: number) {
+	return usingTokensEstimates.value ? `~${count}` : count.toString();
+}
 function extractRunMeta(run: IAiDataContent) {
 	const uiNode = workflowsStore.getNodeByName(props.inputData.node);
 	const nodeType = nodeTypesStore.getNodeType(uiNode?.type ?? '');
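A usage note on formatTokenUsageCount above: when any output item only carries tokenUsageEstimate (no exact tokenUsage reported by the provider), the summed counts are rendered with a tilde prefix to mark them as estimates. Hypothetical values for illustration:

// usingTokensEstimates is true  -> formatTokenUsageCount(1250) === '~1250'
// usingTokensEstimates is false -> formatTokenUsageCount(1250) === '1250'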
eventNamesAiNodes:

@@ -2020,6 +2020,7 @@ export const eventNamesAiNodes = [
 	'n8n.ai.tool.called',
 	'n8n.ai.vector.store.searched',
 	'n8n.ai.llm.generated',
+	'n8n.ai.llm.error',
 	'n8n.ai.vector.store.populated',
 ] as const;