feat(Ollama Chat Model Node): Add additional Ollama config parameters & fix vision (#9215)

Signed-off-by: Oleg Ivaniv <me@olegivaniv.com>
Co-authored-by: Michael Kret <michael.k@radency.com>
oleg 2024-04-29 13:41:48 +02:00 committed by GitHub
parent 3fbcbce362
commit e17e767e70
3 changed files with 145 additions and 7 deletions

@@ -22,6 +22,7 @@ import { LLMChain } from 'langchain/chains';
import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
import { HumanMessage } from '@langchain/core/messages';
import { ChatGoogleGenerativeAI } from '@langchain/google-genai';
import { ChatOllama } from '@langchain/community/chat_models/ollama';
import { getTemplateNoticeField } from '../../../utils/sharedFields';
import {
getOptionalOutputParsers,
@@ -81,7 +82,10 @@ async function getImageMessage(
)) as BaseLanguageModel;
const dataURI = `data:image/jpeg;base64,${bufferData.toString('base64')}`;
-const imageUrl = model instanceof ChatGoogleGenerativeAI ? dataURI : { url: dataURI, detail };
+const directUriModels = [ChatGoogleGenerativeAI, ChatOllama];
+const imageUrl = directUriModels.some((i) => model instanceof i)
+? dataURI
+: { url: dataURI, detail };
return new HumanMessage({
content: [

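This is the vision fix: previously only ChatGoogleGenerativeAI received the bare data URI, while every other model, including Ollama, got an OpenAI-style { url, detail } object that Ollama cannot parse. A minimal standalone sketch of the fixed behavior (the base URL, the llava model name, and photo.jpg are illustrative assumptions, not part of the commit):

import { readFileSync } from 'node:fs';
import { HumanMessage } from '@langchain/core/messages';
import { ChatOllama } from '@langchain/community/chat_models/ollama';

// Illustrative values: a local Ollama instance serving a vision-capable model
const model = new ChatOllama({ baseUrl: 'http://localhost:11434', model: 'llava' });

const bufferData = readFileSync('photo.jpg');
const dataURI = `data:image/jpeg;base64,${bufferData.toString('base64')}`;

const response = await model.invoke([
	new HumanMessage({
		content: [
			{ type: 'text', text: 'Describe this image.' },
			// ChatOllama (like ChatGoogleGenerativeAI) expects the bare data URI string,
			// not the OpenAI-style { url: dataURI, detail } object
			{ type: 'image_url', image_url: dataURI },
		],
	}),
]);
console.log(response.content);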
@@ -7,6 +7,7 @@ import {
type SupplyData,
} from 'n8n-workflow';
import type { ChatOllamaInput } from '@langchain/community/chat_models/ollama';
import { ChatOllama } from '@langchain/community/chat_models/ollama';
import { logWrapper } from '../../../utils/logWrapper';
import { getConnectionHintNoticeField } from '../../../utils/sharedFields';
@@ -54,12 +55,13 @@ export class LmChatOllama implements INodeType {
const credentials = await this.getCredentials('ollamaApi');
const modelName = this.getNodeParameter('model', itemIndex) as string;
-const options = this.getNodeParameter('options', itemIndex, {}) as object;
+const options = this.getNodeParameter('options', itemIndex, {}) as ChatOllamaInput;
const model = new ChatOllama({
-...options,
baseUrl: credentials.baseUrl as string,
model: modelName,
+...options,
+format: options.format === 'default' ? undefined : options.format,
});
return {

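Two things change here: options is now typed as ChatOllamaInput instead of a bare object, and it is spread after the fixed baseUrl and model fields, with the UI-only 'default' sentinel for format mapped to undefined so no response format is forced on the Ollama client. A standalone sketch of the same pattern, with illustrative option values:

import { ChatOllama } from '@langchain/community/chat_models/ollama';
import type { ChatOllamaInput } from '@langchain/community/chat_models/ollama';

// Illustrative stand-in for what the node reads from its 'options' parameter
const options: Partial<ChatOllamaInput> = {
	temperature: 0.5,
	numCtx: 4096,
	format: 'default', // UI sentinel meaning "do not force a format"
};

const model = new ChatOllama({
	baseUrl: 'http://localhost:11434', // placeholder
	model: 'llama3', // placeholder
	...options,
	// Spread first, then normalize: 'default' becomes undefined, 'json' passes through
	format: options.format === 'default' ? undefined : options.format,
});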
@@ -76,16 +76,16 @@ export const ollamaOptions: INodeProperties = {
default: 0.7,
typeOptions: { maxValue: 1, minValue: 0, numberPrecision: 1 },
description:
-'Controls randomness: Lowering results in less random completions. As the temperature approaches zero, the model will become deterministic and repetitive.',
+'Controls the randomness of the generated text. Lower values make the output more focused and deterministic, while higher values make it more diverse and random.',
type: 'number',
},
{
displayName: 'Top K',
name: 'topK',
default: -1,
-typeOptions: { maxValue: 1, minValue: -1, numberPrecision: 1 },
+typeOptions: { maxValue: 100, minValue: -1, numberPrecision: 1 },
description:
-'Used to remove "long tail" low probability responses. Defaults to -1, which disables it.',
+'Limits the number of highest probability vocabulary tokens to consider at each step. A higher value increases diversity but may reduce coherence. Set to -1 to disable.',
type: 'number',
},
{
@@ -94,8 +94,140 @@ export const ollamaOptions: INodeProperties = {
default: 1,
typeOptions: { maxValue: 1, minValue: 0, numberPrecision: 1 },
description:
-'Controls diversity via nucleus sampling: 0.5 means half of all likelihood-weighted options are considered. We generally recommend altering this or temperature but not both.',
+'Chooses from the smallest possible set of tokens whose cumulative probability exceeds the probability top_p. Helps generate more human-like text by reducing repetitions.',
type: 'number',
},
{
displayName: 'Frequency Penalty',
name: 'frequencyPenalty',
type: 'number',
default: 0.0,
typeOptions: { minValue: 0 },
description:
'Adjusts the penalty for tokens that have already appeared in the generated text. Higher values discourage repetition.',
},
{
displayName: 'Keep Alive',
name: 'keepAlive',
type: 'string',
default: '5m',
description:
'Specifies the duration to keep the loaded model in memory after use. Useful for frequently used models. Format: 1h30m (1 hour 30 minutes).',
},
{
displayName: 'Low VRAM Mode',
name: 'lowVram',
type: 'boolean',
default: false,
description:
'Whether to activate low VRAM mode, which reduces memory usage at the cost of slower generation speed. Useful for GPUs with limited memory.',
},
{
displayName: 'Main GPU ID',
name: 'mainGpu',
type: 'number',
default: 0,
description:
'Specifies the ID of the GPU to use for the main computation. Only change this if you have multiple GPUs.',
},
{
displayName: 'Context Batch Size',
name: 'numBatch',
type: 'number',
default: 512,
description:
'Sets the batch size for prompt processing. Larger batch sizes may improve generation speed but increase memory usage.',
},
{
displayName: 'Context Length',
name: 'numCtx',
type: 'number',
default: 2048,
description:
'The maximum number of tokens to use as context for generating the next token. Smaller values reduce memory usage, while larger values provide more context to the model.',
},
{
displayName: 'Number of GPUs',
name: 'numGpu',
type: 'number',
default: -1,
description:
'Specifies the number of GPUs to use for parallel processing. Set to -1 for auto-detection.',
},
{
displayName: 'Max Tokens to Generate',
name: 'numPredict',
type: 'number',
default: -1,
description:
'The maximum number of tokens to generate. Set to -1 for no limit. Be cautious when setting this to a large value, as it can lead to very long outputs.',
},
{
displayName: 'Number of CPU Threads',
name: 'numThread',
type: 'number',
default: 0,
description:
'Specifies the number of CPU threads to use for processing. Set to 0 for auto-detection.',
},
{
displayName: 'Penalize Newlines',
name: 'penalizeNewline',
type: 'boolean',
default: true,
description:
'Whether the model will be less likely to generate newline characters, encouraging longer continuous sequences of text',
},
{
displayName: 'Presence Penalty',
name: 'presencePenalty',
type: 'number',
default: 0.0,
description:
'Adjusts the penalty for tokens based on their presence in the generated text so far. Positive values penalize tokens that have already appeared, encouraging diversity.',
},
{
displayName: 'Repetition Penalty',
name: 'repeatPenalty',
type: 'number',
default: 1.0,
description:
'Adjusts the penalty factor for repeated tokens. Higher values more strongly discourage repetition. Set to 1.0 to disable repetition penalty.',
},
{
displayName: 'Use Memory Locking',
name: 'useMLock',
type: 'boolean',
default: false,
description:
'Whether to lock the model in memory to prevent swapping. This can improve performance but requires sufficient available memory.',
},
{
displayName: 'Use Memory Mapping',
name: 'useMMap',
type: 'boolean',
default: true,
description:
'Whether to use memory mapping for loading the model. This can reduce memory usage but may impact performance. Recommended to keep enabled.',
},
{
displayName: 'Load Vocabulary Only',
name: 'vocabOnly',
type: 'boolean',
default: false,
description:
'Whether to only load the model vocabulary without the weights. Useful for quickly testing tokenization.',
},
{
displayName: 'Output Format',
name: 'format',
type: 'options',
options: [
{ name: 'Default', value: 'default' },
{ name: 'JSON', value: 'json' },
],
default: 'default',
description: 'Specifies the format of the API response',
},
],
};
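
The new options map one-to-one onto ChatOllamaInput fields, so a fully tuned instance looks roughly like the sketch below (all values are illustrative, not recommendations; the base URL and model name are placeholders):

import { ChatOllama } from '@langchain/community/chat_models/ollama';

const model = new ChatOllama({
	baseUrl: 'http://localhost:11434', // placeholder
	model: 'llama3', // placeholder
	// Sampling controls
	temperature: 0.7,
	topK: 40,
	topP: 0.9,
	// Repetition controls
	frequencyPenalty: 0.5,
	presencePenalty: 0.5,
	repeatPenalty: 1.1,
	penalizeNewline: true,
	// Resource and context controls
	keepAlive: '5m',
	numCtx: 2048,
	numPredict: 256,
	numThread: 0, // 0 = auto-detect
	useMMap: true,
	// Response format; the node maps its 'Default' choice to undefined instead
	format: 'json',
});

const res = await model.invoke('Return a JSON object with a single "status" key.');
console.log(res.content);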