feat(Ollama Chat Model Node): Add additional Ollama config parameters & fix vision (#9215)

Signed-off-by: Oleg Ivaniv <me@olegivaniv.com>
Co-authored-by: Michael Kret <michael.k@radency.com>
oleg 2024-04-29 13:41:48 +02:00 committed by GitHub
parent 3fbcbce362
commit e17e767e70
3 changed files with 145 additions and 7 deletions

@@ -22,6 +22,7 @@ import { LLMChain } from 'langchain/chains';
import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
import { HumanMessage } from '@langchain/core/messages';
import { ChatGoogleGenerativeAI } from '@langchain/google-genai';
import { ChatOllama } from '@langchain/community/chat_models/ollama';
import { getTemplateNoticeField } from '../../../utils/sharedFields';
import {
getOptionalOutputParsers,
@@ -81,7 +82,10 @@ async function getImageMessage(
)) as BaseLanguageModel;
const dataURI = `data:image/jpeg;base64,${bufferData.toString('base64')}`;
-const imageUrl = model instanceof ChatGoogleGenerativeAI ? dataURI : { url: dataURI, detail };
+const directUriModels = [ChatGoogleGenerativeAI, ChatOllama];
+const imageUrl = directUriModels.some((i) => model instanceof i)
+? dataURI
+: { url: dataURI, detail };
return new HumanMessage({
content: [

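This is the vision fix: previously only ChatGoogleGenerativeAI received the bare data URI, while every other model, including Ollama, got an OpenAI-style { url, detail } object that Ollama cannot parse. A minimal standalone sketch of the fixed behavior (the base URL, the llava model name, and photo.jpg are illustrative assumptions, not part of the commit):

import { readFileSync } from 'node:fs';
import { HumanMessage } from '@langchain/core/messages';
import { ChatOllama } from '@langchain/community/chat_models/ollama';

// Illustrative values: a local Ollama instance serving a vision-capable model
const model = new ChatOllama({ baseUrl: 'http://localhost:11434', model: 'llava' });

const bufferData = readFileSync('photo.jpg');
const dataURI = `data:image/jpeg;base64,${bufferData.toString('base64')}`;

const response = await model.invoke([
	new HumanMessage({
		content: [
			{ type: 'text', text: 'Describe this image.' },
			// ChatOllama (like ChatGoogleGenerativeAI) expects the bare data URI string,
			// not the OpenAI-style { url: dataURI, detail } object
			{ type: 'image_url', image_url: dataURI },
		],
	}),
]);
console.log(response.content);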
@@ -7,6 +7,7 @@ import {
type SupplyData,
} from 'n8n-workflow';
import type { ChatOllamaInput } from '@langchain/community/chat_models/ollama';
import { ChatOllama } from '@langchain/community/chat_models/ollama';
import { logWrapper } from '../../../utils/logWrapper';
import { getConnectionHintNoticeField } from '../../../utils/sharedFields';
@@ -54,12 +55,13 @@ export class LmChatOllama implements INodeType {
const credentials = await this.getCredentials('ollamaApi');
const modelName = this.getNodeParameter('model', itemIndex) as string;
-const options = this.getNodeParameter('options', itemIndex, {}) as object;
+const options = this.getNodeParameter('options', itemIndex, {}) as ChatOllamaInput;
const model = new ChatOllama({
-...options,
baseUrl: credentials.baseUrl as string,
model: modelName,
+...options,
+format: options.format === 'default' ? undefined : options.format,
});
return {

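Two things change here: options is now typed as ChatOllamaInput instead of a bare object, and it is spread after the fixed baseUrl and model fields, with the UI-only 'default' sentinel for format mapped to undefined so no response format is forced on the Ollama client. A standalone sketch of the same pattern, with illustrative option values:

import { ChatOllama } from '@langchain/community/chat_models/ollama';
import type { ChatOllamaInput } from '@langchain/community/chat_models/ollama';

// Illustrative stand-in for what the node reads from its 'options' parameter
const options: Partial<ChatOllamaInput> = {
	temperature: 0.5,
	numCtx: 4096,
	format: 'default', // UI sentinel meaning "do not force a format"
};

const model = new ChatOllama({
	baseUrl: 'http://localhost:11434', // placeholder
	model: 'llama3', // placeholder
	...options,
	// Spread first, then normalize: 'default' becomes undefined, 'json' passes through
	format: options.format === 'default' ? undefined : options.format,
});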
@@ -76,16 +76,16 @@ export const ollamaOptions: INodeProperties = {
default: 0.7,
typeOptions: { maxValue: 1, minValue: 0, numberPrecision: 1 },
description:
-'Controls randomness: Lowering results in less random completions. As the temperature approaches zero, the model will become deterministic and repetitive.',
+'Controls the randomness of the generated text. Lower values make the output more focused and deterministic, while higher values make it more diverse and random.',
type: 'number',
},
{
displayName: 'Top K',
name: 'topK',
default: -1,
-typeOptions: { maxValue: 1, minValue: -1, numberPrecision: 1 },
+typeOptions: { maxValue: 100, minValue: -1, numberPrecision: 1 },
description:
-'Used to remove "long tail" low probability responses. Defaults to -1, which disables it.',
+'Limits the number of highest probability vocabulary tokens to consider at each step. A higher value increases diversity but may reduce coherence. Set to -1 to disable.',
type: 'number',
},
{
@@ -94,8 +94,140 @@ export const ollamaOptions: INodeProperties = {
default: 1,
typeOptions: { maxValue: 1, minValue: 0, numberPrecision: 1 },
description:
-'Controls diversity via nucleus sampling: 0.5 means half of all likelihood-weighted options are considered. We generally recommend altering this or temperature but not both.',
+'Chooses from the smallest possible set of tokens whose cumulative probability exceeds the probability top_p. Helps generate more human-like text by reducing repetitions.',
type: 'number',
},
{
displayName: 'Frequency Penalty',
name: 'frequencyPenalty',
type: 'number',
default: 0.0,
typeOptions: { minValue: 0 },
description:
'Adjusts the penalty for tokens that have already appeared in the generated text. Higher values discourage repetition.',
},
{
displayName: 'Keep Alive',
name: 'keepAlive',
type: 'string',
default: '5m',
description:
'Specifies the duration to keep the loaded model in memory after use. Useful for frequently used models. Format: 1h30m (1 hour 30 minutes).',
},
{
displayName: 'Low VRAM Mode',
name: 'lowVram',
type: 'boolean',
default: false,
description:
'Whether to activate low VRAM mode, which reduces memory usage at the cost of slower generation speed. Useful for GPUs with limited memory.',
},
{
displayName: 'Main GPU ID',
name: 'mainGpu',
type: 'number',
default: 0,
description:
'Specifies the ID of the GPU to use for the main computation. Only change this if you have multiple GPUs.',
},
{
displayName: 'Context Batch Size',
name: 'numBatch',
type: 'number',
default: 512,
description:
'Sets the batch size for prompt processing. Larger batch sizes may improve generation speed but increase memory usage.',
},
{
displayName: 'Context Length',
name: 'numCtx',
type: 'number',
default: 2048,
description:
'The maximum number of tokens to use as context for generating the next token. Smaller values reduce memory usage, while larger values provide more context to the model.',
},
{
displayName: 'Number of GPUs',
name: 'numGpu',
type: 'number',
default: -1,
description:
'Specifies the number of GPUs to use for parallel processing. Set to -1 for auto-detection.',
},
{
displayName: 'Max Tokens to Generate',
name: 'numPredict',
type: 'number',
default: -1,
description:
'The maximum number of tokens to generate. Set to -1 for no limit. Be cautious when setting this to a large value, as it can lead to very long outputs.',
},
{
displayName: 'Number of CPU Threads',
name: 'numThread',
type: 'number',
default: 0,
description:
'Specifies the number of CPU threads to use for processing. Set to 0 for auto-detection.',
},
{
displayName: 'Penalize Newlines',
name: 'penalizeNewline',
type: 'boolean',
default: true,
description:
'Whether the model will be less likely to generate newline characters, encouraging longer continuous sequences of text',
},
{
displayName: 'Presence Penalty',
name: 'presencePenalty',
type: 'number',
default: 0.0,
description:
'Adjusts the penalty for tokens based on their presence in the generated text so far. Positive values penalize tokens that have already appeared, encouraging diversity.',
},
{
displayName: 'Repetition Penalty',
name: 'repeatPenalty',
type: 'number',
default: 1.0,
description:
'Adjusts the penalty factor for repeated tokens. Higher values more strongly discourage repetition. Set to 1.0 to disable repetition penalty.',
},
{
displayName: 'Use Memory Locking',
name: 'useMLock',
type: 'boolean',
default: false,
description:
'Whether to lock the model in memory to prevent swapping. This can improve performance but requires sufficient available memory.',
},
{
displayName: 'Use Memory Mapping',
name: 'useMMap',
type: 'boolean',
default: true,
description:
'Whether to use memory mapping for loading the model. This can reduce memory usage but may impact performance. Recommended to keep enabled.',
},
{
displayName: 'Load Vocabulary Only',
name: 'vocabOnly',
type: 'boolean',
default: false,
description:
'Whether to only load the model vocabulary without the weights. Useful for quickly testing tokenization.',
},
{
displayName: 'Output Format',
name: 'format',
type: 'options',
options: [
{ name: 'Default', value: 'default' },
{ name: 'JSON', value: 'json' },
],
default: 'default',
description: 'Specifies the format of the API response',
},
],
};
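
The new options map one-to-one onto ChatOllamaInput fields, so a fully tuned instance looks roughly like the sketch below (all values are illustrative, not recommendations; the base URL and model name are placeholders):

import { ChatOllama } from '@langchain/community/chat_models/ollama';

const model = new ChatOllama({
	baseUrl: 'http://localhost:11434', // placeholder
	model: 'llama3', // placeholder
	// Sampling controls
	temperature: 0.7,
	topK: 40,
	topP: 0.9,
	// Repetition controls
	frequencyPenalty: 0.5,
	presencePenalty: 0.5,
	repeatPenalty: 1.1,
	penalizeNewline: true,
	// Resource and context controls
	keepAlive: '5m',
	numCtx: 2048,
	numPredict: 256,
	numThread: 0, // 0 = auto-detect
	useMMap: true,
	// Response format; the node maps its 'Default' choice to undefined instead
	format: 'json',
});

const res = await model.invoke('Return a JSON object with a single "status" key.');
console.log(res.content);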