Compare commits

...

11 Commits

26 changed files with 413 additions and 246 deletions

View File

@@ -10,7 +10,17 @@ const {
validateVisionModel,
} = require('librechat-data-provider');
const { SplitStreamHandler: _Handler } = require('@librechat/agents');
const { Tokenizer, createFetch, createStreamEventHandlers } = require('@librechat/api');
const {
Tokenizer,
createFetch,
matchModelName,
getClaudeHeaders,
getModelMaxTokens,
configureReasoning,
checkPromptCacheSupport,
getModelMaxOutputTokens,
createStreamEventHandlers,
} = require('@librechat/api');
const {
truncateText,
formatMessage,
@@ -19,12 +29,6 @@ const {
parseParamFromPrompt,
createContextHandlers,
} = require('./prompts');
const {
getClaudeHeaders,
configureReasoning,
checkPromptCacheSupport,
} = require('~/server/services/Endpoints/anthropic/helpers');
const { getModelMaxTokens, getModelMaxOutputTokens, matchModelName } = require('~/utils');
const { spendTokens, spendStructuredTokens } = require('~/models/spendTokens');
const { encodeAndFormat } = require('~/server/services/Files/images/encode');
const { sleep } = require('~/server/utils');

View File

@@ -1,4 +1,5 @@
const { google } = require('googleapis');
const { getModelMaxTokens } = require('@librechat/api');
const { concat } = require('@langchain/core/utils/stream');
const { ChatVertexAI } = require('@langchain/google-vertexai');
const { Tokenizer, getSafetySettings } = require('@librechat/api');
@@ -21,7 +22,6 @@ const {
} = require('librechat-data-provider');
const { encodeAndFormat } = require('~/server/services/Files/images');
const { spendTokens } = require('~/models/spendTokens');
const { getModelMaxTokens } = require('~/utils');
const { sleep } = require('~/server/utils');
const { logger } = require('~/config');
const {

View File

@@ -7,7 +7,9 @@ const {
createFetch,
resolveHeaders,
constructAzureURL,
getModelMaxTokens,
genAzureChatCompletion,
getModelMaxOutputTokens,
createStreamEventHandlers,
} = require('@librechat/api');
const {
@@ -31,13 +33,13 @@ const {
titleInstruction,
createContextHandlers,
} = require('./prompts');
const { extractBaseURL, getModelMaxTokens, getModelMaxOutputTokens } = require('~/utils');
const { encodeAndFormat } = require('~/server/services/Files/images/encode');
const { addSpaceIfNeeded, sleep } = require('~/server/utils');
const { spendTokens } = require('~/models/spendTokens');
const { handleOpenAIErrors } = require('./tools/util');
const { summaryBuffer } = require('./memory');
const { runTitleChain } = require('./chains');
const { extractBaseURL } = require('~/utils');
const { tokenSplit } = require('./document');
const BaseClient = require('./BaseClient');
const { createLLM } = require('./llm');

View File

@@ -1,5 +1,5 @@
const { getModelMaxTokens } = require('@librechat/api');
const BaseClient = require('../BaseClient');
const { getModelMaxTokens } = require('../../../utils');
class FakeClient extends BaseClient {
constructor(apiKey, options = {}) {

View File

@@ -1,4 +1,4 @@
const { matchModelName } = require('../utils/tokens');
const { matchModelName } = require('@librechat/api');
const defaultRate = 6;
/**

View File

@@ -1,7 +1,7 @@
const { v4 } = require('uuid');
const { sleep } = require('@librechat/agents');
const { logger } = require('@librechat/data-schemas');
const { sendEvent, getBalanceConfig } = require('@librechat/api');
const { sendEvent, getBalanceConfig, getModelMaxTokens } = require('@librechat/api');
const {
Time,
Constants,
@@ -34,7 +34,6 @@ const { checkBalance } = require('~/models/balanceMethods');
const { getConvo } = require('~/models/Conversation');
const getLogStores = require('~/cache/getLogStores');
const { countTokens } = require('~/server/utils');
const { getModelMaxTokens } = require('~/utils');
const { getOpenAIClient } = require('./helpers');
/**

View File

@@ -1,7 +1,7 @@
const { v4 } = require('uuid');
const { sleep } = require('@librechat/agents');
const { logger } = require('@librechat/data-schemas');
const { sendEvent, getBalanceConfig } = require('@librechat/api');
const { sendEvent, getBalanceConfig, getModelMaxTokens } = require('@librechat/api');
const {
Time,
Constants,
@@ -31,7 +31,6 @@ const { checkBalance } = require('~/models/balanceMethods');
const { getConvo } = require('~/models/Conversation');
const getLogStores = require('~/cache/getLogStores');
const { countTokens } = require('~/server/utils');
const { getModelMaxTokens } = require('~/utils');
const { getOpenAIClient } = require('./helpers');
/**

View File

@@ -1,6 +1,7 @@
const { Providers } = require('@librechat/agents');
const {
primeResources,
getModelMaxTokens,
extractLibreChatParams,
optionalChainWithEmptyCheck,
} = require('@librechat/api');
@@ -17,7 +18,6 @@ const { getProviderConfig } = require('~/server/services/Endpoints');
const { processFiles } = require('~/server/services/Files/process');
const { getFiles, getToolFilesByIds } = require('~/models/File');
const { getConvoFiles } = require('~/models/Conversation');
const { getModelMaxTokens } = require('~/utils');
/**
* @param {object} params

View File

@@ -1,6 +1,6 @@
const { getLLMConfig } = require('@librechat/api');
const { EModelEndpoint } = require('librechat-data-provider');
const { getUserKey, checkUserKeyExpiry } = require('~/server/services/UserService');
const { getLLMConfig } = require('~/server/services/Endpoints/anthropic/llm');
const AnthropicClient = require('~/app/clients/AnthropicClient');
const initializeClient = async ({ req, res, endpointOption, overrideModel, optionsOnly }) => {

View File

@@ -1,3 +1,4 @@
const { getModelMaxTokens } = require('@librechat/api');
const { createContentAggregator } = require('@librechat/agents');
const {
EModelEndpoint,
@@ -7,7 +8,6 @@ const {
const { getDefaultHandlers } = require('~/server/controllers/agents/callbacks');
const getOptions = require('~/server/services/Endpoints/bedrock/options');
const AgentClient = require('~/server/controllers/agents/client');
const { getModelMaxTokens } = require('~/utils');
const initializeClient = async ({ req, res, endpointOption }) => {
if (!endpointOption) {

View File

@@ -1,13 +1,13 @@
const axios = require('axios');
const { Providers } = require('@librechat/agents');
const { logAxiosError } = require('@librechat/api');
const { logger } = require('@librechat/data-schemas');
const { HttpsProxyAgent } = require('https-proxy-agent');
const { logAxiosError, inputSchema, processModelData } = require('@librechat/api');
const { EModelEndpoint, defaultModels, CacheKeys } = require('librechat-data-provider');
const { inputSchema, extractBaseURL, processModelData } = require('~/utils');
const { OllamaClient } = require('~/app/clients/OllamaClient');
const { isUserProvided } = require('~/server/utils');
const getLogStores = require('~/cache/getLogStores');
const { extractBaseURL } = require('~/utils');
/**
* Splits a string by commas and trims each resulting value.

View File

@@ -11,8 +11,8 @@ const {
getAnthropicModels,
} = require('./ModelService');
jest.mock('~/utils', () => {
const originalUtils = jest.requireActual('~/utils');
jest.mock('@librechat/api', () => {
const originalUtils = jest.requireActual('@librechat/api');
return {
...originalUtils,
processModelData: jest.fn((...args) => {
@@ -108,7 +108,7 @@ describe('fetchModels with createTokenConfig true', () => {
beforeEach(() => {
// Clears the mock's history before each test
const _utils = require('~/utils');
const _utils = require('@librechat/api');
axios.get.mockResolvedValue({ data });
});
@@ -120,7 +120,7 @@ describe('fetchModels with createTokenConfig true', () => {
createTokenConfig: true,
});
const { processModelData } = require('~/utils');
const { processModelData } = require('@librechat/api');
expect(processModelData).toHaveBeenCalled();
expect(processModelData).toHaveBeenCalledWith(data);
});

View File

@@ -1,7 +1,7 @@
const axios = require('axios');
const deriveBaseURL = require('./deriveBaseURL');
jest.mock('~/utils', () => {
const originalUtils = jest.requireActual('~/utils');
jest.mock('@librechat/api', () => {
const originalUtils = jest.requireActual('@librechat/api');
return {
...originalUtils,
processModelData: jest.fn((...args) => {

View File

@@ -1,4 +1,3 @@
const tokenHelpers = require('./tokens');
const deriveBaseURL = require('./deriveBaseURL');
const extractBaseURL = require('./extractBaseURL');
const findMessageContent = require('./findMessageContent');
@@ -6,6 +5,5 @@ const findMessageContent = require('./findMessageContent');
module.exports = {
deriveBaseURL,
extractBaseURL,
...tokenHelpers,
findMessageContent,
};

View File

@@ -1,12 +1,12 @@
const { EModelEndpoint } = require('librechat-data-provider');
const {
maxTokensMap,
matchModelName,
processModelData,
getModelMaxTokens,
maxOutputTokensMap,
findMatchingPattern,
getModelMaxTokens,
processModelData,
matchModelName,
maxTokensMap,
} = require('./tokens');
} = require('@librechat/api');
describe('getModelMaxTokens', () => {
test('should return correct tokens for exact match', () => {
@@ -394,7 +394,7 @@ describe('getModelMaxTokens', () => {
});
test('should return correct max output tokens for GPT-5 models', () => {
const { getModelMaxOutputTokens } = require('./tokens');
const { getModelMaxOutputTokens } = require('@librechat/api');
['gpt-5', 'gpt-5-mini', 'gpt-5-nano'].forEach((model) => {
expect(getModelMaxOutputTokens(model)).toBe(maxOutputTokensMap[EModelEndpoint.openAI][model]);
expect(getModelMaxOutputTokens(model, EModelEndpoint.openAI)).toBe(
@@ -407,7 +407,7 @@ describe('getModelMaxTokens', () => {
});
test('should return correct max output tokens for GPT-OSS models', () => {
const { getModelMaxOutputTokens } = require('./tokens');
const { getModelMaxOutputTokens } = require('@librechat/api');
['gpt-oss-20b', 'gpt-oss-120b'].forEach((model) => {
expect(getModelMaxOutputTokens(model)).toBe(maxOutputTokensMap[EModelEndpoint.openAI][model]);
expect(getModelMaxOutputTokens(model, EModelEndpoint.openAI)).toBe(

View File

@@ -1,13 +1,14 @@
const { EModelEndpoint, anthropicSettings } = require('librechat-data-provider');
const { matchModelName } = require('~/utils');
const { logger } = require('~/config');
import { logger } from '@librechat/data-schemas';
import { AnthropicClientOptions } from '@librechat/agents';
import { EModelEndpoint, anthropicSettings } from 'librechat-data-provider';
import { matchModelName } from '~/utils/tokens';
/**
* @param {string} modelName
* @returns {boolean}
*/
function checkPromptCacheSupport(modelName) {
const modelMatch = matchModelName(modelName, EModelEndpoint.anthropic);
function checkPromptCacheSupport(modelName: string): boolean {
const modelMatch = matchModelName(modelName, EModelEndpoint.anthropic) ?? '';
if (
modelMatch.includes('claude-3-5-sonnet-latest') ||
modelMatch.includes('claude-3.5-sonnet-latest')
@@ -31,7 +32,10 @@ function checkPromptCacheSupport(modelName) {
* @param {boolean} supportsCacheControl Whether the model supports cache control
* @returns {AnthropicClientOptions['extendedOptions']['defaultHeaders']|undefined} The headers object or undefined if not applicable
*/
function getClaudeHeaders(model, supportsCacheControl) {
function getClaudeHeaders(
model: string,
supportsCacheControl: boolean,
): Record<string, string> | undefined {
if (!supportsCacheControl) {
return undefined;
}
@@ -72,9 +76,13 @@ function getClaudeHeaders(model, supportsCacheControl) {
* @param {number|null} extendedOptions.thinkingBudget The token budget for thinking
* @returns {Object} Updated request options
*/
function configureReasoning(anthropicInput, extendedOptions = {}) {
function configureReasoning(
anthropicInput: AnthropicClientOptions & { max_tokens?: number },
extendedOptions: { thinking?: boolean; thinkingBudget?: number | null } = {},
): AnthropicClientOptions & { max_tokens?: number } {
const updatedOptions = { ...anthropicInput };
const currentMaxTokens = updatedOptions.max_tokens ?? updatedOptions.maxTokens;
if (
extendedOptions.thinking &&
updatedOptions?.model &&
@@ -82,11 +90,16 @@ function configureReasoning(anthropicInput, extendedOptions = {}) {
/claude-(?:sonnet|opus|haiku)-[4-9]/.test(updatedOptions.model))
) {
updatedOptions.thinking = {
...updatedOptions.thinking,
type: 'enabled',
};
} as { type: 'enabled'; budget_tokens: number };
}
if (updatedOptions.thinking != null && extendedOptions.thinkingBudget != null) {
if (
updatedOptions.thinking != null &&
extendedOptions.thinkingBudget != null &&
updatedOptions.thinking.type === 'enabled'
) {
updatedOptions.thinking = {
...updatedOptions.thinking,
budget_tokens: extendedOptions.thinkingBudget,
@@ -95,9 +108,10 @@ function configureReasoning(anthropicInput, extendedOptions = {}) {
if (
updatedOptions.thinking != null &&
updatedOptions.thinking.type === 'enabled' &&
(currentMaxTokens == null || updatedOptions.thinking.budget_tokens > currentMaxTokens)
) {
const maxTokens = anthropicSettings.maxOutputTokens.reset(updatedOptions.model);
const maxTokens = anthropicSettings.maxOutputTokens.reset(updatedOptions.model ?? '');
updatedOptions.max_tokens = currentMaxTokens ?? maxTokens;
logger.warn(
@@ -115,4 +129,4 @@ function configureReasoning(anthropicInput, extendedOptions = {}) {
return updatedOptions;
}
module.exports = { checkPromptCacheSupport, getClaudeHeaders, configureReasoning };
export { checkPromptCacheSupport, getClaudeHeaders, configureReasoning };
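
For orientation, a minimal sketch of what the ported configureReasoning helper does for a reasoning-capable Claude model, based only on the branches visible in this hunk; the model name and numbers are placeholders:

import { configureReasoning } from '@librechat/api';

// Hypothetical input: a Claude 4 model with an explicit max_tokens and a thinking budget.
const reasoningOptions = configureReasoning(
  { model: 'claude-sonnet-4-20250514', max_tokens: 8192 },
  { thinking: true, thinkingBudget: 2000 },
);
// The model matches /claude-(?:sonnet|opus|haiku)-[4-9]/, so thinking is enabled and the
// budget is attached: reasoningOptions.thinking -> { type: 'enabled', budget_tokens: 2000 }.
// Since 2000 does not exceed max_tokens, the max_tokens adjustment branch is not taken.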

View File

@@ -0,0 +1,2 @@
export * from './helpers';
export * from './llm';

View File

@@ -1,4 +1,4 @@
const { getLLMConfig } = require('~/server/services/Endpoints/anthropic/llm');
import { getLLMConfig } from './llm';
jest.mock('https-proxy-agent', () => ({
HttpsProxyAgent: jest.fn().mockImplementation((proxy) => ({ proxy })),
@@ -25,9 +25,9 @@ describe('getLLMConfig', () => {
});
expect(result.llmConfig.clientOptions).toHaveProperty('fetchOptions');
expect(result.llmConfig.clientOptions.fetchOptions).toHaveProperty('dispatcher');
expect(result.llmConfig.clientOptions.fetchOptions.dispatcher).toBeDefined();
expect(result.llmConfig.clientOptions.fetchOptions.dispatcher.constructor.name).toBe(
expect(result.llmConfig.clientOptions?.fetchOptions).toHaveProperty('dispatcher');
expect(result.llmConfig.clientOptions?.fetchOptions?.dispatcher).toBeDefined();
expect(result.llmConfig.clientOptions?.fetchOptions?.dispatcher.constructor.name).toBe(
'ProxyAgent',
);
});
@@ -93,9 +93,10 @@ describe('getLLMConfig', () => {
};
const result = getLLMConfig('test-key', { modelOptions });
const clientOptions = result.llmConfig.clientOptions;
expect(clientOptions.defaultHeaders).toBeDefined();
expect(clientOptions.defaultHeaders).toHaveProperty('anthropic-beta');
expect(clientOptions.defaultHeaders['anthropic-beta']).toBe(
expect(clientOptions?.defaultHeaders).toBeDefined();
expect(clientOptions?.defaultHeaders).toHaveProperty('anthropic-beta');
const defaultHeaders = clientOptions?.defaultHeaders as Record<string, string>;
expect(defaultHeaders['anthropic-beta']).toBe(
'prompt-caching-2024-07-31,context-1m-2025-08-07',
);
});
@@ -111,9 +112,10 @@ describe('getLLMConfig', () => {
const modelOptions = { model, promptCache: true };
const result = getLLMConfig('test-key', { modelOptions });
const clientOptions = result.llmConfig.clientOptions;
expect(clientOptions.defaultHeaders).toBeDefined();
expect(clientOptions.defaultHeaders).toHaveProperty('anthropic-beta');
expect(clientOptions.defaultHeaders['anthropic-beta']).toBe(
expect(clientOptions?.defaultHeaders).toBeDefined();
expect(clientOptions?.defaultHeaders).toHaveProperty('anthropic-beta');
const defaultHeaders = clientOptions?.defaultHeaders as Record<string, string>;
expect(defaultHeaders['anthropic-beta']).toBe(
'prompt-caching-2024-07-31,context-1m-2025-08-07',
);
});
@@ -211,13 +213,13 @@ describe('getLLMConfig', () => {
it('should handle empty modelOptions', () => {
expect(() => {
getLLMConfig('test-api-key', {});
}).toThrow("Cannot read properties of undefined (reading 'thinking')");
}).toThrow('No modelOptions provided');
});
it('should handle no options parameter', () => {
expect(() => {
getLLMConfig('test-api-key');
}).toThrow("Cannot read properties of undefined (reading 'thinking')");
}).toThrow('No modelOptions provided');
});
it('should handle temperature, stop sequences, and stream settings', () => {
@@ -254,9 +256,9 @@ describe('getLLMConfig', () => {
});
expect(result.llmConfig.clientOptions).toHaveProperty('fetchOptions');
expect(result.llmConfig.clientOptions.fetchOptions).toHaveProperty('dispatcher');
expect(result.llmConfig.clientOptions.fetchOptions.dispatcher).toBeDefined();
expect(result.llmConfig.clientOptions.fetchOptions.dispatcher.constructor.name).toBe(
expect(result.llmConfig.clientOptions?.fetchOptions).toHaveProperty('dispatcher');
expect(result.llmConfig.clientOptions?.fetchOptions?.dispatcher).toBeDefined();
expect(result.llmConfig.clientOptions?.fetchOptions?.dispatcher.constructor.name).toBe(
'ProxyAgent',
);
expect(result.llmConfig.clientOptions).toHaveProperty('baseURL', 'https://reverse-proxy.com');
@@ -272,7 +274,7 @@ describe('getLLMConfig', () => {
});
// claude-3-5-sonnet supports prompt caching and should get the appropriate headers
expect(result.llmConfig.clientOptions.defaultHeaders).toEqual({
expect(result.llmConfig.clientOptions?.defaultHeaders).toEqual({
'anthropic-beta': 'max-tokens-3-5-sonnet-2024-07-15,prompt-caching-2024-07-31',
});
});

View File

@@ -1,6 +1,12 @@
const { ProxyAgent } = require('undici');
const { anthropicSettings, removeNullishValues } = require('librechat-data-provider');
const { checkPromptCacheSupport, getClaudeHeaders, configureReasoning } = require('./helpers');
import { ProxyAgent } from 'undici';
import { AnthropicClientOptions } from '@librechat/agents';
import { anthropicSettings, removeNullishValues } from 'librechat-data-provider';
import type {
AnthropicConfigOptions,
AnthropicLLMConfigResult,
AnthropicParameters,
} from '~/types/anthropic';
import { checkPromptCacheSupport, getClaudeHeaders, configureReasoning } from './helpers';
/**
* Generates configuration options for creating an Anthropic language model (LLM) instance.
@@ -21,25 +27,42 @@ const { checkPromptCacheSupport, getClaudeHeaders, configureReasoning } = requir
*
* @returns {Object} Configuration options for creating an Anthropic LLM instance, with null and undefined values removed.
*/
function getLLMConfig(apiKey, options = {}) {
function getLLMConfig(
apiKey?: string,
options: AnthropicConfigOptions = {} as AnthropicConfigOptions,
): AnthropicLLMConfigResult {
const systemOptions = {
thinking: options.modelOptions.thinking ?? anthropicSettings.thinking.default,
promptCache: options.modelOptions.promptCache ?? anthropicSettings.promptCache.default,
thinkingBudget: options.modelOptions.thinkingBudget ?? anthropicSettings.thinkingBudget.default,
thinking: options.modelOptions?.thinking ?? anthropicSettings.thinking.default,
promptCache: options.modelOptions?.promptCache ?? anthropicSettings.promptCache.default,
thinkingBudget:
options.modelOptions?.thinkingBudget ?? anthropicSettings.thinkingBudget.default,
};
for (let key in systemOptions) {
delete options.modelOptions[key];
/** Couldn't find a way to loop over the object while deleting the overlapping keys when porting this
* over from JavaScript, so for now they are deleted manually until a better approach presents itself.
*/
if (options.modelOptions) {
delete options.modelOptions.thinking;
delete options.modelOptions.promptCache;
delete options.modelOptions.thinkingBudget;
} else {
throw new Error('No modelOptions provided');
}
const defaultOptions = {
model: anthropicSettings.model.default,
maxOutputTokens: anthropicSettings.maxOutputTokens.default,
stream: true,
};
const mergedOptions = Object.assign(defaultOptions, options.modelOptions);
const mergedOptions = Object.assign(
defaultOptions,
options.modelOptions,
) as typeof defaultOptions &
Partial<AnthropicParameters> & { stop?: string[]; web_search?: boolean };
/** @type {AnthropicClientOptions} */
let requestOptions = {
let requestOptions: AnthropicClientOptions & { stream?: boolean } = {
apiKey,
model: mergedOptions.model,
stream: mergedOptions.stream,
@@ -66,20 +89,20 @@ function getLLMConfig(apiKey, options = {}) {
}
const supportsCacheControl =
systemOptions.promptCache === true && checkPromptCacheSupport(requestOptions.model);
const headers = getClaudeHeaders(requestOptions.model, supportsCacheControl);
if (headers) {
systemOptions.promptCache === true && checkPromptCacheSupport(requestOptions.model ?? '');
const headers = getClaudeHeaders(requestOptions.model ?? '', supportsCacheControl);
if (headers && requestOptions.clientOptions) {
requestOptions.clientOptions.defaultHeaders = headers;
}
if (options.proxy) {
if (options.proxy && requestOptions.clientOptions) {
const proxyAgent = new ProxyAgent(options.proxy);
requestOptions.clientOptions.fetchOptions = {
dispatcher: proxyAgent,
};
}
if (options.reverseProxyUrl) {
if (options.reverseProxyUrl && requestOptions.clientOptions) {
requestOptions.clientOptions.baseURL = options.reverseProxyUrl;
requestOptions.anthropicApiUrl = options.reverseProxyUrl;
}
@@ -96,8 +119,10 @@ function getLLMConfig(apiKey, options = {}) {
return {
tools,
/** @type {AnthropicClientOptions} */
llmConfig: removeNullishValues(requestOptions),
llmConfig: removeNullishValues(
requestOptions as Record<string, unknown>,
) as AnthropicClientOptions,
};
}
module.exports = { getLLMConfig };
export { getLLMConfig };
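
The porting note in getLLMConfig above, about deleting the overlapping system-option keys one by one, can also be written with a typed key list; a minimal sketch, assuming options.modelOptions is the Partial<AnthropicParameters> from the new types file:

const overlappingKeys = ['thinking', 'promptCache', 'thinkingBudget'] as const;
if (options.modelOptions) {
  for (const key of overlappingKeys) {
    // key narrows to 'thinking' | 'promptCache' | 'thinkingBudget', so the delete stays type-checked.
    delete options.modelOptions[key];
  }
} else {
  throw new Error('No modelOptions provided');
}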

View File

@@ -1,3 +1,4 @@
export * from './custom';
export * from './google';
export * from './openai';
export * from './anthropic';

View File

@@ -1,10 +1,12 @@
import { ProxyAgent } from 'undici';
import { Providers } from '@librechat/agents';
import { KnownEndpoints, removeNullishValues } from 'librechat-data-provider';
import type { AnthropicClientOptions } from '@librechat/agents';
import { KnownEndpoints, removeNullishValues, EModelEndpoint } from 'librechat-data-provider';
import type { BindToolsInput } from '@langchain/core/language_models/chat_models';
import type { AzureOpenAIInput } from '@langchain/openai';
import type { OpenAI } from 'openai';
import type * as t from '~/types';
import { getLLMConfig as getAnthropicLLMConfig } from '~/endpoints/anthropic/llm';
import { sanitizeModelName, constructAzureURL } from '~/utils/azure';
import { createFetch } from '~/utils/generators';
import { isEnabled } from '~/utils/common';
@@ -80,6 +82,134 @@ function hasReasoningParams({
);
}
function getOpenAILLMConfig({
streaming,
modelOptions,
addParams,
dropParams,
}: {
streaming: boolean;
modelOptions: Partial<t.OpenAIParameters>;
addParams?: Record<string, unknown>;
dropParams?: string[];
}): {
llmConfig: Partial<t.ClientOptions> & Partial<t.OpenAIParameters> & Partial<AzureOpenAIInput>;
tools: BindToolsInput[];
} {
const { reasoning_effort, reasoning_summary, verbosity, web_search, ...restModelOptions } =
modelOptions;
const llmConfig = Object.assign(
{
streaming,
model: restModelOptions.model ?? '',
},
restModelOptions,
) as Partial<t.ClientOptions> & Partial<t.OpenAIParameters> & Partial<AzureOpenAIInput>;
const modelKwargs: Record<string, unknown> = {};
let hasModelKwargs = false;
if (verbosity != null && verbosity !== '') {
modelKwargs.verbosity = verbosity;
hasModelKwargs = true;
}
if (addParams && typeof addParams === 'object') {
for (const [key, value] of Object.entries(addParams)) {
if (knownOpenAIParams.has(key)) {
(llmConfig as Record<string, unknown>)[key] = value;
} else {
hasModelKwargs = true;
modelKwargs[key] = value;
}
}
}
if (
hasReasoningParams({ reasoning_effort, reasoning_summary }) &&
llmConfig.useResponsesApi === true
) {
llmConfig.reasoning = removeNullishValues(
{
effort: reasoning_effort,
summary: reasoning_summary,
},
true,
) as OpenAI.Reasoning;
} else if (hasReasoningParams({ reasoning_effort })) {
llmConfig.reasoning_effort = reasoning_effort;
}
if (llmConfig.max_tokens != null) {
llmConfig.maxTokens = llmConfig.max_tokens;
delete llmConfig.max_tokens;
}
const tools: BindToolsInput[] = [];
if (web_search) {
llmConfig.useResponsesApi = true;
tools.push({ type: 'web_search_preview' });
}
/**
* Note: OpenAI Web Search models do not support any known parameters besides `max_tokens`
*/
if (modelOptions.model && /gpt-4o.*search/.test(modelOptions.model as string)) {
const searchExcludeParams = [
'frequency_penalty',
'presence_penalty',
'reasoning',
'reasoning_effort',
'temperature',
'top_p',
'top_k',
'stop',
'logit_bias',
'seed',
'response_format',
'n',
'logprobs',
'user',
];
const updatedDropParams = dropParams || [];
const combinedDropParams = [...new Set([...updatedDropParams, ...searchExcludeParams])];
combinedDropParams.forEach((param) => {
if (param in llmConfig) {
delete llmConfig[param as keyof t.ClientOptions];
}
});
} else if (dropParams && Array.isArray(dropParams)) {
dropParams.forEach((param) => {
if (param in llmConfig) {
delete llmConfig[param as keyof t.ClientOptions];
}
});
}
if (modelKwargs.verbosity && llmConfig.useResponsesApi === true) {
modelKwargs.text = { verbosity: modelKwargs.verbosity };
delete modelKwargs.verbosity;
}
if (llmConfig.model && /\bgpt-[5-9]\b/i.test(llmConfig.model) && llmConfig.maxTokens != null) {
const paramName =
llmConfig.useResponsesApi === true ? 'max_output_tokens' : 'max_completion_tokens';
modelKwargs[paramName] = llmConfig.maxTokens;
delete llmConfig.maxTokens;
hasModelKwargs = true;
}
if (hasModelKwargs) {
llmConfig.modelKwargs = modelKwargs;
}
return { llmConfig, tools };
}
/**
* Generates configuration options for creating a language model (LLM) instance.
* @param apiKey - The API key for authentication.
@@ -104,34 +234,30 @@ export function getOpenAIConfig(
addParams,
dropParams,
} = options;
const { reasoning_effort, reasoning_summary, verbosity, ...modelOptions } = _modelOptions;
const llmConfig: Partial<t.ClientOptions> &
Partial<t.OpenAIParameters> &
Partial<AzureOpenAIInput> = Object.assign(
{
let llmConfig:
| (Partial<t.ClientOptions> & Partial<t.OpenAIParameters> & Partial<AzureOpenAIInput>)
| AnthropicClientOptions;
let tools: BindToolsInput[];
if (options.customParams?.defaultParamsEndpoint === EModelEndpoint.anthropic) {
const anthropicResult = getAnthropicLLMConfig(apiKey, {
modelOptions: _modelOptions,
userId: options.userId || '',
proxy: options.proxy,
reverseProxyUrl: options.reverseProxyUrl,
});
llmConfig = anthropicResult.llmConfig;
tools = anthropicResult.tools;
} else {
const openaiResult = getOpenAILLMConfig({
streaming,
model: modelOptions.model ?? '',
},
modelOptions,
);
const modelKwargs: Record<string, unknown> = {};
let hasModelKwargs = false;
if (verbosity != null && verbosity !== '') {
modelKwargs.verbosity = verbosity;
hasModelKwargs = true;
}
if (addParams && typeof addParams === 'object') {
for (const [key, value] of Object.entries(addParams)) {
if (knownOpenAIParams.has(key)) {
(llmConfig as Record<string, unknown>)[key] = value;
} else {
hasModelKwargs = true;
modelKwargs[key] = value;
}
}
modelOptions: _modelOptions,
addParams,
dropParams,
});
llmConfig = openaiResult.llmConfig;
tools = openaiResult.tools;
}
let useOpenRouter = false;
@@ -234,87 +360,6 @@ export function getOpenAIConfig(
configOptions.organization = process.env.OPENAI_ORGANIZATION;
}
if (
hasReasoningParams({ reasoning_effort, reasoning_summary }) &&
(llmConfig.useResponsesApi === true || useOpenRouter)
) {
llmConfig.reasoning = removeNullishValues(
{
effort: reasoning_effort,
summary: reasoning_summary,
},
true,
) as OpenAI.Reasoning;
} else if (hasReasoningParams({ reasoning_effort })) {
llmConfig.reasoning_effort = reasoning_effort;
}
if (llmConfig.max_tokens != null) {
llmConfig.maxTokens = llmConfig.max_tokens;
delete llmConfig.max_tokens;
}
const tools: BindToolsInput[] = [];
if (modelOptions.web_search) {
llmConfig.useResponsesApi = true;
tools.push({ type: 'web_search_preview' });
}
/**
* Note: OpenAI Web Search models do not support any known parameters besides `max_tokens`
*/
if (modelOptions.model && /gpt-4o.*search/.test(modelOptions.model)) {
const searchExcludeParams = [
'frequency_penalty',
'presence_penalty',
'reasoning',
'reasoning_effort',
'temperature',
'top_p',
'top_k',
'stop',
'logit_bias',
'seed',
'response_format',
'n',
'logprobs',
'user',
];
const updatedDropParams = dropParams || [];
const combinedDropParams = [...new Set([...updatedDropParams, ...searchExcludeParams])];
combinedDropParams.forEach((param) => {
if (param in llmConfig) {
delete llmConfig[param as keyof t.ClientOptions];
}
});
} else if (dropParams && Array.isArray(dropParams)) {
dropParams.forEach((param) => {
if (param in llmConfig) {
delete llmConfig[param as keyof t.ClientOptions];
}
});
}
if (modelKwargs.verbosity && llmConfig.useResponsesApi === true) {
modelKwargs.text = { verbosity: modelKwargs.verbosity };
delete modelKwargs.verbosity;
}
if (llmConfig.model && /\bgpt-[5-9]\b/i.test(llmConfig.model) && llmConfig.maxTokens != null) {
const paramName =
llmConfig.useResponsesApi === true ? 'max_output_tokens' : 'max_completion_tokens';
modelKwargs[paramName] = llmConfig.maxTokens;
delete llmConfig.maxTokens;
hasModelKwargs = true;
}
if (hasModelKwargs) {
llmConfig.modelKwargs = modelKwargs;
}
if (directEndpoint === true && configOptions?.baseURL != null) {
configOptions.fetch = createFetch({
directEndpoint: directEndpoint,

View File

@@ -0,0 +1,36 @@
import { z } from 'zod';
import { Dispatcher } from 'undici';
import { anthropicSchema } from 'librechat-data-provider';
import { AnthropicClientOptions } from '@librechat/agents';
export type AnthropicParameters = z.infer<typeof anthropicSchema>;
/**
* Configuration options for the getLLMConfig function
*/
export interface AnthropicConfigOptions {
modelOptions?: Partial<AnthropicParameters>;
/** The user ID for tracking and personalization */
userId?: string;
/** Proxy server URL */
proxy?: string;
/** URL for a reverse proxy, if used */
reverseProxyUrl?: string;
}
/**
* Return type for getLLMConfig function
*/
export interface AnthropicLLMConfigResult {
/** Configuration options for creating an Anthropic LLM instance */
llmConfig: AnthropicClientOptions & {
clientOptions?: {
fetchOptions?: { dispatcher: Dispatcher };
};
};
/** Array of tools to be used */
tools: Array<{
type: string;
name?: string;
}>;
}

View File

@@ -22,6 +22,10 @@ export interface OpenAIConfigOptions {
streaming?: boolean;
addParams?: Record<string, unknown>;
dropParams?: string[];
customParams?: {
defaultParamsEndpoint?: string;
};
userId?: string;
}
export type OpenAIConfiguration = OpenAIClientOptions['configuration'];

View File

@@ -15,3 +15,4 @@ export * from './text';
export { default as Tokenizer } from './tokenizer';
export * from './yaml';
export * from './http';
export * from './tokens';

View File

@@ -1,5 +1,23 @@
const z = require('zod');
const { EModelEndpoint } = require('librechat-data-provider');
import z from 'zod';
import { EModelEndpoint } from 'librechat-data-provider';
/** Configuration object mapping model keys to their respective prompt, completion rates, and context limit
*
* Note: the [key: string]: unknown index signature is not in the original JSDoc typedef in /api/typedefs.js. It is included
* here because getModelMaxOutputTokens calls getModelTokenValue with the key 'output', which the original typedef did not
* declare but which the if (matchedPattern) branch of getModelTokenValue reads from a TokenConfig. The index signature
* preserves that behavior, and any other keys referenced elsewhere, until the interface can be typed more tightly.
*/
export interface TokenConfig {
prompt: number;
completion: number;
context: number;
[key: string]: unknown;
}
/** An endpoint's config object mapping model keys to their respective prompt, completion rates, and context limit */
export type EndpointTokenConfig = Record<string, TokenConfig>;
const openAIModels = {
'o4-mini': 200000,
@@ -242,7 +260,7 @@ const aggregateModels = {
'gpt-oss-120b': 131000,
};
const maxTokensMap = {
export const maxTokensMap = {
[EModelEndpoint.azureOpenAI]: openAIModels,
[EModelEndpoint.openAI]: aggregateModels,
[EModelEndpoint.agents]: aggregateModels,
@@ -252,7 +270,7 @@ const maxTokensMap = {
[EModelEndpoint.bedrock]: bedrockModels,
};
const modelMaxOutputs = {
export const modelMaxOutputs = {
o1: 32268, // -500 from max: 32,768
'o1-mini': 65136, // -500 from max: 65,536
'o1-preview': 32268, // -500 from max: 32,768
@@ -261,7 +279,7 @@ const modelMaxOutputs = {
'gpt-5-nano': 128000,
'gpt-oss-20b': 131000,
'gpt-oss-120b': 131000,
system_default: 1024,
system_default: 32000,
};
/** Outputs from https://docs.anthropic.com/en/docs/about-claude/models/all-models#model-names */
@@ -277,7 +295,7 @@ const anthropicMaxOutputs = {
'claude-3-7-sonnet': 128000,
};
const maxOutputTokensMap = {
export const maxOutputTokensMap = {
[EModelEndpoint.anthropic]: anthropicMaxOutputs,
[EModelEndpoint.azureOpenAI]: modelMaxOutputs,
[EModelEndpoint.openAI]: modelMaxOutputs,
@@ -287,10 +305,13 @@ const maxOutputTokensMap = {
/**
* Finds the first matching pattern in the tokens map.
* @param {string} modelName
* @param {Record<string, number>} tokensMap
* @param {Record<string, number> | EndpointTokenConfig} tokensMap
* @returns {string|null}
*/
function findMatchingPattern(modelName, tokensMap) {
export function findMatchingPattern(
modelName: string,
tokensMap: Record<string, number> | EndpointTokenConfig,
): string | null {
const keys = Object.keys(tokensMap);
for (let i = keys.length - 1; i >= 0; i--) {
const modelKey = keys[i];
@@ -305,57 +326,79 @@ function findMatchingPattern(modelName, tokensMap) {
/**
* Retrieves a token value for a given model name from a tokens map.
*
* @param {string} modelName - The name of the model to look up.
* @param {EndpointTokenConfig | Record<string, number>} tokensMap - The map of model names to token values.
* @param {string} [key='context'] - The key to look up in the tokens map.
* @returns {number|undefined} The token value for the given model or undefined if no match is found.
* @param modelName - The name of the model to look up.
* @param tokensMap - The map of model names to token values.
* @param [key='context'] - The key to look up in the tokens map.
* @returns The token value for the given model or undefined if no match is found.
*/
function getModelTokenValue(modelName, tokensMap, key = 'context') {
export function getModelTokenValue(
modelName: string,
tokensMap?: EndpointTokenConfig | Record<string, number>,
key = 'context' as keyof TokenConfig,
): number | undefined {
if (typeof modelName !== 'string' || !tokensMap) {
return undefined;
}
if (tokensMap[modelName]?.context) {
return tokensMap[modelName].context;
const value = tokensMap[modelName];
if (typeof value === 'number') {
return value;
}
if (tokensMap[modelName]) {
return tokensMap[modelName];
if (value?.context) {
return value.context;
}
const matchedPattern = findMatchingPattern(modelName, tokensMap);
if (matchedPattern) {
const result = tokensMap[matchedPattern];
return result?.[key] ?? result ?? tokensMap.system_default;
if (typeof result === 'number') {
return result;
}
const tokenValue = result?.[key];
if (typeof tokenValue === 'number') {
return tokenValue;
}
return tokensMap.system_default as number | undefined;
}
return tokensMap.system_default;
return tokensMap.system_default as number | undefined;
}
/**
* Retrieves the maximum tokens for a given model name.
*
* @param {string} modelName - The name of the model to look up.
* @param {string} endpoint - The endpoint (default is 'openAI').
* @param {EndpointTokenConfig} [endpointTokenConfig] - Token Config for current endpoint to use for max tokens lookup
* @returns {number|undefined} The maximum tokens for the given model or undefined if no match is found.
* @param modelName - The name of the model to look up.
* @param endpoint - The endpoint (default is 'openAI').
* @param [endpointTokenConfig] - Token Config for current endpoint to use for max tokens lookup
* @returns The maximum tokens for the given model or undefined if no match is found.
*/
function getModelMaxTokens(modelName, endpoint = EModelEndpoint.openAI, endpointTokenConfig) {
const tokensMap = endpointTokenConfig ?? maxTokensMap[endpoint];
export function getModelMaxTokens(
modelName: string,
endpoint = EModelEndpoint.openAI,
endpointTokenConfig?: EndpointTokenConfig,
): number | undefined {
const tokensMap = endpointTokenConfig ?? maxTokensMap[endpoint as keyof typeof maxTokensMap];
return getModelTokenValue(modelName, tokensMap);
}
/**
* Retrieves the maximum output tokens for a given model name.
*
* @param {string} modelName - The name of the model to look up.
* @param {string} endpoint - The endpoint (default is 'openAI').
* @param {EndpointTokenConfig} [endpointTokenConfig] - Token Config for current endpoint to use for max tokens lookup
* @returns {number|undefined} The maximum output tokens for the given model or undefined if no match is found.
* @param modelName - The name of the model to look up.
* @param endpoint - The endpoint (default is 'openAI').
* @param [endpointTokenConfig] - Token Config for current endpoint to use for max tokens lookup
* @returns The maximum output tokens for the given model or undefined if no match is found.
*/
function getModelMaxOutputTokens(modelName, endpoint = EModelEndpoint.openAI, endpointTokenConfig) {
const tokensMap = endpointTokenConfig ?? maxOutputTokensMap[endpoint];
export function getModelMaxOutputTokens(
modelName: string,
endpoint = EModelEndpoint.openAI,
endpointTokenConfig?: EndpointTokenConfig,
): number | undefined {
const tokensMap =
endpointTokenConfig ?? maxOutputTokensMap[endpoint as keyof typeof maxOutputTokensMap];
return getModelTokenValue(modelName, tokensMap, 'output');
}
@@ -363,21 +406,24 @@ function getModelMaxOutputTokens(modelName, endpoint = EModelEndpoint.openAI, en
* Retrieves the model name key for a given model name input. If the exact model name isn't found,
* it searches for partial matches within the model name, checking keys in reverse order.
*
* @param {string} modelName - The name of the model to look up.
* @param {string} endpoint - The endpoint (default is 'openAI').
* @returns {string|undefined} The model name key for the given model; returns input if no match is found and is string.
* @param modelName - The name of the model to look up.
* @param endpoint - The endpoint (default is 'openAI').
* @returns The model name key for the given model; returns the input if no match is found and the input is a string.
*
* @example
* matchModelName('gpt-4-32k-0613'); // Returns 'gpt-4-32k-0613'
* matchModelName('gpt-4-32k-unknown'); // Returns 'gpt-4-32k'
* matchModelName('unknown-model'); // Returns 'unknown-model' (the input is returned when no pattern matches)
*/
function matchModelName(modelName, endpoint = EModelEndpoint.openAI) {
export function matchModelName(
modelName: string,
endpoint = EModelEndpoint.openAI,
): string | undefined {
if (typeof modelName !== 'string') {
return undefined;
}
const tokensMap = maxTokensMap[endpoint];
const tokensMap: Record<string, number> = maxTokensMap[endpoint as keyof typeof maxTokensMap];
if (!tokensMap) {
return modelName;
}
@@ -390,7 +436,7 @@ function matchModelName(modelName, endpoint = EModelEndpoint.openAI) {
return matchedPattern || modelName;
}
const modelSchema = z.object({
export const modelSchema = z.object({
id: z.string(),
pricing: z.object({
prompt: z.string(),
@@ -399,7 +445,7 @@ const modelSchema = z.object({
context_length: z.number(),
});
const inputSchema = z.object({
export const inputSchema = z.object({
data: z.array(modelSchema),
});
@@ -408,7 +454,7 @@ const inputSchema = z.object({
* @param {{ data: Array<z.infer<typeof modelSchema>> }} input The input object containing base URL and data fetched from the API.
* @returns {EndpointTokenConfig} The processed model data.
*/
function processModelData(input) {
export function processModelData(input: z.infer<typeof inputSchema>): EndpointTokenConfig {
const validationResult = inputSchema.safeParse(input);
if (!validationResult.success) {
throw new Error('Invalid input data');
@@ -416,7 +462,7 @@ function processModelData(input) {
const { data } = validationResult.data;
/** @type {EndpointTokenConfig} */
const tokenConfig = {};
const tokenConfig: EndpointTokenConfig = {};
for (const model of data) {
const modelKey = model.id;
@@ -439,7 +485,7 @@ function processModelData(input) {
return tokenConfig;
}
const tiktokenModels = new Set([
export const tiktokenModels = new Set([
'text-davinci-003',
'text-davinci-002',
'text-davinci-001',
@@ -477,17 +523,3 @@ const tiktokenModels = new Set([
'gpt-3.5-turbo',
'gpt-3.5-turbo-0301',
]);
module.exports = {
inputSchema,
modelSchema,
maxTokensMap,
tiktokenModels,
maxOutputTokensMap,
matchModelName,
processModelData,
getModelMaxTokens,
getModelTokenValue,
findMatchingPattern,
getModelMaxOutputTokens,
};
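
To make the lookup order concrete (exact key, then pattern match, then system_default), a few illustrative calls against the maps shown above; the unknown model name is made up:

getModelMaxTokens('gpt-oss-120b');                         // 131000 (exact entry in the aggregate openAI map)
getModelMaxOutputTokens('o1-mini', EModelEndpoint.openAI); // 65136 (exact entry in modelMaxOutputs)
getModelMaxOutputTokens('entirely-unknown-model');         // 32000 (assuming no key pattern matches, the new system_default applies)
matchModelName('gpt-4-32k-unknown');                       // 'gpt-4-32k' (resolved via findMatchingPattern)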

View File

@@ -619,14 +619,14 @@ export const tConversationSchema = z.object({
userLabel: z.string().optional(),
model: z.string().nullable().optional(),
promptPrefix: z.string().nullable().optional(),
temperature: z.number().optional(),
temperature: z.number().nullable().optional(),
topP: z.number().optional(),
topK: z.number().optional(),
top_p: z.number().optional(),
frequency_penalty: z.number().optional(),
presence_penalty: z.number().optional(),
parentMessageId: z.string().optional(),
maxOutputTokens: coerceNumber.optional(),
maxOutputTokens: coerceNumber.nullable().optional(),
maxContextTokens: coerceNumber.optional(),
max_tokens: coerceNumber.optional(),
/* Anthropic */
@@ -634,6 +634,7 @@ export const tConversationSchema = z.object({
system: z.string().optional(),
thinking: z.boolean().optional(),
thinkingBudget: coerceNumber.optional(),
stream: z.boolean().optional(),
/* artifacts */
artifacts: z.string().optional(),
/* google */
@@ -1152,6 +1153,8 @@ export const anthropicBaseSchema = tConversationSchema.pick({
maxContextTokens: true,
web_search: true,
fileTokenLimit: true,
stop: true,
stream: true,
});
export const anthropicSchema = anthropicBaseSchema