Compare commits
11 Commits
main ... refactor/m

| Author | SHA1 | Date |
|---|---|---|
|  | 088d90cf13 |  |
|  | 5b63aceda9 |  |
|  | f1dab7f924 |  |
|  | d5accf55c8 |  |
|  | f5bb44e652 |  |
|  | 796cb2b1ab |  |
|  | a50a098a6c |  |
|  | 9ed456ae14 |  |
|  | 33ca25bae3 |  |
|  | d1d4c2eb27 |  |
|  | efdad28b70 |  |
@@ -10,7 +10,17 @@ const {
validateVisionModel,
} = require('librechat-data-provider');
const { SplitStreamHandler: _Handler } = require('@librechat/agents');
const { Tokenizer, createFetch, createStreamEventHandlers } = require('@librechat/api');
const {
Tokenizer,
createFetch,
matchModelName,
getClaudeHeaders,
getModelMaxTokens,
configureReasoning,
checkPromptCacheSupport,
getModelMaxOutputTokens,
createStreamEventHandlers,
} = require('@librechat/api');
const {
truncateText,
formatMessage,
@@ -19,12 +29,6 @@ const {
parseParamFromPrompt,
createContextHandlers,
} = require('./prompts');
const {
getClaudeHeaders,
configureReasoning,
checkPromptCacheSupport,
} = require('~/server/services/Endpoints/anthropic/helpers');
const { getModelMaxTokens, getModelMaxOutputTokens, matchModelName } = require('~/utils');
const { spendTokens, spendStructuredTokens } = require('~/models/spendTokens');
const { encodeAndFormat } = require('~/server/services/Files/images/encode');
const { sleep } = require('~/server/utils');
@@ -1,4 +1,5 @@
const { google } = require('googleapis');
const { getModelMaxTokens } = require('@librechat/api');
const { concat } = require('@langchain/core/utils/stream');
const { ChatVertexAI } = require('@langchain/google-vertexai');
const { Tokenizer, getSafetySettings } = require('@librechat/api');
@@ -21,7 +22,6 @@ const {
} = require('librechat-data-provider');
const { encodeAndFormat } = require('~/server/services/Files/images');
const { spendTokens } = require('~/models/spendTokens');
const { getModelMaxTokens } = require('~/utils');
const { sleep } = require('~/server/utils');
const { logger } = require('~/config');
const {
@@ -7,7 +7,9 @@ const {
createFetch,
resolveHeaders,
constructAzureURL,
getModelMaxTokens,
genAzureChatCompletion,
getModelMaxOutputTokens,
createStreamEventHandlers,
} = require('@librechat/api');
const {
@@ -31,13 +33,13 @@ const {
titleInstruction,
createContextHandlers,
} = require('./prompts');
const { extractBaseURL, getModelMaxTokens, getModelMaxOutputTokens } = require('~/utils');
const { encodeAndFormat } = require('~/server/services/Files/images/encode');
const { addSpaceIfNeeded, sleep } = require('~/server/utils');
const { spendTokens } = require('~/models/spendTokens');
const { handleOpenAIErrors } = require('./tools/util');
const { summaryBuffer } = require('./memory');
const { runTitleChain } = require('./chains');
const { extractBaseURL } = require('~/utils');
const { tokenSplit } = require('./document');
const BaseClient = require('./BaseClient');
const { createLLM } = require('./llm');
@@ -1,5 +1,5 @@
const { getModelMaxTokens } = require('@librechat/api');
const BaseClient = require('../BaseClient');
const { getModelMaxTokens } = require('../../../utils');

class FakeClient extends BaseClient {
constructor(apiKey, options = {}) {
@@ -1,4 +1,4 @@
const { matchModelName } = require('../utils/tokens');
const { matchModelName } = require('@librechat/api');
const defaultRate = 6;

/**
@@ -1,7 +1,7 @@
const { v4 } = require('uuid');
const { sleep } = require('@librechat/agents');
const { logger } = require('@librechat/data-schemas');
const { sendEvent, getBalanceConfig } = require('@librechat/api');
const { sendEvent, getBalanceConfig, getModelMaxTokens } = require('@librechat/api');
const {
Time,
Constants,
@@ -34,7 +34,6 @@ const { checkBalance } = require('~/models/balanceMethods');
const { getConvo } = require('~/models/Conversation');
const getLogStores = require('~/cache/getLogStores');
const { countTokens } = require('~/server/utils');
const { getModelMaxTokens } = require('~/utils');
const { getOpenAIClient } = require('./helpers');

/**
@@ -1,7 +1,7 @@
const { v4 } = require('uuid');
const { sleep } = require('@librechat/agents');
const { logger } = require('@librechat/data-schemas');
const { sendEvent, getBalanceConfig } = require('@librechat/api');
const { sendEvent, getBalanceConfig, getModelMaxTokens } = require('@librechat/api');
const {
Time,
Constants,
@@ -31,7 +31,6 @@ const { checkBalance } = require('~/models/balanceMethods');
const { getConvo } = require('~/models/Conversation');
const getLogStores = require('~/cache/getLogStores');
const { countTokens } = require('~/server/utils');
const { getModelMaxTokens } = require('~/utils');
const { getOpenAIClient } = require('./helpers');

/**
@@ -1,6 +1,7 @@
const { Providers } = require('@librechat/agents');
const {
primeResources,
getModelMaxTokens,
extractLibreChatParams,
optionalChainWithEmptyCheck,
} = require('@librechat/api');
@@ -17,7 +18,6 @@ const { getProviderConfig } = require('~/server/services/Endpoints');
const { processFiles } = require('~/server/services/Files/process');
const { getFiles, getToolFilesByIds } = require('~/models/File');
const { getConvoFiles } = require('~/models/Conversation');
const { getModelMaxTokens } = require('~/utils');

/**
* @param {object} params
@@ -1,6 +1,6 @@
const { getLLMConfig } = require('@librechat/api');
const { EModelEndpoint } = require('librechat-data-provider');
const { getUserKey, checkUserKeyExpiry } = require('~/server/services/UserService');
const { getLLMConfig } = require('~/server/services/Endpoints/anthropic/llm');
const AnthropicClient = require('~/app/clients/AnthropicClient');

const initializeClient = async ({ req, res, endpointOption, overrideModel, optionsOnly }) => {
@@ -1,3 +1,4 @@
const { getModelMaxTokens } = require('@librechat/api');
const { createContentAggregator } = require('@librechat/agents');
const {
EModelEndpoint,
@@ -7,7 +8,6 @@ const {
const { getDefaultHandlers } = require('~/server/controllers/agents/callbacks');
const getOptions = require('~/server/services/Endpoints/bedrock/options');
const AgentClient = require('~/server/controllers/agents/client');
const { getModelMaxTokens } = require('~/utils');

const initializeClient = async ({ req, res, endpointOption }) => {
if (!endpointOption) {
@@ -1,13 +1,13 @@
const axios = require('axios');
const { Providers } = require('@librechat/agents');
const { logAxiosError } = require('@librechat/api');
const { logger } = require('@librechat/data-schemas');
const { HttpsProxyAgent } = require('https-proxy-agent');
const { logAxiosError, inputSchema, processModelData } = require('@librechat/api');
const { EModelEndpoint, defaultModels, CacheKeys } = require('librechat-data-provider');
const { inputSchema, extractBaseURL, processModelData } = require('~/utils');
const { OllamaClient } = require('~/app/clients/OllamaClient');
const { isUserProvided } = require('~/server/utils');
const getLogStores = require('~/cache/getLogStores');
const { extractBaseURL } = require('~/utils');

/**
* Splits a string by commas and trims each resulting value.
@@ -11,8 +11,8 @@ const {
getAnthropicModels,
} = require('./ModelService');

jest.mock('~/utils', () => {
const originalUtils = jest.requireActual('~/utils');
jest.mock('@librechat/api', () => {
const originalUtils = jest.requireActual('@librechat/api');
return {
...originalUtils,
processModelData: jest.fn((...args) => {
@@ -108,7 +108,7 @@ describe('fetchModels with createTokenConfig true', () => {

beforeEach(() => {
// Clears the mock's history before each test
const _utils = require('~/utils');
const _utils = require('@librechat/api');
axios.get.mockResolvedValue({ data });
});

@@ -120,7 +120,7 @@ describe('fetchModels with createTokenConfig true', () => {
createTokenConfig: true,
});

const { processModelData } = require('~/utils');
const { processModelData } = require('@librechat/api');
expect(processModelData).toHaveBeenCalled();
expect(processModelData).toHaveBeenCalledWith(data);
});
@@ -1,7 +1,7 @@
const axios = require('axios');
const deriveBaseURL = require('./deriveBaseURL');
jest.mock('~/utils', () => {
const originalUtils = jest.requireActual('~/utils');
jest.mock('@librechat/api', () => {
const originalUtils = jest.requireActual('@librechat/api');
return {
...originalUtils,
processModelData: jest.fn((...args) => {
@@ -1,4 +1,3 @@
const tokenHelpers = require('./tokens');
const deriveBaseURL = require('./deriveBaseURL');
const extractBaseURL = require('./extractBaseURL');
const findMessageContent = require('./findMessageContent');
@@ -6,6 +5,5 @@ const findMessageContent = require('./findMessageContent');
module.exports = {
deriveBaseURL,
extractBaseURL,
...tokenHelpers,
findMessageContent,
};
@@ -1,12 +1,12 @@
const { EModelEndpoint } = require('librechat-data-provider');
const {
maxTokensMap,
matchModelName,
processModelData,
getModelMaxTokens,
maxOutputTokensMap,
findMatchingPattern,
getModelMaxTokens,
processModelData,
matchModelName,
maxTokensMap,
} = require('./tokens');
} = require('@librechat/api');

describe('getModelMaxTokens', () => {
test('should return correct tokens for exact match', () => {
@@ -394,7 +394,7 @@ describe('getModelMaxTokens', () => {
});

test('should return correct max output tokens for GPT-5 models', () => {
const { getModelMaxOutputTokens } = require('./tokens');
const { getModelMaxOutputTokens } = require('@librechat/api');
['gpt-5', 'gpt-5-mini', 'gpt-5-nano'].forEach((model) => {
expect(getModelMaxOutputTokens(model)).toBe(maxOutputTokensMap[EModelEndpoint.openAI][model]);
expect(getModelMaxOutputTokens(model, EModelEndpoint.openAI)).toBe(
@@ -407,7 +407,7 @@ describe('getModelMaxTokens', () => {
});

test('should return correct max output tokens for GPT-OSS models', () => {
const { getModelMaxOutputTokens } = require('./tokens');
const { getModelMaxOutputTokens } = require('@librechat/api');
['gpt-oss-20b', 'gpt-oss-120b'].forEach((model) => {
expect(getModelMaxOutputTokens(model)).toBe(maxOutputTokensMap[EModelEndpoint.openAI][model]);
expect(getModelMaxOutputTokens(model, EModelEndpoint.openAI)).toBe(
@@ -1,13 +1,14 @@
const { EModelEndpoint, anthropicSettings } = require('librechat-data-provider');
const { matchModelName } = require('~/utils');
const { logger } = require('~/config');
import { logger } from '@librechat/data-schemas';
import { AnthropicClientOptions } from '@librechat/agents';
import { EModelEndpoint, anthropicSettings } from 'librechat-data-provider';
import { matchModelName } from '~/utils/tokens';

/**
* @param {string} modelName
* @returns {boolean}
*/
function checkPromptCacheSupport(modelName) {
const modelMatch = matchModelName(modelName, EModelEndpoint.anthropic);
function checkPromptCacheSupport(modelName: string): boolean {
const modelMatch = matchModelName(modelName, EModelEndpoint.anthropic) ?? '';
if (
modelMatch.includes('claude-3-5-sonnet-latest') ||
modelMatch.includes('claude-3.5-sonnet-latest')
@@ -31,7 +32,10 @@ function checkPromptCacheSupport(modelName) {
* @param {boolean} supportsCacheControl Whether the model supports cache control
* @returns {AnthropicClientOptions['extendedOptions']['defaultHeaders']|undefined} The headers object or undefined if not applicable
*/
function getClaudeHeaders(model, supportsCacheControl) {
function getClaudeHeaders(
model: string,
supportsCacheControl: boolean,
): Record<string, string> | undefined {
if (!supportsCacheControl) {
return undefined;
}
@@ -72,9 +76,13 @@ function getClaudeHeaders(model, supportsCacheControl) {
* @param {number|null} extendedOptions.thinkingBudget The token budget for thinking
* @returns {Object} Updated request options
*/
function configureReasoning(anthropicInput, extendedOptions = {}) {
function configureReasoning(
anthropicInput: AnthropicClientOptions & { max_tokens?: number },
extendedOptions: { thinking?: boolean; thinkingBudget?: number | null } = {},
): AnthropicClientOptions & { max_tokens?: number } {
const updatedOptions = { ...anthropicInput };
const currentMaxTokens = updatedOptions.max_tokens ?? updatedOptions.maxTokens;

if (
extendedOptions.thinking &&
updatedOptions?.model &&
@@ -82,11 +90,16 @@ function configureReasoning(anthropicInput, extendedOptions = {}) {
/claude-(?:sonnet|opus|haiku)-[4-9]/.test(updatedOptions.model))
) {
updatedOptions.thinking = {
...updatedOptions.thinking,
type: 'enabled',
};
} as { type: 'enabled'; budget_tokens: number };
}

if (updatedOptions.thinking != null && extendedOptions.thinkingBudget != null) {
if (
updatedOptions.thinking != null &&
extendedOptions.thinkingBudget != null &&
updatedOptions.thinking.type === 'enabled'
) {
updatedOptions.thinking = {
...updatedOptions.thinking,
budget_tokens: extendedOptions.thinkingBudget,
@@ -95,9 +108,10 @@ function configureReasoning(anthropicInput, extendedOptions = {}) {

if (
updatedOptions.thinking != null &&
updatedOptions.thinking.type === 'enabled' &&
(currentMaxTokens == null || updatedOptions.thinking.budget_tokens > currentMaxTokens)
) {
const maxTokens = anthropicSettings.maxOutputTokens.reset(updatedOptions.model);
const maxTokens = anthropicSettings.maxOutputTokens.reset(updatedOptions.model ?? '');
updatedOptions.max_tokens = currentMaxTokens ?? maxTokens;

logger.warn(
@@ -115,4 +129,4 @@ function configureReasoning(anthropicInput, extendedOptions = {}) {
return updatedOptions;
}

module.exports = { checkPromptCacheSupport, getClaudeHeaders, configureReasoning };
export { checkPromptCacheSupport, getClaudeHeaders, configureReasoning };
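For orientation, a minimal usage sketch of the ported helpers follows; the model string and option values are illustrative, not taken from this PR:

```ts
import { checkPromptCacheSupport, getClaudeHeaders, configureReasoning } from './helpers';

const model = 'claude-3-7-sonnet-20250219'; // hypothetical example model
// Whether the matched model name supports prompt caching.
const supportsCacheControl: boolean = checkPromptCacheSupport(model);
// The `anthropic-beta` headers for that model, or undefined when caching is unsupported.
const defaultHeaders = getClaudeHeaders(model, supportsCacheControl);
// Enables extended thinking on supported Claude models and applies the thinking budget,
// warning and adjusting `max_tokens` when the budget does not fit within it.
const requestOptions = configureReasoning(
  { model, max_tokens: 8192 },
  { thinking: true, thinkingBudget: 2000 },
);
```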
packages/api/src/endpoints/anthropic/index.ts (new file)
@@ -0,0 +1,2 @@
export * from './helpers';
export * from './llm';
@@ -1,4 +1,4 @@
const { getLLMConfig } = require('~/server/services/Endpoints/anthropic/llm');
import { getLLMConfig } from './llm';

jest.mock('https-proxy-agent', () => ({
HttpsProxyAgent: jest.fn().mockImplementation((proxy) => ({ proxy })),
@@ -25,9 +25,9 @@ describe('getLLMConfig', () => {
});

expect(result.llmConfig.clientOptions).toHaveProperty('fetchOptions');
expect(result.llmConfig.clientOptions.fetchOptions).toHaveProperty('dispatcher');
expect(result.llmConfig.clientOptions.fetchOptions.dispatcher).toBeDefined();
expect(result.llmConfig.clientOptions.fetchOptions.dispatcher.constructor.name).toBe(
expect(result.llmConfig.clientOptions?.fetchOptions).toHaveProperty('dispatcher');
expect(result.llmConfig.clientOptions?.fetchOptions?.dispatcher).toBeDefined();
expect(result.llmConfig.clientOptions?.fetchOptions?.dispatcher.constructor.name).toBe(
'ProxyAgent',
);
});
@@ -93,9 +93,10 @@ describe('getLLMConfig', () => {
};
const result = getLLMConfig('test-key', { modelOptions });
const clientOptions = result.llmConfig.clientOptions;
expect(clientOptions.defaultHeaders).toBeDefined();
expect(clientOptions.defaultHeaders).toHaveProperty('anthropic-beta');
expect(clientOptions.defaultHeaders['anthropic-beta']).toBe(
expect(clientOptions?.defaultHeaders).toBeDefined();
expect(clientOptions?.defaultHeaders).toHaveProperty('anthropic-beta');
const defaultHeaders = clientOptions?.defaultHeaders as Record<string, string>;
expect(defaultHeaders['anthropic-beta']).toBe(
'prompt-caching-2024-07-31,context-1m-2025-08-07',
);
});
@@ -111,9 +112,10 @@ describe('getLLMConfig', () => {
const modelOptions = { model, promptCache: true };
const result = getLLMConfig('test-key', { modelOptions });
const clientOptions = result.llmConfig.clientOptions;
expect(clientOptions.defaultHeaders).toBeDefined();
expect(clientOptions.defaultHeaders).toHaveProperty('anthropic-beta');
expect(clientOptions.defaultHeaders['anthropic-beta']).toBe(
expect(clientOptions?.defaultHeaders).toBeDefined();
expect(clientOptions?.defaultHeaders).toHaveProperty('anthropic-beta');
const defaultHeaders = clientOptions?.defaultHeaders as Record<string, string>;
expect(defaultHeaders['anthropic-beta']).toBe(
'prompt-caching-2024-07-31,context-1m-2025-08-07',
);
});
@@ -211,13 +213,13 @@ describe('getLLMConfig', () => {
it('should handle empty modelOptions', () => {
expect(() => {
getLLMConfig('test-api-key', {});
}).toThrow("Cannot read properties of undefined (reading 'thinking')");
}).toThrow('No modelOptions provided');
});

it('should handle no options parameter', () => {
expect(() => {
getLLMConfig('test-api-key');
}).toThrow("Cannot read properties of undefined (reading 'thinking')");
}).toThrow('No modelOptions provided');
});

it('should handle temperature, stop sequences, and stream settings', () => {
@@ -254,9 +256,9 @@ describe('getLLMConfig', () => {
});

expect(result.llmConfig.clientOptions).toHaveProperty('fetchOptions');
expect(result.llmConfig.clientOptions.fetchOptions).toHaveProperty('dispatcher');
expect(result.llmConfig.clientOptions.fetchOptions.dispatcher).toBeDefined();
expect(result.llmConfig.clientOptions.fetchOptions.dispatcher.constructor.name).toBe(
expect(result.llmConfig.clientOptions?.fetchOptions).toHaveProperty('dispatcher');
expect(result.llmConfig.clientOptions?.fetchOptions?.dispatcher).toBeDefined();
expect(result.llmConfig.clientOptions?.fetchOptions?.dispatcher.constructor.name).toBe(
'ProxyAgent',
);
expect(result.llmConfig.clientOptions).toHaveProperty('baseURL', 'https://reverse-proxy.com');
@@ -272,7 +274,7 @@ describe('getLLMConfig', () => {
});

// claude-3-5-sonnet supports prompt caching and should get the appropriate headers
expect(result.llmConfig.clientOptions.defaultHeaders).toEqual({
expect(result.llmConfig.clientOptions?.defaultHeaders).toEqual({
'anthropic-beta': 'max-tokens-3-5-sonnet-2024-07-15,prompt-caching-2024-07-31',
});
});
@@ -1,6 +1,12 @@
const { ProxyAgent } = require('undici');
const { anthropicSettings, removeNullishValues } = require('librechat-data-provider');
const { checkPromptCacheSupport, getClaudeHeaders, configureReasoning } = require('./helpers');
import { ProxyAgent } from 'undici';
import { AnthropicClientOptions } from '@librechat/agents';
import { anthropicSettings, removeNullishValues } from 'librechat-data-provider';
import type {
AnthropicConfigOptions,
AnthropicLLMConfigResult,
AnthropicParameters,
} from '~/types/anthropic';
import { checkPromptCacheSupport, getClaudeHeaders, configureReasoning } from './helpers';

/**
* Generates configuration options for creating an Anthropic language model (LLM) instance.
@@ -21,25 +27,42 @@ const { checkPromptCacheSupport, getClaudeHeaders, configureReasoning } = requir
*
* @returns {Object} Configuration options for creating an Anthropic LLM instance, with null and undefined values removed.
*/
function getLLMConfig(apiKey, options = {}) {
function getLLMConfig(
apiKey?: string,
options: AnthropicConfigOptions = {} as AnthropicConfigOptions,
): AnthropicLLMConfigResult {
const systemOptions = {
thinking: options.modelOptions.thinking ?? anthropicSettings.thinking.default,
promptCache: options.modelOptions.promptCache ?? anthropicSettings.promptCache.default,
thinkingBudget: options.modelOptions.thinkingBudget ?? anthropicSettings.thinkingBudget.default,
thinking: options.modelOptions?.thinking ?? anthropicSettings.thinking.default,
promptCache: options.modelOptions?.promptCache ?? anthropicSettings.promptCache.default,
thinkingBudget:
options.modelOptions?.thinkingBudget ?? anthropicSettings.thinkingBudget.default,
};
for (let key in systemOptions) {
delete options.modelOptions[key];

/** Couldn't figure out a way to still loop through the object while deleting the overlapping keys when porting this
* over from javascript, so for now they are being deleted manually until a better way presents itself.
*/
if (options.modelOptions) {
delete options.modelOptions.thinking;
delete options.modelOptions.promptCache;
delete options.modelOptions.thinkingBudget;
} else {
throw new Error('No modelOptions provided');
}

const defaultOptions = {
model: anthropicSettings.model.default,
maxOutputTokens: anthropicSettings.maxOutputTokens.default,
stream: true,
};

const mergedOptions = Object.assign(defaultOptions, options.modelOptions);
const mergedOptions = Object.assign(
defaultOptions,
options.modelOptions,
) as typeof defaultOptions &
Partial<AnthropicParameters> & { stop?: string[]; web_search?: boolean };

/** @type {AnthropicClientOptions} */
let requestOptions = {
let requestOptions: AnthropicClientOptions & { stream?: boolean } = {
apiKey,
model: mergedOptions.model,
stream: mergedOptions.stream,
@@ -66,20 +89,20 @@ function getLLMConfig(apiKey, options = {}) {
}

const supportsCacheControl =
systemOptions.promptCache === true && checkPromptCacheSupport(requestOptions.model);
const headers = getClaudeHeaders(requestOptions.model, supportsCacheControl);
if (headers) {
systemOptions.promptCache === true && checkPromptCacheSupport(requestOptions.model ?? '');
const headers = getClaudeHeaders(requestOptions.model ?? '', supportsCacheControl);
if (headers && requestOptions.clientOptions) {
requestOptions.clientOptions.defaultHeaders = headers;
}

if (options.proxy) {
if (options.proxy && requestOptions.clientOptions) {
const proxyAgent = new ProxyAgent(options.proxy);
requestOptions.clientOptions.fetchOptions = {
dispatcher: proxyAgent,
};
}

if (options.reverseProxyUrl) {
if (options.reverseProxyUrl && requestOptions.clientOptions) {
requestOptions.clientOptions.baseURL = options.reverseProxyUrl;
requestOptions.anthropicApiUrl = options.reverseProxyUrl;
}
@@ -96,8 +119,10 @@ function getLLMConfig(apiKey, options = {}) {
return {
tools,
/** @type {AnthropicClientOptions} */
llmConfig: removeNullishValues(requestOptions),
llmConfig: removeNullishValues(
requestOptions as Record<string, unknown>,
) as AnthropicClientOptions,
};
}

module.exports = { getLLMConfig };
export { getLLMConfig };
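The comment in the hunk above asks for a way to delete the overlapping keys in a loop while keeping TypeScript satisfied; one possible sketch (illustrative only, not part of this PR) uses a readonly key tuple so the loop variable stays narrowed:

```ts
type AnthropicModelOptions = {
  thinking?: boolean;
  promptCache?: boolean;
  thinkingBudget?: number | null;
  [key: string]: unknown;
};

// Keys owned by systemOptions that should not be forwarded as model options.
const systemKeys = ['thinking', 'promptCache', 'thinkingBudget'] as const;

function stripSystemKeys(modelOptions: AnthropicModelOptions): void {
  for (const key of systemKeys) {
    // The `as const` tuple keeps `key` narrowed to the three option names.
    delete modelOptions[key];
  }
}
```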
@@ -1,3 +1,4 @@
export * from './custom';
export * from './google';
export * from './openai';
export * from './anthropic';
@@ -1,10 +1,12 @@
import { ProxyAgent } from 'undici';
import { Providers } from '@librechat/agents';
import { KnownEndpoints, removeNullishValues } from 'librechat-data-provider';
import type { AnthropicClientOptions } from '@librechat/agents';
import { KnownEndpoints, removeNullishValues, EModelEndpoint } from 'librechat-data-provider';
import type { BindToolsInput } from '@langchain/core/language_models/chat_models';
import type { AzureOpenAIInput } from '@langchain/openai';
import type { OpenAI } from 'openai';
import type * as t from '~/types';
import { getLLMConfig as getAnthropicLLMConfig } from '~/endpoints/anthropic/llm';
import { sanitizeModelName, constructAzureURL } from '~/utils/azure';
import { createFetch } from '~/utils/generators';
import { isEnabled } from '~/utils/common';
@@ -80,6 +82,134 @@ function hasReasoningParams({
);
}
function getOpenAILLMConfig({
streaming,
modelOptions,
addParams,
dropParams,
}: {
streaming: boolean;
modelOptions: Partial<t.OpenAIParameters>;
addParams?: Record<string, unknown>;
dropParams?: string[];
}): {
llmConfig: Partial<t.ClientOptions> & Partial<t.OpenAIParameters> & Partial<AzureOpenAIInput>;
tools: BindToolsInput[];
} {
const { reasoning_effort, reasoning_summary, verbosity, web_search, ...restModelOptions } =
modelOptions;

const llmConfig = Object.assign(
{
streaming,
model: restModelOptions.model ?? '',
},
restModelOptions,
) as Partial<t.ClientOptions> & Partial<t.OpenAIParameters> & Partial<AzureOpenAIInput>;

const modelKwargs: Record<string, unknown> = {};
let hasModelKwargs = false;

if (verbosity != null && verbosity !== '') {
modelKwargs.verbosity = verbosity;
hasModelKwargs = true;
}

if (addParams && typeof addParams === 'object') {
for (const [key, value] of Object.entries(addParams)) {
if (knownOpenAIParams.has(key)) {
(llmConfig as Record<string, unknown>)[key] = value;
} else {
hasModelKwargs = true;
modelKwargs[key] = value;
}
}
}

if (
hasReasoningParams({ reasoning_effort, reasoning_summary }) &&
llmConfig.useResponsesApi === true
) {
llmConfig.reasoning = removeNullishValues(
{
effort: reasoning_effort,
summary: reasoning_summary,
},
true,
) as OpenAI.Reasoning;
} else if (hasReasoningParams({ reasoning_effort })) {
llmConfig.reasoning_effort = reasoning_effort;
}

if (llmConfig.max_tokens != null) {
llmConfig.maxTokens = llmConfig.max_tokens;
delete llmConfig.max_tokens;
}

const tools: BindToolsInput[] = [];

if (web_search) {
llmConfig.useResponsesApi = true;
tools.push({ type: 'web_search_preview' });
}

/**
* Note: OpenAI Web Search models do not support any known parameters besides `max_tokens`
*/
if (modelOptions.model && /gpt-4o.*search/.test(modelOptions.model as string)) {
const searchExcludeParams = [
'frequency_penalty',
'presence_penalty',
'reasoning',
'reasoning_effort',
'temperature',
'top_p',
'top_k',
'stop',
'logit_bias',
'seed',
'response_format',
'n',
'logprobs',
'user',
];

const updatedDropParams = dropParams || [];
const combinedDropParams = [...new Set([...updatedDropParams, ...searchExcludeParams])];

combinedDropParams.forEach((param) => {
if (param in llmConfig) {
delete llmConfig[param as keyof t.ClientOptions];
}
});
} else if (dropParams && Array.isArray(dropParams)) {
dropParams.forEach((param) => {
if (param in llmConfig) {
delete llmConfig[param as keyof t.ClientOptions];
}
});
}

if (modelKwargs.verbosity && llmConfig.useResponsesApi === true) {
modelKwargs.text = { verbosity: modelKwargs.verbosity };
delete modelKwargs.verbosity;
}

if (llmConfig.model && /\bgpt-[5-9]\b/i.test(llmConfig.model) && llmConfig.maxTokens != null) {
const paramName =
llmConfig.useResponsesApi === true ? 'max_output_tokens' : 'max_completion_tokens';
modelKwargs[paramName] = llmConfig.maxTokens;
delete llmConfig.maxTokens;
hasModelKwargs = true;
}

if (hasModelKwargs) {
llmConfig.modelKwargs = modelKwargs;
}

return { llmConfig, tools };
}
/**
* Generates configuration options for creating a language model (LLM) instance.
* @param apiKey - The API key for authentication.
@@ -104,34 +234,30 @@ export function getOpenAIConfig(
addParams,
dropParams,
} = options;
const { reasoning_effort, reasoning_summary, verbosity, ...modelOptions } = _modelOptions;
const llmConfig: Partial<t.ClientOptions> &
Partial<t.OpenAIParameters> &
Partial<AzureOpenAIInput> = Object.assign(
{

let llmConfig:
| (Partial<t.ClientOptions> & Partial<t.OpenAIParameters> & Partial<AzureOpenAIInput>)
| AnthropicClientOptions;
let tools: BindToolsInput[];

if (options.customParams?.defaultParamsEndpoint === EModelEndpoint.anthropic) {
const anthropicResult = getAnthropicLLMConfig(apiKey, {
modelOptions: _modelOptions,
userId: options.userId || '',
proxy: options.proxy,
reverseProxyUrl: options.reverseProxyUrl,
});
llmConfig = anthropicResult.llmConfig;
tools = anthropicResult.tools;
} else {
const openaiResult = getOpenAILLMConfig({
streaming,
model: modelOptions.model ?? '',
},
modelOptions,
);

const modelKwargs: Record<string, unknown> = {};
let hasModelKwargs = false;

if (verbosity != null && verbosity !== '') {
modelKwargs.verbosity = verbosity;
hasModelKwargs = true;
}

if (addParams && typeof addParams === 'object') {
for (const [key, value] of Object.entries(addParams)) {
if (knownOpenAIParams.has(key)) {
(llmConfig as Record<string, unknown>)[key] = value;
} else {
hasModelKwargs = true;
modelKwargs[key] = value;
}
}
modelOptions: _modelOptions,
addParams,
dropParams,
});
llmConfig = openaiResult.llmConfig;
tools = openaiResult.tools;
}

let useOpenRouter = false;
@@ -234,87 +360,6 @@ export function getOpenAIConfig(
configOptions.organization = process.env.OPENAI_ORGANIZATION;
}

if (
hasReasoningParams({ reasoning_effort, reasoning_summary }) &&
(llmConfig.useResponsesApi === true || useOpenRouter)
) {
llmConfig.reasoning = removeNullishValues(
{
effort: reasoning_effort,
summary: reasoning_summary,
},
true,
) as OpenAI.Reasoning;
} else if (hasReasoningParams({ reasoning_effort })) {
llmConfig.reasoning_effort = reasoning_effort;
}

if (llmConfig.max_tokens != null) {
llmConfig.maxTokens = llmConfig.max_tokens;
delete llmConfig.max_tokens;
}

const tools: BindToolsInput[] = [];

if (modelOptions.web_search) {
llmConfig.useResponsesApi = true;
tools.push({ type: 'web_search_preview' });
}

/**
* Note: OpenAI Web Search models do not support any known parameters besides `max_tokens`
*/
if (modelOptions.model && /gpt-4o.*search/.test(modelOptions.model)) {
const searchExcludeParams = [
'frequency_penalty',
'presence_penalty',
'reasoning',
'reasoning_effort',
'temperature',
'top_p',
'top_k',
'stop',
'logit_bias',
'seed',
'response_format',
'n',
'logprobs',
'user',
];

const updatedDropParams = dropParams || [];
const combinedDropParams = [...new Set([...updatedDropParams, ...searchExcludeParams])];

combinedDropParams.forEach((param) => {
if (param in llmConfig) {
delete llmConfig[param as keyof t.ClientOptions];
}
});
} else if (dropParams && Array.isArray(dropParams)) {
dropParams.forEach((param) => {
if (param in llmConfig) {
delete llmConfig[param as keyof t.ClientOptions];
}
});
}

if (modelKwargs.verbosity && llmConfig.useResponsesApi === true) {
modelKwargs.text = { verbosity: modelKwargs.verbosity };
delete modelKwargs.verbosity;
}

if (llmConfig.model && /\bgpt-[5-9]\b/i.test(llmConfig.model) && llmConfig.maxTokens != null) {
const paramName =
llmConfig.useResponsesApi === true ? 'max_output_tokens' : 'max_completion_tokens';
modelKwargs[paramName] = llmConfig.maxTokens;
delete llmConfig.maxTokens;
hasModelKwargs = true;
}

if (hasModelKwargs) {
llmConfig.modelKwargs = modelKwargs;
}

if (directEndpoint === true && configOptions?.baseURL != null) {
configOptions.fetch = createFetch({
directEndpoint: directEndpoint,
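To make the addParams handling in the extracted getOpenAILLMConfig concrete, here is a small standalone sketch of the split between recognized client options and modelKwargs; the allow-list below is a toy stand-in for the module's knownOpenAIParams set:

```ts
// Toy stand-in for knownOpenAIParams; the real set lives in the package.
const knownOpenAIParams = new Set(['temperature', 'top_p', 'presence_penalty']);

function splitAddParams(addParams: Record<string, unknown>) {
  const llmConfig: Record<string, unknown> = {};
  const modelKwargs: Record<string, unknown> = {};
  for (const [key, value] of Object.entries(addParams)) {
    if (knownOpenAIParams.has(key)) {
      llmConfig[key] = value; // recognized params are set directly on the client config
    } else {
      modelKwargs[key] = value; // anything else is forwarded via modelKwargs
    }
  }
  return { llmConfig, modelKwargs };
}

// splitAddParams({ temperature: 0.7, custom_flag: true })
// -> { llmConfig: { temperature: 0.7 }, modelKwargs: { custom_flag: true } }
```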
packages/api/src/types/anthropic.ts (new file)
@@ -0,0 +1,36 @@
import { z } from 'zod';
import { Dispatcher } from 'undici';
import { anthropicSchema } from 'librechat-data-provider';
import { AnthropicClientOptions } from '@librechat/agents';

export type AnthropicParameters = z.infer<typeof anthropicSchema>;

/**
* Configuration options for the getLLMConfig function
*/
export interface AnthropicConfigOptions {
modelOptions?: Partial<AnthropicParameters>;
/** The user ID for tracking and personalization */
userId?: string;
/** Proxy server URL */
proxy?: string;
/** URL for a reverse proxy, if used */
reverseProxyUrl?: string;
}

/**
* Return type for getLLMConfig function
*/
export interface AnthropicLLMConfigResult {
/** Configuration options for creating an Anthropic LLM instance */
llmConfig: AnthropicClientOptions & {
clientOptions?: {
fetchOptions?: { dispatcher: Dispatcher };
};
};
/** Array of tools to be used */
tools: Array<{
type: string;
name?: string;
}>;
}
@@ -22,6 +22,10 @@ export interface OpenAIConfigOptions {
streaming?: boolean;
addParams?: Record<string, unknown>;
dropParams?: string[];
customParams?: {
defaultParamsEndpoint?: string;
};
userId?: string;
}

export type OpenAIConfiguration = OpenAIClientOptions['configuration'];
@@ -15,3 +15,4 @@ export * from './text';
export { default as Tokenizer } from './tokenizer';
export * from './yaml';
export * from './http';
export * from './tokens';
@@ -1,5 +1,23 @@
const z = require('zod');
const { EModelEndpoint } = require('librechat-data-provider');
import z from 'zod';
import { EModelEndpoint } from 'librechat-data-provider';

/** Configuration object mapping model keys to their respective prompt, completion rates, and context limit
*
* Note: the [key: string]: unknown is not in the original JSDoc typedef in /api/typedefs.js, but I've included it since
* getModelMaxOutputTokens calls getModelTokenValue with a key of 'output', which was not in the original JSDoc typedef,
* but would be referenced in a TokenConfig in the if(matchedPattern) portion of getModelTokenValue.
* So in order to preserve functionality for that case and any others which might reference an additional key I'm unaware of,
* I've included it here until the interface can be typed more tightly.
*/
export interface TokenConfig {
prompt: number;
completion: number;
context: number;
[key: string]: unknown;
}

/** An endpoint's config object mapping model keys to their respective prompt, completion rates, and context limit */
export type EndpointTokenConfig = Record<string, TokenConfig>;

const openAIModels = {
'o4-mini': 200000,
@@ -242,7 +260,7 @@ const aggregateModels = {
'gpt-oss-120b': 131000,
};

const maxTokensMap = {
export const maxTokensMap = {
[EModelEndpoint.azureOpenAI]: openAIModels,
[EModelEndpoint.openAI]: aggregateModels,
[EModelEndpoint.agents]: aggregateModels,
@@ -252,7 +270,7 @@ const maxTokensMap = {
[EModelEndpoint.bedrock]: bedrockModels,
};

const modelMaxOutputs = {
export const modelMaxOutputs = {
o1: 32268, // -500 from max: 32,768
'o1-mini': 65136, // -500 from max: 65,536
'o1-preview': 32268, // -500 from max: 32,768
@@ -261,7 +279,7 @@ const modelMaxOutputs = {
'gpt-5-nano': 128000,
'gpt-oss-20b': 131000,
'gpt-oss-120b': 131000,
system_default: 1024,
system_default: 32000,
};
/** Outputs from https://docs.anthropic.com/en/docs/about-claude/models/all-models#model-names */
@@ -277,7 +295,7 @@ const anthropicMaxOutputs = {
'claude-3-7-sonnet': 128000,
};

const maxOutputTokensMap = {
export const maxOutputTokensMap = {
[EModelEndpoint.anthropic]: anthropicMaxOutputs,
[EModelEndpoint.azureOpenAI]: modelMaxOutputs,
[EModelEndpoint.openAI]: modelMaxOutputs,
@@ -287,10 +305,13 @@ const maxOutputTokensMap = {
/**
* Finds the first matching pattern in the tokens map.
* @param {string} modelName
* @param {Record<string, number>} tokensMap
* @param {Record<string, number> | EndpointTokenConfig} tokensMap
* @returns {string|null}
*/
function findMatchingPattern(modelName, tokensMap) {
export function findMatchingPattern(
modelName: string,
tokensMap: Record<string, number> | EndpointTokenConfig,
): string | null {
const keys = Object.keys(tokensMap);
for (let i = keys.length - 1; i >= 0; i--) {
const modelKey = keys[i];
@@ -305,57 +326,79 @@ function findMatchingPattern(modelName, tokensMap) {
/**
* Retrieves a token value for a given model name from a tokens map.
*
* @param {string} modelName - The name of the model to look up.
* @param {EndpointTokenConfig | Record<string, number>} tokensMap - The map of model names to token values.
* @param {string} [key='context'] - The key to look up in the tokens map.
* @returns {number|undefined} The token value for the given model or undefined if no match is found.
* @param modelName - The name of the model to look up.
* @param tokensMap - The map of model names to token values.
* @param [key='context'] - The key to look up in the tokens map.
* @returns The token value for the given model or undefined if no match is found.
*/
function getModelTokenValue(modelName, tokensMap, key = 'context') {
export function getModelTokenValue(
modelName: string,
tokensMap?: EndpointTokenConfig | Record<string, number>,
key = 'context' as keyof TokenConfig,
): number | undefined {
if (typeof modelName !== 'string' || !tokensMap) {
return undefined;
}

if (tokensMap[modelName]?.context) {
return tokensMap[modelName].context;
const value = tokensMap[modelName];
if (typeof value === 'number') {
return value;
}

if (tokensMap[modelName]) {
return tokensMap[modelName];
if (value?.context) {
return value.context;
}

const matchedPattern = findMatchingPattern(modelName, tokensMap);

if (matchedPattern) {
const result = tokensMap[matchedPattern];
return result?.[key] ?? result ?? tokensMap.system_default;
if (typeof result === 'number') {
return result;
}

const tokenValue = result?.[key];
if (typeof tokenValue === 'number') {
return tokenValue;
}
return tokensMap.system_default as number | undefined;
}

return tokensMap.system_default;
return tokensMap.system_default as number | undefined;
}

/**
* Retrieves the maximum tokens for a given model name.
*
* @param {string} modelName - The name of the model to look up.
* @param {string} endpoint - The endpoint (default is 'openAI').
* @param {EndpointTokenConfig} [endpointTokenConfig] - Token Config for current endpoint to use for max tokens lookup
* @returns {number|undefined} The maximum tokens for the given model or undefined if no match is found.
* @param modelName - The name of the model to look up.
* @param endpoint - The endpoint (default is 'openAI').
* @param [endpointTokenConfig] - Token Config for current endpoint to use for max tokens lookup
* @returns The maximum tokens for the given model or undefined if no match is found.
*/
function getModelMaxTokens(modelName, endpoint = EModelEndpoint.openAI, endpointTokenConfig) {
const tokensMap = endpointTokenConfig ?? maxTokensMap[endpoint];
export function getModelMaxTokens(
modelName: string,
endpoint = EModelEndpoint.openAI,
endpointTokenConfig?: EndpointTokenConfig,
): number | undefined {
const tokensMap = endpointTokenConfig ?? maxTokensMap[endpoint as keyof typeof maxTokensMap];
return getModelTokenValue(modelName, tokensMap);
}

/**
* Retrieves the maximum output tokens for a given model name.
*
* @param {string} modelName - The name of the model to look up.
* @param {string} endpoint - The endpoint (default is 'openAI').
* @param {EndpointTokenConfig} [endpointTokenConfig] - Token Config for current endpoint to use for max tokens lookup
* @returns {number|undefined} The maximum output tokens for the given model or undefined if no match is found.
* @param modelName - The name of the model to look up.
* @param endpoint - The endpoint (default is 'openAI').
* @param [endpointTokenConfig] - Token Config for current endpoint to use for max tokens lookup
* @returns The maximum output tokens for the given model or undefined if no match is found.
*/
function getModelMaxOutputTokens(modelName, endpoint = EModelEndpoint.openAI, endpointTokenConfig) {
const tokensMap = endpointTokenConfig ?? maxOutputTokensMap[endpoint];
export function getModelMaxOutputTokens(
modelName: string,
endpoint = EModelEndpoint.openAI,
endpointTokenConfig?: EndpointTokenConfig,
): number | undefined {
const tokensMap =
endpointTokenConfig ?? maxOutputTokensMap[endpoint as keyof typeof maxOutputTokensMap];
return getModelTokenValue(modelName, tokensMap, 'output');
}
@@ -363,21 +406,24 @@ function getModelMaxOutputTokens(modelName, endpoint = EModelEndpoint.openAI, en
* Retrieves the model name key for a given model name input. If the exact model name isn't found,
* it searches for partial matches within the model name, checking keys in reverse order.
*
* @param {string} modelName - The name of the model to look up.
* @param {string} endpoint - The endpoint (default is 'openAI').
* @returns {string|undefined} The model name key for the given model; returns input if no match is found and is string.
* @param modelName - The name of the model to look up.
* @param endpoint - The endpoint (default is 'openAI').
* @returns The model name key for the given model; returns input if no match is found and is string.
*
* @example
* matchModelName('gpt-4-32k-0613'); // Returns 'gpt-4-32k-0613'
* matchModelName('gpt-4-32k-unknown'); // Returns 'gpt-4-32k'
* matchModelName('unknown-model'); // Returns undefined
*/
function matchModelName(modelName, endpoint = EModelEndpoint.openAI) {
export function matchModelName(
modelName: string,
endpoint = EModelEndpoint.openAI,
): string | undefined {
if (typeof modelName !== 'string') {
return undefined;
}

const tokensMap = maxTokensMap[endpoint];
const tokensMap: Record<string, number> = maxTokensMap[endpoint as keyof typeof maxTokensMap];
if (!tokensMap) {
return modelName;
}
@@ -390,7 +436,7 @@ function matchModelName(modelName, endpoint = EModelEndpoint.openAI) {
return matchedPattern || modelName;
}

const modelSchema = z.object({
export const modelSchema = z.object({
id: z.string(),
pricing: z.object({
prompt: z.string(),
@@ -399,7 +445,7 @@ const modelSchema = z.object({
context_length: z.number(),
});

const inputSchema = z.object({
export const inputSchema = z.object({
data: z.array(modelSchema),
});

@@ -408,7 +454,7 @@ const inputSchema = z.object({
* @param {{ data: Array<z.infer<typeof modelSchema>> }} input The input object containing base URL and data fetched from the API.
* @returns {EndpointTokenConfig} The processed model data.
*/
function processModelData(input) {
export function processModelData(input: z.infer<typeof inputSchema>): EndpointTokenConfig {
const validationResult = inputSchema.safeParse(input);
if (!validationResult.success) {
throw new Error('Invalid input data');
@@ -416,7 +462,7 @@ function processModelData(input) {
const { data } = validationResult.data;

/** @type {EndpointTokenConfig} */
const tokenConfig = {};
const tokenConfig: EndpointTokenConfig = {};

for (const model of data) {
const modelKey = model.id;
@@ -439,7 +485,7 @@ function processModelData(input) {
return tokenConfig;
}

const tiktokenModels = new Set([
export const tiktokenModels = new Set([
'text-davinci-003',
'text-davinci-002',
'text-davinci-001',
@@ -477,17 +523,3 @@ const tiktokenModels = new Set([
'gpt-3.5-turbo',
'gpt-3.5-turbo-0301',
]);

module.exports = {
inputSchema,
modelSchema,
maxTokensMap,
tiktokenModels,
maxOutputTokensMap,
matchModelName,
processModelData,
getModelMaxTokens,
getModelTokenValue,
findMatchingPattern,
getModelMaxOutputTokens,
};
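For context, the lookup helpers above resolve a model's limits by exact key first, then by reverse-order pattern match, then the map's system_default; a brief sketch of the consumer-side calls (the returned numbers depend on the shipped maps):

```ts
import { getModelMaxTokens, getModelMaxOutputTokens, matchModelName } from '@librechat/api';

// Exact key: returns the registered context size for 'gpt-4-32k-0613'.
const exact = getModelMaxTokens('gpt-4-32k-0613');
// Partial match: 'gpt-4-32k-unknown' falls back to the 'gpt-4-32k' entry.
const partial = getModelMaxTokens('gpt-4-32k-unknown');
// matchModelName exposes the matched key itself ('gpt-4-32k' here).
const matchedKey = matchModelName('gpt-4-32k-unknown');
// Output limits use the same lookup against maxOutputTokensMap (the 'output' key).
const maxOut = getModelMaxOutputTokens('gpt-5-mini');
```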
@@ -619,14 +619,14 @@ export const tConversationSchema = z.object({
userLabel: z.string().optional(),
model: z.string().nullable().optional(),
promptPrefix: z.string().nullable().optional(),
temperature: z.number().optional(),
temperature: z.number().nullable().optional(),
topP: z.number().optional(),
topK: z.number().optional(),
top_p: z.number().optional(),
frequency_penalty: z.number().optional(),
presence_penalty: z.number().optional(),
parentMessageId: z.string().optional(),
maxOutputTokens: coerceNumber.optional(),
maxOutputTokens: coerceNumber.nullable().optional(),
maxContextTokens: coerceNumber.optional(),
max_tokens: coerceNumber.optional(),
/* Anthropic */
@@ -634,6 +634,7 @@ export const tConversationSchema = z.object({
system: z.string().optional(),
thinking: z.boolean().optional(),
thinkingBudget: coerceNumber.optional(),
stream: z.boolean().optional(),
/* artifacts */
artifacts: z.string().optional(),
/* google */
@@ -1152,6 +1153,8 @@ export const anthropicBaseSchema = tConversationSchema.pick({
maxContextTokens: true,
web_search: true,
fileTokenLimit: true,
stop: true,
stream: true,
});

export const anthropicSchema = anthropicBaseSchema