Compare commits
6 Commits
main
...
feat/Custo
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9486599268 | ||
|
|
f439f1a80a | ||
|
|
59a232812d | ||
|
|
edf23eb2ae | ||
|
|
262e6aa4c9 | ||
|
|
7dfb386f5a |
@@ -1,6 +1,49 @@
|
||||
const { matchModelName } = require('../utils');
|
||||
const defaultRate = 6;
|
||||
|
||||
// Module-level store of per-model token multiplier overrides; filled by setCustomTokenOverrides.
const customTokenOverrides = {};
|
||||
// Module-level store of per-model cache multiplier overrides; filled by setCustomCacheOverrides
// and consulted by getCacheMultiplier before it falls back to cacheTokenValues.
const customCacheOverrides = {};
|
||||
|
||||
/**
 * Registers custom token multipliers by copying them into the module-level
 * `customTokenOverrides` store.
 *
 * Entries are copied with `Object.assign`, so a model key that is already
 * present in the store is replaced wholesale by the newly supplied entry.
 *
 * @param {Object.<string, {prompt?: number, completion?: number}>} overrides
 *   Map of model key to the multipliers to use for that model.
 * @returns {void}
 *
 * @example
 * // Override the multipliers for "gpt-4o-mini" and "gpt-3.5":
 * setCustomTokenOverrides({
 *   'gpt-4o-mini': { prompt: 0.2, completion: 0.5 },
 *   'gpt-3.5': { prompt: 1.0, completion: 2.0 },
 * });
 */
function setCustomTokenOverrides(overrides) {
  Object.assign(customTokenOverrides, overrides);
}
|
||||
|
||||
/**
 * Registers custom cache multipliers by copying them into the module-level
 * `customCacheOverrides` store.
 *
 * Each model entry must nest its values under a lowercase `cache` key; that is
 * the property `getCacheMultiplier` reads when it looks up an override.
 * Entries are copied with `Object.assign`, so a model key that is already
 * present in the store is replaced wholesale by the newly supplied entry.
 *
 * @param {Object.<string, {cache: {write?: number, read?: number}}>} overrides
 *   Map of model key to an object holding the `cache` multipliers for that model.
 * @returns {void}
 *
 * @example
 * // Override the cache multipliers for "gpt-4o-mini" and "gpt-3.5":
 * setCustomCacheOverrides({
 *   'gpt-4o-mini': { cache: { write: 0.2, read: 0.5 } },
 *   'gpt-3.5': { cache: { write: 1.0, read: 1.5 } },
 * });
 */
function setCustomCacheOverrides(overrides) {
  Object.assign(customCacheOverrides, overrides);
}
|
||||
|
||||
/**
|
||||
* AWS Bedrock pricing
|
||||
* source: https://aws.amazon.com/bedrock/pricing/
|
||||
@@ -283,20 +326,23 @@ const getCacheMultiplier = ({ valueKey, cacheType, model, endpoint, endpointToke
|
||||
return endpointTokenConfig?.[model]?.[cacheType] ?? null;
|
||||
}
|
||||
|
||||
if (valueKey && cacheType) {
|
||||
return cacheTokenValues[valueKey]?.[cacheType] ?? null;
|
||||
if (!valueKey && model) {
|
||||
valueKey = getValueKey(model, endpoint);
|
||||
}
|
||||
|
||||
if (!cacheType || !model) {
|
||||
return null;
|
||||
}
|
||||
|
||||
valueKey = getValueKey(model, endpoint);
|
||||
if (!valueKey) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// If we got this far, and values[cacheType] is undefined somehow, return a rough average of default multipliers
|
||||
// Check for custom cache overrides under the "cache" property.
|
||||
if (
|
||||
customCacheOverrides[valueKey] &&
|
||||
customCacheOverrides[valueKey].cache &&
|
||||
customCacheOverrides[valueKey].cache[cacheType] != null
|
||||
) {
|
||||
return customCacheOverrides[valueKey].cache[cacheType];
|
||||
}
|
||||
|
||||
// Fallback to the default cacheTokenValues.
|
||||
return cacheTokenValues[valueKey]?.[cacheType] ?? null;
|
||||
};
|
||||
|
||||
@@ -307,4 +353,6 @@ module.exports = {
|
||||
getCacheMultiplier,
|
||||
defaultRate,
|
||||
cacheTokenValues,
|
||||
setCustomTokenOverrides,
|
||||
setCustomCacheOverrides,
|
||||
};
|
||||
|
||||
@@ -21,6 +21,7 @@ const { initializeRoles } = require('~/models/Role');
|
||||
const { isEnabled } = require('~/server/utils');
|
||||
const { getMCPManager } = require('~/config');
|
||||
const paths = require('~/config/paths');
|
||||
const { loadTokenRatesConfig } = require('./Config/loadTokenRatesConfig');
|
||||
|
||||
/**
|
||||
*
|
||||
@@ -33,6 +34,7 @@ const AppService = async (app) => {
|
||||
/** @type {TCustomConfig} */
|
||||
const config = (await loadCustomConfig()) ?? {};
|
||||
const configDefaults = getConfigDefaults();
|
||||
loadTokenRatesConfig(config, configDefaults);
|
||||
|
||||
const ocr = loadOCRConfig(config.ocr);
|
||||
const filteredTools = config.filteredTools;
|
||||
|
||||
71
api/server/services/Config/loadTokenRatesConfig.js
Normal file
71
api/server/services/Config/loadTokenRatesConfig.js
Normal file
@@ -0,0 +1,71 @@
|
||||
const { removeNullishValues } = require('librechat-data-provider');
|
||||
const { logger } = require('~/config');
|
||||
const { setCustomTokenOverrides, setCustomCacheOverrides } = require('~/models/tx');
|
||||
|
||||
/**
 * Resolves the token rates to use from the custom configuration, registers
 * them as overrides via `applyCustomTokenRates`, logs them, and returns them.
 *
 * When `configDefaults.tokenRates` is present, the result is a shallow merge in
 * which a user-supplied entry for a model replaces the default entry for that
 * model as a whole. Otherwise only the user-supplied rates are used.
 *
 * @param {TCustomConfig | undefined} config - The loaded custom configuration.
 * @param {TConfigDefaults} [configDefaults] - Optional default configuration values.
 * @returns {TCustomConfig['tokenRates']} The token rates that were applied.
 */
function loadTokenRatesConfig(config, configDefaults) {
  const userRates = removeNullishValues(config?.tokenRates ?? {});
  const defaultRates = configDefaults?.tokenRates;

  if (defaultRates) {
    /** @type {TCustomConfig['tokenRates']} */
    const combined = { ...removeNullishValues(defaultRates), ...userRates };

    // Defaults first, user entries last: the user's entry wins per model key.
    applyCustomTokenRates(combined);
    logger.info(`Merged tokenRates configuration:\n${JSON.stringify(combined, null, 2)}`);
    return combined;
  }

  // No defaults available: the user's rates are applied exactly as configured.
  logger.info(`User tokenRates configuration:\n${JSON.stringify(userRates, null, 2)}`);
  applyCustomTokenRates(userRates);
  return userRates;
}
|
||||
|
||||
/**
 * Returns a copy of `values` without the properties whose value is null or undefined.
 *
 * @param {Object} values - Flat object of candidate multipliers.
 * @returns {Object} A new object holding only the defined entries.
 */
function withoutNullish(values) {
  return Object.fromEntries(Object.entries(values).filter(([, value]) => value != null));
}

/**
 * Registers the custom token and cache multipliers found in a token rates configuration.
 *
 * The configuration is keyed by model; each entry may carry `prompt` and/or
 * `completion` multipliers and an optional nested `cache` object:
 *
 *   { 'gpt-4o-mini': { prompt: 0.2, completion: 0.5, cache: { write: 0.2, read: 0.1 } } }
 *
 * For every model, `setCustomTokenOverrides` is called when at least one token
 * multiplier is set, and `setCustomCacheOverrides` is called when at least one
 * cache multiplier is set. Only the values that are actually configured are
 * forwarded, so a partial entry never registers an explicit `undefined`.
 * A nullish configuration, or a model entry that is not an object, is ignored
 * instead of raising a TypeError.
 *
 * @param {TModelTokenRates} tokenRates - The token rates configuration mapping models to token costs.
 * @returns {void}
 */
function applyCustomTokenRates(tokenRates) {
  for (const [model, rate] of Object.entries(tokenRates ?? {})) {
    // An empty YAML value parses to null; there is nothing to register for it.
    if (rate == null || typeof rate !== 'object') {
      continue;
    }

    const tokenOverride = withoutNullish({ prompt: rate.prompt, completion: rate.completion });
    if (Object.keys(tokenOverride).length > 0) {
      setCustomTokenOverrides({ [model]: tokenOverride });
    }

    const cacheRates = rate.cache;
    if (cacheRates == null || typeof cacheRates !== 'object') {
      continue;
    }

    const cacheOverride = withoutNullish({ write: cacheRates.write, read: cacheRates.read });
    if (Object.keys(cacheOverride).length > 0) {
      // The override is nested under `cache`, matching the lookup in getCacheMultiplier.
      setCustomCacheOverrides({ [model]: { cache: cacheOverride } });
    }
  }
}
|
||||
|
||||
module.exports = { loadTokenRatesConfig };
|
||||
@@ -71,6 +71,17 @@ interface:
|
||||
multiConvo: true
|
||||
agents: true
|
||||
|
||||
# Example Custom Token Rates (optional)
|
||||
#tokenRates:
|
||||
# gpt-4o-mini:
|
||||
# prompt: 200.0
|
||||
# completion: 400.0
|
||||
# claude-3.7-sonnet:
|
||||
# cache:
|
||||
# read: 200.0
|
||||
# write: 400.0
|
||||
|
||||
|
||||
# Example Registration Object Structure (optional)
|
||||
registration:
|
||||
socialLogins: ['github', 'google', 'discord', 'openid', 'facebook', 'apple']
|
||||
|
||||
@@ -536,6 +536,7 @@ export type TStartupConfig = {
|
||||
helpAndFaqURL: string;
|
||||
customFooter?: string;
|
||||
modelSpecs?: TSpecsConfig;
|
||||
tokenRates?: TModelTokenRates;
|
||||
sharedLinksEnabled: boolean;
|
||||
publicSharedLinksEnabled: boolean;
|
||||
analyticsGtmId?: string;
|
||||
@@ -544,6 +545,31 @@ export type TStartupConfig = {
|
||||
staticBundlerURL?: string;
|
||||
};
|
||||
|
||||
|
||||
/**
 * Token cost overrides for a single model. Every field is optional, including
 * the nested cache `write` / `read` values.
 */
export type TTokenCost = Partial<{
  prompt: number;
  completion: number;
  cache: Partial<{ write: number; read: number }>;
}>;
|
||||
|
||||
/** Token rates configuration: maps a model name to that model's token cost overrides. */
export type TModelTokenRates = { [model: string]: TTokenCost };
|
||||
|
||||
/** Validation schema for the optional cache write/read costs of a single model. */
const tokenCacheCostSchema = z.object({
  write: z.number().optional(),
  read: z.number().optional(),
});

/**
 * Validation schema for the token cost overrides of a single model (see TTokenCost).
 *
 * NOTE(review): configSchema declares `tokenRates: tokenCostSchema.optional()`, while
 * TModelTokenRates is a record keyed by model name. A per-model map likely needs
 * `z.record(z.string(), tokenCostSchema)` there — confirm against how the config is parsed.
 */
const tokenCostSchema = z.object({
  prompt: z.number().optional(), // e.g. 1.5 => $1.50 / 1M tokens
  completion: z.number().optional(), // e.g. 2.0 => $2.00 / 1M tokens
  cache: tokenCacheCostSchema.optional(),
});
|
||||
|
||||
export enum OCRStrategy {
|
||||
MISTRAL_OCR = 'mistral_ocr',
|
||||
CUSTOM_OCR = 'custom_ocr',
|
||||
@@ -601,6 +627,7 @@ export const configSchema = z.object({
|
||||
rateLimits: rateLimitSchema.optional(),
|
||||
fileConfig: fileConfigSchema.optional(),
|
||||
modelSpecs: specsConfigSchema.optional(),
|
||||
tokenRates: tokenCostSchema.optional(),
|
||||
endpoints: z
|
||||
.object({
|
||||
all: baseEndpointSchema.optional(),
|
||||
|
||||
Reference in New Issue
Block a user