Compare commits
6 Commits
main
...
feat/Custo
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9486599268 | ||
|
|
f439f1a80a | ||
|
|
59a232812d | ||
|
|
edf23eb2ae | ||
|
|
262e6aa4c9 | ||
|
|
7dfb386f5a |
@@ -1,6 +1,49 @@
|
|||||||
const { matchModelName } = require('../utils');
|
const { matchModelName } = require('../utils');
|
||||||
const defaultRate = 6;
|
const defaultRate = 6;
|
||||||
|
|
||||||
|
const customTokenOverrides = {};
|
||||||
|
const customCacheOverrides = {};
|
||||||
|
|
||||||
|
/**
 * Registers custom token multipliers on top of any that were registered earlier.
 *
 * Every top-level key of `overrides` is copied onto the module-level
 * `customTokenOverrides` table. The copy is shallow: registering a model key
 * again replaces that model's whole entry rather than merging into it.
 *
 * @param {Object} overrides - Map of model key to that model's custom token multipliers.
 * @param {Object} overrides.<model> - The custom multipliers for one model.
 * @param {number} overrides.<model>.prompt - Custom prompt multiplier for the model.
 * @param {number} overrides.<model>.completion - Custom completion multiplier for the model.
 *
 * @example
 * // Override the multipliers for "gpt-4o-mini" and "gpt-3.5":
 * setCustomTokenOverrides({
 *   "gpt-4o-mini": { prompt: 0.2, completion: 0.5 },
 *   "gpt-3.5": { prompt: 1.0, completion: 2.0 }
 * });
 */
function setCustomTokenOverrides(overrides) {
  Object.assign(customTokenOverrides, overrides);
}
|
||||||
|
|
||||||
|
/**
 * Registers custom cache multipliers on top of any that were registered earlier.
 *
 * The values for each model must be nested under a lowercase "cache" key:
 * that is the property the cache-multiplier lookup reads
 * (`customCacheOverrides[valueKey].cache[cacheType]`). A differently cased key
 * such as "Cache" is stored but never consulted.
 *
 * Every top-level key of `overrides` is copied onto the module-level
 * `customCacheOverrides` table. The copy is shallow: registering a model key
 * again replaces that model's whole entry rather than merging into it.
 *
 * @param {Object} overrides - Map of model key to that model's custom cache multipliers.
 * @param {Object} overrides.<model> - An object that must include a "cache" property.
 * @param {Object} overrides.<model>.cache - The custom cache multipliers for the model.
 * @param {number} overrides.<model>.cache.write - Custom cache write multiplier for the model.
 * @param {number} overrides.<model>.cache.read - Custom cache read multiplier for the model.
 *
 * @example
 * // Override the cache multipliers for "gpt-4o-mini" and "gpt-3.5":
 * setCustomCacheOverrides({
 *   "gpt-4o-mini": { cache: { write: 0.2, read: 0.5 } },
 *   "gpt-3.5": { cache: { write: 1.0, read: 1.5 } }
 * });
 */
const setCustomCacheOverrides = (overrides) => {
  Object.assign(customCacheOverrides, overrides);
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* AWS Bedrock pricing
|
* AWS Bedrock pricing
|
||||||
* source: https://aws.amazon.com/bedrock/pricing/
|
* source: https://aws.amazon.com/bedrock/pricing/
|
||||||
@@ -283,20 +326,23 @@ const getCacheMultiplier = ({ valueKey, cacheType, model, endpoint, endpointToke
|
|||||||
return endpointTokenConfig?.[model]?.[cacheType] ?? null;
|
return endpointTokenConfig?.[model]?.[cacheType] ?? null;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (valueKey && cacheType) {
|
if (!valueKey && model) {
|
||||||
return cacheTokenValues[valueKey]?.[cacheType] ?? null;
|
valueKey = getValueKey(model, endpoint);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!cacheType || !model) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
valueKey = getValueKey(model, endpoint);
|
|
||||||
if (!valueKey) {
|
if (!valueKey) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
// If we got this far, and values[cacheType] is undefined somehow, return a rough average of default multipliers
|
// Check for custom cache overrides under the "cache" property.
|
||||||
|
if (
|
||||||
|
customCacheOverrides[valueKey] &&
|
||||||
|
customCacheOverrides[valueKey].cache &&
|
||||||
|
customCacheOverrides[valueKey].cache[cacheType] != null
|
||||||
|
) {
|
||||||
|
return customCacheOverrides[valueKey].cache[cacheType];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fallback to the default cacheTokenValues.
|
||||||
return cacheTokenValues[valueKey]?.[cacheType] ?? null;
|
return cacheTokenValues[valueKey]?.[cacheType] ?? null;
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -307,4 +353,6 @@ module.exports = {
|
|||||||
getCacheMultiplier,
|
getCacheMultiplier,
|
||||||
defaultRate,
|
defaultRate,
|
||||||
cacheTokenValues,
|
cacheTokenValues,
|
||||||
|
setCustomTokenOverrides,
|
||||||
|
setCustomCacheOverrides,
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -21,6 +21,7 @@ const { initializeRoles } = require('~/models/Role');
|
|||||||
const { isEnabled } = require('~/server/utils');
|
const { isEnabled } = require('~/server/utils');
|
||||||
const { getMCPManager } = require('~/config');
|
const { getMCPManager } = require('~/config');
|
||||||
const paths = require('~/config/paths');
|
const paths = require('~/config/paths');
|
||||||
|
const { loadTokenRatesConfig } = require('./Config/loadTokenRatesConfig');
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
@@ -33,6 +34,7 @@ const AppService = async (app) => {
|
|||||||
/** @type {TCustomConfig} */
|
/** @type {TCustomConfig} */
|
||||||
const config = (await loadCustomConfig()) ?? {};
|
const config = (await loadCustomConfig()) ?? {};
|
||||||
const configDefaults = getConfigDefaults();
|
const configDefaults = getConfigDefaults();
|
||||||
|
loadTokenRatesConfig(config, configDefaults);
|
||||||
|
|
||||||
const ocr = loadOCRConfig(config.ocr);
|
const ocr = loadOCRConfig(config.ocr);
|
||||||
const filteredTools = config.filteredTools;
|
const filteredTools = config.filteredTools;
|
||||||
|
|||||||
71
api/server/services/Config/loadTokenRatesConfig.js
Normal file
71
api/server/services/Config/loadTokenRatesConfig.js
Normal file
@@ -0,0 +1,71 @@
|
|||||||
|
const { removeNullishValues } = require('librechat-data-provider');
|
||||||
|
const { logger } = require('~/config');
|
||||||
|
const { setCustomTokenOverrides, setCustomCacheOverrides } = require('~/models/tx');
|
||||||
|
|
||||||
|
/**
 * Builds the effective token rates configuration and registers it as custom overrides.
 *
 * Nullish values are stripped from the user's `tokenRates` section. When
 * `configDefaults.tokenRates` is present, its (also stripped) entries are used
 * as a base and the user's entries are laid on top, so a model key present in
 * both takes the user's entry as a whole (shallow merge). The resulting
 * object is passed to `applyCustomTokenRates` and logged at info level.
 *
 * @param {TCustomConfig | undefined} config - The loaded custom configuration.
 * @param {TConfigDefaults} [configDefaults] - Optional default configuration values.
 * @returns {TCustomConfig['tokenRates']} - The final token rates configuration.
 */
function loadTokenRatesConfig(config, configDefaults) {
  const fromUser = removeNullishValues(config?.tokenRates ?? {});
  const defaultsSection = configDefaults?.tokenRates;

  if (defaultsSection) {
    /** @type {TCustomConfig['tokenRates']} */
    const fromDefaults = removeNullishValues(defaultsSection);
    // User entries come last so they win over defaults with the same model key.
    const combined = { ...fromDefaults, ...fromUser };

    applyCustomTokenRates(combined);

    logger.info(`Merged tokenRates configuration:\n${JSON.stringify(combined, null, 2)}`);
    return combined;
  }

  // No defaults to merge with: the user's rates are applied on their own.
  logger.info(`User tokenRates configuration:\n${JSON.stringify(fromUser, null, 2)}`);
  applyCustomTokenRates(fromUser);
  return fromUser;
}
|
||||||
|
|
||||||
|
/**
 * Collects the non-nullish values of `keys` from `source`.
 *
 * @param {unknown} source - Candidate object to read from.
 * @param {string[]} keys - Property names to pick.
 * @returns {Object | null} The picked values, or null when `source` is not an
 *   object or none of the keys is set.
 */
function pickDefinedRates(source, keys) {
  if (source == null || typeof source !== 'object') {
    return null;
  }

  const picked = {};
  for (const key of keys) {
    // `!= null` keeps a legitimate multiplier of 0 while dropping null/undefined.
    if (source[key] != null) {
      picked[key] = source[key];
    }
  }

  return Object.keys(picked).length > 0 ? picked : null;
}

/**
 * Processes the token rates configuration to set up custom overrides for each model.
 *
 * The configuration is expected to be specified per model. For each model:
 *  - `prompt` / `completion`, when set, are forwarded to `setCustomTokenOverrides`;
 *  - `cache.write` / `cache.read`, when set, are forwarded to
 *    `setCustomCacheOverrides`, nested under a `cache` key.
 *
 * Only multipliers that are actually set are forwarded, so a model that
 * configures just `prompt` does not register an explicit `undefined`
 * completion multiplier. Entries whose value is not an object are skipped
 * instead of throwing, and a nullish `tokenRates` is treated as empty.
 *
 * @param {TModelTokenRates} tokenRates - The token rates configuration mapping models to token costs.
 */
function applyCustomTokenRates(tokenRates) {
  for (const [model, rate] of Object.entries(tokenRates ?? {})) {
    if (rate == null || typeof rate !== 'object') {
      continue;
    }

    const tokenOverride = pickDefinedRates(rate, ['prompt', 'completion']);
    if (tokenOverride) {
      setCustomTokenOverrides({ [model]: tokenOverride });
    }

    const cacheOverride = pickDefinedRates(rate.cache, ['write', 'read']);
    if (cacheOverride) {
      setCustomCacheOverrides({ [model]: { cache: cacheOverride } });
    }
  }
}
|
||||||
|
|
||||||
|
module.exports = { loadTokenRatesConfig };
|
||||||
@@ -71,6 +71,17 @@ interface:
|
|||||||
multiConvo: true
|
multiConvo: true
|
||||||
agents: true
|
agents: true
|
||||||
|
|
||||||
|
# Example Custom Token Rates (optional)
|
||||||
|
#tokenRates:
|
||||||
|
# gpt-4o-mini:
|
||||||
|
# prompt: 200.0
|
||||||
|
# completion: 400.0
|
||||||
|
# claude-3.7-sonnet:
|
||||||
|
#    cache:
|
||||||
|
# read: 200.0
|
||||||
|
# write: 400.0
|
||||||
|
|
||||||
|
|
||||||
# Example Registration Object Structure (optional)
|
# Example Registration Object Structure (optional)
|
||||||
registration:
|
registration:
|
||||||
socialLogins: ['github', 'google', 'discord', 'openid', 'facebook', 'apple']
|
socialLogins: ['github', 'google', 'discord', 'openid', 'facebook', 'apple']
|
||||||
|
|||||||
@@ -536,6 +536,7 @@ export type TStartupConfig = {
|
|||||||
helpAndFaqURL: string;
|
helpAndFaqURL: string;
|
||||||
customFooter?: string;
|
customFooter?: string;
|
||||||
modelSpecs?: TSpecsConfig;
|
modelSpecs?: TSpecsConfig;
|
||||||
|
tokenRates?: TModelTokenRates;
|
||||||
sharedLinksEnabled: boolean;
|
sharedLinksEnabled: boolean;
|
||||||
publicSharedLinksEnabled: boolean;
|
publicSharedLinksEnabled: boolean;
|
||||||
analyticsGtmId?: string;
|
analyticsGtmId?: string;
|
||||||
@@ -544,6 +545,31 @@ export type TStartupConfig = {
|
|||||||
staticBundlerURL?: string;
|
staticBundlerURL?: string;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/** Optional cache multipliers of a single model. */
type TTokenCacheCost = {
  /** Custom multiplier for cache writes. */
  write?: number;
  /** Custom multiplier for cache reads. */
  read?: number;
};

/** Token cost settings of a single model; every field is optional. */
export type TTokenCost = {
  /** Custom multiplier for prompt tokens. */
  prompt?: number;
  /** Custom multiplier for completion tokens. */
  completion?: number;
  /** Custom multipliers for cache writes and reads. */
  cache?: TTokenCacheCost;
};

/** Token rates configuration: maps a model key to its token cost settings. */
export type TModelTokenRates = Record<string, TTokenCost>;
|
||||||
|
|
||||||
|
/** Validates the optional cache write/read multipliers of one model. */
const tokenCacheCostSchema = z.object({
  write: z.number().optional(),
  read: z.number().optional(),
});

/** Validates the token cost settings of one model; every field is optional. */
const tokenCostSchema = z.object({
  // e.g. 1.5 => $1.50 / 1M tokens
  prompt: z.number().optional(),
  // e.g. 2.0 => $2.00 / 1M tokens
  completion: z.number().optional(),
  cache: tokenCacheCostSchema.optional(),
});
|
||||||
|
|
||||||
export enum OCRStrategy {
|
export enum OCRStrategy {
|
||||||
MISTRAL_OCR = 'mistral_ocr',
|
MISTRAL_OCR = 'mistral_ocr',
|
||||||
CUSTOM_OCR = 'custom_ocr',
|
CUSTOM_OCR = 'custom_ocr',
|
||||||
@@ -601,6 +627,7 @@ export const configSchema = z.object({
|
|||||||
rateLimits: rateLimitSchema.optional(),
|
rateLimits: rateLimitSchema.optional(),
|
||||||
fileConfig: fileConfigSchema.optional(),
|
fileConfig: fileConfigSchema.optional(),
|
||||||
modelSpecs: specsConfigSchema.optional(),
|
modelSpecs: specsConfigSchema.optional(),
|
||||||
|
// tokenRates maps a model key to that model's token costs (see TModelTokenRates),
// so it is validated as a record of tokenCostSchema rather than as one cost object.
tokenRates: z.record(z.string(), tokenCostSchema).optional(),
|
||||||
endpoints: z
|
endpoints: z
|
||||||
.object({
|
.object({
|
||||||
all: baseEndpointSchema.optional(),
|
all: baseEndpointSchema.optional(),
|
||||||
|
|||||||
Reference in New Issue
Block a user