Merge branch 'main' into feat/Custom-Token-Rates-for-Endpoints

2025-05-14 21:20:25 +02:00 · 2025-03-10 15:24:21 +01:00 · 2025-02-27 11:03:23 +01:00 · 2025-02-27 11:00:39 +01:00 · 2025-02-27 10:57:42 +01:00 · 2025-02-26 17:23:21 +01:00
5 changed files with 168 additions and 9 deletions
--- a/api/models/tx.js
+++ b/api/models/tx.js
@@ -1,6 +1,49 @@
 const { matchModelName } = require('../utils');
 // Default rate exported by this module.
 // NOTE(review): its unit and where it is applied are not visible in this hunk — confirm.
 const defaultRate = 6;
 // Mutable registry of custom token multipliers; setCustomTokenOverrides() merges into it.
 const customTokenOverrides = {};
 // Mutable registry of custom cache multipliers; setCustomCacheOverrides() merges into it
 // and getCacheMultiplier() looks entries up by value key under their `cache` property.
 const customCacheOverrides = {};
 /**
 * Registers custom token multipliers on top of the built-in defaults.
 *
 * Every top-level key of `overrides` is copied into the module-level
 * `customTokenOverrides` registry. The copy is shallow: a key that is already
 * registered has its whole entry replaced, not merged field by field.
 *
 * @param {Object} overrides - Map of model key to custom token multipliers.
 * @param {Object} overrides.<model> - Multipliers for a single model.
 * @param {number} overrides.<model>.prompt - Custom prompt multiplier.
 * @param {number} overrides.<model>.completion - Custom completion multiplier.
 *
 * @example
 * setCustomTokenOverrides({
 *   "gpt-4o-mini": { prompt: 0.2, completion: 0.5 },
 *   "gpt-3.5": { prompt: 1.0, completion: 2.0 }
 * });
 */
 const setCustomTokenOverrides = (overrides) => {
  // Snapshot the caller's entries first, then fold them into the registry.
  const incoming = { ...overrides };
  Object.assign(customTokenOverrides, incoming);
 };
 /**
 * Allows overriding the default cache multipliers.
 * The override values must be nested under a lowercase key named "cache":
 * getCacheMultiplier only reads the `cache` property of an entry, so any other
 * casing (for example "Cache") is stored but never matched.
 *
 * Entries are merged into the module-level `customCacheOverrides` registry with
 * Object.assign, so a later call for the same model replaces that model's whole entry.
 *
 * @param {Object} overrides - An object mapping model keys to their custom cache multipliers.
 * @param {Object} overrides.<model> - An object that must include a "cache" property.
 * @param {Object} overrides.<model>.cache - An object containing custom cache multipliers for the model.
 * @param {number} overrides.<model>.cache.write - The custom cache write multiplier for the model.
 * @param {number} overrides.<model>.cache.read - The custom cache read multiplier for the model.
 *
 * @example
 * // Override the cache multipliers for "gpt-4o-mini" and "gpt-3.5":
 * setCustomCacheOverrides({
 *   "gpt-4o-mini": { cache: { write: 0.2, read: 0.5 } },
 *   "gpt-3.5": { cache: { write: 1.0, read: 1.5 } }
 * });
 */
 const setCustomCacheOverrides = (overrides) => {
  Object.assign(customCacheOverrides, overrides);
 };
 /**
 * AWS Bedrock pricing
 * source: https://aws.amazon.com/bedrock/pricing/
@@ -283,20 +326,23 @@ const getCacheMultiplier = ({ valueKey, cacheType, model, endpoint, endpointToke
    return endpointTokenConfig?.[model]?.[cacheType] ?? null;
  }
-  if (valueKey && cacheType) {
+  if (!valueKey && model) {
-    return cacheTokenValues[valueKey]?.[cacheType] ?? null;
+    valueKey = getValueKey(model, endpoint);
  }
  if (!cacheType || !model) {
    return null;
  }
  valueKey = getValueKey(model, endpoint);
  if (!valueKey) {
    return null;
  }
-  // If we got this far, and values[cacheType] is undefined somehow, return a rough average of default multipliers
+  // Check for custom cache overrides under the "cache" property.
  if (
    customCacheOverrides[valueKey] &&
    customCacheOverrides[valueKey].cache &&
    customCacheOverrides[valueKey].cache[cacheType] != null
  ) {
    return customCacheOverrides[valueKey].cache[cacheType];
  }
  // Fallback to the default cacheTokenValues.
  return cacheTokenValues[valueKey]?.[cacheType] ?? null;
 };
@@ -307,4 +353,6 @@ module.exports = {
  getCacheMultiplier,
  defaultRate,
  cacheTokenValues,
  setCustomTokenOverrides,
  setCustomCacheOverrides,
 };
--- a/api/server/services/AppService.js
+++ b/api/server/services/AppService.js
@@ -21,6 +21,7 @@ const { initializeRoles } = require('~/models/Role');
 const { isEnabled } = require('~/server/utils');
 const { getMCPManager } = require('~/config');
 const paths = require('~/config/paths');
 const { loadTokenRatesConfig } = require('./Config/loadTokenRatesConfig');
 /**
 *
@@ -33,6 +34,7 @@ const AppService = async (app) => {
  /** @type {TCustomConfig} */
  const config = (await loadCustomConfig()) ?? {};
  const configDefaults = getConfigDefaults();
  loadTokenRatesConfig(config, configDefaults);
  const ocr = loadOCRConfig(config.ocr);
  const filteredTools = config.filteredTools;
--- a/api/server/services/Config/loadTokenRatesConfig.js
+++ b/api/server/services/Config/loadTokenRatesConfig.js
@@ -0,0 +1,71 @@
 const { removeNullishValues } = require('librechat-data-provider');
 const { logger } = require('~/config');
 const { setCustomTokenOverrides, setCustomCacheOverrides } = require('~/models/tx');
 /**
 * Resolves the effective token rates and registers them as custom overrides.
 *
 * The user's `tokenRates` are cleaned with `removeNullishValues`. When
 * `configDefaults.tokenRates` is present, the cleaned defaults are shallow-merged
 * underneath the user's values (a user entry replaces the default entry for the
 * same model). The result is passed to `applyCustomTokenRates` and logged.
 *
 * @param {TCustomConfig | undefined} config - The loaded custom configuration.
 * @param {TConfigDefaults} [configDefaults] - Optional default configuration values.
 * @returns {TCustomConfig['tokenRates']} - The token rates that were applied.
 */
 function loadTokenRatesConfig(config, configDefaults) {
  const userTokenRates = removeNullishValues(config?.tokenRates ?? {});
  const defaultsSource = configDefaults?.tokenRates;

  if (defaultsSource) {
    /** @type {TCustomConfig['tokenRates']} */
    const defaultTokenRates = removeNullishValues(defaultsSource);
    // User-supplied entries win over defaults for the same model key.
    const merged = { ...defaultTokenRates, ...userTokenRates };
    applyCustomTokenRates(merged);
    logger.info(`Merged tokenRates configuration:\n${JSON.stringify(merged, null, 2)}`);
    return merged;
  }

  // No defaults available: the user's rates are applied on their own.
  logger.info(`User tokenRates configuration:\n${JSON.stringify(userTokenRates, null, 2)}`);
  applyCustomTokenRates(userTokenRates);
  return userTokenRates;
 }
 /**
 * Registers the per-model overrides described by a token rates configuration.
 *
 * For every model key in `tokenRates`:
 *  - if `prompt` or `completion` is set, `setCustomTokenOverrides` is called with
 *    both fields (an unset one is passed through as `undefined`);
 *  - if `cache.write` or `cache.read` is set, `setCustomCacheOverrides` is called
 *    with both fields nested under `cache`.
 * Models providing neither are skipped.
 *
 * @param {TModelTokenRates} tokenRates - The token rates configuration mapping models to token costs.
 */
 function applyCustomTokenRates(tokenRates) {
  for (const model of Object.keys(tokenRates)) {
    const { prompt, completion, cache } = tokenRates[model];

    const hasTokenMultiplier = prompt != null || completion != null;
    if (hasTokenMultiplier) {
      setCustomTokenOverrides({ [model]: { prompt, completion } });
    }

    // `!= null` keeps explicit zero multipliers while ignoring absent ones.
    const hasCacheMultiplier = Boolean(cache) && (cache.write != null || cache.read != null);
    if (hasCacheMultiplier) {
      setCustomCacheOverrides({
        [model]: { cache: { write: cache.write, read: cache.read } },
      });
    }
  }
 }
 module.exports = { loadTokenRatesConfig };
--- a/librechat.example.yaml
+++ b/librechat.example.yaml
@@ -71,6 +71,17 @@ interface:
  multiConvo: true
  agents: true
 # Example Custom Token Rates (optional)
 #tokenRates:
 #  gpt-4o-mini:
 #    prompt: 200.0
 #    completion: 400.0
 #  claude-3.7-sonnet:
 #    cache:
 #      read: 200.0
 #      write: 400.0
 # Example Registration Object Structure (optional)
 registration:
  socialLogins: ['github', 'google', 'discord', 'openid', 'facebook', 'apple']
--- a/packages/data-provider/src/config.ts
+++ b/packages/data-provider/src/config.ts
@@ -536,6 +536,7 @@ export type TStartupConfig = {
  helpAndFaqURL: string;
  customFooter?: string;
  modelSpecs?: TSpecsConfig;
  tokenRates?: TModelTokenRates;
  sharedLinksEnabled: boolean;
  publicSharedLinksEnabled: boolean;
  analyticsGtmId?: string;
@@ -544,6 +545,31 @@ export type TStartupConfig = {
  staticBundlerURL?: string;
 };
 // Token cost type: optional custom rates for a single model.
 // `prompt` / `completion` hold the token rates; `cache` holds the cache
 // write/read rates. Every field may be omitted.
 export type TTokenCost = {
  prompt?: number;
  completion?: number;
  cache?: {
    write?: number;
    read?: number;
  };
 };
 // Token rates type: a record whose string keys each map to a TTokenCost.
 // NOTE(review): keys are presumably model names, going by the type's name — confirm.
 export type TModelTokenRates = Record<string, TTokenCost>;
 // Zod schema for one token cost entry: optional numeric `prompt` and `completion`,
 // plus an optional `cache` object with optional numeric `write` and `read`.
 // NOTE(review): configSchema uses this schema directly for `tokenRates`, whereas
 // TModelTokenRates is a Record<string, TTokenCost>. A per-model map would
 // presumably need z.record(z.string(), tokenCostSchema) — confirm against how
 // the parsed config is consumed.
 const tokenCostSchema = z.object({
  prompt: z.number().optional(),     // e.g. 1.5 => $1.50 / 1M tokens
  completion: z.number().optional(), // e.g. 2.0 => $2.00 / 1M tokens
  cache: z
    .object({
      write: z.number().optional(),
      read: z.number().optional(),
    })
    .optional(),
 });
 export enum OCRStrategy {
  MISTRAL_OCR = 'mistral_ocr',
  CUSTOM_OCR = 'custom_ocr',
@@ -601,6 +627,7 @@ export const configSchema = z.object({
  rateLimits: rateLimitSchema.optional(),
  fileConfig: fileConfigSchema.optional(),
  modelSpecs: specsConfigSchema.optional(),
  tokenRates: tokenCostSchema.optional(),
  endpoints: z
    .object({
      all: baseEndpointSchema.optional(),
Author	SHA1	Message	Date
Ruben Talstra	9486599268	Merge branch 'main' into feat/Custom-Token-Rates-for-Endpoints	2025-05-14 21:20:25 +02:00
Ruben Talstra	f439f1a80a	Merge branch 'main' into feat/Custom-Token-Rates-for-Endpoints	2025-03-10 15:24:21 +01:00
Ruben Talstra	59a232812d	Merge branch 'main' into feat/Custom-Token-Rates-for-Endpoints	2025-02-27 11:03:23 +01:00
Ruben Talstra	edf23eb2ae	✨ feat: Add example custom token rates configuration to `librechat.example.yaml`	2025-02-27 11:00:39 +01:00
Ruben Talstra	262e6aa4c9	✨ feat: Refactor Token Rates Configuration and Introduce Custom Overrides	2025-02-27 10:57:42 +01:00
Ruben Talstra	7dfb386f5a	✨ feat: Implement Token Rates Configuration Loader and Update Config Types	2025-02-26 17:23:21 +01:00