From c87422a1e063597268426869db0a992ad3deb05b Mon Sep 17 00:00:00 2001
From: Danny Avila
Date: Wed, 25 Jun 2025 15:14:33 -0400
Subject: [PATCH] 🧠 feat: Thinking Budget, Include Thoughts, and Dynamic
 Thinking for Gemini 2.5 (#8055)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* feat: support thinking budget parameter for Gemini 2.5 series (#6949, #7542)

https://ai.google.dev/gemini-api/docs/thinking#set-budget

* refactor: update thinking budget minimum value to -1 for dynamic thinking

- see: https://ai.google.dev/gemini-api/docs/thinking#set-budget

* chore: bump @librechat/agents to v2.4.43

* refactor: rename LLMConfigOptions to OpenAIConfigOptions for clarity and consistency

- Updated type definitions and references in initialize.ts, llm.ts, and openai.ts to reflect the new naming convention.
- Ensured that the OpenAI configuration options are consistently used across the relevant files.

* refactor: port Google LLM methods to TypeScript Package

* chore: update @librechat/agents version to 2.4.43 in package-lock.json and package.json

* refactor: update thinking budget description for clarity and adjust placeholder in parameter settings

* refactor: enhance googleSettings default value for thinking budget to support dynamic adjustment

* chore: update @librechat/agents to v2.4.44 for Vertex Dynamic Thinking workaround

* refactor: rename google config function, update `createRun` types, use `reasoning` as `reasoningKey` for Google

* refactor: simplify placeholder handling in DynamicInput component

* refactor: enhance thinking budget description for clarity and allow automatic decision by setting to "-1"

* refactor: update text styling in OptionHover component for improved readability

* chore: update @librechat/agents dependency to v2.4.46 in package.json and package-lock.json

* chore: update @librechat/api version to 1.2.5 in package.json and package-lock.json

* refactor: enhance `clientOptions` handling by filtering `omitTitleOptions`, add `json` field for Google models

---------

Co-authored-by: ciffelia <15273128+ciffelia@users.noreply.github.com>
---
 api/app/clients/GoogleClient.js               | 14 ++-
 api/package.json                              |  2 +-
 api/server/controllers/agents/client.js       | 21 +++++
 .../services/Endpoints/google/initialize.js   |  5 +-
 .../SidePanel/Parameters/DynamicInput.tsx     | 10 +--
 .../SidePanel/Parameters/OptionHover.tsx      |  2 +-
 client/src/locales/en/translation.json        |  3 +
 package-lock.json                             | 64 ++++---------
 packages/api/package.json                     |  4 +-
 packages/api/src/agents/run.ts                |  9 +-
 packages/api/src/endpoints/google/index.ts    |  1 +
 .../api/src/endpoints/google/llm.ts           | 87 +++++++++++-------
 packages/api/src/endpoints/index.ts           |  1 +
 .../api/src/endpoints/openai/initialize.ts    |  8 +-
 packages/api/src/endpoints/openai/llm.ts      |  2 +-
 packages/api/src/types/google.ts              | 24 +++++
 packages/api/src/types/index.ts               |  1 +
 packages/api/src/types/openai.ts              |  2 +-
 packages/api/src/types/run.ts                 |  5 +-
 .../data-provider/src/parameterSettings.ts    | 35 ++++++++
 packages/data-provider/src/schemas.ts         | 20 +++++
 21 files changed, 212 insertions(+), 108 deletions(-)
 create mode 100644 packages/api/src/endpoints/google/index.ts
 rename api/server/services/Endpoints/google/llm.js => packages/api/src/endpoints/google/llm.ts (61%)
 create mode 100644 packages/api/src/types/google.ts
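
A quick orientation before the diffs. The sketch below is illustrative only (it is not
part of the patch); it mirrors the budget-resolution logic added to getGoogleConfig in
packages/api/src/endpoints/google/llm.ts further down. A budget of -1 requests Gemini's
Dynamic Thinking; any other non-positive budget leaves thinking disabled.

// Illustrative TypeScript sketch, not patch code.
function resolveThinkingConfig(
  thinking: boolean,
  thinkingBudget: number | null,
): { thinkingBudget: number; includeThoughts: boolean } | undefined {
  // Thinking is enabled only for a positive budget or the dynamic sentinel -1.
  const shouldEnableThinking =
    thinking && thinkingBudget != null && (thinkingBudget > 0 || thinkingBudget === -1);
  if (!shouldEnableThinking) {
    return undefined;
  }
  return { thinkingBudget, includeThoughts: true };
}

// resolveThinkingConfig(true, -1) -> { thinkingBudget: -1, includeThoughts: true }
// resolveThinkingConfig(true, 0)  -> undefined (the model does not think)
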
diff --git a/api/app/clients/GoogleClient.js b/api/app/clients/GoogleClient.js
index 817239d14..2ec23a0a0 100644
--- a/api/app/clients/GoogleClient.js
+++ b/api/app/clients/GoogleClient.js
@@ -1,7 +1,7 @@
 const { google } = require('googleapis');
-const { Tokenizer } = require('@librechat/api');
 const { concat } = require('@langchain/core/utils/stream');
 const { ChatVertexAI } = require('@langchain/google-vertexai');
+const { Tokenizer, getSafetySettings } = require('@librechat/api');
 const { ChatGoogleGenerativeAI } = require('@langchain/google-genai');
 const { GoogleGenerativeAI: GenAI } = require('@google/generative-ai');
 const { HumanMessage, SystemMessage } = require('@langchain/core/messages');
@@ -12,13 +12,13 @@ const {
   endpointSettings,
   parseTextParts,
   EModelEndpoint,
+  googleSettings,
   ContentTypes,
   VisionModes,
   ErrorTypes,
   Constants,
   AuthKeys,
 } = require('librechat-data-provider');
-const { getSafetySettings } = require('~/server/services/Endpoints/google/llm');
 const { encodeAndFormat } = require('~/server/services/Files/images');
 const { spendTokens } = require('~/models/spendTokens');
 const { getModelMaxTokens } = require('~/utils');
@@ -166,6 +166,16 @@ class GoogleClient extends BaseClient {
       );
     }
 
+    // Add thinking configuration
+    this.modelOptions.thinkingConfig = {
+      thinkingBudget:
+        (this.modelOptions.thinking ?? googleSettings.thinking.default)
+          ? this.modelOptions.thinkingBudget
+          : 0,
+    };
+    delete this.modelOptions.thinking;
+    delete this.modelOptions.thinkingBudget;
+
     this.sender =
       this.options.sender ??
       getResponseSender({
diff --git a/api/package.json b/api/package.json
index 7b0e06436..571db53aa 100644
--- a/api/package.json
+++ b/api/package.json
@@ -48,7 +48,7 @@
     "@langchain/google-genai": "^0.2.13",
     "@langchain/google-vertexai": "^0.2.13",
     "@langchain/textsplitters": "^0.1.0",
-    "@librechat/agents": "^2.4.42",
+    "@librechat/agents": "^2.4.46",
     "@librechat/api": "*",
     "@librechat/data-schemas": "*",
     "@node-saml/passport-saml": "^5.0.0",
diff --git a/api/server/controllers/agents/client.js b/api/server/controllers/agents/client.js
index f9d4e16a9..f4395b4b3 100644
--- a/api/server/controllers/agents/client.js
+++ b/api/server/controllers/agents/client.js
@@ -44,6 +44,17 @@ const BaseClient = require('~/app/clients/BaseClient');
 const { loadAgent } = require('~/models/Agent');
 const { getMCPManager } = require('~/config');
 
+const omitTitleOptions = new Set([
+  'stream',
+  'thinking',
+  'streaming',
+  'clientOptions',
+  'thinkingConfig',
+  'thinkingBudget',
+  'includeThoughts',
+  'maxOutputTokens',
+]);
+
 /**
  * @param {ServerRequest} req
  * @param {Agent} agent
@@ -1038,6 +1049,16 @@ class AgentClient extends BaseClient {
       delete clientOptions.maxTokens;
     }
 
+    clientOptions = Object.assign(
+      Object.fromEntries(
+        Object.entries(clientOptions).filter(([key]) => !omitTitleOptions.has(key)),
+      ),
+    );
+
+    if (provider === Providers.GOOGLE) {
+      clientOptions.json = true;
+    }
+
     try {
       const titleResult = await this.run.generateTitle({
         provider,
diff --git a/api/server/services/Endpoints/google/initialize.js b/api/server/services/Endpoints/google/initialize.js
index fe58a1fa8..60e874d5b 100644
--- a/api/server/services/Endpoints/google/initialize.js
+++ b/api/server/services/Endpoints/google/initialize.js
@@ -1,7 +1,6 @@
+const { getGoogleConfig, isEnabled } = require('@librechat/api');
 const { EModelEndpoint, AuthKeys } = require('librechat-data-provider');
 const { getUserKey, checkUserKeyExpiry } = require('~/server/services/UserService');
-const { getLLMConfig } = require('~/server/services/Endpoints/google/llm');
-const { isEnabled } = require('~/server/utils');
 const { GoogleClient } = require('~/app');
 
 const initializeClient = async ({ req, res, endpointOption, overrideModel, optionsOnly }) => {
@@ -65,7 +64,7 @@ const initializeClient = async ({ req, res, endpointOption, overrideModel, optio
     if (overrideModel) {
       clientOptions.modelOptions.model = overrideModel;
     }
-    return getLLMConfig(credentials, clientOptions);
+    return getGoogleConfig(credentials, clientOptions);
   }
 
   const client = new GoogleClient(credentials, clientOptions);
diff --git a/client/src/components/SidePanel/Parameters/DynamicInput.tsx b/client/src/components/SidePanel/Parameters/DynamicInput.tsx
index 71714d050..57e55d75c 100644
--- a/client/src/components/SidePanel/Parameters/DynamicInput.tsx
+++ b/client/src/components/SidePanel/Parameters/DynamicInput.tsx
@@ -46,6 +46,10 @@ function DynamicInput({
     setInputValue(e, !isNaN(Number(e.target.value)));
   };
 
+  const placeholderText = placeholderCode
+    ? localize(placeholder as TranslationKeys) || placeholder
+    : placeholder;
+
   return (
[Extraction note: markup stripping removed the JSX from the remainder of this
DynamicInput.tsx hunk and swallowed the entire OptionHover.tsx diff
(client/src/components/SidePanel/Parameters/OptionHover.tsx, 1 line changed).
What remains recoverable: DynamicInput now passes the precomputed
`placeholderText` to the input's placeholder prop, and OptionHover updates the
styling of the element that renders `{text}` for improved readability.]
diff --git a/client/src/locales/en/translation.json b/client/src/locales/en/translation.json
index b875644a2..4c6ab1df4 100644
--- a/client/src/locales/en/translation.json
+++ b/client/src/locales/en/translation.json
@@ -207,6 +207,8 @@
   "com_endpoint_google_temp": "Higher values = more random, while lower values = more focused and deterministic. We recommend altering this or Top P but not both.",
   "com_endpoint_google_topk": "Top-k changes how the model selects tokens for output. A top-k of 1 means the selected token is the most probable among all tokens in the model's vocabulary (also called greedy decoding), while a top-k of 3 means that the next token is selected from among the 3 most probable tokens (using temperature).",
   "com_endpoint_google_topp": "Top-p changes how the model selects tokens for output. Tokens are selected from most K (see topK parameter) probable to least until the sum of their probabilities equals the top-p value.",
+  "com_endpoint_google_thinking": "Enables or disables reasoning. This setting is only supported by certain models (2.5 series). For older models, this setting may have no effect.",
+  "com_endpoint_google_thinking_budget": "Guides the number of thinking tokens the model uses. The actual amount may exceed or fall below this value depending on the prompt.\n\nThis setting is only supported by certain models (2.5 series). Gemini 2.5 Pro supports 128-32,768 tokens. Gemini 2.5 Flash supports 0-24,576 tokens. Gemini 2.5 Flash Lite supports 512-24,576 tokens.\n\nLeave blank or set to \"-1\" to let the model automatically decide when and how much to think. By default, Gemini 2.5 Flash Lite does not think.",
   "com_endpoint_instructions_assistants": "Override Instructions",
   "com_endpoint_instructions_assistants_placeholder": "Overrides the instructions of the assistant. This is useful for modifying the behavior on a per-run basis.",
   "com_endpoint_max_output_tokens": "Max Output Tokens",
@@ -582,6 +584,7 @@
   "com_ui_auth_url": "Authorization URL",
   "com_ui_authentication": "Authentication",
   "com_ui_authentication_type": "Authentication Type",
+  "com_ui_auto": "Auto",
   "com_ui_available_tools": "Available Tools",
   "com_ui_avatar": "Avatar",
   "com_ui_azure": "Azure",
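
As a reading aid for the thinking-budget description above, the quoted per-model ranges
can be captured in a small lookup. This is an illustrative TypeScript sketch only; the
model keys and the helper are not identifiers from the patch.

// Illustrative only: the ranges quoted in com_endpoint_google_thinking_budget.
const thinkingBudgetRanges: Record<string, { min: number; max: number }> = {
  'gemini-2.5-pro': { min: 128, max: 32768 },
  'gemini-2.5-flash': { min: 0, max: 24576 },
  'gemini-2.5-flash-lite': { min: 512, max: 24576 },
};

// -1 (or an unset value) defers to the model's Dynamic Thinking.
function isBudgetInRange(model: string, budget?: number): boolean {
  if (budget == null || budget === -1) {
    return true;
  }
  const range = thinkingBudgetRanges[model];
  return range != null && budget >= range.min && budget <= range.max;
}
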
diff --git a/package-lock.json b/package-lock.json
index 39e52db33..989883c0e 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -64,7 +64,7 @@
         "@langchain/google-genai": "^0.2.13",
         "@langchain/google-vertexai": "^0.2.13",
         "@langchain/textsplitters": "^0.1.0",
-        "@librechat/agents": "^2.4.42",
+        "@librechat/agents": "^2.4.46",
         "@librechat/api": "*",
         "@librechat/data-schemas": "*",
         "@node-saml/passport-saml": "^5.0.0",
@@ -1351,33 +1351,6 @@
         }
       }
     },
-    "api/node_modules/@librechat/agents": {
-      "version": "2.4.42",
-      "resolved": "https://registry.npmjs.org/@librechat/agents/-/agents-2.4.42.tgz",
-      "integrity": "sha512-52ux2PeEAV79yr6/h6GN3omlpqX6H0FYl6qwjJ6gT04MMko/imnLd3bQrX0gm3i0KL5ygHbRjQeonONKjJayHw==",
-      "license": "MIT",
-      "dependencies": {
-        "@langchain/anthropic": "^0.3.23",
-        "@langchain/aws": "^0.1.11",
-        "@langchain/community": "^0.3.47",
-        "@langchain/core": "^0.3.60",
-        "@langchain/deepseek": "^0.0.2",
-        "@langchain/google-genai": "^0.2.13",
-        "@langchain/google-vertexai": "^0.2.13",
-        "@langchain/langgraph": "^0.3.4",
-        "@langchain/mistralai": "^0.2.1",
-        "@langchain/ollama": "^0.2.3",
-        "@langchain/openai": "^0.5.14",
-        "@langchain/xai": "^0.0.3",
-        "cheerio": "^1.0.0",
-        "dotenv": "^16.4.7",
-        "https-proxy-agent": "^7.0.6",
-        "nanoid": "^3.3.7"
-      },
-      "engines": {
-        "node": ">=14.0.0"
-      }
-    },
     "api/node_modules/@smithy/abort-controller": {
       "version": "4.0.2",
       "resolved": "https://registry.npmjs.org/@smithy/abort-controller/-/abort-controller-4.0.2.tgz",
@@ -19153,9 +19126,9 @@
       }
     },
     "node_modules/@langchain/langgraph": {
-      "version": "0.3.4",
-      "resolved": "https://registry.npmjs.org/@langchain/langgraph/-/langgraph-0.3.4.tgz",
-      "integrity": "sha512-Vuja8Qtu3Zjx7k4fK7Cnw+p8gtvIRPciWp9btPhAs3aUo6aBgOJOZVcK5Ii3mHfEHK/aQmRElR0x/u/YwykOrg==",
+      "version": "0.3.5",
+      "resolved": "https://registry.npmjs.org/@langchain/langgraph/-/langgraph-0.3.5.tgz",
+      "integrity": "sha512-7astlgnp6BdMQJqmr+cbDgR10FYWNCaDLnbfEDHpqhKCCajU59m5snOdl4Vtu5UM6V2k3lgatNqWoflBtxhIyg==",
       "license": "MIT",
       "dependencies": {
         "@langchain/langgraph-checkpoint": "~0.0.18",
@@ -19463,11 +19436,10 @@
       }
     },
     "node_modules/@librechat/agents": {
-      "version": "2.4.41",
-      "resolved": "https://registry.npmjs.org/@librechat/agents/-/agents-2.4.41.tgz",
-      "integrity": "sha512-kYmdk5WVRp0qZxTx6BuGCs4l0Ir9iBLLx4ZY4/1wxr80al5/vq3P8wbgGdKMeO2qTu4ZaT4RyWRQYWBg5HDkUQ==",
+      "version": "2.4.46",
+      "resolved": "https://registry.npmjs.org/@librechat/agents/-/agents-2.4.46.tgz",
+      "integrity": "sha512-zR27U19/WGF3HN64oBbiaFgjjWHaF7BjYzRFWzQKEkk+iEzCe59IpuEZUizQ54YcY02nhhh6S3MNUjhAJwMYVA==",
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "@langchain/anthropic": "^0.3.23",
         "@langchain/aws": "^0.1.11",
@@ -19495,7 +19467,6 @@
       "resolved": "https://registry.npmjs.org/@langchain/community/-/community-0.3.47.tgz",
       "integrity": "sha512-Vo42kAfkXpTFSevhEkeqqE55az8NyQgDktCbitXYuhipNbFYx08XVvqEDkFkB20MM/Z7u+cvLb+DxCqnKuH0CQ==",
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "@langchain/openai": ">=0.2.0 <0.6.0",
         "@langchain/weaviate": "^0.2.0",
- "version": "0.5.14", - "resolved": "https://registry.npmjs.org/@langchain/openai/-/openai-0.5.14.tgz", - "integrity": "sha512-0GEj5K/qi1MRuZ4nE7NvyI4jTG+RSewLZqsExUwRukWdeqmkPNHGrogTa5ZDt7eaJxAaY7EgLC5ZnvCM3L1oug==", + "version": "0.5.15", + "resolved": "https://registry.npmjs.org/@langchain/openai/-/openai-0.5.15.tgz", + "integrity": "sha512-ANadEHyAj5sufQpz+SOPpKbyoMcTLhnh8/d+afbSPUqWsIMPpEFX3HoSY3nrBPG6l4NQQNG5P5oHb4SdC8+YIg==", "license": "MIT", - "peer": true, "dependencies": { "js-tiktoken": "^1.0.12", "openai": "^5.3.0", @@ -20039,7 +20009,6 @@ "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-7.1.3.tgz", "integrity": "sha512-jRR5wdylq8CkOe6hei19GGZnxM6rBGwFl3Bg0YItGDimvjGtAvdZk4Pu6Cl4u4Igsws4a1fd1Vq3ezrhn4KmFw==", "license": "MIT", - "peer": true, "engines": { "node": ">= 14" } @@ -20049,7 +20018,6 @@ "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-7.0.6.tgz", "integrity": "sha512-vK9P5/iUfdl95AI+JVyUuIcVtd4ofvtrOr3HNtM2yxC9bnMbEdp3x01OhQNnjb8IJYi38VlTE3mBXwcfvywuSw==", "license": "MIT", - "peer": true, "dependencies": { "agent-base": "^7.1.2", "debug": "4" @@ -20059,11 +20027,10 @@ } }, "node_modules/@librechat/agents/node_modules/openai": { - "version": "5.5.1", - "resolved": "https://registry.npmjs.org/openai/-/openai-5.5.1.tgz", - "integrity": "sha512-5i19097mGotHA1eFsM6Tjd/tJ8uo9sa5Ysv4Q6bKJ2vtN6rc0MzMrUefXnLXYAJcmMQrC1Efhj0AvfIkXrQamw==", + "version": "5.7.0", + "resolved": "https://registry.npmjs.org/openai/-/openai-5.7.0.tgz", + "integrity": "sha512-zXWawZl6J/P5Wz57/nKzVT3kJQZvogfuyuNVCdEp4/XU2UNrjL7SsuNpWAyLZbo6HVymwmnfno9toVzBhelygA==", "license": "Apache-2.0", - "peer": true, "bin": { "openai": "bin/cli" }, @@ -20089,7 +20056,6 @@ "https://github.com/sponsors/ctavan" ], "license": "MIT", - "peer": true, "bin": { "uuid": "dist/bin/uuid" } @@ -46568,7 +46534,7 @@ }, "packages/api": { "name": "@librechat/api", - "version": "1.2.4", + "version": "1.2.5", "license": "ISC", "devDependencies": { "@babel/preset-env": "^7.21.5", @@ -46600,7 +46566,7 @@ "typescript": "^5.0.4" }, "peerDependencies": { - "@librechat/agents": "^2.4.41", + "@librechat/agents": "^2.4.46", "@librechat/data-schemas": "*", "@modelcontextprotocol/sdk": "^1.12.3", "axios": "^1.8.2", diff --git a/packages/api/package.json b/packages/api/package.json index aa4fc4377..4aaf0f793 100644 --- a/packages/api/package.json +++ b/packages/api/package.json @@ -1,6 +1,6 @@ { "name": "@librechat/api", - "version": "1.2.4", + "version": "1.2.5", "type": "commonjs", "description": "MCP services for LibreChat", "main": "dist/index.js", @@ -69,7 +69,7 @@ "registry": "https://registry.npmjs.org/" }, "peerDependencies": { - "@librechat/agents": "^2.4.41", + "@librechat/agents": "^2.4.46", "@librechat/data-schemas": "*", "@modelcontextprotocol/sdk": "^1.12.3", "axios": "^1.8.2", diff --git a/packages/api/src/agents/run.ts b/packages/api/src/agents/run.ts index 41ec02d9b..e12d2cf2b 100644 --- a/packages/api/src/agents/run.ts +++ b/packages/api/src/agents/run.ts @@ -46,7 +46,10 @@ export async function createRun({ customHandlers?: Record; }): Promise> { const provider = - providerEndpointMap[agent.provider as keyof typeof providerEndpointMap] ?? agent.provider; + (providerEndpointMap[ + agent.provider as keyof typeof providerEndpointMap + ] as unknown as Providers) ?? 
@@ -66,7 +69,9 @@
   }
 
   let reasoningKey: 'reasoning_content' | 'reasoning' | undefined;
-  if (
+  if (provider === Providers.GOOGLE) {
+    reasoningKey = 'reasoning';
+  } else if (
     llmConfig.configuration?.baseURL?.includes(KnownEndpoints.openrouter) ||
     (agent.endpoint && agent.endpoint.toLowerCase().includes(KnownEndpoints.openrouter))
   ) {
diff --git a/packages/api/src/endpoints/google/index.ts b/packages/api/src/endpoints/google/index.ts
new file mode 100644
index 000000000..4045e8de0
--- /dev/null
+++ b/packages/api/src/endpoints/google/index.ts
@@ -0,0 +1 @@
+export * from './llm';
diff --git a/api/server/services/Endpoints/google/llm.js b/packages/api/src/endpoints/google/llm.ts
similarity index 61%
rename from api/server/services/Endpoints/google/llm.js
rename to packages/api/src/endpoints/google/llm.ts
index 235e1e3df..0721acce2 100644
--- a/api/server/services/Endpoints/google/llm.js
+++ b/packages/api/src/endpoints/google/llm.ts
@@ -1,13 +1,15 @@
-const { Providers } = require('@librechat/agents');
-const { AuthKeys } = require('librechat-data-provider');
-const { isEnabled } = require('~/server/utils');
+import { Providers } from '@librechat/agents';
+import { googleSettings, AuthKeys } from 'librechat-data-provider';
+import type { GoogleClientOptions, VertexAIClientOptions } from '@librechat/agents';
+import type * as t from '~/types';
+import { isEnabled } from '~/utils';
 
-function getThresholdMapping(model) {
+function getThresholdMapping(model: string) {
   const gemini1Pattern = /gemini-(1\.0|1\.5|pro$|1\.0-pro|1\.5-pro|1\.5-flash-001)/;
   const restrictedPattern = /(gemini-(1\.5-flash-8b|2\.0|exp)|learnlm)/;
 
   if (gemini1Pattern.test(model)) {
-    return (value) => {
+    return (value: string) => {
       if (value === 'OFF') {
         return 'BLOCK_NONE';
       }
@@ -16,7 +18,7 @@
   }
 
   if (restrictedPattern.test(model)) {
-    return (value) => {
+    return (value: string) => {
       if (value === 'OFF' || value === 'HARM_BLOCK_THRESHOLD_UNSPECIFIED') {
         return 'BLOCK_NONE';
       }
@@ -24,19 +26,16 @@
     };
   }
 
-  return (value) => value;
+  return (value: string) => value;
 }
 
-/**
- *
- * @param {string} model
- * @returns {Array<{category: string, threshold: string}> | undefined}
- */
-function getSafetySettings(model) {
+export function getSafetySettings(
+  model?: string,
+): Array<{ category: string; threshold: string }> | undefined {
   if (isEnabled(process.env.GOOGLE_EXCLUDE_SAFETY_SETTINGS)) {
     return undefined;
   }
-  const mapThreshold = getThresholdMapping(model);
+  const mapThreshold = getThresholdMapping(model ?? '');
 
   return [
     {
@@ -74,24 +73,27 @@
  * Replicates core logic from GoogleClient's constructor and setOptions, plus client determination.
  * Returns an object with the provider label and the final options that would be passed to createLLM.
  *
- * @param {string | object} credentials - Either a JSON string or an object containing Google keys
- * @param {object} [options={}] - The same shape as the "GoogleClient" constructor options
+ * @param credentials - Either a JSON string or an object containing Google keys
+ * @param options - The same shape as the "GoogleClient" constructor options
  */
-function getLLMConfig(credentials, options = {}) {
-  // 1. Parse credentials
-  let creds = {};
+export function getGoogleConfig(
+  credentials: string | t.GoogleCredentials | undefined,
+  options: t.GoogleConfigOptions = {},
+) {
+  let creds: t.GoogleCredentials = {};
   if (typeof credentials === 'string') {
     try {
       creds = JSON.parse(credentials);
-    } catch (err) {
-      throw new Error(`Error parsing string credentials: ${err.message}`);
+    } catch (err: unknown) {
+      throw new Error(
+        `Error parsing string credentials: ${err instanceof Error ? err.message : 'Unknown error'}`,
+      );
     }
   } else if (credentials && typeof credentials === 'object') {
     creds = credentials;
   }
 
-  // Extract from credentials
   const serviceKeyRaw = creds[AuthKeys.GOOGLE_SERVICE_KEY] ?? {};
   const serviceKey =
     typeof serviceKeyRaw === 'string' ? JSON.parse(serviceKeyRaw) : (serviceKeyRaw ?? {});
@@ -102,9 +104,15 @@
   const reverseProxyUrl = options.reverseProxyUrl;
   const authHeader = options.authHeader;
 
-  /** @type {GoogleClientOptions | VertexAIClientOptions} */
-  let llmConfig = {
-    ...(options.modelOptions || {}),
+  const {
+    thinking = googleSettings.thinking.default,
+    thinkingBudget = googleSettings.thinkingBudget.default,
+    ...modelOptions
+  } = options.modelOptions || {};
+
+  const llmConfig: GoogleClientOptions | VertexAIClientOptions = {
+    ...(modelOptions || {}),
+    model: modelOptions?.model ?? '',
     maxRetries: 2,
   };
 
@@ -121,16 +129,30 @@
 
   // If we have a GCP project => Vertex AI
   if (project_id && provider === Providers.VERTEXAI) {
-    /** @type {VertexAIClientOptions['authOptions']} */
-    llmConfig.authOptions = {
+    (llmConfig as VertexAIClientOptions).authOptions = {
       credentials: { ...serviceKey },
       projectId: project_id,
     };
-    llmConfig.location = process.env.GOOGLE_LOC || 'us-central1';
+    (llmConfig as VertexAIClientOptions).location = process.env.GOOGLE_LOC || 'us-central1';
   } else if (apiKey && provider === Providers.GOOGLE) {
     llmConfig.apiKey = apiKey;
   }
 
+  const shouldEnableThinking =
+    thinking && thinkingBudget != null && (thinkingBudget > 0 || thinkingBudget === -1);
+
+  if (shouldEnableThinking && provider === Providers.GOOGLE) {
+    (llmConfig as GoogleClientOptions).thinkingConfig = {
+      thinkingBudget: thinking ? thinkingBudget : googleSettings.thinkingBudget.default,
+      includeThoughts: Boolean(thinking),
+    };
+  } else if (shouldEnableThinking && provider === Providers.VERTEXAI) {
+    (llmConfig as VertexAIClientOptions).thinkingBudget = thinking
+      ? thinkingBudget
+      : googleSettings.thinkingBudget.default;
+    (llmConfig as VertexAIClientOptions).includeThoughts = Boolean(thinking);
+  }
+
   /*
   let legacyOptions = {};
   // Filter out any "examples" that are empty
thinkingBudget + : googleSettings.thinkingBudget.default; + (llmConfig as VertexAIClientOptions).includeThoughts = Boolean(thinking); + } + /* let legacyOptions = {}; // Filter out any "examples" that are empty @@ -152,11 +174,11 @@ function getLLMConfig(credentials, options = {}) { */ if (reverseProxyUrl) { - llmConfig.baseUrl = reverseProxyUrl; + (llmConfig as GoogleClientOptions).baseUrl = reverseProxyUrl; } if (authHeader) { - llmConfig.customHeaders = { + (llmConfig as GoogleClientOptions).customHeaders = { Authorization: `Bearer ${apiKey}`, }; } @@ -169,8 +191,3 @@ function getLLMConfig(credentials, options = {}) { llmConfig, }; } - -module.exports = { - getLLMConfig, - getSafetySettings, -}; diff --git a/packages/api/src/endpoints/index.ts b/packages/api/src/endpoints/index.ts index e919f9e42..e12780d87 100644 --- a/packages/api/src/endpoints/index.ts +++ b/packages/api/src/endpoints/index.ts @@ -1 +1,2 @@ +export * from './google'; export * from './openai'; diff --git a/packages/api/src/endpoints/openai/initialize.ts b/packages/api/src/endpoints/openai/initialize.ts index 91e92db85..ad44ed469 100644 --- a/packages/api/src/endpoints/openai/initialize.ts +++ b/packages/api/src/endpoints/openai/initialize.ts @@ -1,9 +1,9 @@ import { ErrorTypes, EModelEndpoint, mapModelToAzureConfig } from 'librechat-data-provider'; import type { - LLMConfigOptions, UserKeyValues, - InitializeOpenAIOptionsParams, OpenAIOptionsResult, + OpenAIConfigOptions, + InitializeOpenAIOptionsParams, } from '~/types'; import { createHandleLLMNewToken } from '~/utils/generators'; import { getAzureCredentials } from '~/utils/azure'; @@ -64,7 +64,7 @@ export const initializeOpenAI = async ({ ? userValues?.baseURL : baseURLOptions[endpoint as keyof typeof baseURLOptions]; - const clientOptions: LLMConfigOptions = { + const clientOptions: OpenAIConfigOptions = { proxy: PROXY ?? 
diff --git a/packages/api/src/endpoints/index.ts b/packages/api/src/endpoints/index.ts
index e919f9e42..e12780d87 100644
--- a/packages/api/src/endpoints/index.ts
+++ b/packages/api/src/endpoints/index.ts
@@ -1 +1,2 @@
+export * from './google';
 export * from './openai';
diff --git a/packages/api/src/endpoints/openai/initialize.ts b/packages/api/src/endpoints/openai/initialize.ts
index 91e92db85..ad44ed469 100644
--- a/packages/api/src/endpoints/openai/initialize.ts
+++ b/packages/api/src/endpoints/openai/initialize.ts
@@ -1,9 +1,9 @@
 import { ErrorTypes, EModelEndpoint, mapModelToAzureConfig } from 'librechat-data-provider';
 import type {
-  LLMConfigOptions,
   UserKeyValues,
-  InitializeOpenAIOptionsParams,
   OpenAIOptionsResult,
+  OpenAIConfigOptions,
+  InitializeOpenAIOptionsParams,
 } from '~/types';
 import { createHandleLLMNewToken } from '~/utils/generators';
 import { getAzureCredentials } from '~/utils/azure';
@@ -64,7 +64,7 @@ export const initializeOpenAI = async ({
       ? userValues?.baseURL
       : baseURLOptions[endpoint as keyof typeof baseURLOptions];
 
-  const clientOptions: LLMConfigOptions = {
+  const clientOptions: OpenAIConfigOptions = {
     proxy: PROXY ?? undefined,
     reverseProxyUrl: baseURL || undefined,
     streaming: true,
@@ -135,7 +135,7 @@
     user: req.user.id,
   };
 
-  const finalClientOptions: LLMConfigOptions = {
+  const finalClientOptions: OpenAIConfigOptions = {
     ...clientOptions,
     modelOptions,
   };
diff --git a/packages/api/src/endpoints/openai/llm.ts b/packages/api/src/endpoints/openai/llm.ts
index ec7c4b863..ddf61016e 100644
--- a/packages/api/src/endpoints/openai/llm.ts
+++ b/packages/api/src/endpoints/openai/llm.ts
@@ -13,7 +13,7 @@ import { isEnabled } from '~/utils/common';
  */
 export function getOpenAIConfig(
   apiKey: string,
-  options: t.LLMConfigOptions = {},
+  options: t.OpenAIConfigOptions = {},
   endpoint?: string | null,
 ): t.LLMConfigResult {
   const {
diff --git a/packages/api/src/types/google.ts b/packages/api/src/types/google.ts
new file mode 100644
index 000000000..1bc40f06e
--- /dev/null
+++ b/packages/api/src/types/google.ts
@@ -0,0 +1,24 @@
+import { z } from 'zod';
+import { AuthKeys, googleBaseSchema } from 'librechat-data-provider';
+
+export type GoogleParameters = z.infer<typeof googleBaseSchema>;
+
+export type GoogleCredentials = {
+  [AuthKeys.GOOGLE_SERVICE_KEY]?: string;
+  [AuthKeys.GOOGLE_API_KEY]?: string;
+};
+
+/**
+ * Configuration options for the getLLMConfig function
+ */
+export interface GoogleConfigOptions {
+  modelOptions?: Partial<GoogleParameters>;
+  reverseProxyUrl?: string;
+  defaultQuery?: Record<string, string | undefined>;
+  headers?: Record<string, string>;
+  proxy?: string;
+  streaming?: boolean;
+  authHeader?: boolean;
+  addParams?: Record<string, unknown>;
+  dropParams?: string[];
+}
diff --git a/packages/api/src/types/index.ts b/packages/api/src/types/index.ts
index 41ea33eb4..6db727529 100644
--- a/packages/api/src/types/index.ts
+++ b/packages/api/src/types/index.ts
@@ -1,5 +1,6 @@
 export * from './azure';
 export * from './events';
+export * from './google';
 export * from './mistral';
 export * from './openai';
 export * from './run';
diff --git a/packages/api/src/types/openai.ts b/packages/api/src/types/openai.ts
index cb11be984..5f609a641 100644
--- a/packages/api/src/types/openai.ts
+++ b/packages/api/src/types/openai.ts
@@ -9,7 +9,7 @@ export type OpenAIParameters = z.infer<typeof openAIBaseSchema>;
 /**
  * Configuration options for the getLLMConfig function
  */
-export interface LLMConfigOptions {
+export interface OpenAIConfigOptions {
   modelOptions?: Partial<OpenAIParameters>;
   reverseProxyUrl?: string;
   defaultQuery?: Record<string, string | undefined>;
diff --git a/packages/api/src/types/run.ts b/packages/api/src/types/run.ts
index 080b3adba..72c61a587 100644
--- a/packages/api/src/types/run.ts
+++ b/packages/api/src/types/run.ts
@@ -1,8 +1,9 @@
-import type { AgentModelParameters, EModelEndpoint } from 'librechat-data-provider';
+import type { Providers } from '@librechat/agents';
+import type { AgentModelParameters } from 'librechat-data-provider';
 import type { OpenAIConfiguration } from './openai';
 
 export type RunLLMConfig = {
-  provider: EModelEndpoint;
+  provider: Providers;
   streaming: boolean;
   streamUsage: boolean;
   usage?: boolean;
diff --git a/packages/data-provider/src/parameterSettings.ts b/packages/data-provider/src/parameterSettings.ts
index af79eb2c1..8b1dd222a 100644
--- a/packages/data-provider/src/parameterSettings.ts
+++ b/packages/data-provider/src/parameterSettings.ts
@@ -450,6 +450,37 @@ const google: Record<string, SettingDefinition> = {
     optionType: 'model',
     columnSpan: 2,
   },
+  thinking: {
+    key: 'thinking',
+    label: 'com_endpoint_thinking',
+    labelCode: true,
+    description: 'com_endpoint_google_thinking',
+    descriptionCode: true,
+    type: 'boolean',
+    default: googleSettings.thinking.default,
+    component: 'switch',
+    optionType: 'conversation',
+    showDefault: false,
+    columnSpan: 2,
+  },
+  thinkingBudget: {
+    key: 'thinkingBudget',
+    label: 'com_endpoint_thinking_budget',
+    labelCode: true,
+    description: 'com_endpoint_google_thinking_budget',
+    descriptionCode: true,
+    placeholder: 'com_ui_auto',
+    placeholderCode: true,
+    type: 'number',
+    component: 'input',
+    range: {
+      min: googleSettings.thinkingBudget.min,
+      max: googleSettings.thinkingBudget.max,
+      step: googleSettings.thinkingBudget.step,
+    },
+    optionType: 'conversation',
+    columnSpan: 2,
+  },
 };
 
 const googleConfig: SettingsConfiguration = [
@@ -461,6 +492,8 @@
   google.topP,
   google.topK,
   librechat.resendFiles,
+  google.thinking,
+  google.thinkingBudget,
 ];
 
 const googleCol1: SettingsConfiguration = [
@@ -476,6 +509,8 @@ const googleCol2: SettingsConfiguration = [
   google.topP,
   google.topK,
   librechat.resendFiles,
+  google.thinking,
+  google.thinkingBudget,
 ];
 
 const openAI: SettingsConfiguration = [
diff --git a/packages/data-provider/src/schemas.ts b/packages/data-provider/src/schemas.ts
index 6125c65e5..463150d36 100644
--- a/packages/data-provider/src/schemas.ts
+++ b/packages/data-provider/src/schemas.ts
@@ -255,6 +255,18 @@ export const googleSettings = {
     step: 1 as const,
     default: 40 as const,
   },
+  thinking: {
+    default: true as const,
+  },
+  thinkingBudget: {
+    min: -1 as const,
+    max: 32768 as const,
+    step: 1 as const,
+    /** `-1` = Dynamic Thinking, meaning the model will adjust
+     * the budget based on the complexity of the request.
+     */
+    default: -1 as const,
+  },
 };
 
 const ANTHROPIC_MAX_OUTPUT = 128000 as const;
@@ -785,6 +797,8 @@ export const googleBaseSchema = tConversationSchema.pick({
   artifacts: true,
   topP: true,
   topK: true,
+  thinking: true,
+  thinkingBudget: true,
   iconURL: true,
   greeting: true,
   spec: true,
@@ -810,6 +824,12 @@ export const googleGenConfigSchema = z
       presencePenalty: coerceNumber.optional(),
       frequencyPenalty: coerceNumber.optional(),
       stopSequences: z.array(z.string()).optional(),
+      thinkingConfig: z
+        .object({
+          includeThoughts: z.boolean().optional(),
+          thinkingBudget: coerceNumber.optional(),
+        })
+        .optional(),
     })
     .strip()
     .optional();
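
To close, the schema additions can be sanity-checked in isolation. A minimal sketch,
assuming coerceNumber coerces numeric strings (as its name and usage above suggest):

import { googleSettings, googleGenConfigSchema } from 'librechat-data-provider';

// The new defaults: thinking on, budget -1 (Dynamic Thinking).
console.log(googleSettings.thinking.default); // true
console.log(googleSettings.thinkingBudget.default); // -1

// googleGenConfigSchema strips unknown keys and coerces the budget, so a
// string value (e.g. from YAML config) should parse to a number.
const parsed = googleGenConfigSchema.parse({
  stopSequences: ['END'],
  thinkingConfig: { includeThoughts: true, thinkingBudget: '512' },
});
console.log(parsed?.thinkingConfig); // { includeThoughts: true, thinkingBudget: 512 }
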