From 798e8763d0674d978e4eec41771e2bbf0da204d8 Mon Sep 17 00:00:00 2001
From: Danny Avila
Date: Sun, 24 Mar 2024 23:43:00 -0400
Subject: [PATCH] =?UTF-8?q?=F0=9F=91=93=20feat:=20Vision=20Support=20for?=
 =?UTF-8?q?=20Assistants=20(#2195)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* refactor(assistants/chat): use promises to speed up initialization, initialize shared variables, include `attachedFileIds` to streamRunManager

* chore: additional typedefs

* fix(OpenAIClient): handle edge case where attachments promise is resolved

* feat: createVisionPrompt

* feat: Vision Support for Assistants
---
 api/app/clients/OpenAIClient.js               |   6 +-
 api/app/clients/prompts/createVisionPrompt.js |  34 +++
 api/app/clients/prompts/index.js              |   2 +
 api/server/routes/assistants/chat.js          | 243 ++++++++++++------
 api/server/services/Runs/StreamRunManager.js  |   4 +
 api/server/services/Threads/manage.js         |  13 +-
 api/server/services/ToolService.js            |  48 +++-
 api/server/utils/handleText.js                |   1 +
 api/typedefs.js                               |  12 +
 client/src/common/assistants-types.ts         |   6 +-
 .../components/Chat/Messages/Content/Part.tsx |  26 +-
 .../SidePanel/Builder/AssistantPanel.tsx      |  43 +++-
 .../SidePanel/Builder/AssistantSelect.tsx     |  15 +-
 client/src/localization/languages/Eng.tsx     |   1 +
 packages/data-provider/src/config.ts          |   2 +
 packages/data-provider/src/schemas.ts         |  20 +-
 16 files changed, 376 insertions(+), 100 deletions(-)
 create mode 100644 api/app/clients/prompts/createVisionPrompt.js

diff --git a/api/app/clients/OpenAIClient.js b/api/app/clients/OpenAIClient.js
index 183f7999c..ef6868254 100644
--- a/api/app/clients/OpenAIClient.js
+++ b/api/app/clients/OpenAIClient.js
@@ -92,7 +92,11 @@ class OpenAIClient extends BaseClient {
     }

     this.defaultVisionModel = this.options.visionModel ?? 'gpt-4-vision-preview';
-    this.options.attachments?.then((attachments) => this.checkVisionRequest(attachments));
+    if (typeof this.options.attachments?.then === 'function') {
+      this.options.attachments.then((attachments) => this.checkVisionRequest(attachments));
+    } else {
+      this.checkVisionRequest(this.options.attachments);
+    }

     const { OPENROUTER_API_KEY, OPENAI_FORCE_PROMPT } = process.env ?? {};
     if (OPENROUTER_API_KEY && !this.azure) {
diff --git a/api/app/clients/prompts/createVisionPrompt.js b/api/app/clients/prompts/createVisionPrompt.js
new file mode 100644
index 000000000..5d8a7bbf5
--- /dev/null
+++ b/api/app/clients/prompts/createVisionPrompt.js
@@ -0,0 +1,34 @@
+/**
+ * Generates a prompt instructing the user to describe an image in detail, tailored to different types of visual content.
+ * @param {boolean} pluralized - Whether to pluralize the prompt for multiple images.
+ * @returns {string} - The generated vision prompt.
+ */
+const createVisionPrompt = (pluralized = false) => {
+  return `Please describe the image${
+    pluralized ? 's' : ''
+  } in detail, covering relevant aspects such as:
+
+  For photographs, illustrations, or artwork:
+  - The main subject(s) and their appearance, positioning, and actions
+  - The setting, background, and any notable objects or elements
+  - Colors, lighting, and overall mood or atmosphere
+  - Any interesting details, textures, or patterns
+  - The style, technique, or medium used (if discernible)
+
+  For screenshots or images containing text:
+  - The content and purpose of the text
+  - The layout, formatting, and organization of the information
+  - Any notable visual elements, such as logos, icons, or graphics
+  - The overall context or message conveyed by the screenshot
+
+  For graphs, charts, or data visualizations:
+  - The type of graph or chart (e.g., bar graph, line chart, pie chart)
+  - The variables being compared or analyzed
+  - Any trends, patterns, or outliers in the data
+  - The axis labels, scales, and units of measurement
+  - The title, legend, and any additional context provided
+
+  Be as specific and descriptive as possible while maintaining clarity and concision.`;
+};
+
+module.exports = createVisionPrompt;
diff --git a/api/app/clients/prompts/index.js b/api/app/clients/prompts/index.js
index 9edb9954f..36bb6f7e2 100644
--- a/api/app/clients/prompts/index.js
+++ b/api/app/clients/prompts/index.js
@@ -4,6 +4,7 @@ const handleInputs = require('./handleInputs');
 const instructions = require('./instructions');
 const titlePrompts = require('./titlePrompts');
 const truncateText = require('./truncateText');
+const createVisionPrompt = require('./createVisionPrompt');
 const createContextHandlers = require('./createContextHandlers');

 module.exports = {
@@ -13,5 +14,6 @@
   ...instructions,
   ...titlePrompts,
   truncateText,
+  createVisionPrompt,
   createContextHandlers,
 };
diff --git a/api/server/routes/assistants/chat.js b/api/server/routes/assistants/chat.js
index a0cf57000..9602ffdfa 100644
--- a/api/server/routes/assistants/chat.js
+++ b/api/server/routes/assistants/chat.js
@@ -4,9 +4,11 @@ const {
   Constants,
   RunStatus,
   CacheKeys,
+  FileSources,
   ContentTypes,
   EModelEndpoint,
   ViolationTypes,
+  ImageVisionTool,
   AssistantStreamEvents,
 } = require('librechat-data-provider');
 const {
@@ -17,9 +19,10 @@ const {
   addThreadMetadata,
   saveAssistantMessage,
 } = require('~/server/services/Threads');
+const { sendResponse, sendMessage, sleep, isEnabled, countTokens } = require('~/server/utils');
 const { runAssistant, createOnTextProgress } = require('~/server/services/AssistantService');
 const { addTitle, initializeClient } = require('~/server/services/Endpoints/assistants');
-const { sendResponse, sendMessage, sleep, isEnabled, countTokens } = require('~/server/utils');
+const { formatMessage, createVisionPrompt } = require('~/app/clients/prompts');
 const { createRun, StreamRunManager } = require('~/server/services/Runs');
 const { getTransactions } = require('~/models/Transaction');
 const checkBalance = require('~/models/checkBalance');
@@ -100,6 +103,16 @@ router.post('/', validateModel, buildEndpointOption, setHeaders, async (req, res
   let parentMessageId = _parentId;
   /** @type {TMessage[]} */
   let previousMessages = [];
+  /** @type {import('librechat-data-provider').TConversation | null} */
+  let conversation = null;
+  /** @type {string[]} */
+  let file_ids = [];
+  /** @type {Set} */
+  let attachedFileIds = new Set();
+  /** @type {TMessage | null} */
+  let requestMessage = null;
+  /** @type {undefined | Promise} */
+  let visionPromise;

   const userMessageId = v4();
   const responseMessageId = v4();
@@ -258,7 +271,10 @@ router.post('/', validateModel, buildEndpointOption, setHeaders, async (req, res
       throw new Error('Missing assistant_id');
     }

-    if (isEnabled(process.env.CHECK_BALANCE)) {
+    const checkBalanceBeforeRun = async () => {
+      if (!isEnabled(process.env.CHECK_BALANCE)) {
+        return;
+      }
       const transactions =
         (await getTransactions({
           user: req.user.id,
@@ -288,7 +304,7 @@ router.post('/', validateModel, buildEndpointOption, setHeaders, async (req, res
           amount: promptTokens,
         },
       });
-    }
+    };

     /** @type {{ openai: OpenAIClient }} */
     const { openai: _openai, client } = await initializeClient({
@@ -300,15 +316,11 @@ router.post('/', validateModel, buildEndpointOption, setHeaders, async (req, res

     openai = _openai;

-    // if (thread_id) {
-    //   previousMessages = await checkMessageGaps({ openai, thread_id, conversationId });
-    // }
-
     if (previousMessages.length) {
       parentMessageId = previousMessages[previousMessages.length - 1].messageId;
     }

-    const userMessage = {
+    let userMessage = {
       role: 'user',
       content: text,
       metadata: {
@@ -316,75 +328,7 @@ router.post('/', validateModel, buildEndpointOption, setHeaders, async (req, res
       },
     };

-    let thread_file_ids = [];
-    if (convoId) {
-      const convo = await getConvo(req.user.id, convoId);
-      if (convo && convo.file_ids) {
-        thread_file_ids = convo.file_ids;
-      }
-    }
-
-    const file_ids = files.map(({ file_id }) => file_id);
-    if (file_ids.length || thread_file_ids.length) {
-      userMessage.file_ids = file_ids;
-      openai.attachedFileIds = new Set([...file_ids, ...thread_file_ids]);
-    }
-
-    // TODO: may allow multiple messages to be created beforehand in a future update
-    const initThreadBody = {
-      messages: [userMessage],
-      metadata: {
-        user: req.user.id,
-        conversationId,
-      },
-    };
-
-    const result = await initThread({ openai, body: initThreadBody, thread_id });
-    thread_id = result.thread_id;
-
-    createOnTextProgress({
-      openai,
-      conversationId,
-      userMessageId,
-      messageId: responseMessageId,
-      thread_id,
-    });
-
-    const requestMessage = {
-      user: req.user.id,
-      text,
-      messageId: userMessageId,
-      parentMessageId,
-      // TODO: make sure client sends correct format for `files`, use zod
-      files,
-      file_ids,
-      conversationId,
-      isCreatedByUser: true,
-      assistant_id,
-      thread_id,
-      model: assistant_id,
-    };
-
-    previousMessages.push(requestMessage);
-
-    await saveUserMessage({ ...requestMessage, model });
-
-    const conversation = {
-      conversationId,
-      // TODO: title feature
-      title: 'New Chat',
-      endpoint: EModelEndpoint.assistants,
-      promptPrefix: promptPrefix,
-      instructions: instructions,
-      assistant_id,
-      // model,
-    };
-
-    if (file_ids.length) {
-      conversation.file_ids = file_ids;
-    }
-
-    /** @type {CreateRunBody} */
+    /** @type {CreateRunBody | undefined} */
     const body = {
       assistant_id,
       model,
@@ -398,6 +342,143 @@ router.post('/', validateModel, buildEndpointOption, setHeaders, async (req, res
       body.instructions = instructions;
     }

+    const getRequestFileIds = async () => {
+      let thread_file_ids = [];
+      if (convoId) {
+        const convo = await getConvo(req.user.id, convoId);
+        if (convo && convo.file_ids) {
+          thread_file_ids = convo.file_ids;
+        }
+      }
+
+      file_ids = files.map(({ file_id }) => file_id);
+      if (file_ids.length || thread_file_ids.length) {
+        userMessage.file_ids = file_ids;
+        attachedFileIds = new Set([...file_ids, ...thread_file_ids]);
+      }
+    };
+
+    const addVisionPrompt = async () => {
+      if (!req.body.endpointOption.attachments) {
+        return;
+      }
+
+      const assistant = await openai.beta.assistants.retrieve(assistant_id);
+      const visionToolIndex = assistant.tools.findIndex(
+        (tool) => tool.function.name === ImageVisionTool.function.name,
+      );
+
+      if (visionToolIndex === -1) {
+        return;
+      }
+
+      const attachments = await req.body.endpointOption.attachments;
+      let visionMessage = {
+        role: 'user',
+        content: '',
+      };
+      const files = await client.addImageURLs(visionMessage, attachments);
+      if (!visionMessage.image_urls?.length) {
+        return;
+      }
+
+      const imageCount = visionMessage.image_urls.length;
+      const plural = imageCount > 1;
+      visionMessage.content = createVisionPrompt(plural);
+      visionMessage = formatMessage({ message: visionMessage, endpoint: EModelEndpoint.openAI });
+
+      visionPromise = openai.chat.completions.create({
+        model: 'gpt-4-vision-preview',
+        messages: [visionMessage],
+        max_tokens: 4000,
+      });
+
+      const pluralized = plural ? 's' : '';
+      body.additional_instructions = `${
+        body.additional_instructions ? `${body.additional_instructions}\n` : ''
+      }The user has uploaded ${imageCount} image${pluralized}.
+      Use the \`${ImageVisionTool.function.name}\` tool to retrieve ${
+  plural ? '' : 'a '
+}detailed text description${pluralized} for ${plural ? 'each' : 'the'} image${pluralized}.`;
+
+      return files;
+    };
+
+    const initializeThread = async () => {
+      /** @type {[ undefined | MongoFile[]]}*/
+      const [processedFiles] = await Promise.all([addVisionPrompt(), getRequestFileIds()]);
+      // TODO: may allow multiple messages to be created beforehand in a future update
+      const initThreadBody = {
+        messages: [userMessage],
+        metadata: {
+          user: req.user.id,
+          conversationId,
+        },
+      };
+
+      if (processedFiles) {
+        for (const file of processedFiles) {
+          if (file.source !== FileSources.openai) {
+            attachedFileIds.delete(file.file_id);
+            const index = file_ids.indexOf(file.file_id);
+            if (index > -1) {
+              file_ids.splice(index, 1);
+            }
+          }
+        }
+
+        userMessage.file_ids = file_ids;
+      }
+
+      const result = await initThread({ openai, body: initThreadBody, thread_id });
+      thread_id = result.thread_id;
+
+      createOnTextProgress({
+        openai,
+        conversationId,
+        userMessageId,
+        messageId: responseMessageId,
+        thread_id,
+      });
+
+      requestMessage = {
+        user: req.user.id,
+        text,
+        messageId: userMessageId,
+        parentMessageId,
+        // TODO: make sure client sends correct format for `files`, use zod
+        files,
+        file_ids,
+        conversationId,
+        isCreatedByUser: true,
+        assistant_id,
+        thread_id,
+        model: assistant_id,
+      };
+
+      previousMessages.push(requestMessage);
+
+      /* asynchronous */
+      saveUserMessage({ ...requestMessage, model });
+
+      conversation = {
+        conversationId,
+        title: 'New Chat',
+        endpoint: EModelEndpoint.assistants,
+        promptPrefix: promptPrefix,
+        instructions: instructions,
+        assistant_id,
+        // model,
+      };
+
+      if (file_ids.length) {
+        conversation.file_ids = file_ids;
+      }
+    };
+
+    const promises = [initializeThread(), checkBalanceBeforeRun()];
+    await Promise.all(promises);
+
     const sendInitialResponse = () => {
       sendMessage(res, {
         sync: true,
@@ -421,6 +502,8 @@ router.post('/', validateModel, buildEndpointOption, setHeaders, async (req, res

     const processRun = async (retry = false) => {
       if (req.app.locals[EModelEndpoint.azureOpenAI]?.assistants) {
+        openai.attachedFileIds = attachedFileIds;
+        openai.visionPromise = visionPromise;
         if (retry) {
           response = await runAssistant({
             openai,
@@ -463,9 +546,11 @@ router.post('/', validateModel, buildEndpointOption, setHeaders, async (req, res
         req,
         res,
         openai,
-        thread_id,
-        responseMessage: openai.responseMessage,
         handlers,
+        thread_id,
+        visionPromise,
+        attachedFileIds,
+        responseMessage: openai.responseMessage,
         // streamOptions: {

         // },
diff --git a/api/server/services/Runs/StreamRunManager.js b/api/server/services/Runs/StreamRunManager.js
index fe9e8da73..2059de6a5 100644
--- a/api/server/services/Runs/StreamRunManager.js
+++ b/api/server/services/Runs/StreamRunManager.js
@@ -59,6 +59,10 @@ class StreamRunManager {
     this.messages = [];
     /** @type {string} */
     this.text = '';
+    /** @type {Set} */
+    this.attachedFileIds = fields.attachedFileIds;
+    /** @type {undefined | Promise} */
+    this.visionPromise = fields.visionPromise;

     /**
      * @type {Object. Promise>}
diff --git a/api/server/services/Threads/manage.js b/api/server/services/Threads/manage.js
index 12386b60a..18dbea7fe 100644
--- a/api/server/services/Threads/manage.js
+++ b/api/server/services/Threads/manage.js
@@ -468,21 +468,28 @@ async function checkMessageGaps({ openai, latestMessageId, thread_id, run_id, co

 /**
  * Records token usage for a given completion request.
- *
  * @param {Object} params - The parameters for initializing a thread.
  * @param {number} params.prompt_tokens - The number of prompt tokens used.
  * @param {number} params.completion_tokens - The number of completion tokens used.
  * @param {string} params.model - The model used by the assistant run.
  * @param {string} params.user - The user's ID.
  * @param {string} params.conversationId - LibreChat conversation ID.
+ * @param {string} [params.context='message'] - The context of the usage. Defaults to 'message'.
  * @return {Promise} A promise that resolves to the updated messages
  */
-const recordUsage = async ({ prompt_tokens, completion_tokens, model, user, conversationId }) => {
+const recordUsage = async ({
+  prompt_tokens,
+  completion_tokens,
+  model,
+  user,
+  conversationId,
+  context = 'message',
+}) => {
   await spendTokens(
     {
       user,
       model,
-      context: 'message',
+      context,
       conversationId,
     },
     { promptTokens: prompt_tokens, completionTokens: completion_tokens },
diff --git a/api/server/services/ToolService.js b/api/server/services/ToolService.js
index 369ae37f7..923e9a7ab 100644
--- a/api/server/services/ToolService.js
+++ b/api/server/services/ToolService.js
@@ -4,14 +4,17 @@ const { StructuredTool } = require('langchain/tools');
 const { zodToJsonSchema } = require('zod-to-json-schema');
 const { Calculator } = require('langchain/tools/calculator');
 const {
+  Tools,
   ContentTypes,
   imageGenTools,
+  actionDelimiter,
+  ImageVisionTool,
   openapiToFunction,
   validateAndParseOpenAPISpec,
-  actionDelimiter,
 } = require('librechat-data-provider');
 const { loadActionSets, createActionTool, domainParser } = require('./ActionService');
 const { processFileURL } = require('~/server/services/Files/process');
+const { recordUsage } = require('~/server/services/Threads');
 const { loadTools } = require('~/app/clients/tools/util');
 const { redactMessage } = require('~/config/parsers');
 const { sleep } = require('~/server/utils');
@@ -83,6 +86,8 @@ function loadAndFormatTools({ directory, filter = new Set() }) {
     tools.push(formattedTool);
   }

+  tools.push(ImageVisionTool);
+
   return tools.reduce((map, tool) => {
     map[tool.function.name] = tool;
     return map;
@@ -100,8 +105,8 @@ function loadAndFormatTools({ directory, filter = new Set() }) {
  */
 function formatToOpenAIAssistantTool(tool) {
   return {
-    type: 'function',
-    function: {
+    type: Tools.function,
+    [Tools.function]: {
       name: tool.name,
       description: tool.description,
       parameters: zodToJsonSchema(tool.schema),
     },
   };
 }

+/**
+ * Processes the required actions by calling the appropriate tools and returning the outputs.
+ * @param {OpenAIClient} client - OpenAI or StreamRunManager Client.
+ * @param {RequiredAction} requiredActions - The current required action.
+ * @returns {Promise} The outputs of the tools.
+ */
+const processVisionRequest = async (client, currentAction) => {
+  if (!client.visionPromise) {
+    return {
+      tool_call_id: currentAction.toolCallId,
+      output: 'No image details found.',
+    };
+  }
+
+  /** @type {ChatCompletion | undefined} */
+  const completion = await client.visionPromise;
+  if (completion.usage) {
+    recordUsage({
+      user: client.req.user.id,
+      model: client.req.body.model,
+      conversationId: (client.responseMessage ?? client.finalMessage).conversationId,
+      ...completion.usage,
+    });
+  }
+
+  const output = completion?.choices?.[0]?.message?.content ?? 'No image details found.';
+  return {
+    tool_call_id: currentAction.toolCallId,
+    output,
+  };
+};
+
 /**
  * Processes return required actions from run.
- *
  * @param {OpenAIClient} client - OpenAI or StreamRunManager Client.
  * @param {RequiredAction[]} requiredActions - The required actions to submit outputs for.
  * @returns {Promise} The outputs of the tools.
- *
  */
 async function processRequiredActions(client, requiredActions) {
   logger.debug(
@@ -152,6 +186,10 @@ async function processRequiredActions(client, requiredActions) {

   for (let i = 0; i < requiredActions.length; i++) {
     const currentAction = requiredActions[i];
+    if (currentAction.tool === ImageVisionTool.function.name) {
+      promises.push(processVisionRequest(client, currentAction));
+      continue;
+    }
     let tool = ToolMap[currentAction.tool] ?? ActionToolMap[currentAction.tool];

     const handleToolOutput = async (output) => {
diff --git a/api/server/utils/handleText.js b/api/server/utils/handleText.js
index 973218e4f..bfa37e279 100644
--- a/api/server/utils/handleText.js
+++ b/api/server/utils/handleText.js
@@ -172,6 +172,7 @@ function generateConfig(key, baseURL, assistants = false) {
     config.retrievalModels = defaultRetrievalModels;
     config.capabilities = [
       Capabilities.code_interpreter,
+      Capabilities.image_vision,
       Capabilities.retrieval,
       Capabilities.actions,
       Capabilities.tools,
diff --git a/api/typedefs.js b/api/typedefs.js
index 36b341d92..5b358c787 100644
--- a/api/typedefs.js
+++ b/api/typedefs.js
@@ -32,6 +32,18 @@
  * @memberof typedefs
  */

+/**
+ * @exports ChatCompletionContentPartImage
+ * @typedef {import('openai').OpenAI.ChatCompletionContentPartImage} ChatCompletionContentPartImage
+ * @memberof typedefs
+ */
+
+/**
+ * @exports ChatCompletion
+ * @typedef {import('openai').OpenAI.ChatCompletion} ChatCompletion
+ * @memberof typedefs
+ */
+
 /**
  * @exports OpenAIRequestOptions
  * @typedef {import('openai').OpenAI.RequestOptions} OpenAIRequestOptions
diff --git a/client/src/common/assistants-types.ts b/client/src/common/assistants-types.ts
index c748de0de..3b9ad27da 100644
--- a/client/src/common/assistants-types.ts
+++ b/client/src/common/assistants-types.ts
@@ -1,3 +1,4 @@
+import { Capabilities } from 'librechat-data-provider';
 import type { Assistant } from 'librechat-data-provider';
 import type { Option, ExtendedFile } from './types';

@@ -6,8 +7,9 @@ export type TAssistantOption =
   | (Option & Assistant & { files?: Array<[string, ExtendedFile]> });

 export type Actions = {
-  code_interpreter: boolean;
-  retrieval: boolean;
+  [Capabilities.code_interpreter]: boolean;
+  [Capabilities.image_vision]: boolean;
+  [Capabilities.retrieval]: boolean;
 };

 export type AssistantForm = {
diff --git a/client/src/components/Chat/Messages/Content/Part.tsx b/client/src/components/Chat/Messages/Content/Part.tsx
index b96d0bcf8..e52ccfd08 100644
--- a/client/src/components/Chat/Messages/Content/Part.tsx
+++ b/client/src/components/Chat/Messages/Content/Part.tsx
@@ -1,4 +1,9 @@
-import { ToolCallTypes, ContentTypes, imageGenTools } from 'librechat-data-provider';
+import {
+  ToolCallTypes,
+  ContentTypes,
+  imageGenTools,
+  isImageVisionTool,
+} from 'librechat-data-provider';
 import type { TMessageContentParts, TMessage } from 'librechat-data-provider';
 import type { TDisplayProps } from '~/common';
 import { ErrorMessage } from './MessageContent';
@@ -96,6 +101,25 @@ export default function Part({
     part[ContentTypes.TOOL_CALL].type === ToolCallTypes.FUNCTION
   ) {
     const toolCall = part[ContentTypes.TOOL_CALL];
+    if (isImageVisionTool(toolCall)) {
+      if (isSubmitting && showCursor) {
+        return (
+          
+            
+          
+        );
+      }
+
+      return null;
+    }
+
     return (
      assistants?.capabilities?.includes(Capabilities.code_interpreter),
     [assistants],
   );
+  const imageVisionEnabled = useMemo(
+    () => assistants?.capabilities?.includes(Capabilities.image_vision),
+    [assistants],
+  );

   useEffect(() => {
     if (model && !retrievalModels.has(model)) {
@@ -157,6 +162,9 @@ export default function AssistantPanel({
     if (data.retrieval) {
       tools.push({ type: Tools.retrieval });
     }
+    if (data.image_vision) {
+      tools.push(ImageVisionTool);
+    }

     const {
       name,
@@ -374,6 +382,37 @@ export default function AssistantPanel({
         )}
+        {imageVisionEnabled && (
+          
+            (
+              
+            )}
+          />
+          
+        
+        )}
         {retrievalEnabled && (
-        {functions.map((func) => (
+        {functions.map((func, i) => (
            tool.type !== 'function')
-      ?.map((tool) => tool.type)
+      ?.filter((tool) => tool.type !== 'function' || isImageVisionTool(tool))
+      ?.map((tool) => tool?.function?.name || tool.type)
       .forEach((tool) => {
         actions[tool] = true;
       });

     const functions = assistant?.tools
-        ?.filter((tool) => tool.type === 'function')
+        ?.filter((tool) => tool.type === 'function' && !isImageVisionTool(tool))
         ?.map((tool) => tool.function?.name ?? '') ?? [];

     const formValues: Partial = {
diff --git a/client/src/localization/languages/Eng.tsx b/client/src/localization/languages/Eng.tsx
index 07b0e92fa..743d115ad 100644
--- a/client/src/localization/languages/Eng.tsx
+++ b/client/src/localization/languages/Eng.tsx
@@ -16,6 +16,7 @@ export default {
     'If you upload files under Knowledge, conversations with your Assistant may include file contents.',
   com_assistants_knowledge_disabled:
     'Assistant must be created, and Code Interpreter or Retrieval must be enabled and saved before uploading files as Knowledge.',
+  com_assistants_image_vision: 'Image Vision',
   com_assistants_code_interpreter: 'Code Interpreter',
   com_assistants_code_interpreter_files:
     'The following files are only available for Code Interpreter:',
diff --git a/packages/data-provider/src/config.ts b/packages/data-provider/src/config.ts
index f6ce32f1f..393f2bb0e 100644
--- a/packages/data-provider/src/config.ts
+++ b/packages/data-provider/src/config.ts
@@ -82,6 +82,7 @@ export type TValidatedAzureConfig = {

 export enum Capabilities {
   code_interpreter = 'code_interpreter',
+  image_vision = 'image_vision',
   retrieval = 'retrieval',
   actions = 'actions',
   tools = 'tools',
@@ -100,6 +101,7 @@ export const assistantEndpointSchema = z.object({
     .optional()
     .default([
       Capabilities.code_interpreter,
+      Capabilities.image_vision,
       Capabilities.retrieval,
       Capabilities.actions,
       Capabilities.tools,
diff --git a/packages/data-provider/src/schemas.ts b/packages/data-provider/src/schemas.ts
index 266aa9e98..f5f0c1853 100644
--- a/packages/data-provider/src/schemas.ts
+++ b/packages/data-provider/src/schemas.ts
@@ -1,5 +1,6 @@
 import { z } from 'zod';
-import type { TMessageContentParts } from './types/assistants';
+import { Tools } from './types/assistants';
+import type { TMessageContentParts, FunctionTool, FunctionToolCall } from './types/assistants';
 import type { TFile } from './types/files';

 export const isUUID = z.string().uuid();
@@ -25,9 +26,26 @@ export const defaultAssistantFormValues = {
   model: '',
   functions: [],
   code_interpreter: false,
+  image_vision: false,
   retrieval: false,
 };

+export const ImageVisionTool: FunctionTool = {
+  type: Tools.function,
+  [Tools.function]: {
+    name: 'image_vision',
+    description: 'Get detailed text descriptions for all current image attachments.',
+    parameters: {
+      type: 'object',
+      properties: {},
+      required: [],
+    },
+  },
+};
+
+export const isImageVisionTool = (tool: FunctionTool | FunctionToolCall) =>
+  tool.type === 'function' && tool.function?.name === ImageVisionTool?.function?.name;
+
 export const endpointSettings = {
   [EModelEndpoint.google]: {
     model: {