From 81139046e5f317ec45470190401ce676e5b557a2 Mon Sep 17 00:00:00 2001 From: Danny Avila Date: Thu, 18 Sep 2025 20:06:59 -0400 Subject: [PATCH] =?UTF-8?q?=F0=9F=94=84=20refactor:=20Convert=20OCR=20Tool?= =?UTF-8?q?=20Resource=20to=20Context=20(#9699)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * WIP: conversion of `ocr` to `context` * refactor: make `primeResources` backwards-compatible for `ocr` tool_resources * refactor: Convert legacy `ocr` tool resource to `context` in agent updates - Implemented conversion logic to replace `ocr` with `context` in both incoming updates and existing agent data. - Merged file IDs and files from `ocr` into `context` while ensuring deduplication. - Updated tools array to reflect the change from `ocr` to `context`. * refactor: Enhance context file handling in agent processing - Updated the logic for managing context files by consolidating file IDs from both `ocr` and `context` resources. - Improved backwards compatibility by ensuring that context files are correctly populated and handled. - Simplified the iteration over context files for better readability and maintainability. * refactor: Enhance tool_resources handling in primeResources - Added tests to verify the deletion behavior of tool_resources fields, ensuring original objects remain unchanged. - Implemented logic to delete `ocr` and `context` fields after fetching and re-categorizing files. - Preserved context field when the context capability is disabled, ensuring correct behavior in various scenarios. * refactor: Replace `ocrEnabled` with `contextEnabled` in AgentConfig * refactor: Adjust legacy tool handling order for improved clarity * refactor: Implement OCR to context conversion functions and remove original conversion logic in update agent handling * refactor: Move contextEnabled declaration to maintain consistent order in capabilities * refactor: Update localization keys for file context to improve clarity and accuracy * chore: Update localization key for file context information to improve clarity --- api/models/File.js | 2 +- .../controllers/agents/__tests__/v1.spec.js | 4 +- api/server/controllers/agents/v1.js | 40 +- api/server/controllers/agents/v1.spec.js | 4 +- .../middleware/accessResources/fileAccess.js | 10 +- api/server/services/Files/process.js | 2 +- api/server/services/ToolService.js | 7 +- .../Chat/Input/Files/AttachFileMenu.tsx | 4 +- .../Chat/Input/Files/DragDropModal.tsx | 4 +- .../SidePanel/Agents/AgentConfig.tsx | 8 +- .../SidePanel/Agents/FileContext.tsx | 10 +- .../useAgentToolPermissions.render.test.ts | 1 - .../src/hooks/Agents/useAgentCapabilities.ts | 7 + client/src/hooks/Files/useDragHelpers.ts | 4 +- client/src/locales/en/translation.json | 4 +- client/src/utils/files.ts | 2 +- client/src/utils/forms.tsx | 17 +- packages/api/src/agents/index.ts | 1 + packages/api/src/agents/legacy.test.ts | 697 ++++++++++++++++++ packages/api/src/agents/legacy.ts | 141 ++++ packages/api/src/agents/resources.test.ts | 339 ++++++++- packages/api/src/agents/resources.ts | 42 +- packages/api/src/agents/validation.ts | 2 + packages/data-provider/src/config.ts | 2 + .../data-provider/src/types/assistants.ts | 3 + 25 files changed, 1281 insertions(+), 76 deletions(-) create mode 100644 packages/api/src/agents/legacy.test.ts create mode 100644 packages/api/src/agents/legacy.ts diff --git a/api/models/File.js b/api/models/File.js index 1ee943131..5e90c86fe 100644 --- a/api/models/File.js +++ b/api/models/File.js @@ -42,7 +42,7 @@ const getToolFilesByIds = async (fileIds, toolResourceSet) => { $or: [], }; - if (toolResourceSet.has(EToolResources.ocr)) { + if (toolResourceSet.has(EToolResources.context)) { filter.$or.push({ text: { $exists: true, $ne: null }, context: FileContext.agents }); } if (toolResourceSet.has(EToolResources.file_search)) { diff --git a/api/server/controllers/agents/__tests__/v1.spec.js b/api/server/controllers/agents/__tests__/v1.spec.js index b097cd98c..b7e7b67a2 100644 --- a/api/server/controllers/agents/__tests__/v1.spec.js +++ b/api/server/controllers/agents/__tests__/v1.spec.js @@ -158,7 +158,7 @@ describe('duplicateAgent', () => { }); }); - it('should handle tool_resources.ocr correctly', async () => { + it('should convert `tool_resources.ocr` to `tool_resources.context`', async () => { const mockAgent = { id: 'agent_123', name: 'Test Agent', @@ -178,7 +178,7 @@ describe('duplicateAgent', () => { expect(createAgent).toHaveBeenCalledWith( expect.objectContaining({ tool_resources: { - ocr: { enabled: true, config: 'test' }, + context: { enabled: true, config: 'test' }, }, }), ); diff --git a/api/server/controllers/agents/v1.js b/api/server/controllers/agents/v1.js index eb98c5adb..0334d965d 100644 --- a/api/server/controllers/agents/v1.js +++ b/api/server/controllers/agents/v1.js @@ -2,7 +2,12 @@ const { z } = require('zod'); const fs = require('fs').promises; const { nanoid } = require('nanoid'); const { logger } = require('@librechat/data-schemas'); -const { agentCreateSchema, agentUpdateSchema } = require('@librechat/api'); +const { + agentCreateSchema, + agentUpdateSchema, + mergeAgentOcrConversion, + convertOcrToContextInPlace, +} = require('@librechat/api'); const { Tools, Constants, @@ -198,19 +203,32 @@ const getAgentHandler = async (req, res, expandProperties = false) => { * @param {object} req.params - Request params * @param {string} req.params.id - Agent identifier. * @param {AgentUpdateParams} req.body - The Agent update parameters. - * @returns {Agent} 200 - success response - application/json + * @returns {Promise} 200 - success response - application/json */ const updateAgentHandler = async (req, res) => { try { const id = req.params.id; const validatedData = agentUpdateSchema.parse(req.body); const { _id, ...updateData } = removeNullishValues(validatedData); + + // Convert OCR to context in incoming updateData + convertOcrToContextInPlace(updateData); + const existingAgent = await getAgent({ id }); if (!existingAgent) { return res.status(404).json({ error: 'Agent not found' }); } + // Convert legacy OCR tool resource to context format in existing agent + const ocrConversion = mergeAgentOcrConversion(existingAgent, updateData); + if (ocrConversion.tool_resources) { + updateData.tool_resources = ocrConversion.tool_resources; + } + if (ocrConversion.tools) { + updateData.tools = ocrConversion.tools; + } + let updatedAgent = Object.keys(updateData).length > 0 ? await updateAgent({ id }, updateData, { @@ -255,7 +273,7 @@ const updateAgentHandler = async (req, res) => { * @param {object} req - Express Request * @param {object} req.params - Request params * @param {string} req.params.id - Agent identifier. - * @returns {Agent} 201 - success response - application/json + * @returns {Promise} 201 - success response - application/json */ const duplicateAgentHandler = async (req, res) => { const { id } = req.params; @@ -288,9 +306,19 @@ const duplicateAgentHandler = async (req, res) => { hour12: false, })})`; + if (_tool_resources?.[EToolResources.context]) { + cloneData.tool_resources = { + [EToolResources.context]: _tool_resources[EToolResources.context], + }; + } + if (_tool_resources?.[EToolResources.ocr]) { cloneData.tool_resources = { - [EToolResources.ocr]: _tool_resources[EToolResources.ocr], + /** Legacy conversion from `ocr` to `context` */ + [EToolResources.context]: { + ...(_tool_resources[EToolResources.context] ?? {}), + ..._tool_resources[EToolResources.ocr], + }, }; } @@ -382,7 +410,7 @@ const duplicateAgentHandler = async (req, res) => { * @param {object} req - Express Request * @param {object} req.params - Request params * @param {string} req.params.id - Agent identifier. - * @returns {Agent} 200 - success response - application/json + * @returns {Promise} 200 - success response - application/json */ const deleteAgentHandler = async (req, res) => { try { @@ -484,7 +512,7 @@ const getListAgentsHandler = async (req, res) => { * @param {Express.Multer.File} req.file - The avatar image file. * @param {object} req.body - Request body * @param {string} [req.body.avatar] - Optional avatar for the agent's avatar. - * @returns {Object} 200 - success response - application/json + * @returns {Promise} 200 - success response - application/json */ const uploadAgentAvatarHandler = async (req, res) => { try { diff --git a/api/server/controllers/agents/v1.spec.js b/api/server/controllers/agents/v1.spec.js index c31839feb..b8d4d50ee 100644 --- a/api/server/controllers/agents/v1.spec.js +++ b/api/server/controllers/agents/v1.spec.js @@ -512,6 +512,7 @@ describe('Agent Controllers - Mass Assignment Protection', () => { mockReq.params.id = existingAgentId; mockReq.body = { tool_resources: { + /** Legacy conversion from `ocr` to `context` */ ocr: { file_ids: ['ocr1', 'ocr2'], }, @@ -531,7 +532,8 @@ describe('Agent Controllers - Mass Assignment Protection', () => { const updatedAgent = mockRes.json.mock.calls[0][0]; expect(updatedAgent.tool_resources).toBeDefined(); - expect(updatedAgent.tool_resources.ocr).toBeDefined(); + expect(updatedAgent.tool_resources.ocr).toBeUndefined(); + expect(updatedAgent.tool_resources.context).toBeDefined(); expect(updatedAgent.tool_resources.execute_code).toBeDefined(); expect(updatedAgent.tool_resources.invalid_tool).toBeUndefined(); }); diff --git a/api/server/middleware/accessResources/fileAccess.js b/api/server/middleware/accessResources/fileAccess.js index 3556a278a..b26a512f5 100644 --- a/api/server/middleware/accessResources/fileAccess.js +++ b/api/server/middleware/accessResources/fileAccess.js @@ -10,11 +10,12 @@ const { getFiles } = require('~/models/File'); */ const checkAgentBasedFileAccess = async ({ userId, role, fileId }) => { try { - // Find agents that have this file in their tool_resources + /** Agents that have this file in their tool_resources */ const agentsWithFile = await getAgents({ $or: [ - { 'tool_resources.file_search.file_ids': fileId }, { 'tool_resources.execute_code.file_ids': fileId }, + { 'tool_resources.file_search.file_ids': fileId }, + { 'tool_resources.context.file_ids': fileId }, { 'tool_resources.ocr.file_ids': fileId }, ], }); @@ -83,7 +84,6 @@ const fileAccess = async (req, res, next) => { }); } - // Get the file const [file] = await getFiles({ file_id: fileId }); if (!file) { return res.status(404).json({ @@ -92,20 +92,18 @@ const fileAccess = async (req, res, next) => { }); } - // Check if user owns the file if (file.user && file.user.toString() === userId) { req.fileAccess = { file }; return next(); } - // Check agent-based access (file inherits agent permissions) + /** Agent-based access (file inherits agent permissions) */ const hasAgentAccess = await checkAgentBasedFileAccess({ userId, role: userRole, fileId }); if (hasAgentAccess) { req.fileAccess = { file }; return next(); } - // No access logger.warn(`[fileAccess] User ${userId} denied access to file ${fileId}`); return res.status(403).json({ error: 'Forbidden', diff --git a/api/server/services/Files/process.js b/api/server/services/Files/process.js index 8b3b5fbcf..367e7bf34 100644 --- a/api/server/services/Files/process.js +++ b/api/server/services/Files/process.js @@ -552,7 +552,7 @@ const processAgentFileUpload = async ({ req, res, metadata }) => { throw new Error('File search is not enabled for Agents'); } // Note: File search processing continues to dual storage logic below - } else if (tool_resource === EToolResources.ocr) { + } else if (tool_resource === EToolResources.context) { const { file_id, temp_file_id = null } = metadata; /** diff --git a/api/server/services/ToolService.js b/api/server/services/ToolService.js index 87005e64d..174eae078 100644 --- a/api/server/services/ToolService.js +++ b/api/server/services/ToolService.js @@ -353,7 +353,12 @@ async function processRequiredActions(client, requiredActions) { async function loadAgentTools({ req, res, agent, signal, tool_resources, openAIApiKey }) { if (!agent.tools || agent.tools.length === 0) { return {}; - } else if (agent.tools && agent.tools.length === 1 && agent.tools[0] === AgentCapabilities.ocr) { + } else if ( + agent.tools && + agent.tools.length === 1 && + /** Legacy handling for `ocr` as may still exist in existing Agents */ + (agent.tools[0] === AgentCapabilities.context || agent.tools[0] === AgentCapabilities.ocr) + ) { return {}; } diff --git a/client/src/components/Chat/Input/Files/AttachFileMenu.tsx b/client/src/components/Chat/Input/Files/AttachFileMenu.tsx index 2ed5a0a3e..75398c263 100644 --- a/client/src/components/Chat/Input/Files/AttachFileMenu.tsx +++ b/client/src/components/Chat/Input/Files/AttachFileMenu.tsx @@ -94,11 +94,11 @@ const AttachFileMenu = ({ }, ]; - if (capabilities.ocrEnabled) { + if (capabilities.contextEnabled) { items.push({ label: localize('com_ui_upload_ocr_text'), onClick: () => { - setToolResource(EToolResources.ocr); + setToolResource(EToolResources.context); onAction(); }, icon: , diff --git a/client/src/components/Chat/Input/Files/DragDropModal.tsx b/client/src/components/Chat/Input/Files/DragDropModal.tsx index 6f506c65f..e9992c4dc 100644 --- a/client/src/components/Chat/Input/Files/DragDropModal.tsx +++ b/client/src/components/Chat/Input/Files/DragDropModal.tsx @@ -64,10 +64,10 @@ const DragDropModal = ({ onOptionSelect, setShowModal, files, isVisible }: DragD icon: , }); } - if (capabilities.ocrEnabled) { + if (capabilities.contextEnabled) { _options.push({ label: localize('com_ui_upload_ocr_text'), - value: EToolResources.ocr, + value: EToolResources.context, icon: , }); } diff --git a/client/src/components/SidePanel/Agents/AgentConfig.tsx b/client/src/components/SidePanel/Agents/AgentConfig.tsx index a48a37259..7f296fe8c 100644 --- a/client/src/components/SidePanel/Agents/AgentConfig.tsx +++ b/client/src/components/SidePanel/Agents/AgentConfig.tsx @@ -79,9 +79,9 @@ export default function AgentConfig({ createMutation }: Pick