From 5bfb06b417eb6e788167010d775572329cc18c0d Mon Sep 17 00:00:00 2001 From: Danny Avila Date: Sun, 14 Sep 2025 18:50:41 -0400 Subject: [PATCH] =?UTF-8?q?=F0=9F=92=BB=20feat:=20Add=20Proxy=20Config=20f?= =?UTF-8?q?or=20Mistral=20OCR=20API=20(#9629)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 💻 feat: Add proxy configuration support for Mistral OCR API requests * refactor: Implement proxy support for Mistral API requests using HttpsProxyAgent --- packages/api/src/files/mistral/crud.spec.ts | 405 ++++++++++++++++++++ packages/api/src/files/mistral/crud.ts | 112 ++++-- 2 files changed, 480 insertions(+), 37 deletions(-) diff --git a/packages/api/src/files/mistral/crud.spec.ts b/packages/api/src/files/mistral/crud.spec.ts index 686188588..688a553ff 100644 --- a/packages/api/src/files/mistral/crud.spec.ts +++ b/packages/api/src/files/mistral/crud.spec.ts @@ -10,6 +10,9 @@ jest.mock('form-data', () => { getLength: jest.fn().mockReturnValue(100), })); }); +jest.mock('https-proxy-agent', () => ({ + HttpsProxyAgent: jest.fn().mockImplementation((url) => ({ proxyUrl: url })), +})); jest.mock('axios', () => { const mockAxiosInstance = { get: jest.fn().mockResolvedValue({ data: {} }), @@ -44,6 +47,7 @@ jest.mock('~/utils/axios', () => ({ import * as fs from 'fs'; import axios from 'axios'; +import { HttpsProxyAgent } from 'https-proxy-agent'; import type { Readable } from 'stream'; import type { MistralFileUploadResponse, @@ -1182,6 +1186,8 @@ describe('MistralOCR Service', () => { describe('Mixed env var and hardcoded configuration', () => { beforeEach(() => { + // Clean up any PROXY env var from previous tests + delete process.env.PROXY; const mockReadStream: MockReadStream = { on: jest.fn().mockImplementation(function ( this: MockReadStream, @@ -1708,9 +1714,403 @@ describe('MistralOCR Service', () => { }); }); + describe('Proxy Configuration', () => { + const originalProxy = process.env.PROXY; + + beforeEach(() => { + // Reset the HttpsProxyAgent mock to its default implementation + (HttpsProxyAgent as unknown as jest.Mock).mockImplementation((url) => ({ proxyUrl: url })); + // Clear any previous axios mock calls + mockAxios.post!.mockClear(); + mockAxios.get!.mockClear(); + mockAxios.delete!.mockClear(); + }); + + afterEach(() => { + if (originalProxy) { + process.env.PROXY = originalProxy; + } else { + delete process.env.PROXY; + } + // Clear mocks after each test to prevent leaking + mockAxios.post!.mockClear(); + mockAxios.get!.mockClear(); + mockAxios.delete!.mockClear(); + }); + + describe('uploadDocumentToMistral with proxy', () => { + beforeEach(() => { + const mockReadStream: MockReadStream = { + on: jest.fn().mockImplementation(function ( + this: MockReadStream, + event: string, + handler: () => void, + ) { + if (event === 'end') { + handler(); + } + return this; + }), + pipe: jest.fn().mockImplementation(function (this: MockReadStream) { + return this; + }), + pause: jest.fn(), + resume: jest.fn(), + emit: jest.fn(), + once: jest.fn(), + destroy: jest.fn(), + path: '/path/to/test.pdf', + fd: 1, + flags: 'r', + mode: 0o666, + autoClose: true, + bytesRead: 0, + closed: false, + pending: false, + }; + + (jest.mocked(fs).createReadStream as jest.Mock).mockReturnValue(mockReadStream); + }); + + it('should use proxy configuration when PROXY env var is set', async () => { + process.env.PROXY = 'http://proxy.example.com:8080'; + + const mockResponse: { data: MistralFileUploadResponse } = { + data: { + id: 'file-proxy-123', + object: 'file', + bytes: 1024, + created_at: Date.now(), + filename: 'test.pdf', + purpose: 'ocr', + }, + }; + mockAxios.post!.mockResolvedValueOnce(mockResponse); + + await uploadDocumentToMistral({ + filePath: '/path/to/test.pdf', + fileName: 'test.pdf', + apiKey: 'test-api-key', + }); + + expect(mockAxios.post).toHaveBeenCalledWith( + 'https://api.mistral.ai/v1/files', + expect.anything(), + expect.objectContaining({ + httpsAgent: expect.objectContaining({ + proxyUrl: 'http://proxy.example.com:8080', + }), + }), + ); + }); + + it('should handle proxy URL with authentication', async () => { + process.env.PROXY = 'http://user:pass@proxy.example.com:8080'; + + const mockResponse: { data: MistralFileUploadResponse } = { + data: { + id: 'file-proxy-auth-123', + object: 'file', + bytes: 1024, + created_at: Date.now(), + filename: 'test.pdf', + purpose: 'ocr', + }, + }; + mockAxios.post!.mockResolvedValueOnce(mockResponse); + + await uploadDocumentToMistral({ + filePath: '/path/to/test.pdf', + fileName: 'test.pdf', + apiKey: 'test-api-key', + }); + + expect(mockAxios.post).toHaveBeenCalledWith( + 'https://api.mistral.ai/v1/files', + expect.anything(), + expect.objectContaining({ + httpsAgent: expect.objectContaining({ + proxyUrl: 'http://user:pass@proxy.example.com:8080', + }), + }), + ); + }); + + it('should handle IPv6 proxy addresses', async () => { + process.env.PROXY = 'http://[::1]:8080'; + + const mockResponse: { data: MistralFileUploadResponse } = { + data: { + id: 'file-proxy-ipv6-123', + object: 'file', + bytes: 1024, + created_at: Date.now(), + filename: 'test.pdf', + purpose: 'ocr', + }, + }; + mockAxios.post!.mockResolvedValueOnce(mockResponse); + + await uploadDocumentToMistral({ + filePath: '/path/to/test.pdf', + fileName: 'test.pdf', + apiKey: 'test-api-key', + }); + + expect(mockAxios.post).toHaveBeenCalledWith( + 'https://api.mistral.ai/v1/files', + expect.anything(), + expect.objectContaining({ + httpsAgent: expect.objectContaining({ + proxyUrl: 'http://[::1]:8080', + }), + }), + ); + }); + + it('should not use proxy when PROXY env var is not set', async () => { + delete process.env.PROXY; + + const mockResponse: { data: MistralFileUploadResponse } = { + data: { + id: 'file-no-proxy-123', + object: 'file', + bytes: 1024, + created_at: Date.now(), + filename: 'test.pdf', + purpose: 'ocr', + }, + }; + mockAxios.post!.mockResolvedValueOnce(mockResponse); + + await uploadDocumentToMistral({ + filePath: '/path/to/test.pdf', + fileName: 'test.pdf', + apiKey: 'test-api-key', + }); + + expect(mockAxios.post).toHaveBeenCalledWith( + 'https://api.mistral.ai/v1/files', + expect.anything(), + expect.not.objectContaining({ + httpsAgent: expect.anything(), + }), + ); + }); + }); + + describe('performOCR with proxy', () => { + it('should use proxy configuration when PROXY env var is set', async () => { + process.env.PROXY = 'http://proxy.example.com:3128'; + + const mockResponse: { data: OCRResult } = { + data: { + model: 'mistral-ocr-latest', + pages: [ + { + index: 0, + markdown: 'Proxy test content', + images: [], + dimensions: { dpi: 300, height: 1100, width: 850 }, + }, + ], + document_annotation: '', + usage_info: { + pages_processed: 1, + doc_size_bytes: 1024, + }, + }, + }; + mockAxios.post!.mockResolvedValueOnce(mockResponse); + + await performOCR({ + apiKey: 'test-api-key', + url: 'https://document-url.com', + model: 'mistral-ocr-latest', + documentType: 'document_url', + }); + + expect(mockAxios.post).toHaveBeenCalledWith( + 'https://api.mistral.ai/v1/ocr', + expect.anything(), + expect.objectContaining({ + httpsAgent: expect.objectContaining({ + proxyUrl: 'http://proxy.example.com:3128', + }), + }), + ); + }); + + it('should handle malformed proxy URLs gracefully', async () => { + (HttpsProxyAgent as unknown as jest.Mock).mockImplementationOnce(() => { + throw new Error('Invalid URL'); + }); + process.env.PROXY = 'not-a-valid-url'; + + const mockResponse: { data: OCRResult } = { + data: { + model: 'mistral-ocr-latest', + pages: [ + { + index: 0, + markdown: 'Test content', + images: [], + dimensions: { dpi: 300, height: 1100, width: 850 }, + }, + ], + document_annotation: '', + usage_info: { + pages_processed: 1, + doc_size_bytes: 1024, + }, + }, + }; + mockAxios.post!.mockResolvedValueOnce(mockResponse); + + await expect( + performOCR({ + apiKey: 'test-api-key', + url: 'https://document-url.com', + }), + ).rejects.toThrow('Invalid URL'); + }); + }); + + describe('Azure Mistral OCR with proxy', () => { + beforeEach(() => { + (jest.mocked(fs).readFileSync as jest.Mock).mockReturnValue( + Buffer.from('mock-file-content'), + ); + }); + + it('should use proxy for Azure Mistral OCR requests', async () => { + process.env.PROXY = 'http://proxy.example.com:8080'; + + mockLoadAuthValues.mockResolvedValue({ + OCR_API_KEY: 'azure-api-key', + OCR_BASEURL: 'https://azure.mistral.ai/v1', + }); + + mockAxios.post!.mockResolvedValueOnce({ + data: { + model: 'mistral-ocr-latest', + pages: [ + { + index: 0, + markdown: 'Azure OCR with proxy', + images: [], + dimensions: { dpi: 300, height: 1100, width: 850 }, + }, + ], + document_annotation: '', + usage_info: { + pages_processed: 1, + doc_size_bytes: 1024, + }, + }, + }); + + const req = { + user: { id: 'user123' }, + config: { + ocr: { + apiKey: '${OCR_API_KEY}', + baseURL: '${OCR_BASEURL}', + mistralModel: 'mistral-ocr-latest', + }, + }, + } as unknown as ServerRequest; + + const file = { + path: '/tmp/upload/azure-file.pdf', + originalname: 'azure-document.pdf', + mimetype: 'application/pdf', + } as Express.Multer.File; + + await uploadAzureMistralOCR({ + req, + file, + loadAuthValues: mockLoadAuthValues, + }); + + expect(mockAxios.post).toHaveBeenCalledWith( + 'https://azure.mistral.ai/v1/ocr', + expect.anything(), + expect.objectContaining({ + httpsAgent: expect.objectContaining({ + proxyUrl: 'http://proxy.example.com:8080', + }), + }), + ); + }); + }); + + describe('getSignedUrl with proxy', () => { + it('should use proxy configuration when PROXY env var is set', async () => { + process.env.PROXY = 'https://secure-proxy.example.com:443'; + + const mockResponse: { data: MistralSignedUrlResponse } = { + data: { + url: 'https://signed-url.com', + expires_at: Date.now() + 86400000, + }, + }; + mockAxios.get!.mockResolvedValueOnce(mockResponse); + + await getSignedUrl({ + fileId: 'file-123', + apiKey: 'test-api-key', + }); + + expect(mockAxios.get).toHaveBeenCalledWith( + 'https://api.mistral.ai/v1/files/file-123/url?expiry=24', + expect.objectContaining({ + httpsAgent: expect.objectContaining({ + proxyUrl: 'https://secure-proxy.example.com:443', + }), + }), + ); + }); + }); + + describe('deleteMistralFile with proxy', () => { + it('should use proxy configuration when PROXY env var is set', async () => { + process.env.PROXY = 'socks5://proxy.example.com:1080'; + + mockAxios.delete!.mockResolvedValueOnce({ data: {} }); + + await deleteMistralFile({ + fileId: 'file-123', + apiKey: 'test-api-key', + }); + + expect(mockAxios.delete).toHaveBeenCalledWith( + 'https://api.mistral.ai/v1/files/file-123', + expect.objectContaining({ + httpsAgent: expect.objectContaining({ + proxyUrl: 'socks5://proxy.example.com:1080', + }), + }), + ); + }); + }); + }); + describe('uploadAzureMistralOCR', () => { beforeEach(() => { (jest.mocked(fs).readFileSync as jest.Mock).mockReturnValue(Buffer.from('mock-file-content')); + // Reset the HttpsProxyAgent mock to its default implementation for Azure tests + (HttpsProxyAgent as unknown as jest.Mock).mockImplementation((url) => ({ proxyUrl: url })); + // Clean up any PROXY env var from previous tests + delete process.env.PROXY; + // Reset axios mocks completely to clear any queued responses + mockAxios.post!.mockReset(); + mockAxios.get!.mockReset(); + mockAxios.delete!.mockReset(); + // Re-establish default resolved values + mockAxios.post!.mockResolvedValue({ data: {} }); + mockAxios.get!.mockResolvedValue({ data: {} }); + mockAxios.delete!.mockResolvedValue({ data: {} }); }); it('should process OCR using Azure Mistral with base64 encoding', async () => { @@ -1796,6 +2196,11 @@ describe('MistralOCR Service', () => { }); describe('Mixed env var and hardcoded configuration', () => { + beforeEach(() => { + // Clean up any PROXY env var from previous tests + delete process.env.PROXY; + }); + it('should preserve hardcoded baseURL when only apiKey is an env var', async () => { // This test demonstrates the current bug mockLoadAuthValues.mockResolvedValue({ diff --git a/packages/api/src/files/mistral/crud.ts b/packages/api/src/files/mistral/crud.ts index 30e5ebbf5..dda29216f 100644 --- a/packages/api/src/files/mistral/crud.ts +++ b/packages/api/src/files/mistral/crud.ts @@ -2,6 +2,7 @@ import * as fs from 'fs'; import * as path from 'path'; import FormData from 'form-data'; import { logger } from '@librechat/data-schemas'; +import { HttpsProxyAgent } from 'https-proxy-agent'; import { FileSources, envVarRegex, @@ -9,7 +10,7 @@ import { extractVariableName, } from 'librechat-data-provider'; import type { TCustomConfig } from 'librechat-data-provider'; -import type { AxiosError } from 'axios'; +import type { AxiosError, AxiosRequestConfig } from 'axios'; import type { MistralFileUploadResponse, MistralSignedUrlResponse, @@ -77,15 +78,21 @@ export async function uploadDocumentToMistral({ const fileStream = fs.createReadStream(filePath); form.append('file', fileStream, { filename: actualFileName }); + const config: AxiosRequestConfig = { + headers: { + Authorization: `Bearer ${apiKey}`, + ...form.getHeaders(), + }, + maxBodyLength: Infinity, + maxContentLength: Infinity, + }; + + if (process.env.PROXY) { + config.httpsAgent = new HttpsProxyAgent(process.env.PROXY); + } + return axios - .post(`${baseURL}/files`, form, { - headers: { - Authorization: `Bearer ${apiKey}`, - ...form.getHeaders(), - }, - maxBodyLength: Infinity, - maxContentLength: Infinity, - }) + .post(`${baseURL}/files`, form, config) .then((res) => res.data) .catch((error) => { throw error; @@ -103,12 +110,18 @@ export async function getSignedUrl({ expiry?: number; baseURL?: string; }): Promise { + const config: AxiosRequestConfig = { + headers: { + Authorization: `Bearer ${apiKey}`, + }, + }; + + if (process.env.PROXY) { + config.httpsAgent = new HttpsProxyAgent(process.env.PROXY); + } + return axios - .get(`${baseURL}/files/${fileId}/url?expiry=${expiry}`, { - headers: { - Authorization: `Bearer ${apiKey}`, - }, - }) + .get(`${baseURL}/files/${fileId}/url?expiry=${expiry}`, config) .then((res) => res.data) .catch((error) => { logger.error('Error fetching signed URL:', error.message); @@ -139,6 +152,18 @@ export async function performOCR({ documentType?: 'document_url' | 'image_url'; }): Promise { const documentKey = documentType === 'image_url' ? 'image_url' : 'document_url'; + + const config: AxiosRequestConfig = { + headers: { + 'Content-Type': 'application/json', + Authorization: `Bearer ${apiKey}`, + }, + }; + + if (process.env.PROXY) { + config.httpsAgent = new HttpsProxyAgent(process.env.PROXY); + } + return axios .post( `${baseURL}/ocr`, @@ -151,12 +176,7 @@ export async function performOCR({ [documentKey]: url, }, }, - { - headers: { - 'Content-Type': 'application/json', - Authorization: `Bearer ${apiKey}`, - }, - }, + config, ) .then((res) => res.data) .catch((error) => { @@ -182,12 +202,18 @@ export async function deleteMistralFile({ apiKey: string; baseURL?: string; }): Promise { + const config: AxiosRequestConfig = { + headers: { + Authorization: `Bearer ${apiKey}`, + }, + }; + + if (process.env.PROXY) { + config.httpsAgent = new HttpsProxyAgent(process.env.PROXY); + } + try { - const result = await axios.delete(`${baseURL}/files/${fileId}`, { - headers: { - Authorization: `Bearer ${apiKey}`, - }, - }); + const result = await axios.delete(`${baseURL}/files/${fileId}`, config); logger.debug(`Mistral file ${fileId} deleted successfully:`, result.data); } catch (error) { logger.error(`Error deleting Mistral file ${fileId}:`, error); @@ -543,17 +569,23 @@ async function createJWT(serviceKey: GoogleServiceAccount): Promise { * Exchanges JWT for access token */ async function exchangeJWTForAccessToken(jwt: string): Promise { + const config: AxiosRequestConfig = { + headers: { + 'Content-Type': 'application/x-www-form-urlencoded', + }, + }; + + if (process.env.PROXY) { + config.httpsAgent = new HttpsProxyAgent(process.env.PROXY); + } + const response = await axios.post( 'https://oauth2.googleapis.com/token', new URLSearchParams({ grant_type: 'urn:ietf:params:oauth:grant-type:jwt-bearer', assertion: jwt, }), - { - headers: { - 'Content-Type': 'application/x-www-form-urlencoded', - }, - }, + config, ); if (!response.data?.access_token) { @@ -608,14 +640,20 @@ async function performGoogleVertexOCR({ }, }); + const config: AxiosRequestConfig = { + headers: { + 'Content-Type': 'application/json', + Authorization: `Bearer ${accessToken}`, + Accept: 'application/json', + }, + }; + + if (process.env.PROXY) { + config.httpsAgent = new HttpsProxyAgent(process.env.PROXY); + } + return axios - .post(baseURL, requestBody, { - headers: { - 'Content-Type': 'application/json', - Authorization: `Bearer ${accessToken}`, - Accept: 'application/json', - }, - }) + .post(baseURL, requestBody, config) .then((res) => { logger.debug('Google Vertex AI response received'); return res.data;