feat: simplify STTService request handling by refining SDK usage and improving error logging

🌊 feat: refine SDK usage logic in STT and TTS services, improve header handling
🌊 feat: enhance TTSService with Deepgram SDK integration and refactor voice validation
2024-12-04 17:33:38 +01:00 · 2024-11-24 01:14:17 +01:00 · 2024-11-23 16:49:56 +01:00 · 2024-11-23 12:20:17 +01:00 · 2024-11-23 12:17:53 +01:00
11 changed files with 326 additions and 55 deletions
--- a/api/server/services/Files/Audio/STTService.js
+++ b/api/server/services/Files/Audio/STTService.js
@@ -2,6 +2,7 @@ const axios = require('axios');
 const fs = require('fs').promises;
 const FormData = require('form-data');
 const { Readable } = require('stream');
+const { createClient } = require('@deepgram/sdk');
 const { extractEnvVariable, STTProviders } = require('librechat-data-provider');
 const { getCustomConfig } = require('~/server/services/Config');
 const { genAzureEndpoint } = require('~/utils');
@@ -18,10 +19,14 @@ class STTService {
   */
  constructor(customConfig) {
    this.customConfig = customConfig;
-    this.providerStrategies = {
+    this.apiStrategies = {
      [STTProviders.OPENAI]: this.openAIProvider,
      [STTProviders.AZURE_OPENAI]: this.azureOpenAIProvider,
    };
+
+    this.sdkStrategies = {
+      [STTProviders.DEEPGRAM]: this.deepgramSDKProvider,
+    };
  }

  /**
@@ -106,7 +111,7 @@ class STTService {
      'Content-Type': 'multipart/form-data',
      ...(apiKey && { Authorization: `Bearer ${apiKey}` }),
    };
-    [headers].forEach(this.removeUndefined);
+    this.removeUndefined(headers);

    return [url, data, headers];
  }
@@ -153,6 +158,70 @@ class STTService {
    return [url, formData, { ...headers, ...formData.getHeaders() }];
  }

+  /**
+   * Transcribes audio using the Deepgram SDK.
+   * @async
+   * @param {Object} sttSchema - The STT schema for Deepgram.
+   * @param {Stream} audioReadStream - The audio data to be transcribed.
+   * @returns {Promise<string>} Transcript of the first channel's first alternative, or '' if absent.
+   * @throws {Error} Rethrows the error object reported by the SDK call.
+   */
+  async deepgramSDKProvider(sttSchema, audioReadStream) {
+    const apiKey = extractEnvVariable(sttSchema.apiKey) || '';
+    const deepgram = createClient(apiKey);
+
+    const configOptions = {
+      // Model parameters
+      model: sttSchema.model?.model,
+      language: sttSchema.model?.language,
+      detect_language: sttSchema.model?.detect_language,
+      version: sttSchema.model?.version,
+
+      // Formatting parameters
+      smart_format: sttSchema.formatting?.smart_format,
+      diarize: sttSchema.formatting?.diarize,
+      filler_words: sttSchema.formatting?.filler_words,
+      numerals: sttSchema.formatting?.numerals,
+      punctuate: sttSchema.formatting?.punctuate,
+      paragraphs: sttSchema.formatting?.paragraphs,
+      profanity_filter: sttSchema.formatting?.profanity_filter,
+      redact: sttSchema.formatting?.redact,
+      utterances: sttSchema.formatting?.utterances,
+      utt_split: sttSchema.formatting?.utt_split,
+
+      // Custom vocabulary parameters
+      replace: sttSchema.custom_vocabulary?.replace,
+      keywords: sttSchema.custom_vocabulary?.keywords,
+
+      // Intelligence parameters
+      sentiment: sttSchema.intelligence?.sentiment,
+      intents: sttSchema.intelligence?.intents,
+      topics: sttSchema.intelligence?.topics,
+    };
+
+    this.removeUndefined(configOptions);
+
+    const { result, error } = await deepgram.listen.prerecorded.transcribeFile(
+      audioReadStream,
+      configOptions,
+    );
+
+    if (error) {
+      throw error;
+    }
+
+    return result?.results?.channels?.[0]?.alternatives?.[0]?.transcript || '';
+  }
+
+  /**
+   * TODO: Implement a better way to determine if the SDK should be used.
+   * @param {string} provider - The STT provider identifier.
+   * @returns {boolean} True only for Deepgram, the sole SDK-backed provider.
+   */
+  shouldUseSDK(provider) {
+    return provider === STTProviders.DEEPGRAM;
+  }
+
  /**
   * Sends an STT request to the specified provider.
   * @async
@@ -165,27 +234,29 @@ class STTService {
   * @throws {Error} If the provider is invalid, the response status is not 200, or the response data is missing.
   */
  async sttRequest(provider, sttSchema, { audioBuffer, audioFile }) {
-    const strategy = this.providerStrategies[provider];
+    const useSDK = this.shouldUseSDK(provider);
+    const strategy = useSDK ? this.sdkStrategies[provider] : this.apiStrategies[provider];
+
    if (!strategy) {
-      throw new Error('Invalid provider');
+      throw new Error('Invalid provider or implementation');
    }

    const audioReadStream = Readable.from(audioBuffer);
-    audioReadStream.path = 'audio.wav';

-    const [url, data, headers] = strategy.call(this, sttSchema, audioReadStream, audioFile);
+    if (useSDK) {
+      return strategy.call(this, sttSchema, audioReadStream, audioFile);
+    }
+
+    const [url, data, headers] = strategy.call(this, sttSchema, audioReadStream);

    try {
      const response = await axios.post(url, data, { headers });
-
      if (response.status !== 200) {
        throw new Error('Invalid response from the STT API');
      }
-
      if (!response.data || !response.data.text) {
        throw new Error('Missing data in response from the STT API');
      }
-
      return response.data.text.trim();
    } catch (error) {
      logger.error(`STT request failed for provider ${provider}:`, error);
@@ -222,9 +293,9 @@ class STTService {
    } finally {
      try {
        await fs.unlink(req.file.path);
-        logger.debug('[/speech/stt] Temp. audio upload file deleted');
+        logger.debug('[/speech/stt] Temporary audio upload file deleted');
      } catch (error) {
-        logger.debug('[/speech/stt] Temp. audio upload file already deleted');
+        logger.debug('[/speech/stt] Temporary audio upload file already deleted');
      }
    }
  }
--- a/api/server/services/Files/Audio/TTSService.js
+++ b/api/server/services/Files/Audio/TTSService.js
@@ -1,9 +1,11 @@
 const axios = require('axios');
+const { createClient } = require('@deepgram/sdk');
 const { extractEnvVariable, TTSProviders } = require('librechat-data-provider');
 const { getRandomVoiceId, createChunkProcessor, splitTextIntoChunks } = require('./streamAudio');
 const { getCustomConfig } = require('~/server/services/Config');
 const { genAzureEndpoint } = require('~/utils');
 const { logger } = require('~/config');
+const { Readable } = require('stream');

 /**
 * Service class for handling Text-to-Speech (TTS) operations.
@@ -16,12 +18,16 @@ class TTSService {
   */
  constructor(customConfig) {
    this.customConfig = customConfig;
-    this.providerStrategies = {
+    this.apiStrategies = {
      [TTSProviders.OPENAI]: this.openAIProvider.bind(this),
      [TTSProviders.AZURE_OPENAI]: this.azureOpenAIProvider.bind(this),
      [TTSProviders.ELEVENLABS]: this.elevenLabsProvider.bind(this),
      [TTSProviders.LOCALAI]: this.localAIProvider.bind(this),
    };
+
+    this.sdkStrategies = {
+      [TTSProviders.DEEPGRAM]: this.deepgramSDKProvider.bind(this),
+    };
  }

  /**
@@ -109,25 +115,22 @@ class TTSService {
  openAIProvider(ttsSchema, input, voice) {
    const url = ttsSchema?.url || 'https://api.openai.com/v1/audio/speech';

-    if (
-      ttsSchema?.voices &&
-      ttsSchema.voices.length > 0 &&
-      !ttsSchema.voices.includes(voice) &&
-      !ttsSchema.voices.includes('ALL')
-    ) {
+    if (ttsSchema?.voices && ttsSchema.voices.length > 0 && !ttsSchema.voices.includes(voice)) {
      throw new Error(`Voice ${voice} is not available.`);
    }

    const data = {
      input,
      model: ttsSchema?.model,
-      voice: ttsSchema?.voices && ttsSchema.voices.length > 0 ? voice : undefined,
+      voice: voice,
      backend: ttsSchema?.backend,
    };

    const headers = {
      'Content-Type': 'application/json',
-      Authorization: `Bearer ${extractEnvVariable(ttsSchema?.apiKey)}`,
+      // A template literal would send the string "undefined"; keep a real undefined instead.
+      Authorization:
+        ttsSchema?.apiKey ? `Bearer ${extractEnvVariable(ttsSchema.apiKey)}` : undefined,
    };

    return [url, data, headers];
@@ -147,19 +150,14 @@ class TTSService {
      azureOpenAIApiDeploymentName: ttsSchema?.deploymentName,
    })}/audio/speech?api-version=${ttsSchema?.apiVersion}`;

-    if (
-      ttsSchema?.voices &&
-      ttsSchema.voices.length > 0 &&
-      !ttsSchema.voices.includes(voice) &&
-      !ttsSchema.voices.includes('ALL')
-    ) {
+    if (ttsSchema?.voices && ttsSchema.voices.length > 0 && !ttsSchema.voices.includes(voice)) {
      throw new Error(`Voice ${voice} is not available.`);
    }

    const data = {
      model: ttsSchema?.model,
      input,
-      voice: ttsSchema?.voices && ttsSchema.voices.length > 0 ? voice : undefined,
+      voice: voice,
    };

    const headers = {
@@ -184,7 +182,7 @@ class TTSService {
      ttsSchema?.url ||
      `https://api.elevenlabs.io/v1/text-to-speech/${voice}${stream ? '/stream' : ''}`;

-    if (!ttsSchema?.voices.includes(voice) && !ttsSchema?.voices.includes('ALL')) {
+    if (!ttsSchema?.voices.includes(voice)) {
      throw new Error(`Voice ${voice} is not available.`);
    }

@@ -202,7 +200,7 @@ class TTSService {

    const headers = {
      'Content-Type': 'application/json',
-      'xi-api-key': extractEnvVariable(ttsSchema?.apiKey),
+      'xi-api-key': ttsSchema.apiKey ? extractEnvVariable(ttsSchema.apiKey) : '',
      Accept: 'audio/mpeg',
    };

@@ -220,31 +218,107 @@ class TTSService {
  localAIProvider(ttsSchema, input, voice) {
    const url = ttsSchema?.url;

-    if (
-      ttsSchema?.voices &&
-      ttsSchema.voices.length > 0 &&
-      !ttsSchema.voices.includes(voice) &&
-      !ttsSchema.voices.includes('ALL')
-    ) {
+    if (ttsSchema?.voices && ttsSchema.voices.length > 0 && !ttsSchema.voices.includes(voice)) {
      throw new Error(`Voice ${voice} is not available.`);
    }

    const data = {
      input,
-      model: ttsSchema?.voices && ttsSchema.voices.length > 0 ? voice : undefined,
+      model: voice,
      backend: ttsSchema?.backend,
    };

    const headers = {
      'Content-Type': 'application/json',
-      Authorization: `Bearer ${extractEnvVariable(ttsSchema?.apiKey)}`,
+      // A template literal would send the string "undefined"; keep a real undefined instead.
+      Authorization:
+        ttsSchema?.apiKey ? `Bearer ${extractEnvVariable(ttsSchema.apiKey)}` : undefined,
    };

-    if (extractEnvVariable(ttsSchema.apiKey) === '') {
-      delete headers.Authorization;
+    return [url, data, headers];
+  }
+
+  /**
+   * Wraps a web ReadableStream in a Node.js Readable stream (used in Deepgram SDK).
+   * @async
+   * @param {ReadableStream} readableStream - The ReadableStream to convert.
+   * @returns {Promise<Readable>} Readable that pulls one chunk from the reader per read() call.
+   * @throws {Error} If getReader() fails; read errors destroy the returned stream instead.
+   */
+  async streamToNodeStream(readableStream) {
+    const reader = readableStream.getReader();
+    const nodeStream = new Readable({
+      async read() {
+        try {
+          const { value, done } = await reader.read();
+          if (done) {
+            this.push(null);
+          } else {
+            this.push(Buffer.from(value));
+          }
+        } catch (err) {
+          this.destroy(err);
+        }
+      },
+    });
+    return nodeStream;
+  }
+
+  /**
+   * Sends the TTS request through the Deepgram SDK and returns the audio as a Node.js stream.
+   * @async
+   * @param {Object} ttsSchema - The TTS schema for Deepgram SDK.
+   * @param {string} input - The input text.
+   * @param {string} voice - The selected voice; joined into the model name as model-voice-language.
+   * @returns {Promise<Object>} Object with `data` (Node.js stream), `headers` and a fixed `status` of 200.
+   * @throws {Error} If the selected voice is not available or the request fails.
+   */
+  async deepgramSDKProvider(ttsSchema, input, voice) {
+    const apiKey = extractEnvVariable(ttsSchema.apiKey) || '';
+    const deepgram = createClient(apiKey);
+
+    if (ttsSchema?.voices && ttsSchema.voices.length > 0 && !ttsSchema.voices.includes(voice)) {
+      throw new Error(`Voice ${voice} is not available.`);
    }

-    return [url, data, headers];
+    const modelParts = [ttsSchema.model, voice, ttsSchema.language].filter(Boolean);
+
+    const configOptions = {
+      model: modelParts.join('-'),
+      encoding: 'linear16',
+      container: 'wav',
+      bit_rate: ttsSchema.media_settings?.bit_rate,
+      sample_rate: ttsSchema.media_settings?.sample_rate,
+    };
+
+    this.removeUndefined(configOptions);
+
+    try {
+      const response = await deepgram.speak.request({ text: input }, configOptions);
+      const audioStream = await response.getStream();
+      const headers = await response.getHeaders();
+
+      // Convert ReadableStream to Node.js stream
+      const nodeStream = await this.streamToNodeStream(audioStream);
+
+      return {
+        data: nodeStream,
+        headers,
+        status: 200,
+      };
+    } catch (error) {
+      logger.error('Deepgram TTS request failed:', error);
+      throw error;
+    }
+  }
+
+  /**
+   * TODO: Implement a better way to determine if the SDK should be used.
+   * @param {string} provider - The TTS provider identifier.
+   * @returns {boolean} True only for Deepgram, the sole SDK-backed provider.
+   */
+  shouldUseSDK(provider) {
+    return provider === TTSProviders.DEEPGRAM;
  }

  /**
@@ -260,22 +334,34 @@ class TTSService {
   * @throws {Error} If the provider is invalid or the request fails.
   */
  async ttsRequest(provider, ttsSchema, { input, voice, stream = true }) {
-    const strategy = this.providerStrategies[provider];
+    const useSDK = this.shouldUseSDK(provider);
+    const strategy = useSDK ? this.sdkStrategies[provider] : this.apiStrategies[provider];
+
    if (!strategy) {
      throw new Error('Invalid provider');
    }

-    const [url, data, headers] = strategy.call(this, ttsSchema, input, voice, stream);
+    if (useSDK) {
+      const response = await strategy.call(this, ttsSchema, input, voice, stream);

-    [data, headers].forEach(this.removeUndefined.bind(this));
+      return {
+        data: response.data,
+        headers: response.headers,
+        status: response.status,
+      };
+    } else {
+      const [url, data, headers] = strategy.call(this, ttsSchema, input, voice, stream);

-    const options = { headers, responseType: stream ? 'stream' : 'arraybuffer' };
+      [data, headers].forEach(this.removeUndefined.bind(this));

-    try {
-      return await axios.post(url, data, options);
-    } catch (error) {
-      logger.error(`TTS request failed for provider ${provider}:`, error);
-      throw error;
+      const options = { headers, responseType: stream ? 'stream' : 'arraybuffer' };
+
+      try {
+        return await axios.post(url, data, options);
+      } catch (error) {
+        logger.error(`TTS request failed for provider ${provider}:`, error);
+        throw error;
+      }
    }
  }

--- a/api/server/services/Files/Audio/getVoices.js
+++ b/api/server/services/Files/Audio/getVoices.js
@@ -37,6 +37,9 @@ async function getVoices(req, res) {
      case TTSProviders.LOCALAI:
        voices = ttsSchema.localai?.voices;
        break;
+      case TTSProviders.DEEPGRAM:
+        voices = ttsSchema.deepgram?.voices;
+        break;
      default:
        throw new Error('Invalid provider');
    }
--- a/client/src/components/Chat/Messages/HoverButtons.tsx
+++ b/client/src/components/Chat/Messages/HoverButtons.tsx
@@ -79,7 +79,7 @@ export default function HoverButtons({
          messageId={message.messageId}
          content={message.content ?? message.text}
          isLast={isLast}
-          className="hover-button rounded-md p-1 pl-0 text-gray-500 hover:bg-gray-100 hover:text-gray-500 dark:text-gray-400/70 dark:hover:bg-gray-700 dark:hover:text-gray-200 disabled:dark:hover:text-gray-400 md:group-hover:visible md:group-[.final-completion]:visible"
+          className="hover-button rounded-md p-1 hover:bg-gray-100 hover:text-gray-500 focus:opacity-100 dark:text-gray-400/70 dark:hover:bg-gray-700 dark:hover:text-gray-200 disabled:dark:hover:text-gray-400 md:group-hover:visible md:group-[.final-completion]:visible"
        />
      )}
      {isEditableEndpoint && (
--- a/client/src/components/Chat/Messages/MessageAudio.tsx
+++ b/client/src/components/Chat/Messages/MessageAudio.tsx
@@ -1,4 +1,3 @@
-// client/src/components/Chat/Messages/MessageAudio.tsx
 import { memo } from 'react';
 import { useRecoilValue } from 'recoil';
 import type { TMessageAudio } from '~/common';
--- a/client/src/hooks/Audio/useTTSBrowser.ts
+++ b/client/src/hooks/Audio/useTTSBrowser.ts
@@ -1,4 +1,3 @@
-// client/src/hooks/Audio/useTTSBrowser.ts
 import { useRef, useEffect, useState } from 'react';
 import { useRecoilState, useRecoilValue } from 'recoil';
 import { parseTextParts } from 'librechat-data-provider';
--- a/client/src/hooks/Audio/useTTSEdge.ts
+++ b/client/src/hooks/Audio/useTTSEdge.ts
@@ -1,4 +1,3 @@
-// client/src/hooks/Audio/useTTSEdge.ts
 import { useRef, useEffect, useState } from 'react';
 import { useRecoilState, useRecoilValue } from 'recoil';
 import { parseTextParts } from 'librechat-data-provider';
--- a/client/src/hooks/Audio/useTTSExternal.ts
+++ b/client/src/hooks/Audio/useTTSExternal.ts
@@ -1,4 +1,3 @@
-// client/src/hooks/Audio/useTTSExternal.ts
 import { useRef, useEffect, useState } from 'react';
 import { useRecoilState, useRecoilValue } from 'recoil';
 import { parseTextParts } from 'librechat-data-provider';
--- a/package-lock.json
+++ b/package-lock.json
@@ -13,6 +13,9 @@
        "client",
        "packages/*"
      ],
+      "dependencies": {
+        "@deepgram/sdk": "^3.9.0"
+      },
      "devDependencies": {
        "@axe-core/playwright": "^4.9.1",
        "@playwright/test": "^1.38.1",
@@ -6635,6 +6638,44 @@
        "kuler": "^2.0.0"
      }
    },
+    "node_modules/@deepgram/captions": {
+      "version": "1.2.0",
+      "resolved": "https://registry.npmjs.org/@deepgram/captions/-/captions-1.2.0.tgz",
+      "integrity": "sha512-8B1C/oTxTxyHlSFubAhNRgCbQ2SQ5wwvtlByn8sDYZvdDtdn/VE2yEPZ4BvUnrKWmsbTQY6/ooLV+9Ka2qmDSQ==",
+      "license": "MIT",
+      "dependencies": {
+        "dayjs": "^1.11.10"
+      },
+      "engines": {
+        "node": ">=18.0.0"
+      }
+    },
+    "node_modules/@deepgram/sdk": {
+      "version": "3.9.0",
+      "resolved": "https://registry.npmjs.org/@deepgram/sdk/-/sdk-3.9.0.tgz",
+      "integrity": "sha512-X/7JzoYjCObyEaPb2Dgnkwk2LwRe4bw0FJJCLdkjpnFfJCFgA9IWgRD8FEUI6/hp8dW/CqqXkGPA2Q3DIsVG8A==",
+      "license": "MIT",
+      "dependencies": {
+        "@deepgram/captions": "^1.1.1",
+        "@types/node": "^18.19.39",
+        "cross-fetch": "^3.1.5",
+        "deepmerge": "^4.3.1",
+        "events": "^3.3.0",
+        "ws": "^8.17.0"
+      },
+      "engines": {
+        "node": ">=18.0.0"
+      }
+    },
+    "node_modules/@deepgram/sdk/node_modules/@types/node": {
+      "version": "18.19.65",
+      "resolved": "https://registry.npmjs.org/@types/node/-/node-18.19.65.tgz",
+      "integrity": "sha512-Ay5BZuO1UkTmVHzZJNvZKw/E+iB3GQABb6kijEz89w2JrfhNA+M/ebp18pfz9Gqe9ywhMC8AA8yC01lZq48J+Q==",
+      "license": "MIT",
+      "dependencies": {
+        "undici-types": "~5.26.4"
+      }
+    },
    "node_modules/@dicebear/adventurer": {
      "version": "7.0.4",
      "resolved": "https://registry.npmjs.org/@dicebear/adventurer/-/adventurer-7.0.4.tgz",
@@ -17942,6 +17983,12 @@
        "url": "https://github.com/sponsors/kossnocorp"
      }
    },
+    "node_modules/dayjs": {
+      "version": "1.11.13",
+      "resolved": "https://registry.npmjs.org/dayjs/-/dayjs-1.11.13.tgz",
+      "integrity": "sha512-oaMBel6gjolK862uaPQOVTA7q3TZhuSvuMQAAglQDOWYO9A91IrAOUJEyKVlqJlHE0vq5p5UXxzdPfMH/x6xNg==",
+      "license": "MIT"
+    },
    "node_modules/debug": {
      "version": "4.3.7",
      "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.7.tgz",
@@ -18067,7 +18114,6 @@
      "version": "4.3.1",
      "resolved": "https://registry.npmjs.org/deepmerge/-/deepmerge-4.3.1.tgz",
      "integrity": "sha512-3sUqbMEc77XqpdNO7FRyRog+eW3ph+GYCbj+rK+uYyRMuwsVy0rMiVtPn+QJlKFvWP/1PYpapqYn0Me2knFn+A==",
-      "dev": true,
      "engines": {
        "node": ">=0.10.0"
      }
--- a/package.json
+++ b/package.json
@@ -113,5 +113,8 @@
      "admin/",
      "packages/"
    ]
+  },
+  "dependencies": {
+    "@deepgram/sdk": "^3.9.0"
  }
 }
--- a/packages/data-provider/src/config.ts
+++ b/packages/data-provider/src/config.ts
@@ -336,11 +336,28 @@ const ttsLocalaiSchema = z.object({
  backend: z.string(),
 });

+// Deepgram TTS provider settings. Optionality is applied once, where the
+// schema is registered on `ttsSchema`, matching the sibling provider schemas.
+const ttsDeepgramSchema = z.object({
+  url: z.string().optional(),
+  apiKey: z.string().optional(),
+  voices: z.array(z.string()),
+  model: z.string(),
+  language: z.string().optional(),
+  media_settings: z
+    .object({
+      bit_rate: z.number().optional(),
+      sample_rate: z.number().optional(),
+    })
+    .optional(),
+});
+
 const ttsSchema = z.object({
  openai: ttsOpenaiSchema.optional(),
  azureOpenAI: ttsAzureOpenAISchema.optional(),
  elevenlabs: ttsElevenLabsSchema.optional(),
  localai: ttsLocalaiSchema.optional(),
+  deepgram: ttsDeepgramSchema.optional(),
 });

 const sttOpenaiSchema = z.object({
@@ -356,9 +373,50 @@ const sttAzureOpenAISchema = z.object({
  apiVersion: z.string(),
 });

+const sttDeepgramSchema = z.object({
+  url: z.string().optional(),
+  apiKey: z.string().optional(),
+  model: z
+    .object({
+      model: z.string().optional(),
+      language: z.string().optional(),
+      detect_language: z.boolean().optional(),
+      version: z.string().optional(),
+    })
+    .optional(),
+  formatting: z
+    .object({
+      smart_format: z.boolean().optional(),
+      diarize: z.boolean().optional(),
+      filler_words: z.boolean().optional(),
+      numerals: z.boolean().optional(),
+      punctuate: z.boolean().optional(),
+      paragraphs: z.boolean().optional(),
+      profanity_filter: z.boolean().optional(),
+      redact: z.boolean().optional(),
+      utterances: z.boolean().optional(),
+      utt_split: z.number().optional(),
+    })
+    .optional(),
+  custom_vocabulary: z
+    .object({
+      replace: z.array(z.string()).optional(),
+      keywords: z.array(z.string()).optional(),
+    })
+    .optional(),
+  intelligence: z
+    .object({
+      sentiment: z.boolean().optional(),
+      intents: z.boolean().optional(),
+      topics: z.boolean().optional(),
+    })
+    .optional(),
+});
+
 const sttSchema = z.object({
  openai: sttOpenaiSchema.optional(),
  azureOpenAI: sttAzureOpenAISchema.optional(),
+  deepgram: sttDeepgramSchema.optional(),
 });

 const speechTab = z
@@ -1054,6 +1112,10 @@ export enum STTProviders {
   * Provider for Microsoft Azure STT
   */
  AZURE_OPENAI = 'azureOpenAI',
+  /**
+   * Provider for Deepgram STT
+   */
+  DEEPGRAM = 'deepgram',
 }

 export enum TTSProviders {
@@ -1073,6 +1135,10 @@ export enum TTSProviders {
   * Provider for LocalAI TTS
   */
  LOCALAI = 'localai',
+  /**
+   * Provider for Deepgram TTS
+   */
+  DEEPGRAM = 'deepgram',
 }

 /** Enum for app-wide constants */
Author	SHA1	Message	Date
Marco Beretta	daacfce581	feat: simplify STTService request handling by refining SDK usage and improving error logging	2024-12-04 17:33:38 +01:00
Marco Beretta	ffa5f6f09b	🌊 feat: refine SDK usage logic in STT and TTS services, improve header handling	2024-11-24 01:14:17 +01:00
Marco Beretta	b7f4903acd	🌊 feat: enhance TTSService with Deepgram SDK integration and refactor voice validation	2024-11-23 16:49:56 +01:00
Marco Beretta	5eabd2493c	🌊 feat: update Deepgram SDK integration for STT and remove unused TTS provider	2024-11-23 12:20:17 +01:00
Marco Beretta	25d51eff31	🌊 feat: add Deepgram support for STT providers	2024-11-23 12:17:53 +01:00