Compare commits
5 Commits
explicit-m
...
speech/dee
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
daacfce581 | ||
|
|
ffa5f6f09b | ||
|
|
b7f4903acd | ||
|
|
5eabd2493c | ||
|
|
25d51eff31 |
@@ -2,6 +2,7 @@ const axios = require('axios');
|
|||||||
const fs = require('fs').promises;
|
const fs = require('fs').promises;
|
||||||
const FormData = require('form-data');
|
const FormData = require('form-data');
|
||||||
const { Readable } = require('stream');
|
const { Readable } = require('stream');
|
||||||
|
const { createClient } = require('@deepgram/sdk');
|
||||||
const { extractEnvVariable, STTProviders } = require('librechat-data-provider');
|
const { extractEnvVariable, STTProviders } = require('librechat-data-provider');
|
||||||
const { getCustomConfig } = require('~/server/services/Config');
|
const { getCustomConfig } = require('~/server/services/Config');
|
||||||
const { genAzureEndpoint } = require('~/utils');
|
const { genAzureEndpoint } = require('~/utils');
|
||||||
@@ -18,10 +19,14 @@ class STTService {
|
|||||||
*/
|
*/
|
||||||
constructor(customConfig) {
|
constructor(customConfig) {
|
||||||
this.customConfig = customConfig;
|
this.customConfig = customConfig;
|
||||||
this.providerStrategies = {
|
this.apiStrategies = {
|
||||||
[STTProviders.OPENAI]: this.openAIProvider,
|
[STTProviders.OPENAI]: this.openAIProvider,
|
||||||
[STTProviders.AZURE_OPENAI]: this.azureOpenAIProvider,
|
[STTProviders.AZURE_OPENAI]: this.azureOpenAIProvider,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
this.sdkStrategies = {
|
||||||
|
[STTProviders.DEEPGRAM]: this.deepgramSDKProvider,
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -106,7 +111,7 @@ class STTService {
|
|||||||
'Content-Type': 'multipart/form-data',
|
'Content-Type': 'multipart/form-data',
|
||||||
...(apiKey && { Authorization: `Bearer ${apiKey}` }),
|
...(apiKey && { Authorization: `Bearer ${apiKey}` }),
|
||||||
};
|
};
|
||||||
[headers].forEach(this.removeUndefined);
|
this.removeUndefined(headers);
|
||||||
|
|
||||||
return [url, data, headers];
|
return [url, data, headers];
|
||||||
}
|
}
|
||||||
@@ -153,6 +158,70 @@ class STTService {
|
|||||||
return [url, formData, { ...headers, ...formData.getHeaders() }];
|
return [url, formData, { ...headers, ...formData.getHeaders() }];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Transcribes audio using the Deepgram SDK.
|
||||||
|
* @async
|
||||||
|
* @param {Object} sttSchema - The STT schema for Deepgram.
|
||||||
|
* @param {Stream} audioReadStream - The audio data to be transcribed.
|
||||||
|
* @returns {Promise<string>} A promise that resolves to the transcribed text.
|
||||||
|
* @throws {Error} If the transcription fails.
|
||||||
|
*/
|
||||||
|
async deepgramSDKProvider(sttSchema, audioReadStream) {
|
||||||
|
const apiKey = extractEnvVariable(sttSchema.apiKey) || '';
|
||||||
|
const deepgram = createClient(apiKey);
|
||||||
|
|
||||||
|
const configOptions = {
|
||||||
|
// Model parameters
|
||||||
|
model: sttSchema.model?.model,
|
||||||
|
language: sttSchema.model?.language,
|
||||||
|
detect_language: sttSchema.model?.detect_language,
|
||||||
|
version: sttSchema.model?.version,
|
||||||
|
|
||||||
|
// Formatting parameters
|
||||||
|
smart_format: sttSchema.formatting?.smart_format,
|
||||||
|
diarize: sttSchema.formatting?.diarize,
|
||||||
|
filler_words: sttSchema.formatting?.filler_words,
|
||||||
|
numerals: sttSchema.formatting?.numerals,
|
||||||
|
punctuate: sttSchema.formatting?.punctuate,
|
||||||
|
paragraphs: sttSchema.formatting?.paragraphs,
|
||||||
|
profanity_filter: sttSchema.formatting?.profanity_filter,
|
||||||
|
redact: sttSchema.formatting?.redact,
|
||||||
|
utterances: sttSchema.formatting?.utterances,
|
||||||
|
utt_split: sttSchema.formatting?.utt_split,
|
||||||
|
|
||||||
|
// Custom vocabulary parameters
|
||||||
|
replace: sttSchema.custom_vocabulary?.replace,
|
||||||
|
keywords: sttSchema.custom_vocabulary?.keywords,
|
||||||
|
|
||||||
|
// Intelligence parameters
|
||||||
|
sentiment: sttSchema.intelligence?.sentiment,
|
||||||
|
intents: sttSchema.intelligence?.intents,
|
||||||
|
topics: sttSchema.intelligence?.topics,
|
||||||
|
};
|
||||||
|
|
||||||
|
this.removeUndefined(configOptions);
|
||||||
|
|
||||||
|
const { result, error } = await deepgram.listen.prerecorded.transcribeFile(
|
||||||
|
audioReadStream,
|
||||||
|
configOptions,
|
||||||
|
);
|
||||||
|
|
||||||
|
if (error) {
|
||||||
|
throw error;
|
||||||
|
}
|
||||||
|
|
||||||
|
return result.results?.channels[0]?.alternatives[0]?.transcript || '';
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: Implement a better way to determine if the SDK should be used
|
||||||
|
shouldUseSDK(provider) {
|
||||||
|
if (provider === STTProviders.DEEPGRAM) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sends an STT request to the specified provider.
|
* Sends an STT request to the specified provider.
|
||||||
* @async
|
* @async
|
||||||
@@ -165,27 +234,29 @@ class STTService {
|
|||||||
* @throws {Error} If the provider is invalid, the response status is not 200, or the response data is missing.
|
* @throws {Error} If the provider is invalid, the response status is not 200, or the response data is missing.
|
||||||
*/
|
*/
|
||||||
async sttRequest(provider, sttSchema, { audioBuffer, audioFile }) {
|
async sttRequest(provider, sttSchema, { audioBuffer, audioFile }) {
|
||||||
const strategy = this.providerStrategies[provider];
|
const useSDK = this.shouldUseSDK(provider);
|
||||||
|
const strategy = useSDK ? this.sdkStrategies[provider] : this.apiStrategies[provider];
|
||||||
|
|
||||||
if (!strategy) {
|
if (!strategy) {
|
||||||
throw new Error('Invalid provider');
|
throw new Error('Invalid provider or implementation');
|
||||||
}
|
}
|
||||||
|
|
||||||
const audioReadStream = Readable.from(audioBuffer);
|
const audioReadStream = Readable.from(audioBuffer);
|
||||||
audioReadStream.path = 'audio.wav';
|
|
||||||
|
|
||||||
const [url, data, headers] = strategy.call(this, sttSchema, audioReadStream, audioFile);
|
if (useSDK) {
|
||||||
|
return strategy.call(this, sttSchema, audioReadStream, audioFile);
|
||||||
|
}
|
||||||
|
|
||||||
|
const [url, data, headers] = strategy.call(this, sttSchema, audioReadStream);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const response = await axios.post(url, data, { headers });
|
const response = await axios.post(url, data, { headers });
|
||||||
|
|
||||||
if (response.status !== 200) {
|
if (response.status !== 200) {
|
||||||
throw new Error('Invalid response from the STT API');
|
throw new Error('Invalid response from the STT API');
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!response.data || !response.data.text) {
|
if (!response.data || !response.data.text) {
|
||||||
throw new Error('Missing data in response from the STT API');
|
throw new Error('Missing data in response from the STT API');
|
||||||
}
|
}
|
||||||
|
|
||||||
return response.data.text.trim();
|
return response.data.text.trim();
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
logger.error(`STT request failed for provider ${provider}:`, error);
|
logger.error(`STT request failed for provider ${provider}:`, error);
|
||||||
@@ -222,9 +293,9 @@ class STTService {
|
|||||||
} finally {
|
} finally {
|
||||||
try {
|
try {
|
||||||
await fs.unlink(req.file.path);
|
await fs.unlink(req.file.path);
|
||||||
logger.debug('[/speech/stt] Temp. audio upload file deleted');
|
logger.debug('[/speech/stt] Temporary audio upload file deleted');
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
logger.debug('[/speech/stt] Temp. audio upload file already deleted');
|
logger.debug('[/speech/stt] Temporary audio upload file already deleted');
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,9 +1,11 @@
|
|||||||
const axios = require('axios');
|
const axios = require('axios');
|
||||||
|
const { createClient } = require('@deepgram/sdk');
|
||||||
const { extractEnvVariable, TTSProviders } = require('librechat-data-provider');
|
const { extractEnvVariable, TTSProviders } = require('librechat-data-provider');
|
||||||
const { getRandomVoiceId, createChunkProcessor, splitTextIntoChunks } = require('./streamAudio');
|
const { getRandomVoiceId, createChunkProcessor, splitTextIntoChunks } = require('./streamAudio');
|
||||||
const { getCustomConfig } = require('~/server/services/Config');
|
const { getCustomConfig } = require('~/server/services/Config');
|
||||||
const { genAzureEndpoint } = require('~/utils');
|
const { genAzureEndpoint } = require('~/utils');
|
||||||
const { logger } = require('~/config');
|
const { logger } = require('~/config');
|
||||||
|
const { Readable } = require('stream');
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Service class for handling Text-to-Speech (TTS) operations.
|
* Service class for handling Text-to-Speech (TTS) operations.
|
||||||
@@ -16,12 +18,16 @@ class TTSService {
|
|||||||
*/
|
*/
|
||||||
constructor(customConfig) {
|
constructor(customConfig) {
|
||||||
this.customConfig = customConfig;
|
this.customConfig = customConfig;
|
||||||
this.providerStrategies = {
|
this.apiStrategies = {
|
||||||
[TTSProviders.OPENAI]: this.openAIProvider.bind(this),
|
[TTSProviders.OPENAI]: this.openAIProvider.bind(this),
|
||||||
[TTSProviders.AZURE_OPENAI]: this.azureOpenAIProvider.bind(this),
|
[TTSProviders.AZURE_OPENAI]: this.azureOpenAIProvider.bind(this),
|
||||||
[TTSProviders.ELEVENLABS]: this.elevenLabsProvider.bind(this),
|
[TTSProviders.ELEVENLABS]: this.elevenLabsProvider.bind(this),
|
||||||
[TTSProviders.LOCALAI]: this.localAIProvider.bind(this),
|
[TTSProviders.LOCALAI]: this.localAIProvider.bind(this),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
this.sdkStrategies = {
|
||||||
|
[TTSProviders.DEEPGRAM]: this.deepgramSDKProvider.bind(this),
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -109,25 +115,22 @@ class TTSService {
|
|||||||
openAIProvider(ttsSchema, input, voice) {
|
openAIProvider(ttsSchema, input, voice) {
|
||||||
const url = ttsSchema?.url || 'https://api.openai.com/v1/audio/speech';
|
const url = ttsSchema?.url || 'https://api.openai.com/v1/audio/speech';
|
||||||
|
|
||||||
if (
|
if (ttsSchema?.voices && ttsSchema.voices.length > 0 && !ttsSchema.voices.includes(voice)) {
|
||||||
ttsSchema?.voices &&
|
|
||||||
ttsSchema.voices.length > 0 &&
|
|
||||||
!ttsSchema.voices.includes(voice) &&
|
|
||||||
!ttsSchema.voices.includes('ALL')
|
|
||||||
) {
|
|
||||||
throw new Error(`Voice ${voice} is not available.`);
|
throw new Error(`Voice ${voice} is not available.`);
|
||||||
}
|
}
|
||||||
|
|
||||||
const data = {
|
const data = {
|
||||||
input,
|
input,
|
||||||
model: ttsSchema?.model,
|
model: ttsSchema?.model,
|
||||||
voice: ttsSchema?.voices && ttsSchema.voices.length > 0 ? voice : undefined,
|
voice: voice,
|
||||||
backend: ttsSchema?.backend,
|
backend: ttsSchema?.backend,
|
||||||
};
|
};
|
||||||
|
|
||||||
const headers = {
|
const headers = {
|
||||||
'Content-Type': 'application/json',
|
'Content-Type': 'application/json',
|
||||||
Authorization: `Bearer ${extractEnvVariable(ttsSchema?.apiKey)}`,
|
Authorization: `${
|
||||||
|
ttsSchema.apiKey ? 'Bearer ' + extractEnvVariable(ttsSchema.apiKey) : undefined
|
||||||
|
}`,
|
||||||
};
|
};
|
||||||
|
|
||||||
return [url, data, headers];
|
return [url, data, headers];
|
||||||
@@ -147,19 +150,14 @@ class TTSService {
|
|||||||
azureOpenAIApiDeploymentName: ttsSchema?.deploymentName,
|
azureOpenAIApiDeploymentName: ttsSchema?.deploymentName,
|
||||||
})}/audio/speech?api-version=${ttsSchema?.apiVersion}`;
|
})}/audio/speech?api-version=${ttsSchema?.apiVersion}`;
|
||||||
|
|
||||||
if (
|
if (ttsSchema?.voices && ttsSchema.voices.length > 0 && !ttsSchema.voices.includes(voice)) {
|
||||||
ttsSchema?.voices &&
|
|
||||||
ttsSchema.voices.length > 0 &&
|
|
||||||
!ttsSchema.voices.includes(voice) &&
|
|
||||||
!ttsSchema.voices.includes('ALL')
|
|
||||||
) {
|
|
||||||
throw new Error(`Voice ${voice} is not available.`);
|
throw new Error(`Voice ${voice} is not available.`);
|
||||||
}
|
}
|
||||||
|
|
||||||
const data = {
|
const data = {
|
||||||
model: ttsSchema?.model,
|
model: ttsSchema?.model,
|
||||||
input,
|
input,
|
||||||
voice: ttsSchema?.voices && ttsSchema.voices.length > 0 ? voice : undefined,
|
voice: voice,
|
||||||
};
|
};
|
||||||
|
|
||||||
const headers = {
|
const headers = {
|
||||||
@@ -184,7 +182,7 @@ class TTSService {
|
|||||||
ttsSchema?.url ||
|
ttsSchema?.url ||
|
||||||
`https://api.elevenlabs.io/v1/text-to-speech/${voice}${stream ? '/stream' : ''}`;
|
`https://api.elevenlabs.io/v1/text-to-speech/${voice}${stream ? '/stream' : ''}`;
|
||||||
|
|
||||||
if (!ttsSchema?.voices.includes(voice) && !ttsSchema?.voices.includes('ALL')) {
|
if (!ttsSchema?.voices.includes(voice)) {
|
||||||
throw new Error(`Voice ${voice} is not available.`);
|
throw new Error(`Voice ${voice} is not available.`);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -202,7 +200,7 @@ class TTSService {
|
|||||||
|
|
||||||
const headers = {
|
const headers = {
|
||||||
'Content-Type': 'application/json',
|
'Content-Type': 'application/json',
|
||||||
'xi-api-key': extractEnvVariable(ttsSchema?.apiKey),
|
'xi-api-key': ttsSchema.apiKey ? extractEnvVariable(ttsSchema.apiKey) : '',
|
||||||
Accept: 'audio/mpeg',
|
Accept: 'audio/mpeg',
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -220,31 +218,107 @@ class TTSService {
|
|||||||
localAIProvider(ttsSchema, input, voice) {
|
localAIProvider(ttsSchema, input, voice) {
|
||||||
const url = ttsSchema?.url;
|
const url = ttsSchema?.url;
|
||||||
|
|
||||||
if (
|
if (ttsSchema?.voices && ttsSchema.voices.length > 0 && !ttsSchema.voices.includes(voice)) {
|
||||||
ttsSchema?.voices &&
|
|
||||||
ttsSchema.voices.length > 0 &&
|
|
||||||
!ttsSchema.voices.includes(voice) &&
|
|
||||||
!ttsSchema.voices.includes('ALL')
|
|
||||||
) {
|
|
||||||
throw new Error(`Voice ${voice} is not available.`);
|
throw new Error(`Voice ${voice} is not available.`);
|
||||||
}
|
}
|
||||||
|
|
||||||
const data = {
|
const data = {
|
||||||
input,
|
input,
|
||||||
model: ttsSchema?.voices && ttsSchema.voices.length > 0 ? voice : undefined,
|
model: voice,
|
||||||
backend: ttsSchema?.backend,
|
backend: ttsSchema?.backend,
|
||||||
};
|
};
|
||||||
|
|
||||||
const headers = {
|
const headers = {
|
||||||
'Content-Type': 'application/json',
|
'Content-Type': 'application/json',
|
||||||
Authorization: `Bearer ${extractEnvVariable(ttsSchema?.apiKey)}`,
|
Authorization: `${
|
||||||
|
ttsSchema.apiKey ? 'Bearer ' + extractEnvVariable(ttsSchema.apiKey) : undefined
|
||||||
|
}`,
|
||||||
};
|
};
|
||||||
|
|
||||||
if (extractEnvVariable(ttsSchema.apiKey) === '') {
|
return [url, data, headers];
|
||||||
delete headers.Authorization;
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Converts a ReadableStream to a Node.js stream (used in Deepgram SDK).
|
||||||
|
* @async
|
||||||
|
* @param {ReadableStream} readableStream - The ReadableStream to convert.
|
||||||
|
* @returns {Promise<Readable>} The Node.js stream.
|
||||||
|
* @throws {Error} If the conversion fails.
|
||||||
|
*/
|
||||||
|
async streamToNodeStream(readableStream) {
|
||||||
|
const reader = readableStream.getReader();
|
||||||
|
const nodeStream = new Readable({
|
||||||
|
async read() {
|
||||||
|
try {
|
||||||
|
const { value, done } = await reader.read();
|
||||||
|
if (done) {
|
||||||
|
this.push(null);
|
||||||
|
} else {
|
||||||
|
this.push(Buffer.from(value));
|
||||||
|
}
|
||||||
|
} catch (err) {
|
||||||
|
this.destroy(err);
|
||||||
|
}
|
||||||
|
},
|
||||||
|
});
|
||||||
|
return nodeStream;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Prepares the request for Deepgram SDK TTS provider.
|
||||||
|
* @async
|
||||||
|
* @param {Object} ttsSchema - The TTS schema for Deepgram SDK.
|
||||||
|
* @param {string} input - The input text.
|
||||||
|
* @param {string} voice - The selected voice.
|
||||||
|
* @returns {Promise<Object>} The response object.
|
||||||
|
* @throws {Error} If the selected voice is not available or the request fails.
|
||||||
|
*/
|
||||||
|
async deepgramSDKProvider(ttsSchema, input, voice) {
|
||||||
|
const apiKey = extractEnvVariable(ttsSchema.apiKey) || '';
|
||||||
|
const deepgram = createClient(apiKey);
|
||||||
|
|
||||||
|
if (ttsSchema?.voices && ttsSchema.voices.length > 0 && !ttsSchema.voices.includes(voice)) {
|
||||||
|
throw new Error(`Voice ${voice} is not available.`);
|
||||||
}
|
}
|
||||||
|
|
||||||
return [url, data, headers];
|
const modelParts = [ttsSchema.model, voice, ttsSchema.language].filter(Boolean);
|
||||||
|
|
||||||
|
const configOptions = {
|
||||||
|
model: modelParts.join('-'),
|
||||||
|
encoding: 'linear16',
|
||||||
|
container: 'wav',
|
||||||
|
bit_rate: ttsSchema.media_settings?.bit_rate,
|
||||||
|
sample_rate: ttsSchema.media_settings?.sample_rate,
|
||||||
|
};
|
||||||
|
|
||||||
|
this.removeUndefined(configOptions);
|
||||||
|
|
||||||
|
try {
|
||||||
|
const response = await deepgram.speak.request({ text: input }, configOptions);
|
||||||
|
const audioStream = await response.getStream();
|
||||||
|
const headers = await response.getHeaders();
|
||||||
|
|
||||||
|
// Convert ReadableStream to Node.js stream
|
||||||
|
const nodeStream = await this.streamToNodeStream(audioStream);
|
||||||
|
|
||||||
|
return {
|
||||||
|
data: nodeStream,
|
||||||
|
headers,
|
||||||
|
status: 200,
|
||||||
|
};
|
||||||
|
} catch (error) {
|
||||||
|
logger.error('Deepgram TTS request failed:', error);
|
||||||
|
throw error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: Implement a better way to determine if the SDK should be used
|
||||||
|
shouldUseSDK(provider) {
|
||||||
|
if (provider == TTSProviders.DEEPGRAM) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -260,22 +334,34 @@ class TTSService {
|
|||||||
* @throws {Error} If the provider is invalid or the request fails.
|
* @throws {Error} If the provider is invalid or the request fails.
|
||||||
*/
|
*/
|
||||||
async ttsRequest(provider, ttsSchema, { input, voice, stream = true }) {
|
async ttsRequest(provider, ttsSchema, { input, voice, stream = true }) {
|
||||||
const strategy = this.providerStrategies[provider];
|
const useSDK = this.shouldUseSDK(provider);
|
||||||
|
const strategy = useSDK ? this.sdkStrategies[provider] : this.apiStrategies[provider];
|
||||||
|
|
||||||
if (!strategy) {
|
if (!strategy) {
|
||||||
throw new Error('Invalid provider');
|
throw new Error('Invalid provider');
|
||||||
}
|
}
|
||||||
|
|
||||||
const [url, data, headers] = strategy.call(this, ttsSchema, input, voice, stream);
|
if (useSDK) {
|
||||||
|
const response = await strategy.call(this, ttsSchema, input, voice, stream);
|
||||||
|
|
||||||
[data, headers].forEach(this.removeUndefined.bind(this));
|
return {
|
||||||
|
data: response.data,
|
||||||
|
headers: response.headers,
|
||||||
|
status: response.status,
|
||||||
|
};
|
||||||
|
} else {
|
||||||
|
const [url, data, headers] = strategy.call(this, ttsSchema, input, voice, stream);
|
||||||
|
|
||||||
const options = { headers, responseType: stream ? 'stream' : 'arraybuffer' };
|
[data, headers].forEach(this.removeUndefined.bind(this));
|
||||||
|
|
||||||
try {
|
const options = { headers, responseType: stream ? 'stream' : 'arraybuffer' };
|
||||||
return await axios.post(url, data, options);
|
|
||||||
} catch (error) {
|
try {
|
||||||
logger.error(`TTS request failed for provider ${provider}:`, error);
|
return await axios.post(url, data, options);
|
||||||
throw error;
|
} catch (error) {
|
||||||
|
logger.error(`TTS request failed for provider ${provider}:`, error);
|
||||||
|
throw error;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -37,6 +37,9 @@ async function getVoices(req, res) {
|
|||||||
case TTSProviders.LOCALAI:
|
case TTSProviders.LOCALAI:
|
||||||
voices = ttsSchema.localai?.voices;
|
voices = ttsSchema.localai?.voices;
|
||||||
break;
|
break;
|
||||||
|
case TTSProviders.DEEPGRAM:
|
||||||
|
voices = ttsSchema.deepgram?.voices;
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
throw new Error('Invalid provider');
|
throw new Error('Invalid provider');
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -79,7 +79,7 @@ export default function HoverButtons({
|
|||||||
messageId={message.messageId}
|
messageId={message.messageId}
|
||||||
content={message.content ?? message.text}
|
content={message.content ?? message.text}
|
||||||
isLast={isLast}
|
isLast={isLast}
|
||||||
className="hover-button rounded-md p-1 pl-0 text-gray-500 hover:bg-gray-100 hover:text-gray-500 dark:text-gray-400/70 dark:hover:bg-gray-700 dark:hover:text-gray-200 disabled:dark:hover:text-gray-400 md:group-hover:visible md:group-[.final-completion]:visible"
|
className="hover-button rounded-md p-1 hover:bg-gray-100 hover:text-gray-500 focus:opacity-100 dark:text-gray-400/70 dark:hover:bg-gray-700 dark:hover:text-gray-200 disabled:dark:hover:text-gray-400 md:group-hover:visible md:group-[.final-completion]:visible"
|
||||||
/>
|
/>
|
||||||
)}
|
)}
|
||||||
{isEditableEndpoint && (
|
{isEditableEndpoint && (
|
||||||
|
|||||||
@@ -1,4 +1,3 @@
|
|||||||
// client/src/components/Chat/Messages/MessageAudio.tsx
|
|
||||||
import { memo } from 'react';
|
import { memo } from 'react';
|
||||||
import { useRecoilValue } from 'recoil';
|
import { useRecoilValue } from 'recoil';
|
||||||
import type { TMessageAudio } from '~/common';
|
import type { TMessageAudio } from '~/common';
|
||||||
|
|||||||
@@ -1,4 +1,3 @@
|
|||||||
// client/src/hooks/Audio/useTTSBrowser.ts
|
|
||||||
import { useRef, useEffect, useState } from 'react';
|
import { useRef, useEffect, useState } from 'react';
|
||||||
import { useRecoilState, useRecoilValue } from 'recoil';
|
import { useRecoilState, useRecoilValue } from 'recoil';
|
||||||
import { parseTextParts } from 'librechat-data-provider';
|
import { parseTextParts } from 'librechat-data-provider';
|
||||||
|
|||||||
@@ -1,4 +1,3 @@
|
|||||||
// client/src/hooks/Audio/useTTSEdge.ts
|
|
||||||
import { useRef, useEffect, useState } from 'react';
|
import { useRef, useEffect, useState } from 'react';
|
||||||
import { useRecoilState, useRecoilValue } from 'recoil';
|
import { useRecoilState, useRecoilValue } from 'recoil';
|
||||||
import { parseTextParts } from 'librechat-data-provider';
|
import { parseTextParts } from 'librechat-data-provider';
|
||||||
|
|||||||
@@ -1,4 +1,3 @@
|
|||||||
// client/src/hooks/Audio/useTTSExternal.ts
|
|
||||||
import { useRef, useEffect, useState } from 'react';
|
import { useRef, useEffect, useState } from 'react';
|
||||||
import { useRecoilState, useRecoilValue } from 'recoil';
|
import { useRecoilState, useRecoilValue } from 'recoil';
|
||||||
import { parseTextParts } from 'librechat-data-provider';
|
import { parseTextParts } from 'librechat-data-provider';
|
||||||
|
|||||||
48
package-lock.json
generated
48
package-lock.json
generated
@@ -13,6 +13,9 @@
|
|||||||
"client",
|
"client",
|
||||||
"packages/*"
|
"packages/*"
|
||||||
],
|
],
|
||||||
|
"dependencies": {
|
||||||
|
"@deepgram/sdk": "^3.9.0"
|
||||||
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
"@axe-core/playwright": "^4.9.1",
|
"@axe-core/playwright": "^4.9.1",
|
||||||
"@playwright/test": "^1.38.1",
|
"@playwright/test": "^1.38.1",
|
||||||
@@ -6635,6 +6638,44 @@
|
|||||||
"kuler": "^2.0.0"
|
"kuler": "^2.0.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/@deepgram/captions": {
|
||||||
|
"version": "1.2.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/@deepgram/captions/-/captions-1.2.0.tgz",
|
||||||
|
"integrity": "sha512-8B1C/oTxTxyHlSFubAhNRgCbQ2SQ5wwvtlByn8sDYZvdDtdn/VE2yEPZ4BvUnrKWmsbTQY6/ooLV+9Ka2qmDSQ==",
|
||||||
|
"license": "MIT",
|
||||||
|
"dependencies": {
|
||||||
|
"dayjs": "^1.11.10"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": ">=18.0.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/@deepgram/sdk": {
|
||||||
|
"version": "3.9.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/@deepgram/sdk/-/sdk-3.9.0.tgz",
|
||||||
|
"integrity": "sha512-X/7JzoYjCObyEaPb2Dgnkwk2LwRe4bw0FJJCLdkjpnFfJCFgA9IWgRD8FEUI6/hp8dW/CqqXkGPA2Q3DIsVG8A==",
|
||||||
|
"license": "MIT",
|
||||||
|
"dependencies": {
|
||||||
|
"@deepgram/captions": "^1.1.1",
|
||||||
|
"@types/node": "^18.19.39",
|
||||||
|
"cross-fetch": "^3.1.5",
|
||||||
|
"deepmerge": "^4.3.1",
|
||||||
|
"events": "^3.3.0",
|
||||||
|
"ws": "^8.17.0"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": ">=18.0.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/@deepgram/sdk/node_modules/@types/node": {
|
||||||
|
"version": "18.19.65",
|
||||||
|
"resolved": "https://registry.npmjs.org/@types/node/-/node-18.19.65.tgz",
|
||||||
|
"integrity": "sha512-Ay5BZuO1UkTmVHzZJNvZKw/E+iB3GQABb6kijEz89w2JrfhNA+M/ebp18pfz9Gqe9ywhMC8AA8yC01lZq48J+Q==",
|
||||||
|
"license": "MIT",
|
||||||
|
"dependencies": {
|
||||||
|
"undici-types": "~5.26.4"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/@dicebear/adventurer": {
|
"node_modules/@dicebear/adventurer": {
|
||||||
"version": "7.0.4",
|
"version": "7.0.4",
|
||||||
"resolved": "https://registry.npmjs.org/@dicebear/adventurer/-/adventurer-7.0.4.tgz",
|
"resolved": "https://registry.npmjs.org/@dicebear/adventurer/-/adventurer-7.0.4.tgz",
|
||||||
@@ -17942,6 +17983,12 @@
|
|||||||
"url": "https://github.com/sponsors/kossnocorp"
|
"url": "https://github.com/sponsors/kossnocorp"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/dayjs": {
|
||||||
|
"version": "1.11.13",
|
||||||
|
"resolved": "https://registry.npmjs.org/dayjs/-/dayjs-1.11.13.tgz",
|
||||||
|
"integrity": "sha512-oaMBel6gjolK862uaPQOVTA7q3TZhuSvuMQAAglQDOWYO9A91IrAOUJEyKVlqJlHE0vq5p5UXxzdPfMH/x6xNg==",
|
||||||
|
"license": "MIT"
|
||||||
|
},
|
||||||
"node_modules/debug": {
|
"node_modules/debug": {
|
||||||
"version": "4.3.7",
|
"version": "4.3.7",
|
||||||
"resolved": "https://registry.npmjs.org/debug/-/debug-4.3.7.tgz",
|
"resolved": "https://registry.npmjs.org/debug/-/debug-4.3.7.tgz",
|
||||||
@@ -18067,7 +18114,6 @@
|
|||||||
"version": "4.3.1",
|
"version": "4.3.1",
|
||||||
"resolved": "https://registry.npmjs.org/deepmerge/-/deepmerge-4.3.1.tgz",
|
"resolved": "https://registry.npmjs.org/deepmerge/-/deepmerge-4.3.1.tgz",
|
||||||
"integrity": "sha512-3sUqbMEc77XqpdNO7FRyRog+eW3ph+GYCbj+rK+uYyRMuwsVy0rMiVtPn+QJlKFvWP/1PYpapqYn0Me2knFn+A==",
|
"integrity": "sha512-3sUqbMEc77XqpdNO7FRyRog+eW3ph+GYCbj+rK+uYyRMuwsVy0rMiVtPn+QJlKFvWP/1PYpapqYn0Me2knFn+A==",
|
||||||
"dev": true,
|
|
||||||
"engines": {
|
"engines": {
|
||||||
"node": ">=0.10.0"
|
"node": ">=0.10.0"
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -113,5 +113,8 @@
|
|||||||
"admin/",
|
"admin/",
|
||||||
"packages/"
|
"packages/"
|
||||||
]
|
]
|
||||||
|
},
|
||||||
|
"dependencies": {
|
||||||
|
"@deepgram/sdk": "^3.9.0"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -336,11 +336,28 @@ const ttsLocalaiSchema = z.object({
|
|||||||
backend: z.string(),
|
backend: z.string(),
|
||||||
});
|
});
|
||||||
|
|
||||||
|
const ttsDeepgramSchema = z
|
||||||
|
.object({
|
||||||
|
url: z.string().optional(),
|
||||||
|
apiKey: z.string().optional(),
|
||||||
|
voices: z.array(z.string()),
|
||||||
|
model: z.string(),
|
||||||
|
language: z.string().optional(),
|
||||||
|
media_settings: z
|
||||||
|
.object({
|
||||||
|
bit_rate: z.number().optional(),
|
||||||
|
sample_rate: z.number().optional(),
|
||||||
|
})
|
||||||
|
.optional(),
|
||||||
|
})
|
||||||
|
.optional();
|
||||||
|
|
||||||
const ttsSchema = z.object({
|
const ttsSchema = z.object({
|
||||||
openai: ttsOpenaiSchema.optional(),
|
openai: ttsOpenaiSchema.optional(),
|
||||||
azureOpenAI: ttsAzureOpenAISchema.optional(),
|
azureOpenAI: ttsAzureOpenAISchema.optional(),
|
||||||
elevenlabs: ttsElevenLabsSchema.optional(),
|
elevenlabs: ttsElevenLabsSchema.optional(),
|
||||||
localai: ttsLocalaiSchema.optional(),
|
localai: ttsLocalaiSchema.optional(),
|
||||||
|
deepgram: ttsDeepgramSchema.optional(),
|
||||||
});
|
});
|
||||||
|
|
||||||
const sttOpenaiSchema = z.object({
|
const sttOpenaiSchema = z.object({
|
||||||
@@ -356,9 +373,50 @@ const sttAzureOpenAISchema = z.object({
|
|||||||
apiVersion: z.string(),
|
apiVersion: z.string(),
|
||||||
});
|
});
|
||||||
|
|
||||||
|
const sttDeepgramSchema = z.object({
|
||||||
|
url: z.string().optional(),
|
||||||
|
apiKey: z.string().optional(),
|
||||||
|
model: z
|
||||||
|
.object({
|
||||||
|
model: z.string().optional(),
|
||||||
|
language: z.string().optional(),
|
||||||
|
detect_language: z.boolean().optional(),
|
||||||
|
version: z.string().optional(),
|
||||||
|
})
|
||||||
|
.optional(),
|
||||||
|
formatting: z
|
||||||
|
.object({
|
||||||
|
smart_format: z.boolean().optional(),
|
||||||
|
diarize: z.boolean().optional(),
|
||||||
|
filler_words: z.boolean().optional(),
|
||||||
|
numerals: z.boolean().optional(),
|
||||||
|
punctuate: z.boolean().optional(),
|
||||||
|
paragraphs: z.boolean().optional(),
|
||||||
|
profanity_filter: z.boolean().optional(),
|
||||||
|
redact: z.boolean().optional(),
|
||||||
|
utterances: z.boolean().optional(),
|
||||||
|
utt_split: z.number().optional(),
|
||||||
|
})
|
||||||
|
.optional(),
|
||||||
|
custom_vocabulary: z
|
||||||
|
.object({
|
||||||
|
replace: z.array(z.string()).optional(),
|
||||||
|
keywords: z.array(z.string()).optional(),
|
||||||
|
})
|
||||||
|
.optional(),
|
||||||
|
intelligence: z
|
||||||
|
.object({
|
||||||
|
sentiment: z.boolean().optional(),
|
||||||
|
intents: z.boolean().optional(),
|
||||||
|
topics: z.boolean().optional(),
|
||||||
|
})
|
||||||
|
.optional(),
|
||||||
|
});
|
||||||
|
|
||||||
const sttSchema = z.object({
|
const sttSchema = z.object({
|
||||||
openai: sttOpenaiSchema.optional(),
|
openai: sttOpenaiSchema.optional(),
|
||||||
azureOpenAI: sttAzureOpenAISchema.optional(),
|
azureOpenAI: sttAzureOpenAISchema.optional(),
|
||||||
|
deepgram: sttDeepgramSchema.optional(),
|
||||||
});
|
});
|
||||||
|
|
||||||
const speechTab = z
|
const speechTab = z
|
||||||
@@ -1054,6 +1112,10 @@ export enum STTProviders {
|
|||||||
* Provider for Microsoft Azure STT
|
* Provider for Microsoft Azure STT
|
||||||
*/
|
*/
|
||||||
AZURE_OPENAI = 'azureOpenAI',
|
AZURE_OPENAI = 'azureOpenAI',
|
||||||
|
/**
|
||||||
|
* Provider for Deepgram STT
|
||||||
|
*/
|
||||||
|
DEEPGRAM = 'deepgram',
|
||||||
}
|
}
|
||||||
|
|
||||||
export enum TTSProviders {
|
export enum TTSProviders {
|
||||||
@@ -1073,6 +1135,10 @@ export enum TTSProviders {
|
|||||||
* Provider for LocalAI TTS
|
* Provider for LocalAI TTS
|
||||||
*/
|
*/
|
||||||
LOCALAI = 'localai',
|
LOCALAI = 'localai',
|
||||||
|
/**
|
||||||
|
* Provider for Deepgram TTS
|
||||||
|
*/
|
||||||
|
DEEPGRAM = 'deepgram',
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Enum for app-wide constants */
|
/** Enum for app-wide constants */
|
||||||
|
|||||||
Reference in New Issue
Block a user