Compare commits

...

9 Commits

33 changed files with 2671 additions and 133 deletions

View File

@@ -97,11 +97,13 @@
"passport-ldapauth": "^3.0.1",
"passport-local": "^1.0.0",
"sharp": "^0.32.6",
"socket.io": "^4.8.1",
"tiktoken": "^1.0.15",
"traverse": "^0.6.7",
"ua-parser-js": "^1.0.36",
"winston": "^3.11.0",
"winston-daily-rotate-file": "^4.7.1",
"wrtc": "^0.4.7",
"youtube-transcript": "^1.2.1",
"zod": "^3.22.4"
},

View File

@@ -4,6 +4,7 @@ require('module-alias')({ base: path.resolve(__dirname, '..') });
const cors = require('cors');
const axios = require('axios');
const express = require('express');
const { createServer } = require('http');
const compression = require('compression');
const passport = require('passport');
const mongoSanitize = require('express-mongo-sanitize');
@@ -14,6 +15,7 @@ const { connectDb, indexSync } = require('~/lib/db');
const { isEnabled } = require('~/server/utils');
const { ldapLogin } = require('~/strategies');
const { logger } = require('~/config');
const { SocketIOService } = require('./services/WebSocket/WebSocketServer');
const validateImageRequest = require('./middleware/validateImageRequest');
const errorController = require('./controllers/ErrorController');
const configureSocialLogins = require('./socialLogins');
@@ -36,7 +38,18 @@ const startServer = async () => {
await indexSync();
const app = express();
const server = createServer(app);
app.disable('x-powered-by');
app.use(
cors({
origin: true,
credentials: true,
}),
);
new SocketIOService(server);
await AppService(app);
const indexPath = path.join(app.locals.paths.dist, 'index.html');
@@ -109,6 +122,7 @@ const startServer = async () => {
app.use('/api/agents', routes.agents);
app.use('/api/banner', routes.banner);
app.use('/api/bedrock', routes.bedrock);
app.use('/api/websocket', routes.websocket);
app.use('/api/tags', routes.tags);
@@ -126,7 +140,7 @@ const startServer = async () => {
res.send(updatedIndexHtml);
});
app.listen(port, host, () => {
server.listen(port, host, () => {
if (host == '0.0.0.0') {
logger.info(
`Server listening on all interfaces at port ${port}. Use http://localhost:${port} to access it`,
@@ -134,6 +148,8 @@ const startServer = async () => {
} else {
logger.info(`Server listening at http://${host == '0.0.0.0' ? 'localhost' : host}:${port}`);
}
logger.info(`Socket.IO endpoint: http://${host}:${port}`);
});
};
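
The switch from app.listen to server.listen above is what makes the Socket.IO endpoint work: the Express app is wrapped in a plain Node http.Server, SocketIOService attaches to that server, and the wrapped server is what binds the port so HTTP and WebSocket traffic share it. A minimal sketch of the pattern, assuming only the express and socket.io packages pinned in package.json above (the port value is illustrative):

import express from 'express';
import { createServer } from 'http';
import { Server } from 'socket.io';

const app = express();
// Wrap the Express app so Socket.IO can hook into the same HTTP server
const httpServer = createServer(app);

// Attaches the Socket.IO upgrade handler at the default /socket.io path
const io = new Server(httpServer, { path: '/socket.io' });
io.on('connection', (socket) => {
  console.log(`client connected: ${socket.id}`);
});

// Bind the wrapped server, not `app`, or the upgrade handler never sees requests
httpServer.listen(3080);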

View File

@@ -3,7 +3,7 @@ const router = express.Router();
const { getCustomConfigSpeech } = require('~/server/services/Files/Audio');
router.get('/get', async (req, res) => {
router.get('/', async (req, res) => {
await getCustomConfigSpeech(req, res);
});

View File

@@ -4,6 +4,7 @@ const { createTTSLimiters, createSTTLimiters } = require('~/server/middleware');
const stt = require('./stt');
const tts = require('./tts');
const customConfigSpeech = require('./customConfigSpeech');
const realtime = require('./realtime');
const router = express.Router();
@@ -14,4 +15,6 @@ router.use('/tts', ttsIpLimiter, ttsUserLimiter, tts);
router.use('/config', customConfigSpeech);
router.use('/realtime', realtime);
module.exports = router;

View File

@@ -0,0 +1,10 @@
const express = require('express');
const router = express.Router();
const { getRealtimeConfig } = require('~/server/services/Files/Audio');
router.get('/', async (req, res) => {
await getRealtimeConfig(req, res);
});
module.exports = router;

View File

@@ -2,6 +2,7 @@ const assistants = require('./assistants');
const categories = require('./categories');
const tokenizer = require('./tokenizer');
const endpoints = require('./endpoints');
const websocket = require('./websocket');
const staticRoute = require('./static');
const messages = require('./messages');
const presets = require('./presets');
@@ -15,6 +16,7 @@ const models = require('./models');
const convos = require('./convos');
const config = require('./config');
const agents = require('./agents');
const banner = require('./banner');
const roles = require('./roles');
const oauth = require('./oauth');
const files = require('./files');
@@ -25,7 +27,6 @@ const edit = require('./edit');
const keys = require('./keys');
const user = require('./user');
const ask = require('./ask');
const banner = require('./banner');
module.exports = {
ask,
@@ -39,6 +40,7 @@ module.exports = {
files,
share,
agents,
banner,
bedrock,
convos,
search,
@@ -50,10 +52,10 @@ module.exports = {
presets,
balance,
messages,
websocket,
endpoints,
tokenizer,
assistants,
categories,
staticRoute,
banner,
};

View File

@@ -0,0 +1,19 @@
const express = require('express');
const optionalJwtAuth = require('~/server/middleware/optionalJwtAuth');
const router = express.Router();
router.get('/', optionalJwtAuth, async (req, res) => {
const isProduction = process.env.NODE_ENV === 'production';
const protocol = isProduction && req.secure ? 'https' : 'http';
const serverDomain = process.env.SERVER_DOMAIN
? process.env.SERVER_DOMAIN.replace(/^https?:\/\//, '')
: req.headers.host;
const socketIoUrl = `${protocol}://${serverDomain}`;
res.json({ url: socketIoUrl });
});
module.exports = router;
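
This route hands the client the origin to point socket.io-client at, derived from SERVER_DOMAIN or the request's Host header. A sketch of a consumer, assuming the /api/websocket mount shown in index.js and the TWebsocketUrlResponse shape added later in this diff:

import { io, type Socket } from 'socket.io-client';

async function connectSignaling(): Promise<Socket> {
  // Ask the server which origin to use, then open the signaling socket
  const res = await fetch('/api/websocket', { credentials: 'include' });
  const { url }: { url: string } = await res.json();
  return io(url, { transports: ['websocket'] });
}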

View File

@@ -0,0 +1,102 @@
const { extractEnvVariable, RealtimeVoiceProviders } = require('librechat-data-provider');
const { getCustomConfig } = require('~/server/services/Config');
const { logger } = require('~/config');
class RealtimeService {
constructor(customConfig) {
this.customConfig = customConfig;
this.providerStrategies = {
[RealtimeVoiceProviders.OPENAI]: this.openaiProvider.bind(this),
};
}
static async getInstance() {
const customConfig = await getCustomConfig();
if (!customConfig) {
throw new Error('Custom config not found');
}
return new RealtimeService(customConfig);
}
async getProviderSchema() {
const realtimeSchema = this.customConfig.speech.realtime;
if (!realtimeSchema) {
throw new Error('No Realtime schema is set in config');
}
const providers = Object.entries(realtimeSchema).filter(
([, value]) => Object.keys(value).length > 0,
);
if (providers.length !== 1) {
throw new Error(providers.length > 1 ? 'Multiple providers set' : 'No provider set');
}
return providers[0];
}
async openaiProvider(schema, voice) {
const defaultRealtimeUrl = 'https://api.openai.com/v1/realtime';
const allowedVoices = ['alloy', 'ash', 'ballad', 'coral', 'echo', 'sage', 'shimmer', 'verse'];
if (!voice) {
throw new Error('Voice not specified');
}
if (!allowedVoices.includes(voice)) {
throw new Error(`Invalid voice: ${voice}`);
}
const apiKey = extractEnvVariable(schema.apiKey);
if (!apiKey) {
throw new Error('OpenAI API key not configured');
}
const response = await fetch('https://api.openai.com/v1/realtime/sessions', {
method: 'POST',
headers: {
Authorization: `Bearer ${apiKey}`,
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'gpt-4o-realtime-preview-2024-12-17',
modalities: ['audio', 'text'],
voice: voice,
}),
});
if (!response.ok) {
throw new Error(`OpenAI session request failed: ${response.status}`);
}
const token = await response.json();
return {
provider: RealtimeVoiceProviders.OPENAI,
token: token,
url: schema.url || defaultRealtimeUrl,
};
}
async getRealtimeConfig(req, res) {
try {
const [provider, schema] = await this.getProviderSchema();
const strategy = this.providerStrategies[provider];
if (!strategy) {
throw new Error(`Unsupported provider: ${provider}`);
}
const voice = req.query.voice;
const config = await strategy(schema, voice);
res.json(config);
} catch (error) {
logger.error('[RealtimeService] Config generation failed:', error);
res.status(500).json({ error: error.message });
}
}
}
async function getRealtimeConfig(req, res) {
const service = await RealtimeService.getInstance();
await service.getRealtimeConfig(req, res);
}
module.exports = getRealtimeConfig;
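
The service resolves speech.realtime from the custom config, requires exactly one provider block to be populated, and mints an ephemeral session from OpenAI for the requested voice. From the client, the endpoint is a plain GET with a voice query parameter; a sketch of a typed caller, assuming the speech router is mounted at /api/speech as the route files above suggest:

type RealtimeTokenResponse = {
  // `token` is the session object returned by OpenAI's /v1/realtime/sessions call
  token: unknown;
  url: string;
};

async function fetchRealtimeToken(voice: string): Promise<RealtimeTokenResponse> {
  const res = await fetch(`/api/speech/realtime?voice=${encodeURIComponent(voice)}`, {
    credentials: 'include',
  });
  if (!res.ok) {
    throw new Error(`Realtime token request failed: ${res.status}`);
  }
  return res.json();
}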

View File

@@ -1,4 +1,5 @@
const getCustomConfigSpeech = require('./getCustomConfigSpeech');
const getRealtimeConfig = require('./getRealtimeConfig');
const TTSService = require('./TTSService');
const STTService = require('./STTService');
const getVoices = require('./getVoices');
@@ -6,6 +7,7 @@ const getVoices = require('./getVoices');
module.exports = {
getVoices,
getCustomConfigSpeech,
getRealtimeConfig,
...STTService,
...TTSService,
};

View File

@@ -0,0 +1,193 @@
const { Server } = require('socket.io');
const { RTCPeerConnection, RTCIceCandidate, MediaStream } = require('wrtc');
class WebRTCConnection {
constructor(socket, config) {
this.socket = socket;
this.config = config;
this.peerConnection = null;
this.audioTransceiver = null;
this.pendingCandidates = [];
this.state = 'idle';
this.log = config.log || console.log;
}
async handleOffer(offer) {
try {
if (!this.peerConnection) {
this.peerConnection = new RTCPeerConnection(this.config.rtcConfig);
this.setupPeerConnectionListeners();
}
await this.peerConnection.setRemoteDescription(offer);
const mediaStream = new MediaStream();
this.audioTransceiver = this.peerConnection.addTransceiver('audio', {
direction: 'sendrecv',
streams: [mediaStream],
});
const answer = await this.peerConnection.createAnswer();
await this.peerConnection.setLocalDescription(answer);
this.socket.emit('webrtc-answer', answer);
} catch (error) {
this.log(`Error handling offer: ${error}`, 'error');
this.socket.emit('webrtc-error', {
message: error.message,
code: 'OFFER_ERROR',
});
}
}
setupPeerConnectionListeners() {
if (!this.peerConnection) {
return;
}
this.peerConnection.ontrack = ({ track }) => {
this.log(`Received ${track.kind} track from client`);
if (track.kind === 'audio') {
this.handleIncomingAudio(track);
}
track.onended = () => {
this.log(`${track.kind} track ended`);
};
};
this.peerConnection.onicecandidate = ({ candidate }) => {
if (candidate) {
this.socket.emit('icecandidate', candidate);
}
};
this.peerConnection.onconnectionstatechange = () => {
if (!this.peerConnection) {
return;
}
const state = this.peerConnection.connectionState;
this.log(`Connection state changed to ${state}`);
this.state = state;
if (state === 'failed' || state === 'closed') {
this.cleanup();
}
};
}
handleIncomingAudio(track) {
if (this.peerConnection) {
// Echo the incoming audio straight back to the client over the same connection
const stream = new MediaStream([track]);
this.peerConnection.addTrack(track, stream);
}
}
async addIceCandidate(candidate) {
try {
if (this.peerConnection?.remoteDescription) {
if (candidate && candidate.candidate) {
await this.peerConnection.addIceCandidate(new RTCIceCandidate(candidate));
} else {
this.log('Invalid ICE candidate', 'warn');
}
} else {
this.pendingCandidates.push(candidate);
}
} catch (error) {
this.log(`Error adding ICE candidate: ${error}`, 'error');
}
}
cleanup() {
if (this.peerConnection) {
try {
this.peerConnection.close();
} catch (error) {
this.log(`Error closing peer connection: ${error}`, 'error');
}
this.peerConnection = null;
}
this.audioTransceiver = null;
this.pendingCandidates = [];
this.state = 'idle';
}
}
class SocketIOService {
constructor(httpServer, config = {}) {
this.config = {
rtcConfig: {
iceServers: [
{
urls: ['stun:stun.l.google.com:19302', 'stun:stun1.l.google.com:19302'],
},
],
iceCandidatePoolSize: 10,
bundlePolicy: 'max-bundle',
rtcpMuxPolicy: 'require',
},
...config,
};
this.io = new Server(httpServer, {
path: '/socket.io',
cors: {
origin: '*',
methods: ['GET', 'POST'],
},
});
this.connections = new Map();
this.setupSocketHandlers();
}
setupSocketHandlers() {
this.io.on('connection', (socket) => {
this.log(`Client connected: ${socket.id}`);
const rtcConnection = new WebRTCConnection(socket, {
...this.config,
log: this.log.bind(this),
});
this.connections.set(socket.id, rtcConnection);
socket.on('webrtc-offer', (offer) => {
this.log(`Received WebRTC offer from ${socket.id}`);
rtcConnection.handleOffer(offer);
});
socket.on('icecandidate', (candidate) => {
rtcConnection.addIceCandidate(candidate);
});
socket.on('vad-status', (status) => {
this.log(`VAD status from ${socket.id}: ${JSON.stringify(status)}`);
});
socket.on('disconnect', () => {
this.log(`Client disconnected: ${socket.id}`);
rtcConnection.cleanup();
this.connections.delete(socket.id);
});
});
}
log(message, level = 'info') {
const timestamp = new Date().toISOString();
console.log(`[WebRTC ${timestamp}] [${level.toUpperCase()}] ${message}`);
}
shutdown() {
for (const connection of this.connections.values()) {
connection.cleanup();
}
this.connections.clear();
this.io.close();
}
}
module.exports = { SocketIOService };
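
The event names in this service define the signaling contract the browser has to mirror: emit webrtc-offer and icecandidate, listen for webrtc-answer, icecandidate, and webrtc-error. A minimal client-side sketch of that handshake against this server (the URL argument is illustrative; WebRTCService later in this diff is the real implementation):

import { io, type Socket } from 'socket.io-client';

async function startLoopbackCall(url: string): Promise<void> {
  const socket: Socket = io(url, { transports: ['websocket'] });
  const pc = new RTCPeerConnection();

  // Forward our ICE candidates to the server-side wrtc peer
  pc.onicecandidate = ({ candidate }) => {
    if (candidate) {
      socket.emit('icecandidate', candidate);
    }
  };

  // Apply the server's answer and any candidates it trickles back
  socket.on('webrtc-answer', (answer: RTCSessionDescriptionInit) => pc.setRemoteDescription(answer));
  socket.on('icecandidate', (candidate: RTCIceCandidateInit) => pc.addIceCandidate(candidate));
  socket.on('webrtc-error', (err: { code: string; message: string }) => console.error(err));

  // Kick off the exchange with an audio-only offer
  const offer = await pc.createOffer({ offerToReceiveAudio: true });
  await pc.setLocalDescription(offer);
  socket.emit('webrtc-offer', offer);
}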

View File

@@ -51,6 +51,7 @@
"@radix-ui/react-switch": "^1.0.3",
"@radix-ui/react-tabs": "^1.0.3",
"@radix-ui/react-toast": "^1.1.5",
"@ricky0123/vad-react": "^0.0.28",
"@tanstack/react-query": "^4.28.0",
"@tanstack/react-table": "^8.11.7",
"class-variance-authority": "^0.6.0",
@@ -97,6 +98,7 @@
"remark-gfm": "^4.0.0",
"remark-math": "^6.0.0",
"remark-supersub": "^1.0.0",
"socket.io-client": "^4.8.1",
"sse.js": "^2.5.0",
"tailwind-merge": "^1.9.1",
"tailwindcss-animate": "^1.0.5",

View File

@@ -48,6 +48,32 @@ export type AudioChunk = {
};
};
export interface RTCMessage {
type:
| 'audio-chunk'
| 'audio-received'
| 'transcription'
| 'llm-response'
| 'tts-chunk'
| 'call-ended'
| 'webrtc-answer'
| 'icecandidate';
payload?: RTCSessionDescriptionInit | RTCIceCandidateInit;
}
export type MessagePayload =
| RTCSessionDescriptionInit
| RTCIceCandidateInit
| { speaking: boolean };
export enum CallState {
IDLE = 'idle',
CONNECTING = 'connecting',
ACTIVE = 'active',
ERROR = 'error',
ENDED = 'ended',
}
export type AssistantListItem = {
id: string;
name: string;

View File

@@ -0,0 +1,241 @@
import React, { useEffect, useRef } from 'react';
import { useRecoilState } from 'recoil';
import {
Phone,
PhoneOff,
AlertCircle,
Mic,
MicOff,
Volume2,
VolumeX,
Activity,
} from 'lucide-react';
import { OGDialog, OGDialogContent, Button } from '~/components';
import { useWebSocket, useCall } from '~/hooks';
import { CallState } from '~/common';
import store from '~/store';
export const Call: React.FC = () => {
const { isConnected } = useWebSocket();
const {
callState,
error,
startCall,
hangUp,
isConnecting,
localStream,
remoteStream,
connectionQuality,
isMuted,
toggleMute,
} = useCall();
const [open, setOpen] = useRecoilState(store.callDialogOpen(0));
const [eventLog, setEventLog] = React.useState<string[]>([]);
const [isAudioEnabled, setIsAudioEnabled] = React.useState(true);
const remoteAudioRef = useRef<HTMLAudioElement>(null);
const logEvent = (message: string) => {
console.log(message);
setEventLog((prev) => [...prev, `${new Date().toISOString()}: ${message}`]);
};
useEffect(() => {
if (remoteAudioRef.current && remoteStream) {
remoteAudioRef.current.srcObject = remoteStream;
remoteAudioRef.current.play().catch((err) => console.error('Error playing audio:', err));
}
}, [remoteStream]);
useEffect(() => {
if (localStream) {
localStream.getAudioTracks().forEach((track) => {
track.enabled = !isMuted;
});
}
}, [localStream, isMuted]);
useEffect(() => {
if (isConnected) {
logEvent('Connected to server.');
} else {
logEvent('Disconnected from server.');
}
}, [isConnected]);
useEffect(() => {
if (error) {
logEvent(`Error: ${error.message} (${error.code})`);
}
}, [error]);
useEffect(() => {
logEvent(`Call state changed to: ${callState}`);
}, [callState]);
const handleStartCall = () => {
logEvent('Attempting to start call...');
startCall();
};
const handleHangUp = () => {
logEvent('Attempting to hang up call...');
hangUp();
};
const handleToggleMute = () => {
toggleMute();
logEvent(`Microphone ${isMuted ? 'unmuted' : 'muted'}`);
};
const toggleAudio = () => {
setIsAudioEnabled((prev) => !prev);
if (remoteAudioRef.current) {
remoteAudioRef.current.muted = !isAudioEnabled;
}
logEvent(`Speaker ${isAudioEnabled ? 'disabled' : 'enabled'}`);
};
const isActive = callState === CallState.ACTIVE;
const isError = callState === CallState.ERROR;
// TESTS
useEffect(() => {
if (remoteAudioRef.current && remoteStream) {
console.log('Setting up remote audio:', {
tracks: remoteStream.getTracks().length,
active: remoteStream.active,
});
remoteAudioRef.current.srcObject = remoteStream;
remoteAudioRef.current.muted = false;
remoteAudioRef.current.volume = 1.0;
const playPromise = remoteAudioRef.current.play();
if (playPromise) {
playPromise.catch((err) => {
console.error('Error playing audio:', err);
// Retry play on user interaction
document.addEventListener(
'click',
() => {
remoteAudioRef.current?.play();
},
{ once: true },
);
});
}
}
}, [remoteStream]);
return (
<OGDialog open={open} onOpenChange={setOpen}>
<OGDialogContent className="w-[28rem] p-8">
<div className="flex flex-col items-center gap-6">
{/* Connection Status */}
<div className="flex w-full items-center justify-between">
<div
className={`flex items-center gap-2 rounded-full px-4 py-2 ${
isConnected ? 'bg-green-100 text-green-700' : 'bg-red-100 text-red-700'
}`}
>
<div
className={`h-2 w-2 rounded-full ${isConnected ? 'bg-green-500' : 'bg-red-500'}`}
/>
<span className="text-sm font-medium">
{isConnected ? 'Connected' : 'Disconnected'}
</span>
</div>
{isActive && (
<div
className={`flex items-center gap-2 rounded-full px-4 py-2 ${
(connectionQuality === 'good' && 'bg-green-100 text-green-700') ||
(connectionQuality === 'poor' && 'bg-yellow-100 text-yellow-700') ||
'bg-gray-100 text-gray-700'
}`}
>
<Activity size={16} />
<span className="text-sm font-medium capitalize">{connectionQuality} Quality</span>
</div>
)}
</div>
{/* Error Display */}
{error && (
<div className="flex w-full items-center gap-2 rounded-md bg-red-100 p-3 text-red-700">
<AlertCircle size={16} />
<span className="text-sm">{error.message}</span>
</div>
)}
{/* Call Controls */}
<div className="flex items-center gap-4">
{isActive && (
<>
<Button
onClick={handleToggleMute}
className={`rounded-full p-3 ${
isMuted ? 'bg-red-100 text-red-700' : 'bg-gray-100 text-gray-700'
}`}
title={isMuted ? 'Unmute microphone' : 'Mute microphone'}
>
{isMuted ? <MicOff size={20} /> : <Mic size={20} />}
</Button>
<Button
onClick={toggleAudio}
className={`rounded-full p-3 ${
!isAudioEnabled ? 'bg-red-100 text-red-700' : 'bg-gray-100 text-gray-700'
}`}
title={isAudioEnabled ? 'Disable speaker' : 'Enable speaker'}
>
{isAudioEnabled ? <Volume2 size={20} /> : <VolumeX size={20} />}
</Button>
</>
)}
{isActive ? (
<Button
onClick={handleHangUp}
className="flex items-center gap-2 rounded-full bg-red-500 px-6 py-3 text-white hover:bg-red-600"
>
<PhoneOff size={20} />
<span>End Call</span>
</Button>
) : (
<Button
onClick={handleStartCall}
disabled={!isConnected || isError || isConnecting}
className="flex items-center gap-2 rounded-full bg-green-500 px-6 py-3 text-white hover:bg-green-600 disabled:opacity-50"
>
<Phone size={20} />
<span>{isConnecting ? 'Connecting...' : 'Start Call'}</span>
</Button>
)}
</div>
{/* Event Log */}
<h3 className="mb-2 text-lg font-medium">Event Log</h3>
<div className="h-64 overflow-y-auto rounded-md bg-surface-secondary p-2 shadow-inner">
<ul className="space-y-1 text-xs text-text-secondary">
{eventLog.map((log, index) => (
<li key={index} className="font-mono">
{log}
</li>
))}
</ul>
</div>
{/* Hidden Audio Element */}
<audio ref={remoteAudioRef} autoPlay>
<track kind="captions" />
</audio>
</div>
</OGDialogContent>
</OGDialog>
);
};

View File

@@ -0,0 +1,40 @@
import React, { forwardRef } from 'react';
import { TooltipAnchor } from '~/components/ui';
import { SendIcon } from '~/components/svg';
import { useLocalize } from '~/hooks';
import { cn } from '~/utils';
const Button = React.memo(
forwardRef((props: { disabled: boolean }, ref: React.ForwardedRef<HTMLButtonElement>) => {
const localize = useLocalize();
return (
<TooltipAnchor
description={localize('com_nav_call_mode')}
render={
<button
ref={ref}
aria-label={localize('com_nav_call_mode')}
id="call-button"
disabled={props.disabled}
className={cn(
'rounded-full bg-text-primary p-2 text-text-primary outline-offset-4 transition-all duration-200 disabled:cursor-not-allowed disabled:text-text-secondary disabled:opacity-10',
)}
data-testid="call-button"
type="submit"
>
<span className="" data-state="closed">
<SendIcon size={24} />
</span>
</button>
}
/>
);
}),
);
const CallButton = React.memo(
forwardRef((props: { disabled: boolean }, ref: React.ForwardedRef<HTMLButtonElement>) => {
return <Button ref={ref} disabled={props.disabled} />;
}),
);
export default CallButton;

View File

@@ -34,6 +34,7 @@ import StreamAudio from './StreamAudio';
import StopButton from './StopButton';
import SendButton from './SendButton';
import Mention from './Mention';
import { Call } from './Call';
import store from '~/store';
const ChatForm = ({ index = 0 }) => {
@@ -156,116 +157,119 @@ const ChatForm = ({ index = 0 }) => {
: `pl-${uploadActive ? '12' : '4'} pr-12`;
return (
<form
onSubmit={methods.handleSubmit((data) => submitMessage(data))}
className={cn(
'mx-auto flex flex-row gap-3 pl-2 transition-all duration-200 last:mb-2',
maximizeChatSpace ? 'w-full max-w-full' : 'md:max-w-2xl xl:max-w-3xl',
)}
>
<div className="relative flex h-full flex-1 items-stretch md:flex-col">
<div className="flex w-full items-center">
{showPlusPopover && !isAssistantsEndpoint(endpoint) && (
<Mention
setShowMentionPopover={setShowPlusPopover}
newConversation={generateConversation}
textAreaRef={textAreaRef}
commandChar="+"
placeholder="com_ui_add_model_preset"
includeAssistants={false}
/>
)}
{showMentionPopover && (
<Mention
setShowMentionPopover={setShowMentionPopover}
newConversation={newConversation}
textAreaRef={textAreaRef}
/>
)}
<PromptsCommand index={index} textAreaRef={textAreaRef} submitPrompt={submitPrompt} />
<div className="transitional-all relative flex w-full flex-grow flex-col overflow-hidden rounded-3xl bg-surface-tertiary text-text-primary duration-200">
<TemporaryChat
isTemporaryChat={isTemporaryChat}
setIsTemporaryChat={setIsTemporaryChat}
/>
<TextareaHeader addedConvo={addedConvo} setAddedConvo={setAddedConvo} />
<FileFormWrapper disableInputs={disableInputs}>
{endpoint && (
<>
<CollapseChat
isCollapsed={isCollapsed}
isScrollable={isScrollable}
setIsCollapsed={setIsCollapsed}
/>
<TextareaAutosize
{...registerProps}
ref={(e) => {
ref(e);
textAreaRef.current = e;
}}
disabled={disableInputs}
onPaste={handlePaste}
onKeyDown={handleKeyDown}
onKeyUp={handleKeyUp}
onHeightChange={() => {
if (textAreaRef.current) {
const scrollable = checkIfScrollable(textAreaRef.current);
setIsScrollable(scrollable);
}
}}
onCompositionStart={handleCompositionStart}
onCompositionEnd={handleCompositionEnd}
id={mainTextareaId}
tabIndex={0}
data-testid="text-input"
rows={1}
onFocus={() => isCollapsed && setIsCollapsed(false)}
onClick={() => isCollapsed && setIsCollapsed(false)}
style={{ height: 44, overflowY: 'auto' }}
className={cn(
baseClasses,
speechClass,
removeFocusRings,
'transition-[max-height] duration-200',
)}
/>
</>
)}
</FileFormWrapper>
{SpeechToText && (
<AudioRecorder
isRTL={isRTL}
methods={methods}
ask={submitMessage}
textAreaRef={textAreaRef}
disabled={!!disableInputs}
isSubmitting={isSubmitting}
/>
)}
{TextToSpeech && automaticPlayback && <StreamAudio index={index} />}
</div>
<div
className={cn(
'mb-[5px] ml-[8px] flex flex-col items-end justify-end',
isRTL && 'order-first mr-[8px]',
)}
style={{ alignSelf: 'flex-end' }}
>
{(isSubmitting || isSubmittingAdded) && (showStopButton || showStopAdded) ? (
<StopButton stop={handleStopGenerating} setShowStopButton={setShowStopButton} />
) : (
endpoint && (
<SendButton
ref={submitButtonRef}
control={methods.control}
disabled={!!(filesLoading || isSubmitting || disableInputs)}
/>
)
)}
</div>
</div>
</div>
</form>
<>
<form
onSubmit={methods.handleSubmit((data) => submitMessage(data))}
className={cn(
'mx-auto flex flex-row gap-3 pl-2 transition-all duration-200 last:mb-2',
maximizeChatSpace ? 'w-full max-w-full' : 'md:max-w-2xl xl:max-w-3xl',
)}
>
<div className="relative flex h-full flex-1 items-stretch md:flex-col">
<div className="flex w-full items-center">
{showPlusPopover && !isAssistantsEndpoint(endpoint) && (
<Mention
setShowMentionPopover={setShowPlusPopover}
newConversation={generateConversation}
textAreaRef={textAreaRef}
commandChar="+"
placeholder="com_ui_add_model_preset"
includeAssistants={false}
/>
)}
{showMentionPopover && (
<Mention
setShowMentionPopover={setShowMentionPopover}
newConversation={newConversation}
textAreaRef={textAreaRef}
/>
)}
<PromptsCommand index={index} textAreaRef={textAreaRef} submitPrompt={submitPrompt} />
<div className="transitional-all relative flex w-full flex-grow flex-col overflow-hidden rounded-3xl bg-surface-tertiary text-text-primary duration-200">
<TemporaryChat
isTemporaryChat={isTemporaryChat}
setIsTemporaryChat={setIsTemporaryChat}
/>
<TextareaHeader addedConvo={addedConvo} setAddedConvo={setAddedConvo} />
<FileFormWrapper disableInputs={disableInputs}>
{endpoint && (
<>
<CollapseChat
isCollapsed={isCollapsed}
isScrollable={isScrollable}
setIsCollapsed={setIsCollapsed}
/>
<TextareaAutosize
{...registerProps}
ref={(e) => {
ref(e);
textAreaRef.current = e;
}}
disabled={disableInputs}
onPaste={handlePaste}
onKeyDown={handleKeyDown}
onKeyUp={handleKeyUp}
onHeightChange={() => {
if (textAreaRef.current) {
const scrollable = checkIfScrollable(textAreaRef.current);
setIsScrollable(scrollable);
}
}}
onCompositionStart={handleCompositionStart}
onCompositionEnd={handleCompositionEnd}
id={mainTextareaId}
tabIndex={0}
data-testid="text-input"
rows={1}
onFocus={() => isCollapsed && setIsCollapsed(false)}
onClick={() => isCollapsed && setIsCollapsed(false)}
style={{ height: 44, overflowY: 'auto' }}
className={cn(
baseClasses,
speechClass,
removeFocusRings,
'transition-[max-height] duration-200',
)}
/>
</>
)}
</FileFormWrapper>
{SpeechToText && (
<AudioRecorder
isRTL={isRTL}
methods={methods}
ask={submitMessage}
textAreaRef={textAreaRef}
disabled={!!disableInputs}
isSubmitting={isSubmitting}
/>
)}
{TextToSpeech && automaticPlayback && <StreamAudio index={index} />}
</div>
<div
className={cn(
'mb-[5px] ml-[8px] flex flex-col items-end justify-end',
isRTL && 'order-first mr-[8px]',
)}
style={{ alignSelf: 'flex-end' }}
>
{(isSubmitting || isSubmittingAdded) && (showStopButton || showStopAdded) ? (
<StopButton stop={handleStopGenerating} setShowStopButton={setShowStopButton} />
) : (
endpoint && (
<SendButton
ref={submitButtonRef}
control={methods.control}
disabled={!!(filesLoading || isSubmitting || disableInputs)}
/>
)
)}
</div>
</div>
</div>
</form>
<Call />
</>
);
};

View File

@@ -1,49 +1,104 @@
import React, { forwardRef } from 'react';
import { useWatch } from 'react-hook-form';
import { useSetRecoilState } from 'recoil';
import type { TRealtimeEphemeralTokenResponse } from 'librechat-data-provider';
import type { Control } from 'react-hook-form';
import { TooltipAnchor } from '~/components/ui';
import { SendIcon } from '~/components/svg';
import { useRealtimeEphemeralTokenMutation } from '~/data-provider';
import { TooltipAnchor, SendIcon, CallIcon } from '~/components';
import { useToastContext } from '~/Providers/ToastContext';
import { useLocalize } from '~/hooks';
import store from '~/store';
import { cn } from '~/utils';
type SendButtonProps = {
type ButtonProps = {
disabled: boolean;
control: Control<{ text: string }>;
};
const SubmitButton = React.memo(
forwardRef((props: { disabled: boolean }, ref: React.ForwardedRef<HTMLButtonElement>) => {
const localize = useLocalize();
const ActionButton = forwardRef(
(
props: {
disabled: boolean;
icon: React.ReactNode;
tooltip: string;
testId: string;
onClick?: () => void;
},
ref: React.ForwardedRef<HTMLButtonElement>,
) => {
return (
<TooltipAnchor
description={localize('com_nav_send_message')}
description={props.tooltip}
render={
<button
ref={ref}
aria-label={localize('com_nav_send_message')}
id="send-button"
aria-label={props.tooltip}
id="action-button"
disabled={props.disabled}
className={cn(
'rounded-full bg-text-primary p-2 text-text-primary outline-offset-4 transition-all duration-200 disabled:cursor-not-allowed disabled:text-text-secondary disabled:opacity-10',
'rounded-full bg-text-primary p-2 text-text-primary outline-offset-4',
'transition-all duration-200',
'disabled:cursor-not-allowed disabled:text-text-secondary disabled:opacity-10',
)}
data-testid="send-button"
data-testid={props.testId}
type="submit"
onClick={props.onClick}
>
<span className="" data-state="closed">
<SendIcon size={24} />
{props.icon}
</span>
</button>
}
></TooltipAnchor>
/>
);
}),
},
);
const SendButton = React.memo(
forwardRef((props: SendButtonProps, ref: React.ForwardedRef<HTMLButtonElement>) => {
const data = useWatch({ control: props.control });
return <SubmitButton ref={ref} disabled={props.disabled || !data.text} />;
}),
);
const SendButton = forwardRef((props: ButtonProps, ref: React.ForwardedRef<HTMLButtonElement>) => {
const localize = useLocalize();
const { showToast } = useToastContext();
const { text = '' } = useWatch({ control: props.control });
const setCallOpen = useSetRecoilState(store.callDialogOpen(0));
// const { mutate: startCall, isLoading: isProcessing } = useRealtimeEphemeralTokenMutation({
// onSuccess: async (data: TRealtimeEphemeralTokenResponse) => {
// showToast({
// message: 'IT WORKS!!',
// status: 'success',
// });
// },
// onError: (error: unknown) => {
// showToast({
// message: localize('com_nav_audio_process_error', (error as Error).message),
// status: 'error',
// });
// },
// });
const handleClick = () => {
if (text.trim() === '') {
setCallOpen(true);
// startCall({ voice: 'verse' });
}
};
const buttonProps =
text.trim() !== ''
? {
icon: <SendIcon size={24} />,
tooltip: localize('com_nav_send_message'),
testId: 'send-button',
}
: {
icon: <CallIcon size={24} />,
tooltip: localize('com_nav_call'),
testId: 'call-button',
onClick: handleClick,
};
return <ActionButton ref={ref} disabled={props.disabled} {...buttonProps} />;
});
SendButton.displayName = 'SendButton';
export default SendButton;

View File

@@ -0,0 +1,30 @@
import { cn } from '~/utils';
export default function CallIcon({ size = 24, className = '' }) {
return (
<svg
width={size}
height={size}
viewBox={'0 0 24 24'}
fill="none"
className={cn('text-white dark:text-black', className)}
>
<path
d="M9.5 4C8.67157 4 8 4.67157 8 5.5V18.5C8 19.3284 8.67157 20 9.5 20C10.3284 20 11 19.3284 11 18.5V5.5C11 4.67157 10.3284 4 9.5 4Z"
fill="currentColor"
></path>
<path
d="M13 8.5C13 7.67157 13.6716 7 14.5 7C15.3284 7 16 7.67157 16 8.5V15.5C16 16.3284 15.3284 17 14.5 17C13.6716 17 13 16.3284 13 15.5V8.5Z"
fill="currentColor"
></path>
<path
d="M4.5 9C3.67157 9 3 9.67157 3 10.5V13.5C3 14.3284 3.67157 15 4.5 15C5.32843 15 6 14.3284 6 13.5V10.5C6 9.67157 5.32843 9 4.5 9Z"
fill="currentColor"
></path>
<path
d="M19.5 9C18.6716 9 18 9.67157 18 10.5V13.5C18 14.3284 18.6716 15 19.5 15C20.3284 15 21 14.3284 21 13.5V10.5C21 9.67157 20.3284 9 19.5 9Z"
fill="currentColor"
></path>
</svg>
);
}

View File

@@ -56,3 +56,4 @@ export { default as SpeechIcon } from './SpeechIcon';
export { default as SaveIcon } from './SaveIcon';
export { default as CircleHelpIcon } from './CircleHelpIcon';
export { default as BedrockIcon } from './BedrockIcon';
export { default as CallIcon } from './CallIcon';

View File

@@ -726,6 +726,21 @@ export const useTextToSpeechMutation = (
});
};
export const useRealtimeEphemeralTokenMutation = (
options?: t.MutationOptions<t.TRealtimeEphemeralTokenResponse, t.TRealtimeEphemeralTokenRequest>,
): UseMutationResult<
t.TRealtimeEphemeralTokenResponse,
unknown,
t.TRealtimeEphemeralTokenRequest,
unknown
> => {
return useMutation([MutationKeys.realtimeEphemeralToken], {
mutationFn: (data: t.TRealtimeEphemeralTokenRequest) =>
dataService.getRealtimeEphemeralToken(data),
...(options || {}),
});
};
/**
* ASSISTANTS
*/
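
Once the commented-out block in SendButton.tsx is revived, consuming this mutation would look roughly like the sketch below (the hook and types are the ones added in this hunk; the component wiring is illustrative):

const { mutate: startCall, isLoading } = useRealtimeEphemeralTokenMutation({
  onSuccess: (data: t.TRealtimeEphemeralTokenResponse) => {
    // data.token carries the ephemeral session, data.url the realtime endpoint
    console.log('realtime session ready at', data.url);
  },
  onError: (error: unknown) => console.error(error),
});

startCall({ voice: 'verse' });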

View File

@@ -18,10 +18,12 @@ export * from './AuthContext';
export * from './ThemeContext';
export * from './ScreenshotContext';
export * from './ApiErrorBoundaryContext';
export { default as useCall } from './useCall';
export { default as useToast } from './useToast';
export { default as useTimeout } from './useTimeout';
export { default as useNewConvo } from './useNewConvo';
export { default as useLocalize } from './useLocalize';
export { default as useWebSocket } from './useWebSocket';
export type { TranslationKeys } from './useLocalize';
export { default as useMediaQuery } from './useMediaQuery';
export { default as useScrollToRef } from './useScrollToRef';

client/src/hooks/useCall.ts (new file)
View File

@@ -0,0 +1,281 @@
import { useState, useRef, useCallback, useEffect } from 'react';
import { WebRTCService, ConnectionState, useVADSetup } from '../services/WebRTC/WebRTCService';
import useWebSocket, { WebSocketEvents } from './useWebSocket';
interface CallError {
code: string;
message: string;
}
export enum CallState {
IDLE = 'idle',
CONNECTING = 'connecting',
ACTIVE = 'active',
ERROR = 'error',
ENDED = 'ended',
}
interface CallStatus {
callState: CallState;
isConnecting: boolean;
error: CallError | null;
localStream: MediaStream | null;
remoteStream: MediaStream | null;
connectionQuality: 'good' | 'poor' | 'unknown';
isUserSpeaking: boolean;
remoteAISpeaking: boolean;
}
const INITIAL_STATUS: CallStatus = {
callState: CallState.IDLE,
isConnecting: false,
error: null,
localStream: null,
remoteStream: null,
connectionQuality: 'unknown',
isUserSpeaking: false,
remoteAISpeaking: false,
};
const useCall = () => {
const { isConnected, sendMessage, addEventListener } = useWebSocket();
const [status, setStatus] = useState<CallStatus>(INITIAL_STATUS);
const webrtcServiceRef = useRef<WebRTCService | null>(null);
const statsIntervalRef = useRef<NodeJS.Timeout>();
const [isMuted, setIsMuted] = useState(false);
const vad = useVADSetup(webrtcServiceRef.current);
const updateStatus = useCallback((updates: Partial<CallStatus>) => {
setStatus((prev) => ({ ...prev, ...updates }));
}, []);
useEffect(() => {
updateStatus({ isUserSpeaking: vad.userSpeaking });
}, [vad.userSpeaking, updateStatus]);
const handleRemoteStream = (stream: MediaStream | null) => {
if (!stream) {
console.error('[WebRTC] Received null remote stream');
updateStatus({
error: {
code: 'NO_REMOTE_STREAM',
message: 'No remote stream received',
},
});
return;
}
const audioTracks = stream.getAudioTracks();
if (!audioTracks.length) {
console.error('[WebRTC] No audio tracks in remote stream');
updateStatus({
error: {
code: 'NO_AUDIO_TRACKS',
message: 'Remote stream contains no audio',
},
});
return;
}
updateStatus({
remoteStream: stream,
callState: CallState.ACTIVE,
});
};
const handleConnectionStateChange = useCallback(
(state: ConnectionState) => {
switch (state) {
case ConnectionState.CONNECTED:
updateStatus({
callState: CallState.ACTIVE,
isConnecting: false,
});
break;
case ConnectionState.CONNECTING:
case ConnectionState.RECONNECTING:
updateStatus({
callState: CallState.CONNECTING,
isConnecting: true,
});
break;
case ConnectionState.FAILED:
updateStatus({
callState: CallState.ERROR,
isConnecting: false,
error: {
code: 'CONNECTION_FAILED',
message: 'Connection failed. Please try again.',
},
});
break;
case ConnectionState.CLOSED:
updateStatus({
...INITIAL_STATUS,
callState: CallState.ENDED,
});
break;
}
},
[updateStatus],
);
const startConnectionMonitoring = useCallback(() => {
if (!webrtcServiceRef.current) {
return;
}
statsIntervalRef.current = setInterval(async () => {
const stats = await webrtcServiceRef.current?.getStats();
if (!stats) {
return;
}
let totalRoundTripTime = 0;
let samplesCount = 0;
stats.forEach((report) => {
if (report.type === 'candidate-pair' && report.currentRoundTripTime) {
totalRoundTripTime += report.currentRoundTripTime;
samplesCount++;
}
});
const averageRTT = samplesCount > 0 ? totalRoundTripTime / samplesCount : 0;
updateStatus({
connectionQuality: averageRTT < 0.3 ? 'good' : 'poor',
});
}, 2000);
}, [updateStatus]);
const startCall = useCallback(async () => {
if (!isConnected) {
console.log('Cannot start call - not connected to server');
updateStatus({
callState: CallState.ERROR,
error: {
code: 'NOT_CONNECTED',
message: 'Not connected to server',
},
});
return;
}
try {
console.log('Starting new call...');
if (webrtcServiceRef.current) {
console.log('Cleaning up existing WebRTC connection');
webrtcServiceRef.current.close();
}
updateStatus({
callState: CallState.CONNECTING,
isConnecting: true,
error: null,
});
webrtcServiceRef.current = new WebRTCService(sendMessage, {
debug: true,
});
webrtcServiceRef.current.on('connectionStateChange', handleConnectionStateChange);
webrtcServiceRef.current.on('remoteStream', handleRemoteStream);
webrtcServiceRef.current.on('vadStatusChange', (speaking: boolean) => {
updateStatus({ isUserSpeaking: speaking });
});
webrtcServiceRef.current.on('error', (error: string) => {
console.error('WebRTC error:', error);
updateStatus({
callState: CallState.ERROR,
isConnecting: false,
error: {
code: 'WEBRTC_ERROR',
message: error,
},
});
});
console.log('Initializing WebRTC connection...');
await webrtcServiceRef.current.initialize();
console.log('WebRTC initialization complete');
startConnectionMonitoring();
} catch (error) {
console.error('Failed to start call:', error);
updateStatus({
callState: CallState.ERROR,
isConnecting: false,
error: {
code: 'INITIALIZATION_FAILED',
message: error instanceof Error ? error.message : 'Failed to start call',
},
});
}
}, [
isConnected,
sendMessage,
handleConnectionStateChange,
startConnectionMonitoring,
updateStatus,
]);
const hangUp = useCallback(() => {
if (webrtcServiceRef.current) {
webrtcServiceRef.current.close();
webrtcServiceRef.current = null;
}
if (statsIntervalRef.current) {
clearInterval(statsIntervalRef.current);
}
updateStatus({
...INITIAL_STATUS,
callState: CallState.ENDED,
});
}, [updateStatus]);
useEffect(() => {
const cleanupFns = [
addEventListener(WebSocketEvents.WEBRTC_ANSWER, (answer: RTCSessionDescriptionInit) => {
webrtcServiceRef.current?.handleAnswer(answer);
}),
addEventListener(WebSocketEvents.ICE_CANDIDATE, (candidate: RTCIceCandidateInit) => {
webrtcServiceRef.current?.addIceCandidate(candidate);
}),
];
return () => cleanupFns.forEach((fn) => fn());
}, [addEventListener, updateStatus]);
const toggleMute = useCallback(() => {
if (webrtcServiceRef.current) {
const newMutedState = !isMuted;
webrtcServiceRef.current.setMuted(newMutedState);
setIsMuted(newMutedState);
}
}, [isMuted]);
useEffect(() => {
if (webrtcServiceRef.current) {
const handleMuteChange = (muted: boolean) => setIsMuted(muted);
webrtcServiceRef.current.on('muteStateChange', handleMuteChange);
return () => {
webrtcServiceRef.current?.off('muteStateChange', handleMuteChange);
};
}
}, []);
return {
...status,
isMuted,
toggleMute,
startCall,
hangUp,
vadLoading: vad.loading,
vadError: vad.errored,
};
};
export default useCall;
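
A component consuming this hook only needs the returned state plus the two imperative calls; Call.tsx above is the real consumer, but a minimal sketch looks like this (import path assumed from this file's location):

import React from 'react';
import useCall, { CallState } from '~/hooks/useCall';

export function CallControls() {
  const { callState, isConnecting, startCall, hangUp, error } = useCall();
  const active = callState === CallState.ACTIVE;
  return (
    <div>
      {error && <span>{error.message}</span>}
      <button onClick={active ? hangUp : startCall} disabled={isConnecting}>
        {active ? 'End Call' : isConnecting ? 'Connecting...' : 'Start Call'}
      </button>
    </div>
  );
}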

View File

@@ -0,0 +1,140 @@
import { useEffect, useRef, useState } from 'react';
import { useGetWebsocketUrlQuery } from 'librechat-data-provider/react-query';
import type { MessagePayload } from '~/common';
import { io, Socket } from 'socket.io-client';
import { EventEmitter } from 'events';
export const WebSocketEvents = {
CALL_STARTED: 'call-started',
CALL_ERROR: 'call-error',
WEBRTC_ANSWER: 'webrtc-answer',
ICE_CANDIDATE: 'icecandidate',
} as const;
type EventHandler = (...args: unknown[]) => void;
class WebSocketManager extends EventEmitter {
private socket: Socket | null = null;
private reconnectAttempts = 0;
private readonly MAX_RECONNECT_ATTEMPTS = 5;
private isConnected = false;
connect(url: string) {
if (this.socket && this.socket.connected) {
return;
}
this.socket = io(url, {
transports: ['websocket'],
reconnectionAttempts: this.MAX_RECONNECT_ATTEMPTS,
timeout: 10000,
});
this.setupEventHandlers();
}
private setupEventHandlers() {
if (!this.socket) {
return;
}
this.socket.on('connect', () => {
this.isConnected = true;
this.reconnectAttempts = 0;
this.emit('connectionChange', true);
});
this.socket.on('disconnect', (reason) => {
this.isConnected = false;
this.emit('connectionChange', false);
});
this.socket.on('connect_error', (error) => {
this.reconnectAttempts++;
this.emit('connectionChange', false);
if (this.reconnectAttempts >= this.MAX_RECONNECT_ATTEMPTS) {
this.emit('error', 'Failed to connect after maximum attempts');
this.disconnect();
}
});
// WebRTC signals
this.socket.on(WebSocketEvents.CALL_STARTED, () => {
this.emit(WebSocketEvents.CALL_STARTED);
});
this.socket.on(WebSocketEvents.WEBRTC_ANSWER, (answer) => {
this.emit(WebSocketEvents.WEBRTC_ANSWER, answer);
});
this.socket.on(WebSocketEvents.ICE_CANDIDATE, (candidate) => {
this.emit(WebSocketEvents.ICE_CANDIDATE, candidate);
});
this.socket.on('error', (error) => {
this.emit('error', error);
});
}
disconnect() {
if (this.socket) {
this.socket.disconnect();
this.socket = null;
}
this.isConnected = false;
}
sendMessage(type: string, payload?: MessagePayload) {
if (!this.socket || !this.socket.connected) {
return false;
}
this.socket.emit(type, payload);
return true;
}
getConnectionState() {
return this.isConnected;
}
}
export const webSocketManager = new WebSocketManager();
const useWebSocket = () => {
const { data: wsConfig } = useGetWebsocketUrlQuery();
const [isConnected, setIsConnected] = useState(false);
const eventHandlersRef = useRef<Record<string, EventHandler>>({});
useEffect(() => {
if (wsConfig?.url && !webSocketManager.getConnectionState()) {
webSocketManager.connect(wsConfig.url);
const handleConnectionChange = (connected: boolean) => setIsConnected(connected);
webSocketManager.on('connectionChange', handleConnectionChange);
webSocketManager.on('error', console.error);
return () => {
webSocketManager.off('connectionChange', handleConnectionChange);
webSocketManager.off('error', console.error);
};
}
}, [wsConfig?.url]);
const sendMessage = (message: { type: string; payload?: MessagePayload }) => {
return webSocketManager.sendMessage(message.type, message.payload);
};
const addEventListener = (event: string, handler: EventHandler) => {
eventHandlersRef.current[event] = handler;
webSocketManager.on(event, handler);
return () => {
webSocketManager.off(event, handler);
delete eventHandlersRef.current[event];
};
};
return {
isConnected,
sendMessage,
addEventListener,
};
};
export default useWebSocket;

View File

@@ -0,0 +1,422 @@
import { useEffect } from 'react';
import { EventEmitter } from 'events';
import { useMicVAD } from '@ricky0123/vad-react';
import type { MessagePayload } from '~/common';
export enum ConnectionState {
IDLE = 'idle',
CONNECTING = 'connecting',
CONNECTED = 'connected',
RECONNECTING = 'reconnecting',
FAILED = 'failed',
CLOSED = 'closed',
}
export enum MediaState {
INACTIVE = 'inactive',
PENDING = 'pending',
ACTIVE = 'active',
FAILED = 'failed',
}
interface WebRTCConfig {
iceServers?: RTCIceServer[];
maxReconnectAttempts?: number;
connectionTimeout?: number;
debug?: boolean;
}
export function useVADSetup(webrtcService: WebRTCService | null) {
const vad = useMicVAD({
startOnLoad: true,
onSpeechStart: () => {
// Only emit speech events if not muted
if (webrtcService && !webrtcService.isMuted()) {
webrtcService.handleVADStatusChange(true);
}
},
onSpeechEnd: () => {
// Only emit speech events if not muted
if (webrtcService && !webrtcService.isMuted()) {
webrtcService.handleVADStatusChange(false);
}
},
onVADMisfire: () => {
if (webrtcService && !webrtcService.isMuted()) {
webrtcService.handleVADStatusChange(false);
}
},
});
// Add effect to handle mute state changes
useEffect(() => {
if (webrtcService) {
const handleMuteChange = (muted: boolean) => {
if (muted) {
// Stop VAD processing when muted
vad.pause();
} else {
// Resume VAD processing when unmuted
vad.start();
}
};
webrtcService.on('muteStateChange', handleMuteChange);
return () => {
webrtcService.off('muteStateChange', handleMuteChange);
};
}
}, [webrtcService, vad]);
return vad;
}
export class WebRTCService extends EventEmitter {
private peerConnection: RTCPeerConnection | null = null;
private localStream: MediaStream | null = null;
private remoteStream: MediaStream | null = null;
private reconnectAttempts = 0;
private connectionTimeoutId: NodeJS.Timeout | null = null;
private config: Required<WebRTCConfig>;
private connectionState: ConnectionState = ConnectionState.IDLE;
private mediaState: MediaState = MediaState.INACTIVE;
private isUserSpeaking = false;
private muted = false;
private readonly DEFAULT_CONFIG: Required<WebRTCConfig> = {
iceServers: [
{
urls: ['stun:stun.l.google.com:19302', 'stun:stun1.l.google.com:19302'],
},
],
maxReconnectAttempts: 3,
connectionTimeout: 30000,
debug: false,
};
constructor(
private readonly sendMessage: (message: { type: string; payload?: MessagePayload }) => boolean,
config: WebRTCConfig = {},
) {
super();
this.config = { ...this.DEFAULT_CONFIG, ...config };
this.log('WebRTCService initialized with config:', this.config);
}
private log(...args: unknown[]) {
if (this.config.debug) {
console.log('[WebRTC]', ...args);
}
}
private setConnectionState(state: ConnectionState) {
this.connectionState = state;
this.emit('connectionStateChange', state);
this.log('Connection state changed to:', state);
}
private setMediaState(state: MediaState) {
this.mediaState = state;
this.emit('mediaStateChange', state);
this.log('Media state changed to:', state);
}
public handleVADStatusChange(isSpeaking: boolean) {
if (this.isUserSpeaking !== isSpeaking) {
this.isUserSpeaking = isSpeaking;
this.sendMessage({
type: 'vad-status',
payload: { speaking: isSpeaking },
});
this.emit('vadStatusChange', isSpeaking);
}
}
public setMuted(muted: boolean) {
if (!this.localStream) {
return;
}
this.muted = muted;
if (muted) {
// Stop the tracks completely when muted instead of just disabling them
this.localStream.getAudioTracks().forEach((track) => track.stop());
// Ensure VAD knows we're not speaking when muted
this.handleVADStatusChange(false);
} else {
// Unmuting needs a fresh audio track, since the muted one was stopped
this.refreshAudioTrack();
}
this.emit('muteStateChange', muted);
}
public isMuted(): boolean {
// Track mute state explicitly: a stopped track still reports `enabled === true`,
// so checking track.enabled here would always claim the mic is live
return this.muted;
}
private async refreshAudioTrack() {
try {
const newStream = await navigator.mediaDevices.getUserMedia({
audio: {
echoCancellation: true,
noiseSuppression: true,
autoGainControl: true,
},
});
const newTrack = newStream.getAudioTracks()[0];
if (this.localStream && this.peerConnection) {
const oldTrack = this.localStream.getAudioTracks()[0];
if (oldTrack) {
this.localStream.removeTrack(oldTrack);
}
this.localStream.addTrack(newTrack);
// Update the sender with the new track
const senders = this.peerConnection.getSenders();
const audioSender = senders.find((sender) => sender.track?.kind === 'audio');
if (audioSender) {
audioSender.replaceTrack(newTrack);
}
}
} catch (error) {
this.handleError(error);
}
}
async initialize() {
try {
this.setConnectionState(ConnectionState.CONNECTING);
this.setMediaState(MediaState.PENDING);
this.localStream = await navigator.mediaDevices.getUserMedia({
audio: {
echoCancellation: true,
noiseSuppression: true,
autoGainControl: true,
},
});
this.peerConnection = new RTCPeerConnection({
iceServers: this.config.iceServers,
iceCandidatePoolSize: 10,
bundlePolicy: 'max-bundle',
rtcpMuxPolicy: 'require',
});
this.setupPeerConnectionListeners();
this.localStream.getTracks().forEach((track) => {
if (this.localStream && this.peerConnection) {
this.peerConnection.addTrack(track, this.localStream);
}
});
this.startConnectionTimeout();
await this.createAndSendOffer();
this.setMediaState(MediaState.ACTIVE);
} catch (error) {
this.log('Initialization error:', error);
this.handleError(error);
}
}
private sendSignalingMessage(message: { type: string; payload?: MessagePayload }) {
const sent = this.sendMessage(message);
if (!sent) {
this.handleError(new Error('Failed to send signaling message - WebSocket not connected'));
}
}
private setupPeerConnectionListeners() {
if (!this.peerConnection) {
return;
}
this.peerConnection.ontrack = ({ track, streams }) => {
this.log('Track received:', {
kind: track.kind,
enabled: track.enabled,
readyState: track.readyState,
});
if (track.kind === 'audio') {
if (!this.remoteStream) {
this.remoteStream = new MediaStream();
}
this.remoteStream.addTrack(track);
if (this.peerConnection) {
this.peerConnection.addTrack(track, this.remoteStream);
}
this.log('Audio track added to remote stream', {
tracks: this.remoteStream.getTracks().length,
active: this.remoteStream.active,
});
this.emit('remoteStream', this.remoteStream);
}
};
this.peerConnection.onconnectionstatechange = () => {
if (!this.peerConnection) {
return;
}
const state = this.peerConnection.connectionState;
this.log('Connection state changed:', state);
switch (state) {
case 'connected':
this.clearConnectionTimeout();
this.setConnectionState(ConnectionState.CONNECTED);
break;
case 'disconnected':
case 'failed':
if (this.reconnectAttempts < this.config.maxReconnectAttempts) {
this.attemptReconnection();
} else {
this.handleError(new Error('Connection failed after max reconnection attempts'));
}
break;
case 'closed':
this.setConnectionState(ConnectionState.CLOSED);
break;
}
};
}
private async createAndSendOffer() {
if (!this.peerConnection) {
return;
}
try {
const offer = await this.peerConnection.createOffer({
offerToReceiveAudio: true,
});
await this.peerConnection.setLocalDescription(offer);
this.sendSignalingMessage({
type: 'webrtc-offer',
payload: offer,
});
} catch (error) {
this.handleError(error);
}
}
public async handleAnswer(answer: RTCSessionDescriptionInit) {
if (!this.peerConnection) {
return;
}
try {
await this.peerConnection.setRemoteDescription(new RTCSessionDescription(answer));
this.log('Remote description set successfully');
} catch (error) {
this.handleError(error);
}
}
public async addIceCandidate(candidate: RTCIceCandidateInit) {
if (!this.peerConnection?.remoteDescription) {
this.log('Delaying ICE candidate addition - no remote description');
return;
}
try {
await this.peerConnection.addIceCandidate(new RTCIceCandidate(candidate));
this.log('ICE candidate added successfully');
} catch (error) {
this.handleError(error);
}
}
private startConnectionTimeout() {
this.clearConnectionTimeout();
this.connectionTimeoutId = setTimeout(() => {
if (
this.connectionState !== ConnectionState.CONNECTED &&
this.connectionState !== ConnectionState.CONNECTING
) {
this.handleError(new Error('Connection timeout'));
}
}, this.config.connectionTimeout);
}
private clearConnectionTimeout() {
if (this.connectionTimeoutId) {
clearTimeout(this.connectionTimeoutId);
this.connectionTimeoutId = null;
}
}
private async attemptReconnection() {
this.reconnectAttempts++;
this.log(
`Attempting reconnection (${this.reconnectAttempts}/${this.config.maxReconnectAttempts})`,
);
this.setConnectionState(ConnectionState.RECONNECTING);
this.emit('reconnecting', this.reconnectAttempts);
try {
if (this.peerConnection) {
const offer = await this.peerConnection.createOffer({ iceRestart: true });
await this.peerConnection.setLocalDescription(offer);
this.sendSignalingMessage({
type: 'webrtc-offer',
payload: offer,
});
}
} catch (error) {
this.handleError(error);
}
}
private handleError(error: Error | unknown) {
const errorMessage = error instanceof Error ? error.message : 'Unknown error occurred';
this.log('Error:', errorMessage);
if (this.connectionState !== ConnectionState.CONNECTED) {
this.setConnectionState(ConnectionState.FAILED);
this.emit('error', errorMessage);
this.close();
}
}
public close() {
this.clearConnectionTimeout();
if (this.localStream) {
this.localStream.getTracks().forEach((track) => track.stop());
this.localStream = null;
}
if (this.peerConnection) {
this.peerConnection.close();
this.peerConnection = null;
}
this.setConnectionState(ConnectionState.CLOSED);
this.setMediaState(MediaState.INACTIVE);
}
public getStats(): Promise<RTCStatsReport> | null {
return this.peerConnection?.getStats() ?? null;
}
}
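
Because the only dependency the class takes is a sendMessage callback matching the signaling transport, it can be driven outside React as well, which keeps it testable. A hedged standalone sketch, using the same import path as useCall.ts:

import { WebRTCService, ConnectionState } from '../services/WebRTC/WebRTCService';

async function demo() {
  const service = new WebRTCService(
    // Any transport works; here the signaling messages are just logged
    (message) => {
      console.log('signal out:', message.type);
      return true;
    },
    { debug: true },
  );

  service.on('connectionStateChange', (state: ConnectionState) => {
    console.log('state:', state);
  });

  // Captures the mic, builds the RTCPeerConnection, and sends the first offer
  await service.initialize();
}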

View File

@@ -368,6 +368,11 @@ const updateConversationSelector = selectorFamily({
},
});
const callDialogOpen = atomFamily<boolean, string | number | null>({
key: 'callDialogOpen',
default: false,
});
export default {
conversationKeysAtom,
conversationByIndex,
@@ -399,4 +404,5 @@ export default {
useClearLatestMessages,
showPromptsPopoverFamily,
updateConversationSelector,
callDialogOpen,
};

View File

@@ -62,7 +62,7 @@ export default defineConfig({
useCredentials: true,
workbox: {
globPatterns: ['assets/**/*.{png,jpg,svg,ico}', '**/*.{js,css,html,ico,woff2}'],
maximumFileSizeToCacheInBytes: 4 * 1024 * 1024,
maximumFileSizeToCacheInBytes: 5 * 1024 * 1024,
navigateFallbackDenylist: [/^\/oauth/],
},
manifest: {

package-lock.json (generated)

File diff suppressed because it is too large.

View File

@@ -171,7 +171,9 @@ export const textToSpeechManual = () => `${textToSpeech()}/manual`;
export const textToSpeechVoices = () => `${textToSpeech()}/voices`;
export const getCustomConfigSpeech = () => `${speech()}/config/get`;
export const getCustomConfigSpeech = () => `${speech()}/config`;
export const getRealtimeEphemeralToken = () => `${speech()}/realtime`;
export const getPromptGroup = (_id: string) => `${prompts()}/groups/${_id}`;
@@ -237,3 +239,5 @@ export const addTagToConversation = (conversationId: string) =>
export const userTerms = () => '/api/user/terms';
export const acceptUserTerms = () => '/api/user/terms/accept';
export const banner = () => '/api/banner';
export const websocket = () => '/api/websocket';

View File

@@ -379,6 +379,18 @@ const speechTab = z
})
.optional();
const realtime = z
.object({
openai: z
.object({
url: z.string().optional(),
apiKey: z.string().optional(),
voices: z.array(z.string()).optional(),
})
.optional(),
})
.optional();
export enum RateLimitPrefix {
FILE_UPLOAD = 'FILE_UPLOAD',
IMPORT = 'IMPORT',
@@ -534,6 +546,7 @@ export const configSchema = z.object({
tts: ttsSchema.optional(),
stt: sttSchema.optional(),
speechTab: speechTab.optional(),
realtime: realtime.optional(),
})
.optional(),
rateLimits: rateLimitSchema.optional(),
@@ -1135,6 +1148,13 @@ export enum TTSProviders {
LOCALAI = 'localai',
}
export enum RealtimeVoiceProviders {
/**
* Provider for OpenAI Realtime Voice API
*/
OPENAI = 'openai',
}
/** Enum for app-wide constants */
export enum Constants {
/** Key for the app's version. */
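
For reference, a speech block satisfying the new realtime schema would parse to an object shaped like the sketch below; the ${OPENAI_API_KEY} placeholder is the env-variable syntax that extractEnvVariable resolves inside RealtimeService, and all values are illustrative:

const speech = {
  realtime: {
    openai: {
      // Resolved at request time by extractEnvVariable
      apiKey: '${OPENAI_API_KEY}',
      url: 'https://api.openai.com/v1/realtime',
      voices: ['verse', 'alloy'],
    },
  },
};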

View File

@@ -576,6 +576,12 @@ export const getCustomConfigSpeech = (): Promise<t.TCustomConfigSpeechResponse>
return request.get(endpoints.getCustomConfigSpeech());
};
export const getRealtimeEphemeralToken = (
data: t.TRealtimeEphemeralTokenRequest,
): Promise<t.TRealtimeEphemeralTokenResponse> => {
return request.get(endpoints.getRealtimeEphemeralToken(), { params: data });
};
/* conversations */
export function duplicateConversation(
@@ -774,3 +780,7 @@ export function acceptTerms(): Promise<t.TAcceptTermsResponse> {
export function getBanner(): Promise<t.TBannerResponse> {
return request.get(endpoints.banner());
}
export function getWebsocketUrl(): Promise<t.TWebsocketUrlResponse> {
return request.get(endpoints.websocket());
}

View File

@@ -46,6 +46,7 @@ export enum QueryKeys {
health = 'health',
userTerms = 'userTerms',
banner = 'banner',
websocketUrl = 'websocketUrl',
}
export enum MutationKeys {
@@ -67,4 +68,5 @@ export enum MutationKeys {
deleteAgentAction = 'deleteAgentAction',
deleteUser = 'deleteUser',
updateRole = 'updateRole',
realtimeEphemeralToken = 'realtimeEphemeralToken',
}

View File

@@ -376,3 +376,14 @@ export const useGetCustomConfigSpeechQuery = (
},
);
};
export const useGetWebsocketUrlQuery = (
config?: UseQueryOptions<t.TWebsocketUrlResponse>,
): QueryObserverResult<t.TWebsocketUrlResponse> => {
return useQuery<t.TWebsocketUrlResponse>(
[QueryKeys.websocketUrl],
() => dataService.getWebsocketUrl(),
{
refetchOnWindowFocus: false,
refetchOnReconnect: false,
refetchOnMount: false,
...config,
},
);
};

View File

@@ -10,6 +10,7 @@ import type {
TConversationTag,
TBanner,
} from './schemas';
export type TOpenAIMessage = OpenAI.Chat.ChatCompletionMessageParam;
export * from './schemas';
@@ -472,3 +473,16 @@ export type TAcceptTermsResponse = {
};
export type TBannerResponse = TBanner | null;
export type TRealtimeEphemeralTokenRequest = {
voice: string;
};
export type TRealtimeEphemeralTokenResponse = {
token: string;
url: string;
};
export type TWebsocketUrlResponse = {
url: string;
};