From 269958f0a98c22c11f74ffc42ead602d1df17b7f Mon Sep 17 00:00:00 2001 From: Thomas Marchand Date: Sat, 3 Jan 2026 22:16:02 +0000 Subject: [PATCH] Fix SSE event filtering and add Picture-in-Picture support (#25) * Fix SSE event filtering race condition in mission views Events were being filtered out during mission load due to a race condition where viewingMissionId was set before currentMission finished loading. Now events only get filtered when both IDs are set and different, allowing streaming updates to display while missions are loading. * Improve desktop stream UX with auto-open and auto-close - Auto-extract display ID from desktop_start_session tool result - Auto-open desktop stream when agent starts a desktop session - Auto-close desktop stream when agent finishes (status becomes idle) - Apply same improvements to both web and iOS dashboards * Fix desktop display extraction from JSON string results Tool results may be returned as JSON strings rather than parsed objects. Handle both cases when extracting the display ID from desktop_start_session. * Fix desktop stream staying open when status=idle during loading The event filtering was updated to allow events through when currentMissionId is null (during initial load), but the status application logic wasn't updated to match. This created a window where tool_result could open the desktop stream but status=idle wouldn't close it because shouldApplyStatus was false. Now both the event filter and status application logic use consistent conditions: allow when currentMissionId hasn't loaded yet. * Fix desktop auto-open and add Picture-in-Picture support - Use tool_result event's name field directly for desktop_start_session detection (fixes auto-open when tool_call event was filtered or missed) - Add native Picture-in-Picture button to desktop stream - Converts canvas to video stream for OS-level floating window - Works outside the browser tab - Shows PiP button only when browser supports it * Add iOS Picture-in-Picture support for desktop stream - Implement AVSampleBufferDisplayLayer-based PiP for iOS - Convert JPEG frames to CMSampleBuffer for PiP playback - Add PiP buttons to desktop stream header and controls - Fix web dashboard auto-open to use tool name from event data directly - Add audio background mode to Info.plist for PiP support * Fix React anti-patterns flagged by Bugbot - Use itemsRef for synchronous read instead of calling state setters inside setItems updater callback (React strict mode safe) - Attach PiP event listeners directly to video element instead of document, since these events don't bubble * Fix PiP issues flagged by Bugbot - iOS: Only disconnect stream onDisappear if PiP is not active, allowing stream to continue in PiP mode after sheet is dismissed - Web: Stop existing stream tracks before creating new ones to prevent resource leaks on repeated PiP toggle * Fix iOS PiP cleanup when stopped after view dismissal - Add shouldDisconnectAfterPip flag to track deferred cleanup - Set flag in onDisappear when PiP is active - Clean up WebSocket and PiP resources when PiP stops if flag is set * Fix additional PiP issues flagged by Bugbot - iOS: Return actual isPaused state in PiP delegate using MainActor.assumeIsolated - iOS: Add isPipReady flag and disable PiP button until setup completes - Web: Don't forcibly exit PiP on unmount to match iOS behavior --- dashboard/src/app/control/control-client.tsx | 73 +++++- dashboard/src/components/desktop-stream.tsx | 91 +++++++ ios_dashboard/OpenAgentDashboard/Info.plist | 4 + .../Services/DesktopStreamService.swift | 231 +++++++++++++++++- .../Views/Control/ControlView.swift | 36 ++- .../Views/Desktop/DesktopStreamView.swift | 89 ++++++- 6 files changed, 504 insertions(+), 20 deletions(-) diff --git a/dashboard/src/app/control/control-client.tsx b/dashboard/src/app/control/control-client.tsx index 7ff3454..3a9fc10 100644 --- a/dashboard/src/app/control/control-client.tsx +++ b/dashboard/src/app/control/control-client.tsx @@ -584,6 +584,7 @@ export default function ControlClient() { const router = useRouter(); const [items, setItems] = useState([]); + const itemsRef = useRef([]); const [draftInput, setDraftInput] = useLocalStorage("control-draft", ""); const [input, setInput] = useState(draftInput); @@ -691,6 +692,10 @@ export default function ControlClient() { viewingMissionRef.current = viewingMission; }, [viewingMission]); + useEffect(() => { + itemsRef.current = items; + }, [items]); + // Smart auto-scroll const { containerRef, endRef, isAtBottom, scrollToBottom } = useScrollToBottom(); @@ -1205,8 +1210,9 @@ export default function ControlClient() { } } else { // Event has NO mission_id (from main session) - // Only show if we're viewing the current/main mission - if (viewingId !== currentMissionId) { + // Only show if we're viewing the current/main mission OR if currentMission + // hasn't been loaded yet (to handle race condition during initial load) + if (currentMissionId && viewingId !== currentMissionId) { // We're viewing a parallel mission, skip main session events if (event.type !== "status") { return; @@ -1248,16 +1254,20 @@ export default function ControlClient() { // Status for a specific mission - only apply if viewing that mission shouldApplyStatus = statusMissionId === viewingId; } else { - // Status for main session - only apply if viewing main mission or no specific mission - shouldApplyStatus = !viewingId || viewingId === currentMissionId; + // Status for main session - only apply if viewing main mission, no specific mission, + // or currentMissionId hasn't loaded yet (to match event filter logic and avoid + // desktop stream staying open when status=idle comes during loading) + shouldApplyStatus = !viewingId || viewingId === currentMissionId || !currentMissionId; } if (shouldApplyStatus) { setQueueLen(typeof q === "number" ? q : 0); - // Clear progress when idle + // Clear progress and auto-close desktop stream when idle if (newState === "idle") { setProgress(null); + // Auto-close desktop stream when agent finishes + setShowDesktopStream(false); } // If we reconnected and agent is already running, add a visual indicator @@ -1395,6 +1405,59 @@ export default function ControlClient() { if (event.type === "tool_result" && isRecord(data)) { const toolCallId = String(data["tool_call_id"] ?? ""); const endTime = Date.now(); + + // Extract display ID from desktop_start_session tool result + // Get tool name from the event data (preferred) or fall back to stored tool item + const eventToolName = typeof data["name"] === "string" ? data["name"] : null; + + // Check for desktop_start_session right away using event data + // This handles the case where tool_call events might be filtered or missed + if (eventToolName === "desktop_start_session" || eventToolName === "desktop_desktop_start_session") { + let result = data["result"]; + // Handle case where result is a JSON string that needs parsing + if (typeof result === "string") { + try { + result = JSON.parse(result); + } catch { + // Not valid JSON, leave as-is + } + } + if (isRecord(result) && typeof result["display"] === "string") { + const display = result["display"]; + setDesktopDisplayId(display); + // Auto-open desktop stream when session starts + setShowDesktopStream(true); + } + } + + // If eventToolName wasn't available, check stored items for desktop_start_session + // Use itemsRef for synchronous read to avoid side effects in state updaters + if (!eventToolName) { + const toolItem = itemsRef.current.find( + (it) => it.kind === "tool" && it.toolCallId === toolCallId + ); + if (toolItem && toolItem.kind === "tool") { + const toolName = toolItem.name; + // Check for desktop_start_session (with or without desktop_ prefix from MCP) + if (toolName === "desktop_start_session" || toolName === "desktop_desktop_start_session") { + let result = data["result"]; + // Handle case where result is a JSON string that needs parsing + if (typeof result === "string") { + try { + result = JSON.parse(result); + } catch { + // Not valid JSON, leave as-is + } + } + if (isRecord(result) && typeof result["display"] === "string") { + const display = result["display"]; + setDesktopDisplayId(display); + setShowDesktopStream(true); + } + } + } + } + setItems((prev) => prev.map((it) => it.kind === "tool" && it.toolCallId === toolCallId diff --git a/dashboard/src/components/desktop-stream.tsx b/dashboard/src/components/desktop-stream.tsx index cee2327..6c6d685 100644 --- a/dashboard/src/components/desktop-stream.tsx +++ b/dashboard/src/components/desktop-stream.tsx @@ -13,6 +13,7 @@ import { Settings, Maximize2, Minimize2, + PictureInPicture2, } from "lucide-react"; interface DesktopStreamProps { @@ -41,10 +42,14 @@ export function DesktopStream({ const [fps, setFps] = useState(initialFps); const [quality, setQuality] = useState(initialQuality); const [isFullscreen, setIsFullscreen] = useState(false); + const [isPipActive, setIsPipActive] = useState(false); + const [isPipSupported, setIsPipSupported] = useState(false); const wsRef = useRef(null); const canvasRef = useRef(null); const containerRef = useRef(null); + const pipVideoRef = useRef(null); + const pipStreamRef = useRef(null); const connectionIdRef = useRef(0); // Guard against stale callbacks from old connections // Refs to store current values without triggering reconnection on slider changes @@ -215,6 +220,75 @@ export function DesktopStream({ } }, [isFullscreen]); + // Picture-in-Picture handler + const handlePip = useCallback(async () => { + if (!canvasRef.current) return; + + if (isPipActive && document.pictureInPictureElement) { + // Exit PiP + try { + await document.exitPictureInPicture(); + } catch { + // Ignore errors + } + return; + } + + try { + // Stop any existing stream tracks to prevent resource leaks + if (pipStreamRef.current) { + pipStreamRef.current.getTracks().forEach((track) => track.stop()); + } + + // Create a video element from canvas stream + const canvas = canvasRef.current; + const stream = canvas.captureStream(fps); + pipStreamRef.current = stream; + + // Create or reuse video element + if (!pipVideoRef.current) { + const video = document.createElement("video"); + video.muted = true; + video.autoplay = true; + video.playsInline = true; + // Attach PiP event listeners directly to the video element + // These events fire on the video, not document, so we need to listen here + video.addEventListener("enterpictureinpicture", () => setIsPipActive(true)); + video.addEventListener("leavepictureinpicture", () => setIsPipActive(false)); + pipVideoRef.current = video; + } + + pipVideoRef.current.srcObject = stream; + await pipVideoRef.current.play(); + + // Request PiP + await pipVideoRef.current.requestPictureInPicture(); + } catch (err) { + console.error("Failed to enter Picture-in-Picture:", err); + } + }, [isPipActive, fps]); + + // Check PiP support on mount + useEffect(() => { + setIsPipSupported( + "pictureInPictureEnabled" in document && document.pictureInPictureEnabled + ); + }, []); + + // Cleanup PiP resources on unmount + // Note: We don't forcibly exit PiP here to match iOS behavior where + // PiP continues when the sheet is dismissed. The PiP will naturally + // close when the WebSocket disconnects and the stream ends. + useEffect(() => { + return () => { + // Only stop stream tracks if PiP is not active + // This allows PiP to continue showing the last frame briefly + if (!document.pictureInPictureElement && pipStreamRef.current) { + pipStreamRef.current.getTracks().forEach((track) => track.stop()); + } + }; + }, []); + // Connect on mount useEffect(() => { connect(); @@ -291,6 +365,23 @@ export function DesktopStream({
+ {isPipSupported && ( + + )}