Files
openagent/scripts/install_desktop.sh
Thomas Marchand 3d0b4d19b7 Th0rgal/update branding (#32)
* feat: chroots

* wip

* Update workspace templates and Playwright tests

* Fix thinking panel close button not working during active thinking

The auto-show useEffect was including showThinkingPanel in its dependency
array, causing the panel to immediately reopen when closed since the state
change would trigger the effect while hasActiveThinking was still true.

Changed to use a ref to track previous state and only auto-show on
transition from inactive to active thinking.

* wip

* wip

* wip

* Cleanup web search tool and remove hardcoded OAuth credentials

* Ralph iteration 1: work in progress

* Ralph iteration 2: work in progress

* Ralph iteration 3: work in progress

* Ralph iteration 4: work in progress

* Ralph iteration 5: work in progress

* Ralph iteration 6: work in progress

* Ralph iteration 1: work in progress

* Ralph iteration 2: work in progress

* Ralph iteration 3: work in progress

* Ralph iteration 4: work in progress

* Ralph iteration 5: work in progress

* Ralph iteration 6: work in progress

* Ralph iteration 7: work in progress

* Ralph iteration 1: work in progress

* Ralph iteration 2: work in progress

* improve readme

* fix: remove unused file

* feat: hero screenshot

* Update README with cleaner vision and hero screenshot

Simplified the vision section with "what if" framing, removed
architecture diagram, added hero screenshot showing mission view.
2026-01-12 14:45:05 -08:00

238 lines
7.6 KiB
Bash
Executable File

#!/bin/bash
# Install desktop automation dependencies for Open Agent
# Run this on the production server: bash scripts/install_desktop.sh
#
# Requires root: every step below installs system packages through APT.

# -e: stop on the first failing command; -u: treat unset variables as errors;
# pipefail: a pipeline fails when any stage fails.
set -euo pipefail

# Fail early with a clear message instead of an APT lock/permission error.
if [[ ${EUID} -ne 0 ]]; then
  echo "This script must be run as root (try: sudo bash scripts/install_desktop.sh)" >&2
  exit 1
fi

# Keep package configuration from prompting; this script is meant to run unattended.
export DEBIAN_FRONTEND=noninteractive

echo "=== Installing desktop automation packages ==="

# apt-get is used instead of apt: apt's command-line interface is not
# guaranteed stable for scripted use.

# Update package list
apt-get update

# Install core X11 and window manager
echo "Installing Xvfb and i3..."
apt-get install -y xvfb i3 x11-utils

# Install automation tools
echo "Installing xdotool and screenshot tools..."
apt-get install -y xdotool scrot imagemagick

# Install Chromium browser
# Try the chromium + chromium-sandbox pair first; if that install fails,
# fall back to the chromium-browser package name.
echo "Installing Chromium..."
if ! apt-get install -y chromium chromium-sandbox; then
  apt-get install -y chromium-browser
fi

# Install accessibility tools (AT-SPI2)
echo "Installing AT-SPI2 for accessibility tree..."
apt-get install -y at-spi2-core libatspi2.0-0 python3-gi python3-gi-cairo gir1.2-atspi-2.0

# Install OCR
echo "Installing Tesseract OCR..."
apt-get install -y tesseract-ocr

# Install fonts for proper rendering
echo "Installing fonts..."
apt-get install -y fonts-liberation fonts-dejavu-core fonts-noto
# Provision the i3 configuration under every HOME the desktop may be started from.
# The OpenCode service runs with HOME=/var/lib/opencode, so root's copy alone is not enough.
echo "Creating i3 configuration..."
I3_CONFIG_FILE=/root/.config/i3/config
OPENCODE_I3_CONFIG_FILE=/var/lib/opencode/.config/i3/config
# Make sure the parent directory of each config file exists.
for i3_config_path in "$I3_CONFIG_FILE" "$OPENCODE_I3_CONFIG_FILE"; do
  mkdir -p "${i3_config_path%/*}"
done
# The quoted 'EOF' delimiter writes the text verbatim, so i3's own `$mod`
# variable is not expanded by bash.
cat << 'EOF' > "$I3_CONFIG_FILE"
# Open Agent i3 Config - Optimized for LLM Vision & Control
# Key principle: LLM needs to SEE state (URL bar, focus indicator, all windows)
set $mod Mod4
font pango:DejaVu Sans Mono 10
# ============================================================================
# WINDOW DECORATIONS - Minimal but useful for LLM
# ============================================================================
# Thin border shows focus state (colored differently for focused vs unfocused)
default_border pixel 3
default_floating_border pixel 3
# Colors: focused window gets bright orange border, unfocused gets dim gray
# class border backgr. text indicator child_border
client.focused #4c7899 #285577 #ffffff #2e9ef4 #ff5500
client.focused_inactive #333333 #5f676a #ffffff #484e50 #333333
client.unfocused #333333 #222222 #888888 #292d2e #222222
# Hide edge borders when only one window (still shows focus on multi-window)
hide_edge_borders smart
# ============================================================================
# FOCUS BEHAVIOR - Predictable but functional
# ============================================================================
focus_follows_mouse no
focus_wrapping no
force_display_urgency_hint 0 ms
# DO give focus to new windows - LLM expects to type into launched apps
# (intentionally NOT using no_focus - that prevents typing into new windows)
# Workspace back-and-forth for quick switching
workspace_auto_back_and_forth yes
# ============================================================================
# LAYOUT - Tiling with visible windows
# ============================================================================
# Use split layout (not tabbed) so LLM can see all windows
# When second window opens, split horizontally
default_orientation horizontal
# New windows open to the right of current - predictable positioning
workspace_layout default
# ============================================================================
# CHROMIUM - NOT fullscreen (need to see URL bar!)
# ============================================================================
# Just thin border, don't fullscreen - LLM needs to see URL bar and tabs
for_window [class="Chromium"] border pixel 2
for_window [class="chromium"] border pixel 2
for_window [class="Google-chrome"] border pixel 2
# ============================================================================
# FLOATING WINDOWS - Dialogs centered and predictable
# ============================================================================
# Common dialog types should float and center (file picker, alerts, etc)
for_window [window_role="pop-up"] floating enable, move position center
for_window [window_role="dialog"] floating enable, move position center
for_window [window_role="alert"] floating enable, move position center
for_window [window_type="dialog"] floating enable, move position center
for_window [class="Gcr-prompter"] floating enable, move position center
# All floating windows get centered
for_window [floating] move position center
# ============================================================================
# KEYBINDINGS - For i3-msg programmatic control
# ============================================================================
# Kill window
bindsym $mod+Shift+q kill
# Focus movement
bindsym $mod+h focus left
bindsym $mod+j focus down
bindsym $mod+k focus up
bindsym $mod+l focus right
# Move windows
bindsym $mod+Shift+h move left
bindsym $mod+Shift+j move down
bindsym $mod+Shift+k move up
bindsym $mod+Shift+l move right
# Fullscreen toggle (LLM can use when needed)
bindsym $mod+f fullscreen toggle
# Toggle floating (for dialogs)
bindsym $mod+Shift+space floating toggle
# Focus floating/tiling toggle
bindsym $mod+space focus mode_toggle
# Split direction
bindsym $mod+b split h
bindsym $mod+v split v
# Layout modes
bindsym $mod+s layout stacking
bindsym $mod+w layout tabbed
bindsym $mod+e layout toggle split
# Workspace switching
bindsym $mod+1 workspace 1
bindsym $mod+2 workspace 2
bindsym $mod+3 workspace 3
# Move to workspace
bindsym $mod+Shift+1 move container to workspace 1
bindsym $mod+Shift+2 move container to workspace 2
bindsym $mod+Shift+3 move container to workspace 3
# Exit/reload
bindsym $mod+Shift+e exit
bindsym $mod+Shift+r reload
# ============================================================================
# STARTUP
# ============================================================================
workspace 1 output primary
exec --no-startup-id i3-msg workspace 1
# Disable screensaver
exec --no-startup-id xset s off
exec --no-startup-id xset -dpms
exec --no-startup-id xset s noblank
# Set solid dark background (clean for screenshots, good contrast)
exec --no-startup-id xsetroot -solid "#1a1a2e"
EOF
# Mirror root's config into the OpenCode home so both locations stay identical.
cp "$I3_CONFIG_FILE" "$OPENCODE_I3_CONFIG_FILE"
# Report both destinations.
printf '%s\n' "i3 configuration written to:"
printf ' - %s\n' "$I3_CONFIG_FILE" "$OPENCODE_I3_CONFIG_FILE"
# Add DESKTOP_ENABLED to environment file
# The desktop settings are appended only when the file has no DESKTOP_ENABLED
# assignment yet, so re-running the installer does not duplicate them.
echo "Enabling desktop in environment..."
open_agent_env_file=/etc/open_agent/open_agent.env
# Create the directory first: appending to a file inside a missing directory
# fails, which would abort a script running under `set -e`.
mkdir -p "${open_agent_env_file%/*}"
# Look for an actual assignment (optionally prefixed with `export`), not just the
# name: a commented-out or merely mentioned DESKTOP_ENABLED must not suppress the
# append. A missing file makes grep fail (its stderr is silenced), which also
# leads to the append branch and creates the file.
if ! grep -Eq '^[[:space:]]*(export[[:space:]]+)?DESKTOP_ENABLED=' "$open_agent_env_file" 2>/dev/null; then
  # A single redirection for the whole group: the file is opened once and the
  # stanza is written together rather than through four independent appends.
  {
    echo ""
    echo "# Desktop automation"
    echo "DESKTOP_ENABLED=true"
    echo "DESKTOP_RESOLUTION=1920x1080"
  } >> "$open_agent_env_file"
fi
# Ensure the work directory (with its screenshots subdirectory) and the tools
# directory exist under /root; already-existing directories are left alone.
echo "Creating working directories..."
for work_dir in /root/work/screenshots /root/tools; do
  mkdir -p "$work_dir"
done
# Test installation

#######################################
# Report whether a tool is available on PATH.
# Arguments:
#   $1   - label shown in the report line
#   $2.. - candidate command names, tried in order
# Outputs:
#   "LABEL: OK (<what `command -v` resolved>)" for the first candidate found,
#   or "LABEL: MISSING" when none of the candidates resolves.
# Returns:
#   0 always, so a missing tool is reported instead of aborting the script.
#######################################
report_tool() {
  local label=$1
  shift
  local candidate resolved
  for candidate in "$@"; do
    # `command -v` is a shell builtin, so the check does not depend on the
    # external `which` program being installed.
    if resolved=$(command -v "$candidate"); then
      printf '%s: OK (%s)\n' "$label" "$resolved"
      return 0
    fi
  done
  printf '%s: MISSING\n' "$label"
  return 0
}

echo ""
echo "=== Testing installation ==="
report_tool "Xvfb" Xvfb
report_tool "i3" i3
report_tool "xdotool" xdotool
report_tool "scrot" scrot
# The browser binary may be named either chromium or chromium-browser.
report_tool "chromium" chromium chromium-browser
report_tool "tesseract" tesseract
# The Python check is an import test rather than a PATH lookup; stderr is
# silenced so a missing python3 or a failed import only shows as MISSING.
if python3 -c "import gi" 2>/dev/null; then
  echo "python3 with gi: OK"
else
  echo "python3 with gi: MISSING"
fi

# Closing hints: how to restart the service and how to try the desktop stack by hand.
echo ""
echo "=== Installation complete ==="
echo "Run: systemctl restart open_agent"
echo "To test manually:"
echo " Xvfb :99 -screen 0 1920x1080x24 &"
echo " DISPLAY=:99 i3 &"
echo " DISPLAY=:99 chromium --no-sandbox &"
echo " DISPLAY=:99 scrot /tmp/test.png"