diff --git a/README.md b/README.md index 430e430..5be7002 100644 --- a/README.md +++ b/README.md @@ -6,12 +6,14 @@ This is the default template for OpenAgent configuration libraries. Fork this re ``` library/ -├── skill/ # Reusable skills (SKILL.md + reference files) -├── command/ # Slash commands (markdown with YAML frontmatter) -├── tool/ # Custom TypeScript tools (@opencode-ai/plugin) -├── rule/ # Reusable instruction sets -├── agent/ # Custom agent configurations -└── mcp.json # MCP server configurations +├── skill/ # Reusable skills (SKILL.md + reference files) +├── command/ # Slash commands (markdown with YAML frontmatter) +├── tool/ # Custom TypeScript tools (@opencode-ai/plugin) +├── rule/ # Reusable instruction sets +├── agent/ # Custom agent configurations +├── mcp/ # MCP server configurations (servers.json) +├── opencode/ # OpenCode plugin configs (oh-my-opencode.json) +└── workspace-template/ # Workspace templates (distro/skills/env/init script) ``` ## Built-in Library Tools diff --git a/mcp/servers.json b/mcp/servers.json new file mode 100644 index 0000000..4a47daa --- /dev/null +++ b/mcp/servers.json @@ -0,0 +1,8 @@ +{ + "example-remote-mcp": { + "type": "remote", + "url": "https://example.com/mcp", + "headers": {}, + "enabled": false + } +} diff --git a/opencode/oh-my-opencode.json b/opencode/oh-my-opencode.json new file mode 100644 index 0000000..303cdd0 --- /dev/null +++ b/opencode/oh-my-opencode.json @@ -0,0 +1,11 @@ +{ + "$schema": "https://raw.githubusercontent.com/code-yeongyu/oh-my-opencode/master/assets/oh-my-opencode.schema.json", + "agents": { + "Sisyphus": { + "model": "anthropic/claude-sonnet-4-20250514" + }, + "explore": { + "model": "anthropic/claude-haiku-4-20250514" + } + } +} diff --git a/skill/bugbot-review/SKILL.md b/skill/bugbot-review/SKILL.md new file mode 100644 index 0000000..f2dcef5 --- /dev/null +++ b/skill/bugbot-review/SKILL.md @@ -0,0 +1,76 @@ +--- +name: bugbot-review +description: "Resolves review threads on open PRs (Bugbot or human), fixing valid issues and replying when not applicable. Triggers: bugbot, review issues, bot review, github action review, unresolved review threads, review conversations." +--- + +# Role: Review Thread Resolver +You clear review threads on open PRs with minimal disruption and precise replies. + +# Mission +Close all unresolved review threads, keep checks green, and leave the PR in a clean state. + +# Operating Principles +1. Work on the correct PR branch before changing code. +2. Verify each report against the codebase before replying. +3. Prefer minimal, targeted fixes. +4. Resolve threads explicitly after action. +5. Loop until no unresolved threads remain and Bugbot is idle. + +# Activation + +## Use when +- You are asked to check or address review findings on a pull request. +- A PR shows unresolved review threads or conversations (Bugbot or other reviewers). +- You need to verify reported issues and act on them. + +## Avoid when +- The request is a general code review unrelated to existing review threads. +- There is no open pull request to inspect. + +# Inputs to Ask For (only if missing) +- PR number or branch name (if multiple PRs exist) +- Whether to only respond or also make code changes + +# Mode Selection +- Respond-only: reply and resolve threads without code changes. +- Fix-and-respond: patch code, add tests if needed, then reply and resolve. +If unclear, ask once then proceed. + +# Procedure +1. Confirm repo context and `gh` auth. +2. 
Identify the target PR:
+   - `gh pr status` or `gh pr list --state open`
+   - If multiple, ask for the PR number.
+3. Check out the PR branch: `gh pr checkout <number>`.
+4. Fetch review threads via GraphQL (required; `gh pr view` omits `reviewThreads`):
+   - `gh api graphql -F owner=<owner> -F name=<repo> -F number=<number> -f query='query($owner:String!, $name:String!, $number:Int!){repository(owner:$owner,name:$name){pullRequest(number:$number){reviewThreads(first:100){nodes{id isResolved isOutdated comments(first:50){nodes{id author{login} body}}} pageInfo{hasNextPage endCursor}}}}}'`
+   - If `pageInfo.hasNextPage` is true, repeat with `after` until all threads are collected.
+5. For each unresolved review thread (`isResolved=false`):
+   - Verify the issue in code (run targeted checks/tests if needed).
+   - If valid, fix the issue and update/add tests if warranted.
+   - If not valid, reply concisely with evidence.
+   - Resolve the thread via GraphQL:
+     - `gh api graphql -f query='mutation($threadId:ID!){resolveReviewThread(input:{threadId:$threadId}){thread{isResolved}}}' -F threadId=<thread-id>`
+   - Note: top-level PR comments are not review threads and cannot be resolved by this mutation.
+6. If there are uncommitted changes, commit and push them.
+7. Check Bugbot runs: `gh pr checks <number>` or `gh run list --workflow bugbot`.
+8. Loop:
+   - If Bugbot is running, wait 5 minutes and repeat steps 4-8.
+   - If Bugbot is not running but unresolved threads remain, repeat steps 4-8.
+   - If Bugbot is not running and no unresolved threads remain, you are done.
+
+# Outputs / Definition of Done
+- All review threads are resolved in GitHub
+- Valid issues are fixed (with tests if needed)
+- Invalid issues have concise replies
+- Bugbot is not running and no unresolved threads remain
+- Any code changes are committed and pushed
+
+# Guardrails
+- Do not resolve a thread without either a fix or a reply.
+- Do not do unrelated refactors.
+- Keep replies factual and short.
+- Do not ask to continue the loop; continue until done unless inputs are missing.
+
+# References
+- GitHub CLI: `gh pr view`, `gh pr checks`, `gh run list`
diff --git a/skill/ffmpeg/SKILL.md b/skill/ffmpeg/SKILL.md
new file mode 100644
index 0000000..317832d
--- /dev/null
+++ b/skill/ffmpeg/SKILL.md
@@ -0,0 +1,43 @@
+---
+name: ffmpeg
+description: Extract audio and transcode MP4 to WebM using ffmpeg.
+---
+
+# ffmpeg
+
+## Scope
+Use this skill to:
+- Extract audio from a video file.
+- Transcode an MP4 video to WebM.
+
+## Requirements
+- Prefer safe defaults and explicit codecs.
+- Keep commands minimal and reproducible.
+- Use ASCII-only output unless file already uses Unicode.
+
+## Commands
+
+### Extract audio (MP4 → MP3)
+```
+ffmpeg -y -i input.mp4 -vn -c:a libmp3lame -q:a 2 output.mp3
+```
+
+### Extract audio (MP4 → WAV, lossless)
+```
+ffmpeg -y -i input.mp4 -vn -c:a pcm_s16le -ar 44100 -ac 2 output.wav
+```
+
+### Transcode MP4 → WebM (VP9 + Opus)
+```
+ffmpeg -y -i input.mp4 -c:v libvpx-vp9 -crf 32 -b:v 0 -row-mt 1 -c:a libopus -b:a 128k output.webm
+```
+
+### Transcode MP4 → WebM (VP8 + Vorbis)
+```
+ffmpeg -y -i input.mp4 -c:v libvpx -crf 10 -b:v 1M -c:a libvorbis -b:a 128k output.webm
+```
+
+## Notes
+- `-y` overwrites output files. Remove it if you want interactive prompts.
+- Lower `-crf` means higher quality (and larger files).
+- If audio-only extraction is desired, use `-vn` to drop video.
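+
+## Verify (optional)
+After a conversion, a quick stream check confirms the output carries the codecs you asked for. A minimal sketch; assumes `ffprobe` was installed alongside `ffmpeg`:
+```
+ffprobe -v error -show_entries stream=codec_type,codec_name \
+  -of default=noprint_wrappers=1 output.webm
+```
+Expect `codec_name=vp9` (or `vp8`) for the video stream and `opus` (or `vorbis`) for audio.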
diff --git a/skill/media-creation/SKILL.md b/skill/media-creation/SKILL.md new file mode 100644 index 0000000..b6cc644 --- /dev/null +++ b/skill/media-creation/SKILL.md @@ -0,0 +1,141 @@ +--- +name: media-creation +description: "Creates images and video via Alibaba Wan 2.6 (DashScope), Google Gemini/Veo, and OpenAI GPT Image APIs, plus background extraction workflows. Triggers: image generation, video generation, dashscope, wan 2.6, alibaba, gemini, veo, gpt image, openai images, background removal, alpha extraction, transparent png." +--- + +# Role: Media Generation Operator +You generate images and video using the correct provider and workflow, with safe handling of API keys. + +# Mission +Produce the requested media (including transparency when needed) with clear, reproducible steps. + +# Operating Principles +1. Choose the provider and model based on the task and constraints. +2. Ask for missing inputs once, then proceed. +3. Keep credentials out of logs and outputs. +4. Prefer native transparency when available. +5. Provide a minimal, executable command sequence. + +# Activation + +## Use when +- Generating images or video via Alibaba Wan, Google Gemini/Veo, or OpenAI GPT Image APIs +- Creating transparent PNGs +- Extracting alpha from consistent renders (3D/compositing) + +## Avoid when +- API access/credentials are unavailable +- The task does not involve media generation or background extraction + +# Inputs to Ask For (only if missing) +- Provider (OpenAI, Google, Alibaba) +- Model ID and task type (T2I, I2V, T2V) +- Prompt text and input image path (if I2V) +- Output size/aspect ratio, format, and count +- For transparency: native transparency vs background extraction +- For background extraction: paths to black/white/colored backgrounds and colored RGB (0-1) + +# Decision Flow +1. Image vs video? +2. If transparency required: + - Use GPT Image native transparency when possible. + - Only use 3-background extraction for consistent renders (3D/compositing). +3. Provider selection: + - OpenAI: best quality and transparency + - Google: fast general image/video + - Alibaba Wan: fewer restrictions when content is blocked elsewhere + +# Procedure +1. Gather inputs and pick provider/model. +2. Build the API request (use env vars for keys). +3. Submit, poll if required, and decode output. +4. Save outputs with clear filenames and verify results. + +# Transparent Image Generation (Recommended Approach) + +## Option 1: GPT Image Native Transparency (BEST) +GPT Image supports native transparency output: + +```bash +curl -X POST "https://api.openai.com/v1/images/generations" \ + -H "Authorization: Bearer ${OPENAI_API_KEY}" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "gpt-image-1.5", + "prompt": "A cute cartoon cat mascot", + "size": "1024x1024", + "quality": "high", + "background": "transparent", + "output_format": "png" + }' +``` + +Notes: +- `background: "transparent"` requires `output_format: "png"` or `"webp"` +- Returns base64 data in `data[0].b64_json` +- This is the only method that produces true transparency from a single generation + +## Option 2: Three-Background Extraction (Consistent Renders Only) +IMPORTANT LIMITATION: This only works when the foreground pixels are identical across renders: +- OK: 3D renders (Blender, Maya, etc.) 
+- OK: compositing software with controlled backgrounds +- OK: screenshots with different backgrounds +- NOT OK: generative AI (outputs differ every run) + +For 3D/compositing: +```bash +python3 scripts/extract_transparency.py \ + --black render_black.png \ + --white render_white.png \ + --colored render_red.png \ + --output result.png +``` + +## Option 3: AI Image + Manual Background Removal +For AI-generated images that need transparency: +1. Generate the image with any provider +2. Use a dedicated background removal tool (rembg, remove.bg API, etc.) + +# API Keys +These environment variables are used (automatically substituted during skill sync): +- `OPENAI_API_KEY` - GPT Image generations +- `GOOGLE_GENAI_API_KEY` - Gemini/Veo +- `DASHSCOPE_API_KEY` - Alibaba Wan 2.6 + +# Outputs / Definition of Done +- A clear, credential-safe request plan or script snippet +- For generation: submission, polling, and decode/download steps +- For transparency: verified RGBA output + +# Procedure References +- `references/alibaba-wan-api.md` for Wan 2.6 endpoints and parameters +- `references/gemini-banana-api.md` for Gemini image and Veo video +- `references/openai-gpt-image-api.md` for GPT Image endpoints +- `references/background-removal-3-bg.md` for 3-background alpha extraction + +# Model Quick Reference + +| Provider | Model | Use Case | +|----------|-------|----------| +| OpenAI | `gpt-image-1.5` | Best for transparent images, high quality | +| OpenAI | `gpt-image-1` | Image edits/inpainting | +| Google | `gemini-2.5-flash-image` | Fast image generation | +| Google | `veo-3.1-generate-preview` | Video generation | +| Alibaba | `wan2.6-t2v` | Text-to-video | +| Alibaba | `wan2.6-i2v` | Image-to-video | +| Alibaba | `wan2.6-image` | Image generation (fewer restrictions) | + +# Guardrails +- Do not embed or log API keys; use env var placeholders. +- Validate sizes/formats and rate limits. +- Use the correct transparency workflow for the source type. + +# References +- `references/alibaba-wan-api.md` +- `references/gemini-banana-api.md` +- `references/openai-gpt-image-api.md` +- `references/background-removal-3-bg.md` + +# Scripts +- `scripts/extract_transparency.py` - Extract RGBA from black/white/colored backgrounds. + Usage: `python3 scripts/extract_transparency.py --black img_black.png --white img_white.png --colored img_red.png --output result.png` diff --git a/skill/media-creation/references/alibaba-wan-api.md b/skill/media-creation/references/alibaba-wan-api.md new file mode 100644 index 0000000..e353858 --- /dev/null +++ b/skill/media-creation/references/alibaba-wan-api.md @@ -0,0 +1,98 @@ +# Alibaba Wan 2.6 (DashScope) API Notes + +Sources: Alibaba Cloud DashScope API references for Wan 2.6 text-to-video, image-to-video, and image generation. Credentials redacted. 
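+
+For orientation, a minimal end-to-end sketch of the async task flow (the base URL, headers, endpoints, and response fields are documented in the sections below; a set `DASHSCOPE_API_KEY` and `jq` are assumed):
+
+```bash
+BASE="https://dashscope-intl.aliyuncs.com/api/v1"
+
+# Submit a text-to-video task (async header required for task-based flows).
+TASK_ID=$(curl -s -X POST "$BASE/services/aigc/video-generation/video-synthesis" \
+  -H "Authorization: Bearer ${DASHSCOPE_API_KEY}" \
+  -H "X-DashScope-Async: enable" \
+  -H "Content-Type: application/json" \
+  -d '{"model": "wan2.6-t2v", "input": {"prompt": "a lighthouse at dawn"}, "parameters": {"size": "1280*720", "duration": 5}}' |
+  jq -r '.output.task_id')
+
+# Poll until the task reaches a terminal state, then print the video URL.
+while :; do
+  RESP=$(curl -s -H "Authorization: Bearer ${DASHSCOPE_API_KEY}" "$BASE/tasks/$TASK_ID")
+  case "$(echo "$RESP" | jq -r '.output.task_status')" in
+    SUCCEEDED|FAILED) break ;;
+  esac
+  sleep 10
+done
+echo "$RESP" | jq -r '.output.video_url'
+```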
+
+## Overview (latest models only)
+- Text-to-video: `wan2.6-t2v`
+- Image-to-video: `wan2.6-i2v`
+- Image generation / editing: `wan2.6-image`
+
+## Auth and Base URL
+- Base URL (Singapore): `https://dashscope-intl.aliyuncs.com/api/v1`
+- Header: `Authorization: Bearer ${DASHSCOPE_API_KEY}` (placeholder; replaced at deploy time)
+- Async header (for task-based flows): `X-DashScope-Async: enable`
+
+## Video Generation (Wan 2.6)
+
+Endpoint:
+- `POST /services/aigc/video-generation/video-synthesis`
+
+Text-to-video (T2V) request shape:
+```json
+{
+  "model": "wan2.6-t2v",
+  "input": {
+    "prompt": "<prompt>",
+    "audio_url": "<audio-url>"
+  },
+  "parameters": {
+    "size": "1280*720",
+    "duration": 5,
+    "prompt_extend": true
+  }
+}
+```
+
+Image-to-video (I2V) request shape:
+```json
+{
+  "model": "wan2.6-i2v",
+  "input": {
+    "prompt": "<prompt>",
+    "img_url": "data:image/jpeg;base64,<base64-data>",
+    "audio_url": "<audio-url>"
+  },
+  "parameters": {
+    "resolution": "1080P",
+    "duration": 5,
+    "prompt_extend": true,
+    "shot_type": "multi"
+  }
+}
+```
+
+Notes:
+- T2V uses `size` (e.g., `1280*720`); I2V uses `resolution` (720P/1080P).
+- `duration` for Wan 2.6 is 5, 10, or 15 seconds.
+- Multi-shot narrative is available via `shot_type: "multi"` when `prompt_extend` is true (Wan 2.6).
+
+Task flow:
+- Response returns `output.task_id` and `output.task_status`.
+- Poll `GET /tasks/{task_id}` until `SUCCEEDED`, then use `output.video_url`.
+
+## Image Generation (Wan 2.6)
+
+Two HTTP options are documented:
+1) **Sync/SSE**: `POST /services/aigc/multimodal-generation/generation`
+2) **Async**: `POST /services/aigc/image-generation/generation` with `X-DashScope-Async: enable`
+
+Request shape:
+```json
+{
+  "model": "wan2.6-image",
+  "input": {
+    "messages": [
+      {"role": "user", "content": [
+        {"text": "<prompt>"},
+        {"image": "<image-url-or-data-uri>"}
+      ]}
+    ]
+  },
+  "parameters": {
+    "size": "1024*1024",
+    "n": 1,
+    "prompt_extend": true,
+    "watermark": false
+  }
+}
+```
+
+Async response returns a task ID; poll `GET /tasks/{task_id}` for result URLs.
+
+## Input Requirements (image fields)
+- `image` or `img_url` can be a public URL or base64 data URI
+- Data URI format: `data:{MIME_type};base64,{base64_data}`
+
+## Security
+- Do not store API keys in repo.
+- Use environment variables or a local config file ignored by Git.
diff --git a/skill/media-creation/references/background-removal-3-bg.md b/skill/media-creation/references/background-removal-3-bg.md
new file mode 100644
index 0000000..d341ddc
--- /dev/null
+++ b/skill/media-creation/references/background-removal-3-bg.md
@@ -0,0 +1,119 @@
+# Background Removal (Three-Background Method)
+
+Script location: `scripts/extract_transparency.py`
+
+## When to Use This Method
+
+**This workflow is ONLY suitable for:**
+- 3D renders (Blender, Maya, Cinema 4D, etc.)
+- Compositing software (After Effects, Nuke, Fusion)
+- Screenshots with controlled desktop backgrounds
+- Any situation where you can render the EXACT same pixels on different backgrounds
+
+**This workflow does NOT work for:**
+- AI-generated images (Gemini, DALL-E, Midjourney, etc.)
+- Photos that need background removal
+- Any source where the foreground changes between captures
+
+For AI-generated transparent images, use **GPT Image 1.5** with `background: "transparent"` instead.
+
+## Why It Doesn't Work with Generative AI
+
+The algorithm relies on this equation for each pixel:
+```
+Result = ForegroundColor × Alpha + BackgroundColor × (1 - Alpha)
+```
+
+This only works when `ForegroundColor` and `Alpha` are IDENTICAL across all three images.
Generative AI models produce different outputs each time, even with the same prompt. + +## Problem Setup + +Given three renders of the same image: +- Black background (B=0) +- White background (B=1) +- Colored background (default red: B=(1,0,0)) + +For a pixel with true color C and alpha A composited over background B: +``` +Result = C × A + B × (1 - A) +``` + +## Algorithm + +### Two-Background Alpha (Black + White) +From black and white backgrounds: +- Black: `R_black = C × A` +- White: `R_white = C × A + (1 - A)` + +Solve for alpha (per channel): +``` +A = 1 - (R_white - R_black) +``` + +Implementation: +- Compute alpha per channel, then average across RGB +- Clamp alpha to [0, 1] + +### Three-Background Refinement +The third (colored) background improves accuracy: + +For a colored background channel where component > 0.1: +``` +A = 1 - (R_colored - R_black) / B +``` + +Weighted average: +- 50% weight to black/white estimate +- 50% distributed across colored-channel estimates + +### Color Recovery +Color is recovered from the black background: +``` +C = R_black / A (with epsilon to avoid divide-by-zero) +``` + +If A is effectively zero, color is set to 0. + +## Usage + +```bash +# Full three-background extraction (best quality) +python3 scripts/extract_transparency.py \ + --black render_black.png \ + --white render_white.png \ + --colored render_red.png \ + --output result.png + +# Two-background extraction (simpler, still good) +python3 scripts/extract_transparency.py \ + --black render_black.png \ + --white render_white.png \ + --output result.png + +# Custom colored background (e.g., green) +python3 scripts/extract_transparency.py \ + --black render_black.png \ + --white render_white.png \ + --colored render_green.png \ + --colored-rgb 0.0 1.0 0.0 \ + --output result.png +``` + +## Output +- Combines RGB color and alpha into an RGBA PNG +- Creates a `_preview.png` with checkerboard background for visualization +- Prints alpha statistics (min, max, transparent/opaque pixel counts) + +## Two-Background Option (Quality Tradeoff) + +Using only black + white gives good results. Using black + colored (without white) is weaker: +- Colored background only provides alpha for its non-zero channels +- Example: red background gives no new information for green/blue channels + +**Recommendation**: Use black + white if limited to two images; use all three for best accuracy. + +## Technical Notes +- Images must have identical dimensions +- All inputs are normalized to [0, 1] float32 +- Colored background defaults to red `(1.0, 0.0, 0.0)` +- Output is saved as PNG with full transparency support diff --git a/skill/media-creation/references/gemini-banana-api.md b/skill/media-creation/references/gemini-banana-api.md new file mode 100644 index 0000000..7a16c00 --- /dev/null +++ b/skill/media-creation/references/gemini-banana-api.md @@ -0,0 +1,111 @@ +# Google Gemini / Veo API Notes + +Sources: Google AI for Developers docs (Gemini image generation and Veo video). 
+
+## Overview (Current Models)
+
+### Image Generation
+- `gemini-2.5-flash-image` - Fast, good quality (recommended for most uses)
+
+### Video Generation
+- `veo-3.1-generate-preview` - Latest video generation (paid preview)
+
+## Auth and Base URL
+- Base URL: `https://generativelanguage.googleapis.com/v1beta`
+- Header: `X-goog-api-key: ${GOOGLE_GENAI_API_KEY}`
+
+## Image Generation
+
+Endpoint pattern:
+- `POST /models/{model}:generateContent`
+
+### Basic Request
+```bash
+curl -X POST "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash-image:generateContent" \
+  -H "X-goog-api-key: ${GOOGLE_GENAI_API_KEY}" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "contents": [{
+      "parts": [{"text": "A futuristic cityscape at sunset"}]
+    }],
+    "generationConfig": {
+      "responseModalities": ["IMAGE"],
+      "imageConfig": {
+        "imageSize": "4K",
+        "aspectRatio": "16:9"
+      }
+    }
+  }'
+```
+
+### Parameters
+- `imageSize` values: `"1K"`, `"2K"`, `"4K"` (uppercase K)
+- `aspectRatio` values: `1:1`, `2:3`, `3:2`, `3:4`, `4:3`, `9:16`, `16:9`, `21:9`
+
+### Response
+```json
+{
+  "candidates": [{
+    "content": {
+      "parts": [{
+        "inlineData": {
+          "mimeType": "image/jpeg",
+          "data": "<base64-image-data>"
+        }
+      }]
+    }
+  }]
+}
+```
+
+## Transparency Note
+
+**Gemini does NOT support native transparency output.**
+
+For transparent images, use one of these approaches:
+1. **GPT Image 1.5** with `background: "transparent"` (recommended)
+2. Generate with Gemini, then use a background removal tool
+3. For 3D renders only: use the three-background extraction workflow
+
+The "generate variants on different backgrounds" approach does NOT work reliably with Gemini because each generation produces different results.
+
+## Video Generation (Veo 3.1)
+
+Uses long-running operations for video generation.
+
+### Python SDK Pattern
+```python
+import time
+
+from google import genai
+
+client = genai.Client(api_key="${GOOGLE_GENAI_API_KEY}")
+
+# Start generation
+operation = client.models.generate_videos(
+    model="veo-3.1-generate-preview",
+    prompt="A time-lapse of clouds over mountains"
+)
+
+# Poll until done
+while not operation.done:
+    time.sleep(10)
+    operation = client.operations.get(operation)
+
+# Download result
+video_data = operation.result.generated_videos[0].video
+```
+
+### REST Pattern
+1. `POST /models/veo-3.1-generate-preview:predictLongRunning`
+2. Poll `GET /{operation_name}` until `done=true`
+3. Download from `response.generateVideoResponse.generatedSamples[0].video.uri`
+
+### Image-to-Video
+Pass a starting image along with the prompt to create video from a still image.
+
+## Rate Limits
+- Check current quotas in Google AI Studio
+- Video generation has stricter limits than image generation
+
+## Security
+- Never store API keys in code
+- Use `${GOOGLE_GENAI_API_KEY}` placeholder (substituted during skill sync)
diff --git a/skill/media-creation/references/openai-gpt-image-api.md b/skill/media-creation/references/openai-gpt-image-api.md
new file mode 100644
index 0000000..46ac262
--- /dev/null
+++ b/skill/media-creation/references/openai-gpt-image-api.md
@@ -0,0 +1,62 @@
+# OpenAI GPT Image API (Latest)
+
+Sources: OpenAI API reference for Images and GPT Image 1.5 (Jan 1, 2026). Credentials redacted.
+
+## Latest Model
+- `gpt-image-1.5` is the current GPT Image model and is supported by the Images API for generations.
+
+## Which API to Use
+- **Images API** supports GPT Image models (`gpt-image-1.5`, `gpt-image-1`, `gpt-image-1-mini`) for **Generations**.
+- **Images API Edits** only supports `gpt-image-1` and `dall-e-2` (not `gpt-image-1.5`). + +## Auth and Base URL +- Base URL: `https://api.openai.com/v1` +- Header: `Authorization: Bearer ${OPENAI_API_KEY}` (placeholder; replaced at deploy time) + +## Image API: Generate +Endpoint: +- `POST https://api.openai.com/v1/images/generations` + +Request shape: +```json +{ + "model": "gpt-image-1.5", + "prompt": "", + "n": 1, + "size": "1024x1024", + "quality": "high", + "background": "transparent" +} +``` + +Output: +- GPT image models return base64 image data in `data[0].b64_json`. + +Notes: +- `size` for GPT image models supports `1024x1024`, `1536x1024`, `1024x1536`, or `auto`. +- `quality` supports `low`, `medium`, `high`, or `auto` for GPT image models. +- `background` can be `transparent`, `opaque`, or `auto` (transparent requires `png` or `webp`). +- `output_format` supports `png`, `jpeg`, `webp` for GPT image models; GPT image models always return base64 (not URLs). +- `n` can be 1-10 for GPT image models. + +## Image API: Edit (Inpainting / Variants) +Endpoint: +- `POST https://api.openai.com/v1/images/edits` + +Typical multipart fields: +- `model=gpt-image-1` +- `image[]=@input.png` (one or more images) +- `mask=@mask.png` (optional; defines the area to replace) +- `prompt=...` + +Edits endpoint accepts image inputs and optional masks; response returns base64 output. + +Notes: +- For GPT image models, each input image must be `png`, `webp`, or `jpg` and under 50MB; up to 16 images. + +## Responses API (Image Generation Tool) +- Use a mainline model that supports the `image_generation` tool, and it will call a GPT Image model under the hood. + +## Security +- Do not store API keys in repo. +- Use environment variables or local config files ignored by Git. diff --git a/skill/media-creation/scripts/extract_transparency.py b/skill/media-creation/scripts/extract_transparency.py new file mode 100644 index 0000000..ec127e5 --- /dev/null +++ b/skill/media-creation/scripts/extract_transparency.py @@ -0,0 +1,374 @@ +#!/usr/bin/env python3 +""" +Transparency Extraction from Multiple Solid Background Images + +This script extracts true RGBA values from images rendered on different +solid color backgrounds (black, white, and optionally red). + +Mathematical basis: +For a pixel with true color C and alpha A, composited over background B: + Result = C * A + B * (1 - A) + +With black background (B=0): R_black = C * A +With white background (B=1): R_white = C * A + (1 - A) + +Solving: + R_white - R_black = 1 - A + A = 1 - (R_white - R_black) + C = R_black / A (when A > 0) + +The third background (red) provides additional constraints for better accuracy, +especially for pixels where the color is close to black or white. +""" + +from __future__ import annotations + +import argparse +import numpy as np +from PIL import Image +from pathlib import Path +from typing import Optional, Tuple + + +def load_image(path: Path) -> np.ndarray: + """Load image and convert to float32 array normalized to [0, 1].""" + img = Image.open(path).convert("RGB") + return np.array(img, dtype=np.float32) / 255.0 + + +def extract_alpha_from_two_backgrounds( + img_black: np.ndarray, + img_white: np.ndarray, +) -> np.ndarray: + """ + Extract alpha channel using black and white backgrounds. + + For each channel: + alpha = 1 - (white - black) + + We average across RGB channels for more robust alpha estimation. 
+    """
+    # Calculate alpha for each channel
+    alpha_per_channel = 1.0 - (img_white - img_black)
+
+    # Average alpha across channels (they should be similar)
+    alpha = np.mean(alpha_per_channel, axis=2)
+
+    # Clamp to valid range
+    alpha = np.clip(alpha, 0.0, 1.0)
+
+    return alpha
+
+
+def extract_alpha_with_three_backgrounds(
+    img_black: np.ndarray,
+    img_white: np.ndarray,
+    img_colored: np.ndarray,
+    bg_color: Tuple[float, float, float],
+) -> np.ndarray:
+    """
+    Extract alpha using three backgrounds for improved accuracy.
+
+    Starts from the black/white estimate, derives additional per-channel
+    estimates from the colored background, and blends them with a weighted
+    average (50% black/white, 50% split across usable colored channels).
+    """
+    # For each pixel: Result_i = C * A + B_i * (1 - A)
+    # Rearranging: Result_i = B_i + A * (C - B_i)
+
+    # From black and white, we can get a good initial estimate
+    alpha_initial = extract_alpha_from_two_backgrounds(img_black, img_white)
+
+    # Refine using the colored background
+    # For colored bg: Result_colored = C * A + bg_color * (1 - A)
+    # We know C * A = img_black (from black bg)
+    # So: Result_colored = img_black + bg_color * (1 - A)
+    # Therefore: A = 1 - (Result_colored - img_black) / bg_color
+
+    # Calculate alpha from each color channel of the colored background
+    bg_color_arr = np.array(bg_color, dtype=np.float32)
+
+    # Only use channels where background color is significantly non-zero
+    alpha_estimates = []
+    alpha_estimates.append(alpha_initial)
+
+    for ch in range(3):
+        if bg_color_arr[ch] > 0.1:  # Only use this channel if bg has significant color
+            alpha_ch = 1.0 - (img_colored[:, :, ch] - img_black[:, :, ch]) / bg_color_arr[ch]
+            alpha_ch = np.clip(alpha_ch, 0.0, 1.0)
+            alpha_estimates.append(alpha_ch)
+
+    # Weighted average of all alpha estimates
+    # Give more weight to the black/white estimate as it's generally more reliable
+    if len(alpha_estimates) > 1:
+        alpha = alpha_estimates[0] * 0.5
+        weight_per_colored = 0.5 / (len(alpha_estimates) - 1)
+        for i in range(1, len(alpha_estimates)):
+            alpha += alpha_estimates[i] * weight_per_colored
+    else:
+        # Degenerate colored background (no usable channels): fall back to
+        # the black/white estimate instead of halving it.
+        alpha = alpha_initial
+
+    return np.clip(alpha, 0.0, 1.0)
+
+
+def extract_color(
+    img_black: np.ndarray,
+    alpha: np.ndarray,
+    epsilon: float = 1e-6,
+) -> np.ndarray:
+    """
+    Extract the true (un-premultiplied) color from the black background image.
+ + Since Result_black = C * A, we have C = Result_black / A + """ + h, w, c = img_black.shape + + # Expand alpha to match color dimensions + alpha_expanded = alpha[:, :, np.newaxis] + + # Avoid division by zero + safe_alpha = np.maximum(alpha_expanded, epsilon) + + # Extract color + color = img_black / safe_alpha + + # For fully transparent pixels, set color to 0 + color = np.where(alpha_expanded > epsilon, color, 0.0) + + # Clamp to valid range + color = np.clip(color, 0.0, 1.0) + + return color + + +def create_rgba_image(color: np.ndarray, alpha: np.ndarray) -> np.ndarray: + """Combine color and alpha into RGBA image.""" + h, w, _ = color.shape + + # Create RGBA array + rgba = np.zeros((h, w, 4), dtype=np.float32) + rgba[:, :, :3] = color + rgba[:, :, 3] = alpha + + return rgba + + +def save_rgba_image(rgba: np.ndarray, path: Path): + """Save RGBA array as PNG with transparency.""" + # Convert to uint8 + rgba_uint8 = (rgba * 255).astype(np.uint8) + + # Create PIL image and save + img = Image.fromarray(rgba_uint8) + img = img.convert("RGBA") + img.save(path, "PNG") + + +def extract_transparency( + black_path: Path, + white_path: Path, + colored_path: Optional[Path] = None, + colored_bg: Tuple[float, float, float] = (1.0, 0.0, 0.0), + output_path: Optional[Path] = None, +) -> np.ndarray: + """ + Main function to extract transparency from background images. + + Args: + black_path: Path to image on black background + white_path: Path to image on white background + colored_path: Optional path to image on colored background + colored_bg: RGB tuple (0-1) of the colored background + output_path: Path to save the result + + Returns: + RGBA numpy array of the extracted image + """ + print(f"Loading images...") + img_black = load_image(black_path) + img_white = load_image(white_path) + + print(f" Black: {img_black.shape}") + print(f" White: {img_white.shape}") + + # Verify dimensions match + if img_black.shape != img_white.shape: + raise ValueError("Black and white images must have the same dimensions") + + # Extract alpha + if colored_path is not None: + img_colored = load_image(colored_path) + print(f" Colored: {img_colored.shape}") + + if img_colored.shape != img_black.shape: + raise ValueError("All images must have the same dimensions") + + print(f"Extracting alpha using three backgrounds...") + alpha = extract_alpha_with_three_backgrounds( + img_black, img_white, img_colored, colored_bg + ) + else: + print(f"Extracting alpha using two backgrounds...") + alpha = extract_alpha_from_two_backgrounds(img_black, img_white) + + # Extract color + print(f"Extracting color...") + color = extract_color(img_black, alpha) + + # Combine into RGBA + rgba = create_rgba_image(color, alpha) + + # Print statistics + print(f"\nStatistics:") + print(f" Alpha range: [{alpha.min():.4f}, {alpha.max():.4f}]") + print(f" Fully transparent pixels: {np.sum(alpha < 0.01):,}") + print(f" Fully opaque pixels: {np.sum(alpha > 0.99):,}") + print(f" Semi-transparent pixels: {np.sum((alpha >= 0.01) & (alpha <= 0.99)):,}") + + # Save if output path provided + if output_path is not None: + print(f"\nSaving to {output_path}...") + save_rgba_image(rgba, output_path) + print(f"Done!") + + return rgba + + +def create_checkerboard(width: int, height: int, tile_size: int = 16) -> np.ndarray: + """Create a checkerboard pattern for transparency visualization.""" + # Create tile pattern + light = 0.9 + dark = 0.7 + + # Calculate number of tiles + tiles_x = (width + tile_size - 1) // tile_size + tiles_y = (height + tile_size - 1) // 
tile_size + + # Create pattern + pattern = np.zeros((tiles_y, tiles_x), dtype=np.float32) + pattern[0::2, 0::2] = light + pattern[1::2, 1::2] = light + pattern[0::2, 1::2] = dark + pattern[1::2, 0::2] = dark + + # Scale up to pixel size + checker = np.repeat(np.repeat(pattern, tile_size, axis=0), tile_size, axis=1) + checker = checker[:height, :width] + + # Make RGB + return np.stack([checker, checker, checker], axis=2) + + +def composite_over_checkerboard(rgba: np.ndarray, tile_size: int = 16) -> np.ndarray: + """Composite RGBA image over checkerboard for transparency visualization.""" + h, w = rgba.shape[:2] + checker = create_checkerboard(w, h, tile_size) + + color = rgba[:, :, :3] + alpha = rgba[:, :, 3:4] + + result = color * alpha + checker * (1 - alpha) + return result + + +def create_preview(rgba: np.ndarray, max_size: int = 512) -> Image.Image: + """Create a downscaled preview of the RGBA image with checkerboard background.""" + h, w = rgba.shape[:2] + scale = min(max_size / max(h, w), 1.0) + new_h, new_w = int(h * scale), int(w * scale) + + rgba_uint8 = (rgba * 255).astype(np.uint8) + img = Image.fromarray(rgba_uint8).convert("RGBA") + + if scale < 1.0: + img = img.resize((new_w, new_h), Image.Resampling.LANCZOS) + rgba_small = np.array(img, dtype=np.float32) / 255.0 + else: + rgba_small = rgba + + # Create checkerboard composite for visualization + checker_composite = composite_over_checkerboard(rgba_small, tile_size=8) + checker_uint8 = (checker_composite * 255).astype(np.uint8) + + return Image.fromarray(checker_uint8).convert("RGB") + + +def main(): + parser = argparse.ArgumentParser( + description="Extract transparency from images on solid backgrounds" + ) + parser.add_argument( + "--black", "-b", + type=Path, + default=Path(__file__).parent / "black.jpeg", + help="Path to image on black background" + ) + parser.add_argument( + "--white", "-w", + type=Path, + default=Path(__file__).parent / "white.jpeg", + help="Path to image on white background" + ) + parser.add_argument( + "--colored", "-c", + type=Path, + default=None, + help="Path to image on colored background (optional)" + ) + parser.add_argument( + "--colored-rgb", + type=float, + nargs=3, + default=[1.0, 0.0, 0.0], + metavar=("R", "G", "B"), + help="RGB values (0-1) of the colored background (default: 1 0 0 for red)" + ) + parser.add_argument( + "--output", "-o", + type=Path, + default=Path(__file__).parent / "output.png", + help="Output path for the transparent PNG" + ) + parser.add_argument( + "--preview-size", + type=int, + default=512, + help="Max size for preview images" + ) + + args = parser.parse_args() + + # Use red.jpeg as default colored image if it exists + if args.colored is None: + default_red = Path(__file__).parent / "red.jpeg" + if default_red.exists(): + args.colored = default_red + + rgba = extract_transparency( + black_path=args.black, + white_path=args.white, + colored_path=args.colored, + colored_bg=tuple(args.colored_rgb), + output_path=args.output, + ) + + # Create and save preview + preview_path = args.output.with_stem(args.output.stem + "_preview") + preview = create_preview(rgba, args.preview_size) + preview.save(preview_path, "PNG") + print(f"Preview saved to {preview_path}") + + +if __name__ == "__main__": + main() + diff --git a/skill/video-editing/SKILL.md b/skill/video-editing/SKILL.md new file mode 100644 index 0000000..49fdcd0 --- /dev/null +++ b/skill/video-editing/SKILL.md @@ -0,0 +1,385 @@ +--- +name: video-editing +description: "Convert, edit, and process video and audio files 
using ffmpeg and ffprobe. Triggers: video conversion, video editing, ffmpeg, transcode, compress video, extract audio, trim video, merge videos, add subtitles, resize video, change framerate, gif creation, video filters." +--- + +# Role: FFmpeg Editor +You transform video/audio with ffmpeg using the safest and most efficient commands. + +# Mission +Produce the requested output with minimal quality loss and clear, reproducible commands. + +# Operating Principles +1. Prefer stream copy when no re-encode is needed. +2. Preserve audio unless asked to remove or replace it. +3. Do not overwrite inputs; use a new output name by default. +4. Ask for missing details once, then proceed. +5. Verify output properties before calling it done. + +# Activation + +## Use when +- Convert video between formats (MP4, MOV, WebM, MKV, GIF) +- Compress or resize videos +- Extract or replace audio +- Trim, cut, concatenate, or stabilize +- Add subtitles, watermarks, or text overlays +- Apply filters or change framerate/resolution + +## Avoid when +- AI-based video generation (use media-creation instead) +- ffmpeg/ffprobe are not installed + +# Inputs to Ask For (only if missing) +- Input file path(s) +- Desired output format and path +- Target resolution, bitrate, or file size +- Keep audio? (yes/no) +- Frame rate or duration changes +- Subtitle file path (if adding subs) + +# Decision Flow +1. Can the job be done with stream copy (`-c copy`)? If yes, avoid re-encode. +2. If re-encoding, pick codec and quality (CRF/preset) based on size vs quality. +3. Decide audio handling (copy, re-encode, replace, remove). +4. For web delivery, add `-movflags +faststart`. + +# Procedure +1. Inspect inputs with ffprobe. +2. Draft the exact ffmpeg command and confirm outputs. +3. Run the command and verify duration, resolution, audio, and file size. +4. Provide the final command and output path in the response. + +# Outputs / Definition of Done +- Output file exists at the requested path +- Properties match the requested format and specs +- Any transforms (trim, resize, subtitles) are confirmed + +# Guardrails +- Never overwrite the input file. +- Use `-y` only when the output path is explicitly confirmed or safe to overwrite. +- Quote paths with spaces. 
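+
+A small sketch of the overwrite guardrails in practice (the `_out` suffix is an arbitrary choice):
+
+```bash
+# Derive a new output path instead of reusing the input path.
+input="my video.mp4"
+output="${input%.*}_out.mp4"
+
+# Refuse to clobber an existing file rather than reaching for -y.
+if [ -e "$output" ]; then
+  echo "Refusing to overwrite $output" >&2
+  exit 1
+fi
+
+ffmpeg -i "$input" -c:v libx264 -crf 23 -c:a aac "$output"
+```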
+ +# Quick Reference - Common Tasks + +## Get Video Information +```bash +ffprobe -v quiet -print_format json -show_format -show_streams "input.mp4" +``` + +## Convert Format +```bash +# MP4 to WebM (VP9 + Opus) +ffmpeg -i input.mp4 -c:v libvpx-vp9 -crf 30 -b:v 0 -c:a libopus output.webm + +# Any format to MP4 (H.264 + AAC - most compatible) +ffmpeg -i input.mov -c:v libx264 -preset medium -crf 23 -c:a aac -b:a 128k output.mp4 + +# Convert to H.265/HEVC (smaller file, good quality) +ffmpeg -i input.mp4 -c:v libx265 -crf 28 -c:a aac output_hevc.mp4 +``` + +## Compress Video +```bash +# Reduce file size (higher CRF = more compression, 18-28 is typical) +ffmpeg -i input.mp4 -c:v libx264 -crf 28 -preset slow -c:a aac -b:a 128k output.mp4 + +# Two-pass encoding for target file size +ffmpeg -i input.mp4 -c:v libx264 -b:v 1M -pass 1 -an -f null /dev/null && \ +ffmpeg -i input.mp4 -c:v libx264 -b:v 1M -pass 2 -c:a aac -b:a 128k output.mp4 +``` + +## Resize/Scale Video +```bash +# Scale to 1080p (maintain aspect ratio) +ffmpeg -i input.mp4 -vf "scale=-1:1080" -c:a copy output.mp4 + +# Scale to 720p width +ffmpeg -i input.mp4 -vf "scale=1280:-1" -c:a copy output.mp4 + +# Scale to exact dimensions (may stretch) +ffmpeg -i input.mp4 -vf "scale=1920:1080" output.mp4 + +# Scale with padding to fit exact dimensions +ffmpeg -i input.mp4 -vf "scale=1920:1080:force_original_aspect_ratio=decrease,pad=1920:1080:(ow-iw)/2:(oh-ih)/2" output.mp4 +``` + +## Trim/Cut Video +```bash +# Cut from timestamp to timestamp (fast, no re-encode) +ffmpeg -ss 00:01:30 -to 00:02:45 -i input.mp4 -c copy output.mp4 + +# Cut with re-encoding (more accurate timestamps) +ffmpeg -i input.mp4 -ss 00:01:30 -to 00:02:45 -c:v libx264 -c:a aac output.mp4 + +# Cut first N seconds +ffmpeg -i input.mp4 -t 30 -c copy output.mp4 + +# Skip first N seconds +ffmpeg -ss 10 -i input.mp4 -c copy output.mp4 +``` + +## Extract/Replace Audio +```bash +# Extract audio to MP3 +ffmpeg -i input.mp4 -vn -acodec libmp3lame -q:a 2 output.mp3 + +# Extract audio to WAV +ffmpeg -i input.mp4 -vn -acodec pcm_s16le output.wav + +# Remove audio from video +ffmpeg -i input.mp4 -c:v copy -an output.mp4 + +# Replace audio track +ffmpeg -i video.mp4 -i audio.mp3 -c:v copy -c:a aac -map 0:v:0 -map 1:a:0 output.mp4 + +# Add audio to video (mix with original) +ffmpeg -i video.mp4 -i music.mp3 -filter_complex "[0:a][1:a]amix=inputs=2:duration=first" -c:v copy output.mp4 +``` + +## Concatenate Videos +```bash +# Create a file list (files.txt): +# file 'video1.mp4' +# file 'video2.mp4' +# file 'video3.mp4' + +# Concatenate (same codec) +ffmpeg -f concat -safe 0 -i files.txt -c copy output.mp4 + +# Concatenate with re-encoding (different codecs/resolutions) +ffmpeg -f concat -safe 0 -i files.txt -c:v libx264 -c:a aac output.mp4 +``` + +## Create GIF +```bash +# Simple GIF (low quality) +ffmpeg -i input.mp4 -vf "fps=10,scale=480:-1" output.gif + +# High quality GIF with palette +ffmpeg -i input.mp4 -vf "fps=15,scale=480:-1:flags=lanczos,split[s0][s1];[s0]palettegen[p];[s1][p]paletteuse" output.gif + +# GIF from specific time range +ffmpeg -ss 00:00:05 -t 3 -i input.mp4 -vf "fps=15,scale=320:-1:flags=lanczos,split[s0][s1];[s0]palettegen[p];[s1][p]paletteuse" output.gif +``` + +## Change Framerate/Speed +```bash +# Change framerate +ffmpeg -i input.mp4 -filter:v fps=30 output.mp4 + +# Speed up video 2x +ffmpeg -i input.mp4 -filter:v "setpts=0.5*PTS" -filter:a "atempo=2.0" output.mp4 + +# Slow down video 0.5x +ffmpeg -i input.mp4 -filter:v "setpts=2.0*PTS" -filter:a "atempo=0.5" 
output.mp4 +``` + +## Add Subtitles +```bash +# Burn subtitles into video (hardcoded) +ffmpeg -i input.mp4 -vf "subtitles=subs.srt" output.mp4 + +# Add subtitle stream (soft subs) +ffmpeg -i input.mp4 -i subs.srt -c:v copy -c:a copy -c:s mov_text output.mp4 +``` + +## Add Text/Watermark +```bash +# Add text overlay +ffmpeg -i input.mp4 -vf "drawtext=text='Hello World':fontsize=24:fontcolor=white:x=10:y=10" output.mp4 + +# Add image watermark +ffmpeg -i input.mp4 -i watermark.png -filter_complex "overlay=10:10" output.mp4 + +# Add watermark bottom-right corner +ffmpeg -i input.mp4 -i watermark.png -filter_complex "overlay=W-w-10:H-h-10" output.mp4 +``` + +## Video Filters +```bash +# Rotate video +ffmpeg -i input.mp4 -vf "transpose=1" output.mp4 # 90 clockwise +ffmpeg -i input.mp4 -vf "transpose=2" output.mp4 # 90 counter-clockwise +ffmpeg -i input.mp4 -vf "hflip" output.mp4 # horizontal flip +ffmpeg -i input.mp4 -vf "vflip" output.mp4 # vertical flip + +# Crop video (width:height:x:y) +ffmpeg -i input.mp4 -vf "crop=640:480:100:50" output.mp4 + +# Crop to center square +ffmpeg -i input.mp4 -vf "crop=min(iw\,ih):min(iw\,ih)" output.mp4 + +# Blur video +ffmpeg -i input.mp4 -vf "boxblur=5:1" output.mp4 + +# Sharpen video +ffmpeg -i input.mp4 -vf "unsharp=5:5:1.0:5:5:0.0" output.mp4 + +# Adjust brightness/contrast/saturation +ffmpeg -i input.mp4 -vf "eq=brightness=0.1:contrast=1.2:saturation=1.3" output.mp4 + +# Denoise video +ffmpeg -i input.mp4 -vf "hqdn3d=4:3:6:4.5" output.mp4 + +# Color correction (curves) +ffmpeg -i input.mp4 -vf "curves=preset=lighter" output.mp4 +``` + +## Video Stabilization +```bash +# Two-pass stabilization +ffmpeg -i input.mp4 -vf vidstabdetect -f null - +ffmpeg -i input.mp4 -vf vidstabtransform=smoothing=10 output.mp4 +``` + +## Green Screen / Chroma Key +```bash +# Remove green background +ffmpeg -i greenscreen.mp4 -vf "chromakey=0x00FF00:0.1:0.2" -c:v png output.mov + +# Replace green screen with another video +ffmpeg -i background.mp4 -i greenscreen.mp4 -filter_complex "[1:v]chromakey=0x00FF00:0.1:0.2[fg];[0:v][fg]overlay[out]" -map "[out]" output.mp4 +``` + +## Extract Frames +```bash +# Extract all frames as images +ffmpeg -i input.mp4 frame_%04d.png + +# Extract 1 frame per second +ffmpeg -i input.mp4 -vf fps=1 frame_%04d.png + +# Extract single frame at timestamp +ffmpeg -ss 00:00:10 -i input.mp4 -frames:v 1 frame.png + +# Extract thumbnail/poster +ffmpeg -i input.mp4 -ss 00:00:01 -frames:v 1 thumbnail.jpg +``` + +## Create Video from Images +```bash +# Images to video (image sequence) +ffmpeg -framerate 30 -i frame_%04d.png -c:v libx264 -pix_fmt yuv420p output.mp4 + +# Single image to video with duration +ffmpeg -loop 1 -i image.png -c:v libx264 -t 10 -pix_fmt yuv420p output.mp4 +``` + +## Screen Recording +```bash +# macOS screen capture +ffmpeg -f avfoundation -i "1" -r 30 -c:v libx264 output.mp4 + +# List available devices (macOS) +ffmpeg -f avfoundation -list_devices true -i "" +``` + +## Hardware Accelerated Encoding (macOS) +```bash +# H.264 with VideoToolbox (much faster) +ffmpeg -i input.mp4 -c:v h264_videotoolbox -b:v 5M output.mp4 + +# HEVC with VideoToolbox +ffmpeg -i input.mp4 -c:v hevc_videotoolbox -b:v 5M output.mp4 + +# Check available hardware encoders +ffmpeg -encoders | grep videotoolbox +``` + +## Batch Processing +```bash +# Convert all MP4 files in a directory to WebM +for f in *.mp4; do ffmpeg -i "$f" -c:v libvpx-vp9 -crf 30 -b:v 0 -c:a libopus "${f%.mp4}.webm"; done + +# Extract thumbnails from multiple videos +for f in *.mp4; do 
ffmpeg -i "$f" -ss 00:00:01 -frames:v 1 "${f%.mp4}_thumb.jpg"; done
+
+# Compress all videos in directory
+for f in *.mp4; do ffmpeg -i "$f" -c:v libx264 -crf 28 -preset fast -c:a aac "compressed_$f"; done
+```
+
+# Codec Quick Reference
+
+| Codec | Encoder | Use Case |
+|-------|---------|----------|
+| H.264 | `libx264` | Most compatible, good quality/size |
+| H.265/HEVC | `libx265` | Better compression, newer devices |
+| VP9 | `libvpx-vp9` | WebM format, web streaming |
+| AV1 | `libaom-av1` / `libsvtav1` | Best compression, slow encode |
+| ProRes | `prores_ks` | Professional editing, large files |
+| AAC | `aac` | Standard audio codec |
+| MP3 | `libmp3lame` | Legacy audio format |
+| Opus | `libopus` | Modern audio, web streaming |
+
+# Quality Presets
+
+## CRF (Constant Rate Factor)
+- 0 = Lossless
+- 18 = Visually lossless
+- 23 = Default (good balance)
+- 28 = Lower quality, smaller file
+- 51 = Worst quality
+
+## Encoding Presets (libx264/libx265)
+- `ultrafast` - Fastest, largest file
+- `fast` - Good balance for quick encodes
+- `medium` - Default
+- `slow` - Better compression
+- `veryslow` - Best compression, very slow
+
+# Common Issues & Solutions
+
+## Audio/Video Sync Issues
+```bash
+# Re-encode with fixed timestamps
+ffmpeg -i input.mp4 -c:v libx264 -c:a aac -async 1 output.mp4
+```
+
+## Variable Frame Rate (VFR) to Constant Frame Rate (CFR)
+```bash
+ffmpeg -i input.mp4 -vsync cfr -r 30 output.mp4
+```
+
+## Fix Rotation Metadata
+```bash
+ffmpeg -i input.mp4 -c copy -metadata:s:v:0 rotate=0 output.mp4
+```
+
+## Convert for Web/HTML5
+```bash
+# MP4 for web (H.264 baseline, compatible with all browsers)
+ffmpeg -i input.mp4 -c:v libx264 -profile:v baseline -level 3.0 -pix_fmt yuv420p -c:a aac -movflags +faststart output.mp4
+```
+
+# Execution Notes
+1. Analyze the input with ffprobe.
+2. Prefer `-c copy` when no re-encode is needed.
+3. Use a short clip test for long jobs.
+4. Use `-y` only when the output path is confirmed safe to overwrite.
+
+# Best Practices
+- Quote file paths with spaces: `ffmpeg -i "my video.mp4" output.mp4`
+- Use `-hide_banner` for cleaner output: `ffmpeg -hide_banner -i input.mp4 ...`
+- Preserve metadata when copying: `ffmpeg -i input.mp4 -c copy -map_metadata 0 output.mp4`
+- For web videos, add `-movflags +faststart` for progressive playback
+- Check codec support before encoding: `ffmpeg -encoders | grep <codec>`
+
+# Error Handling
+```bash
+# Check if ffmpeg is installed
+command -v ffmpeg >/dev/null 2>&1 || { echo "ffmpeg not installed"; exit 1; }
+
+# Verify input file exists
+[ -f "input.mp4" ] || { echo "Input file not found"; exit 1; }
+
+# Check supported formats
+ffmpeg -formats | grep <format>
+
+# Verbose output for debugging
+ffmpeg -v verbose -i input.mp4 output.mp4
+```
+
+# References
+- `references/ffmpeg-filters.md` - Comprehensive filter reference
+- `references/ffmpeg-codecs.md` - Detailed codec information
diff --git a/skill/video-editing/references/ffmpeg-codecs.md b/skill/video-editing/references/ffmpeg-codecs.md
new file mode 100644
index 0000000..80f09c2
--- /dev/null
+++ b/skill/video-editing/references/ffmpeg-codecs.md
@@ -0,0 +1,308 @@
+# FFmpeg Codecs Reference
+
+This document provides detailed information about video and audio codecs available in ffmpeg.
+
+## Video Codecs
+
+### H.264 (libx264) - Most Compatible
+
+The most widely supported video codec. Works on virtually all devices and platforms.
+ +```bash +# Basic encoding +ffmpeg -i input.mp4 -c:v libx264 -crf 23 output.mp4 + +# High quality +ffmpeg -i input.mp4 -c:v libx264 -crf 18 -preset slow output.mp4 + +# Fast encoding +ffmpeg -i input.mp4 -c:v libx264 -crf 23 -preset fast output.mp4 + +# Web compatible (baseline profile) +ffmpeg -i input.mp4 -c:v libx264 -profile:v baseline -level 3.0 -pix_fmt yuv420p output.mp4 + +# High profile for best quality +ffmpeg -i input.mp4 -c:v libx264 -profile:v high -level 4.1 output.mp4 +``` + +**Presets** (speed vs compression): +- `ultrafast` - Fastest encoding, largest file +- `superfast` +- `veryfast` +- `faster` +- `fast` +- `medium` - Default +- `slow` +- `slower` +- `veryslow` - Best compression, slowest + +**CRF Values** (quality): +- 0 = Lossless +- 18 = Visually lossless +- 23 = Default +- 28 = Smaller file +- 51 = Worst quality + +**Profiles**: +- `baseline` - Most compatible, no B-frames +- `main` - Good balance +- `high` - Best quality + +### H.265/HEVC (libx265) - Better Compression + +50% smaller files than H.264 at same quality. Requires more processing power. + +```bash +# Basic encoding +ffmpeg -i input.mp4 -c:v libx265 -crf 28 output.mp4 + +# High quality +ffmpeg -i input.mp4 -c:v libx265 -crf 22 -preset slow output.mp4 + +# 10-bit encoding (HDR support) +ffmpeg -i input.mp4 -c:v libx265 -crf 22 -pix_fmt yuv420p10le output.mp4 +``` + +**Note**: CRF values differ from x264. Add ~6 to get equivalent quality (x264 CRF 23 ≈ x265 CRF 28). + +### VP9 (libvpx-vp9) - WebM Format + +Open source codec used by YouTube. Good for web streaming. + +```bash +# Basic encoding +ffmpeg -i input.mp4 -c:v libvpx-vp9 -crf 30 -b:v 0 output.webm + +# Two-pass encoding (recommended) +ffmpeg -i input.mp4 -c:v libvpx-vp9 -b:v 2M -pass 1 -an -f null /dev/null && \ +ffmpeg -i input.mp4 -c:v libvpx-vp9 -b:v 2M -pass 2 -c:a libopus output.webm + +# High quality +ffmpeg -i input.mp4 -c:v libvpx-vp9 -crf 20 -b:v 0 -deadline good -cpu-used 2 output.webm +``` + +### AV1 (libaom-av1, libsvtav1) - Best Compression + +Newest codec with best compression. Slow to encode. + +```bash +# Using libaom-av1 (slow but high quality) +ffmpeg -i input.mp4 -c:v libaom-av1 -crf 30 -cpu-used 4 output.mp4 + +# Using SVT-AV1 (faster) +ffmpeg -i input.mp4 -c:v libsvtav1 -crf 30 -preset 6 output.mp4 + +# Using rav1e +ffmpeg -i input.mp4 -c:v librav1e -crf 30 output.mp4 +``` + +### ProRes (prores_ks) - Professional Editing + +Lossless/near-lossless codec for editing. Large files. 
+ +```bash +# ProRes 422 (standard) +ffmpeg -i input.mp4 -c:v prores_ks -profile:v 2 output.mov + +# ProRes 422 HQ (high quality) +ffmpeg -i input.mp4 -c:v prores_ks -profile:v 3 output.mov + +# ProRes 4444 (with alpha channel) +ffmpeg -i input.mp4 -c:v prores_ks -profile:v 4 -pix_fmt yuva444p10le output.mov +``` + +**Profiles**: +- 0 = ProRes 422 Proxy +- 1 = ProRes 422 LT +- 2 = ProRes 422 +- 3 = ProRes 422 HQ +- 4 = ProRes 4444 +- 5 = ProRes 4444 XQ + +### DNxHD/DNxHR - Avid Professional + +```bash +# DNxHD for 1080p +ffmpeg -i input.mp4 -c:v dnxhd -profile:v dnxhr_hq output.mov + +# DNxHR for higher resolutions +ffmpeg -i input.mp4 -c:v dnxhd -profile:v dnxhr_hqx output.mov +``` + +### Hardware Accelerated Encoding + +#### macOS VideoToolbox (H.264/HEVC) +```bash +# H.264 +ffmpeg -i input.mp4 -c:v h264_videotoolbox -b:v 5M output.mp4 + +# HEVC +ffmpeg -i input.mp4 -c:v hevc_videotoolbox -b:v 5M output.mp4 +``` + +#### NVIDIA NVENC +```bash +# H.264 +ffmpeg -i input.mp4 -c:v h264_nvenc -preset p4 -cq 23 output.mp4 + +# HEVC +ffmpeg -i input.mp4 -c:v hevc_nvenc -preset p4 -cq 28 output.mp4 +``` + +#### Intel QuickSync +```bash +# H.264 +ffmpeg -i input.mp4 -c:v h264_qsv -global_quality 23 output.mp4 + +# HEVC +ffmpeg -i input.mp4 -c:v hevc_qsv -global_quality 28 output.mp4 +``` + +## Audio Codecs + +### AAC - Most Compatible + +```bash +# Default AAC +ffmpeg -i input.mp4 -c:a aac -b:a 128k output.mp4 + +# High quality +ffmpeg -i input.mp4 -c:a aac -b:a 256k output.mp4 + +# Variable bitrate +ffmpeg -i input.mp4 -c:a aac -q:a 2 output.mp4 +``` + +### MP3 (libmp3lame) + +```bash +# Constant bitrate +ffmpeg -i input.wav -c:a libmp3lame -b:a 320k output.mp3 + +# Variable bitrate (quality 0-9, lower is better) +ffmpeg -i input.wav -c:a libmp3lame -q:a 2 output.mp3 +``` + +**VBR Quality**: +- 0 = ~245 kbps +- 2 = ~190 kbps +- 4 = ~165 kbps +- 6 = ~130 kbps + +### Opus (libopus) - Best Quality + +Modern codec, best quality at low bitrates. 
+ +```bash +# VoIP quality +ffmpeg -i input.wav -c:a libopus -b:a 64k output.opus + +# Music quality +ffmpeg -i input.wav -c:a libopus -b:a 128k output.opus + +# High quality +ffmpeg -i input.wav -c:a libopus -b:a 256k output.opus +``` + +### Vorbis (libvorbis) - OGG Format + +```bash +ffmpeg -i input.wav -c:a libvorbis -q:a 5 output.ogg +``` + +### FLAC - Lossless + +```bash +# Standard compression +ffmpeg -i input.wav -c:a flac output.flac + +# Maximum compression +ffmpeg -i input.wav -c:a flac -compression_level 12 output.flac +``` + +### WAV/PCM - Uncompressed + +```bash +# 16-bit PCM +ffmpeg -i input.mp3 -c:a pcm_s16le output.wav + +# 24-bit PCM +ffmpeg -i input.mp3 -c:a pcm_s24le output.wav + +# 32-bit float +ffmpeg -i input.mp3 -c:a pcm_f32le output.wav +``` + +## Container Formats + +### MP4 (.mp4) +- Video: H.264, H.265, AV1 +- Audio: AAC, MP3, AC3 +- Most compatible format + +```bash +# Enable fast start for streaming +ffmpeg -i input.mp4 -c copy -movflags +faststart output.mp4 +``` + +### MKV (.mkv) +- Supports almost all codecs +- Great for archiving +- Less device compatibility + +### WebM (.webm) +- Video: VP8, VP9, AV1 +- Audio: Vorbis, Opus +- Web-optimized + +### MOV (.mov) +- Apple's container format +- Video: H.264, ProRes, HEVC +- Audio: AAC, ALAC + +### AVI (.avi) +- Legacy format +- Limited codec support +- Not recommended for new content + +## Codec Comparison Table + +| Codec | Quality | File Size | Encode Speed | Compatibility | +|-------|---------|-----------|--------------|---------------| +| H.264 | Good | Medium | Fast | Excellent | +| H.265 | Better | Small | Slow | Good | +| VP9 | Better | Small | Slow | Web only | +| AV1 | Best | Smallest | Very slow | Growing | +| ProRes | Excellent | Very large | Fast | Apple/Pro | + +## Choosing the Right Codec + +### For Social Media/Web +```bash +# H.264 with fast start (most compatible) +ffmpeg -i input.mp4 -c:v libx264 -crf 23 -c:a aac -b:a 128k -movflags +faststart output.mp4 +``` + +### For Archiving +```bash +# H.265 for smaller files +ffmpeg -i input.mp4 -c:v libx265 -crf 22 -preset slow -c:a flac output.mkv +``` + +### For Editing/Post-Production +```bash +# ProRes for quality +ffmpeg -i input.mp4 -c:v prores_ks -profile:v 3 -c:a pcm_s16le output.mov +``` + +### For Streaming +```bash +# HLS with multiple bitrates +ffmpeg -i input.mp4 -c:v libx264 -crf 23 -c:a aac -f hls -hls_time 10 -hls_list_size 0 output.m3u8 +``` + +### For Smallest File Size +```bash +# AV1 (if time permits) +ffmpeg -i input.mp4 -c:v libsvtav1 -crf 35 -preset 4 -c:a libopus -b:a 96k output.mp4 +``` diff --git a/skill/video-editing/references/ffmpeg-filters.md b/skill/video-editing/references/ffmpeg-filters.md new file mode 100644 index 0000000..a68d387 --- /dev/null +++ b/skill/video-editing/references/ffmpeg-filters.md @@ -0,0 +1,455 @@ +# FFmpeg Video Filters Reference + +This document provides detailed examples of commonly used ffmpeg video filters. + +## Filter Syntax + +Filters are applied using the `-vf` (video filter) or `-af` (audio filter) flags, or `-filter_complex` for complex filtergraphs. 
+ +```bash +# Single filter +ffmpeg -i input.mp4 -vf "filtername=param1=value1:param2=value2" output.mp4 + +# Multiple filters (chained) +ffmpeg -i input.mp4 -vf "filter1,filter2,filter3" output.mp4 + +# Complex filtergraph (multiple inputs/outputs) +ffmpeg -i input1.mp4 -i input2.mp4 -filter_complex "[0:v][1:v]overlay[out]" -map "[out]" output.mp4 +``` + +## Video Transformation Filters + +### scale - Resize Video +```bash +# Scale to width 1280, auto height (maintain aspect ratio) +-vf "scale=1280:-1" + +# Scale to height 720, auto width +-vf "scale=-1:720" + +# Scale to exact dimensions +-vf "scale=1920:1080" + +# Scale with high quality algorithm +-vf "scale=1280:720:flags=lanczos" + +# Scale to fit within bounds (no stretch) +-vf "scale=1920:1080:force_original_aspect_ratio=decrease" + +# Scale to fill bounds (may crop) +-vf "scale=1920:1080:force_original_aspect_ratio=increase" + +# Scale to even dimensions (required for many codecs) +-vf "scale=trunc(iw/2)*2:trunc(ih/2)*2" +``` + +### crop - Crop Video +```bash +# Crop to width:height starting at x:y +-vf "crop=640:480:100:50" + +# Crop center region +-vf "crop=640:480" + +# Crop to 16:9 aspect ratio (center) +-vf "crop=ih*16/9:ih" + +# Crop to square (center) +-vf "crop=min(iw\,ih):min(iw\,ih)" + +# Remove 10 pixels from each edge +-vf "crop=iw-20:ih-20:10:10" +``` + +### pad - Add Padding/Letterbox +```bash +# Add black bars to fit 16:9 +-vf "pad=1920:1080:(ow-iw)/2:(oh-ih)/2:black" + +# Add padding to top and bottom +-vf "pad=iw:ih+100:0:50:black" + +# Add colored padding +-vf "pad=1920:1080:(ow-iw)/2:(oh-ih)/2:color=white" +``` + +### transpose - Rotate Video +```bash +# Rotate 90 degrees clockwise +-vf "transpose=1" + +# Rotate 90 degrees counter-clockwise +-vf "transpose=2" + +# Rotate 90 clockwise and flip vertically +-vf "transpose=3" + +# Rotate 180 degrees +-vf "transpose=1,transpose=1" + +# Horizontal flip (mirror) +-vf "hflip" + +# Vertical flip +-vf "vflip" + +# Rotate arbitrary angle (in radians, with black background) +-vf "rotate=PI/4:fillcolor=black" + +# Rotate arbitrary angle (in degrees) +-vf "rotate=45*PI/180" +``` + +## Color and Enhancement Filters + +### eq - Brightness/Contrast/Saturation +```bash +# Adjust brightness (-1.0 to 1.0, default 0) +-vf "eq=brightness=0.2" + +# Adjust contrast (0 to 2.0, default 1) +-vf "eq=contrast=1.3" + +# Adjust saturation (0 to 3.0, default 1) +-vf "eq=saturation=1.5" + +# Adjust gamma (0.1 to 10, default 1) +-vf "eq=gamma=1.2" + +# Combined adjustments +-vf "eq=brightness=0.1:contrast=1.2:saturation=1.3:gamma=0.9" +``` + +### curves - Color Curves +```bash +# Apply preset +-vf "curves=preset=lighter" +-vf "curves=preset=darker" +-vf "curves=preset=increase_contrast" +-vf "curves=preset=strong_contrast" +-vf "curves=preset=vintage" +-vf "curves=preset=negative" + +# Custom curve (all channels) +-vf "curves=all='0/0 0.5/0.4 1/1'" + +# Per-channel curves +-vf "curves=red='0/0 0.5/0.6 1/1':green='0/0 1/1':blue='0/0 0.5/0.4 1/1'" +``` + +### colorbalance - Color Balance +```bash +# Adjust shadows/midtones/highlights for RGB +-vf "colorbalance=rs=0.3:gs=-0.1:bs=0.1:rm=0.1:gm=-0.1:bm=0.1" +``` + +### colortemperature - White Balance +```bash +# Warmer (higher temperature) +-vf "colortemperature=temperature=7000" + +# Cooler (lower temperature) +-vf "colortemperature=temperature=4000" +``` + +### vibrance - Saturation Enhancement +```bash +# Boost vibrance (avoids oversaturating already saturated colors) +-vf "vibrance=intensity=0.5" +``` + +### hue - Hue/Saturation Adjustment 
+
```bash
# Rotate hue (degrees)
-vf "hue=h=90"

# Adjust saturation
-vf "hue=s=2"

# Combined
-vf "hue=h=30:s=1.5"
```

## Blur and Sharpen Filters

### boxblur - Box Blur
```bash
# Simple blur
-vf "boxblur=5:1"

# Separate horizontal and vertical
-vf "boxblur=luma_radius=3:luma_power=2:chroma_radius=2:chroma_power=1"
```

### gblur - Gaussian Blur
```bash
# Gaussian blur
-vf "gblur=sigma=10"
```

### unsharp - Sharpen/Blur
```bash
# Sharpen
-vf "unsharp=5:5:1.0:5:5:0.0"

# Strong sharpen
-vf "unsharp=7:7:2.5:7:7:1.0"

# Blur (negative values)
-vf "unsharp=5:5:-1.0:5:5:-1.0"
```

### cas - Contrast Adaptive Sharpen
```bash
# Subtle sharpening
-vf "cas=strength=0.5"

# Strong sharpening
-vf "cas=strength=1.0"
```

## Noise and Denoise Filters

### hqdn3d - High Quality Denoise
```bash
# Light denoise
-vf "hqdn3d=2:2:3:3"

# Medium denoise
-vf "hqdn3d=4:3:6:4.5"

# Strong denoise
-vf "hqdn3d=8:6:12:9"
```

### nlmeans - Non-Local Means Denoise
```bash
# Standard denoise (slow but high quality)
-vf "nlmeans=s=3.0:p=7:r=15"
```

### noise - Add Noise
```bash
# Add film grain
-vf "noise=alls=20:allf=t+u"
```

## Overlay and Composition Filters

### overlay - Composite Videos/Images
```bash
# Overlay at position
-filter_complex "[0:v][1:v]overlay=10:10"

# Overlay centered
-filter_complex "[0:v][1:v]overlay=(W-w)/2:(H-h)/2"

# Overlay bottom-right corner
-filter_complex "[0:v][1:v]overlay=W-w-10:H-h-10"

# Overlay with transparency
-filter_complex "[0:v][1:v]overlay=10:10:format=auto"
```

### drawtext - Text Overlay
```bash
# Simple text
-vf "drawtext=text='Hello World':fontsize=24:fontcolor=white:x=10:y=10"

# Centered text
-vf "drawtext=text='Centered':fontsize=48:fontcolor=white:x=(w-text_w)/2:y=(h-text_h)/2"

# Text with background box
-vf "drawtext=text='With Box':fontsize=24:fontcolor=white:box=1:boxcolor=black@0.5:boxborderw=5:x=10:y=10"

# Custom font
-vf "drawtext=text='Custom Font':fontfile=/path/to/font.ttf:fontsize=36:fontcolor=yellow:x=100:y=100"

# Timecode overlay
-vf "drawtext=text='%{pts\\:hms}':fontsize=24:fontcolor=white:x=10:y=10"

# Frame number
-vf "drawtext=text='%{frame_num}':fontsize=24:fontcolor=white:x=10:y=10"
```

### drawbox - Draw Rectangle
```bash
# Draw filled box
-vf "drawbox=x=100:y=100:w=200:h=150:color=red@0.5:t=fill"

# Draw border only
-vf "drawbox=x=100:y=100:w=200:h=150:color=blue:t=5"
```

## Chroma Key Filters

### chromakey - Remove Color (YUV)
```bash
# Remove green screen
-vf "chromakey=0x00FF00:0.1:0.2"

# Remove blue screen
-vf "chromakey=0x0000FF:0.1:0.2"
```

### colorkey - Remove Color (RGB)
```bash
# Remove green
-vf "colorkey=0x00FF00:0.1:0.2"
```

## Time Filters

### setpts - Change Speed
```bash
# 2x speed (half duration; video only — pair with atempo for audio)
-vf "setpts=0.5*PTS"

# 0.5x speed (double duration)
-vf "setpts=2.0*PTS"

# Reverse video (buffers the whole clip in memory)
-vf "reverse"
```

### fps - Change Framerate
```bash
# Set output framerate
-vf "fps=30"

# Convert 60fps to 24fps (drops/duplicates frames; fps does not blend)
-vf "fps=24"

# Frame-blended conversion instead
-vf "framerate=fps=24"
```

### trim - Cut Video
```bash
# Trim to range (in seconds)
-vf "trim=start=10:end=20,setpts=PTS-STARTPTS"

# Trim by frame count
-vf "trim=start_frame=100:end_frame=200,setpts=PTS-STARTPTS"
```

### loop - Loop Video
```bash
# Loop the first 1000 frames 5 times
-vf "loop=loop=5:size=1000:start=0"
```

## Transition Filters

### xfade - Cross Fade
```bash
# Cross fade between two videos
-filter_complex 
"[0:v][1:v]xfade=transition=fade:duration=1:offset=4[v]" + +# Available transitions: fade, wipeleft, wiperight, wipeup, wipedown, slideleft, slideright, slideup, slidedown, circlecrop, rectcrop, distance, fadeblack, fadewhite, radial, smoothleft, smoothright, smoothup, smoothdown, circleopen, circleclose, vertopen, vertclose, horzopen, horzclose, dissolve, pixelize, diagtl, diagtr, diagbl, diagbr, hlslice, hrslice, vuslice, vdslice, hblur, fadegrays, wipetl, wipetr, wipebl, wipebr, squeezeh, squeezev, zoomin +``` + +### fade - Fade In/Out +```bash +# Fade in (first 30 frames) +-vf "fade=t=in:st=0:d=1" + +# Fade out (last second) +-vf "fade=t=out:st=9:d=1" + +# Both fade in and out +-vf "fade=t=in:st=0:d=0.5,fade=t=out:st=9.5:d=0.5" +``` + +## Edge Detection and Artistic + +### edgedetect - Edge Detection +```bash +# Basic edge detection +-vf "edgedetect" + +# Colored edges +-vf "edgedetect=mode=colormix" +``` + +### negate - Invert Colors +```bash +-vf "negate" +``` + +### vignette - Vignette Effect +```bash +# Add vignette +-vf "vignette" + +# Custom vignette +-vf "vignette=PI/4" +``` + +### deshake - Video Stabilization +```bash +# Basic stabilization +-vf "deshake" +``` + +### vidstab - Advanced Stabilization (Two-Pass) +```bash +# Pass 1: Analyze +ffmpeg -i input.mp4 -vf vidstabdetect=shakiness=10:accuracy=15 -f null - + +# Pass 2: Transform +ffmpeg -i input.mp4 -vf vidstabtransform=smoothing=30:zoom=5 output.mp4 +``` + +## Audio Filters + +### volume - Adjust Volume +```bash +# Double volume +-af "volume=2.0" + +# Half volume +-af "volume=0.5" + +# Volume in dB +-af "volume=3dB" +``` + +### atempo - Change Audio Speed +```bash +# 2x speed +-af "atempo=2.0" + +# 0.5x speed +-af "atempo=0.5" + +# More than 2x (chain filters) +-af "atempo=2.0,atempo=2.0" # 4x speed +``` + +### afade - Audio Fade +```bash +# Fade in +-af "afade=t=in:st=0:d=3" + +# Fade out +-af "afade=t=out:st=7:d=3" +``` + +### loudnorm - Loudness Normalization +```bash +# Normalize to broadcast standards +-af "loudnorm=I=-16:TP=-1.5:LRA=11" +``` + +### aecho - Add Echo +```bash +# Simple echo +-af "aecho=0.8:0.9:1000:0.3" +``` + +### equalizer - Audio EQ +```bash +# Boost bass +-af "equalizer=f=100:width_type=o:width=2:g=5" + +# Cut high frequencies +-af "equalizer=f=8000:width_type=o:width=2:g=-10" +``` diff --git a/workspace-template/basic-ubuntu.json b/workspace-template/basic-ubuntu.json index 0920480..99d51dc 100644 --- a/workspace-template/basic-ubuntu.json +++ b/workspace-template/basic-ubuntu.json @@ -4,5 +4,5 @@ "distro": "ubuntu-noble", "skills": [], "env_vars": {}, - "init_script": "#!/usr/bin/env bash\nset -euo pipefail\n\nexport DEBIAN_FRONTEND=noninteractive\n\nif command -v apt-get >/dev/null 2>&1; then\n apt-get update\n apt-get install -y --no-install-recommends ca-certificates curl iproute2 iptables busybox git\n git config --global user.name \"Thomas Marchand (agent)\"\n git config --global user.email \"agent@thomas.md\"\nfi\n\nif command -v busybox >/dev/null 2>&1; then\n ln -sf \"$(command -v busybox)\" /usr/local/bin/udhcpc\n ln -sf \"$(command -v busybox)\" /usr/local/bin/udhcpc6\nfi\n\ncat >/usr/local/bin/openagent-udhcpc-script <<'EOF'\n#!/bin/sh\nset -eu\n\nmask2cidr() {\n local nbits=0 IFS=.\n for dec in $1; do\n case $dec in\n 255) nbits=$((nbits+8));;\n 254) nbits=$((nbits+7));;\n 252) nbits=$((nbits+6));;\n 248) nbits=$((nbits+5));;\n 240) nbits=$((nbits+4));;\n 224) nbits=$((nbits+3));;\n 192) nbits=$((nbits+2));;\n 128) nbits=$((nbits+1));;\n 0) ;;\n esac\n done\n echo 
\"$nbits\"\n}\n\ncase \"$1\" in\n deconfig)\n ip addr flush dev \"$interface\" || true\n ;;\n bound|renew)\n ip addr flush dev \"$interface\" || true\n mask_val=${subnet:-${mask:-255.255.255.0}}\n cidr=$(mask2cidr \"$mask_val\")\n ip addr add \"$ip/$cidr\" dev \"$interface\"\n gateway=${router%% *}\n if [ -z \"$gateway\" ]; then\n gateway=10.88.0.1\n fi\n ip route replace default via \"$gateway\" dev \"$interface\"\n if [ -n \"${dns:-}\" ]; then\n printf \"nameserver %s\n\" $dns > /etc/resolv.conf\n fi\n ;;\n esac\nEOF\nchmod +x /usr/local/bin/openagent-udhcpc-script\n\n# Helper to bring up host0 networking (veth) with DHCP + sane DNS\ncat >/usr/local/bin/openagent-network-up <<'EOF'\n#!/usr/bin/env bash\nset -euo pipefail\n\nif ip link show host0 >/dev/null 2>&1; then\n ip link set host0 up\n if command -v udhcpc >/dev/null 2>&1; then\n udhcpc -s /usr/local/bin/openagent-udhcpc-script -i host0 -q -n || true\n elif command -v busybox >/dev/null 2>&1; then\n busybox udhcpc -s /usr/local/bin/openagent-udhcpc-script -i host0 -q -n || true\n fi\nfi\n\nprintf \"nameserver 1.1.1.1\nnameserver 8.8.8.8\n\" >/etc/resolv.conf\nEOF\nchmod +x /usr/local/bin/openagent-network-up\n" + "init_script": "#!/usr/bin/env bash\nset -euo pipefail\n\nexport DEBIAN_FRONTEND=noninteractive\n\nif command -v apt-get >/dev/null 2>&1; then\n apt-get update\n apt-get install -y --no-install-recommends ca-certificates curl iproute2 iptables busybox git\n git config --global user.name \"OpenAgent\"\n git config --global user.email \"agent@localhost\"\nfi\n\nif command -v busybox >/dev/null 2>&1; then\n ln -sf \"$(command -v busybox)\" /usr/local/bin/udhcpc\n ln -sf \"$(command -v busybox)\" /usr/local/bin/udhcpc6\nfi\n\ncat >/usr/local/bin/openagent-udhcpc-script <<'EOF'\n#!/bin/sh\nset -eu\n\nmask2cidr() {\n local nbits=0 IFS=.\n for dec in $1; do\n case $dec in\n 255) nbits=$((nbits+8));;\n 254) nbits=$((nbits+7));;\n 252) nbits=$((nbits+6));;\n 248) nbits=$((nbits+5));;\n 240) nbits=$((nbits+4));;\n 224) nbits=$((nbits+3));;\n 192) nbits=$((nbits+2));;\n 128) nbits=$((nbits+1));;\n 0) ;;\n esac\n done\n echo \"$nbits\"\n}\n\ncase \"$1\" in\n deconfig)\n ip addr flush dev \"$interface\" || true\n ;;\n bound|renew)\n ip addr flush dev \"$interface\" || true\n mask_val=${subnet:-${mask:-255.255.255.0}}\n cidr=$(mask2cidr \"$mask_val\")\n ip addr add \"$ip/$cidr\" dev \"$interface\"\n gateway=${router%% *}\n if [ -z \"$gateway\" ]; then\n gateway=10.88.0.1\n fi\n ip route replace default via \"$gateway\" dev \"$interface\"\n if [ -n \"${dns:-}\" ]; then\n printf \"nameserver %s\n\" $dns > /etc/resolv.conf\n fi\n ;;\n esac\nEOF\nchmod +x /usr/local/bin/openagent-udhcpc-script\n\n# Helper to bring up host0 networking (veth) with DHCP + sane DNS\ncat >/usr/local/bin/openagent-network-up <<'EOF'\n#!/usr/bin/env bash\nset -euo pipefail\n\nif ip link show host0 >/dev/null 2>&1; then\n ip link set host0 up\n if command -v udhcpc >/dev/null 2>&1; then\n udhcpc -s /usr/local/bin/openagent-udhcpc-script -i host0 -q -n || true\n elif command -v busybox >/dev/null 2>&1; then\n busybox udhcpc -s /usr/local/bin/openagent-udhcpc-script -i host0 -q -n || true\n fi\nfi\n\nprintf \"nameserver 1.1.1.1\nnameserver 8.8.8.8\n\" >/etc/resolv.conf\nEOF\nchmod +x /usr/local/bin/openagent-network-up\n" } diff --git a/workspace-template/residential.json b/workspace-template/residential.json index 86d3287..f51b26f 100644 --- a/workspace-template/residential.json +++ b/workspace-template/residential.json @@ -10,5 +10,5 @@ "TS_EXIT_NODE_ALLOW_LAN": 
"false", "TS_STATE_DIR": "/var/lib/tailscale" }, - "init_script": "#!/usr/bin/env bash\nset -euo pipefail\n\nexport DEBIAN_FRONTEND=noninteractive\n\nif command -v apt-get >/dev/null 2>&1; then\n apt-get update\n apt-get install -y --no-install-recommends ca-certificates curl iproute2 iptables busybox git\n git config --global user.name \"Thomas Marchand (agent)\"\n git config --global user.email \"agent@thomas.md\"\nfi\n\nif command -v busybox >/dev/null 2>&1; then\n ln -sf \"$(command -v busybox)\" /usr/local/bin/udhcpc\n ln -sf \"$(command -v busybox)\" /usr/local/bin/udhcpc6\nfi\n\ncat >/usr/local/bin/openagent-udhcpc-script <<'EOF'\n#!/bin/sh\nset -eu\n\nmask2cidr() {\n local nbits=0 IFS=.\n for dec in $1; do\n case $dec in\n 255) nbits=$((nbits+8));;\n 254) nbits=$((nbits+7));;\n 252) nbits=$((nbits+6));;\n 248) nbits=$((nbits+5));;\n 240) nbits=$((nbits+4));;\n 224) nbits=$((nbits+3));;\n 192) nbits=$((nbits+2));;\n 128) nbits=$((nbits+1));;\n 0) ;;\n esac\n done\n echo \"$nbits\"\n}\n\ncase \"$1\" in\n deconfig)\n ip addr flush dev \"$interface\" || true\n ;;\n bound|renew)\n ip addr flush dev \"$interface\" || true\n mask_val=${subnet:-${mask:-255.255.255.0}}\n cidr=$(mask2cidr \"$mask_val\")\n ip addr add \"$ip/$cidr\" dev \"$interface\"\n gateway=${router%% *}\n if [ -z \"$gateway\" ]; then\n gateway=10.88.0.1\n fi\n ip route replace default via \"$gateway\" dev \"$interface\"\n if [ -n \"${dns:-}\" ]; then\n printf \"nameserver %s\n\" $dns > /etc/resolv.conf\n fi\n ;;\n esac\nEOF\nchmod +x /usr/local/bin/openagent-udhcpc-script\n\n# Install Tailscale\nif ! command -v tailscale >/dev/null 2>&1; then\n curl -fsSL https://tailscale.com/install.sh | sh\nfi\n\n# Ensure runtime directories exist\nmkdir -p /var/lib/tailscale /run/tailscale\n\n# Helper to bring up host0 networking (veth) with DHCP + sane DNS\ncat >/usr/local/bin/openagent-network-up <<'EOF'\n#!/usr/bin/env bash\nset -euo pipefail\n\nif ip link show host0 >/dev/null 2>&1; then\n ip link set host0 up\n if command -v udhcpc >/dev/null 2>&1; then\n udhcpc -s /usr/local/bin/openagent-udhcpc-script -i host0 -q -n || true\n elif command -v busybox >/dev/null 2>&1; then\n busybox udhcpc -s /usr/local/bin/openagent-udhcpc-script -i host0 -q -n || true\n fi\nfi\n\nprintf \"nameserver 1.1.1.1\nnameserver 8.8.8.8\n\" >/etc/resolv.conf\nEOF\nchmod +x /usr/local/bin/openagent-network-up\n\n# Helper to bring up Tailscale with an exit node\ncat >/usr/local/bin/openagent-tailscale-up <<'EOF'\n#!/usr/bin/env bash\nset -euo pipefail\n\n: \"${TS_AUTHKEY:?TS_AUTHKEY not set}\"\n: \"${TS_EXIT_NODE:?TS_EXIT_NODE not set}\"\n\nTS_STATE_DIR=\"${TS_STATE_DIR:-/var/lib/tailscale}\"\nTS_SOCKET=\"${TS_SOCKET:-/run/tailscale/tailscaled.sock}\"\nTS_ACCEPT_DNS=\"${TS_ACCEPT_DNS:-true}\"\nTS_EXIT_NODE_ALLOW_LAN=\"${TS_EXIT_NODE_ALLOW_LAN:-false}\"\n\n/usr/local/bin/openagent-network-up >/dev/null 2>&1 || true\nmkdir -p \"$TS_STATE_DIR\" /run/tailscale\n\nif ! 
pgrep -f \"tailscaled\" >/dev/null 2>&1; then\n tailscaled --state=\"${TS_STATE_DIR}/tailscaled.state\" --socket=\"$TS_SOCKET\" &\n sleep 2\nfi\n\ntailscale up --authkey \"${TS_AUTHKEY}\" --exit-node \"${TS_EXIT_NODE}\" --exit-node-allow-lan-access=\"${TS_EXIT_NODE_ALLOW_LAN}\" --accept-dns=\"${TS_ACCEPT_DNS}\"\nEOF\nchmod +x /usr/local/bin/openagent-tailscale-up\n\n# Helper to verify exit node routing\ncat >/usr/local/bin/openagent-tailscale-check <<'EOF'\n#!/usr/bin/env bash\nset -euo pipefail\n\ntailscale status || true\nprintf \"Public IP: \"\n(curl -fsSL https://api.ipify.org || curl -fsSL https://ifconfig.me || true) && echo\nEOF\nchmod +x /usr/local/bin/openagent-tailscale-check\n" + "init_script": "#!/usr/bin/env bash\nset -euo pipefail\n\nexport DEBIAN_FRONTEND=noninteractive\n\nif command -v apt-get >/dev/null 2>&1; then\n apt-get update\n apt-get install -y --no-install-recommends ca-certificates curl iproute2 iptables busybox git\n git config --global user.name \"OpenAgent\"\n git config --global user.email \"agent@localhost\"\nfi\n\nif command -v busybox >/dev/null 2>&1; then\n ln -sf \"$(command -v busybox)\" /usr/local/bin/udhcpc\n ln -sf \"$(command -v busybox)\" /usr/local/bin/udhcpc6\nfi\n\ncat >/usr/local/bin/openagent-udhcpc-script <<'EOF'\n#!/bin/sh\nset -eu\n\nmask2cidr() {\n local nbits=0 IFS=.\n for dec in $1; do\n case $dec in\n 255) nbits=$((nbits+8));;\n 254) nbits=$((nbits+7));;\n 252) nbits=$((nbits+6));;\n 248) nbits=$((nbits+5));;\n 240) nbits=$((nbits+4));;\n 224) nbits=$((nbits+3));;\n 192) nbits=$((nbits+2));;\n 128) nbits=$((nbits+1));;\n 0) ;;\n esac\n done\n echo \"$nbits\"\n}\n\ncase \"$1\" in\n deconfig)\n ip addr flush dev \"$interface\" || true\n ;;\n bound|renew)\n ip addr flush dev \"$interface\" || true\n mask_val=${subnet:-${mask:-255.255.255.0}}\n cidr=$(mask2cidr \"$mask_val\")\n ip addr add \"$ip/$cidr\" dev \"$interface\"\n gateway=${router%% *}\n if [ -z \"$gateway\" ]; then\n gateway=10.88.0.1\n fi\n ip route replace default via \"$gateway\" dev \"$interface\"\n if [ -n \"${dns:-}\" ]; then\n printf \"nameserver %s\n\" $dns > /etc/resolv.conf\n fi\n ;;\n esac\nEOF\nchmod +x /usr/local/bin/openagent-udhcpc-script\n\n# Install Tailscale\nif ! 
command -v tailscale >/dev/null 2>&1; then\n curl -fsSL https://tailscale.com/install.sh | sh\nfi\n\n# Ensure runtime directories exist\nmkdir -p /var/lib/tailscale /run/tailscale\n\n# Helper to bring up host0 networking (veth) with DHCP + sane DNS\ncat >/usr/local/bin/openagent-network-up <<'EOF'\n#!/usr/bin/env bash\nset -euo pipefail\n\nif ip link show host0 >/dev/null 2>&1; then\n ip link set host0 up\n if command -v udhcpc >/dev/null 2>&1; then\n udhcpc -s /usr/local/bin/openagent-udhcpc-script -i host0 -q -n || true\n elif command -v busybox >/dev/null 2>&1; then\n busybox udhcpc -s /usr/local/bin/openagent-udhcpc-script -i host0 -q -n || true\n fi\nfi\n\nprintf \"nameserver 1.1.1.1\nnameserver 8.8.8.8\n\" >/etc/resolv.conf\nEOF\nchmod +x /usr/local/bin/openagent-network-up\n\n# Helper to bring up Tailscale with an exit node\ncat >/usr/local/bin/openagent-tailscale-up <<'EOF'\n#!/usr/bin/env bash\nset -euo pipefail\n\n: \"${TS_AUTHKEY:?TS_AUTHKEY not set}\"\n: \"${TS_EXIT_NODE:?TS_EXIT_NODE not set}\"\n\nTS_STATE_DIR=\"${TS_STATE_DIR:-/var/lib/tailscale}\"\nTS_SOCKET=\"${TS_SOCKET:-/run/tailscale/tailscaled.sock}\"\nTS_ACCEPT_DNS=\"${TS_ACCEPT_DNS:-true}\"\nTS_EXIT_NODE_ALLOW_LAN=\"${TS_EXIT_NODE_ALLOW_LAN:-false}\"\n\n/usr/local/bin/openagent-network-up >/dev/null 2>&1 || true\nmkdir -p \"$TS_STATE_DIR\" /run/tailscale\n\nif ! pgrep -f \"tailscaled\" >/dev/null 2>&1; then\n tailscaled --state=\"${TS_STATE_DIR}/tailscaled.state\" --socket=\"$TS_SOCKET\" &\n sleep 2\nfi\n\ntailscale up --authkey \"${TS_AUTHKEY}\" --exit-node \"${TS_EXIT_NODE}\" --exit-node-allow-lan-access=\"${TS_EXIT_NODE_ALLOW_LAN}\" --accept-dns=\"${TS_ACCEPT_DNS}\"\nEOF\nchmod +x /usr/local/bin/openagent-tailscale-up\n\n# Helper to verify exit node routing\ncat >/usr/local/bin/openagent-tailscale-check <<'EOF'\n#!/usr/bin/env bash\nset -euo pipefail\n\ntailscale status || true\nprintf \"Public IP: \"\n(curl -fsSL https://api.ipify.org || curl -fsSL https://ifconfig.me || true) && echo\nEOF\nchmod +x /usr/local/bin/openagent-tailscale-check\n" }