From 20d3a8c4bfb3356b14b5458d41a6562975c97009 Mon Sep 17 00:00:00 2001 From: Travis Vasceannie Date: Sat, 22 Nov 2025 01:14:11 +0000 Subject: [PATCH] . --- .gitignore | 3 +- .repomixignore | 4 + CLAUDE.md | 163 ++++++++++++++++++++++++++++++++++ docs/rolling_feedback.md | 187 +++++++++++++++++++++++++++++++++++++++ repomix.config.json | 41 +++++++++ 5 files changed, 397 insertions(+), 1 deletion(-) create mode 100644 .repomixignore create mode 100644 CLAUDE.md create mode 100644 docs/rolling_feedback.md create mode 100644 repomix.config.json diff --git a/.gitignore b/.gitignore index e58f7d6..71da009 100644 --- a/.gitignore +++ b/.gitignore @@ -2,7 +2,8 @@ __pycache__/ *.py[cod] *$py.class - +repomix-output.md +logs/ # C extensions *.so diff --git a/.repomixignore b/.repomixignore new file mode 100644 index 0000000..c63e5bc --- /dev/null +++ b/.repomixignore @@ -0,0 +1,4 @@ +# Add patterns to ignore here, one per line +# Example: +# *.log +# tmp/ diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..c5d7022 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,163 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Project Overview + +A FastAPI-based guided demo platform that automates browser interactions with Raindrop using Playwright. The app executes data-driven actions (stored in `ActionRegistry`) on behalf of personas that target configured browser hosts (CDP or headless). All configuration is externalized via YAML files and environment overrides. 
+ +**Entry Point:** `python -m guide` (runs `src/guide/main.py` → `guide.app.main:app`) +**Python Version:** 3.12+ +**Key Dependencies:** FastAPI, Playwright, Pydantic v2, PyYAML, httpx + +## Essential Commands + +```bash +# Install dependencies +uv sync + +# Type checking (required before commits) +basedpyright src + +# Compile sanity check +python -m compileall src/guide + +# Run development server (default: localhost:8000) +python -m guide +# or with custom host/port: +HOST=127.0.0.1 PORT=9000 python -m guide + +# View API docs +# Navigate to http://localhost:8000/docs + +# Key endpoints: +# GET /healthz # liveness check +# GET /actions # list action metadata +# POST /actions/{id}/execute # execute action; returns ActionEnvelope with correlation_id +# GET /config/browser-hosts # view current default + host map +``` + +## Code Structure + +**Root module:** `src/guide/app/` + +- **`actions/`** — FastAPI-triggered demo actions; thin, declarative, action-scoped side effects. Registry wiring happens in `actions/registry.py`. +- **`auth/`** — Pluggable MFA/auth helpers (currently `DummyMfaCodeProvider`; needs real provider for production). +- **`browser/`** — `BrowserClient` + Playwright wrappers; centralizes navigation, timeouts, error handling, tracing. Handles both CDP attach and headless launch. +- **`core/`** — App bootstrap: `AppSettings` (Pydantic v2 w/ env prefix `RAINDROP_DEMO_`), logging, dependency wiring, venv detection. +- **`errors/`** — `GuideError` hierarchy; routes normalize error responses to HTTP status + payload. +- **`raindrop/`** — GraphQL client + operations. Queries/mutations defined in `raindrop/operations`, schemas/types colocated in `strings/`. +- **`strings/`** — Centralized selectors, labels, copy, GraphQL strings (no inline literals in actions). Service enforces strict key lookup to catch UI mismatches early. +- **`models/`** — Domain/persona models. `PersonaStore` loads from config; use Pydantic v2 with explicit types. 
+- **`utils/`** — Shared helpers. Keep <300 LoC per file; avoid circular imports. +- **`api/`** — FastAPI routers; map requests → `ActionRegistry` → `BrowserClient` → responses. + +**Config files (git-tracked):** +- `config/hosts.yaml` — Browser host targets (id, kind: cdp|headless, host, port, browser type). +- `config/personas.yaml` — Personas (id, role, email, login_method, browser_host_id). + +**Config overrides (runtime only, never commit):** +- `RAINDROP_DEMO_BROWSER_HOSTS_JSON` — JSON array overrides `hosts.yaml`. +- `RAINDROP_DEMO_PERSONAS_JSON` — JSON array overrides `personas.yaml`. +- `RAINDROP_DEMO_RAINDROP_BASE_URL` — Override default `https://app.raindrop.com`. + +## Architecture Patterns + +### App Initialization (main.py → create_app) + +1. Load `AppSettings` (env + YAML). +2. Build `PersonaStore` from config. +3. Build `ActionRegistry` with default actions (dependency-injected with persona store + Raindrop URL). +4. Create `BrowserClient` (manages Playwright contexts/browsers, handles CDP + headless). +5. Stash instances on `app.state` for dependency injection in routers. +6. Register error handlers (GuideError → HTTP, unhandled → 500 + logging). + +### Action Execution Pipeline + +- Request: `POST /actions/{action_id}/execute` with `ActionRequest` (persona_id, host_id, etc.). +- Router resolves persona + host from config → validates persona exists. +- `BrowserClient.open_page()` — resolves host by ID → CDP attach or headless launch → reuse existing Raindrop page. +- `Action.run(context)` — executes logic; may call `ensure_persona()` (login flow) before starting. +- Response: `ActionEnvelope` with correlation_id (from `ActionContext`) + status + result. + +### Browser Host Resolution + +- `kind: cdp` — connect to running Raindrop instance (requires `host` + `port` in config). Errors surface as `BrowserConnectionError`. +- `kind: headless` — launch Playwright browser (chromium/firefox/webkit); set `browser` field in config. 
+- Always use `async with BrowserClient.open_page()` to ensure proper cleanup. + +### GraphQL & Data Layer + +- `raindrop/graphql.py` — HTTP client (httpx, 10s timeout). +- `raindrop/operations/` — query/mutation definitions + response models. +- Validate all responses with Pydantic models; schema mismatches → `GuideError`. +- Never embed tokens/URLs; pull from `AppSettings` (env-driven). +- Transport errors → `GraphQLTransportError`; operation errors → `GraphQLOperationError` (includes `details` from server). + +### Selector & String Management (strings/) + +- Keep all selectors, labels, copy, GraphQL queries in `strings/` submodules. +- Use `strings.service` (enforces domain-keyed lookups); missing keys raise immediately. +- Selectors should be reusable and labeled; avoid brittle text selectors—prefer `data-testid` or aria labels. + +## Development Workflow + +1. **Edit code** (actions, browser logic, GraphQL ops, etc.). +2. **Run type check:** `basedpyright src` (catches generic types, missing annotations). +3. **Sanity compile:** `python -m compileall src/guide` (syntax check). +4. **Smoke test:** `python -m guide` then hit `/docs` or manual test via curl. +5. **Review error handling:** ensure `GuideError` subclasses are raised, not generic exceptions. +6. **Commit** with scoped, descriptive message (e.g., `feat: add auth login action`, `chore: tighten typing`). + +## Type & Linting Standards + +- **Python 3.12+:** Use PEP 604 unions (`str | None`), built-in generics (`list[str]`, `dict[str, JSONValue]`). +- **Ban `Any` and `# type: ignore`:** Use type guards or Protocol instead. +- **Pydantic v2:** Explicit types, model_validate for parsing, model_copy for immutable updates. +- **Type checker:** Pyright (via basedpyright). +- **Docstrings:** Imperative style, document public APIs, include usage examples. + +## Error Handling & Logging + +- Always raise `GuideError` subclasses (not generic `Exception`); routers translate to HTTP responses. 
+- Log via `core/logging` (structured, levelled). Include persona/action IDs and host targets for traceability. +- For browser flows, use Playwright traces (enabled by default in `BrowserClient`); disable only intentionally. +- Validate external inputs early; surface schema/connection issues as `GuideError`. + +## Testing & Quality Gates + +- **Minimum gate:** `basedpyright src` + `python -m compileall src/guide` before merge. +- Add unit tests under `tests/` alongside code (not yet in structure, but expected). +- Mock Playwright/GraphQL in tests; avoid real network/CDP calls. +- Require deterministic fixtures; document any env vars needed in test module docstring. + +## MFA & Auth + +- Default `DummyMfaCodeProvider` raises `NotImplementedError`. +- For real runs, implement a provider and wire it in `core/config.py` or `auth/` modules. +- `ensure_persona()` in actions calls the provider; stub or override for demo/test execution. + +## Performance & Footprint + +- Keep browser sessions short-lived; close contexts to avoid handle leaks. +- Cache expensive GraphQL lookups (per-request OK, global only if safe). +- Don't widen dependencies without justification; stick to project pins in `pyproject.toml`. +- Promptly close Playwright contexts/browser handles (wrapped in contextmanager; keep action code lean). + +## Git & PR Hygiene + +- Scoped, descriptive commits (e.g., `feat: add sourcing action`, `fix: handle missing persona host`). +- PRs should state changes, commands run, new config entries (hosts/personas). +- Link related issues; include screenshots/logs for UI or API behavior changes. +- Never commit credentials, MFA tokens, or sensitive config; use env overrides. + +## Quick Checklist (New Feature) + +- [ ] Add/verify action in `actions/` with thin logic; use `strings/` for selectors/copy. +- [ ] Ensure persona/host exist in `config/hosts.yaml` + `config/personas.yaml` (or use env overrides). +- [ ] Run `basedpyright src` + `python -m compileall src/guide`. 
+- [ ] Test via `python -m guide` + `/docs` or manual curl. +- [ ] Add GraphQL queries to `raindrop/operations/` if needed; validate responses with Pydantic. +- [ ] If an auth flow is required, implement or mock an MFA provider. +- [ ] Review error handling; raise `GuideError` subclasses. +- [ ] Commit with descriptive message. diff --git a/docs/rolling_feedback.md b/docs/rolling_feedback.md new file mode 100644 index 0000000..00e276d --- /dev/null +++ b/docs/rolling_feedback.md @@ -0,0 +1,187 @@ +This is a well-structured application. You have successfully separated concerns (Actions vs. API vs. Browser Control vs. Domain logic), effectively used **Pydantic** for data validation, and established a clear pattern for your automation logic (the **Action pattern**). + +However, as you scale from a "Demo" to a larger production system with dozens of actions and higher concurrency, specific architectural choices in this scaffold will become bottlenecks. + +Here is a critique of your project structure with actionable design decisions for scalability and organization. + +--- + +### 1. The "Strings" Abstraction: Runtime vs. Static Analysis +**Location:** `src/guide/app/strings/` + +**Critique:** +You have a sophisticated system (`service.py`) that dynamically resolves strings using `getattr`: +```python +# Current approach +description_val = strings.text("INTAKE", "CONVEYOR_BELT_REQUEST") +``` +While this centralizes text, it breaks **Static Analysis** and **IDE Autocompletion**. +1. If you mistype `"INTAKE"`, you won't find out until runtime. +2. Refactoring tools (Rename Symbol) won't work across your codebase. +3. The `_SELECTORS` and `_LABELS` dict mappings must be updated manually. + +**Design Decision:** +Replace the dynamic lookup service with **Static Nested Classes** or **Pydantic Models**. This keeps the centralization but restores developer ergonomics. 
+ +**Proposed Change:** +```python +# src/guide/app/strings/registry.py +class IntakeStrings: + description_field = '[data-test="intake-description"]' + conveyor_request = "Requesting a conveyor belt..." + +class AppStrings: + intake = IntakeStrings() + auth = AuthStrings() + +strings = AppStrings() + +# Usage (Type-safe, Autocompletable) +val = strings.intake.conveyor_request +``` + +### 2. Action Registry Scalability (The Open/Closed Principle) +**Location:** `src/guide/app/actions/registry.py` + +**Critique:** +Currently, you manually import and instantiate every action in `registry.py`: +```python +actions: list[DemoAction] = [ + LoginAsPersonaAction(...), + FillIntakeBasicAction(), + # ... as this grows to 100 actions, this file becomes a merge-conflict nightmare +] +``` +This violates the Open/Closed Principle. To add a new action, you must modify the registry file. + +**Design Decision:** +Use a **Decorator-based Registration** pattern or **Dynamic Discovery**. + +**Proposed Change:** +Create a decorator that registers the class to a singleton registry upon import. + +```python +# In src/guide/app/actions/base.py +action_registry = {} + +def register_action(cls): + action_registry[cls.id] = cls + return cls + +# In src/guide/app/actions/intake/basic.py +@register_action +class FillIntakeBasicAction(DemoAction): + id = "fill-intake-basic" + # ... +``` +*Then, in `main.py`, you simply import the `actions` package, and the registry populates automatically.* + +### 3. Browser Resource Management (Performance) +**Location:** `src/guide/app/browser/client.py` + +**Critique:** +Your `open_page` context manager appears to launch a browser or connect via CDP for *every single action request*: +```python +browser = await playwright.chromium.connect_over_cdp(cdp_url) +# ... +await browser.close() +``` +Browser startup/connection is the most expensive part of automation. If you receive 10 requests/second, this will choke the host machine. 
+ +**Design Decision:** +Implement **Browser Context Pooling**. +1. The Application startup should initialize the `Browser` object (keep the connection open). +2. Each `Action` request should only create a new `BrowserContext` (incognito window equivalent) or `Page`. +3. Closing a Context is instant; closing a Browser is slow. + +### 4. Configuration Complexity +**Location:** `src/guide/app/core/config.py` + +**Critique:** +You have written significant custom logic (`_normalize_records`, `_coerce_mapping`) to handle YAML/JSON loading and merging. This is "infrastructure code" that you have to maintain and debug. + +**Design Decision:** +Offload this to libraries designed for it. Since you are already using Pydantic, use **`pydantic-settings`** with standard loaders, or **Hydra** if the config is hierarchical. + +Minimize custom parsing logic. If you need to support dynamic personas/hosts, consider loading them from the database or a simple JSON file without trying to merge/normalize complex structures manually. + +### 5. GraphQL Type Safety +**Location:** `src/guide/app/raindrop/` + +**Critique:** +Your GraphQL client returns untyped dictionaries: +```python +# src/guide/app/raindrop/operations/sourcing.py +result = data.get("createIntakeRequest") # result is JSONValue (Any) +``` +As the external API changes, your code will break silently. + +**Design Decision:** +Use **Code Generation**. Tools like `ariadne-codegen` or `gql` can read the Raindrop GraphQL schema (`schema.graphql`) and your query strings, then generate Pydantic models for the responses. + +**Result:** +```python +# Instead of dict access +result = await client.create_intake(...) +print(result.create_intake_request.id) # Fully typed +``` + +### 6. Dependency Injection (DI) Strategy +**Location:** `src/guide/app/api/routes/actions.py` + +**Critique:** +You are using a mix of approaches. +1. `app.state` accessed via `Request`. +2. Dependencies passed into Action `__init__`. +3. 
Context passed into Action `run`. + +The use of `cast(AppStateProtocol, cast(object, app.state))` is verbose and un-Pythonic. + +**Design Decision:** +Standardize on **FastAPI Dependency Injection**. +Create a strictly typed `Deps` module. + +```python +# src/guide/app/deps.py +def get_registry(request: Request) -> ActionRegistry: + return request.app.state.registry + +# In router +@router.post("/actions/{action_id}/execute") +async def run_action( + registry: Annotated[ActionRegistry, Depends(get_registry)], + # ... +): + pass +``` +Remove `Action` instantiation from the registry. The Registry should hold *Classes*, and the Router should instantiate them, injecting dependencies (like the Browser Client) at runtime. This makes unit testing Actions significantly easier because you don't have to mock the entire registry setup. + +### Summary of Recommended Structure Changes + +```text +src/guide/app/ + actions/ + # Use decorators for registration + registry.py <-- Logic to hold class references, not instances + base.py <-- Base class handles common DI logic + browser/ + pool.py <-- NEW: Manages long-lived Browser connections + client.py <-- Requests a Page from the Pool + strings/ + # Refactor to static classes/Pydantic models + registry.py <-- Actual string data + raindrop/ + codegen/ <-- Generated Pydantic models for GraphQL +``` + +### Final Verdict +The project is currently at **Level 2 (Robust Prototype)**. +* It works. +* It's readable. +* It handles errors well. + +To get to **Level 3 (Production Scale)**, you must: +1. Remove dynamic string lookups (for developer experience). +2. Pool browser connections (for Performance). +3. Automate action registration (for Maintenance). +4. Generate GraphQL types (for Reliability). 
\ No newline at end of file diff --git a/repomix.config.json b/repomix.config.json new file mode 100644 index 0000000..4df4325 --- /dev/null +++ b/repomix.config.json @@ -0,0 +1,41 @@ +{ + "$schema": "https://repomix.com/schemas/latest/schema.json", + "input": { + "maxFileSize": 52428800 + }, + "output": { + "filePath": "repomix-output.md", + "style": "markdown", + "parsableStyle": true, + "fileSummary": true, + "directoryStructure": true, + "files": true, + "removeComments": false, + "removeEmptyLines": true, + "compress": true, + "topFilesLength": 5, + "showLineNumbers": true, + "truncateBase64": true, + "copyToClipboard": false, + "tokenCountTree": false, + "git": { + "sortByChanges": true, + "sortByChangesMaxCommits": 100, + "includeDiffs": false, + "includeLogs": false, + "includeLogsCount": 50 + } + }, + "include": ["src/"], + "ignore": { + "useGitignore": true, + "useDefaultPatterns": true, + "customPatterns": [] + }, + "security": { + "enableSecurityCheck": true + }, + "tokenCount": { + "encoding": "o200k_base" + } +} \ No newline at end of file