This commit is contained in:
2025-11-22 01:47:00 +00:00
parent 20d3a8c4bf
commit b79a545a4d
3 changed files with 446 additions and 0 deletions

View File

@@ -0,0 +1,228 @@
"""Browser connection pooling for efficient resource management.
This module manages long-lived browser connections per host, avoiding the
expensive overhead of launching/connecting to browsers on each request.

Architecture:
    - BrowserPool: Manages the lifecycle of browser instances by host
    - Per CDP host: Single persistent connection, multiple pages
    - Per Headless host: Single persistent browser, multiple contexts
"""
import asyncio
import contextlib
import logging
from collections.abc import AsyncIterator

from playwright.async_api import Browser, BrowserContext, Page, Playwright, async_playwright

from guide.app import errors
from guide.app.core.config import AppSettings, BrowserHostConfig, HostKind
# Module-level logger named after this module's import path.
_logger = logging.getLogger(__name__)
class BrowserInstance:
    """Wrap one Playwright browser connection for a single configured host.

    The instance hands out pages through :meth:`allocate_page` and remembers
    every browser context it creates itself so that :meth:`close` can tear
    them down. Contexts that already exist in a CDP-attached browser are
    never added to that list and are only released when the browser
    connection itself is closed.
    """

    def __init__(self, host_id: str, host_config: BrowserHostConfig, browser: Browser) -> None:
        """Initialize a browser instance for a host.

        Args:
            host_id: The host identifier
            host_config: The host configuration
            browser: The Playwright browser instance
        """
        self.host_id = host_id
        self.host_config = host_config
        self.browser = browser
        # Only contexts created by allocate_page() (non-CDP hosts) are tracked here.
        self._contexts: list[BrowserContext] = []

    async def allocate_page(self) -> Page:
        """Allocate a page from the browser.

        For CDP hosts an already-open page of the attached browser is
        returned. For every other host kind a fresh context is created and a
        new page is opened inside it.

        Returns:
            A Playwright page.

        Raises:
            BrowserConnectionError: For CDP hosts, if the browser has no open
                pages.
        """
        if self.host_config.kind == HostKind.CDP:
            # CDP: reuse existing pages from Raindrop browser
            return self._pick_raindrop_page()

        # Headless: create a new context and page
        context = await self.browser.new_context()
        try:
            page = await context.new_page()
        except Exception:
            # Do not keep a page-less context open until pool shutdown.
            with contextlib.suppress(Exception):
                await context.close()
            raise
        # Track the context only once it actually carries a page.
        self._contexts.append(context)
        return page

    def _pick_raindrop_page(self) -> Page:
        """Find and return an existing page from the attached browser.

        Pages from all contexts are considered in context order. The most
        recently listed page whose URL contains ``raindrop.io`` wins; if no
        page matches, the last page overall is returned.

        Raises:
            BrowserConnectionError: If no pages are available in the browser
        """
        raindrop_url_snippet = "raindrop.io"  # Common URL pattern
        pages: list[Page] = [
            page for context in self.browser.contexts for page in context.pages
        ]
        if not pages:
            raise errors.BrowserConnectionError(
                f"No pages available in {self.host_id} browser"
            )
        # Walk backwards so the last-listed matching page is preferred.
        for page in reversed(pages):
            if raindrop_url_snippet in (page.url or ""):
                return page
        # No Raindrop page found: fall back to any page.
        return pages[-1]

    async def close(self) -> None:
        """Close all tracked contexts and the browser connection.

        Shutdown is best-effort: errors raised while closing a context or the
        browser are suppressed so that the remaining resources are still
        released.
        """
        for context in self._contexts:
            with contextlib.suppress(Exception):
                await context.close()
        self._contexts.clear()
        with contextlib.suppress(Exception):
            await self.browser.close()
class BrowserPool:
    """Manages browser instances across multiple hosts.

    Maintains one persistent browser connection per host, allocating pages
    on demand and managing the lifecycle of connections.

    Instance creation and teardown are serialised with an ``asyncio.Lock``:
    ``get_page`` awaits while a browser is being opened, so without the lock
    two concurrent first requests for the same host would each open a
    browser and one of them would be overwritten in the registry and never
    closed. An instance whose browser reports itself as disconnected is
    discarded and replaced on the next request for that host.
    """

    def __init__(self, settings: AppSettings) -> None:
        """Initialize the browser pool.

        Args:
            settings: Application settings containing host configurations
        """
        self.settings = settings
        self._instances: dict[str, BrowserInstance] = {}
        self._playwright: Playwright | None = None
        self._closed = False
        # Guards _playwright and _instances across await points.
        self._lock = asyncio.Lock()

    async def initialize(self) -> None:
        """Initialize the browser pool.

        Starts the Playwright instance. Note that browser connections are
        created lazily on first request to avoid startup delays. Calling this
        on an already initialized pool is a no-op; calling it after
        ``close()`` starts a fresh Playwright instance.
        """
        async with self._lock:
            if self._playwright is not None:
                return
            self._playwright = await async_playwright().start()
            # A pool restarted after close() must be closable again; otherwise
            # the next close() would return early and leak Playwright.
            self._closed = False
            _logger.info("Browser pool initialized")

    async def close(self) -> None:
        """Close all browser connections and the Playwright instance.

        Shutdown is best-effort: errors from individual instances or from
        stopping Playwright are suppressed. Repeated calls are no-ops until
        the pool is initialized again.
        """
        if self._closed:
            return
        self._closed = True
        async with self._lock:
            for instance in self._instances.values():
                with contextlib.suppress(Exception):
                    await instance.close()
            self._instances.clear()
            if self._playwright:
                with contextlib.suppress(Exception):
                    await self._playwright.stop()
                self._playwright = None
        _logger.info("Browser pool closed")

    async def get_page(self, host_id: str | None = None) -> Page:
        """Get a page from the pool for the specified host.

        Lazily creates browser connections on first request per host.

        Args:
            host_id: The host identifier, or None for the default host

        Returns:
            A Playwright Page instance

        Raises:
            ConfigError: If the pool is not initialized, or the host_id is
                invalid or not configured
            BrowserConnectionError: If the browser connection fails
        """
        if self._playwright is None:
            raise errors.ConfigError("Browser pool not initialized. Call initialize() first.")

        resolved_id = host_id or self.settings.default_browser_host_id
        host_config = self.settings.browser_hosts.get(resolved_id)
        if not host_config:
            known = ", ".join(self.settings.browser_hosts.keys()) or "<none>"
            raise errors.ConfigError(f"Unknown browser host '{resolved_id}'. Known: {known}")

        instance = await self._get_or_create_instance(resolved_id, host_config)
        # Page allocation happens outside the lock so hosts do not block each other.
        return await instance.allocate_page()

    async def _get_or_create_instance(
        self, host_id: str, host_config: BrowserHostConfig
    ) -> BrowserInstance:
        """Return the live instance for ``host_id``, creating or replacing it if needed."""
        async with self._lock:
            # close() may have run while this call was waiting for the lock.
            if self._playwright is None:
                raise errors.ConfigError("Browser pool not initialized. Call initialize() first.")

            instance = self._instances.get(host_id)
            if instance is not None and not instance.browser.is_connected():
                # The browser went away (e.g. remote CDP browser restarted):
                # drop the stale entry instead of handing out dead pages.
                _logger.warning(f"Browser for host '{host_id}' is disconnected; reconnecting")
                del self._instances[host_id]
                with contextlib.suppress(Exception):
                    await instance.close()
                instance = None

            if instance is None:
                instance = await self._create_instance(host_id, host_config)
                self._instances[host_id] = instance
            return instance

    async def _create_instance(self, host_id: str, host_config: BrowserHostConfig) -> BrowserInstance:
        """Create a new browser instance for the given host."""
        assert self._playwright is not None
        if host_config.kind == HostKind.CDP:
            browser = await self._connect_cdp(host_config)
        else:
            browser = await self._launch_headless(host_config)
        instance = BrowserInstance(host_id, host_config, browser)
        _logger.info(f"Created browser instance for host '{host_id}' ({host_config.kind})")
        return instance

    async def _connect_cdp(self, host_config: BrowserHostConfig) -> Browser:
        """Connect to a CDP host.

        Raises:
            ConfigError: If the configuration lacks ``host`` or ``port``
            BrowserConnectionError: If the CDP endpoint cannot be reached
        """
        assert self._playwright is not None
        if not host_config.host or host_config.port is None:
            raise errors.ConfigError("CDP host requires 'host' and 'port' fields.")
        cdp_url = f"http://{host_config.host}:{host_config.port}"
        try:
            browser = await self._playwright.chromium.connect_over_cdp(cdp_url)
        except Exception as exc:
            raise errors.BrowserConnectionError(
                f"Cannot connect to CDP endpoint {cdp_url}",
                details={"host": host_config.host, "port": host_config.port},
            ) from exc
        _logger.info(f"Connected to CDP endpoint: {cdp_url}")
        return browser

    async def _launch_headless(self, host_config: BrowserHostConfig) -> Browser:
        """Launch a headless browser.

        Raises:
            ConfigError: If the configured browser type is not supported
            BrowserConnectionError: If the browser fails to launch
        """
        assert self._playwright is not None
        browser_type = self._resolve_browser_type(host_config.browser)
        # Same default as _resolve_browser_type, so messages never show "None".
        browser_name = host_config.browser or "chromium"
        try:
            browser = await browser_type.launch(headless=True)
        except Exception as exc:
            raise errors.BrowserConnectionError(
                f"Cannot launch headless browser: {browser_name}",
                details={"browser_type": host_config.browser},
            ) from exc
        _logger.info(f"Launched headless browser: {browser_name}")
        return browser

    def _resolve_browser_type(self, browser: str | None):
        """Resolve browser type from configuration.

        Args:
            browser: Configured browser name (case-insensitive); ``None`` or
                an empty string selects chromium.

        Returns:
            The matching Playwright browser type (chromium, firefox or webkit).

        Raises:
            ConfigError: If the name is not one of the supported types
        """
        assert self._playwright is not None
        desired = (browser or "chromium").lower()
        if desired == "chromium":
            return self._playwright.chromium
        if desired == "firefox":
            return self._playwright.firefox
        if desired == "webkit":
            return self._playwright.webkit
        raise errors.ConfigError(f"Unsupported browser type '{browser}'")
# Public API of the browser pool module.
__all__ = [
    "BrowserInstance",
    "BrowserPool",
]

View File

@@ -0,0 +1,67 @@
"""Type-safe GraphQL response models for Raindrop API operations.
These models are generated/maintained to match the GraphQL schema and queries.
They provide type safety and IDE support for GraphQL responses.
Generated from queries in guide.app.strings.graphql.*
"""
from pydantic import BaseModel, Field, ConfigDict
class IntakeRequestData(BaseModel):
    """Intake request record as returned by Raindrop.

    All three fields are required strings.
    """

    id: str
    title: str
    status: str
class GetIntakeRequestResponse(BaseModel):
    """Response type for GetIntakeRequest query.

    The payload key is ``intakeRequest``; it is exposed as ``intake_request``
    and is ``None`` when the key is absent. Unknown keys are ignored.
    """

    # populate_by_name lets code build the model with the Python field name as
    # well as the GraphQL alias. Without it, `intake_request=...` is treated
    # as an unknown key and silently dropped because extra="ignore".
    model_config = ConfigDict(extra="ignore", populate_by_name=True)

    intake_request: IntakeRequestData | None = Field(default=None, alias="intakeRequest")
class CreateIntakeRequestResponse(BaseModel):
    """Response type for CreateIntakeRequest mutation.

    The payload key is ``createIntakeRequest``; it is exposed as
    ``create_intake_request`` and is ``None`` when the key is absent.
    Unknown keys are ignored.
    """

    # populate_by_name lets code build the model with the Python field name as
    # well as the GraphQL alias. Without it, `create_intake_request=...` is
    # treated as an unknown key and silently dropped because extra="ignore".
    model_config = ConfigDict(extra="ignore", populate_by_name=True)

    create_intake_request: IntakeRequestData | None = Field(
        default=None, alias="createIntakeRequest"
    )
class SupplierData(BaseModel):
    """Supplier record as returned by Raindrop.

    ``id`` and ``name`` are required; ``status`` is optional and defaults to
    ``None``.
    """

    id: str
    name: str
    status: str | None = None
class ListSuppliersResponse(BaseModel):
    """Response type for ListSuppliers query.

    ``suppliers`` defaults to a fresh empty list when the key is missing.
    Unknown keys are ignored.
    """

    model_config = ConfigDict(extra="ignore")

    suppliers: list[SupplierData] = Field(default_factory=list)
class AddSupplierResponse(BaseModel):
    """Response type for AddSupplier mutation.

    The payload key is ``addSupplier``; it is exposed as ``add_supplier`` and
    is ``None`` when the key is absent. Unknown keys are ignored.
    """

    # populate_by_name lets code build the model with the Python field name as
    # well as the GraphQL alias. Without it, `add_supplier=...` is treated as
    # an unknown key and silently dropped because extra="ignore".
    model_config = ConfigDict(extra="ignore", populate_by_name=True)

    add_supplier: SupplierData | None = Field(default=None, alias="addSupplier")
# Public API: response wrappers first, then the record types they embed.
__all__ = [
    "AddSupplierResponse",
    "CreateIntakeRequestResponse",
    "GetIntakeRequestResponse",
    "ListSuppliersResponse",
    "IntakeRequestData",
    "SupplierData",
]

View File

@@ -0,0 +1,151 @@
"""Static type-safe registry for UI strings, selectors, labels, and GraphQL queries.
This module replaces the dynamic getattr-based lookup service with a statically-typed
nested class structure, enabling IDE autocompletion, rename refactoring, and type safety.

Usage:
    from guide.app.strings import app_strings

    # Selectors (type-safe, autocomplete-friendly)
    selector = app_strings.intake.selectors.description_field

    # Labels
    label = app_strings.intake.labels.description_placeholder

    # Demo text
    text = app_strings.intake.texts.conveyor_belt_request

    # GraphQL queries
    query = app_strings.graphql.get_intake_request
"""
from typing import ClassVar
from guide.app.strings.graphql import (
ADD_SUPPLIER,
CREATE_INTAKE_REQUEST,
GET_INTAKE_REQUEST,
LIST_SUPPLIERS,
)
from guide.app.strings.demo_texts import IntakeTexts, SupplierTexts
from guide.app.strings.labels import AuthLabels, IntakeLabels, SourcingLabels
from guide.app.strings.selectors import (
AuthSelectors,
IntakeSelectors,
NavigationSelectors,
SourcingSelectors,
)
class GraphQLStrings:
    """Namespace exposing the GraphQL documents as class-level string constants."""

    # Intake request documents
    get_intake_request: ClassVar[str] = GET_INTAKE_REQUEST
    create_intake_request: ClassVar[str] = CREATE_INTAKE_REQUEST

    # Supplier documents
    list_suppliers: ClassVar[str] = LIST_SUPPLIERS
    add_supplier: ClassVar[str] = ADD_SUPPLIER
class IntakeStrings:
    """Strings for the intake flow, grouped into selectors, labels and demo texts."""

    # Element selectors, re-exported from IntakeSelectors.
    class _Selectors:
        description_field: ClassVar[str] = IntakeSelectors.DESCRIPTION_FIELD
        next_button: ClassVar[str] = IntakeSelectors.NEXT_BUTTON

    selectors: ClassVar[type[_Selectors]] = _Selectors

    # Visible labels, re-exported from IntakeLabels.
    class _Labels:
        description_placeholder: ClassVar[str] = IntakeLabels.DESCRIPTION_PLACEHOLDER
        next_button: ClassVar[str] = IntakeLabels.NEXT_BUTTON

    labels: ClassVar[type[_Labels]] = _Labels

    # Demo texts, re-exported from IntakeTexts.
    class _Texts:
        conveyor_belt_request: ClassVar[str] = IntakeTexts.CONVEYOR_BELT_REQUEST
        alt_request: ClassVar[str] = IntakeTexts.ALT_REQUEST

    texts: ClassVar[type[_Texts]] = _Texts
class SourcingStrings:
    """Strings for the sourcing flow, grouped into selectors, labels and demo texts."""

    # Element selectors, re-exported from SourcingSelectors.
    class _Selectors:
        supplier_search_input: ClassVar[str] = SourcingSelectors.SUPPLIER_SEARCH_INPUT
        add_supplier_button: ClassVar[str] = SourcingSelectors.ADD_SUPPLIER_BUTTON
        supplier_row: ClassVar[str] = SourcingSelectors.SUPPLIER_ROW

    selectors: ClassVar[type[_Selectors]] = _Selectors

    # Visible labels, re-exported from SourcingLabels.
    class _Labels:
        suppliers_tab: ClassVar[str] = SourcingLabels.SUPPLIERS_TAB
        add_button: ClassVar[str] = SourcingLabels.ADD_BUTTON

    labels: ClassVar[type[_Labels]] = _Labels

    # Demo texts, re-exported from SupplierTexts.
    class _Texts:
        # NOTE: this is the same list object as SupplierTexts.DEFAULT_TRIO, not a copy.
        default_trio: ClassVar[list[str]] = SupplierTexts.DEFAULT_TRIO
        notes: ClassVar[str] = SupplierTexts.NOTES

    texts: ClassVar[type[_Texts]] = _Texts
class NavigationStrings:
    """Strings for the navigation flow, grouped into selectors and labels."""

    # Element selectors, re-exported from NavigationSelectors.
    class _Selectors:
        global_search: ClassVar[str] = NavigationSelectors.GLOBAL_SEARCH
        first_result: ClassVar[str] = NavigationSelectors.FIRST_RESULT

    selectors: ClassVar[type[_Selectors]] = _Selectors

    # Empty placeholder so `labels` exists like in the other namespaces.
    class _Labels:
        pass  # No labels defined yet for navigation

    labels: ClassVar[type[_Labels]] = _Labels
class AuthStrings:
    """Strings for the authentication flow, grouped into selectors and labels."""

    # Element selectors, re-exported from AuthSelectors.
    class _Selectors:
        # Login form
        email_input: ClassVar[str] = AuthSelectors.EMAIL_INPUT
        send_code_button: ClassVar[str] = AuthSelectors.SEND_CODE_BUTTON
        code_input: ClassVar[str] = AuthSelectors.CODE_INPUT
        submit_button: ClassVar[str] = AuthSelectors.SUBMIT_BUTTON
        # Session controls
        logout_button: ClassVar[str] = AuthSelectors.LOGOUT_BUTTON
        current_user_display: ClassVar[str] = AuthSelectors.CURRENT_USER_DISPLAY

    selectors: ClassVar[type[_Selectors]] = _Selectors

    # Visible labels, re-exported from AuthLabels.
    class _Labels:
        login_email_label: ClassVar[str] = AuthLabels.LOGIN_EMAIL_LABEL
        login_send_code_button: ClassVar[str] = AuthLabels.LOGIN_SEND_CODE_BUTTON
        login_verify_code_label: ClassVar[str] = AuthLabels.LOGIN_VERIFY_CODE_LABEL
        logout_label: ClassVar[str] = AuthLabels.LOGOUT_LABEL
        current_user_display_prefix: ClassVar[str] = AuthLabels.CURRENT_USER_DISPLAY_PREFIX

    labels: ClassVar[type[_Labels]] = _Labels
class AppStrings:
    """Root of the static strings registry.

    Each attribute is a namespace class. The flow namespaces (``intake``,
    ``sourcing``, ``navigation``, ``auth``) expose nested ``selectors`` and
    ``labels`` classes, and ``intake`` and ``sourcing`` additionally expose
    ``texts``. ``graphql`` holds the GraphQL documents directly.
    """

    # UI flow namespaces
    intake: ClassVar[type[IntakeStrings]] = IntakeStrings
    sourcing: ClassVar[type[SourcingStrings]] = SourcingStrings
    navigation: ClassVar[type[NavigationStrings]] = NavigationStrings
    auth: ClassVar[type[AuthStrings]] = AuthStrings

    # API documents
    graphql: ClassVar[type[GraphQLStrings]] = GraphQLStrings
# Module-level instance for convenience. Every attribute on AppStrings is a
# ClassVar, so this instance carries no state of its own; it exists so callers
# can write `app_strings.intake...` as shown in the module docstring.
app_strings = AppStrings()
# Public API: the root registry and its instance, then the namespace classes.
__all__ = [
    "AppStrings",
    "app_strings",
    "AuthStrings",
    "GraphQLStrings",
    "IntakeStrings",
    "NavigationStrings",
    "SourcingStrings",
]