From d4829238041c0e0ca7586a7871739b4a75c7fbab Mon Sep 17 00:00:00 2001 From: Travis Vasceannie Date: Fri, 19 Sep 2025 13:34:17 +0000 Subject: [PATCH] xx --- .env | 1 + .env.example | 3 + .vscode/settings.json | 41 +- docs/feeds.md | 307 +- .../tui/__pycache__/styles.cpython-312.pyc | Bin 47261 -> 47623 bytes .../__pycache__/dashboard.cpython-312.pyc | Bin 34012 -> 31026 bytes .../__pycache__/dialogs.cpython-312.pyc | Bin 17158 -> 17867 bytes .../__pycache__/documents.cpython-312.pyc | Bin 18900 -> 23692 bytes ingest_pipeline/cli/tui/screens/dashboard.py | 75 +- ingest_pipeline/cli/tui/screens/dialogs.py | 38 +- ingest_pipeline/cli/tui/screens/documents.py | 116 +- ingest_pipeline/cli/tui/styles.py | 41 +- .../utils/__pycache__/runners.cpython-312.pyc | Bin 6961 -> 5161 bytes .../storage_manager.cpython-312.pyc | Bin 24918 -> 25251 bytes ingest_pipeline/cli/tui/utils/runners.py | 41 +- .../cli/tui/utils/storage_manager.py | 26 +- .../__pycache__/settings.cpython-312.pyc | Bin 9806 -> 10453 bytes ingest_pipeline/config/settings.py | 13 +- .../core/__pycache__/models.cpython-312.pyc | Bin 9557 -> 11783 bytes ingest_pipeline/core/models.py | 78 +- .../__pycache__/ingestion.cpython-312.pyc | Bin 30027 -> 30021 bytes .../__pycache__/scheduler.cpython-312.pyc | Bin 4204 -> 4235 bytes ingest_pipeline/flows/scheduler.py | 12 +- .../__pycache__/firecrawl.cpython-312.pyc | Bin 25327 -> 28485 bytes ingest_pipeline/ingestors/firecrawl.py | 130 +- .../storage/__pycache__/base.cpython-312.pyc | Bin 7680 -> 14107 bytes .../__pycache__/openwebui.cpython-312.pyc | Bin 31504 -> 27878 bytes .../__pycache__/weaviate.cpython-312.pyc | Bin 43251 -> 44001 bytes ingest_pipeline/storage/base.py | 135 +- ingest_pipeline/storage/openwebui.py | 349 +- .../r2r/__pycache__/storage.cpython-312.pyc | Bin 40079 -> 50082 bytes ingest_pipeline/storage/r2r/storage.py | 253 +- ingest_pipeline/storage/types.py | 22 + ingest_pipeline/storage/weaviate.py | 24 +- .../metadata_tagger.cpython-312.pyc | Bin 15565 -> 16319 bytes .../__pycache__/vectorizer.cpython-312.pyc | Bin 7724 -> 7910 bytes ingest_pipeline/utils/metadata_tagger.py | 56 +- ingest_pipeline/utils/vectorizer.py | 39 +- pyproject.toml | 2 +- repomix-output.xml | 13165 ++++++++-------- .../conftest.cpython-312-pytest-8.4.2.pyc | Bin 22992 -> 24383 bytes .../__pycache__/openapi_mocks.cpython-312.pyc | Bin 41440 -> 52262 bytes tests/conftest.py | 40 + ...est_scheduler.cpython-312-pytest-8.4.2.pyc | Bin 7574 -> 7522 bytes tests/unit/flows/test_scheduler.py | 8 +- ...est_openwebui.cpython-312-pytest-8.4.2.pyc | Bin 18917 -> 19090 bytes ...t_r2r_helpers.cpython-312-pytest-8.4.2.pyc | Bin 27342 -> 29532 bytes tests/unit/storage/test_openwebui.py | 10 +- tests/unit/storage/test_r2r_helpers.py | 2 +- ...hboard_screen.cpython-312-pytest-8.4.2.pyc | Bin 16098 -> 17750 bytes ...st_vectorizer.cpython-312-pytest-8.4.2.pyc | Bin 10964 -> 10958 bytes tests/unit/utils/test_vectorizer.py | 2 +- uv.lock | 157 +- 53 files changed, 8013 insertions(+), 7173 deletions(-) create mode 100644 ingest_pipeline/storage/types.py diff --git a/.env b/.env index db5989a..b75aa08 100644 --- a/.env +++ b/.env @@ -25,6 +25,7 @@ FIRECRAWL_ENDPOINT=http://crawl.lab:30002 # Model Configuration EMBEDDING_MODEL=ollama/bge-m3:latest EMBEDDING_DIMENSION=1024 +METADATA_MODEL=fireworks/glm-4p5-air # Ingestion Settings BATCH_SIZE=50 diff --git a/.env.example b/.env.example index 9167b21..4211fac 100644 --- a/.env.example +++ b/.env.example @@ -2,6 +2,8 @@ FIRECRAWL_API_KEY= OPENWEBUI_API_KEY= WEAVIATE_API_KEY= 
+LLM_API_KEY= +OPENAI_API_KEY= # Endpoints LLM_ENDPOINT=http://llm.lab @@ -12,6 +14,7 @@ FIRECRAWL_ENDPOINT=http://crawl.lab:30002 # Model Configuration EMBEDDING_MODEL=ollama/bge-m3:latest EMBEDDING_DIMENSION=1024 +METADATA_MODEL=fireworks/glm-4p5-air # Ingestion Settings BATCH_SIZE=50 diff --git a/.vscode/settings.json b/.vscode/settings.json index 2217470..1d3f24c 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -7,14 +7,23 @@ "python.linting.mypyPath": "./.venv/bin/mypy", "python.linting.pylintEnabled": false, "python.linting.flake8Enabled": false, - "python.analysis.typeCheckingMode": "basic", + "python.analysis.typeCheckingMode": "strict", "python.analysis.autoImportCompletions": true, "python.analysis.stubPath": "./.venv/lib/python3.12/site-packages", + "python.analysis.memory.keepLibraryAst": false, + "python.analysis.indexing": true, + "python.analysis.packageIndexDepths": [ + { + "name": "", + "depth": 2 + } + ], "basedpyright.analysis.typeCheckingMode": "standard", "basedpyright.analysis.autoSearchPaths": true, "basedpyright.analysis.autoImportCompletions": true, "basedpyright.analysis.diagnosticMode": "workspace", "basedpyright.analysis.stubPath": "./.venv/lib/python3.12/site-packages", + "basedpyright.analysis.useLibraryCodeForTypes": false, "basedpyright.analysis.extraPaths": [ "./ingest_pipeline", "./.venv/lib/python3.12/site-packages" @@ -29,9 +38,33 @@ "./.venv/lib/python3.12/site-packages" ], "files.exclude": { + ".mypy_cache": true, "**/__pycache__": true, "**/.pytest_cache": true, - "**/node_modules": true, - ".mypy_cache": true - } + "**/.ruff": true, + "**/.uv**": true, + "**/.venv": true, + "**/node_modules": true + }, + "python.analysis.enableTroubleshootMissingImports": true, + "python.analysis.generateWithTypeAnnotation": true, + "python.analysis.inlayHints.callArgumentNames": "partial", + "python.analysis.languageServerMode": "full", + "python.analysis.regenerateStdLibIndices": true, + "python.analysis.typeEvaluation.enableExperimentalFeatures": true, + "python.analysis.typeEvaluation.strictDictionaryInference": true, + "python.analysis.typeEvaluation.strictListInference": true, + "python.analysis.typeEvaluation.strictSetInference": true, + "python.terminal.activateEnvInCurrentTerminal": true, + "python.testing.pytestEnabled": true, + "python.useEnvironmentsExtension": true, + "editor.formatOnSave": true, + "mcp": {}, + "python.pyrefly.displayTypeErrors": "force-on", + "python-envs.defaultEnvManager": "ms-python.python:venv", + "python-envs.defaultPackageManager": "charliermarsh.ruff:uv", + "python-envs.pythonProjects": [], + "python.analysis.fixAll": [], + "python.analysis.includeAliasesFromUserFiles": true, + "python.analysis.showOnlyDirectDependenciesInAutoImport": true } \ No newline at end of file diff --git a/docs/feeds.md b/docs/feeds.md index 08e85f0..b8c7f69 100644 --- a/docs/feeds.md +++ b/docs/feeds.md @@ -1,106 +1,263 @@ -# TUI Feeds +## Codebase Analysis Report: RAG Manager Ingestion Pipeline -This guide explains how the terminal dashboard surfaces collection activity and status signals so new backends can plug in without duplicating UI logic. +**Status:** Validated against current codebase implementation +**Target:** Enhanced implementation guidance for efficient agent execution -*** +This analysis has been validated against the actual codebase structure and provides implementation-specific details for executing recommended improvements. 
The codebase demonstrates solid architecture with clear separation of concerns between ingestion flows, storage adapters, and TUI components. -## Activity Feed +### Architecture Overview +- **Storage Backends**: Weaviate, OpenWebUI, R2R with unified `BaseStorage` interface +- **TUI Framework**: Textual-based with reactive components and async worker patterns +- **Orchestration**: Prefect flows with retry logic and progress callbacks +- **Configuration**: Pydantic-based settings with environment variable support -- **Primary surface:** `#activity_feed` widget inside `DashboardScreen` (`ingest_pipeline/cli/tui/screens/dashboard.py`). -- **Data source:** `self.collections`, populated by `refresh_collections()` after gathering payloads from Weaviate and OpenWebUI via `describe_collections()`. -- **Selection logic:** `_generate_activity_text()` formats the three most recent `CollectionInfo` entries and appends an aggregate line when additional collections exist. -- **Empty state:** Presents the call-to-action _โ€œ๐Ÿš€ No collections foundโ€ฆโ€_ encouraging the user to launch an ingestion run. -- **Icons:** `_get_content_type_icon()` maps collection names containing `web`, `doc`, or `repo` to ๐ŸŒ/๐Ÿ“–/๐Ÿ“ฆ respectively, and falls back to ๐Ÿ“„. Update this helper when introducing new naming conventions. +### Validated Implementation Analysis -### When it refreshes +### 1. Bug Fixes & Potential Issues -1. `refresh_collections()` loads data for each connected backend and caches it in `self.collections`. -2. `_update_activity_feed()` is triggered from `update_metrics()` immediately after metrics cards recompute. -3. The Static widget updates with a newline-delimited summary, keeping the dashboard reactive without rerendering the entire layout. +These are areas where the code may not function as intended or could lead to errors. -To surface a new backend, extend either `list_weaviate_collections()` or `list_openwebui_collections()` with the additional source (or introduce a new list helper) and ensure the resulting dictionaries match the `CollectionInfo` contract. +*
#### HIGH PRIORITY: `R2RStorage.store_batch` inefficient looping (Lines 161-179)

* **File:** `ingest_pipeline/storage/r2r/storage.py:161-179`
* **Issue:** CONFIRMED - the method loops through documents, calling `_store_single_document` individually
* **Impact:** ~5-10x performance degradation for batch operations
* **Implementation:** Check the R2R v3 API for bulk endpoints; the current implementation hits `/v3/documents` once per document (an interim concurrency sketch follows)
* **Effort:** Medium (API research + refactor)
* **Priority:** High - affects all R2R ingestion workflows
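Until a bulk endpoint is confirmed, the per-document calls can at least run concurrently instead of strictly in sequence. A minimal sketch of that interim fix is below; the `/v3/documents` path and the response shape are assumptions carried over from the notes above, not verified R2R API details:

```python
import asyncio
from typing import Any

import httpx


async def store_batch_concurrent(
    client: httpx.AsyncClient,
    documents: list[dict[str, Any]],
    max_concurrency: int = 8,
) -> list[str | None]:
    """Store documents concurrently rather than awaiting each one in turn.

    Returns the created document id per input, or None for a failed
    document, so partial failures do not abort the whole batch.
    """
    semaphore = asyncio.Semaphore(max_concurrency)

    async def store_one(doc: dict[str, Any]) -> str | None:
        async with semaphore:  # bound the number of in-flight requests
            try:
                response = await client.post("/v3/documents", json=doc)
                response.raise_for_status()
                return response.json()["results"]["document_id"]
            except (httpx.HTTPError, KeyError):
                return None

    return await asyncio.gather(*(store_one(doc) for doc in documents))
```

The semaphore keeps the fan-out polite to the server while still removing the serial round-trip cost.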
#### MEDIUM PRIORITY: Mixed HTTP client usage in `R2RStorage` (Lines 80, 99, 258)

* **File:** `ingest_pipeline/storage/r2r/storage.py:80,99,258`
* **Issue:** VALIDATED - Mixes `R2RAsyncClient` (line 80) with direct `httpx.AsyncClient` calls (lines 99, 258)
* **Specific Methods:** `initialize()`, `_ensure_collection()`, `_attempt_document_creation()`
* **Impact:** Inconsistent auth/header handling, connection pooling inefficiency
* **Implementation:** Extend `R2RAsyncClient` or introduce an adapter for the missing endpoints (see the transport sketch below)
* **Test Coverage:** Check whether the affected methods have unit tests before refactoring
* **Effort:** Medium (requires SDK analysis)
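One way to consolidate the two client surfaces is a thin transport wrapper that owns a single `httpx.AsyncClient`, so headers and pooling are configured once and only the endpoints the SDK lacks go through it. A sketch with illustrative names; the `Authorization: Bearer` scheme is an assumption to verify against the R2R server:

```python
import httpx


class R2RTransport:
    """Single HTTP surface for R2R calls the SDK does not cover.

    Sketch only: the real `R2RAsyncClient` stays the primary interface;
    this wrapper replaces the ad-hoc `httpx.AsyncClient` instances
    currently scattered through `_ensure_collection()` and friends.
    """

    def __init__(self, base_url: str, api_key: str, timeout: float = 30.0) -> None:
        # One client means one connection pool and one place to set headers.
        self._http = httpx.AsyncClient(
            base_url=base_url,
            headers={"Authorization": f"Bearer {api_key}"},
            timeout=timeout,
        )

    async def post_json(self, path: str, payload: dict[str, object]) -> dict[str, object]:
        response = await self._http.post(path, json=payload)
        response.raise_for_status()
        return response.json()

    async def aclose(self) -> None:
        await self._http.aclose()
```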
#### MEDIUM PRIORITY: TUI blocking during storage init (Line 91)

* **File:** `ingest_pipeline/cli/tui/utils/runners.py:91`
* **Issue:** CONFIRMED - `await storage_manager.initialize_all_backends()` blocks TUI startup
* **Current Implementation:** 30s timeout per backend in `StorageManager.initialize_all_backends()`
* **User Impact:** Frozen terminal for up to 90s if all three backends time out
* **Solution:** Move the call to `CollectionOverviewScreen.on_mount()` as a `@work` task (see the sketch below)
* **Dependencies:** `dashboard.py:304` already has a worker pattern for `refresh_collections`
* **Implementation:** Reuse the existing loading indicators and status updates (lines 308-312)
* **Effort:** Low (pattern exists, needs relocation)
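A minimal sketch of the relocation, assuming the app exposes the `StorageManager` as `storage_manager` and the dashboard keeps its existing `#status_text` widget; the worker lets the first frame paint immediately while the backends connect:

```python
from textual import work
from textual.screen import Screen
from textual.widgets import Static


class CollectionOverviewScreen(Screen[None]):
    """Sketch: backend initialization as a background worker."""

    def on_mount(self) -> None:
        # Nothing is awaited here, so the screen renders on the first frame.
        self.init_backends()

    @work(exclusive=True)
    async def init_backends(self) -> None:
        status = self.query_one("#status_text", Static)
        status.update("Connecting to storage backends...")
        # The 30s-per-backend timeouts now burn worker time, not UI time.
        await self.app.storage_manager.initialize_all_backends()  # type: ignore[attr-defined]
        status.update("Ready")
```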
#### LOW PRIORITY: Weak URL validation in `IngestionScreen` (Lines 240-260)

* **File:** `ingest_pipeline/cli/tui/screens/ingestion.py:240-260`
* **Issue:** CONFIRMED - the method accepts `foo/bar` as valid (line 258)
* **Security Risk:** Medium - malicious URLs could be passed to ingestors
* **Current Logic:** Basic prefix checks only (http/https/file://)
* **Enhancement:** Add a `pathlib.Path.exists()` check for file:// paths and a `.git` directory check for repos (sketched below)
* **Dependencies:** Import `pathlib` and add proper regex validation
* **Alternative:** Use the `validators` library (not currently imported)
* **Effort:** Low (validation logic only)
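A sketch of the stricter check; the function name and placement are hypothetical, and the rules simply mirror the enhancement notes above:

```python
from pathlib import Path
from urllib.parse import urlparse


def validate_source(raw: str) -> bool:
    """Reject inputs like `foo/bar` unless they point at something real."""
    parsed = urlparse(raw)
    if parsed.scheme in ("http", "https"):
        # Require a hostname, not just a scheme prefix.
        return bool(parsed.netloc)
    if parsed.scheme == "file":
        # Only accept file:// sources that actually exist on disk.
        return Path(parsed.path).exists()
    # Treat bare paths as local repos only when a .git directory is present.
    candidate = Path(raw).expanduser()
    return candidate.is_dir() and (candidate / ".git").exists()
```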
### 2. Code Redundancy & Refactoring Opportunities

These suggestions aim to make the code more concise, maintainable, and reusable (D.R.Y. - Don't Repeat Yourself).
#### HIGH IMPACT: Redundant collection logic in dashboard (Lines 356-424)

* **File:** `ingest_pipeline/cli/tui/screens/dashboard.py:356-424`
* **Issue:** CONFIRMED - `list_weaviate_collections()` and `list_openwebui_collections()` duplicate `StorageManager.get_all_collections()`
* **Code Duplication:** ~70 lines of redundant collection listing logic
* **Architecture Violation:** UI layer coupled to specific storage implementations
* **Current Usage:** `refresh_collections()` calls `get_all_collections()` (line 327), making these methods obsolete
* **Action:** DELETE the methods `list_weaviate_collections` and `list_openwebui_collections`
* **Impact:** Code reduction of ~70 lines, improved maintainability
* **Risk:** Low - the methods appear unused in the current flow
* **Effort:** Low (deletion only)
#### MEDIUM IMPACT: Repetitive backend init pattern (Lines 255-291)

* **File:** `ingest_pipeline/cli/tui/utils/storage_manager.py:255-291`
* **Issue:** CONFIRMED - the same pattern is repeated once per backend type
* **Code Structure:** Check settings → Create config → Add task (12 lines × 3 backends)
* **Current Backends:** Weaviate (258-267), OpenWebUI (270-279), R2R (282-291)
* **Refactor Pattern:** Create a `BackendConfig` dataclass with `(backend_type, endpoint_setting, api_key_setting, storage_class)` (see the sketch below)
* **Implementation:** Loop over a config list, reducing ~36 lines to ~15
* **Extensibility:** Adding a new backend becomes a one-line config addition
* **Testing:** Ensure the `asyncio.gather()` behavior is unchanged (line 296)
* **Effort:** Medium (requires dataclass design + testing)
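A sketch of the dataclass-driven loop; the settings attribute names and the factory signature are assumptions for illustration:

```python
from dataclasses import dataclass
from typing import Any, Callable


@dataclass(frozen=True)
class BackendSpec:
    """Declarative description of one storage backend."""

    backend_type: str                    # e.g. "weaviate"
    endpoint_setting: str                # settings attribute for the endpoint
    api_key_setting: str                 # settings attribute for the API key
    factory: Callable[[str, str], Any]   # builds the storage adapter


def build_backends(settings: Any, specs: list[BackendSpec]) -> dict[str, Any]:
    """One loop replaces the three copy-pasted check/create/add blocks."""
    backends: dict[str, Any] = {}
    for spec in specs:
        endpoint = getattr(settings, spec.endpoint_setting, None)
        if not endpoint:
            continue  # backend not configured; skip it quietly
        api_key = getattr(settings, spec.api_key_setting, "") or ""
        backends[spec.backend_type] = spec.factory(endpoint, api_key)
    return backends
```

The subsequent `asyncio.gather()` over the adapters' `initialize()` calls stays exactly as it is today.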
#### MEDIUM IMPACT: Repeated Prefect block loading pattern (Lines 266-311)

* **File:** `ingest_pipeline/flows/ingestion.py:266-311`
* **Issue:** CONFIRMED - pattern appears in both `_create_ingestor()` and `_create_storage()`
* **Duplication:** `Block.aload()` + fallback logic repeated 4x across the two methods
* **Variable Resolution:** The batch size logic (lines 244-255) also needs abstraction
* **Helper Functions Needed** (sketched below):
  - `load_block_with_fallback(block_slug: str, default_config: T) -> T`
  - `resolve_prefect_variable(var_name: str, default: T, type_cast: Type[T]) -> T`
* **Impact:** Cleaner flow logic, better error handling, type safety
* **Lines Reduced:** ~20 lines of repetitive code
* **Effort:** Medium (requires generic typing)
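A sketch of the two helpers; `Block.aload()` comes straight from the notes above, but the `ValueError` on a missing block document is an assumption about Prefect's behavior to verify before relying on it:

```python
from typing import Callable, TypeVar

from prefect.blocks.core import Block

B = TypeVar("B", bound=Block)
T = TypeVar("T")


async def load_block_with_fallback(block_slug: str, default: B) -> B:
    """Load a Prefect block by slug, returning the in-code default if absent."""
    try:
        return await Block.aload(block_slug)  # type: ignore[return-value]
    except ValueError:
        # Assumed: Prefect raises ValueError when no block document matches.
        return default


def resolve_variable(raw: str | None, default: T, cast: Callable[[str], T]) -> T:
    """Cast a raw Prefect variable string, falling back on missing/bad values."""
    if raw is None:
        return default
    try:
        return cast(raw)
    except (TypeError, ValueError):
        return default
```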
### 3. User Experience (UX) Enhancements

These are suggestions to make your TUI more powerful, intuitive, and enjoyable for the user.
#### HIGH IMPACT: Document content viewer modal (Add to documents.py)

* **Target File:** `ingest_pipeline/cli/tui/screens/documents.py`
* **Current State:** READY - `DocumentManagementScreen` has table selection (line 212)
* **Implementation:**
  - Add `Binding("v", "view_document", "View")` to BINDINGS (line 27)
  - Create `DocumentContentModal(ModalScreen)` with `ScrollableContainer` + `Markdown`
  - Use the existing `get_current_document()` method (line 212)
  - Fetch full content via `storage.retrieve(document_id)`
* **Dependencies:** Import `ModalScreen`, `ScrollableContainer`, and `Markdown` from textual
* **User Value:** HIGH - essential for the content inspection workflow
* **Effort:** Low-Medium (~50 lines of modal code)
* **Pattern:** Follow the existing modal patterns in the codebase (the `documents.py` changes later in this patch implement exactly this)
#### HIGH IMPACT: Analytics tab visualization (Lines 164-189)

* **Target File:** `ingest_pipeline/cli/tui/screens/dashboard.py:164-189`
* **Current State:** PLACEHOLDER - Static widgets with dummy content
* **Data Source:** Use the existing `self.collections` (line 65) populated by `refresh_collections()`
* **Implementation Options:**
  1. **Simple Text Chart:** ASCII bar chart using the existing collections data (sketched below)
  2. **textual-plotext:** Add the dependency + a bar chart widget
  3. **Custom Widget:** Simple bar visualization built from Static widgets
* **Metrics to Show:**
  - Documents per collection (data available)
  - Storage usage per backend (calculated in `_calculate_metrics()`)
  - Ingestion timeline (requires timestamp tracking)
* **Effort:** Low-Medium (depends on visualization complexity)
* **Dependencies:** Consider `textual-plotext` or a pure ASCII approach
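Option 1 needs no new dependency. A sketch that renders documents-per-collection; feed it `{col["name"]: col["count"] for col in self.collections}` and drop the resulting string into one of the placeholder `Static` widgets:

```python
def ascii_bar_chart(counts: dict[str, int], width: int = 40) -> str:
    """Render documents-per-collection as a fixed-width ASCII bar chart."""
    if not counts:
        return "No collections yet"
    peak = max(counts.values()) or 1  # guard against an all-zero chart
    label_width = max(len(name) for name in counts)
    rows = []
    for name, count in sorted(counts.items(), key=lambda item: -item[1]):
        bar = "#" * max(1, round(width * count / peak))
        rows.append(f"{name:<{label_width}} {bar} {count:,}")
    return "\n".join(rows)
```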
#### MEDIUM IMPACT: Global search implementation (Button exists, needs screen)

* **Target File:** `ingest_pipeline/cli/tui/screens/dashboard.py`
* **Current State:** READY - the "Search All" button exists (line 122) with a stubbed handler
* **Backend Support:** `StorageManager.search_across_backends()` already exists (lines 413-441)
* **Implementation:**
  - Create `GlobalSearchScreen(ModalScreen)` with a search input + results table (sketched below)
  - Use the existing `search_across_backends()` method for data
  - Add a "Backend" column to the results table showing the data source
  - Handle async search with loading indicators
* **Current Limitation:** Search only works for Weaviate (line 563) and needs extending
* **Data Flow:** Input → `storage_manager.search_across_backends()` → Results display
* **Effort:** Medium (~100 lines for the new screen + search logic)
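A sketch of the screen, assuming the app exposes `storage_manager` and that `search_across_backends()` yields mappings with `backend`, `title`, and `score` keys; the real return shape needs checking first:

```python
from textual import on, work
from textual.app import ComposeResult
from textual.screen import ModalScreen
from textual.widgets import DataTable, Input


class GlobalSearchScreen(ModalScreen[None]):
    """Sketch: fan a query out to every backend and tag rows with the source."""

    def compose(self) -> ComposeResult:
        yield Input(placeholder="Search all backends...", id="query")
        yield DataTable(id="results")

    def on_mount(self) -> None:
        table = self.query_one("#results", DataTable)
        table.add_columns("Backend", "Title", "Score")

    @on(Input.Submitted, "#query")
    def start_search(self, event: Input.Submitted) -> None:
        self.run_search(event.value)

    @work(exclusive=True)
    async def run_search(self, query: str) -> None:
        table = self.query_one("#results", DataTable)
        table.clear()
        hits = await self.app.storage_manager.search_across_backends(query)  # type: ignore[attr-defined]
        for hit in hits:  # assumed shape: {"backend": ..., "title": ..., "score": ...}
            table.add_row(hit["backend"], hit["title"], f"{hit['score']:.2f}")
```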
+*
#### MEDIUM IMPACT: R2R advanced features integration (Widgets ready)

* **Target File:** `ingest_pipeline/cli/tui/screens/documents.py`
* **Available Widgets:** CONFIRMED - `ChunkViewer`, `EntityGraph`, `CollectionStats`, `DocumentOverview` in `r2r_widgets.py`
* **Current Implementation:** Basic document table only; the R2R-specific features go unused
* **Integration Points:**
  - Add an "R2R Details" button when `collection["type"] == "r2r"` (conditional UI, sketched below)
  - Create an `R2RDocumentDetailsScreen` using the existing widgets
  - Use the `StorageManager.get_r2r_storage()` method (exists at line 442)
* **R2R Methods Available:**
  - `get_document_chunks()`, `extract_entities()`, `get_document_overview()`
* **User Value:** Medium-High for R2R users, showcases the advanced features
* **Effort:** Low-Medium (widgets exist, need screen integration)
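The gating itself is small. An illustrative fragment (class name and widget ids hypothetical) of yielding the button conditionally:

```python
from textual.app import ComposeResult
from textual.widgets import Button


class DocumentActionsBar:
    """Illustrative fragment: R2R-only tooling stays hidden elsewhere."""

    def __init__(self, collection_type: str) -> None:
        self.collection_type = collection_type

    def compose_buttons(self) -> ComposeResult:
        yield Button("Refresh", id="refresh")
        if self.collection_type == "r2r":
            # Chunks, entities, and overviews only exist for R2R collections.
            yield Button("R2R Details", id="r2r_details")
```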
+ +*
#### LOW IMPACT: Create collection dialog (Backend methods exist)

* **Target File:** `ingest_pipeline/cli/tui/screens/dashboard.py`
* **Backend Support:** CONFIRMED - a `create_collection()` method exists for R2R storage (line 690)
* **Current State:** No "Create Collection" button in the existing UI
* **Implementation:**
  - Add a "New Collection" button to the dashboard action buttons
  - Create a `CreateCollectionModal` with a name input + backend checkboxes (sketched below)
  - Iterate over `storage_manager.get_available_backends()` for backend selection
  - Call `storage.create_collection()` on each selected backend
* **Backend Compatibility:** Check which storage backends support collection creation
* **User Value:** Low-Medium (manual workflow, not critical)
* **Effort:** Low-Medium (~75 lines for the modal + integration)
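A sketch of the modal's layout half (widget ids and the constructor argument are hypothetical); the submit handler would iterate the checked backends and call `create_collection()` on each:

```python
from textual.app import ComposeResult
from textual.containers import Vertical
from textual.screen import ModalScreen
from textual.widgets import Button, Checkbox, Input


class CreateCollectionModal(ModalScreen[None]):
    """Sketch: name input plus one checkbox per available backend."""

    def __init__(self, backends: list[str]) -> None:
        super().__init__()
        self.backends = backends

    def compose(self) -> ComposeResult:
        with Vertical():
            yield Input(placeholder="Collection name", id="name")
            for backend in self.backends:
                # One opt-in checkbox per backend, e.g. "weaviate", "r2r".
                yield Checkbox(backend, id=f"backend_{backend}")
            yield Button("Create", id="create", variant="primary")
```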
+ +## Implementation Priority Matrix + +### Quick Wins (High Impact, Low Effort) +1. **Delete redundant collection methods** (dashboard.py:356-424) - 5 min +2. **Fix TUI startup blocking** (runners.py:91) - 15 min +3. **Document content viewer modal** (documents.py) - 30 min + +### High Impact Fixes (Medium Effort) +1. **R2R batch operation optimization** (storage.py:161-179) - Research R2R v3 API + implementation +2. **Analytics tab visualization** (dashboard.py:164-189) - Choose visualization approach + implement +3. **Backend initialization refactoring** (storage_manager.py:255-291) - Dataclass design + testing + +### Technical Debt (Long-term) +1. **R2R client consistency** (storage.py) - SDK analysis + refactoring +2. **Prefect block loading helpers** (ingestion.py:266-311) - Generic typing + testing +3. **URL validation enhancement** (ingestion.py:240-260) - Security + validation logic + +### Feature Enhancements (User Value) +1. **Global search implementation** - Medium effort, requires search backend extension +2. **R2R advanced features integration** - Showcase existing widget capabilities +3. **Create collection dialog** - Nice-to-have administrative feature + +## Agent Execution Notes + +**Context Efficiency Tips:** +- Focus on one priority tier at a time +- Read specific file ranges mentioned in line numbers +- Use existing patterns (worker decorators, modal screens, async methods) +- Test changes incrementally, especially async operations +- Verify import dependencies before implementation + +**Architecture Constraints:** +- Maintain async/await patterns throughout +- Follow Textual reactive widget patterns +- Preserve Prefect flow structure for orchestration +- Keep storage backend abstraction intact + +The codebase demonstrates excellent architectural foundations - these enhancements build upon existing strengths rather than requiring structural changes. \ No newline at end of file diff --git a/ingest_pipeline/cli/tui/__pycache__/styles.cpython-312.pyc b/ingest_pipeline/cli/tui/__pycache__/styles.cpython-312.pyc index 45304920578ed54eae4bc51faf28b88ede5cb054..940720f4eccf18dabbdec07b05a9b22cc32bcfe4 100644 GIT binary patch delta 4821 zcmZ`+Yiu0V6`q-$eRyA9$B+0Gd;N%$&BNK3ckRS^I3WcGF$jY(^dcV=Rz8?SX| zOk(4BP2vU@19Ey>h}wcu+8>xikkkaI)E}`_@dweyBD63C6(RV6)K*4uMa8e4b7yB0 zn{-xt=A3iyJ@?%2o_o%nm%bLBUJ?B7`Fw5;p8kFB5B9up(O=GAySQn4R_0}U+Ih;= z>*6`Zr4&>^mtMEPJ;=$9X-;-3jw?1C23il(xnIKo5OpHl%kUxX>D<3Rw|YQidQaDN>ou^ zH!b#-0$-+h(Op%Li$SvtPB7vD2&b50q6ewrP~6!)3Sa|akMBp42& z=%Zw~D`cnFgzdYCAJhaRj}#*DA#qs*B@G*=xsS!R6;ZHz&2HF(ZWLxqND=m3hol%u z2{wExgeuqCEW=hLrBt%jJJ@7p^m#|c!Yj6FA+XjA{jq}h~OTBU@lk{#x_cs!nJNsur|B!VPr_KGAT17IO$O3_F( z5tT{Ylw#pnA|?|zB0SGCl!$WG{NK?DnFRV51|HKNAU zID?2G!yWMFig zlt_ld3>lE*NHiW*88Q|i2)hzyfItUTPAZZ3U1S&_pjRYCQKJkHbLj_gd8=c{C^(_$ zed!?@_f~uGfRlqr5R?7%b-$O+d!NsTP#7NzpNND%3mHqW{5VDoYw7daA%BpEJbA9llIMLLjJM@j}j2CTqQ!w_33 z7C2-V_QK;t+L7ds64{Gl4v=J9Dts4^m^#qbf_VG@dhEt-c!bCvBzSySkdXbz;fX=a zlC6sZ)wRn32s>_5coi5J@MJhmouf zI?{?dBpvjRfiq^}YF*e_)F>QgX|c*`C(Kwiip=5$jovk!Nh}oSakwNE|l)%m*`?? 
z^N(6sAM52EBjoS=-5q`4MX=39$Ti;5KKzk6*oP?=!8&L%u4QI)wg^%Pd3M$)n$79Q zW4d_a44anJv*vhv8Z=-whymZN*n*gk~) z2oZ#FgaZJOf5Uz(m(6C<=H$ALD>o3{KzIw`=Loj|uI96T8~C-XU+Etrwfx=w@4|^! z27XG8noZ_3oNzthdQtVvLswL^%n#Ax|LZ)PSyntiG&lPZ9%Tc-VPv*;9sk^JKd%`s z;`fGjkuYx_z6^mD;Pt()Un}(0km*|j8Zx*>16B?lSq z9^vxtuwTUch=m5Q(7&5rl_jTpNv^pe*W8k8Sq})ID|>-Nlx^%Hzc|(i zRlYV>b^$-i*v}Bev($;4nA&HMD*!O$p}mojy^%c=tOI4m2pBaxgzyA{Xx0Z4fZ-rB zhsAZ|evGhIGEu>N-7dh&5-uAEP5{F(&Qx88hhP~+;&CZFm^nR}nVG@Y=snbNPKeiH zP}A6N@V{pmUmsJLgm(EY3f-|-0{T!-sG1gw!B`j?}OsZC#eO-;o;sYR}n7-KA4spf7&>ZxZst`hNkYN&Aoh diff --git a/ingest_pipeline/cli/tui/screens/dashboard.py b/ingest_pipeline/cli/tui/screens/dashboard.py index b94a5c1..dacd32e 100644 --- a/ingest_pipeline/cli/tui/screens/dashboard.py +++ b/ingest_pipeline/cli/tui/screens/dashboard.py @@ -206,7 +206,11 @@ class CollectionOverviewScreen(Screen[None]): """Calculate basic metrics from collections.""" self.total_collections = len(self.collections) self.total_documents = sum(col["count"] for col in self.collections) - self.active_backends = sum([bool(self.weaviate), bool(self.openwebui), bool(self.r2r)]) + # Calculate active backends from storage manager if individual storages are None + if self.weaviate is None and self.openwebui is None and self.r2r is None: + self.active_backends = len(self.storage_manager.get_available_backends()) + else: + self.active_backends = sum([bool(self.weaviate), bool(self.openwebui), bool(self.r2r)]) def _update_metrics_cards(self) -> None: """Update the metrics cards display.""" @@ -353,75 +357,6 @@ class CollectionOverviewScreen(Screen[None]): self.is_loading = False loading_indicator.display = False - async def list_weaviate_collections(self) -> list[CollectionInfo]: - """List Weaviate collections with enhanced metadata.""" - if not self.weaviate: - return [] - - try: - overview = await self.weaviate.describe_collections() - collections: list[CollectionInfo] = [] - - for item in overview: - count_raw = item.get("count", 0) - count_val = int(count_raw) if isinstance(count_raw, (int, str)) else 0 - size_mb_raw = item.get("size_mb", 0.0) - size_mb_val = float(size_mb_raw) if isinstance(size_mb_raw, (int, float, str)) else 0.0 - collections.append( - CollectionInfo( - name=str(item.get("name", "Unknown")), - type="weaviate", - count=count_val, - backend="๐Ÿ—„๏ธ Weaviate", - status="โœ“ Active", - last_updated=datetime.now().strftime("%Y-%m-%d %H:%M"), - size_mb=size_mb_val, - ) - ) - - return collections - except Exception as e: - self.notify(f"Error listing Weaviate collections: {e}", severity="error", markup=False) - return [] - - async def list_openwebui_collections(self) -> list[CollectionInfo]: - """List OpenWebUI collections with enhanced metadata.""" - # Try to get OpenWebUI backend from storage manager if direct instance not available - openwebui_backend = self.openwebui - if not openwebui_backend: - backend = self.storage_manager.get_backend(StorageBackend.OPEN_WEBUI) - if not isinstance(backend, OpenWebUIStorage): - return [] - openwebui_backend = backend - if not openwebui_backend: - return [] - - try: - overview = await openwebui_backend.describe_collections() - collections: list[CollectionInfo] = [] - - for item in overview: - count_raw = item.get("count", 0) - count_val = int(count_raw) if isinstance(count_raw, (int, str)) else 0 - size_mb_raw = item.get("size_mb", 0.0) - size_mb_val = float(size_mb_raw) if isinstance(size_mb_raw, (int, float, str)) else 0.0 - collection_name = str(item.get("name", "Unknown")) - collections.append( - CollectionInfo( - 
name=collection_name, - type="openwebui", - count=count_val, - backend="๐ŸŒ OpenWebUI", - status="โœ“ Active", - last_updated=datetime.now().strftime("%Y-%m-%d %H:%M"), - size_mb=size_mb_val, - ) - ) - - return collections - except Exception as e: - self.notify(f"Error listing OpenWebUI collections: {e}", severity="error", markup=False) - return [] async def update_collections_table(self) -> None: """Update the collections table with enhanced formatting.""" diff --git a/ingest_pipeline/cli/tui/screens/dialogs.py b/ingest_pipeline/cli/tui/screens/dialogs.py index 7a33092..8bbcc64 100644 --- a/ingest_pipeline/cli/tui/screens/dialogs.py +++ b/ingest_pipeline/cli/tui/screens/dialogs.py @@ -3,7 +3,7 @@ from __future__ import annotations from pathlib import Path -from typing import TYPE_CHECKING, ClassVar +from typing import TYPE_CHECKING from textual.app import ComposeResult from textual.binding import Binding @@ -15,6 +15,7 @@ from typing_extensions import override from ..models import CollectionInfo if TYPE_CHECKING: + from ..app import CollectionManagementApp from .dashboard import CollectionOverviewScreen from .documents import DocumentManagementScreen @@ -25,7 +26,12 @@ class ConfirmDeleteScreen(Screen[None]): collection: CollectionInfo parent_screen: CollectionOverviewScreen - BINDINGS: list[Binding] = [ + @property + def app(self) -> CollectionManagementApp: # type: ignore[override] + """Return the typed app instance.""" + return super().app # type: ignore[return-value] + + BINDINGS = [ Binding("escape", "app.pop_screen", "Cancel"), Binding("y", "confirm_delete", "Yes"), Binding("n", "app.pop_screen", "No"), @@ -132,12 +138,16 @@ class ConfirmDeleteScreen(Screen[None]): return # Refresh parent screen after a short delay to ensure deletion is processed - self.call_later(lambda _: self.parent_screen.refresh_collections(), 0.5) # 500ms delay + self.call_later(self._refresh_parent_collections, 0.5) # 500ms delay self.app.pop_screen() except Exception as e: self.notify(f"Failed to delete collection: {e}", severity="error", markup=False) + def _refresh_parent_collections(self) -> None: + """Helper method to refresh parent collections.""" + self.parent_screen.refresh_collections() + class ConfirmDocumentDeleteScreen(Screen[None]): @@ -145,9 +155,14 @@ class ConfirmDocumentDeleteScreen(Screen[None]): doc_ids: list[str] collection: CollectionInfo - parent_screen: "DocumentManagementScreen" + parent_screen: DocumentManagementScreen - BINDINGS: list[Binding] = [ + @property + def app(self) -> CollectionManagementApp: # type: ignore[override] + """Return the typed app instance.""" + return super().app # type: ignore[return-value] + + BINDINGS = [ Binding("escape", "app.pop_screen", "Cancel"), Binding("y", "confirm_delete", "Yes"), Binding("n", "app.pop_screen", "No"), @@ -158,7 +173,7 @@ class ConfirmDocumentDeleteScreen(Screen[None]): self, doc_ids: list[str], collection: CollectionInfo, - parent_screen: "DocumentManagementScreen", + parent_screen: DocumentManagementScreen, ): super().__init__() self.doc_ids = doc_ids @@ -244,7 +259,12 @@ class LogViewerScreen(ModalScreen[None]): _log_widget: RichLog | None _log_file: Path | None - BINDINGS: list[Binding] = [ + @property + def app(self) -> CollectionManagementApp: # type: ignore[override] + """Return the typed app instance.""" + return super().app # type: ignore[return-value] + + BINDINGS = [ Binding("escape", "close", "Close"), Binding("ctrl+l", "close", "Close"), Binding("s", "show_path", "Log File"), @@ -272,13 +292,13 @@ class 
LogViewerScreen(ModalScreen[None]): self._log_widget = self.query_one(RichLog) if hasattr(self.app, 'attach_log_viewer'): - self.app.attach_log_viewer(self) + self.app.attach_log_viewer(self) # type: ignore[arg-type] def on_unmount(self) -> None: """Detach from the parent application when closed.""" if hasattr(self.app, 'detach_log_viewer'): - self.app.detach_log_viewer(self) + self.app.detach_log_viewer(self) # type: ignore[arg-type] def _get_log_widget(self) -> RichLog: if self._log_widget is None: diff --git a/ingest_pipeline/cli/tui/screens/documents.py b/ingest_pipeline/cli/tui/screens/documents.py index 989a710..4d04e3c 100644 --- a/ingest_pipeline/cli/tui/screens/documents.py +++ b/ingest_pipeline/cli/tui/screens/documents.py @@ -4,9 +4,9 @@ from datetime import datetime from textual.app import ComposeResult from textual.binding import Binding -from textual.containers import Container, Horizontal -from textual.screen import Screen -from textual.widgets import Button, Footer, Header, Label, LoadingIndicator, Static +from textual.containers import Container, Horizontal, ScrollableContainer +from textual.screen import ModalScreen, Screen +from textual.widgets import Button, Footer, Header, Label, LoadingIndicator, Markdown, Static from typing_extensions import override from ....storage.base import BaseStorage @@ -27,6 +27,7 @@ class DocumentManagementScreen(Screen[None]): BINDINGS = [ Binding("escape", "app.pop_screen", "Back"), Binding("r", "refresh", "Refresh"), + Binding("v", "view_document", "View"), Binding("delete", "delete_selected", "Delete Selected"), Binding("a", "select_all", "Select All"), Binding("ctrl+a", "select_all", "Select All"), @@ -324,3 +325,112 @@ class DocumentManagementScreen(Screen[None]): ) -> None: """Handle clear selection from enhanced table.""" self.action_select_none() + + def action_view_document(self) -> None: + """View the content of the currently selected document.""" + if doc := self.get_current_document(): + if self.storage: + self.app.push_screen(DocumentContentModal(doc, self.storage, self.collection["name"])) + else: + self.notify("No storage backend available", severity="error") + else: + self.notify("No document selected", severity="warning") + + +class DocumentContentModal(ModalScreen[None]): + """Modal screen for viewing document content.""" + + DEFAULT_CSS = """ + DocumentContentModal { + align: center middle; + } + + DocumentContentModal > Container { + width: 90%; + height: 85%; + background: $surface; + border: thick $primary; + } + + DocumentContentModal .modal-header { + background: $primary; + color: $text; + padding: 1; + dock: top; + height: 3; + } + + DocumentContentModal .modal-content { + padding: 1; + height: 1fr; + } + """ + + BINDINGS = [ + Binding("escape", "app.pop_screen", "Close"), + Binding("q", "app.pop_screen", "Close"), + ] + + def __init__(self, document: DocumentInfo, storage: BaseStorage, collection_name: str): + super().__init__() + self.document = document + self.storage = storage + self.collection_name = collection_name + + def compose(self) -> ComposeResult: + yield Container( + Static( + f"๐Ÿ“„ Document: {self.document['title'][:60]}{'...' 
if len(self.document['title']) > 60 else ''}", + classes="modal-header" + ), + ScrollableContainer( + Markdown("Loading document content...", id="document_content"), + LoadingIndicator(id="content_loading"), + classes="modal-content" + ) + ) + + async def on_mount(self) -> None: + """Load and display the document content.""" + content_widget = self.query_one("#document_content", Markdown) + loading = self.query_one("#content_loading") + + try: + # Get full document content + doc_content = await self.storage.retrieve( + self.document["id"], + collection_name=self.collection_name + ) + + # Format content for display + if isinstance(doc_content, str): + formatted_content = f"""# {self.document['title']} + +**Source:** {self.document.get('source_url', 'N/A')} +**Type:** {self.document.get('content_type', 'text/plain')} +**Words:** {self.document.get('word_count', 0):,} +**Timestamp:** {self.document.get('timestamp', 'N/A')} + +--- + +{doc_content} +""" + else: + formatted_content = f"""# {self.document['title']} + +**Source:** {self.document.get('source_url', 'N/A')} +**Type:** {self.document.get('content_type', 'text/plain')} +**Words:** {self.document.get('word_count', 0):,} +**Timestamp:** {self.document.get('timestamp', 'N/A')} + +--- + +*Content format not supported for display* +""" + + content_widget.update(formatted_content) + + except Exception as e: + content_widget.update(f"# Error Loading Document\n\nFailed to load document content: {e}") + finally: + loading.display = False diff --git a/ingest_pipeline/cli/tui/styles.py b/ingest_pipeline/cli/tui/styles.py index 1c7398f..0095d1e 100644 --- a/ingest_pipeline/cli/tui/styles.py +++ b/ingest_pipeline/cli/tui/styles.py @@ -2,7 +2,20 @@ from dataclasses import dataclass from enum import Enum -from typing import Any +from typing import Protocol + +from textual.app import App + +# Type alias for Textual apps with unknown return type +TextualApp = App[object] + + +class AppProtocol(Protocol): + """Protocol for apps that support CSS and refresh.""" + + def refresh(self) -> None: + """Refresh the app.""" + ... 
 
 
 class ThemeType(Enum):
@@ -181,8 +194,8 @@ class ThemeManager:
     """Manages theme selection and CSS generation."""
 
     def __init__(self, default_theme: ThemeType = ThemeType.DARK):
-        self.current_theme = default_theme
-        self._themes = {
+        self.current_theme: ThemeType = default_theme
+        self._themes: dict[ThemeType, ColorPalette] = {
             ThemeType.DARK: ThemeRegistry.get_enhanced_dark(),
             ThemeType.LIGHT: ThemeRegistry.get_light(),
             ThemeType.HIGH_CONTRAST: ThemeRegistry.get_high_contrast(),
@@ -1106,18 +1119,16 @@ def get_css_for_theme(theme_type: ThemeType) -> str:
     return css
 
 
-def apply_theme_to_app(app: object, theme_type: ThemeType) -> None:
+def apply_theme_to_app(app: TextualApp | AppProtocol, theme_type: ThemeType) -> None:
     """Apply a theme to a Textual app instance."""
     try:
         css = set_theme(theme_type)
-        if hasattr(app, "stylesheet"):
-            app.stylesheet.clear()
-            app.stylesheet.parse(css)
-        elif hasattr(app, "CSS"):
+        # Set CSS using the standard Textual approach
+        if hasattr(app, "CSS") or isinstance(app, App):
             setattr(app, "CSS", css)
-        elif hasattr(app, "refresh"):
-            # Fallback: try to refresh the app with new CSS
-            app.refresh()
+        # Refresh the app to apply new CSS
+        if hasattr(app, "refresh"):
+            app.refresh()
     except Exception as e:
         # Graceful fallback - log but don't crash the UI
         import logging
@@ -1127,9 +1138,9 @@ def apply_theme_to_app(app: object, theme_type: ThemeType) -> None:
 class ThemeSwitcher:
     """Helper class for managing theme switching in TUI applications."""
 
-    def __init__(self, app: object | None = None) -> None:
-        self.app = app
-        self.theme_history = [ThemeType.DARK]
+    def __init__(self, app: TextualApp | AppProtocol | None = None) -> None:
+        self.app: TextualApp | AppProtocol | None = app
+        self.theme_history: list[ThemeType] = [ThemeType.DARK]
 
     def switch_theme(self, theme_type: ThemeType) -> str:
         """Switch to a new theme and apply it to the app if available."""
@@ -1157,7 +1168,7 @@ class ThemeSwitcher:
         next_theme = themes[(current_index + 1) % len(themes)]
         return self.switch_theme(next_theme)
 
-    def get_theme_info(self) -> dict[str, Any]:
+    def get_theme_info(self) -> dict[str, str | list[str] | dict[str, str]]:
         """Get information about the current theme."""
         palette = get_theme_palette()
         return {
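The `ThemeSwitcher` retyping above is easiest to see in use. A minimal sketch, assuming only the names visible in this diff (`ThemeSwitcher`, `ThemeType`, and the stylesheet string returned by `switch_theme`); with no app attached, the CSS is still generated:

```python
from ingest_pipeline.cli.tui.styles import ThemeSwitcher, ThemeType

switcher = ThemeSwitcher()                    # app is optional per the new signature
css = switcher.switch_theme(ThemeType.LIGHT)  # switch_theme returns the generated CSS
print(css[:60])
print(switcher.get_theme_info())              # now dict[str, str | list[str] | dict[str, str]]
```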
diff --git a/ingest_pipeline/cli/tui/utils/runners.py b/ingest_pipeline/cli/tui/utils/runners.py
index 63d9400..fc629ea 100644
--- a/ingest_pipeline/cli/tui/utils/runners.py
+++ b/ingest_pipeline/cli/tui/utils/runners.py
@@ -86,49 +86,18 @@ async def run_textual_tui() -> None:
     LOGGER.info("Initializing collection management TUI")
     LOGGER.info("Scanning available storage backends")
 
-    # Initialize storage manager
+    # Create storage manager without initialization - let TUI handle it asynchronously
     storage_manager = StorageManager(settings)
-    backend_status = await storage_manager.initialize_all_backends()
 
-    # Report initialization results
-    for backend, success in backend_status.items():
-        if success:
-            LOGGER.info("%s connected successfully", backend.value)
-        else:
-            LOGGER.warning("%s connection failed", backend.value)
-
-    available_backends = storage_manager.get_available_backends()
-    if not available_backends:
-        LOGGER.error("Could not connect to any storage backend")
-        LOGGER.info("Please check your configuration and try again")
-        LOGGER.info("Supported backends: Weaviate, OpenWebUI, R2R")
-        return
-
-    LOGGER.info(
-        "Launching TUI with %d backend(s): %s",
-        len(available_backends),
-        ", ".join(backend.value for backend in available_backends),
-    )
-
-    # Get individual storage instances for backward compatibility
-    from ....storage.openwebui import OpenWebUIStorage
-    from ....storage.weaviate import WeaviateStorage
-
-    weaviate_backend = storage_manager.get_backend(StorageBackend.WEAVIATE)
-    openwebui_backend = storage_manager.get_backend(StorageBackend.OPEN_WEBUI)
-    r2r_backend = storage_manager.get_backend(StorageBackend.R2R)
-
-    # Type-safe casting to specific storage types
-    weaviate = weaviate_backend if isinstance(weaviate_backend, WeaviateStorage) else None
-    openwebui = openwebui_backend if isinstance(openwebui_backend, OpenWebUIStorage) else None
+    LOGGER.info("Launching TUI - storage backends will initialize in background")
 
     # Import here to avoid circular import
     from ..app import CollectionManagementApp
 
     app = CollectionManagementApp(
         storage_manager,
-        weaviate,
-        openwebui,
-        r2r_backend,
+        None,  # weaviate - will be available after initialization
+        None,  # openwebui - will be available after initialization
+        None,  # r2r_backend - will be available after initialization
         log_queue=logging_context.queue,
         log_formatter=logging_context.formatter,
         log_file=logging_context.log_file,
diff --git a/ingest_pipeline/cli/tui/utils/storage_manager.py b/ingest_pipeline/cli/tui/utils/storage_manager.py
index 28ece27..42313e2 100644
--- a/ingest_pipeline/cli/tui/utils/storage_manager.py
+++ b/ingest_pipeline/cli/tui/utils/storage_manager.py
@@ -4,9 +4,11 @@ from __future__ import annotations
 
 import asyncio
-from collections.abc import AsyncGenerator, Sequence
+from collections.abc import AsyncGenerator, Coroutine, Sequence
 from typing import TYPE_CHECKING, Protocol
 
+from pydantic import SecretStr
+
 from ....core.exceptions import StorageError
 from ....core.models import Document, StorageBackend, StorageConfig
 from ..models import CollectionInfo, StorageCapabilities
@@ -54,8 +56,8 @@ class MultiStorageAdapter(BaseStorage):
             seen_ids.add(storage_id)
             unique.append(storage)
 
-        self._storages = unique
-        self._primary = unique[0]
+        self._storages: list[BaseStorage] = unique
+        self._primary: BaseStorage = unique[0]
         super().__init__(self._primary.config)
 
     async def initialize(self) -> None:
@@ -226,10 +228,10 @@ class StorageManager:
 
     def __init__(self, settings: Settings) -> None:
         """Initialize storage manager with application settings."""
-        self.settings = settings
+        self.settings: Settings = settings
         self.backends: dict[StorageBackend, BaseStorage] = {}
         self.capabilities: dict[StorageBackend, StorageCapabilities] = {}
-        self._initialized = False
+        self._initialized: bool = False
 
     async def initialize_all_backends(self) -> dict[StorageBackend, bool]:
         """Initialize all available storage backends with timeout protection."""
@@ -252,14 +254,14 @@
             return False
 
         # Initialize backends concurrently with timeout protection
-        tasks = []
+        tasks: list[tuple[StorageBackend, Coroutine[None, None, bool]]] = []
 
         # Try Weaviate
         if self.settings.weaviate_endpoint:
             config = StorageConfig(
                 backend=StorageBackend.WEAVIATE,
                 endpoint=self.settings.weaviate_endpoint,
-                api_key=self.settings.weaviate_api_key,
+                api_key=SecretStr(self.settings.weaviate_api_key) if self.settings.weaviate_api_key else None,
                 collection_name="default",
             )
             tasks.append((StorageBackend.WEAVIATE, init_backend(StorageBackend.WEAVIATE, config, WeaviateStorage)))
@@ -271,7 +273,7 @@
             config = StorageConfig(
                 backend=StorageBackend.OPEN_WEBUI,
                 endpoint=self.settings.openwebui_endpoint,
-                api_key=self.settings.openwebui_api_key,
+                api_key=SecretStr(self.settings.openwebui_api_key) if self.settings.openwebui_api_key else None,
                 collection_name="default",
             )
             tasks.append((StorageBackend.OPEN_WEBUI, init_backend(StorageBackend.OPEN_WEBUI, config, OpenWebUIStorage)))
@@ -283,7 +285,7 @@
             config = StorageConfig(
                 backend=StorageBackend.R2R,
                 endpoint=self.settings.r2r_endpoint,
-                api_key=self.settings.r2r_api_key,
+                api_key=SecretStr(self.settings.r2r_api_key) if self.settings.r2r_api_key else None,
                 collection_name="default",
             )
             tasks.append((StorageBackend.R2R, init_backend(StorageBackend.R2R, config, R2RStorage)))
@@ -293,7 +295,7 @@
         # Execute initialization tasks concurrently
         if tasks:
             backend_types, task_coroutines = zip(*tasks, strict=False)
-            task_results = await asyncio.gather(*task_coroutines, return_exceptions=True)
+            task_results: Sequence[bool | BaseException] = await asyncio.gather(*task_coroutines, return_exceptions=True)
 
             for backend_type, task_result in zip(backend_types, task_results, strict=False):
                 results[backend_type] = task_result if isinstance(task_result, bool) else False
@@ -426,7 +428,7 @@ class StorageManager:
         storage = self.backends.get(backend_type)
         if storage:
             try:
-                documents = []
+                documents: list[Document] = []
                 async for doc in storage.search(query, limit=limit):
                     documents.append(doc)
                 results[backend_type] = documents
@@ -455,7 +457,7 @@ class StorageManager:
             for collection in collections:
                 total_docs += await storage.count(collection_name=collection)
 
-            backend_status = {
+            backend_status: dict[str, str | int | bool | StorageCapabilities] = {
                 "available": True,
                 "collections": len(collections),
                 "total_documents": total_docs,
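The `asyncio.gather(..., return_exceptions=True)` pattern above is what lets one slow or broken backend degrade to `False` instead of aborting the others. A self-contained sketch of the same shape; the backend names and timings here are illustrative, not the real implementations:

```python
import asyncio


async def init_backend(name: str, startup: float, timeout: float = 1.0) -> bool:
    """Stand-in for a backend initialize(): True on success, False on timeout."""
    try:
        await asyncio.wait_for(asyncio.sleep(startup), timeout=timeout)
        return True
    except TimeoutError:
        return False


async def main() -> None:
    names = ["weaviate", "openwebui", "r2r"]
    results = await asyncio.gather(
        init_backend("weaviate", 0.1),
        init_backend("openwebui", 0.2),
        init_backend("r2r", 5.0),  # too slow: degrades to False instead of failing the batch
        return_exceptions=True,
    )
    print({n: r is True for n, r in zip(names, results)})
    # {'weaviate': True, 'openwebui': True, 'r2r': False}


asyncio.run(main())
```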
zQrTqCNNpEVsk8!gky@t`f-JuyNYu8J%MfuI#(}Q<_3Lxf)RfJ}#R=$Wx|m4EZU=d4 zyC}%y8a|ntX5%CwQp;P4lDRG=sBMwNB%&-T(sPhi4thCPi4v8GqQInNY6fOjO2yPS zqe5(muWEM$z=xb-P{|zkv4-5j7P{Xe_mLqSY>~V`r*4v5CZY&Wh_Vg&h}*^HznC6d zODDIiN-f<-H>`R9x&5d+kR-_gG+9j?6Gafu_Ax$#Z0!XzF{H=UrLUrQ~Ejg z9Wdv5;{Z$;7uh`N(QU_Qksp0AllE2L_OhVF)}nGePxNzctIfHg%^*y(XWvv$ZaYpE z`I9eZs@>J8#?pxdh&60)G$WCf_&xoM>oVNXG$ka7bPT3Euy5L@w;iX8{OK1nZFf?* z7mZ26-307oMp^sv$?6*hVWLOil5nG6_~bHR9ptYvIkn+Qm=o^i3!Iz_H)JKAAP0(N zM@=l7n~lMw(6%-!(eSooxX2Ih-^Hr^-8~L{KI8}&F+j-CA{rt_u_eNb#)wJeBWBSQ zv54k~RkTEGIcv~H-E+*2f)Fl(Tgl4#UiCkaUI_@PfNEud8@QET4pB=6EECYEBaT9w zUX_R}Q(G*Z%p?dpB}+z-l(>+f<`n@}ECNqR#Rv$x7!KO0H5x_vjYg?GidH`hn-ZnY zX!O0TkkB}mXjDwcfNXhtCOkbCzC;~!;pp27^OqK878j{)YW~{WS7(-HrlG-wg{8R* zSE+gS!rWCL*rqOor)I7KsuKWd9A7bl5N;`I{Ri9*fAm>vXW>%Ccc^fs;^{5SRoJbo zy)X;6@6WMaX1$s`LT#)2V3;r8>YOQPrmc%g8oU;{D8z2V*5!LI+<|=&mJp^N(u)p(ph+#G>PC<1g`<0)^}$vyfb%UX@=V8 zVYj01&Ro1cM~w@o7W5l1g?$j<2r@b+bpzbpg|{l+{=(&o-TnTQx?*hhwO8CAQ0c4` zBl%v8PGpp)x-iL%1xq%<8mV0(nY0{-{m4-ly!$M89S}y4x-VO|5f)1A3o~!eFU~E^ zFWjK6>G`Sa*Ji>?7nbJc!}`TvfdxqjF2L2Wc)PnWr7qq-`2IC@@z|_s@wGtykAYKv z-Pa*}j~H7x!V}Y0%g35fM9joOtfKK8v}HV71J6zzz~gH?X9LegT7hS(@!SnO5Al*# z98ENfmM!bYT4c4-f}srbvSF{BJ?Nm$cfg^frKxmkCBB*usQ+1s?PXvkEd}l{emu`8 zp^psIUG+vQ0`|;N6N{m!uO02uZDxZgXC3m zK{pnFN$&tgjS&4(cQguN9tO4W!sTdG!k9;T7XgN8QUJlP0iZ^Vo(A#uHxS^9(ZA_M zq#>AtOU6OI1A0F4xwoS@{6^XPW?|;R=@S3u&XM8b*~Rjar6S+4VJY!TJMBHi6PL>E zZx!Yq$R+-*oipc(*KU^22!+e*wvEBZ*(Zi4V)@YNV$bPq&qSFQRAIp<9V{$A1CCx; z&=3<7F^k5$h2z-Ev#8<#X4$%rW5(BthI${ziX37m7WLJGSXo57COS^hxaFvcBZ!sy zG-%7KVcsI&s`VX>F}ZGTAt$n{`ODSkfuYeoC;**OYM3R1 zM-v0pw1(+J)!Ni;6L=$$$T2CNL3yMW%{)_URSiICESl0#i;#&&Zf&is9*nXg5pb$wQ6rA)81W^Pc0)k))0aFI1K)3MLjetciw-M8+#Z9C&CmPZ+ z(hw}bB|idyx^eQ^=yZ|yudi*$n@f-L+Z`h%e!4PxUQat;31$md*N<7i}MKTlSiUIEa&&HEnfhdSWN8h8C^FO+2ipQ*>>$?$Z-@t(Ui< zm#?9h=YU?`YA?~Z)mEDgXe8*Xzs75C;I%Ve2l7BGJE?m%EtqwaV0!O&4$K&t*!`BoL@rM!0sd{szT0D6FlvEOJ3(m|By9bX!c{O;LVL zNhVY!Xx4;ZSkEPdrbeA(4I^P-??aUd@tF`>81rPsWucX$^0@xQo%G01ofx_{%+^ zVtZ)YF;?P*oq@q8)8&EjBHzBQl=$(TLqkulqpt?v&X=`(m@TA9hERU+xBCph{|}=A z<=IBV<7zNGuByV5)&>&-W~ae<+eP=5r)DU;#0NZPM8FvMsA~bD>}33&D(F~`#2~RT zIMf={4FNdmDy^xtrs%C0x5J1{YCcnK32KgfHLkCZ33{!7F-fV;7}MeaB$6@6BT?zA z&iPlW{3-$VlD`v}iPw ziwUtc$cd3;2U2J?t^j_J6_bA1#8I_t6ThKNoO;MT)y!1qEMYoawM&r8XCP3D!Xx9W zDJZ7~+rF*|D!{PPZzA|@1X#;pkCJX8b|1kv5$ImUw~>Y*1(*Cs0MO!uzIud`(OI0# z=SPNjx|a5gErVt>P~yhvM{zijF`56<%ncsj>AG<+tLIR0Y^K~jTe!9k>B4N2g(>sn zYVeN}ZF=doO!2>f^I|QEXp3Mr>4c!eCE7*DmUCaw!7PT@8fmSybBneE+Icj~;u5`E zzMAz=ElXRCXKdi1WigXBHUF%o7c3+JOVdtmP=8Pr>7W!8n00Fq%6sv?YBN|^qFEO^ z)IFLki#BE!NBcGizo?4JK45G2%S>QbrcJ48wdnOW7LcajtqbfRo18}J(JR<@uq}e$f=m80 z0H|Oi&qm+WyYDwEqmz2tWF`15oq~Pq+2r|Jifnw!mk+&M?0I?H^Gb<7UwQRB=+ETF z;-4ISGXB(GKJrSj|CQ~wGr-yz3O+qr9y(j(dpDL!{Mnte=RXn4XRomP&B4t(nW!gDQ z&`P_ewer(7q6m0cw?N*@LO3htk5{n(1jCFKSixg7ndOl}pqfdBs9mjYMYDLchUa&( zA|x}^8B?P@me^AJ-Lxb^0GLfFC~JbG;^Cn6K-s8gg*+@tHKRJ?`TNK~@OyB{0RZ@J zPgOi^_vb6_&Jy2Q@nNE-5G-lu&CzNUdg}a81oZR#{6+)U4(u)CK{aT9={TH6pO{+qF2Pi&AV2%D378 zYW73jYk~}d4WSOJ>B!WMgm@CqXs8WwIJ>0f6C`@RL`q{aL00cOhu5E)Sp3I~r1rIZ{(OnX`6DN10pxtn8LS1ke4`CA#qY4p{ zLC3F9V-_6Ugcq%vZq#aOT1ljZeKs4zAPK=A!X^I)0MzS?Uzj{3YIBaf8{wCQ#w&f$@>l*1I7-dZWZ2L3;m&^5~l-ez4NjzsWy#miQr{dfL{P z9_De7_O6YIN0AbLs6Mv_tOxr*p$YqR_Q9wL7!@Y5;eQ0{>DRC_->;e|T>UJ5%%DJ- zIB~GC`U_~IcLER{fq`kLq&HOOiLF|TheB#YmXAqj!t2I3M(jslnuf&SL~idtiA ziNhhA=1HK|p}C^48i|5F5%`zdN%JqO%VRA?`W^xs0#<`$euqim_mRe= z7I#|uYXnyiuo~nK5bHpIA5gFl8>GKP48afKl5YY)!5#kG?ccm!cAqF*e889Z6Pkn$ zl=y+2!^fT|<--$&*$30>?GLYP3~f7sKC#nt=m}r$8HEDC%j**l&z1PmiW4i3I~~2n zlhfsn83;j)Wqw9|I6m@V85uahp?zf;acB}yKcjKp>VZe;ep}|6^t6KXz)35bb0#rX 
diff --git a/ingest_pipeline/core/models.py b/ingest_pipeline/core/models.py
--- a/ingest_pipeline/core/models.py
+++ b/ingest_pipeline/core/models.py
+def _default_embedding_model() -> str:
+    return get_settings().embedding_model
+
+
+def _default_embedding_endpoint() -> HttpUrl:
+    return get_settings().llm_endpoint
+
+
+def _default_embedding_dimension() -> int:
+    return get_settings().embedding_dimension
+
+
+def _default_batch_size() -> int:
+    return get_settings().default_batch_size
+
+
+def _default_collection_name() -> str:
+    return get_settings().default_collection_prefix
+
+
+def _default_max_crawl_depth() -> int:
+    return get_settings().max_crawl_depth
+
+
+def _default_max_crawl_pages() -> int:
+    return get_settings().max_crawl_pages
+
+
+def _default_max_file_size() -> int:
+    return get_settings().max_file_size
+
 
 class IngestionStatus(str, Enum):
     """Status of an ingestion job."""
@@ -39,36 +73,36 @@ class IngestionSource(str, Enum):
 
 class VectorConfig(BaseModel):
     """Configuration for vectorization."""
 
-    model: str = Field(default="ollama/bge-m3:latest")
-    embedding_endpoint: HttpUrl = Field(default=HttpUrl("http://llm.lab"))
-    dimension: int = Field(default=1024)
-    batch_size: Annotated[int, Field(gt=0, le=1000)] = 100
+    model: str = Field(default_factory=_default_embedding_model)
+    embedding_endpoint: HttpUrl = Field(default_factory=_default_embedding_endpoint)
+    dimension: int = Field(default_factory=_default_embedding_dimension)
+    batch_size: Annotated[int, Field(gt=0, le=1000)] = Field(default_factory=_default_batch_size)
 
 
 class StorageConfig(Block):
     """Configuration for storage backend."""
 
-    _block_type_name: ClassVar[str] = "Storage Configuration"
-    _block_type_slug: ClassVar[str] = "storage-config"
-    _description: ClassVar[str] = "Configures storage backend connections and settings for document ingestion"
+    _block_type_name: ClassVar[str | None] = "Storage Configuration"
+    _block_type_slug: ClassVar[str | None] = "storage-config"
+    _description: ClassVar[str | None] = "Configures storage backend connections and settings for document ingestion"
 
     backend: StorageBackend
     endpoint: HttpUrl
     api_key: SecretStr | None = Field(default=None)
-    collection_name: str = Field(default="documents")
-    batch_size: Annotated[int, Field(gt=0, le=1000)] = 100
+    collection_name: str = Field(default_factory=_default_collection_name)
+    batch_size: Annotated[int, Field(gt=0, le=1000)] = Field(default_factory=_default_batch_size)
 
 
 class FirecrawlConfig(Block):
     """Configuration for Firecrawl ingestion (operational parameters only)."""
 
-    _block_type_name: ClassVar[str] = "Firecrawl Configuration"
-    _block_type_slug: ClassVar[str] = "firecrawl-config"
-    _description: ClassVar[str] = "Configures Firecrawl web scraping and crawling parameters"
+    _block_type_name: ClassVar[str | None] = "Firecrawl Configuration"
+    _block_type_slug: ClassVar[str | None] = "firecrawl-config"
+    _description: ClassVar[str | None] = "Configures Firecrawl web scraping and crawling parameters"
 
     formats: list[str] = Field(default_factory=lambda: ["markdown", "html"])
-    max_depth: Annotated[int, Field(ge=1, le=20)] = 5
-    limit: Annotated[int, Field(ge=1, le=1000)] = 100
+    max_depth: Annotated[int, Field(ge=1, le=20)] = Field(default_factory=_default_max_crawl_depth)
+    limit: Annotated[int, Field(ge=1, le=1000)] = Field(default_factory=_default_max_crawl_pages)
     only_main_content: bool = Field(default=True)
     include_subdomains: bool = Field(default=False)
@@ -76,9 +110,9 @@ class FirecrawlConfig(Block):
 
 class RepomixConfig(Block):
     """Configuration for Repomix ingestion."""
 
-    _block_type_name: ClassVar[str] = "Repomix Configuration"
-    _block_type_slug: ClassVar[str] = "repomix-config"
-    _description: ClassVar[str] = "Configures repository ingestion patterns and file processing settings"
+    _block_type_name: ClassVar[str | None] = "Repomix Configuration"
+    _block_type_slug: ClassVar[str | None] = "repomix-config"
+    _description: ClassVar[str | None] = "Configures repository ingestion patterns and file processing settings"
 
     include_patterns: list[str] = Field(
         default_factory=lambda: ["*.py", "*.js", "*.ts", "*.md", "*.yaml", "*.json"]
@@ -86,16 +120,16 @@ class RepomixConfig(Block):
     exclude_patterns: list[str] = Field(
         default_factory=lambda: ["**/node_modules/**", "**/__pycache__/**", "**/.git/**"]
     )
-    max_file_size: int = Field(default=1_000_000)  # 1MB
+    max_file_size: int = Field(default_factory=_default_max_file_size)  # default comes from settings
     respect_gitignore: bool = Field(default=True)
 
 
 class R2RConfig(Block):
     """Configuration for R2R ingestion."""
 
-    _block_type_name: ClassVar[str] = "R2R Configuration"
-    _block_type_slug: ClassVar[str] = "r2r-config"
-    _description: ClassVar[str] = "Configures R2R-specific ingestion settings including chunking and graph enrichment"
+    _block_type_name: ClassVar[str | None] = "R2R Configuration"
+    _block_type_slug: ClassVar[str | None] = "r2r-config"
+    _description: ClassVar[str | None] = "Configures R2R-specific ingestion settings including chunking and graph enrichment"
 
     chunk_size: Annotated[int, Field(ge=100, le=8192)] = 1000
     chunk_overlap: Annotated[int, Field(ge=0, le=1000)] = 200
@@ -168,7 +202,7 @@ class Document(BaseModel):
     vector: list[float] | None = Field(default=None)
     score: float | None = Field(default=None)
     source: IngestionSource
-    collection: str = Field(default="documents")
+    collection: str = Field(default_factory=_default_collection_name)
 
 
 class IngestionJob(BaseModel):
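The switch from `default=` to `default_factory=` in these models changes when the value is computed: a plain default is fixed at class-definition time, while a factory runs on every instantiation, so configuration loaded later is picked up. A minimal sketch with a dict standing in for the settings object:

```python
from pydantic import BaseModel, Field

_settings = {"batch_size": 50}  # stand-in for get_settings()


def _default_batch_size() -> int:
    # Evaluated at model construction, not at import time.
    return _settings["batch_size"]


class VectorConfigSketch(BaseModel):
    batch_size: int = Field(default_factory=_default_batch_size)


print(VectorConfigSketch().batch_size)  # 50
_settings["batch_size"] = 100
print(VectorConfigSketch().batch_size)  # 100 - the factory sees the update
```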
diff --git a/ingest_pipeline/flows/scheduler.py b/ingest_pipeline/flows/scheduler.py
index a806558..748705f 100644
--- a/ingest_pipeline/flows/scheduler.py
+++ b/ingest_pipeline/flows/scheduler.py
@@ -3,8 +3,8 @@
 from datetime import timedelta
 from typing import Literal, Protocol, cast
 
-from prefect import serve
 from prefect.deployments.runner import RunnerDeployment
+from prefect.flows import serve as prefect_serve
 from prefect.schedules import Cron, Interval
 from prefect.variables import Variable
 
@@ -82,7 +82,7 @@ def create_scheduled_deployment(
     tags = [source_enum.value, backend_enum.value]
 
     # Create deployment parameters with block support
-    parameters = {
+    parameters: dict[str, str | bool] = {
         "source_url": source_url,
         "source_type": source_enum.value,
         "storage_backend": backend_enum.value,
@@ -97,8 +97,8 @@ def create_scheduled_deployment(
 
     # Create deployment
     # The flow decorator adds the to_deployment method at runtime
-    to_deployment = create_ingestion_flow.to_deployment
-    deployment = to_deployment(
+    flow_with_deployment = cast(FlowWithDeployment, create_ingestion_flow)
+    return flow_with_deployment.to_deployment(
         name=name,
         schedule=schedule,
         parameters=parameters,
@@ -106,8 +106,6 @@ def create_scheduled_deployment(
         description=f"Scheduled ingestion from {source_url}",
     )
 
-    return cast("RunnerDeployment", deployment)
-
 
 def serve_deployments(deployments: list[RunnerDeployment]) -> None:
     """
@@ -116,4 +114,4 @@ def serve_deployments(deployments: list[RunnerDeployment]) -> None:
     Args:
         deployments: List of deployment configurations
     """
-    serve(*deployments, limit=10)
+    prefect_serve(*deployments, limit=10)
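The `cast(FlowWithDeployment, ...)` above is the usual way to type an attribute that a decorator attaches at runtime, which the static checker cannot see on the decorated object. A library-free sketch of the same pattern; the names here are illustrative:

```python
from typing import Protocol, cast


class FlowWithDeployment(Protocol):
    def to_deployment(self, name: str) -> object: ...


class _DecoratedFlow:
    # Pretend this method was attached by a decorator at runtime.
    def to_deployment(self, name: str) -> object:
        return f"deployment:{name}"


flow = cast(FlowWithDeployment, _DecoratedFlow())  # tell the checker what exists at runtime
print(flow.to_deployment("scheduled-ingestion"))
```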
diff --git a/ingest_pipeline/ingestors/firecrawl.py b/ingest_pipeline/ingestors/firecrawl.py
--- a/ingest_pipeline/ingestors/firecrawl.py
+++ b/ingest_pipeline/ingestors/firecrawl.py
+class AsyncFirecrawlSession(Protocol):
+    """Protocol for the client's underlying HTTP session."""
+
+    async def close(self) -> None: ...
+
+
+class AsyncFirecrawlClient(Protocol):
+    """Protocol for AsyncFirecrawl client objects."""
+
+    _session: AsyncFirecrawlSession | None
+
+    async def close(self) -> None: ...
+
+    async def scrape(self, url: str, formats: list[str]) -> FirecrawlResult: ...
+
+    async def map(self, url: str, limit: int | None = None) -> "FirecrawlMapResult": ...
+ + class FirecrawlError(IngestionError): """Base exception for Firecrawl-related errors.""" + status_code: int | None + def __init__(self, message: str, status_code: int | None = None) -> None: super().__init__(message) self.status_code = status_code @@ -64,7 +125,7 @@ async def retry_with_backoff( except Exception as e: if attempt == max_retries - 1: raise e - delay = 1.0 * (2**attempt) + delay: float = 1.0 * (2**attempt) logging.warning( f"Firecrawl operation failed (attempt {attempt + 1}/{max_retries}): {e}. Retrying in {delay:.1f}s..." ) @@ -104,7 +165,7 @@ class FirecrawlIngestor(BaseIngestor): """Ingestor for web and documentation sites using Firecrawl.""" config: FirecrawlConfig - client: AsyncFirecrawl + client: AsyncFirecrawlClient def __init__(self, config: FirecrawlConfig | None = None): """ @@ -130,15 +191,15 @@ class FirecrawlIngestor(BaseIngestor): "http://localhost" ): # Self-hosted instance - try with api_url if supported - self.client = AsyncFirecrawl( + self.client = cast(AsyncFirecrawlClient, AsyncFirecrawl( api_key=api_key, api_url=str(settings.firecrawl_endpoint) - ) + )) else: # Cloud instance - use standard initialization - self.client = AsyncFirecrawl(api_key=api_key) + self.client = cast(AsyncFirecrawlClient, AsyncFirecrawl(api_key=api_key)) except Exception: # Fallback to standard initialization - self.client = AsyncFirecrawl(api_key=api_key) + self.client = cast(AsyncFirecrawlClient, AsyncFirecrawl(api_key=api_key)) @override async def ingest(self, job: IngestionJob) -> AsyncGenerator[Document, None]: @@ -277,11 +338,11 @@ class FirecrawlIngestor(BaseIngestor): """ try: # Use SDK v2 map endpoint following official pattern - result = await self.client.map(url=url, limit=self.config.limit) + result: FirecrawlMapResult = await self.client.map(url=url, limit=self.config.limit) - if result and getattr(result, "links", None): + if result and result.links: # Extract URLs from the result following official pattern - return [getattr(link, "url", str(link)) for link in result.links] + return [link.url for link in result.links] return [] except Exception as e: # If map fails (might not be available in all versions), fall back to single URL @@ -324,43 +385,43 @@ class FirecrawlIngestor(BaseIngestor): try: # Use SDK v2 scrape endpoint following official pattern with retry async def scrape_operation() -> FirecrawlPage | None: - result = await self.client.scrape(url, formats=self.config.formats) + result: FirecrawlResult = await self.client.scrape(url, formats=self.config.formats) # Extract data from the result following official response handling if result: # The SDK returns a ScrapeData object with typed metadata - metadata = getattr(result, "metadata", None) + metadata: FirecrawlMetadata | None = getattr(result, "metadata", None) # Extract basic metadata - title = getattr(metadata, "title", None) if metadata else None - description = getattr(metadata, "description", None) if metadata else None + title: str | None = getattr(metadata, "title", None) if metadata else None + description: str | None = getattr(metadata, "description", None) if metadata else None # Extract enhanced metadata if available - author = getattr(metadata, "author", None) if metadata else None - language = getattr(metadata, "language", None) if metadata else None - sitemap_last_modified = ( + author: str | None = getattr(metadata, "author", None) if metadata else None + language: str | None = getattr(metadata, "language", None) if metadata else None + sitemap_last_modified: str | None = ( 
                     getattr(metadata, "sitemap_last_modified", None) if metadata else None
                 )
-                source_url = getattr(metadata, "sourceURL", None) if metadata else None
-                keywords = getattr(metadata, "keywords", None) if metadata else None
-                robots = getattr(metadata, "robots", None) if metadata else None
+                source_url: str | None = getattr(metadata, "sourceURL", None) if metadata else None
+                keywords: str | list[str] | None = getattr(metadata, "keywords", None) if metadata else None
+                robots: str | None = getattr(metadata, "robots", None) if metadata else None
 
                 # Open Graph metadata
-                og_title = getattr(metadata, "ogTitle", None) if metadata else None
-                og_description = getattr(metadata, "ogDescription", None) if metadata else None
-                og_url = getattr(metadata, "ogUrl", None) if metadata else None
-                og_image = getattr(metadata, "ogImage", None) if metadata else None
+                og_title: str | None = getattr(metadata, "ogTitle", None) if metadata else None
+                og_description: str | None = getattr(metadata, "ogDescription", None) if metadata else None
+                og_url: str | None = getattr(metadata, "ogUrl", None) if metadata else None
+                og_image: str | None = getattr(metadata, "ogImage", None) if metadata else None
 
                 # Twitter metadata
-                twitter_card = getattr(metadata, "twitterCard", None) if metadata else None
-                twitter_site = getattr(metadata, "twitterSite", None) if metadata else None
-                twitter_creator = (
+                twitter_card: str | None = getattr(metadata, "twitterCard", None) if metadata else None
+                twitter_site: str | None = getattr(metadata, "twitterSite", None) if metadata else None
+                twitter_creator: str | None = (
                     getattr(metadata, "twitterCreator", None) if metadata else None
                 )
 
                 # Additional metadata
-                favicon = getattr(metadata, "favicon", None) if metadata else None
-                status_code = getattr(metadata, "statusCode", None) if metadata else None
+                favicon: str | None = getattr(metadata, "favicon", None) if metadata else None
+                status_code: int | None = getattr(metadata, "statusCode", None) if metadata else None
 
                 return FirecrawlPage(
                     url=url,
@@ -373,7 +434,7 @@ class FirecrawlIngestor(BaseIngestor):
                     source_url=source_url,
                     keywords=keywords.split(",")
                     if keywords and isinstance(keywords, str)
-                    else keywords,
+                    else (keywords if isinstance(keywords, list) else None),
                     robots=robots,
                     og_title=og_title,
                     og_description=og_description,
@@ -399,11 +460,11 @@ class FirecrawlIngestor(BaseIngestor):
         return uuid5(NAMESPACE_URL, source_url)
 
     @staticmethod
-    def _analyze_content_structure(content: str) -> dict[str, object]:
+    def _analyze_content_structure(content: str) -> dict[str, str | int | bool | list[str]]:
         """Analyze markdown content to extract structural information."""
         # Extract heading hierarchy
         heading_pattern = r"^(#{1,6})\s+(.+)$"
-        headings = []
+        headings: list[str] = []
         for match in re.finditer(heading_pattern, content, re.MULTILINE):
             level = len(match.group(1))
             text = match.group(2).strip()
@@ -418,7 +479,8 @@ class FirecrawlIngestor(BaseIngestor):
         max_depth = 0
         if headings:
             for heading in headings:
-                depth = (len(heading) - len(heading.lstrip())) // 2 + 1
+                heading_str: str = str(heading)
+                depth = (len(heading_str) - len(heading_str.lstrip())) // 2 + 1
                 max_depth = max(max_depth, depth)
 
         return {
@@ -570,7 +632,7 @@ class FirecrawlIngestor(BaseIngestor):
                 await self.client.close()
             except Exception as e:
                 logging.debug(f"Error closing Firecrawl client: {e}")
-        elif hasattr(self.client, "_session") and hasattr(self.client._session, "close"):
+        elif hasattr(self.client, "_session") and self.client._session and hasattr(self.client._session, "close"):
             try:
                 await self.client._session.close()
             except Exception as e:
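One detail worth flagging in the firecrawl changes: document IDs come from `uuid5(NAMESPACE_URL, source_url)`, so the same URL always yields the same ID and a re-crawl updates the existing record rather than duplicating it. A standard-library demonstration:

```python
from uuid import NAMESPACE_URL, uuid5

first = uuid5(NAMESPACE_URL, "https://example.com/docs/page")
second = uuid5(NAMESPACE_URL, "https://example.com/docs/page")
assert first == second  # deterministic across runs and machines
print(first)
```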
zSVA*e2Yp9-k^1x|UK}o=r`0YR(YxtYeTWz3W%MI`r8A>zJ=qgwN+@fgZCE9*rA>x) zwg@ERCxQHTeB!KHL!UFW30ltTp@a6&6YHSl42diuXCqw1#7zk#Vi2Z80ePJY@}~?0 zd6O3k#c^jTtvQ_hf)RsJ)kNJmkv1g@K$PYtZb1~Niej%oFqu%E5app-bOtO zIbVeN{ZrKi{;Q(Mji{2U(}qPEku-V9IYX$mD@ea#auY56GgG}`L5?IHy=^MfcYc>Y zLBjgs&}RCy)maei9SEYF2AR>Q%LtF|>ex%0EL+V8S8^2+w9WK2OX2$AmHP&wCy#^G zi%P-v!2!jHFt9r!5B3MYkcYz{N?nlI2*5c8fAoizrQKl-lcS7Ajy^pU1@OYjm$w1Q z@JFMGjJ`MCI}lHF5A;Nt5Qaq=VM~EdZ5~-s4&p!S?@y~g}GoJ9AXW5KrS=wk%8!K)X7NrB*(uLl1X~pk!qTQU;aNgzfoX)HNgwr|o zW7@2fGdfcS-z|G_DzJanejsH$khXa)_|N-kU`(BM`!5WiAHEoxbytoVe`WE#**F>d z(dL_FO}DmTi(Rw!M^nZ}KepJ%*UwslDQz%Y4*lMbTj9CRR5q*0TclYK-j4Wy{d{>Y zLXi+8)jKM>tk7*BNpf0slA1sST7(8wLQ5wdfvupcBf<14!8X?E)^WUsM8lBQ5i+C++l*r%>Gl zg~O%v^o}48;(z50&oWYjeP*;$&JkrpEC3FoCXU1aXIgdTkKS#>DjAa^c=9H&tSWS( z>4pZkpiZh!(`->8eai2rZxwm`w!9>aXp>smns7rm>`H1Ah4iB$Ke5w47Oiv`BuA2e znUkEa@=`%k+eN_s56B|8GQMp6<}`*kHFL zJE$tSk8Gu}V6ez6H$g89ellA0UzrQWCf`9PgMmHF+&v`B%=1DEkzbKR*aut1cI2E$ z@abff>5L8s?_wQ44Qcw6$3yp(1RUof5q}XDL$NWgD&02v!}=BUhb7VGIscLw|B^ZX z${GL4wAGci2GcHIx~%eA`HXv8R>u{un&-q~^Cz5GXdc^^HFDO%lqGP>RhlYmpLOj@ zS$EO>rK=u$xEp)u;=zmUm%}OR`m`%>(VZ#?1K?q20(K@~XLpU9(>;EIu1^^Q**3x# zneSzbV3<#{K`6g>*NR5)U|Fd9DR}-fq%}y+x?oYhNm|wlFE_h^y}q2ZtQW4Yh^&Id zbeObk6sD`q$gjcb(`yOx>lxjM^aD=U>gPYuSy~JE4+=%({d{YXilI`~3#u#OP=|_G z=}1L``2=^8f1Eo(81$Iv^OciJH^5`G%ZGtxc#wA)ZS!Y<6h)oFpE;*5s;Jq~NZ+V> zo@nT@aCM!eO{yfFq?ZgJ_s0C@5ezN&8AHn_`O}378`V{p8ediAwE(Q_8{yT&On(`! zH`rhdc1afc(IYN9h`;q!ezEE~Hegt?ol`@R9W=v{6hSkb4_NmH2c?&)i$>o7EwQn; zEohe!^kfN16bWjGj82|>3<*pmC-(x`yG8Cpz8}c&>VLOl1dj~L!8~FFnXw+Gx|+44 zgV@<|Bq+b~Rv=-cEF-UI3UYBI=*!AONEY>mVcIs#V@QU|6V}SZn6aQJ8H=KuU3t?k zKZE6LxOV1>qWO=rmHbW3q0_s!f zn^;YgDQj7_9>)AMTMF6tV#>(75O>>G-H#a z4)kxkqIu^uKCEgC()X7%)ws8G1DZ&|#2{?f2jwVuSc#rvjlr;O5y@m7PF^v6y)M|! z<{Ys~?m&WOM=`-x;4+#M-SR;1z|loJlo9&C*AJT%Q-dC`0&FA54bzJHdIP#uax)Sz zKlQElzXdkF7jW#J0%YDz^lJSoXR#@C>9H&aw;z@L3*NF#gp#E2b z+O_@O-D-rC1It2MG{ZZi2ke$EfK!xI9Z`+&S1Oke5Rag|#-HHBGK)yM;sPXs^z9W(wnNN`iw7a8 zJa-XiDx3(Uj8$Xgu9mZurb_qEnh&J32kx;pXRLPe854@W2s5Dv3H_sYGkvU~-Y$vE z?5RPyHAw=>tp&4Js3IJlYN{~-%p}Rn+^Z_daLI@TA;O(9fQCN_|E2KPfjz`-*%gAF zJos>0YI7H9B@?`vaLG)c*jz*PtKVx?V2|lM>gTYr9?eX>9PJ-`Cc3Do8FfqR&epxH z3NXAyGP>?qZ16-^!_tP~@>Y6kP3RD0YC+Oq=XYcfoUtb8?E&>=vcrT$jLPz$oOi_+ z=2S+Tm*JrY=6MJ4l5y*oQA^QSG!dn{)_QDJIM@FSxjAF!rl-~}8Fjel?4>jI(m8we zjJ+BSUCu()rmUOLLith!sAY~C&@u;FD3ckq42Y3?xNTwD9=zzgXi8b@(%$0FZKUU% zh1p7wzE87O=;YJUtPyVaKxM6-JDf`Axn;CJzw_MAl&9%hP3o~H#&+H`9=zo!{MW1q zg@1_0;Q7yx=320Lk&R&SB5Q<~)xf=hdEcr8Zc0ZYjlz_^3i;(EvR;^4At1jRt4ysS z$Tu>&o6$rys@4)&$4{*j zfxpi2P^U*lyR}Z(vVyvl&{vvvLpUg@0$Zp7brBR*DJ7`FT;GBgz&N?<8BR7~34K@V z5XlBk(G2>>h}m= zqk`v$AsBSx@wb@sIOL4*U*b;*|F&$vSjH{m;^YJX&EqgA;RblcPfFnWva8QSBb(hz zc<}9cB?s%~`h;Ku_I-kU#=o-CuWX(zW5is_mPW!INGR;DeqErb(^6(R-bJn zgVehvV(UT~M?qI)+*wLvTSBWo2U`@+xiV#K{J)`#nf`c7J%nA04vVv0|=b(@e#t2}9afF(puMT+DtU^572zUnE8<_X4 za^R*kq?r?@v@4O{fZ0dO0V#l+1#69#0OeF|>111@c+pKjmmWPR z;i4DNJv$yF&tG|GM-^GZwp%I`189V$3t@*052qMmUhxE2>+lPYw2+r*>mzG}Wmpw^ z>p{05f1&@xkhY1w_DFxFBq&xpuZ*xc5ZUH{&qd(?F-|W@(0^(u_T>N#8%Ww$9|)Wz zIr^^8X-zI{2q1tX^qC-z>s2Eh^jT=hN~ibeLU?9G{QyF#6L$J)ySp4Gh+Ro_9%TlI zq#x0s=dE0QZn~_<(fHtl7?K)z9OJ7C(Lul^4U6Ht2zZGH4(tNFTsxv=?_n}2z|7vm zOn2{c(C_;k^u#WUUmVeXN!tNafcJ+G!$QBkE3tkz^v0l~{l0a_XG4LN_cfKlsgL$tZLHK~s z3JVx2f`YkE(YZ%UO(-TAu?NOXfRQurb5ZZ^)wb6V1nL$*Kyc8*yGuLgLJc#ahPlxC znb7*QwE$g{eHXW;thMtZUk<(qA25&Y%0f&ykg`dFDb;f18!&rnWuycW@4GPZzMCNL(Y5&a_e<+qT>P|^06%RLp>*0MBJbl{is{8Y zZnBeJTDQ)86hp2^15>?o!(#18}8jr3%P(Y+W$T#(wNkkHYWJA7>I zf6`$pdlG_wDkeC`z}Rko$k;kTcr5f@$Dv9TQ~A$;;O-Qz+nYcES>_KQmC@kYVORfQ zn%rC2qgDKpx1c~-Q`B}i5`$PxLfN$7f-2$p+^V9Zk}>ow9K6OAuLb*7M)^yupouHz 
zv239`R{jMp@&{0$Aidzw?IGz<)m&)POlZ?=C^As7;v2r2++Z=hAuv43` z9Pv`%uB#Xobj>CFb(5vJh`(MWB45HoodstP9+bn?U=VZ_v?9N?iPz{j@PM$KUG!&r z%IL55zeax>aT?%=I4R)f0^-C!Tjv%5Q%*0TKU*2_!%hUz2umg1XECYIf!>tqK~NHD zm%oVKIpFEgKLAFW2SRPh(7Kfto(^ zG=M4~Rrf)v7Qh%FiAO)>6rcA@pvFh=|Mo+*%13ngt{O9YS0l|nR!=Nff{(vO3Yl0S zwzPu~GP4kb1niIhvGaf`LumaI9RX&27H~`cE>`$+B;P|4reA-egj}ZId!l;OBL5Jw z;DmdqUvX3lz(46)^t}(C>cZ157SoQl>_uT|bCzu`_j3J7-a zruE6sL$E6P)RRk85kjAPvP7N3vL8ITY!nsw4BzS{Uv;#&xR*$aQ}|{9aNi@4_pY-g z%uR`;rCgX&tC2Ti_LRwie30=arNCe3VdpPg7nUQx3iGe8jub&++JT8_CxI@fUAmT1 zembDSoDzb3=@uM%x}0FuN=C!FRyRMr+}CR7KQM_<`hi`9oDbYsb74mhPrZ1A_**#V zdTI@k=*1?#aGIRrY4)k&L$H@v0Ld|O1U`E?t4;DP9GV`y7Tt9w$h9D%3+^`vvJ*D* z2Bjv{6?4j(Q|6>_MnwlV7Z5dlv+KR3OceJ(<$rs|xdL+MzY63^yb1JsQurEIL;sY)2HvW`cAaJnLC~&8gxT=bQXglomnS_?WSc*7{$W;OX zMfz*xnQ$VmFC6;9#-HcpAJZ4RH>$0mbn!;|%kJu;yCPTMx#_9DY@c&B{Eml!*gY04 zcy=QQQ1jvHo$3b+J(_p;<^*E?Ov4oDf8YdA#{GtjMu3H(l(560eAq#rqJMq3rtyEH zI(!Z4e46v@ExNrV?Js#SBz`%X+OTidzkklXe{35Z4nYVVB%KC&YP1Hf@Bq1Y*Gu>G zRJArkH_wvD3i6de#KFBuA~xYaseyY3^Dc#eo3bK1Wh2Ntbdja}RFNf8!%x+S$S>t1 zE9hVJd|A60dU}h%A@fFQHSyACq&_G5F?oPbs*?N>eng;w2G;92o=F+Y$4J_2f4=Wr-`8SuTJO)b-urRf9G~%& zk(Wt{@QS*O-iZ43h|kQ{{OHR6idqS5vblrM=ZJ1Z5B9w7=x+BDk|n8!{kJ-)mn`_X z!-#QUH~0p1M*})*UfSsHatFUWsi(Gxvrt;d9x)|!(1Kk_6RTu{J5-|o^Qg~fStv8+ zI~p;9M{EA~bmYi&lssk<%>#C&35Ar&{J@}Q`lDm3mzX8TFsNUCki6w$?z1InNE#)z z%T!v*{dXgq$Z!!FSHEh9FF zEU04U5xa^@+WU&pGv_@?8x8h_4U2DXqkH?x4Yn`Yz^zt|!2QbEz5^;XL-}vgU&UPH zar$X&9XUoD`Zp=Ist3l%=;MP!au55gLphCrKfD6%netgtml9=X&KItnj7N4d z;?V!V7|XDF3xo|DgR+a>>o3u;uaD$k(5ityIkF5ZGD9?l+(Z8MmoX1rHRV`0h}?TX zGJJpc$wj{{qlpb3J&GabGf)_IC~o;rvG&i9Y@&sOemA~p#&WzHDzdMhFq9dWPtu)( z-mnv^Ek5~8tcnMlEMBR^q1kbOgPtCA(iaBT*utm~e+5KQh}vHI;oxiDCG}%%w}Rox z`q^OPJXde3Id2>%<7+?m6wz-TKTz%qy}se44U?`}-_r4xTkevJp-aL|_Y&A!e`n1b zYbLkMmaUl3Qq$AHinKd;VdVVC#r?DH+Hv8Q!#^Rt*>cknPWu9{uX$FgZmAZasGgRBhwcwW8 z;7mF=V+~#`{6{#F?jrDJ*~5f$`cplZ??;fUhIXPNH&Mx1k=l+sx(+KZr zfcqQLRz!Zh6u7B+WT%#HaY5pS8WT4(1bMx#&BNbttZQ@fKQR;Fe_|1#^e0Xcc@Gc# zg6j&4!=dxW;A8+81=$h+#&A(n+)BzI6kh#8)ZkY=c)$jMaMdXlICg4w*u~S|#(ksc zKB3e0I@}fe@b}35Es!|&sKl9N%p6|)7%%s+W?fHgFdlsyWdQ?BOcp*i*vA_t{5K7i zX}c>QdajzWSIye1=Zw`e#_BB38LQI9YDCIWfXeEXd*a755uBQXIK>%L?i;V3Xe`#%6(({cl2)hSgU??`~HTw{^DhmUDqSB9`QuBb%*&#_9GwceoPKGs$Q1 zcFl1uG38}!>Hs`8WaU}_m)$~aVmp}>COh8J&2z|SD{4j0<#?8>6-PZ+6Z3HQWDgOo zxR0ck!(1S91Lh6yStl>I=lRS65^?xLxaEx74nva_{S($Lwrf z0W_FwfnC8&?Pko{L$Y<;`X+LRTP7Av_FQgxt8X-m{52?sTlNU?h4DSPIye`MP8jVl@Y7SZ`8?je}3;1{$KCzA~ zx{FGf(ZMEjCm2#Vf9&OcxSWse*m41)~x^l delta 15320 zcmc(GX>=RMnP3;X8}|(mBzS@(cvHM}kUGIz7iCG*MM(-Z88Iz$`XSpFUlr6JK6X01`$1prDS&Z`>Fww zl4VbR?0egM{Q9e}s=u!8{_ZOL^FNUbzb6I%)nL$aaJ;Mj$P`*j_Lhlpx9H4g++g-%rCz8>E|AgXT|92oF*2bv^lP>Pyb+uI-68R!a}@XG}K4&UP^e15r`ewD0P zMxwg`kV{1PxYqxmW=c&eQ8GoZ~9rGBmJR08PJpA&Gf0f8uvQA7#d+lU+;J@5XN+SY5J%MB0vMmC;WlYFZ(01-g}yXz5&SQm!h`eW;L_r= zs*Zebska1J6rAIpBpro2xuSCFRJRf(eN4Tg^nIAY^W+j2h<^f(JI@EXZ-52`NkW_m z*TW4ff`lH=wJRm7moPNN5ma~;C{_V@2l5fna2)+tbvpg6S|cjF{M!OGX>xh7NJ*D# zj-?E#>U~|3PjW>Ko)dwigHq2>SD00KjKYbDtVHjq$u{MxAomaZ~NOo#Q^Iis&rQt^3-# zNKwV8<42kAW`-*^gbO_9S}tsx(Yt@`$i9^XgI~?Mk~N!D6tSm8>?xnBgkgZ|oWPpZ@`&?`m(R2beL#(QZyE87BB5 z<-;p})0v!MOf(nIFM-?z$xGZT0)1R(R|L#-yUs~e^q4MH5wNm`8X5(>W+;s0taBA32AyX(P9+nc-~Dc=ot0 zoYgdIZJzdo^;`dV%an=TU2=EbSmg(G@72w%-afN>d$?{#ct?A9*Y0rpo^aOQS?j)V z=b^Cv5L}an*KC%rSvU6hSa&$9e%9JBZ3*i)L3avvZ^Lf-p*}6t36nZQ8j{Fa59cv( z69Vz*gb7hVT%!Q`gpMF?&@@>235Us($4}(x5wFazPv$3+5ueO8)mQP8RSaKeYB2EE z4ITRp2$f`Y*ohB=;o0k2^`e&z~ek2D;stPlt^y)hKvTo!95@mU@^-Mg?zI zpet(Z>hAV|sp%dBqcqs}CVZq`|KLDW(c`80ri#!|#g{9a%X>oV8FL4P1>mb!ZU zzRn|qQm3qo(rSEa{xM%5s_pUj3;=bY8%#G)Wff-1jCw3pAgDxusw-jGQKXb|%CLd& 
zjtxL7=s(d10mQ;YI(xiQHT3(H09^D(riM@}HY>nR$~rEjgO)`yvR~bQW&d2pvYCu! z5p(hFbXO#IOC&url3)Bgjp(q(L@u*zo>ONU|G=qJjOSWoR?d(#r_Y+pRfi(7)HaLkHpIoFR=CayGAC!@WuBRl@f?>3~iukvgd&h-);BMt;(2 zs?X*pv-OBC%Wtsp*KLSj&oVWX@z={3UT135@KYKQ=u<`>ILHnP?t%w?T4r9JgwL^= zBOIMJm(fRa)%IsNui`15<6cl);{QSvxnT?lep!V!g&7!dY0SXr3=3W?s0e6jt0had zo#TdufQJ6NAtQ@CYYz&;Vvz4$Ffi=S3WKSF+8YTupKI@c-AIx-Tbg>c!5f|XLkx*kl)FzlyNSqXG_$Y7lmM&B%fr zi;^e&X!cz$`n+{Z=qRB6?*d@ja@(2lYTA`Fj7h>po>^!8oUQ(xI-)kusk3L)**Dd> z5mWMW!{>*;9y%x7GC421-wA zIts`W*Crj6qy(8o)!xDG6aA3hNZ4HPvKX%|L~4J-Q&QG&tt&+&;6flz)O!stE8aHJ zvK1-HVP#O+n?#>>r=o{d(g~03VLx)FWSA1NIjjzK^j8UH0Gia}T7^)2Ysa3XtTN;NK1YIz7RWc{@!2bW=XON%Z~H zR966_Uv@CpymWvgsOfc~Yu<|Q>VX3$lN{7wh?#hVpoTeo&1nrde0O5_=Wd&Vw@nY^ zf8hWp3@9L}^q*5-CWZ8wv>lE|VQf@50zv3KZNT?rp!|4WSI+>Ar7gEaMI`&9%Hv&9 z7wFC!+L^v9sSYlXiV>jGlrSNdmeIdXcct5WeHhBf0v#WVhXz! z$K0>5vUmG(#}*f=y?8Yf#CmocoFPVN(h=-HjgD%N*2(VOi$vz?(5lk&4ikMUBg^tF zX!sS1gtv2)(6=&PT)EWQxgw6VNN&M+-b`|9OvT)sfNoA;Zce+@&C&mqxgxaWTC4l& zfvat!MPc)r@r~0b!wq|4JY%3h1`5VNxs(BVy_0lJSf3SZAbh&Y# z>CcF>xBov6v}sb7J9Gx#(;3p@BxfB>7VbsTq!nJOM|851G#P}+D$gotOcj$RlQ311 z3HS{S4!@x#h#MF*F=)}WIQSc>rlunPMv)%z<@wEN{D*0Xe^_8@UcrC30`ZR&rWOnT zkwt8ADEW^Ze2bI5n3bbA%wKyyYo~(bP-{+&>pm4I8P+Y)JVQ{|o6i&rZQcV1D3&p( zyHBxnOtEyQb)Z-%)$Cx3wUn!gmgb~Vb50sD16DvRv^{5}0`Jf!bpVX=V96xZyZd}O z8Ziq0^L-#}&nxJ9Llt=POT4;Qk5i;NcajmZc{P@qz^3Wm;%S9DHQ|8Kr1A6-r)<(* za;FeI{h_;@IBAx&X8{ONSBnAz1Ux{I$#xNBRBA!7guSB5{?DV^=R4!l``Ht>Q-CW_SnZi{OyE~Ga6)7kg_syh% z|Ky5l=Q*Ls3TmEfyjqG_VvNwX$JS(dt)4&{!8xf@f540j~7pe9!K;1 z$2+ZrOR6Uk$ELfV7B5L6z95HyGRPNYC*$kGJKMuMc7@HmzaTl;E9X6%r#;ivVN=$& zS?eR1&*=BC>-i7$h<})3YAygfEFxaY1G}G;xV0|ejTy)#?iyb@K$3X*wSN`J z?6vzb;GthzwrpJ%yhpzsTI^kZn!}lL!a2KVjeElCJ$G3}@QmI_9v6p_yl)Lfb#WRdALKvarv>|F!CCy&F5%0iv>!6|^5VFB&^9xJM3<9u@Eg$1q5 ziixaP%POq0xmqmBs2Hw9C5l#w%|&e!WNq7p)ZGZGskf%t;zPB873jFy7|rzWYI;Kd zpVlxEt0uH^d5XFpCJDb|nmnudiPd_*Csq0N z%b>W(@LE%Yn!m0_{JP!Lkjr1s6%jAwfzN`w1^$cm6eZSEvgtoA->wZR;5#cZjX*yt z4D*m~$R$Lz8gdU37a4=6RoEAJvNjvF#FbvZ`{~RnV;3V#U`uda!n7_UeZ?MtRXN?hxsV zwP|_M*Wn&RwO{Umainf&Wg`Cf(HCmVj6)?$y(EybdSLxx(JP``l8ZU48ps@>fG+z% z^?VKoGUZ)VR8dycGVEeW8!O1Nat*A5b98&1$8s9Awr4?W?aSzQ>fGfCXIDOxQ68>p z96vDL7T&QhT+uO`v41Xk|GCzP-oCI75mVBCuPYB_aOr!pKZo*AsDD)zDoe#rCsn{O9yU@GyL4R2V7Y5v?+Xe(Z$7G*emeP z&X8t;F3f~3OouMa1TG9{uZMB~g%#3z1FVpGL}*NEv13w45H~Pr(lo8(CsWEB*74U> zup@+A*Dpi7mf`F4&^x6~X)NHUa*#4rz$n!zO=id}ka7dO+B*J5dP-9*f1`#`YDJ{1 zOXH!t8>YB$#Px)mub}CQ%#hM2dU?zYVgHHf6%acG8yN28EQok~D@$3!U|%1^ZLmhY z^F)7tmvm~VY-d0^(G83JK8Pc6x$rPXl3fc+3G&*T%P$WvW2<9Pv3u|YY^+iFq3qw; zPYzQBUlEdAaNnq@v$JbpU@*Xz2VqY`l!yHfP!5G6MBq(M!i1fLWl_uL<|18z&L5%F zVgm40?w{0aZW}FM*F;R#uWKU~$FoNH@;8tNx~KcB^tItNMv~0(W9BbayX5G#tw@m8 z)7RHFB)i}eXt4YjFPHWpSWf@6b}#wh+OBnlWEm@kqCv)suy+H3W&lwhs-;EoG>%<; zgnn;*Bl(#A>-t(Hd#pXQY=aXPomw~ayAxR!TnPs(KM8F4iH|Qop*`1N?H$&2XgCG) zEe#PN^GdE3AL9}(0kR|X*X#%`NwyOG5+qAs(FH8@RZo(Dt4p*pHM2y;d=ty_AQHiH zmi84@qTB_(NyV0wo&X;M`Bc4=PI~Owx+T}>gKCIgmMlbpFLTmu_1PK1u==!mCp-(D zPm=o*8w34DeV`bvZJS(B#{3r2dV zF()76_U~?cZCiLn>vYnzDqQo(tb6-h=5`3%mqh9GCyx|Tp=p)c!e#A114tX18aK57 zGYCD_k+W;+Yq<%I)UOmKh-W!8CRZSRawS2$R@1PWzpf~2SixVfAc$9r(0hG_i1=#0 zVIBQ>Q!*Jy#Gd8N+5)Ki4fBhY*&rW?`)6)JHvF&@NzI#*>8|EXqCr1TKVG9Zh(Wsr7yJ1;wr8?5jC%cyA_`MC|Fe>GxZAC#6A0ER(Zmh4q=SJT9vq{GgGhY%D*r8CaYl z^~=dwTYVCDiPT$!%N2lOvSU+DXgn%tOqy}Tq=g`EV=zgBduB3CED{yj{uv&|ETkkb zy~T+0N&42tTUfWD%AF>9smdZ^!N!1=LkX%Z_L`_`bM;2$5}Tn4zy%nUM`aDHsh{`NLG^6)`ae?gz>bauB7_&Dpsh0CpB6VnH%N zBJzeci_v0l8cL-~D|4f(wv-dywWC|UO;XrLL;KJMVzL3w(rXA@G_!4wLam@D+jg?e zpDcKjj?mwHprgo+Q(Ck5vEp zc4Ze?N!{DCRSCngd3$Au1tWZ89r+7e!wT*SX{ZriOakm30C2h}~5pQdZ*7`}{7(o6o@iyn;~Aj#aqbfE3_n! 
diff --git a/ingest_pipeline/storage/__pycache__/weaviate.cpython-312.pyc b/ingest_pipeline/storage/__pycache__/weaviate.cpython-312.pyc
index 999089399aab5bbc84e03a3187a995be3981938d..4b205693ec5e7732c579a890892ce14e5a328d7f 100644
Binary files a/ingest_pipeline/storage/__pycache__/weaviate.cpython-312.pyc and b/ingest_pipeline/storage/__pycache__/weaviate.cpython-312.pyc differ
zvE?v^`*p4!o%tvY0WG68Lh1I^qr&@!;w$kf`X5+M$nf`-%Z3uxc4|smzOjK^kcI?& z{?ahapF>N+7fu}^^=4x&Ho*g1;4zf!FwQ4e#$_K09nKsw;>(al$jBbDaxzYcbL{Rs zRJFUO-h`n z%loSinfBNAC*<^7bJ+`Lwq`nEUFX#cD}L5FBq06U6~p{FRKn);E=e~V7m>dun~bUa zZxdW?cO_lOPW0Ym*_%RU>1Q{O8W*w`p=G(b9>G^$s8#@V(=xL)D7X8S i*>p8@fq#P9$<~~`o1HsbkhH= diff --git a/ingest_pipeline/storage/base.py b/ingest_pipeline/storage/base.py index b3e4127..2e3e8fa 100644 --- a/ingest_pipeline/storage/base.py +++ b/ingest_pipeline/storage/base.py @@ -1,9 +1,136 @@ """Base storage interface.""" +import logging from abc import ABC, abstractmethod from collections.abc import AsyncGenerator +from typing import Final +from types import TracebackType +import httpx +from pydantic import SecretStr + +from ..core.exceptions import StorageError from ..core.models import Document, StorageConfig +from .types import CollectionSummary, DocumentInfo + +LOGGER: Final[logging.Logger] = logging.getLogger(__name__) + + +class TypedHttpClient: + """ + A properly typed HTTP client wrapper for HTTPX. + + Provides consistent exception handling and type annotations + for storage adapters that use HTTP APIs. + + Note: Some type checkers (Pylance) may report warnings about HTTPX types + due to library compatibility issues. The code functions correctly at runtime. + """ + + client: httpx.AsyncClient + _base_url: str + + def __init__( + self, + base_url: str, + *, + api_key: SecretStr | None = None, + timeout: float = 30.0, + headers: dict[str, str] | None = None, + ): + """ + Initialize the typed HTTP client. + + Args: + base_url: Base URL for all requests + api_key: Optional API key for authentication + timeout: Request timeout in seconds + headers: Additional headers to include with requests + """ + self._base_url = base_url + + # Build headers with optional authentication + client_headers: dict[str, str] = headers or {} + if api_key: + client_headers["Authorization"] = f"Bearer {api_key.get_secret_value()}" + + # Note: Pylance incorrectly reports "No parameter named 'base_url'" + # but base_url is a valid AsyncClient parameter (see HTTPX docs) + client_kwargs: dict[str, str | dict[str, str] | float] = { + "base_url": base_url, + "headers": client_headers, + "timeout": timeout, + } + self.client = httpx.AsyncClient(**client_kwargs) # type: ignore + + async def request( + self, + method: str, + path: str, + *, + allow_404: bool = False, + json: dict[str, object] | None = None, + data: dict[str, object] | None = None, + files: dict[str, tuple[str, bytes, str]] | None = None, + params: dict[str, str | bool] | None = None, + ) -> httpx.Response | None: + """ + Perform an HTTP request with consistent error handling. + + Args: + method: HTTP method (GET, POST, DELETE, etc.) 
+            path: URL path relative to base_url
+            allow_404: If True, return None for 404 responses instead of raising
+            json: Optional JSON body to send with the request
+            data: Optional form data to send with the request
+            files: Optional file uploads as (filename, content, content-type) tuples
+            params: Optional query parameters
+
+        Returns:
+            HTTP response object, or None if allow_404=True and status is 404
+
+        Raises:
+            StorageError: If request fails
+        """
+        try:
+            response = await self.client.request(  # type: ignore
+                method, path, json=json, data=data, files=files, params=params
+            )
+            response.raise_for_status()  # type: ignore
+            return response  # type: ignore
+        except Exception as e:
+            # Handle 404 as special case if requested
+            if allow_404 and hasattr(e, 'response') and getattr(e.response, 'status_code', None) == 404:  # type: ignore
+                LOGGER.debug("Resource not found (404): %s %s", method, path)
+                return None
+
+            # Convert all HTTP-related exceptions to StorageError
+            error_name = e.__class__.__name__
+            if 'HTTP' in error_name or 'Connect' in error_name or 'Request' in error_name:
+                if hasattr(e, 'response') and hasattr(e.response, 'status_code'):  # type: ignore
+                    status_code = getattr(e.response, 'status_code', 'unknown')  # type: ignore
+                    raise StorageError(f"HTTP {status_code} error from {self._base_url}: {e}") from e
+                else:
+                    raise StorageError(f"Request failed to {self._base_url}: {e}") from e
+            # Re-raise non-HTTP exceptions
+            raise
+
+    async def close(self) -> None:
+        """Close the HTTP client and cleanup resources."""
+        try:
+            await self.client.aclose()
+        except Exception as e:
+            LOGGER.warning("Error closing HTTP client: %s", e)
+
+    async def __aenter__(self) -> "TypedHttpClient":
+        """Async context manager entry."""
+        return self
+
+    async def __aexit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc_val: BaseException | None,
+        exc_tb: TracebackType | None,
+    ) -> None:
+        """Async context manager exit."""
+        await self.close()
 
 
 class BaseStorage(ABC):
@@ -164,12 +291,12 @@ class BaseStorage(ABC):
         """
         return []
 
-    async def describe_collections(self) -> list[dict[str, object]]:
+    async def describe_collections(self) -> list[CollectionSummary]:
         """
         Describe available collections with metadata (if supported by backend).
 
         Returns:
-            List of collection metadata dictionaries, empty list if not supported
+            List of collection metadata, empty list if not supported
         """
         return []
 
@@ -206,7 +333,7 @@ class BaseStorage(ABC):
         offset: int = 0,
         *,
         collection_name: str | None = None,
-    ) -> list[dict[str, object]]:
+    ) -> list[DocumentInfo]:
         """
         List documents in the storage backend (if supported).
@@ -216,7 +343,7 @@
             collection_name: Collection to list documents from
 
         Returns:
-            List of document dictionaries with metadata
+            List of document information with metadata
 
         Raises:
             NotImplementedError: If backend doesn't support document listing
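The `TypedHttpClient` added above centralizes the `try`/`raise_for_status()`/`except` boilerplate that each storage adapter previously repeated per method. A minimal usage sketch, assuming an illustrative endpoint URL and a placeholder API key (the class, its `request()` signature, and `StorageError` come from the patch itself):

```python
import asyncio

from pydantic import SecretStr

from ingest_pipeline.core.exceptions import StorageError
from ingest_pipeline.storage.base import TypedHttpClient


async def main() -> None:
    # The wrapper is an async context manager, so aclose() is guaranteed.
    async with TypedHttpClient(
        "http://openwebui.lab",  # hypothetical endpoint
        api_key=SecretStr("example-key"),  # placeholder credential
        timeout=10.0,
    ) as client:
        # allow_404=True maps a 404 response to None instead of raising,
        # which callers use for "does this resource exist?" probes.
        response = await client.request(
            "GET", "/api/v1/knowledge/list", allow_404=True
        )
        if response is None:
            print("knowledge list endpoint not found")
            return
        print(response.json())


try:
    asyncio.run(main())
except StorageError as exc:
    # Connection failures and HTTP errors surface as one exception type.
    print(f"storage backend unavailable: {exc}")
```

Folding transport and status errors into a single `StorageError` is what lets the adapter methods below replace their four-way `except ConnectError/HTTPStatusError/RequestError/Exception` chains with a single handler.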
diff --git a/ingest_pipeline/storage/openwebui.py b/ingest_pipeline/storage/openwebui.py
index 114bb46..8e42d0c 100644
--- a/ingest_pipeline/storage/openwebui.py
+++ b/ingest_pipeline/storage/openwebui.py
@@ -1,33 +1,49 @@
 """Open WebUI storage adapter."""
+
 import asyncio
+import contextlib
 import logging
-from typing import TYPE_CHECKING, Final, TypedDict, cast
+from typing import Final, TypedDict, cast
 
-import httpx
 from typing_extensions import override
 
-if TYPE_CHECKING:
-    # Type checking imports - these will be ignored at runtime
-    from httpx import AsyncClient, ConnectError, HTTPStatusError, RequestError
-else:
-    # Runtime imports that work properly
-    AsyncClient = httpx.AsyncClient
-    ConnectError = httpx.ConnectError
-    HTTPStatusError = httpx.HTTPStatusError
-    RequestError = httpx.RequestError
-
 from ..core.exceptions import StorageError
 from ..core.models import Document, StorageConfig
-from .base import BaseStorage
+from .base import BaseStorage, TypedHttpClient
+from .types import CollectionSummary, DocumentInfo
 
 LOGGER: Final[logging.Logger] = logging.getLogger(__name__)
 
 
+class OpenWebUIFileResponse(TypedDict, total=False):
+    """OpenWebUI API file response structure."""
+    id: str
+    filename: str
+    name: str
+    content_type: str
+    size: int
+    created_at: str
+    meta: dict[str, str | int]
+
+
+class OpenWebUIKnowledgeBase(TypedDict, total=False):
+    """OpenWebUI knowledge base response structure."""
+    id: str
+    name: str
+    description: str
+    files: list[OpenWebUIFileResponse]
+    data: dict[str, str]
+    created_at: str
+    updated_at: str
+
+
 class OpenWebUIStorage(BaseStorage):
     """Storage adapter for Open WebUI knowledge endpoints."""
 
-    client: AsyncClient
+    http_client: TypedHttpClient
     _knowledge_cache: dict[str, str]
 
     def __init__(self, config: StorageConfig):
@@ -39,13 +55,9 @@
         """
         super().__init__(config)
 
-        headers: dict[str, str] = {}
-        if config.api_key:
-            headers["Authorization"] = f"Bearer {config.api_key}"
-
-        self.client = AsyncClient(
+        self.http_client = TypedHttpClient(
             base_url=str(config.endpoint),
-            headers=headers,
+            api_key=config.api_key,
             timeout=30.0,
         )
         self._knowledge_cache = {}
@@ -59,60 +71,45 @@
                 self.config.collection_name,
                 create=True,
             )
-
-        except ConnectError as e:
-            raise StorageError(f"Connection to OpenWebUI failed: {e}") from e
-        except HTTPStatusError as e:
-            raise StorageError(f"OpenWebUI returned error {e.response.status_code}: {e}") from e
-        except RequestError as e:
-            raise StorageError(f"Request to OpenWebUI failed: {e}") from e
         except Exception as e:
             raise StorageError(f"Failed to initialize Open WebUI: {e}") from e
 
     async def _create_collection(self, name: str) -> str:
         """Create knowledge base in Open WebUI."""
-        try:
-            response = await self.client.post(
-                "/api/v1/knowledge/create",
-                json={
-                    "name": name,
-                    "description": "Documents ingested from various sources",
-                    "data": {},
-                    "access_control": None,
-                },
-            )
-            response.raise_for_status()
-            result = response.json()
-            knowledge_id = result.get("id")
+        response = await self.http_client.request(
+            "POST",
+            "/api/v1/knowledge/create",
+            json={
+                "name": name,
+                "description": "Documents ingested from various sources",
+                "data": {},
+                "access_control": None,
+            },
+        )
+        if response is None:
+            raise StorageError("Unexpected None response from knowledge base creation")
+        result = response.json()
+        knowledge_id = result.get("id")
 
-            if not knowledge_id or not isinstance(knowledge_id, str):
-                raise StorageError("Knowledge base creation failed: no ID returned")
+        if not knowledge_id or not isinstance(knowledge_id, str):
+            raise StorageError("Knowledge base creation failed: no ID returned")
 
-            return str(knowledge_id)
+        return str(knowledge_id)
 
-        except ConnectError as e:
-            raise StorageError(f"Connection to OpenWebUI failed during creation: {e}") from e
-        except HTTPStatusError as e:
-            raise StorageError(
-                f"OpenWebUI returned error {e.response.status_code} during creation: {e}"
-            ) from e
-        except RequestError as e:
-            raise StorageError(f"Request to OpenWebUI failed during creation: {e}") from e
-        except Exception as e:
-            raise StorageError(f"Failed to create knowledge base: {e}") from e
-
-    async def _fetch_knowledge_bases(self) -> list[dict[str, object]]:
+    async def _fetch_knowledge_bases(self) -> list[OpenWebUIKnowledgeBase]:
         """Return the list of knowledge bases from the API."""
-        response = await self.client.get("/api/v1/knowledge/list")
-        response.raise_for_status()
+        response = await self.http_client.request("GET", "/api/v1/knowledge/list")
+        if response is None:
+            return []
         data = response.json()
         if not isinstance(data, list):
             return []
 
-        normalized: list[dict[str, object]] = []
+        normalized: list[OpenWebUIKnowledgeBase] = []
        for item in data:
             if isinstance(item, dict):
-                item_dict: dict[str, object] = item
-                normalized.append({str(k): v for k, v in item_dict.items()})
+                # Cast to our expected structure
+                kb_item = cast(OpenWebUIKnowledgeBase, item)
+                normalized.append(kb_item)
         return normalized
 
     async def _get_knowledge_id(
@@ -171,12 +168,14 @@
             if not filename.endswith(('.txt', '.md', '.pdf', '.doc', '.docx')):
                 filename = f"{filename}.txt"
             files = {"file": (filename, document.content.encode(), "text/plain")}
-            response = await self.client.post(
+            response = await self.http_client.request(
+                "POST",
                 "/api/v1/files/",
                 files=files,
                 params={"process": True, "process_in_background": False},
             )
-            response.raise_for_status()
+            if response is None:
+                raise StorageError("Unexpected None response from file upload")
 
             file_data = response.json()
             file_id = file_data.get("id")
@@ -185,19 +184,14 @@
                 raise StorageError("File upload failed: no file ID returned")
 
             # Step 2: Add file to knowledge base
-            response = await self.client.post(
-                f"/api/v1/knowledge/{knowledge_id}/file/add", json={"file_id": file_id}
+            await self.http_client.request(
+                "POST",
+                f"/api/v1/knowledge/{knowledge_id}/file/add",
+                json={"file_id": file_id}
             )
-            response.raise_for_status()
-
+
             return str(file_id)
 
-        except ConnectError as e:
-            raise StorageError(f"Connection to OpenWebUI failed: {e}") from e
-        except HTTPStatusError as e:
-            raise StorageError(f"OpenWebUI returned error {e.response.status_code}: {e}") from e
-        except RequestError as e:
-            raise StorageError(f"Request to OpenWebUI failed: {e}") from e
         except Exception as e:
             raise StorageError(f"Failed to store document: {e}") from e
 
@@ -229,12 +223,14 @@
             if not filename.endswith(('.txt', '.md', '.pdf', '.doc', '.docx')):
                 filename = f"{filename}.txt"
             files = {"file": (filename, doc.content.encode(), "text/plain")}
-            upload_response = await self.client.post(
+            upload_response = await self.http_client.request(
+                "POST",
                 "/api/v1/files/",
                 files=files,
                 params={"process": True, "process_in_background": False},
            )
-            upload_response.raise_for_status()
+            if upload_response is None:
+                raise StorageError(f"Unexpected None response from file upload for document {doc.id}")
 
             file_data = upload_response.json()
             file_id = file_data.get("id")
@@ -244,10 +240,11 @@
                     f"File upload failed for document {doc.id}: no file ID returned"
                 )
 
-            attach_response = await self.client.post(
-                f"/api/v1/knowledge/{knowledge_id}/file/add", json={"file_id": file_id}
+            await self.http_client.request(
+                "POST",
+                f"/api/v1/knowledge/{knowledge_id}/file/add",
+                json={"file_id": file_id}
             )
-            attach_response.raise_for_status()
 
             return str(file_id)
 
@@ -273,14 +270,6 @@
 
             return file_ids
 
-        except ConnectError as e:
-            raise StorageError(f"Connection to OpenWebUI failed during batch: {e}") from e
-        except HTTPStatusError as e:
-            raise StorageError(
-                f"OpenWebUI returned error {e.response.status_code} during batch: {e}"
-            ) from e
-        except RequestError as e:
-            raise StorageError(f"Request to OpenWebUI failed during batch: {e}") from e
         except Exception as e:
             raise StorageError(f"Failed to store batch: {e}") from e
 
@@ -298,6 +287,7 @@
         Returns:
             Always None - retrieval not supported
         """
+        _ = document_id, collection_name  # Mark as used
         # OpenWebUI uses file-based storage without direct document retrieval
         # This will cause the base check_exists method to return False,
         # which means documents will always be re-scraped for OpenWebUI
@@ -323,35 +313,20 @@
                 return False
 
             # Remove file from knowledge base
-            response = await self.client.post(
-                f"/api/v1/knowledge/{knowledge_id}/file/remove", json={"file_id": document_id}
+            await self.http_client.request(
+                "POST",
+                f"/api/v1/knowledge/{knowledge_id}/file/remove",
+                json={"file_id": document_id}
             )
-            response.raise_for_status()
 
-            delete_response = await self.client.delete(f"/api/v1/files/{document_id}")
-            if delete_response.status_code == 404:
-                return True
-            delete_response.raise_for_status()
+            await self.http_client.request(
+                "DELETE",
+                f"/api/v1/files/{document_id}",
+                allow_404=True
+            )
 
             return True
-
-        except ConnectError as exc:
-            LOGGER.error(
-                "Failed to reach OpenWebUI when deleting file %s", document_id, exc_info=exc
-            )
-            return False
-        except HTTPStatusError as exc:
-            LOGGER.error(
-                "OpenWebUI returned status error %s when deleting file %s",
-                exc.response.status_code if exc.response else "unknown",
-                document_id,
-                exc_info=exc,
-            )
-            return False
-        except RequestError as exc:
-            LOGGER.error("Request error deleting file %s from OpenWebUI", document_id, exc_info=exc)
-            return False
         except Exception as exc:
-            LOGGER.error("Unexpected error deleting file %s", document_id, exc_info=exc)
+            LOGGER.error("Error deleting file %s from OpenWebUI", document_id, exc_info=exc)
             return False
 
     async def list_collections(self) -> list[str]:
@@ -370,12 +345,6 @@
                 for kb in knowledge_bases
             ]
 
-        except ConnectError as e:
-            raise StorageError(f"Connection to OpenWebUI failed: {e}") from e
-        except HTTPStatusError as e:
-            raise StorageError(f"OpenWebUI returned error {e.response.status_code}: {e}") from e
-        except RequestError as e:
-            raise StorageError(f"Request to OpenWebUI failed: {e}") from e
         except Exception as e:
             raise StorageError(f"Failed to list knowledge bases: {e}") from e
 
@@ -396,8 +365,11 @@ class OpenWebUIStorage(BaseStorage):
             return True
 
         # Delete the knowledge base using the OpenWebUI API
-        response = await self.client.delete(f"/api/v1/knowledge/{knowledge_id}/delete")
-        response.raise_for_status()
+        await self.http_client.request(
+            "DELETE",
+            f"/api/v1/knowledge/{knowledge_id}/delete",
+            allow_404=True
+        )
 
         # Remove from cache if it exists
         if collection_name in self._knowledge_cache:
@@ -406,45 +378,25 @@
             LOGGER.info("Successfully deleted knowledge base: %s", collection_name)
             return True
 
-        except HTTPStatusError as e:
-            # Handle 404 as success (already deleted)
-            if e.response.status_code == 404:
-                LOGGER.info("Knowledge base %s was already deleted or not found", collection_name)
-                return True
-            LOGGER.error(
-                "OpenWebUI returned error %s when deleting knowledge base %s",
-                e.response.status_code,
-                collection_name,
-                exc_info=e,
-            )
-            return False
-        except ConnectError as e:
-            LOGGER.error(
-                "Failed to reach OpenWebUI when deleting knowledge base %s",
-                collection_name,
-                exc_info=e,
-            )
-            return False
-        except RequestError as e:
-            LOGGER.error(
-                "Request error deleting knowledge base %s from OpenWebUI",
-                collection_name,
-                exc_info=e,
-            )
-            return False
         except Exception as e:
-            LOGGER.error("Unexpected error deleting knowledge base %s", collection_name, exc_info=e)
+            if hasattr(e, 'response'):
+                response_attr = getattr(e, 'response', None)
+                if response_attr is not None and hasattr(response_attr, 'status_code'):
+                    with contextlib.suppress(Exception):
+                        status_code = response_attr.status_code  # type: ignore[attr-defined]
+                        if status_code == 404:
+                            LOGGER.info("Knowledge base %s was already deleted or not found", collection_name)
+                            return True
+            LOGGER.error(
+                "Error deleting knowledge base %s from OpenWebUI",
+                collection_name,
+                exc_info=e,
+            )
             return False
 
-    class CollectionSummary(TypedDict):
-        """Structure describing a knowledge base summary."""
-
-        name: str
-        count: int
-        size_mb: float
-
-    async def _get_knowledge_base_count(self, kb: dict[str, object]) -> int:
+    async def _get_knowledge_base_count(self, kb: OpenWebUIKnowledgeBase) -> int:
         """Get the file count for a knowledge base."""
         kb_id = kb.get("id")
         name = kb.get("name", "Unknown")
@@ -454,17 +406,22 @@
 
         return await self._count_files_from_detailed_info(str(kb_id), str(name), kb)
 
-    def _count_files_from_basic_info(self, kb: dict[str, object]) -> int:
+    def _count_files_from_basic_info(self, kb: OpenWebUIKnowledgeBase) -> int:
         """Count files from basic knowledge base info."""
         files = kb.get("files", [])
         return len(files) if isinstance(files, list) and files is not None else 0
 
-    async def _count_files_from_detailed_info(self, kb_id: str, name: str, kb: dict[str, object]) -> int:
+    async def _count_files_from_detailed_info(self, kb_id: str, name: str, kb: OpenWebUIKnowledgeBase) -> int:
         """Count files by fetching detailed knowledge base info."""
         try:
             LOGGER.debug(f"Fetching detailed info for KB '{name}' from /api/v1/knowledge/{kb_id}")
-            detail_response = await self.client.get(f"/api/v1/knowledge/{kb_id}")
-            detail_response.raise_for_status()
+            detail_response = await self.http_client.request(
+                "GET",
+                f"/api/v1/knowledge/{kb_id}"
+            )
+            if detail_response is None:
+                LOGGER.warning(f"Knowledge base '{name}' (ID: {kb_id}) not found")
+                return self._count_files_from_basic_info(kb)
 
             detailed_kb = detail_response.json()
             files = detailed_kb.get("files", [])
@@ -477,21 +434,18 @@
             LOGGER.warning(f"Failed to get detailed info for KB '{name}' (ID: {kb_id}): {e}")
             return self._count_files_from_basic_info(kb)
 
-    async def describe_collections(self) -> list[dict[str, object]]:
+    async def describe_collections(self) -> list[CollectionSummary]:
         """Return metadata about each knowledge base."""
         try:
             knowledge_bases = await self._fetch_knowledge_bases()
 
-            collections: list[dict[str, object]] = []
+            collections: list[CollectionSummary] = []
             for kb in knowledge_bases:
-                if not isinstance(kb, dict):
-                    continue
-
                 count = await self._get_knowledge_base_count(kb)
                 name = kb.get("name", "Unknown")
                 size_mb = count * 0.5  # rough heuristic
 
-                summary: dict[str, object] = {
+                summary: CollectionSummary = {
                     "name": str(name),
                     "count": count,
                     "size_mb": float(size_mb),
@@ -535,8 +489,13 @@
                 return 0
 
             # Get detailed knowledge base information to get accurate file count
-            detail_response = await self.client.get(f"/api/v1/knowledge/{kb_id}")
-            detail_response.raise_for_status()
+            detail_response = await self.http_client.request(
+                "GET",
+                f"/api/v1/knowledge/{kb_id}"
+            )
+            if detail_response is None:
+                LOGGER.warning(f"Knowledge base '{collection_name}' (ID: {kb_id}) not found")
+                return self._count_files_from_basic_info(kb)
 
             detailed_kb = detail_response.json()
             files = detailed_kb.get("files", [])
@@ -549,7 +508,7 @@
             LOGGER.warning(f"Failed to get count for collection '{collection_name}': {e}")
             return 0
 
-    async def get_knowledge_by_name(self, name: str) -> dict[str, object] | None:
+    async def get_knowledge_by_name(self, name: str) -> OpenWebUIKnowledgeBase | None:
         """
         Get knowledge base details by name.
 
@@ -560,13 +519,14 @@
             Knowledge base details or None if not found
         """
         try:
-            response = await self.client.get("/api/v1/knowledge/list")
-            response.raise_for_status()
+            response = await self.http_client.request("GET", "/api/v1/knowledge/list")
+            if response is None:
+                return None
             knowledge_bases = response.json()
 
             return next(
                 (
-                    {str(k): v for k, v in kb.items()}
+                    cast(OpenWebUIKnowledgeBase, kb)
                     for kb in knowledge_bases
                     if isinstance(kb, dict) and kb.get("name") == name
                 ),
@@ -587,6 +547,7 @@
         exc_tb: object | None,
     ) -> None:
         """Async context manager exit."""
+        _ = exc_type, exc_val, exc_tb  # Mark as used
         await self.close()
 
     async def list_documents(
@@ -595,7 +556,7 @@
         offset: int = 0,
         *,
         collection_name: str | None = None,
-    ) -> list[dict[str, object]]:
+    ) -> list[DocumentInfo]:
         """
         List documents (files) in a knowledge base.
@@ -645,11 +606,8 @@
             paginated_files = files[offset : offset + limit]
 
             # Convert to document format with safe field access
-            documents: list[dict[str, object]] = []
+            documents: list[DocumentInfo] = []
             for i, file_info in enumerate(paginated_files):
-                if not isinstance(file_info, dict):
-                    continue
-
                 # Safely extract fields with fallbacks
                 doc_id = str(file_info.get("id", f"file_{i}"))
 
@@ -663,7 +621,9 @@
                     filename = file_info["name"]
                 # Check meta.name (from FileModelResponse schema)
                 elif isinstance(file_info.get("meta"), dict):
-                    filename = file_info["meta"].get("name")
+                    meta = file_info.get("meta")
+                    if isinstance(meta, dict):
+                        filename = meta.get("name")
 
                 # Final fallback
                 if not filename:
@@ -673,28 +633,28 @@
 
                 # Extract size from meta if available
                 size = 0
-                if isinstance(file_info.get("meta"), dict):
-                    size = file_info["meta"].get("size", 0)
+                meta = file_info.get("meta")
+                if isinstance(meta, dict):
+                    size = meta.get("size", 0)
                 else:
                     size = file_info.get("size", 0)
 
                 # Estimate word count from file size (very rough approximation)
                 word_count = max(1, int(size / 6)) if isinstance(size, (int, float)) else 0
 
-                documents.append(
-                    {
-                        "id": doc_id,
-                        "title": filename,
-                        "source_url": "",  # OpenWebUI files don't typically have source URLs
-                        "description": f"File: {filename}",
-                        "content_type": str(file_info.get("content_type", "text/plain")),
-                        "content_preview": f"File uploaded to OpenWebUI: {filename}",
-                        "word_count": word_count,
-                        "timestamp": str(
-                            file_info.get("created_at") or file_info.get("timestamp", "")
-                        ),
-                    }
-                )
+                doc_info: DocumentInfo = {
+                    "id": doc_id,
+                    "title": filename,
+                    "source_url": "",  # OpenWebUI files don't typically have source URLs
+                    "description": f"File: {filename}",
+                    "content_type": str(file_info.get("content_type", "text/plain")),
+                    "content_preview": f"File uploaded to OpenWebUI: {filename}",
+                    "word_count": word_count,
+                    "timestamp": str(
+                        file_info.get("created_at") or file_info.get("timestamp", "")
+                    ),
+                }
+                documents.append(doc_info)
 
             return documents
 
@@ -721,10 +681,5 @@
 
     async def close(self) -> None:
         """Close client connection."""
-        if hasattr(self, "client") and self.client:
-            try:
-                await self.client.aclose()
-            except Exception as e:
-                import logging
-
-                logging.warning(f"Error closing OpenWebUI client: {e}")
+        if hasattr(self, "http_client"):
+            await self.http_client.close()
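With `describe_collections()` and `list_documents()` now returning `CollectionSummary` and `DocumentInfo` from the new `ingest_pipeline/storage/types.py`, callers can index results without casting away from `dict[str, object]`. A sketch of a typed consumer, assuming `types.py` declares the fields that the literals above populate (`name`/`count`/`size_mb`, and `id`/`title`/`word_count`/`timestamp` among others):

```python
from ingest_pipeline.storage.types import CollectionSummary, DocumentInfo


def format_collection_row(summary: CollectionSummary) -> str:
    # A type checker validates these keys against the TypedDict definition.
    return f"{summary['name']:<30} {summary['count']:>6} files {summary['size_mb']:>8.1f} MB"


def format_document_row(doc: DocumentInfo) -> str:
    # word_count and timestamp are filled in by list_documents() above.
    return f"{doc['id']}  {doc['title']} ({doc['word_count']} words, {doc['timestamp']})"
```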
diff --git a/ingest_pipeline/storage/r2r/__pycache__/storage.cpython-312.pyc b/ingest_pipeline/storage/r2r/__pycache__/storage.cpython-312.pyc
index 1efdbbbacf1325468cebf2e7eff5c404fd7af3b8..bfe63edb68cbb9803d944dec48e8f794be94e034 100644
Binary files a/ingest_pipeline/storage/r2r/__pycache__/storage.cpython-312.pyc and b/ingest_pipeline/storage/r2r/__pycache__/storage.cpython-312.pyc differ
zzS8hrOSLM9(io6{#{|(o0V(($?su|&$M!pRX_Q{eXg-N(HFKV6=Be@dMwV(&3FuzS zf;gp*=w9GMc|3=<=sT>qGTHyMOb}uc-@|=NKd3tcJM&3U`g>){aoawGaT zbgvf}xJd)-5QY$HALY)#a-a$>vM}=v+iSLV?iHASv~L10Dfn{@n>0)U}Q)R*r~u9Cl3UlEps34LeD`T&n(!SY?q3Ys)ohZ78-4xebiv z2F*9MMyy)<0P62DkOdh3$u(ZV0$ch>MvOFLAQ0t{*h}wrk!H}Jru`a0Nj|N7v--dk zRfN&5g;QIS{gur>Tewr2eL)TkolLvv;eyuv)XG+6hd$!6m01Y=RU>U z(Nq=;QKQ2b&S4fkwY0;>=J78{$4WCdlJfLzvYgE!^LvKWtT8Zr;?hUqHGL*iWQ2Q- ze@!R*dEi@1wNl@Dr9T12W<{%v7PZmX;h!}%e%#j{IewQkqbJb9E@)FFOd3R-Bns9q zk6=FyTFN*K&k~Xrgo{X$HjPPAGGtgoLAa5eMI#{^BHv56B$Gu!TGN^q!<$x;HiVP zm3hD7bR~Je(&IJ*{FnegTh^iQvt=s%j1{Ym{5nK@;uUjO9{&x#)AUU}GXE^Ui`V}l zzt!|ddStHKQS@j0cGI7mkoi|UivBf^+yZav@|YLQd{>rz!D6PolbT892*L3QVHA!u zDTl$(v2a08idFdF_cS&T@SSF+EC{RyX|QSPV&RC@e2@j9(--mej*SNWj$|GQfn&qZ z2Q^kF2$V_V1z66;Rk;<`siBbiMd54(7ak%@0@;d_)}!)wOPbCMz$sIZ))K1Nld(8~ z^(#r#b)-JDj2t1xvyyuJPC8k$%2VKd!aiL99SX7M2)|8FWWq3Bhm;D2NW!aB{7;dD z97w*ZNU%G0?2v@mNfd|kG9$`QsNNVD*cTEj%;L(c;$@qoWt-<)V`cl}1^cHQOBP2W zyK3gp%yw~Y_szridIa(KiTLqT(c`D$$A#!|LF^8R_VG{kI&0Q)7T4JR^P*!j>*mJe z8+)T0d+&xm*m(30)`>;OrkqIr)>)SD(b?N8U0=Nj6v7cxBI9y;pk^o%?^Db6~DJ>TZ}_ z_kp|hUe1AKy{@Azu3hpLiY4uHPtWZYi?)8?-L`B+#b4E+W2}qIZE&c7!^Sd1#KHso z7a!ZX;-~m!7w$j!xP)^b>(K9F;D0-JS1$iuzNg9X<{IP{ z9C;vswcEg0OOI;c0|W${t?i7 zC*y&-aO)9?7-UkJNp~l8AN)?X#<#$!S|I&U(rU?A&1Q2-E@gQ(LDNK1;TwPpS7;_< zW*YPhfQ3jkWz+H(wxUE~X}qv4TG%#M9xLpMd%8q>*L|BaZmYRxt4VnBuOajlarwH5vY(%Yf`c8Q;}nSSZh?d<%~>4Nu5X3%J660?`!fU7FQvKI3Xv zYW(pv_)K!FP?WiRH8WetrFwJ1MU0HKI@S2dbC>k0w3@kf8D|29^F8>8wGXwYk#|>b z0navlPmyTp*_X_IOSlStghS*KsK`~oF}8rsR(V43j%W$HkDec5Zulxt*xBI=j$9aL zE2gyGy?bcKPA3=6?NI8EkB#|;2ZS?0Uwzo@8$3H8g#5?gw?@22XhQ8;MI zow5A7Df@kAe%x7p&sje6LcDHcv~FX(Zfmq|>tfyZC3}wOt(}R?jKUV;?pd_&N_YxB zv}I4dfC^D(dCcaU*?iB|1h|B|dgi&AUUBW7ztP{_8$bA5^x$)`y(h&zUlG3&6kVsm zcpTYKvb}}(ogQgjmR>-JV4t>-BGBX zDsMhUmd_|}@~g<3yhd8rh5bh}Ys->=4pB$AgoVH}LxZ4EI!RE$0C3k0lBG?NL7C{V z!c93ZvlyFBveJY~S(y1h@rV!!M2uv8m{*}NcD*$la-}t!(XbuS!W~!wyV5*Ma%Bk<7QW%eNuWvMPmkeNkWw5D}WrJPns?%v5?Ca|$vEq`+ zNdPM`=7d>$znWA^Uw=Wmdy!Yy zwUlQn>q%1Ul>VbU^T*0F$u!L_JhN^x3-3u*sTEh|ZyizAgvhx7EoQxl4QZF)`b0T( zoq-7!rf|=fovx{EUGS569@_}goMT$T#oc7mhIvfo4@@5?Ra>CpA_F?EFU+JxCbAgWFa?v4-_QYZ6_Jvj=CliP;^Cmd^Wm`OtIn3a4xz=9gSK5zF^~ z%;j72pq=RQ9C?Wvf4t_2Xw4G|Z^@OB*%Kg*wQi6`pL^Q=5m=iqQCuD`?uZt5Bnql$ zhUX3epd{~OPM4dvtjFzT8`^?x-sN3xMWxR_&E|3|-mVip4dgITktnT*mu`%fZcKP{ zr!UUfum;eH7Ru-m1@4b1fi9L?v|K28|ShnK+uS4I#H+XqNPbvQ|^19k| z3r>ihrUh34j?7px{{t;;~gj6y_0K1$(;;6#+$uV~r25{AUySfShw>mD9 zx}v&kgZnFncNq4KP#VA;)6Mm8rRFoa1D=K>8N5x%(6izKC&s|h#bAs`jRDh}2KajD z1pgyz>CPx_eaR}+KS0uEyb8RzlZm^;Zh9hvioDoG(mnVnH&=XIn39@ z`v`de&9a2-0 zAehWnP5ES$47(qeNW%H@B7wYEPmmqH6A$>4;nJfpgpvRQIs;PRYxbRn*I~$aK6nWr zVW+&0+qeJd;a&(eKgOW`l8`#=AcsX+&j`WTZe%rw{s?#BKfs(2ssJeU+u45#0}$v} z-iD+f;WCn>F)%T5AtVTtH_&s15ZyaOzp=PTE{U)O<6R5-&%yz7IM~muvdnF=Uy@b= zfW-zvQbJT~zGQ@D{UGUtbzS%=<%W=iXuc~Fhon_sLs59mm!gPTnWG(VtjZjpSU9UU zUE^k^pt`81Zq^d>teLVfF{Eksx!KKbQ{z1w%aZ@zTi8Ef7v z*6y3yCAur4_9|7O7uyfqJ#=@w*mpv7KeuQ|?o%@enkM{VUZ>+acV6LweJ?vL92v-NWaZ`Lo`;f#=f-{Oi}Dx#K( zMN3t}ng6ZH*CxODbP~l4I z+{%8Z(_?|wJTM4mtw|0QZ9%@d>Nu?KlH>{b9$+f@K@ zoFEKD)5DGwoHS;=Sc0sY&(1q; zUW(N|Db^ej-A5Pg$0U;U_V|rGb9?89?z&@52V-@I#M)lbefR_W6Q40>XoGb#;1cfk zx##A3Z&uzpe)k#i_(`#UG`4F@?71Miev8=`Q@xR&`*ZsytP5E;KM295!M)to*|m*Z z*tTwGyZ+WX9%&l=AhD9s58`OpPQzys31BQzC`p>QNv4~W!3mxhD&5eLXr`qK5KVOF zH{#>vXrN^3La>TcB)QEBZVeRnf{^W+?HMs^#XRy9fCand3CR@T3r!4?N#OLvu&N<2 zok8nQ*uqvR!$pMs(!wi@88Z8pd;{{1OFINQd`D!%Gx;Zmt;`AX%-A@xJ+jOauJlQE z1>Y(72!%pJXGW#X3aUUt-Ug(pD18A*fPs4r2Al<3V7LyuIAhyHiX{xch#Yn$q^4N^ zg@H@7hMY8!U&1Z>kOX&X3@jG3Od8CQt3?cjrfG33KO6udqjnU%9*?n`7SaFYN zDOk!W5lfqABe9$d1 
zSNnynfMIO$mEBi1iB-F9j@;}QkExb$?D)!?a!7`7dIR}Vw40Yul3LV*89Ts-vpDYj za~KnBv{6|b7!z0$bQ3}iqZTPt5}#=iN403eCTL1`-?(Ik#nH&*T}!yRCx{S4BM9;0 z8#?Vv8H*%3rTN}u+8GK4T7*AGuXl%YQkRcBtYFg4wS*f0f>5K0J zC{tXqHw3%*&d+qcF~%0?3fMQUnHiq>%G|b_jW6iJv^62%y!U@;GbX#{W6L#qd+pHhxh1IZ3E|h1eSrVR?=A9P%r#Oy+9Z3=rVY zgRX=uw3ZRjX^=p#p$tql#f~OI1rtLB^Y_fMd;tDWNX4#9aX`{g!LkAstO_b*g&WdP zf$acgpCn-tDG$WBKJ;eB*WxQhgcU}Taw2tUKk)IK8N zuL(K>Iqm_ z7LX_7z8YB|UQt)EV?evk3c#sv#eHQ53A=9%WMLB^?ZBbwu;P-RK#+c0c=N$Ajq#k! zyA)Y1B}+0}dgwxM5V3m(vCnuscxFr(3Nv}LC0wXF$TPFB%FUSkmejYeT|2QCkN5fh z{cry_z8<)(YvsP7P-r3ueRuF|aPYiZSGx$T$bC|P8Q}yPn`l;A>#iVg~>>@Vd zTPavkkVQALGJhy?y8<9_Q0zQN@n&#I;RcnMaTiO6f)K^8m15#MC`;dV`>?V9do)dk z0LXURi!T3c<80~Nsd>-5QOw@9XxaXuJ9m2U+x1iCgv~X*Cu%Du?&yB*1N+H@BkxMx zTg_LSzuOvh_*rq+2lkx_hkN==)KSU`IzO;)`FnfGeS2=)ULLiVe_*eWIAlt+!hVXUVJq@Mgec{syvE zfmik&0p0?53*c?5z}q$LTLEtcynPjThXy_i@L7O&tOB2{fwuwP2Kek%;GG(HJK*hr zcdi2O(!e_a?*P1O6?i46B*rHj@Y#URSq0v$Y2OKWC*a+yzcL*O>fHw12X|qt% zW-j^wH~2u|D)2=b_&mVp0lsJz_+kxwKH&2KU%U!@i3Yv^@YMg3Rp3iC@P&Xc1bpc# z@MRkKBES~`zHAlvat(Yj;EMrYz6!ig178C862SXbfv?cOmjb>N@D;1TS8CwP0AB|9 zN(Qfo@>&&M)18WvC^Ie9G!9F2a8wuqUk)&aDly*{_};PzfBh4}aMGS))BWQJHj>N& z8xFumjD7FelLmjQRNhKj08BI^q+t9U1XzeulAz>oDM=zpx>S>TDu$qOd}w?)C|sbs zhVg+jp`;}xd|)LzB*g+qT86Q5KLKe4`{gIb;UP{M>d3&*Xwn)Q8V|CK@yQ&r|DgT; zvj{#oAPkJJUVfj#LS9HWS}a9Se= z0;$YOMtkVk)Dt0y4S+)s!7fRix1EA>!rV(MKUki5Us}9qN{s&6#Q}PQ+{yimsPsxXr{Ew9UPn7)6l>7%ueo4uHq~w30fdo*bH;t^qrUYYIm|U#*yQNta!W5CeBzWJEHcoL|)nD;Wvh7HplXsMO*XKMkEPe9qw+9Vqf5@b@5=- ze(sIuX6j?PjiRk_Y6Fr)c@1iBirPyOc&AnF7Wm$>U$xJih!w06ZEL1BBS};?qHZUI zOtf9$f5(2!E;jFo`MN|K{x3nAXy}~UEqZFB*zj3W{no*&2WQJ;C8)c3YAcdN?K(8D z9g8Z7vf8(XuMUfiTViFnvvq1al0^MRs$Gp8qlvnY#YB-A~Mn#A~-kYqu`eZWo{KOTEaK83_zk$+Z%(WoN9STeRW- zQlyE-t@N^WQF}ew!0SGGv>!WItG~_acdHuT*>Y`**tR=XwMVq!|8k^><}T_S)|?v9 zIbZ!-k*g80X69~5V)@B+n&8J=!5$-#4Ws{z)uwW{4X8e1FY(Ufa zCE0+O`yh%}UYV>wnSunUjtVRNS||1DQaxBK?Y9FZp$m)YncW0oTasto|ax_ zR~q^bJ{(iR!!=Kh248>y0zTL&gO+6ae4cEYN@ZqmVD@z^LQ}LGVHydAydg6w3<0#D zDJ}1uqTC=Qw7iq_g7vd#(f#9Nild#pk{u(kGn9}D&QyBIR>m43(^%4+vW{n3&HfAk zX+iYl?qFjk7a|j!Wc_=eLN!B6o1T_tp!U(6*E;clXHPs zQRmcwgtO?%;5}!>QhxE@VH=U%^R2_L9Tv;FZyvb0Rea)zc=T!U$Z@gcnMM1vOP>5M zQY%0OnPHz1k31`u^ex&?5G<|t6#P0whMAZ7ZWI4%eV3cN!FRa~KQtisqjHpyU$L88 za9g|e`h{ZmPPcxc*+}JUi;=(Op!_Yj9d~b)74O`pzqOf_Y%`+dwqB3BX})F_JPT8d z|Kx9gC0YxeI!Q4pi4d?0cDB@nluPJu01M9YB$Op(Sj@Q9CqI^&h`6xp5$%#EQl{rA zkzXEqstHe3rn}raY!&23oFZ%RSIX?e4001O$ix8)SQ*2j0U|XTAW4}=!=Uk2B64Ds z+f;gB)x@hYYB%=EZNsVo!X|CVMn5w6A7%0^x}o?=Ln|?d*U4H5t&%CzJ2l71MFtv( zuSZ@2BL|$}7Ddhxk7n2qg=Z7FVh_5+TZ`XID{j zgc2e%{w&FmXhu+f>IW-C6lJWtVGnH5E}KKyFWTZS^^ue)X%K8F4J|l2F+$Nl*mfH7 z*1;~8EI_G(vLqRQj*m|(2lE?sHbK2V$8D(+P`eZu8-) zU6S6(c15M!CNz7d=%kSrVEIP0Dxnkk9SX$BrC8ij+)e@e8xSvsa0bC)V3H0IkpezQ zi11(VBb>GYFXMD*BUI=BEKngA-}Xk;jo=T?{Gch8yKyRu#oq=`xRMp~REhQ~coMLX zAjM0bf^TnBqHI&o-pXp={@ByS(bi^8pGMt@5~6GTCuA;2{|FiBD%^?WftNo-M6+HC zi*iP4tOjJ2YpEah5p|S3DPOvVvT7sO2$6u}L5rK5;3f6fUeIWuT>_&ouCn;|LE-)gQjs*V~ z?xj>^t%PpOa-=7mBg9MhjTbl+h3mB}Xf>|dL)dp3g9ZnZ zY|iS4Tk4_~cy-Rz&2NDRCu1M=Q44lc%;EPA_PZ)-shZh43ue;Eepf~Bc)c>c-@aj;XRt3Fo)h2^gNRi10qJl8&ClBimk;Ss^(LK7n+1HvU{f&@up%W(~d-l0_^VVqd*7-BB z=H0M`kg;#ilCS#hqqFVT`sQ+DzKv6Rr;o$_c0a%D^4H$@8mx2q&G+&T&XvU1bVb*6 z-So%S929Mb$Xa)3skY(9x;gLlj=77m+U=-#@*b>TaGNS=ik39Zo{W`rijK~y6G&9h zuThkB2^0=9*;^zW){;OI`p$6p8NBqD!Qn~0(4t~VM6X62mhl5o5cG&XWJtwQ!q(NI&y!KrRi zkL8gSCioBunX;%a$#>a=-nA>KY-W})K=}Pr_G^|ZYi7=TzX>qU!d(2=cJA4IKy-Wi=O5cR zUkCHDn04>0Go*=?g#QfZ{|1>aiSyZ7oR=vpzV>PqSc|g;U>rS{UFsb84RF>7j8j|C zVm-szONL7ZP|jw;GnIo&;S<wYyhBm`9Y^y@C>C&RIH z;D?+5alGUTy@AN4+Lx4gKs(1mq;oTr!bi`oiRRYKoJCw{TDQR3LhDjR^~}@XFUCiI 
zhSn}}D;^J1P#rI*j~3L=7RL(M;tAFY)+1`;to8f7Xy*ii3KC*$c$fkbe`VM}W}+{N z4L;46$V36PheIhG_BW(*gfSVY=5K^C*&5sNLm2Z2c|}X*h)ya(th#|trgD#HL3t8r zHPK161udO4G8w{j$plkR8DTB-QFd-6{PFGl&7qZWS?O2`l`j!0)7mvd<%(uW%MsTg+~zLPhX1jYwd=#i_2Py- zf8$+j?EQ{G%&lE?)rrTR1~NKw#oT&q#=*i!sXxWW`(q_+6^oLk`qnu|tbQBNx1UzB z${Hl?N^IQ`E2E_{TI=dsYG|4LdaMBw0otbJvcj6wuArt^At>lZTJqYsRO6p*h}En| zThB%9mDnPkak|vc7JmQm>eGct5SuRlFB4BiC;JBzPjxaW2ns~j63;q9Lsg%__P=4c zIc8ACe@Td?tXC^cn#d+ou9>OuVFH?7?I8jR?H~gIRr`~|x^EEHMIM87OHHj}TUV@U zC*cKQS)!;mUepjRYM9*`D`E?&SYF<|RNFjzK33a-Z+?zLAp#^|EZ%rD7l6?jk_gHZU0Ru#&oX;u3CI(B{r z$-?YX_i(Ww9~MOE#CBte*W;{=Zh7yOcCGFaby)k@ucedg_zmH9LbH^5>|>rEDMBZ0 z4Gj&P4*J+BdL%Gn!72of-NKwAdYM;*O!RutS;?wHq%>)zvwn85V}w&1l~mHFcvP%( zJjEH%D;!UW(536{zefo7r+CC_WDXWT$NHtV^*&@=B0K0E8SEohgb6#XSCe;K=@ssv^7PtD}`FDpwjDo&E2Sfz1_==jiC zj2;+@4d@oTGCggD(O4I*&1f`M79xDoIzfx@cW4qcixDx#RLMg(sXxAJr{o1hNT#(K zChe&e25C)!=CNW!<_WE;wK|4uLsp*KFV%jVkc$|#6d7xav<(P80(YG89Y>=(j>dO9 z8{P42thg_he`3l`hUGeh+?d&dBR@rZn`~I7vN-%jJhk`iwfOYbx(A^gwEAaRT{y^* zE9@%4awCsa*>{E>ajkBEH_@M6XzN5= zKQ#Ql?lPv^>mMOpm5#+f?v|n;WB1VSgG1%XPsJ)z?W7}MLAVb4WOgJwQ&@(8HDZH* zNLKn05iE$4ZW2dCAi8o5LUgS-EMmM?e#UscoMW7`b<(YQUZZ?Z{Tr}Ha_~F{dun#X z4et@f8pF7*W_;jFqMLNT=zjfc(qGk=WW`&0ap(nY3}IKm$=*dFsF4iEU9jzTu5j_u zAbEu%pXPCQOz7lh**Y8)IBq@H;D3VU)Uv|PbD?g;X`~vS|bX3_l>fpzm({} zsIN~Taa`Ie?M%If66ZHxio!|0FiC6bNdv<;q?`4TonkI6L;I->M*0a=ncs{+jw_PO z%qhwZQbJ3`QUu>_$}u^l8k9yTri2M2#z64Y#2M)rDvzXyRnko#66~Cj$4&C6siHd< z(UdZ7@`h99WSY#vPek)&OWirLnn-Jx?A--6qO@izdF4<)lP(1;lAquFSgf`;?(3a);G~GT!_zpBsx9hTbHBQt&Vz_oZ;n=Pj#ux9R_{pU730tb zIs~GN|Af=iDG+)(1%g)B>fXLM`*pU+UDq2cI{YzL=O{i(L`Yp1lWd_E0(viuVzz2xh-FO>g8Mr+Qs9;CE9dll5odpOGi`5Kni^8kIX!MeN9_};qW-CpBgWt(vK zue?T-{8gcz@+HnaMaI8stl5)meBWh6$@{rRl)PU=ci%6y?&;9KU+dhnUjP1jBjr2v zc#0wn1N}J?HeaDd_-`Pi$VeHF5X&g*C_*i&r!Lhw>tPQ{;$g2FwVD<978VSlLm{5O zCd#KF?CuGY1HDi69K?p^-LROeUVkPy1<@XA51uS9K{9f21_i=dBrBF6TUX9_sezAZ z#_t5UG~-PMYax!JyL|qQ^YOgKXkKGHZ*4S>FgkC0+_inG3vpGTCg<$nKmU-t@OW$C z-+Bs2EG_(Xs1?n;%y;SGAJ-1|xV7+(Yv+DM1wZE7xu00!1@W(R_{IDo7C1e!gY+Tj zqQ7LvU?1czIwgG&2t9On#f5!%tM0J-|MEn)2(NgDkr2M(I%i)|fS-s8=9Eh_odKpE zOreTA|DaRgaP}L-m?-4PP`z<>KE;A&F;`mB*;TR24sMw_*jX{e< zSVk?D@2G!eCm==mO~vJ+*rW!8*!fLsT{2-}{X6`t#6KM2FkT`*3lpq_CfJu~5ySxE zCXPpy&iuM$lBXm)G3;?~LA9Bhl;OlMvvgvZ%(h|rzy>0URG@~iNSzoa$x@@}eKK1H zm-ZMjH(|f?(dTd-K_yA#U~9wz{j8*o`nd>RT+ax7fB;~$F#$DSjhumOlUn&EJbWMZ$aiGv)sS& zky-bBt@nJrbL~Iv{849oU2k+*#c-#? z1eo_Y9(UivVFU*5J%e{wx&A%WEw;Q@Xrg?%9xw_eM<)>iI7P`xOn3}y)l-G^OG6>j zj_$#3em=!SiFvaNbG#DAk7N75YDWS~r#i~ijj4S&a{Wb~x$onzs&X-v$q>%zro(C3 z&cIZNJ?SW_a1k>^`snOh=sy|&1lKdNNd)w#x@?+B=hF_Nr?P>G_!%n=(Tii|NCNo= z%!>30Lv7>%CbS=LsR?7WZcDiHE;qi>7Dft6XYf z>Y|@#%2S9Kjx+|ztp5H1Km>*U{prU!1L!DZds zfsEX}Oa`xqa82Px60Wfq!VwXVc%c`D#wFu6Rx@zKIq)JlFgkGo?rOY1-!vtgB`p*r zfo&sY;S!*yB~7g8RJxp&w9?z6Rg8gGqn|0I)iea;RxP1KHs|zw>)dPSzBwGX6y38F zr6R|8%lO~tOAMDyW$D`r6cLL~ZFtPYOtE)7O1Ux#4RGqqbmE(AHPowXy{c6Yc1Zoh zG3Ve-L*JHNnM0i-B#{y+OP5t7Hf*3u!l=4+sR%xaO^tAlN{$|;sO`&t5)T=UnW zA&qgGBznobkcwGF0)ey>m2u}w^z#OimCI~698)^_QhidmM1w*P-EAdQ`dzdnp_0L9 zeVSLc=(I;m+odf!H@y^qrnECw+8@vFpW3^Whw#z}w773xh*j>4<#pr0Mo-@KMds<% zKI@I;x5PZHqMg+Jg0J$QLkpHv%BQ7c6SvL>=AXW~_U7PCoSAqa<~~S=C)%H4s^!yA zEjOV@FY`OC{HqOJwH!kGR~z1KM{c2t3Kyz*%GX+Vny}j8-pT84@kZotne@1;D0pZ? 
zRq$@&hX&b+QQSz^$TSrc4x~1f(bTON=pLaGt_4Rk1AWP#iCvFW3%!b3sD5Fl z7#dYOn6MphCy=t2rW!J>kPJ~7RKsMBqzY<8y)?Xe=(RS9b#L*m%l2Sn9NP73&uYbW~o%Q@&V_d>Z3YNZ@{aRfRLKkxx%1 z`NwFPSlNQ3NJcXNOuwt08;-b1nQ%qsMND(DruT+^$@eD>=|?eUI>jNACaKXH4Vn7A z%Gu$dc8@LI+WQrBFvUHHs6Wuy$>|7XEik0X>_@A6k5ux$g=X0?%aZg-OC++Umr2QUsoi7o z7bJ5&0_2gL#!i}MhbLrI@?6wq4b2FJi#5z<<;^al8RSSO(F|<)e}w9nZLLiFW~U|J zZ)g(jC0|b0EA-KU1RJM~iDKVdo3C!3c{*0y1oQE9J2q$~yw!1UW7OMtqkGQqgMA-( zH!K?g3?r$x`eBu>SW@YV(oP26FSED-m`lsMV(y)HdC|U)@wWXL&jTi&{&Hzg6_mQ} z8Yp$$6*v!gEwZ;7OOf8TcyRA_G2dNfx?N(Vd<9SCRXpWutUWsY?G|VER{ia*M&$44 z^gTxIj?vrW)88qCnrFFFWTL!J5BM}fNSgaAB*D+(vuM~OidSA48DIv&r?7ikFA5F zYh*K*T;~z3D;|IW%bQAHLDqsGYto5xuY$?D<^C`dWz-yDZ%vQs%A`|xgg+#mW+9l} zs$@n|N2H*=aPiVO$71982%ayk*0BwvY}=J0UqVG?rAmRY3z&hUSPoMyCRi}TQVacL zQmdbGjOJ8Pj;*jgB)>ALmDQ#bQ<7FjTxiKFvlLsnl60g35C;Qkeix5~dX^~7zd?6W z(n+?pccoOjIa<0oR@xcQ@5B-8>();lSSpmF2wQKijn(ds74D&v)!oy^rINZ?W2~fQ z+L~|`$6b|CSLKIg6*K;Kny)p_8RkyKDmTZ&A3Tfb+K5ArlOYFuO7KY8YTLq=Zm;u6GXfvMpb^r{yl|zuIt~LXB10-I$<@;} zN0S60ZIV#uUM0p#QXthCmvWC|3qdsPV2YyFrHMxLnptWkBdv$5yYdtJrDvo?YRqu| zJk6~cw3C`!ChMR&w_2m6t?^RQ8|O#vo)EV_E0(s$O8esZeN%fs+_3Y`#=D!ur@kV7 z^;E2TFt#D^pY=HCEz!L8JBII^n8}Uh)_vfr|Cq{_!J)IU$W(BGfABFo4NlP@2c=1} zBBn$6GcotGV*e@8eu_<(K};7(f!OKgUv2DK!@bLQH5lefkXx`9iID z#!>=Yeb|;F&kH5?in8#;tRs4r^N~#jb3i(2KwSXRGZ9(;qJG63FooC2Zg(K$L*qDv zi|xf=F2(TYr0tcd`23pXOIX8*wMf$->{n+%%6pD1gHl^8vcHU$sfl3GEPTRz#v+tS z%NvXVF;=8bg4BjgWfDl(PwF~hzl8iQ*=tXJZkq7A^5z7gJ=t9pw9i-UBvL37Iv#8O zyja>1E8QQ@-wz{+C;v^`+k0mB&JEpk$Etf`1-oLN-J*RrA@+gP>Z#tEk45%`8&Oc^ zx8Bv=U3a%ebUgvr22b{VPaX-Mh&_Un5c6fDNY@7bRezU<`yt`4-VguLsQ+V(Cm6StS0?w)OyNPuI~jaMq4p1( zP}F&#at9T+DJ<)w=LYPJikzsaqkjZf!oIgN+@S6xaV5>Uu{))GmJNfa?-#%78|WDTz) zu0>(_gy7lWXlMvQiIf>e5ripvp0uF~2Tee@92yKkR%8?C5U@m*8X+ORh%UgGNo+-0 z0Td1cBwVddX<}+htHdCcXGsQ8yE>aCCoF0};%+KWV{@7@yQ_GhBRug!np9g)h*YB4`Hr zL`4mS0*O~_h*oTfS8R_~Y`3 zvFd?%*}&A{rSht`5B$VDZ@)PtK6y0Ob}Ux?RIL1IsJLK*Weqp#=2~xVzI!Ux@MNs? 
z$bUBKOSA8L3qOTVMy|?7K{QIUrw%Q1_=7W^OPiOL891k2QeDI=PZV zu)viKJowlE$j{&uB?+TJiO=r3c^(}+a!h>c8S&V&VtHT8eL@@vAPhOW1LxbbNy0dxnVRhN49(y^uYA$w!$a ziWQgqG?)UdnGSZ9@558soJkv&?<>zsNfU6gcgl~7WM%4mz;x!}pi#EbA&iPkuBrY# z=T`G_E|c*OCk>E(kQ(omy_&SE)GcCXJ;3Cm3m&gZe>H9~Y1nI-&%Eo^)#bp`97qrc;+v&zJFR()HL{2wneBtsG4c zGC#|fA()pNVViR_&vR$&rTyVvKDG}0saN*#v30To&Xsi{ncqbThAmWRT73lU!#%Agr@xyh6r?Nahjn1iUMI_Som$-e^#gbiPF zMH7#BnN;9?(Yr#9FBn)0!k8s7LZ6S>lw?L7BKIU1b5kqZ180UtrPC0y6!B%BpX4U?k|Pm*dI!)&3q~&t1`F_R^Xj`T|2N^f7$kq1CW+A>?ReN)ND= zJZX};%zVDXE;^x5?Ng{VoRxN9q3OZ^Vi>0!<(YkrS>2ZK#gdk%NbMO82BgirKS!Cg z(i^9o4IhZbbo=F9{0!@Hu8kBDiD;wnYb4kT%vz+zqzFkDC`Yv+7o{SjLFyv`PpV20 zl)^=-utsJy^$TIu!7HsLveg1&?vhb2K)K(dBuEJp%$QYhntlqDFo})kp++;KX(BnE zF(cvA^pi<(xsvC+kKT~ZPm}!6SYvFJQeCWgj_#5zLvl-FCp(Ae;Hc5TVA4)8$C-@F~$qmL<`p- zs_(RES!c;7M&VKIy!SI07|${U4S$&i1^!_xj$s zXK(6aP2I8bo;U2MT-)%@iEAg~H678Kj(E-XXwCMU&x=p>i8b3}H7DZbC#LO7-TM;= zednJ&0)@4QcTZ=3YT5?Yg=x09cIM(cU%U3TxeoXt)a{HFcTZc;@nW2KT}i<;&b~8zZFp`AwC~!USmiDj zljEJpwaDBd7l_~bS`xKgWp zs(W7l14#0|=lBPo?&dic3p%$~64N{9yXM!4<=bQK9XAI=`yM8o?}c#wyFllc`TeE* zs|EYEa&PkcJcjT2ko(JueI?vNIlnK@u;8mk{+7Xm;#>LrzCy#Tg03w5xLwKb>oDA| z+SQ65KXX$nKlAXE&*M?!XN4?Z;@!7V|FbrJ-#W|B)|e>2(LgX;t^14ge{FK^&(;5R zu95OZdb9vit)#hxB?RUQn%QcTLe-mgdw7&e?U1_FTI1(cd!e8P4^kG|6|H^ zQ!=aM)Y{jntcMbHAXBx;Fe4qw^&8bTNmdK?cBJl0yCBN$Q3Jm>#IqH4(7}8u(i$$AZLr8pB$fAVZC)Wjhn*Y16 zYw2z3h{7|r&$X`|JGNsI=YdI0lt*zBrHLpFY)lY}tBOkOhFqi4E~=jIj70IP<9n1b9VOmX42d$3OO>ikM| z{nV{BKE8sVDz`eiIgSX0S9GnGIRRDL-59N~y(aqvc9o000k&HYgtcyV(Mu}1t=+(6 zaHM0irhj+I0I}~@!EmwLizJ8ZNVP9idR*k`N2o&-B`3s>ba#pPLJLs6X8gRA+Uks_ zdB>w`BNJPRK$dL;0oKm)M!`4vl#O7kEs%W<;0yDocz|Ddg20?bwpRe+*WvM^xenL> zd=9t+_!96H;2Xd_z)yg00lxsg1AGto0q`T>XTS@MZ~<@yz%wYjbwRMkhm=&;);BkU zfPZ&5&vlSh#&hDtnI+T-fdAFIF{NI7p&rKOp2vuehM5#~g*{Q4;>p&_>_oa~NrzFU z*3q}eXjR))%^|f9&NozqF1PfcyiCN))XywGi@r%cweBG+a(Yo@-Ouk3hd1*ByR4IR zFf3-M5cYeNvDw2^;IkWS6moph$%SQ+b-yDd4!37?f-({d60um;{Wn5lX$yzwkSHOc zghUoBB@#A$kA~pr#TjXuKVebSlWRcb=LZLLkh_0IVaCu5Lx%zLgH{c z$d#|R7cH@5|0{pM#=#P*v)lS$N!8V3Q($eXv)87S1UP~OAh9e zgSq5jE;+N1OD~807c^I7GXmS6JoPMEpaos)<%<(CIpav(FYXXRn)U~6R0@;~D<1XQ zY?6|4v5-^qHp;T@cZbB%7Tee?`LZRJxX8mc4wg_|!`25&;(}6OeQA16OI(m9-k&e2?-@jQL?n1?KCM$NGMr~lBF&CTzWGp6PK1N zhPH*ucBbj5=pmtIDO#3c$)h2gMYrdMD8iQhyrz?y+hFL(v$XJfJ1=N(p}fMk!^Ry? zr~)^9D|XL-b+OTA!#nK~-fRFDX9>+#TmG=2JnC+0m`Uli z(+mU4N6=&9)LYM2EU|>u2W=cIp>kY#FlTdSjm|h-(qTT%rQ{{M1C(!O2R53eWgQMA zXibNO1dj)WEUjxh0CVt>G5msLmyMs*JTh>eY>t$0F^3EFRIacagH7OKu=YTOQBx{Qiu=yl>h7X#~e)KYDme29JHW2#2ZOaxOAl!mTKO<2QNe zJMb~*vcWF&_+g97Hkj8?8?!#jFIYJDp(;-uum+z)Wkda{`<@`a7MvsbO0pGOWv9dR zAqRpJQZ;xiY)XI;z$oAyz$`%CsYAockUO{#|7r zsob_&e4rM$Re4(t{;CGIRo|bU(qr4I@>mT%QAf7b#Rux*V^w;r`hSb1?l0UsdzVkO jC!GniF+Ce*h#~9ggN=Nf?r4-QW%QE%y7##fA|2~rCGhde delta 15907 zcmbVz3s_s%k?_5`dP{&n5_*Cj2!wbV{J;i-gMk=7;s@A?jqPB}m2EH&=Ssvj7ST3w zTQ~K#c<;7xnxw(o>>B4$lhSTT+a%byiIX;|gz%46C0nOW`?r7pzrW(Rjhl2IGv`Vm zNZP;OfBkth_netIbIzG_?wmQpKYfw;>dT_^AE%`$DR>_L-#&xfTjkN2W`&5vo zSejMz=zFu8d6M0wlXnW0VX28{KT92jI+GZtqdQVPoZd-1zt1^h6G&V|6Z7$ZtW}Ozd<+Em13%>=dh1H!d7Pq-s zE31csLN@XO_Z*WDP{ePiCvxamdP`+Xz3*gSXG2eyx6kjP zWAgq7y&TuYdU=`1kwM4QK+$xf(|gR{)!)ariVlyzCE7^SHeM(0lN5qD>-ZPN7J7{x zP)-gYCh-p(>+y0@EXW7oL#^$c8|&2V9lmy-_kjU#U#FKVg(4UKGf5Tg;kD9NOeikK z9QOJ-7h*ENI{bc)bMn8Ht}+k-s&?-QKiAReZ|`EcRX|nDuafPiJ>y@M<tb(`2Lff-{?i=P7yeA=WiablP_8PrHv>60`}pzO4CzFwXzS5V1+Fv0Q^3I%Ip z6(B(gltCgQNvNP$JF83~Rk04%$*RCQ)Lscvv3WwH3mi#|b+PKxvUxSql4`K|Y+8!M z^!f4vHvP0Tcb*{wDlBY9N>4Scn}0=Z2o|!LplueY;sINhv}xHQV#_+ z@)`PjMEpwR!~WQUbo&! 
zJ)YfAM2#65l++a&-5?cTk?Rn*YN7Z_Io%)^Us+ZH_*E<2P$<5dBSzdoH!A6?&fEru znW98cG9{uBXBfn#G)S0|(}*i|4f)JeF5TdePuZl1=c9zFBBP>F#!M~GXcRHmL=wQS z$rupYDdOL&)``qw-tiT?cu3@D_|Km?%dfNa(mnhOmNKP36FNg4{zZ#{4;{`F56J^G z|H;83J|kC_ArDaP>Lf`(&YK=B=Kn*lU|K0LKbC9YFQ1WX0yHZP&`(fLiCEc_qUYuO zyH*pU2q<{kVxtwj!;-@&{VIO5r9>1^@{X-K{#1@WO|{T;Kt)P_dN}K^H=gsT8ynZP zQdQJ2efFzW)Y*67|FiI4&A<1khyPi&c1s@R&rK@qqBz5l+V4odvkKIcRR+|ctvI=# z2h%-Wro}ULWo}F)`WFPK%e{gR0D0!2*4dVQT>&uDD0yNkPhu#3NDa|cr_6X>`$1V<30A)C0T z3zh+@E$o4Lh@tUv`w?tIuopl~e5@Y`@5gsyx@p981uDUAvFZRJg^jm^V-Zz$D(05} z|2cb2P6w3wuw{GdL%tc@K0ao57WE^=uVCfpj=Y_s?qb!_3$1Srxp#gbVa(Za3FTSwAtm)>eMCv~ zvW7F_=~SkDRgXgvTmaP6kkha7Tk&Ffl7DcPQd>(d}y9*T9g0D`x(Yt6b;id zmzB$Z?92=P*@vHjhv{Y&eL8zXA@zh|gOd6Vy@3{w*H-{~HNO_%lv4|JQ!D8WV(HW> zG2+$8GF4-T+G`eigH(LYx?T-0uNNZe>oxQSx%Bn5Ncskaq;JqP)V?96u|!I6IlZA8 zUX;2G`OF)+^ah9g4Vx74d9Zv|oKC|h611Q^&A zVJ!%cUy;OEQGkK%g$amG(7tS%Z!aremb%f<)M08!4F4qox{(@Y_VW}qBvnv;RZ>Dg z$}f}a_>m0x)*)GnU~xc(`)hK?qN#wiJ3Yy?@XMJ$ySzJ-KeyBD@;OpON>C&#eTqpD zD(Tkp57b(-XzFagU!RnVGWicymdv=(W<{knv^h3e!0WAW^;^k)y>A#flAif#v=S*y*zfv%M~OfOIdG;HQtP&-A3ETV&!Sd1=ppi*e`e%$%V6VP<$<0SI#I)@`0 zdjN)*sMZ+ zaum)0+*1k)N=*EFU$@j|rD$VKj9S*>QA?r7`LrGI<#i?|H;{q9nKo=m=#Kz-Pn|8p z?#Gh|d7!=wO%15iMGA`7EH`8(rj&-2+xKKrRk!2lK2UW}AT6L|je~ILI11h4cBCld zB(yL9wmhP^E1&|K%zsLjoEALS1?1f;aIoB}Wa-l3fwiV$3kJc998xXSXjpU>hpGxE zMhb+9QDf8sp&?auT1*AhpF0vzch}=s2MYu0%VoXD#5GF=uB1iq3wIxM}Z$M+Qq*c)%korBjzd(x@`KE0Z(} zzkkutz@Ugi&^~!+Y7csZ4<+3<((r=>x(U>QkNC?xs)Bk1*77Ma>=eZMtKb0PqmPS_ zpBNY;xPQnu zg|Y|sm?7;$TIj{?hZFdMbG^hMo*Bwc(Q%pZ4K#v%rU$YYTR8>|x2-fPgKG2RUq14Vut-@!EUgn9&6x9DCB^@$r9lm*fmu8t|ae)pvNl!hXEV zf5g7AvD(fzROItr75Q2YK7tg7pYFqgk;K>Wf4{DgF6XD$ou*In_uZAx8=72}K~rPG zpMrBspC9}xdylv8u>T0ix~r+wG?>-m^&RW)^Lgz@ydA8U^HtlsZo_Khk8iQ97)&p* za}XywiQ07a*$2hGva&J`Kiq@*oeYiFax9L>+w~xvq84%SH@8~;VOxtLCc}sk2<7kF)*1BVbFlX(jzROMqL}0uhfyF5 zD3Ie>OqS?b4E#elgTab@Uzfk}yFmqy6AOR}b6X3!yJO zFU2J6K<_ah*9Fuu@lo$dAJ=^lIq`56Q~4l3HsEXT?1uqzV2La-Sma0+6Z;`r2Cesa zy~oHAFDBd8+Qf1FTuhx9CGy5;QRD#xxKVPe_&2vXT8%;&LNRX!!m+LwC}K+D-?9*e z^TjlWI=XtiZ2JP8A4$|4#`%13GR0V(&|SWNb?$mK5Vj`=J|=?@pSSmzKc+h3_aAHT zB!NioB&zWcf&)Yr#3cKg z6N^I-gCxhr57d;}a?@Eb>s%9du9+y0I5*8&H+|`zxRlc9W-~m~8J?)!HEXX9+pEu| zL8-fVwxBj#P&>iCcJ$?=vvqsIb$cQOZL{{a;k0OJ^{{wGQ}jOn^Wu`AoO0xyQAUeP zLvmx(==hM@B~qBqG{;3dMQNt@ZO)O^7i%umL~P~5>N$-*WL!SBWvnigy?#d10PPs< zvj+FH!999nwtQWhCboslTV{0kM0KuNU2#}fJlZ^? 
zTN%|^-`8djp9lhf*x-(63rE*XYgYhU)Lb&UZ**&@dfUwQ*3iy$*j93?5>$;j<}o7H_ureA=h1#*2(ni2SYpd zgtoVZoO@@i`+%~na%}bN@`mv8hEPdkw6tPu#Y@#MR!EZ-0+X^y&!NB4yas-u4XN3^_N=SU2A+T|HY`8!oMl=D9D{UZ@=@AIk~Z*F_78;gnuh_aPNz z%5p!Vn8Mui&2c4VFPyb458IZH`KE2vadDbs2OV8r^HS}Lwd3nXw#>Oa(V{Ka_g&u_ zYTFmu-xb=|9V$Eu`sEkgmQwjep^~O+o!1LzH?@T~wMFjU8*17YD%wBex*r%}lqPpi zZVDA`n{jO?@3j+0X6ssl;kuSkZEL7#=ZtF?DX;sXf3kVDac8)3XJp;3P~E+uqWflC zyCI0+DEMeE4O8_I&esPB;`h^wb-tNbG`e@9;!^u`-ddt=)r5R@^_KAJEupG=uHPTp zcOLKP`$oOLyZT#Zpz z`B+8RwPIv*)K&6P{}N?5n##$awXO_XSI$_gqQ%QE*Iuf9Zr%AUQLE#l<4cGoXpHgh zpor4hL)zk~d-<%pD(tSBP)x3kxVMLNuDP7TkY~-r+DK0G@ZIm@dRjI1;MkFndEJce zF3``IJ7S$Sl+GDpMpumm#`-2VUO#kwcPRJX8RLB)$SGq!R7G>$qtcNRq4FJ}miEw& z1EB*CgsdEN(PWOxDZMQ?tIH4T@<$Hc(3O%lHeN5eo*Odnp3&`Dykh?p-zdhtA>JpZ zH0F@p6yGMHayCO?1!iV1{A|G<{wZDt#`N)RJ5^Y5ixPtYk!e`48t?4>?(qKZ(7vOg z!k*hqaKU(z1~Ti~9#cxcZd-zSi;A|9`4*8ggJ1H+&7!Q zCY-+}WU0PIDfG^$#W}Kftl~oZv}FaXm8i37LOu4-XRT|)*0oWqd$b~KEjhnAW_8`PI_4}z3Axs%s~@kPwN!*H6=SW_ zmemvYO`Zzvy7x+Oy7peuQu!GDlJZ66?6SMV%kB=9ZB10ilG^g!?- zFuEY!fVSsoMBDp+EAcK-;*+=*y7tNKMw-&)+@dnlOk{tq7%P0K_{HL}oQZ>z+DS<$ z`|cUdhNuSOZ|<T+gv?y$~1qw~D0F@6%afg*qQ zJq?b|!-m~9`f+;u3h_7UrGQ?s)7z`WR~&VKU$xhJc9n~+DS|Y-T~pSl!`qLH`0^uD z4&qhmEf(?X5_)^J@^vYLxS~PRB9^^rqPN#7-!y9xFI%tJK}mmHMZ?RFSF3>PCk(w~ zjr=EKG2(I&NcxGgUIo=}>**b9m2YP&5w|1h+r{*bbwT;tB}n`BGNgUG(gD@)XtDa8 zT)IUhf5(Qk@8qplLv^Se`w^-_HAB@jRD^1==`dBfW0fLY$soRxMtl{)*C0)JEumS5 z__T;Xv9u*yHLavu(t?_4l?-39WLRxNEv7B?;$0=Oh>dP_DkF9!;zbNpL`oHiud280 za?5V$>0L(Ujcg_2b}3ZfD4-E{OMvJ`2{PO$SM0V*ZmeE!+HH_TGg14fK>|flD+73^ zh#G%&dy91Wx4_%ro2>u1ZMF24k_^03k;Z2XPe}L=s!tH36A8tnvk2l=cK?l*z zi7C2#{or->!i6IqLCH-c@xwee;(>AKP7#j?Zl}m40Cx#X(81=OMSyWA?s)_+AowZAU^?(&a=CliW-k zK#B_p<jrn1ncwG1;+>lW?oc`Y>Dyp>hj~$h1 zvIVYyn18QB&p-F1Ha+mEVw;qm56L7?c`27ygf&N=tV#YZr7$RxHOXIA`J`k>&i}l_ zz*o1Zq`pizK?{0Rx2Q`LT(%$21wyaDl@=aWJqUdbD3URvfSgsW<>xxg{5KZM_(n^5 zh2gJAP^L&oM@#JUBwBx~CB6KwwXIspnc(}lCB3~YB?74SC5Psm=xl}3|0L_0;TYDYSkp2C-& zwL`mzQkz1lPodnjQ=O^f>?xzH^Jnu{6k3NNG1OE*m*h>1U3LopAhe{1mNMt5=VN}L zyfV0^%0HVVdWdkG24Zae;$>2V@%-Q5upmfYOvg`JX^(TtqqF8c>>3D9d)K z9VVchU*7>!icCui<^4cu2Fit=nNujODU@FUN()eC%=gHWLYb37`RXq9N-JlCy=4AQ zB1IExk}LTOy62{F|9+QxUCv+R&Pj45e}UVE+$k-P-N0dAxX|vlCv{%n#c{ z$1r7R$+I}XFH}D7d=3&dg80%(LG%wU;)DmHn(3+G;N*?YkQ5@I;`(Bf`z zd=a6Y6VMJm#YJG^&_js{KFY^Rj0%+>>*?s~Tkuyr5<(+TFL7lq z0W%~Z_y&4=JGhg7-R}tgO^yeu^jUt#C{~;X0P2({aUmE)T%#VW%I0v^6Ok&!NW={n z6PJ#2Oft4eb{EOWA{h;w9jm(#)FWsB5L4_tdCV&Wz;JVkG50mC7rZaxa^Qxxko4ej z2;ndpR|$cr{!Zd=KtdSAvU)p?#ng$2U9f|^DzUtaZHK^L3~!Fx2L`}JAyaV@#o~~1 zKSgjD0eW#U#@})H|J#*ADLA4vOI*2_T5vquAMEHE@Iu6pxS{RdzJcCYS|UCv)cG6` zMRO_=ciP+={4=ae_6j<6CJFyjHctXUifjk}-XU$dZO&jhzbRsHpJ|53irz6|xM;ax z8QnLrcD817xMuV9&5@ej5yzg0uI-HCww(Xvk)851S%DJV!EF9lz2A_A)p=2^jlX_u zgttE+=4*W}_}>V{j;Ota|MkFINp#u6 z2l9cK>2)-~2%~E?GZ7PwFvrxohKa1E0d|T(Bt9^l6x=8A!ayUwd0zBh>57k3=MU`8t;wF`mQYP-iE1f)^%m?Gi3 zcJ#!gf=H5RDY(%KD~Bu^bhkNleg#i^!O4CD%j^i&Bf#adhr5Yr2tf@12u*tr4jdNp zezd|#<=?S#4l5P@BXCwZ(%+MC%zutGBy^nYXEFlJUE`}C$*a4L1WzC!?mO|^HL!9> z&P98|cPB?7Fyci`y?7ECugSMcS30R@AZDZacg2*&ZBKJqlvx^T*nvfduA zUpQlU#De}r338BVN3l3-cuWuk|8A*ZLBM|g$`}7pbXGK<;WqyHGq2H@C&u6NXes`t zpOnJw-@n6uC;V3=GsUvuvoQYwfgPT~OPZ5XUjOCF;7(|3t6%{nk1KtCBs%*C;F1B( zB9tdXNDa&aT-Xg4vbc6&8R-bPxQUI=ff$+gJj9w1>_u=t0FP1_0m8Edzf{73>;Pg% z0mL#PdkZph;j$r_y(s91dvXjf37`g4^MQGjLBoN=q2n{pT0MtEFq{^!ej$BJs#EAc zoxPRu8QEJmUCXCkEo1(dPQ7?)wyGsu)e_P;=KkiCE+(^baY|RHVtvH0KI$sFxb?!; z(DH`K9TC@t^NKlJ!N^Gna)*r-ad~iQ@>T|jf$*-;v@|u!f~iq=;$}>Z3Tg6}rbNvN zDN*`=)9P?*(f`?Z9tP)Ox=9QfQH?7hBdXB=8BvXy;_>w=K(DUL1vnMV(L&|aGP+SK zovIKcz7lz+R?%3!8f&kq=tjNxni^}bX|VQMCe~il(paJ=(4cFqWUdv{jb-v{B~rvI 
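The r2r/storage.py changes below replace per-request AsyncClient construction with one shared, pre-configured client, and bound upload concurrency with a semaphore before gathering results. A minimal, self-contained sketch of that pattern, assuming httpx; the endpoint path and payload shape are illustrative, not the module's real API:

import asyncio

import httpx


async def upload_batch(endpoint: str, payloads: list[dict[str, str]]) -> list[str]:
    """Upload payloads over one shared client, with at most five in flight."""
    semaphore = asyncio.Semaphore(5)  # cap concurrent uploads

    async with httpx.AsyncClient(timeout=30.0) as client:  # reused for the whole batch

        async def upload_one(payload: dict[str, str]) -> str:
            async with semaphore:  # acquire a slot before sending
                response = await client.post(f"{endpoint}/v3/documents", json=payload)
                response.raise_for_status()
                return str(response.json().get("id", ""))

        results = await asyncio.gather(
            *(upload_one(p) for p in payloads), return_exceptions=True
        )

    # Keep successful IDs; gather returns exceptions in-band instead of raising.
    return [r for r in results if isinstance(r, str)]

Reusing one client keeps connection pooling and headers consistent across the batch, which is the stated motivation for the helper methods introduced in the diff.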
diff --git a/ingest_pipeline/storage/r2r/storage.py b/ingest_pipeline/storage/r2r/storage.py
index daff13c..1c43302 100644
--- a/ingest_pipeline/storage/r2r/storage.py
+++ b/ingest_pipeline/storage/r2r/storage.py
@@ -10,15 +10,14 @@ from typing import Self, TypeVar, cast
 from uuid import UUID, uuid4
 
 # Direct imports for runtime and type checking
-# Note: Some type checkers (basedpyright/Pyrefly) may report import issues
-# but these work correctly at runtime and with mypy
-from httpx import AsyncClient, HTTPStatusError
-from r2r import R2RAsyncClient, R2RException
+from httpx import AsyncClient, HTTPStatusError  # type: ignore
+from r2r import R2RAsyncClient, R2RException  # type: ignore
 from typing_extensions import override
 
 from ...core.exceptions import StorageError
 from ...core.models import Document, DocumentMetadata, IngestionSource, StorageConfig
 from ..base import BaseStorage
+from ..types import DocumentInfo
 
 T = TypeVar("T")
 
@@ -80,6 +79,24 @@ class R2RStorage(BaseStorage):
         self.client: R2RAsyncClient = R2RAsyncClient(self.endpoint)
         self.default_collection_id: str | None = None
 
+    def _get_http_client_headers(self) -> dict[str, str]:
+        """Get consistent HTTP headers for direct API calls."""
+        headers = {"Content-Type": "application/json"}
+
+        # Add authentication headers if available
+        # Note: R2R SDK may handle auth internally, so we extract it if possible
+        if hasattr(self.client, "_get_headers"):
+            with contextlib.suppress(Exception):
+                sdk_headers = self.client._get_headers()  # type: ignore[attr-defined]
+                if isinstance(sdk_headers, dict):
+                    headers |= sdk_headers
+        return headers
+
+    def _create_http_client(self) -> AsyncClient:
+        """Create a properly configured HTTP client for direct API calls."""
+        headers = self._get_http_client_headers()
+        return AsyncClient(headers=headers, timeout=30.0)
+
     @override
     async def initialize(self) -> None:
         """Initialize R2R connection and ensure default collection exists."""
@@ -96,7 +113,7 @@ class R2RStorage(BaseStorage):
 
         # Test connection using direct HTTP call to v3 API
         endpoint = self.endpoint
-        client = AsyncClient()
+        client = self._create_http_client()
         try:
             response = await client.get(f"{endpoint}/v3/collections")
             response.raise_for_status()
@@ -109,7 +126,7 @@ class R2RStorage(BaseStorage):
     async def _ensure_collection(self, collection_name: str) -> str:
         """Get or create collection by name."""
         endpoint = self.endpoint
-        client = AsyncClient()
+        client = self._create_http_client()
         try:
             # List collections and find by name
             response = await client.get(f"{endpoint}/v3/collections")
@@ -152,6 +169,9 @@ class R2RStorage(BaseStorage):
         finally:
             await client.aclose()
 
+        # This should never be reached, but satisfies the static analyzer
+        raise StorageError(f"Unexpected code path in _ensure_collection for '{collection_name}'")
+
     @override
     async def store(self, document: Document, *, collection_name: str | None = None) -> str:
         """Store a single document."""
@@ -161,20 +181,44 @@ class R2RStorage(BaseStorage):
     async def store_batch(
         self, documents: list[Document], *, collection_name: str | None = None
     ) -> list[str]:
-        """Store multiple documents."""
+        """Store multiple documents efficiently with connection reuse."""
        collection_id = await self._resolve_collection_id(collection_name)
         print(
             f"Using collection ID: {collection_id} for collection: {collection_name or self.config.collection_name}"
         )
-        stored_ids: list[str] = []
-        for document in documents:
-            if not self._is_document_valid(document):
-                continue
+        # Filter valid documents upfront
+        valid_documents = [doc for doc in documents if self._is_document_valid(doc)]
+        if not valid_documents:
+            return []
 
-            stored_id = await self._store_single_document(document, collection_id)
-            if stored_id:
-                stored_ids.append(stored_id)
+        stored_ids: list[str] = []
+
+        # Reuse one configured HTTP client for the whole batch
+        async with self._create_http_client() as http_client:
+            # Process documents with controlled concurrency
+            semaphore = asyncio.Semaphore(5)  # Limit concurrent uploads
+
+            async def store_single_with_client(document: Document) -> str | None:
+                async with semaphore:
+                    return await self._store_single_document_with_client(
+                        document, collection_id, http_client
+                    )
+
+            # Execute all uploads concurrently
+            results = await asyncio.gather(
+                *[store_single_with_client(doc) for doc in valid_documents],
+                return_exceptions=True,
+            )
+
+        # Collect successful IDs
+        for result in results:
+            if isinstance(result, str):
+                stored_ids.append(result)
+            elif isinstance(result, Exception):
+                print(f"Document upload failed: {result}")
return stored_ids @@ -208,6 +252,16 @@ class R2RStorage(BaseStorage): async def _store_single_document(self, document: Document, collection_id: str) -> str | None: """Store a single document with retry logic.""" + http_client = AsyncClient() + async with http_client: # type: ignore + return await self._store_single_document_with_client( + document, collection_id, http_client + ) + + async def _store_single_document_with_client( + self, document: Document, collection_id: str, http_client: AsyncClient + ) -> str | None: + """Store a single document with retry logic using provided HTTP client.""" requested_id = str(document.id) print(f"Creating document with ID: {requested_id}") @@ -216,15 +270,23 @@ class R2RStorage(BaseStorage): for attempt in range(max_retries): try: - doc_response = await self._attempt_document_creation(document, collection_id) + doc_response = await self._attempt_document_creation_with_client( + document, collection_id, http_client + ) if doc_response: - return self._process_document_response(doc_response, requested_id, collection_id) + return self._process_document_response( + doc_response, requested_id, collection_id + ) except (TimeoutError, OSError) as e: - if not await self._should_retry_timeout(e, attempt, max_retries, requested_id, retry_delay): + if not await self._should_retry_timeout( + e, attempt, max_retries, requested_id, retry_delay + ): break retry_delay *= 2 except HTTPStatusError as e: - if not await self._should_retry_http_error(e, attempt, max_retries, requested_id, retry_delay): + if not await self._should_retry_http_error( + e, attempt, max_retries, requested_id, retry_delay + ): break retry_delay *= 2 except Exception as exc: @@ -233,8 +295,20 @@ class R2RStorage(BaseStorage): return None - async def _attempt_document_creation(self, document: Document, collection_id: str) -> dict[str, object] | None: + async def _attempt_document_creation( + self, document: Document, collection_id: str + ) -> dict[str, object] | None: """Attempt to create a document via HTTP API.""" + http_client = AsyncClient() + async with http_client: # type: ignore + return await self._attempt_document_creation_with_client( + document, collection_id, http_client + ) + + async def _attempt_document_creation_with_client( + self, document: Document, collection_id: str, http_client: AsyncClient + ) -> dict[str, object] | None: + """Attempt to create a document via HTTP API using provided client.""" import json requested_id = str(document.id) @@ -255,29 +329,36 @@ class R2RStorage(BaseStorage): print(f"Sending to R2R - files keys: {list(files.keys())}") print(f"Metadata JSON: {files['metadata'][1]}") - async with AsyncClient() as http_client: - response = await http_client.post(f"{self.endpoint}/v3/documents", files=files) + response = await http_client.post(f"{self.endpoint}/v3/documents", files=files) # type: ignore[call-arg] - if response.status_code == 422: - self._handle_validation_error(response, requested_id, metadata) - return None + if response.status_code == 422: + self._handle_validation_error(response, requested_id, metadata) + return None - response.raise_for_status() - return response.json() + response.raise_for_status() + return response.json() - def _handle_validation_error(self, response: object, requested_id: str, metadata: dict[str, object]) -> None: + def _handle_validation_error( + self, response: object, requested_id: str, metadata: dict[str, object] + ) -> None: """Handle validation errors from R2R API.""" try: - error_detail = getattr(response, 'json', lambda: 
{})() if hasattr(response, 'json') else {} + error_detail = ( + getattr(response, "json", lambda: {})() if hasattr(response, "json") else {} + ) print(f"R2R validation error for document {requested_id}: {error_detail}") print(f"Document metadata sent: {metadata}") print(f"Response status: {getattr(response, 'status_code', 'unknown')}") print(f"Response headers: {dict(getattr(response, 'headers', {}))}") except Exception: - print(f"R2R validation error for document {requested_id}: {getattr(response, 'text', 'unknown error')}") + print( + f"R2R validation error for document {requested_id}: {getattr(response, 'text', 'unknown error')}" + ) print(f"Document metadata sent: {metadata}") - def _process_document_response(self, doc_response: dict[str, object], requested_id: str, collection_id: str) -> str: + def _process_document_response( + self, doc_response: dict[str, object], requested_id: str, collection_id: str + ) -> str: """Process successful document creation response.""" response_payload = doc_response.get("results", doc_response) doc_id = _extract_id(response_payload, requested_id) @@ -288,11 +369,20 @@ class R2RStorage(BaseStorage): print(f"Warning: Requested ID {requested_id} but got {doc_id}") if collection_id: - print(f"Document {doc_id} should be assigned to collection {collection_id} via creation API") + print( + f"Document {doc_id} should be assigned to collection {collection_id} via creation API" + ) return doc_id - async def _should_retry_timeout(self, error: Exception, attempt: int, max_retries: int, requested_id: str, retry_delay: float) -> bool: + async def _should_retry_timeout( + self, + error: Exception, + attempt: int, + max_retries: int, + requested_id: str, + retry_delay: float, + ) -> bool: """Determine if timeout error should be retried.""" if attempt >= max_retries - 1: return False @@ -301,12 +391,22 @@ class R2RStorage(BaseStorage): await asyncio.sleep(retry_delay) return True - async def _should_retry_http_error(self, error: HTTPStatusError, attempt: int, max_retries: int, requested_id: str, retry_delay: float) -> bool: + async def _should_retry_http_error( + self, + error: HTTPStatusError, + attempt: int, + max_retries: int, + requested_id: str, + retry_delay: float, + ) -> bool: """Determine if HTTP error should be retried.""" - if error.response.status_code < 500 or attempt >= max_retries - 1: + status_code = error.response.status_code + if status_code < 500 or attempt >= max_retries - 1: return False - print(f"Server error {error.response.status_code} for document {requested_id}, retrying in {retry_delay}s...") + print( + f"Server error {status_code} for document {requested_id}, retrying in {retry_delay}s..." 
+ ) await asyncio.sleep(retry_delay) return True @@ -323,13 +423,13 @@ class R2RStorage(BaseStorage): print(" โ†’ Server error - R2R internal issue") else: import traceback + traceback.print_exc() def _build_metadata(self, document: Document) -> dict[str, object]: """Convert document metadata to enriched R2R format.""" metadata = document.metadata - # Core required fields result: dict[str, object] = { "source_url": metadata["source_url"], @@ -465,7 +565,9 @@ class R2RStorage(BaseStorage): except ValueError: return uuid4() - def _build_core_metadata(self, metadata_map: dict[str, object], timestamp: datetime) -> DocumentMetadata: + def _build_core_metadata( + self, metadata_map: dict[str, object], timestamp: datetime + ) -> DocumentMetadata: """Build core required metadata fields.""" return { "source_url": str(metadata_map.get("source_url", "")), @@ -475,7 +577,12 @@ class R2RStorage(BaseStorage): "char_count": _as_int(metadata_map.get("char_count")), } - def _add_optional_metadata_fields(self, metadata: DocumentMetadata, doc_map: dict[str, object], metadata_map: dict[str, object]) -> None: + def _add_optional_metadata_fields( + self, + metadata: DocumentMetadata, + doc_map: dict[str, object], + metadata_map: dict[str, object], + ) -> None: """Add optional metadata fields if present.""" self._add_title_and_description(metadata, doc_map, metadata_map) self._add_content_categorization(metadata, metadata_map) @@ -484,7 +591,12 @@ class R2RStorage(BaseStorage): self._add_processing_fields(metadata, metadata_map) self._add_quality_scores(metadata, metadata_map) - def _add_title_and_description(self, metadata: DocumentMetadata, doc_map: dict[str, object], metadata_map: dict[str, object]) -> None: + def _add_title_and_description( + self, + metadata: DocumentMetadata, + doc_map: dict[str, object], + metadata_map: dict[str, object], + ) -> None: """Add title and description fields.""" if title := (doc_map.get("title") or metadata_map.get("title")): metadata["title"] = cast(str | None, title) @@ -494,7 +606,9 @@ class R2RStorage(BaseStorage): elif description := metadata_map.get("description"): metadata["description"] = cast(str | None, description) - def _add_content_categorization(self, metadata: DocumentMetadata, metadata_map: dict[str, object]) -> None: + def _add_content_categorization( + self, metadata: DocumentMetadata, metadata_map: dict[str, object] + ) -> None: """Add content categorization fields.""" if tags := metadata_map.get("tags"): metadata["tags"] = [str(tag) for tag in tags] if isinstance(tags, list) else [] @@ -505,7 +619,9 @@ class R2RStorage(BaseStorage): if language := metadata_map.get("language"): metadata["language"] = str(language) - def _add_authorship_fields(self, metadata: DocumentMetadata, metadata_map: dict[str, object]) -> None: + def _add_authorship_fields( + self, metadata: DocumentMetadata, metadata_map: dict[str, object] + ) -> None: """Add authorship and source information fields.""" if author := metadata_map.get("author"): metadata["author"] = str(author) @@ -514,7 +630,9 @@ class R2RStorage(BaseStorage): if site_name := metadata_map.get("site_name"): metadata["site_name"] = str(site_name) - def _add_structure_fields(self, metadata: DocumentMetadata, metadata_map: dict[str, object]) -> None: + def _add_structure_fields( + self, metadata: DocumentMetadata, metadata_map: dict[str, object] + ) -> None: """Add document structure fields.""" if heading_hierarchy := metadata_map.get("heading_hierarchy"): metadata["heading_hierarchy"] = ( @@ -529,7 +647,9 @@ class 
R2RStorage(BaseStorage): if has_links := metadata_map.get("has_links"): metadata["has_links"] = bool(has_links) - def _add_processing_fields(self, metadata: DocumentMetadata, metadata_map: dict[str, object]) -> None: + def _add_processing_fields( + self, metadata: DocumentMetadata, metadata_map: dict[str, object] + ) -> None: """Add processing-related metadata fields.""" if extraction_method := metadata_map.get("extraction_method"): metadata["extraction_method"] = str(extraction_method) @@ -538,7 +658,9 @@ class R2RStorage(BaseStorage): if last_modified := metadata_map.get("last_modified"): metadata["last_modified"] = _as_datetime(last_modified) - def _add_quality_scores(self, metadata: DocumentMetadata, metadata_map: dict[str, object]) -> None: + def _add_quality_scores( + self, metadata: DocumentMetadata, metadata_map: dict[str, object] + ) -> None: """Add quality score fields with safe float conversion.""" if readability_score := metadata_map.get("readability_score"): try: @@ -641,7 +763,7 @@ class R2RStorage(BaseStorage): async def count(self, *, collection_name: str | None = None) -> int: """Get document count in collection.""" endpoint = self.endpoint - client = AsyncClient() + client = self._create_http_client() try: # Get collections and find the count for the specific collection response = await client.get(f"{endpoint}/v3/collections") @@ -662,6 +784,9 @@ class R2RStorage(BaseStorage): finally: await client.aclose() + # This should never be reached, but satisfies static analyzer + return 0 + @override async def close(self) -> None: """Close R2R client.""" @@ -709,7 +834,7 @@ class R2RStorage(BaseStorage): async def list_collections(self) -> list[str]: """List all available collections.""" endpoint = self.endpoint - client = AsyncClient() + client = self._create_http_client() try: response = await client.get(f"{endpoint}/v3/collections") response.raise_for_status() @@ -726,6 +851,9 @@ class R2RStorage(BaseStorage): finally: await client.aclose() + # This should never be reached, but satisfies static analyzer + return [] + async def list_collections_detailed(self) -> list[dict[str, object]]: """List all available collections with detailed information.""" try: @@ -789,7 +917,7 @@ class R2RStorage(BaseStorage): offset: int = 0, *, collection_name: str | None = None, - ) -> list[dict[str, object]]: + ) -> list[DocumentInfo]: """ List documents in R2R with pagination. @@ -802,14 +930,14 @@ class R2RStorage(BaseStorage): List of document dictionaries with metadata """ try: - documents: list[dict[str, object]] = [] + documents: list[DocumentInfo] = [] if collection_name: # Get collection ID first collection_id = await self._ensure_collection(collection_name) # Use the collections API to list documents in a specific collection endpoint = self.endpoint - client = AsyncClient() + client = self._create_http_client() try: params = {"offset": offset, "limit": limit} response = await client.get( @@ -842,20 +970,19 @@ class R2RStorage(BaseStorage): title = str(doc_map.get("title", "Untitled")) metadata = _as_mapping(doc_map.get("metadata", {})) - documents.append( - { - "id": doc_id, - "title": title, - "source_url": str(metadata.get("source_url", "")), - "description": str(metadata.get("description", "")), - "content_type": str(metadata.get("content_type", "text/plain")), - "content_preview": str(doc_map.get("content", ""))[:200] + "..." 
- if doc_map.get("content") - else "", - "word_count": _as_int(metadata.get("word_count", 0)), - "timestamp": str(doc_map.get("created_at", "")), - } - ) + document_info: DocumentInfo = { + "id": doc_id, + "title": title, + "source_url": str(metadata.get("source_url", "")), + "description": str(metadata.get("description", "")), + "content_type": str(metadata.get("content_type", "text/plain")), + "content_preview": str(doc_map.get("content", ""))[:200] + "..." + if doc_map.get("content") + else "", + "word_count": _as_int(metadata.get("word_count", 0)), + "timestamp": str(doc_map.get("created_at", "")), + } + documents.append(document_info) return documents diff --git a/ingest_pipeline/storage/types.py b/ingest_pipeline/storage/types.py new file mode 100644 index 0000000..5e5a4f4 --- /dev/null +++ b/ingest_pipeline/storage/types.py @@ -0,0 +1,22 @@ +"""Shared types for storage adapters.""" + +from typing import TypedDict + + +class CollectionSummary(TypedDict): + """Collection metadata for describe_collections.""" + name: str + count: int + size_mb: float + + +class DocumentInfo(TypedDict): + """Document information for list_documents.""" + id: str + title: str + source_url: str + description: str + content_type: str + content_preview: str + word_count: int + timestamp: str \ No newline at end of file diff --git a/ingest_pipeline/storage/weaviate.py b/ingest_pipeline/storage/weaviate.py index d856680..2ee92ec 100644 --- a/ingest_pipeline/storage/weaviate.py +++ b/ingest_pipeline/storage/weaviate.py @@ -21,6 +21,7 @@ from ..core.exceptions import StorageError from ..core.models import Document, DocumentMetadata, IngestionSource, StorageConfig from ..utils.vectorizer import Vectorizer from .base import BaseStorage +from .types import CollectionSummary, DocumentInfo VectorContainer: TypeAlias = Mapping[str, object] | Sequence[object] | None @@ -594,14 +595,14 @@ class WeaviateStorage(BaseStorage): except Exception as e: raise StorageError(f"Failed to list collections: {e}") from e - async def describe_collections(self) -> list[dict[str, object]]: + async def describe_collections(self) -> list[CollectionSummary]: """Return metadata for each Weaviate collection.""" if not self.client: raise StorageError("Weaviate client not initialized") try: client = cast(weaviate.WeaviateClient, self.client) - collections: list[dict[str, object]] = [] + collections: list[CollectionSummary] = [] for name in client.collections.list_all(): collection_obj = client.collections.get(name) if not collection_obj: @@ -609,13 +610,12 @@ class WeaviateStorage(BaseStorage): count = collection_obj.aggregate.over_all(total_count=True).total_count or 0 size_mb = count * 0.01 - collections.append( - { - "name": name, - "count": count, - "size_mb": size_mb, - } - ) + collection_summary: CollectionSummary = { + "name": name, + "count": count, + "size_mb": size_mb, + } + collections.append(collection_summary) return collections except Exception as e: @@ -812,7 +812,7 @@ class WeaviateStorage(BaseStorage): offset: int = 0, *, collection_name: str | None = None, - ) -> list[dict[str, object]]: + ) -> list[DocumentInfo]: """ List documents in the collection with pagination. 
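The TypedDicts from the new storage/types.py above replace loose dict[str, object] payloads in both adapters, so a type checker can verify that every backend emits the same document shape. A small sketch of what this buys (values are hypothetical; assumes the package is importable):

from ingest_pipeline.storage.types import DocumentInfo

info: DocumentInfo = {
    "id": "doc-1",
    "title": "Example",
    "source_url": "https://example.com/page",
    "description": "",
    "content_type": "text/plain",
    "content_preview": "",
    "word_count": 42,
    "timestamp": "2025-09-19T13:34:17Z",
}

# A wrong value type or a misspelled key is now a static error, e.g.:
# info["word_count"] = "42"   # mypy/pyright: str is not assignable to int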
@@ -834,7 +834,7 @@ class WeaviateStorage(BaseStorage):
                 limit=limit, offset=offset, return_metadata=["creation_time"]
             )
 
-            documents: list[dict[str, object]] = []
+            documents: list[DocumentInfo] = []
             for obj in response.objects:
                 props = self._coerce_properties(
                     obj.properties,
@@ -853,7 +853,7 @@
             else:
                 word_count = 0
 
-            doc_info: dict[str, object] = {
+            doc_info: DocumentInfo = {
                 "id": str(obj.uuid),
                 "title": str(props.get("title", "Untitled")),
                 "source_url": str(props.get("source_url", "")),
diff --git a/ingest_pipeline/utils/__pycache__/metadata_tagger.cpython-312.pyc b/ingest_pipeline/utils/__pycache__/metadata_tagger.cpython-312.pyc
index 81989d6997df438ca8414e5b8f62470acb8aafb7..88ac7534937565012f97fe40761b2295782d2c69 100644
GIT binary patch
[binary deltas omitted]
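The utils diffs that follow replace hard-coded endpoints and ad-hoc dotenv lookups with values resolved from the shared settings object, while explicit constructor arguments still take precedence. A minimal sketch of that precedence order, with illustrative parameter names rather than the real Settings API:

import httpx


def build_llm_client(
    api_key: str | None,
    settings_api_key: str | None,
    timeout: float = 60.0,
) -> httpx.AsyncClient:
    """Prefer the explicit override, then settings, then send no auth header."""
    resolved = api_key or settings_api_key or ""
    headers = {"Content-Type": "application/json"}
    if resolved:
        headers["Authorization"] = f"Bearer {resolved}"
    return httpx.AsyncClient(timeout=timeout, headers=headers)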
diff --git a/ingest_pipeline/utils/metadata_tagger.py b/ingest_pipeline/utils/metadata_tagger.py
index 1f00b16..9beb2d2 100644
--- a/ingest_pipeline/utils/metadata_tagger.py
+++ b/ingest_pipeline/utils/metadata_tagger.py
@@ -2,13 +2,17 @@
 
 import json
 from datetime import UTC, datetime
-from typing import Protocol, TypedDict, cast
+from typing import Final, Protocol, TypedDict, cast
 
 import httpx
 
+from ..config import get_settings
 from ..core.exceptions import IngestionError
 from ..core.models import Document
 
+JSON_CONTENT_TYPE: Final[str] = "application/json"
+AUTHORIZATION_HEADER: Final[str] = "Authorization"
+
 
 class HttpResponse(Protocol):
     """Protocol for HTTP response."""
@@ -29,6 +33,15 @@ class AsyncHttpClient(Protocol):
 
     async def aclose(self) -> None: ...
 
+    async def __aenter__(self) -> "AsyncHttpClient": ...
+
+    async def __aexit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc_val: BaseException | None,
+        exc_tb: object | None,
+    ) -> None: ...
+
 
 class LlmResponse(TypedDict):
     """Type for LLM API response structure."""
@@ -66,8 +79,11 @@ class MetadataTagger:
 
     def __init__(
         self,
-        llm_endpoint: str = "http://llm.lab",
-        model: str = "fireworks/glm-4p5-air",
+        llm_endpoint: str | None = None,
+        model: str | None = None,
+        api_key: str | None = None,
+        *,
+        timeout: float | None = None,
     ):
         """
         Initialize metadata tagger.
@@ -75,30 +91,26 @@ class MetadataTagger:
         Args:
             llm_endpoint: LLM API endpoint
             model: Model to use for tagging
+            api_key: Explicit API key override
+            timeout: Optional request timeout override in seconds
         """
-        self.endpoint = llm_endpoint.rstrip('/')
-        self.model = model
+        settings = get_settings()
+        endpoint_value = llm_endpoint or str(settings.llm_endpoint)
+        self.endpoint = endpoint_value.rstrip("/")
+        self.model = model or settings.metadata_model
 
-        # Get API key from environment
-        import os
-        from pathlib import Path
+        resolved_timeout = timeout if timeout is not None else float(settings.request_timeout)
+        resolved_api_key = api_key or settings.get_llm_api_key() or ""
 
-        from dotenv import load_dotenv
-
-        # Load .env from the project root
-        env_path = Path(__file__).parent.parent.parent / ".env"
-        _ = load_dotenv(env_path)
-
-        api_key = os.getenv("LLM_API_KEY") or os.getenv("OPENAI_API_KEY") or ""
-
-        headers = {"Content-Type": "application/json"}
-        if api_key:
-            headers["Authorization"] = f"Bearer {api_key}"
+        headers: dict[str, str] = {"Content-Type": JSON_CONTENT_TYPE}
+        if resolved_api_key:
+            headers[AUTHORIZATION_HEADER] = f"Bearer {resolved_api_key}"
 
         # Create client with proper typing - httpx.AsyncClient implements AsyncHttpClient protocol
-        AsyncClientClass = getattr(httpx, "AsyncClient")
-        raw_client = AsyncClientClass(timeout=60.0, headers=headers)
-        self.client = cast(AsyncHttpClient, raw_client)
+        self.client = cast(
+            AsyncHttpClient,
+            httpx.AsyncClient(timeout=resolved_timeout, headers=headers),
+        )
 
     async def tag_document(
         self, document: Document, custom_instructions: str | None = None
diff --git a/ingest_pipeline/utils/vectorizer.py b/ingest_pipeline/utils/vectorizer.py
index 75a5e14..adc63f6 100644
--- a/ingest_pipeline/utils/vectorizer.py
+++ b/ingest_pipeline/utils/vectorizer.py
@@ -1,7 +1,7 @@
 """Vectorizer utility for generating embeddings."""
 
 from types import TracebackType
-from typing import Self, cast
+from typing import Final, Self, cast
 
 import httpx
 
@@ -9,6 +9,10 @@ from typings import EmbeddingResponse
 from ..core.exceptions import VectorizationError
 from ..core.models import StorageConfig, VectorConfig
+from ..config import get_settings
+
+JSON_CONTENT_TYPE: Final[str] = "application/json"
+AUTHORIZATION_HEADER: Final[str] = 
"Authorization" class Vectorizer: @@ -25,33 +29,24 @@ class Vectorizer: Args: config: Configuration with embedding details """ + settings = get_settings() if isinstance(config, StorageConfig): - # Extract vector config from storage config - self.endpoint = "http://llm.lab" - self.model = "ollama/bge-m3" - self.dimension = 1024 + # Extract vector config from global settings when storage config is provided + self.endpoint = str(settings.llm_endpoint).rstrip("/") + self.model = settings.embedding_model + self.dimension = settings.embedding_dimension else: - self.endpoint = str(config.embedding_endpoint) + self.endpoint = str(config.embedding_endpoint).rstrip("/") self.model = config.model self.dimension = config.dimension - # Get API key from environment - import os - from pathlib import Path + resolved_api_key = settings.get_llm_api_key() or "" + headers: dict[str, str] = {"Content-Type": JSON_CONTENT_TYPE} + if resolved_api_key: + headers[AUTHORIZATION_HEADER] = f"Bearer {resolved_api_key}" - from dotenv import load_dotenv - - # Load .env from the project root - env_path = Path(__file__).parent.parent.parent / ".env" - _ = load_dotenv(env_path) - - api_key = os.getenv("LLM_API_KEY") or os.getenv("OPENAI_API_KEY") or "" - - headers = {"Content-Type": "application/json"} - if api_key: - headers["Authorization"] = f"Bearer {api_key}" - - self.client: httpx.AsyncClient = httpx.AsyncClient(timeout=60.0, headers=headers) + timeout_seconds = float(settings.request_timeout) + self.client = httpx.AsyncClient(timeout=timeout_seconds, headers=headers) async def vectorize(self, text: str) -> list[float]: """ diff --git a/pyproject.toml b/pyproject.toml index 75fca2d..a1e1877 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,9 +38,9 @@ dev-dependencies = [ "pytest-cov>=4.1.0", "mypy>=1.7.0", "ruff>=0.1.0", - "basedpyright>=1.31.4", "pyrefly>=0.33.0", "sourcery>=1.37.0", + "pylance>=0.36.0", ] [tool.ruff] diff --git a/repomix-output.xml b/repomix-output.xml index a00dd00..afa1c62 100644 --- a/repomix-output.xml +++ b/repomix-output.xml @@ -1444,682 +1444,6 @@ class StorageManager: return self.has_capability(backend, StorageCapabilities.FULL_FEATURED) - -"""Firecrawl configuration widgets for advanced scraping options.""" - -from __future__ import annotations - -import json -from typing import cast - -from textual.app import ComposeResult -from textual.containers import Container, Horizontal -from textual.validation import Integer -from textual.widgets import Button, Checkbox, Input, Label, Switch, TextArea -from typing_extensions import override - -from ..models import FirecrawlOptions - - -class ScrapeOptionsForm(Container): - """Form for configuring Firecrawl scraping options.""" - - DEFAULT_CSS = """ - ScrapeOptionsForm { - border: solid $border; - background: $surface; - padding: 1; - height: auto; - } - - ScrapeOptionsForm .form-section { - margin-bottom: 2; - padding: 1; - border: solid $border-lighten-1; - background: $surface-lighten-1; - } - - ScrapeOptionsForm .form-row { - layout: horizontal; - align-items: center; - height: auto; - margin-bottom: 1; - } - - ScrapeOptionsForm .form-label { - width: 30%; - min-width: 15; - text-align: right; - padding-right: 2; - } - - ScrapeOptionsForm .form-input { - width: 70%; - } - - ScrapeOptionsForm .checkbox-row { - layout: horizontal; - align-items: center; - height: 3; - margin-bottom: 1; - } - - ScrapeOptionsForm .checkbox-label { - margin-left: 2; - } - """ - - def __init__( - self, - *, - name: str | None = None, - id: str | None = None, - 
classes: str | None = None, - disabled: bool = False, - markup: bool = True, - ) -> None: - """Initialize scrape options form.""" - super().__init__(name=name, id=id, classes=classes, disabled=disabled, markup=markup) - - @override - def compose(self) -> ComposeResult: - """Compose scrape options form.""" - yield Label("๐Ÿ”ง Scraping Configuration", classes="form-title") - - # Output formats section - yield Container( - Label("Output Formats", classes="section-title"), - Horizontal( - Checkbox("Markdown", id="format_markdown", value=True, classes="checkbox"), - Label("Markdown", classes="checkbox-label"), - classes="checkbox-row", - ), - Horizontal( - Checkbox("HTML", id="format_html", value=False, classes="checkbox"), - Label("HTML", classes="checkbox-label"), - classes="checkbox-row", - ), - Horizontal( - Checkbox("Screenshot", id="format_screenshot", value=False, classes="checkbox"), - Label("Screenshot", classes="checkbox-label"), - classes="checkbox-row", - ), - classes="form-section", - ) - - # Content filtering section - yield Container( - Label("Content Filtering", classes="section-title"), - Horizontal( - Label("Only Main Content:", classes="form-label"), - Switch(id="only_main_content", value=True, classes="form-input"), - classes="form-row", - ), - Horizontal( - Label("Include Tags:", classes="form-label"), - Input( - placeholder="p, div, article (comma-separated)", - id="include_tags", - classes="form-input", - ), - classes="form-row", - ), - Horizontal( - Label("Exclude Tags:", classes="form-label"), - Input( - placeholder="nav, footer, script (comma-separated)", - id="exclude_tags", - classes="form-input", - ), - classes="form-row", - ), - classes="form-section", - ) - - # Performance settings section - yield Container( - Label("Performance Settings", classes="section-title"), - Horizontal( - Label("Wait Time (ms):", classes="form-label"), - Input( - placeholder="0", - id="wait_for", - validators=[Integer(minimum=0, maximum=30000)], - classes="form-input", - ), - classes="form-row", - ), - classes="form-section", - ) - - def get_scrape_options(self) -> dict[str, object]: - """Get scraping options from form.""" - # Collect formats - formats = [] - if self.query_one("#format_markdown", Checkbox).value: - formats.append("markdown") - if self.query_one("#format_html", Checkbox).value: - formats.append("html") - if self.query_one("#format_screenshot", Checkbox).value: - formats.append("screenshot") - options: dict[str, object] = { - "formats": formats, - "only_main_content": self.query_one( - "#only_main_content", Switch - ).value, - } - include_tags_input = self.query_one("#include_tags", Input).value - if include_tags_input.strip(): - options["include_tags"] = [tag.strip() for tag in include_tags_input.split(",")] - - exclude_tags_input = self.query_one("#exclude_tags", Input).value - if exclude_tags_input.strip(): - options["exclude_tags"] = [tag.strip() for tag in exclude_tags_input.split(",")] - - # Performance - wait_for_input = self.query_one("#wait_for", Input).value - if wait_for_input.strip(): - try: - options["wait_for"] = int(wait_for_input) - except ValueError: - pass - - return options - - def set_scrape_options(self, options: dict[str, object]) -> None: - """Set form values from options.""" - # Set formats - formats = options.get("formats", ["markdown"]) - formats_list = formats if isinstance(formats, list) else [] - self.query_one("#format_markdown", Checkbox).value = "markdown" in formats_list - self.query_one("#format_html", Checkbox).value = "html" in 
formats_list - self.query_one("#format_screenshot", Checkbox).value = "screenshot" in formats_list - - # Set content filtering - main_content_val = options.get("only_main_content", True) - self.query_one("#only_main_content", Switch).value = bool(main_content_val) - - if include_tags := options.get("include_tags", []): - include_list = include_tags if isinstance(include_tags, list) else [] - self.query_one("#include_tags", Input).value = ", ".join(str(tag) for tag in include_list) - - if exclude_tags := options.get("exclude_tags", []): - exclude_list = exclude_tags if isinstance(exclude_tags, list) else [] - self.query_one("#exclude_tags", Input).value = ", ".join(str(tag) for tag in exclude_list) - - # Set performance - wait_for = options.get("wait_for") - if wait_for is not None: - self.query_one("#wait_for", Input).value = str(wait_for) - - -class MapOptionsForm(Container): - """Form for configuring site mapping options.""" - - DEFAULT_CSS = """ - MapOptionsForm { - border: solid $border; - background: $surface; - padding: 1; - height: auto; - } - - MapOptionsForm .form-section { - margin-bottom: 2; - padding: 1; - border: solid $border-lighten-1; - background: $surface-lighten-1; - } - - MapOptionsForm .form-row { - layout: horizontal; - align-items: center; - height: auto; - margin-bottom: 1; - } - - MapOptionsForm .form-label { - width: 30%; - min-width: 15; - text-align: right; - padding-right: 2; - } - - MapOptionsForm .form-input { - width: 70%; - } - """ - - def __init__( - self, - *, - name: str | None = None, - id: str | None = None, - classes: str | None = None, - disabled: bool = False, - markup: bool = True, - ) -> None: - """Initialize map options form.""" - super().__init__(name=name, id=id, classes=classes, disabled=disabled, markup=markup) - - @override - def compose(self) -> ComposeResult: - """Compose map options form.""" - yield Label("๐Ÿ—บ๏ธ Site Mapping Configuration", classes="form-title") - - # Discovery settings section - yield Container( - Label("Discovery Settings", classes="section-title"), - Horizontal( - Label("Search Pattern:", classes="form-label"), - Input( - placeholder="docs, api, guide (optional)", - id="search_pattern", - classes="form-input", - ), - classes="form-row", - ), - Horizontal( - Label("Include Subdomains:", classes="form-label"), - Switch(id="include_subdomains", value=False, classes="form-input"), - classes="form-row", - ), - classes="form-section", - ) - - # Limits section - yield Container( - Label("Crawling Limits", classes="section-title"), - Horizontal( - Label("Max Pages:", classes="form-label"), - Input( - placeholder="100", - id="max_pages", - validators=[Integer(minimum=1, maximum=1000)], - classes="form-input", - ), - classes="form-row", - ), - Horizontal( - Label("Max Depth:", classes="form-label"), - Input( - placeholder="5", - id="max_depth", - validators=[Integer(minimum=1, maximum=20)], - classes="form-input", - ), - classes="form-row", - ), - classes="form-section", - ) - - def get_map_options(self) -> dict[str, object]: - """Get mapping options from form.""" - options: dict[str, object] = {} - - # Discovery settings - search_pattern = self.query_one("#search_pattern", Input).value - if search_pattern.strip(): - options["search"] = search_pattern.strip() - - options["include_subdomains"] = self.query_one("#include_subdomains", Switch).value - - # Limits - max_pages_input = self.query_one("#max_pages", Input).value - if max_pages_input.strip(): - try: - options["limit"] = int(max_pages_input) - except ValueError: - pass - 
- max_depth_input = self.query_one("#max_depth", Input).value - if max_depth_input.strip(): - try: - options["max_depth"] = int(max_depth_input) - except ValueError: - pass - - return options - - def set_map_options(self, options: dict[str, object]) -> None: - """Set form values from options.""" - if search := options.get("search"): - self.query_one("#search_pattern", Input).value = str(search) - - subdomains_val = options.get("include_subdomains", False) - self.query_one("#include_subdomains", Switch).value = bool(subdomains_val) - - # Set limits - limit = options.get("limit") - if limit is not None: - self.query_one("#max_pages", Input).value = str(limit) - - max_depth = options.get("max_depth") - if max_depth is not None: - self.query_one("#max_depth", Input).value = str(max_depth) - - -class ExtractOptionsForm(Container): - """Form for configuring data extraction options.""" - - DEFAULT_CSS = """ - ExtractOptionsForm { - border: solid $border; - background: $surface; - padding: 1; - height: auto; - } - - ExtractOptionsForm .form-section { - margin-bottom: 2; - padding: 1; - border: solid $border-lighten-1; - background: $surface-lighten-1; - } - - ExtractOptionsForm .form-row { - layout: horizontal; - align-items: start; - height: auto; - margin-bottom: 1; - } - - ExtractOptionsForm .form-label { - width: 30%; - min-width: 15; - text-align: right; - padding-right: 2; - padding-top: 1; - } - - ExtractOptionsForm .form-input { - width: 70%; - } - - ExtractOptionsForm .text-area { - height: 6; - } - """ - - def __init__( - self, - *, - name: str | None = None, - id: str | None = None, - classes: str | None = None, - disabled: bool = False, - markup: bool = True, - ) -> None: - """Initialize extract options form.""" - super().__init__(name=name, id=id, classes=classes, disabled=disabled, markup=markup) - - @override - def compose(self) -> ComposeResult: - """Compose extract options form.""" - yield Label("๐ŸŽฏ Data Extraction Configuration", classes="form-title") - - # Extraction prompt section - yield Container( - Label("AI-Powered Extraction", classes="section-title"), - Horizontal( - Label("Custom Prompt:", classes="form-label"), - TextArea( - placeholder="Extract product names, prices, and descriptions...", - id="extract_prompt", - classes="form-input text-area", - ), - classes="form-row", - ), - classes="form-section", - ) - - # Schema definition section - yield Container( - Label("Structured Schema (JSON)", classes="section-title"), - Horizontal( - Label("Schema Definition:", classes="form-label"), - TextArea( - placeholder='{"product_name": "string", "price": "number", "description": "string"}', - id="extract_schema", - classes="form-input text-area", - ), - classes="form-row", - ), - Container( - Label("๐Ÿ’ก Tip: Define the structure of data you want to extract"), - classes="help-text", - ), - classes="form-section", - ) - - # Schema presets - yield Container( - Label("Quick Presets", classes="section-title"), - Horizontal( - Button("๐Ÿ“„ Article", id="preset_article", variant="default"), - Button("๐Ÿ›๏ธ Product", id="preset_product", variant="default"), - Button("๐Ÿ‘ค Contact", id="preset_contact", variant="default"), - Button("๐Ÿ“Š Data", id="preset_data", variant="default"), - classes="preset-buttons", - ), - classes="form-section", - ) - - def get_extract_options(self) -> dict[str, object]: - """Get extraction options from form.""" - options: dict[str, object] = {} - - # Extract prompt - prompt = self.query_one("#extract_prompt", TextArea).text - if prompt.strip(): - 
options["extract_prompt"] = prompt.strip() - - # Extract schema - schema_text = self.query_one("#extract_schema", TextArea).text - if schema_text.strip(): - try: - schema = json.loads(schema_text) - options["extract_schema"] = schema - except json.JSONDecodeError: - # Invalid JSON, skip schema - pass - - return options - - def set_extract_options(self, options: dict[str, object]) -> None: - """Set form values from options.""" - if prompt := options.get("extract_prompt"): - self.query_one("#extract_prompt", TextArea).text = str(prompt) - - if schema := options.get("extract_schema"): - import json - - self.query_one("#extract_schema", TextArea).text = json.dumps(schema, indent=2) - - def on_button_pressed(self, event: Button.Pressed) -> None: - """Handle preset button presses.""" - schema_widget = self.query_one("#extract_schema", TextArea) - prompt_widget = self.query_one("#extract_prompt", TextArea) - - if event.button.id == "preset_article": - schema_widget.text = """{ - "title": "string", - "author": "string", - "date": "string", - "content": "string", - "tags": ["string"] -}""" - prompt_widget.text = "Extract article title, author, publication date, main content, and associated tags" - - elif event.button.id == "preset_product": - schema_widget.text = """{ - "name": "string", - "price": "number", - "description": "string", - "category": "string", - "availability": "string" -}""" - prompt_widget.text = "Extract product name, price, description, category, and availability status" - - elif event.button.id == "preset_contact": - schema_widget.text = """{ - "name": "string", - "email": "string", - "phone": "string", - "company": "string", - "position": "string" -}""" - prompt_widget.text = "Extract contact information including name, email, phone, company, and position" - - elif event.button.id == "preset_data": - schema_widget.text = """{ - "metrics": [{"name": "string", "value": "number", "unit": "string"}], - "tables": [{"headers": ["string"], "rows": [["string"]]}] -}""" - prompt_widget.text = "Extract numerical data, metrics, and tabular information" - - -class FirecrawlConfigWidget(Container): - """Complete Firecrawl configuration widget with tabbed interface.""" - - DEFAULT_CSS = """ - FirecrawlConfigWidget { - border: solid $border; - background: $surface; - height: 100%; - padding: 1; - } - - FirecrawlConfigWidget .config-header { - dock: top; - height: 3; - background: $primary; - color: $text; - padding: 1; - margin: -1 -1 1 -1; - } - - FirecrawlConfigWidget .tab-buttons { - dock: top; - height: 3; - layout: horizontal; - margin-bottom: 1; - } - - FirecrawlConfigWidget .tab-button { - width: 1fr; - margin-right: 1; - } - - FirecrawlConfigWidget .tab-content { - height: 1fr; - overflow: auto; - } - - FirecrawlConfigWidget .actions { - dock: bottom; - height: 3; - layout: horizontal; - align: center; - margin-top: 1; - } - """ - - def __init__( - self, - *, - name: str | None = None, - id: str | None = None, - classes: str | None = None, - disabled: bool = False, - markup: bool = True, - ) -> None: - """Initialize Firecrawl config widget.""" - super().__init__(name=name, id=id, classes=classes, disabled=disabled, markup=markup) - self.current_tab = "scrape" - - @override - def compose(self) -> ComposeResult: - """Compose config widget layout.""" - yield Container( - Label("๐Ÿ”ฅ Firecrawl Configuration", classes="config-header"), - Horizontal( - Button("๐Ÿ”ง Scraping", id="tab_scrape", variant="primary", classes="tab-button"), - Button("๐Ÿ—บ๏ธ Mapping", id="tab_map", 
variant="default", classes="tab-button"), - Button("๐ŸŽฏ Extraction", id="tab_extract", variant="default", classes="tab-button"), - classes="tab-buttons", - ), - Container( - ScrapeOptionsForm(id="scrape_form"), - classes="tab-content", - ), - Horizontal( - Button("๐Ÿ“‹ Load Preset", id="load_preset", variant="default"), - Button("๐Ÿ’พ Save Preset", id="save_preset", variant="default"), - Button("๐Ÿ”„ Reset", id="reset_config", variant="default"), - classes="actions", - ), - ) - - def on_mount(self) -> None: - """Initialize widget.""" - self.show_tab("scrape") - - def show_tab(self, tab_name: str) -> None: - """Show specific configuration tab.""" - self.current_tab = tab_name - - # Update button states - for tab in ["scrape", "map", "extract"]: - button = self.query_one(f"#tab_{tab}", Button) - button.variant = "primary" if tab == tab_name else "default" - # Update tab content - content_container = self.query_one(".tab-content", Container) - content_container.remove_children() - - if tab_name == "extract": - content_container.mount(ExtractOptionsForm(id="extract_form")) - elif tab_name == "map": - content_container.mount(MapOptionsForm(id="map_form")) - elif tab_name == "scrape": - content_container.mount(ScrapeOptionsForm(id="scrape_form")) - - def on_button_pressed(self, event: Button.Pressed) -> None: - """Handle button presses.""" - if event.button.id and event.button.id.startswith("tab_"): - tab_name = event.button.id[4:] # Remove "tab_" prefix - self.show_tab(tab_name) - - def get_all_options(self) -> FirecrawlOptions: - """Get all configuration options.""" - options: FirecrawlOptions = {} - - # Try to get options from currently mounted form - if self.current_tab == "scrape": - try: - form = self.query_one("#scrape_form", ScrapeOptionsForm) - scrape_opts = form.get_scrape_options() - options.update(cast(FirecrawlOptions, scrape_opts)) - except Exception: - pass - elif self.current_tab == "map": - try: - map_form = self.query_one("#map_form", MapOptionsForm) - map_opts = map_form.get_map_options() - options.update(cast(FirecrawlOptions, map_opts)) - except Exception: - pass - elif self.current_tab == "extract": - try: - extract_form = self.query_one("#extract_form", ExtractOptionsForm) - extract_opts = extract_form.get_extract_options() - options.update(cast(FirecrawlOptions, extract_opts)) - except Exception: - pass - - return options - - """Status indicators and progress bars with enhanced visual feedback.""" @@ -2212,906 +1536,6 @@ class EnhancedProgressBar(Static): status_display.update(f"๐Ÿš€ {self.status_text}") - -"""R2R-specific widgets for chunk viewing and entity visualization.""" - -from __future__ import annotations - -from typing import Any - -from textual import work -from textual.app import ComposeResult -from textual.containers import Container, Horizontal, Vertical, VerticalScroll -from textual.widgets import Button, DataTable, Label, Markdown, ProgressBar, Static, Tree -from typing_extensions import override - -from ....storage.r2r.storage import R2RStorage -from ..models import ChunkInfo, EntityInfo - - -class ChunkViewer(Container): - """Widget for viewing document chunks with navigation.""" - - DEFAULT_CSS = """ - ChunkViewer { - border: solid $border; - background: $surface; - height: 100%; - } - - ChunkViewer .chunk-header { - dock: top; - height: 3; - background: $primary; - color: $text; - padding: 1; - } - - ChunkViewer .chunk-navigation { - dock: top; - height: 3; - background: $surface-lighten-1; - padding: 1; - } - - ChunkViewer .chunk-content { - height: 
1fr;
-        padding: 1;
-        overflow: auto;
-    }
-
-    ChunkViewer .chunk-footer {
-        dock: bottom;
-        height: 3;
-        background: $surface-darken-1;
-        padding: 1;
-    }
-    """
-
-    def __init__(self, r2r_storage: R2RStorage, document_id: str, **kwargs: Any) -> None:
-        """Initialize chunk viewer."""
-        super().__init__(**kwargs)
-        self.r2r_storage = r2r_storage
-        self.document_id = document_id
-        self.chunks: list[ChunkInfo] = []
-        self.current_chunk_index = 0
-
-    @override
-    def compose(self) -> ComposeResult:
-        """Compose chunk viewer layout."""
-        yield Container(
-            Static("๐Ÿ“„ Document Chunks", classes="chunk-header"),
-            Horizontal(
-                Button("โ—€ Previous", id="prev_chunk", variant="default"),
-                Static("Chunk 1 of 1", id="chunk_info"),
-                Button("Next โ–ถ", id="next_chunk", variant="default"),
-                classes="chunk-navigation",
-            ),
-            VerticalScroll(
-                Markdown("", id="chunk_content"),
-                classes="chunk-content",
-            ),
-            Container(
-                Static("Loading chunks...", id="chunk_status"),
-                classes="chunk-footer",
-            ),
-        )
-
-    def on_mount(self) -> None:
-        """Initialize chunk viewer."""
-        self.load_chunks()
-
-    @work(exclusive=True)
-    async def load_chunks(self) -> None:
-        """Load document chunks."""
-        try:
-            chunks_data = await self.r2r_storage.get_document_chunks(self.document_id)
-            self.chunks = []
-
-            def _index(value: object) -> int:
-                """Coerce a chunk offset to int; malformed values fall back to 0."""
-                if isinstance(value, (int, str)):
-                    try:
-                        return int(value)
-                    except ValueError:
-                        return 0
-                return 0
-
-            for chunk_data in chunks_data:
-                chunk_info: ChunkInfo = {
-                    "id": str(chunk_data.get("id", "")),
-                    "document_id": self.document_id,
-                    "content": str(chunk_data.get("text", "")),
-                    "start_index": _index(chunk_data.get("start_index", 0)),
-                    "end_index": _index(chunk_data.get("end_index", 0)),
-                    "metadata": (
-                        dict(metadata_val)
-                        if (metadata_val := chunk_data.get("metadata")) and isinstance(metadata_val, dict)
-                        else {}
-                    ),
-                }
-                self.chunks.append(chunk_info)
-
-            if self.chunks:
-                self.current_chunk_index = 0
-                self.update_chunk_display()
-            else:
-                self.query_one("#chunk_status", Static).update("No chunks found")
-
-        except Exception as e:
-            self.query_one("#chunk_status", Static).update(f"Error loading chunks: {e}")
-
-    def update_chunk_display(self) -> None:
-        """Update chunk display with current chunk."""
-        if not self.chunks:
-            return
-
-        chunk = self.chunks[self.current_chunk_index]
-
-        # Update content
-        content_widget = self.query_one("#chunk_content", Markdown)
-        content_widget.update(chunk["content"])
-
-        # Update navigation info
-        chunk_info = self.query_one("#chunk_info", Static)
-        chunk_info.update(f"Chunk {self.current_chunk_index + 1} of {len(self.chunks)}")
-
-        # Update status
-        status_widget = self.query_one("#chunk_status", Static)
-        status_widget.update(
-            f"Chunk {chunk['id']} | "
-            f"Range: {chunk['start_index']}-{chunk['end_index']} | "
-            f"Length: {len(chunk['content'])} chars"
-        )
-
-        # Update button states
-        prev_btn = self.query_one("#prev_chunk", Button)
-        next_btn = self.query_one("#next_chunk", Button)
-        prev_btn.disabled = self.current_chunk_index == 0
-        next_btn.disabled = self.current_chunk_index >= len(self.chunks) - 1
-
-    def on_button_pressed(self, event: Button.Pressed) -> None:
-        """Handle button presses."""
-        if event.button.id == "prev_chunk" and self.current_chunk_index > 0:
-            self.current_chunk_index -= 1
-            self.update_chunk_display()
-        elif event.button.id == "next_chunk" and self.current_chunk_index < len(self.chunks) - 1:
-            self.current_chunk_index += 1
-            self.update_chunk_display()
-
-
-class EntityGraph(Container):
-    """Widget for visualizing extracted entities and
relationships.""" - - DEFAULT_CSS = """ - EntityGraph { - border: solid $border; - background: $surface; - height: 100%; - } - - EntityGraph .entity-header { - dock: top; - height: 3; - background: $primary; - color: $text; - padding: 1; - } - - EntityGraph .entity-tree { - height: 1fr; - overflow: auto; - } - - EntityGraph .entity-details { - dock: bottom; - height: 8; - background: $surface-lighten-1; - padding: 1; - border-top: solid $border; - } - """ - - def __init__(self, r2r_storage: R2RStorage, document_id: str, **kwargs: Any) -> None: - """Initialize entity graph.""" - super().__init__(**kwargs) - self.r2r_storage = r2r_storage - self.document_id = document_id - self.entities: list[EntityInfo] = [] - - @override - def compose(self) -> ComposeResult: - """Compose entity graph layout.""" - yield Container( - Static("๐Ÿ•ธ๏ธ Entity Graph", classes="entity-header"), - Tree("Entities", id="entity_tree", classes="entity-tree"), - VerticalScroll( - Label("Entity Details"), - Static("Select an entity to view details", id="entity_details"), - classes="entity-details", - ), - ) - - def on_mount(self) -> None: - """Initialize entity graph.""" - self.load_entities() - - @work(exclusive=True) - async def load_entities(self) -> None: - """Load entities from document.""" - try: - entities_data = await self.r2r_storage.extract_entities(self.document_id) - self.entities = [] - - # Parse entities from R2R response - entities_list = entities_data.get("entities", []) - if not isinstance(entities_list, list): - entities_list = [] - for entity_data in entities_list: - entity_info: EntityInfo = { - "id": str(entity_data.get("id", "")), - "name": str(entity_data.get("name", "")), - "type": str(entity_data.get("type", "unknown")), - "confidence": float(entity_data.get("confidence", 0.0)), - "metadata": dict(entity_data.get("metadata", {})), - } - self.entities.append(entity_info) - - self.populate_entity_tree() - - except Exception as e: - details_widget = self.query_one("#entity_details", Static) - details_widget.update(f"Error loading entities: {e}") - - def populate_entity_tree(self) -> None: - """Populate the entity tree.""" - tree = self.query_one("#entity_tree", Tree) - tree.clear() - - if not self.entities: - tree.root.add_leaf("No entities found") - return - - # Group entities by type - entities_by_type: dict[str, list[EntityInfo]] = {} - for entity in self.entities: - entity_type = entity["type"] - if entity_type not in entities_by_type: - entities_by_type[entity_type] = [] - entities_by_type[entity_type].append(entity) - - # Add entities to tree grouped by type - for entity_type, type_entities in entities_by_type.items(): - type_node = tree.root.add(f"{entity_type.title()} ({len(type_entities)})") - for entity in type_entities: - confidence_pct = int(entity["confidence"] * 100) - entity_node = type_node.add_leaf(f"{entity['name']} ({confidence_pct}%)") - entity_node.data = entity - - tree.root.expand() - - def on_tree_node_selected(self, event: Tree.NodeSelected[EntityInfo]) -> None: - """Handle entity selection.""" - if hasattr(event.node, "data") and event.node.data: - entity = event.node.data - self.show_entity_details(entity) - - def show_entity_details(self, entity: EntityInfo) -> None: - """Show detailed information about an entity.""" - details_widget = self.query_one("#entity_details", Static) - - details_text = f"""**Entity:** {entity['name']} -**Type:** {entity['type']} -**Confidence:** {entity['confidence']:.2%} -**ID:** {entity['id']} - -**Metadata:** -""" - for key, value in 
entity["metadata"].items(): - details_text += f"- **{key}:** {value}\n" - - details_widget.update(details_text) - - -class CollectionStats(Container): - """Widget for showing R2R-specific collection statistics.""" - - DEFAULT_CSS = """ - CollectionStats { - border: solid $border; - background: $surface; - height: 100%; - padding: 1; - } - - CollectionStats .stats-header { - dock: top; - height: 3; - background: $primary; - color: $text; - padding: 1; - margin: -1 -1 1 -1; - } - - CollectionStats .stats-grid { - layout: grid; - grid-size: 2; - grid-columns: 1fr 1fr; - grid-gutter: 1; - height: auto; - } - - CollectionStats .stat-card { - background: $surface-lighten-1; - border: solid $border; - padding: 1; - height: auto; - } - - CollectionStats .stat-value { - color: $primary; - text-style: bold; - text-align: center; - } - - CollectionStats .stat-label { - color: $text-muted; - text-align: center; - margin-top: 1; - } - - CollectionStats .progress-section { - margin-top: 2; - } - """ - - def __init__(self, r2r_storage: R2RStorage, collection_name: str, **kwargs: Any) -> None: - """Initialize collection stats.""" - super().__init__(**kwargs) - self.r2r_storage = r2r_storage - self.collection_name = collection_name - - @override - def compose(self) -> ComposeResult: - """Compose stats layout.""" - yield Container( - Static(f"๐Ÿ“Š {self.collection_name} Statistics", classes="stats-header"), - Container( - Container( - Static("0", id="document_count", classes="stat-value"), - Static("Documents", classes="stat-label"), - classes="stat-card", - ), - Container( - Static("0", id="chunk_count", classes="stat-value"), - Static("Chunks", classes="stat-label"), - classes="stat-card", - ), - Container( - Static("0", id="entity_count", classes="stat-value"), - Static("Entities", classes="stat-label"), - classes="stat-card", - ), - Container( - Static("0 MB", id="storage_size", classes="stat-value"), - Static("Storage Used", classes="stat-label"), - classes="stat-card", - ), - classes="stats-grid", - ), - Container( - Label("Processing Progress"), - ProgressBar(id="processing_progress", total=100, show_eta=False), - Static("Idle", id="processing_status"), - classes="progress-section", - ), - ) - - def on_mount(self) -> None: - """Initialize stats display.""" - self.refresh_stats() - - @work(exclusive=True) - async def refresh_stats(self) -> None: - """Refresh collection statistics.""" - try: - # Get basic document count - doc_count = await self.r2r_storage.count(collection_name=self.collection_name) - self.query_one("#document_count", Static).update(str(doc_count)) - - # Estimate other stats (these would need real implementation) - estimated_chunks = doc_count * 5 # Rough estimate - estimated_entities = doc_count * 10 # Rough estimate - estimated_size_mb = doc_count * 0.05 # Rough estimate - - self.query_one("#chunk_count", Static).update(str(estimated_chunks)) - self.query_one("#entity_count", Static).update(str(estimated_entities)) - self.query_one("#storage_size", Static).update(f"{estimated_size_mb:.1f} MB") - - # Update progress (would be real-time in actual implementation) - progress_bar = self.query_one("#processing_progress", ProgressBar) - progress_bar.progress = 100 # Assume complete for now - - status_widget = self.query_one("#processing_status", Static) - status_widget.update("All documents processed") - - except Exception as e: - self.query_one("#processing_status", Static).update(f"Error: {e}") - - -class DocumentOverview(Container): - """Widget for comprehensive document overview and 
statistics.""" - - DEFAULT_CSS = """ - DocumentOverview { - layout: vertical; - height: 100%; - } - - DocumentOverview .overview-header { - dock: top; - height: 3; - background: $primary; - color: $text; - padding: 1; - } - - DocumentOverview .overview-content { - height: 1fr; - layout: horizontal; - } - - DocumentOverview .overview-left { - width: 50%; - padding: 1; - } - - DocumentOverview .overview-right { - width: 50%; - padding: 1; - } - - DocumentOverview .info-table { - height: auto; - margin-bottom: 2; - } - """ - - def __init__(self, r2r_storage: R2RStorage, document_id: str, **kwargs: Any) -> None: - """Initialize document overview.""" - super().__init__(**kwargs) - self.r2r_storage = r2r_storage - self.document_id = document_id - - @override - def compose(self) -> ComposeResult: - """Compose overview layout.""" - yield Container( - Static("๐Ÿ“‹ Document Overview", classes="overview-header"), - Horizontal( - Vertical( - Label("Document Information"), - DataTable[str](id="doc_info_table", classes="info-table"), - Label("Processing Statistics"), - DataTable[str](id="stats_table", classes="info-table"), - classes="overview-left", - ), - Vertical( - ChunkViewer(self.r2r_storage, self.document_id), - classes="overview-right", - ), - classes="overview-content", - ), - ) - - def on_mount(self) -> None: - """Initialize overview.""" - self.load_overview() - - @work(exclusive=True) - async def load_overview(self) -> None: - """Load comprehensive document overview.""" - try: - overview_data = await self.r2r_storage.get_document_overview(self.document_id) - - # Populate document info table - doc_table = self.query_one("#doc_info_table", DataTable) - doc_table.add_columns("Property", "Value") - - document_info_raw = overview_data.get("document", {}) - document_info = document_info_raw if isinstance(document_info_raw, dict) else {} - doc_table.add_row("ID", str(document_info.get("id", "N/A"))) - doc_table.add_row("Title", str(document_info.get("title", "N/A"))) - doc_table.add_row("Created", str(document_info.get("created_at", "N/A"))) - doc_table.add_row("Modified", str(document_info.get("updated_at", "N/A"))) - - # Populate stats table - stats_table = self.query_one("#stats_table", DataTable) - stats_table.add_columns("Metric", "Count") - - chunk_count = overview_data.get("chunk_count", 0) - stats_table.add_row("Chunks", str(chunk_count)) - stats_table.add_row("Characters", str(len(str(document_info.get("content", ""))))) - - except Exception as e: - # Handle error by showing minimal info - doc_table = self.query_one("#doc_info_table", DataTable) - doc_table.add_columns("Property", "Value") - doc_table.add_row("Error", str(e)) - - - -"""Responsive layout system for TUI applications.""" - -from __future__ import annotations - -from typing import Any - -from textual.app import ComposeResult -from textual.containers import Container, VerticalScroll -from textual.widgets import Static -from typing_extensions import override - - -class ResponsiveGrid(Container): - """Grid that auto-adjusts based on terminal size.""" - - DEFAULT_CSS = """ - ResponsiveGrid { - layout: grid; - grid-size: 1; - grid-columns: 1fr; - grid-rows: auto; - grid-gutter: 1; - padding: 1; - } - - ResponsiveGrid.two-column { - grid-size: 2; - grid-columns: 1fr 1fr; - } - - ResponsiveGrid.three-column { - grid-size: 3; - grid-columns: 1fr 1fr 1fr; - } - - ResponsiveGrid.auto-fit { - grid-columns: repeat(auto-fit, minmax(20, 1fr)); - } - - ResponsiveGrid.compact { - grid-gutter: 0; - padding: 0; - } - """ - - def __init__( - 
self, - *children: Any, - columns: int = 1, - auto_fit: bool = False, - compact: bool = False, - **kwargs: Any, - ) -> None: - """Initialize responsive grid.""" - super().__init__(*children, **kwargs) - self.columns = columns - self.auto_fit = auto_fit - self.compact = compact - - def on_mount(self) -> None: - """Apply responsive classes based on configuration.""" - if self.auto_fit: - _ = self.add_class("auto-fit") - elif self.columns == 2: - _ = self.add_class("two-column") - elif self.columns == 3: - _ = self.add_class("three-column") - - if self.compact: - _ = self.add_class("compact") - - def on_resize(self) -> None: - """Adjust layout based on terminal size.""" - if self.auto_fit: - # Let CSS handle auto-fit - return - - terminal_width = self.size.width - if terminal_width < 60: - # Force single column on narrow terminals - _ = self.remove_class("two-column", "three-column") - self.styles.grid_size_columns = 1 - self.styles.grid_columns = "1fr" - elif terminal_width < 100 and self.columns > 2: - # Force two columns on medium terminals - _ = self.remove_class("three-column") - _ = self.add_class("two-column") - self.styles.grid_size_columns = 2 - self.styles.grid_columns = "1fr 1fr" - elif self.columns == 2: - _ = self.add_class("two-column") - elif self.columns == 3: - _ = self.add_class("three-column") - - -class CollapsibleSidebar(Container): - """Sidebar that can be collapsed to save space.""" - - DEFAULT_CSS = """ - CollapsibleSidebar { - dock: left; - width: 25%; - min-width: 20; - max-width: 40; - background: $surface; - border-right: solid $border; - padding: 1; - transition: width 300ms; - } - - CollapsibleSidebar.collapsed { - width: 3; - min-width: 3; - overflow: hidden; - } - - CollapsibleSidebar.collapsed > * { - display: none; - } - - CollapsibleSidebar .sidebar-toggle { - dock: top; - height: 1; - background: $primary; - color: $text; - text-align: center; - margin-bottom: 1; - } - - CollapsibleSidebar .sidebar-content { - height: 1fr; - overflow-y: auto; - } - """ - - def __init__(self, *children: Any, collapsed: bool = False, **kwargs: Any) -> None: - """Initialize collapsible sidebar.""" - super().__init__(**kwargs) - self.collapsed = collapsed - self._children = children - - @override - def compose(self) -> ComposeResult: - """Compose sidebar with toggle and content.""" - yield Static("โ˜ฐ", classes="sidebar-toggle") - with VerticalScroll(classes="sidebar-content"): - yield from self._children - - def on_mount(self) -> None: - """Apply initial collapsed state.""" - if self.collapsed: - _ = self.add_class("collapsed") - - def on_click(self) -> None: - """Toggle sidebar when clicked.""" - self.toggle() - - def toggle(self) -> None: - """Toggle sidebar collapsed state.""" - self.collapsed = not self.collapsed - if self.collapsed: - _ = self.add_class("collapsed") - else: - _ = self.remove_class("collapsed") - - def expand_sidebar(self) -> None: - """Expand sidebar.""" - if self.collapsed: - self.toggle() - - def collapse_sidebar(self) -> None: - """Collapse sidebar.""" - if not self.collapsed: - self.toggle() - - -class TabularLayout(Container): - """Optimized layout for data tables with optional sidebar.""" - - DEFAULT_CSS = """ - TabularLayout { - layout: horizontal; - height: 100%; - } - - TabularLayout .main-content { - width: 1fr; - height: 100%; - layout: vertical; - } - - TabularLayout .table-container { - height: 1fr; - overflow: auto; - border: solid $border; - background: $surface; - } - - TabularLayout .table-header { - dock: top; - height: 3; - background: 
$primary; - color: $text; - padding: 1; - } - - TabularLayout .table-footer { - dock: bottom; - height: 3; - background: $surface-lighten-1; - padding: 1; - border-top: solid $border; - } - """ - - def __init__( - self, - table_widget: Any, - header_content: Any | None = None, - footer_content: Any | None = None, - sidebar_content: Any | None = None, - **kwargs: Any, - ) -> None: - """Initialize tabular layout.""" - super().__init__(**kwargs) - self.table_widget = table_widget - self.header_content = header_content - self.footer_content = footer_content - self.sidebar_content = sidebar_content - - @override - def compose(self) -> ComposeResult: - """Compose layout with optional sidebar.""" - if self.sidebar_content: - yield CollapsibleSidebar(self.sidebar_content) - - with Container(classes="main-content"): - if self.header_content: - yield Container(self.header_content, classes="table-header") - - yield Container(self.table_widget, classes="table-container") - - if self.footer_content: - yield Container(self.footer_content, classes="table-footer") - - -class CardLayout(ResponsiveGrid): - """Grid layout optimized for card-based content.""" - - DEFAULT_CSS = """ - CardLayout { - grid-gutter: 2; - padding: 2; - } - - CardLayout .card { - background: $surface; - border: solid $border; - border-radius: 1; - padding: 2; - height: auto; - min-height: 10; - } - - CardLayout .card:hover { - border: solid $accent; - background: $surface-lighten-1; - } - - CardLayout .card:focus { - border: solid $primary; - } - - CardLayout .card-header { - dock: top; - height: 3; - background: $primary-lighten-1; - color: $text; - padding: 1; - margin: -2 -2 1 -2; - border-radius: 1 1 0 0; - } - - CardLayout .card-content { - height: 1fr; - overflow: auto; - } - - CardLayout .card-footer { - dock: bottom; - height: 3; - background: $surface-darken-1; - padding: 1; - margin: 1 -2 -2 -2; - border-radius: 0 0 1 1; - } - """ - - def __init__(self, **kwargs: Any) -> None: - """Initialize card layout with default settings for cards.""" - # Default to auto-fit cards with minimum width - super().__init__(auto_fit=True, **kwargs) - - -class SplitPane(Container): - """Resizable split pane layout.""" - - DEFAULT_CSS = """ - SplitPane { - layout: horizontal; - height: 100%; - } - - SplitPane.vertical { - layout: vertical; - } - - SplitPane .left-pane, - SplitPane .top-pane { - width: 50%; - height: 50%; - background: $surface; - border-right: solid $border; - border-bottom: solid $border; - } - - SplitPane .right-pane, - SplitPane .bottom-pane { - width: 50%; - height: 50%; - background: $surface; - } - - SplitPane .splitter { - width: 1; - height: 1; - background: $border; - } - - SplitPane.vertical .splitter { - width: 100%; - height: 1; - } - """ - - def __init__( - self, - left_content: Any, - right_content: Any, - vertical: bool = False, - split_ratio: float = 0.5, - **kwargs: Any, - ) -> None: - """Initialize split pane.""" - super().__init__(**kwargs) - self.left_content = left_content - self.right_content = right_content - self.vertical = vertical - self.split_ratio = split_ratio - - @override - def compose(self) -> ComposeResult: - """Compose split pane layout.""" - if self.vertical: - _ = self.add_class("vertical") - - pane_classes = ("top-pane", "bottom-pane") if self.vertical else ("left-pane", "right-pane") - - yield Container(self.left_content, classes=pane_classes[0]) - yield Static("", classes="splitter") - yield Container(self.right_content, classes=pane_classes[1]) - - def on_mount(self) -> None: - """Apply 
split ratio."""
-        # query_one() is already scoped to this widget, so the panes can be
-        # matched by their CSS class alone; a class selector built from the
-        # widget's type name would never match.
-        if self.vertical:
-            self.query_one(".top-pane").styles.height = f"{self.split_ratio * 100}%"
-            self.query_one(".bottom-pane").styles.height = f"{(1 - self.split_ratio) * 100}%"
-        else:
-            self.query_one(".left-pane").styles.width = f"{self.split_ratio * 100}%"
-            self.query_one(".right-pane").styles.width = f"{(1 - self.split_ratio) * 100}%"
-
 
 """Data models and TypedDict definitions for the TUI."""
 
@@ -3720,847 +2144,6 @@ class R2RCollections:
 
         return results
 
 
-
-"""R2R storage implementation using the official R2R SDK."""
-
-from __future__ import annotations
-
-import asyncio
-import contextlib
-from collections.abc import AsyncGenerator, Iterable, Mapping, Sequence
-from datetime import UTC, datetime
-from typing import Self, TypeVar, cast
-from uuid import UUID, uuid4
-
-from r2r import R2RAsyncClient, R2RException
-from typing_extensions import override
-
-# Direct imports for runtime and type checking.
-# Note: Some type checkers (basedpyright/Pyrefly) may report import issues,
-# but these work correctly at runtime and with mypy.
-from httpx import AsyncClient, HTTPStatusError
-
-from ...core.exceptions import StorageError
-from ...core.models import Document, DocumentMetadata, IngestionSource, StorageConfig
-from ..base import BaseStorage
-
-T = TypeVar("T")
-
-
-def _as_mapping(value: object) -> dict[str, object]:
-    if isinstance(value, Mapping):
-        return dict(cast(Mapping[str, object], value))
-    if hasattr(value, "__dict__"):
-        return dict(cast(Mapping[str, object], value.__dict__))
-    return {}
-
-
-def _as_sequence(value: object) -> tuple[object, ...]:
-    """Convert value to a tuple of objects; strings and bytes stay scalar."""
-    if isinstance(value, (str, bytes)):
-        return ()
-    if isinstance(value, Sequence):
-        return tuple(value)
-    return tuple(value) if isinstance(value, Iterable) else ()
-
-
-def _extract_id(source: object, fallback: str) -> str:
-    mapping = _as_mapping(source)
-    identifier = mapping.get("id") if mapping else None
-    if identifier is None and hasattr(source, "id"):
-        identifier = getattr(source, "id", None)
-    return fallback if identifier is None else str(identifier)
-
-
-def _as_datetime(value: object) -> datetime:
-    if isinstance(value, datetime):
-        return value
-    if isinstance(value, str):
-        with contextlib.suppress(ValueError):
-            return datetime.fromisoformat(value)
-    return datetime.now(UTC)
-
-
-def _as_int(value: object, default: int = 0) -> int:
-    if isinstance(value, bool):
-        return int(value)
-    if isinstance(value, int):
-        return value
-    if isinstance(value, float):
-        return int(value)
-    if isinstance(value, str):
-        try:
-            return int(float(value)) if "."
in value else int(value) - except ValueError: - return default - return default - - -class R2RStorage(BaseStorage): - """R2R storage implementation using the official R2R SDK.""" - - def __init__(self, config: StorageConfig) -> None: - """Initialize R2R storage with SDK client.""" - super().__init__(config) - self.endpoint: str = str(config.endpoint).rstrip("/") - self.client: R2RAsyncClient = R2RAsyncClient(self.endpoint) - self.default_collection_id: str | None = None - - @override - async def initialize(self) -> None: - """Initialize R2R connection and ensure default collection exists.""" - try: - # Ensure we have an event loop - try: - _ = asyncio.get_running_loop() - except RuntimeError: - # No event loop running, this should not happen in async context - # but let's be defensive - import logging - - logging.warning("No event loop found during R2R initialization") - - # Test connection using direct HTTP call to v3 API - endpoint = self.endpoint - client = AsyncClient() - try: - response = await client.get(f"{endpoint}/v3/collections") - response.raise_for_status() - finally: - await client.aclose() - _ = await self._ensure_collection(self.config.collection_name) - except Exception as e: - raise StorageError(f"Failed to initialize R2R: {e}") from e - - async def _ensure_collection(self, collection_name: str) -> str: - """Get or create collection by name.""" - try: - endpoint = self.endpoint - client = AsyncClient() - try: - # List collections and find by name - response = await client.get(f"{endpoint}/v3/collections") - response.raise_for_status() - data: dict[str, object] = response.json() - - results = cast(list[dict[str, object]], data.get("results", [])) - for collection in results: - if collection.get("name") == collection_name: - collection_id = str(collection.get("id")) - if collection_name == self.config.collection_name: - self.default_collection_id = collection_id - return collection_id - - # Create if not found - create_response = await client.post( - f"{endpoint}/v3/collections", - json={ - "name": collection_name, - "description": f"Auto-created collection: {collection_name}", - }, - ) - create_response.raise_for_status() - created: dict[str, object] = create_response.json() - created_results = cast(dict[str, object], created.get("results", {})) - collection_id = str(created_results.get("id")) - - if collection_name == self.config.collection_name: - self.default_collection_id = collection_id - - return collection_id - finally: - await client.aclose() - - except Exception as e: - raise StorageError(f"Failed to ensure collection '{collection_name}': {e}") from e - - @override - async def store(self, document: Document, *, collection_name: str | None = None) -> str: - """Store a single document.""" - return (await self.store_batch([document], collection_name=collection_name))[0] - - @override - async def store_batch( - self, documents: list[Document], *, collection_name: str | None = None - ) -> list[str]: - """Store multiple documents.""" - # Fix: Always ensure we have the correct collection ID - if collection_name: - # If a specific collection is requested, get its ID - collection_id = await self._ensure_collection(collection_name) - else: - # If no collection specified, use the default one from config - if self.default_collection_id: - collection_id = self.default_collection_id - else: - # Fallback: ensure the default collection exists - collection_id = await self._ensure_collection(self.config.collection_name) - self.default_collection_id = collection_id - - print( - 
f"Using collection ID: {collection_id} for collection: {collection_name or self.config.collection_name}" - ) - - stored_ids: list[str] = [] - failed_documents: list[Document] = [] - for document in documents: - try: - # Create document with explicit ID using direct HTTP call - requested_id = str(document.id) - print(f"Creating document with ID: {requested_id}") - - # Validate document before sending to R2R - if not document.content or not document.content.strip(): - print(f"Skipping document {requested_id}: empty content") - failed_documents.append(document) - continue - - if len(document.content) > 1_000_000: # 1MB limit - print( - f"Skipping document {requested_id}: content too large ({len(document.content)} chars)" - ) - failed_documents.append(document) - continue - - # Use direct HTTP call with proper multipart form-data format - import asyncio - import json - - max_retries = 3 - retry_delay = 1.0 - doc_response = None # Initialize variable to avoid UnboundLocalError - - for attempt in range(max_retries): - try: - async with AsyncClient() as http_client: - # Use files parameter but with string values for multipart/form-data - # This matches the cURL -F behavior more closely - metadata = self._build_metadata(document) - print(f"Built metadata for document {requested_id}: {metadata}") - - files = { - "raw_text": (None, document.content), - "metadata": (None, json.dumps(metadata)), - "id": (None, requested_id), - "ingestion_mode": (None, "hi-res"), # Enable R2R enrichment - } - - # Add collection_ids if we have a collection to assign to - if collection_id: - files["collection_ids"] = (None, json.dumps([collection_id])) - print( - f"Creating document {requested_id} with collection_ids: [{collection_id}]" - ) - - print(f"Sending to R2R - files keys: {list(files.keys())}") - print(f"Metadata JSON: {files['metadata'][1]}") - - response = await http_client.post( - f"{self.endpoint}/v3/documents", - files=files, - ) - - if response.status_code == 422: - # Get detailed error information for 422 responses - try: - error_detail = response.json() - print( - f"R2R validation error for document {requested_id}: {error_detail}" - ) - print(f"Document content length: {len(document.content)}") - print(f"Document metadata sent: {metadata}") - print(f"Response status: {response.status_code}") - print(f"Response headers: {dict(response.headers)}") - except Exception: - print( - f"R2R validation error for document {requested_id}: {response.text}" - ) - print(f"Document metadata sent: {metadata}") - # Don't retry validation errors - break - - if response.status_code >= 500: - # Server error - retry - if attempt < max_retries - 1: - print( - f"Server error {response.status_code} for document {requested_id}, retrying in {retry_delay}s..." - ) - await asyncio.sleep(retry_delay) - retry_delay *= 2 # Exponential backoff - continue - - response.raise_for_status() - doc_response = response.json() - break # Success - exit retry loop - - except (OSError, asyncio.TimeoutError): - if attempt < max_retries - 1: - print( - f"Timeout for document {requested_id}, retrying in {retry_delay}s..." - ) - await asyncio.sleep(retry_delay) - retry_delay *= 2 - continue - else: - raise - except HTTPStatusError as e: - if e.response.status_code >= 500 and attempt < max_retries - 1: - print( - f"Server error {e.response.status_code} for document {requested_id}, retrying in {retry_delay}s..." 
- ) - await asyncio.sleep(retry_delay) - retry_delay *= 2 - continue - else: - raise - - # Only process response if we have a successful doc_response - if doc_response is not None: - response_payload = doc_response.get("results", doc_response) - doc_id = _extract_id(response_payload, requested_id) - - print(f"R2R returned document ID: {doc_id}") - - # Verify the ID matches what we requested - if doc_id != requested_id: - print(f"Warning: Requested ID {requested_id} but got {doc_id}") - - # Collection assignment is now handled during document creation - # No need to add to collection afterward if collection_ids was provided - if collection_id: - print( - f"Document {doc_id} should be assigned to collection {collection_id} via creation API" - ) - - stored_ids.append(doc_id) - else: - print(f"No successful response received for document {requested_id}") - failed_documents.append(document) - - except Exception as exc: - print(f"Failed to store document {document.id}: {exc}") - failed_documents.append(document) - - # Log specific error types for debugging - if "422" in str(exc): - print(" โ†’ Data validation issue - check document content and metadata format") - elif "timeout" in str(exc).lower(): - print(" โ†’ Network timeout - R2R may be overloaded") - elif "500" in str(exc): - print(" โ†’ Server error - R2R internal issue") - else: - import traceback - - traceback.print_exc() - continue - - return stored_ids - - def _build_metadata(self, document: Document) -> dict[str, object]: - """Convert document metadata to enriched R2R format.""" - metadata = document.metadata - - - # Core required fields - result: dict[str, object] = { - "source_url": metadata["source_url"], - "content_type": metadata["content_type"], - "word_count": metadata["word_count"], - "char_count": metadata["char_count"], - "timestamp": metadata["timestamp"].isoformat(), - "ingestion_source": document.source.value, - } - - # Basic optional fields - if title := metadata.get("title"): - result["title"] = title - if description := metadata.get("description"): - result["description"] = description - - # Content categorization - if tags := metadata.get("tags"): - result["tags"] = tags - if category := metadata.get("category"): - result["category"] = category - if section := metadata.get("section"): - result["section"] = section - if language := metadata.get("language"): - result["language"] = language - - # Authorship and source info - if author := metadata.get("author"): - result["author"] = author - if domain := metadata.get("domain"): - result["domain"] = domain - if site_name := metadata.get("site_name"): - result["site_name"] = site_name - - # Document structure - if heading_hierarchy := metadata.get("heading_hierarchy"): - result["heading_hierarchy"] = heading_hierarchy - if section_depth := metadata.get("section_depth"): - result["section_depth"] = section_depth - if has_code_blocks := metadata.get("has_code_blocks"): - result["has_code_blocks"] = has_code_blocks - if has_images := metadata.get("has_images"): - result["has_images"] = has_images - if has_links := metadata.get("has_links"): - result["has_links"] = has_links - - # Processing metadata - if extraction_method := metadata.get("extraction_method"): - result["extraction_method"] = extraction_method - if crawl_depth := metadata.get("crawl_depth"): - result["crawl_depth"] = crawl_depth - if last_modified := metadata.get("last_modified"): - result["last_modified"] = last_modified.isoformat() if last_modified else None - - # Content quality indicators - if 
readability_score := metadata.get("readability_score"): - result["readability_score"] = readability_score - if completeness_score := metadata.get("completeness_score"): - result["completeness_score"] = completeness_score - - # Repository-specific fields - if file_path := metadata.get("file_path"): - result["file_path"] = file_path - if repository_name := metadata.get("repository_name"): - result["repository_name"] = repository_name - if branch_name := metadata.get("branch_name"): - result["branch_name"] = branch_name - if commit_hash := metadata.get("commit_hash"): - result["commit_hash"] = commit_hash - if programming_language := metadata.get("programming_language"): - result["programming_language"] = programming_language - - # Custom business metadata - if importance_score := metadata.get("importance_score"): - result["importance_score"] = importance_score - if review_status := metadata.get("review_status"): - result["review_status"] = review_status - if assigned_team := metadata.get("assigned_team"): - result["assigned_team"] = assigned_team - - return result - - @override - async def retrieve( - self, document_id: str, *, collection_name: str | None = None - ) -> Document | None: - """Retrieve a document by ID.""" - try: - response = await self.client.documents.retrieve(document_id) - except R2RException as exc: - status_code = getattr(exc, "status_code", None) - if status_code == 404: - return None - import logging - - logging.warning(f"Unexpected error retrieving document {document_id}: {exc}") - return None - except Exception as error: - import logging - - logging.warning(f"Unexpected error retrieving document {document_id}: {error}") - return None - payload = getattr(response, "results", response) - return self._convert_to_document(payload, collection_name) - - def _convert_to_document(self, r2r_doc: object, collection_name: str | None = None) -> Document: - """Convert R2R document payload to our Document model.""" - doc_map = _as_mapping(r2r_doc) - metadata_map = _as_mapping(doc_map.get("metadata", {})) - - - doc_id_str = _extract_id(r2r_doc, str(uuid4())) - try: - doc_uuid = UUID(doc_id_str) - except ValueError: - doc_uuid = uuid4() - - timestamp = _as_datetime(doc_map.get("created_at", metadata_map.get("timestamp"))) - - metadata: DocumentMetadata = { - # Core required fields - "source_url": str(metadata_map.get("source_url", "")), - "timestamp": timestamp, - "content_type": str(metadata_map.get("content_type", "text/plain")), - "word_count": _as_int(metadata_map.get("word_count")), - "char_count": _as_int(metadata_map.get("char_count")), - } - - # Add optional fields if present - # Check for title in both top-level and metadata (R2R schema has title as top-level field) - if title := (doc_map.get("title") or metadata_map.get("title")): - metadata["title"] = cast(str | None, title) - # Check for summary in top-level R2R field (R2R schema has summary as top-level field) - if summary := (doc_map.get("summary") or metadata_map.get("summary")): - metadata["description"] = cast(str | None, summary) - elif description := metadata_map.get("description"): - metadata["description"] = cast(str | None, description) - if tags := metadata_map.get("tags"): - metadata["tags"] = [str(tag) for tag in tags] if isinstance(tags, list) else [] - if category := metadata_map.get("category"): - metadata["category"] = str(category) - if section := metadata_map.get("section"): - metadata["section"] = str(section) - if language := metadata_map.get("language"): - metadata["language"] = str(language) - if 
author := metadata_map.get("author"): - metadata["author"] = str(author) - if domain := metadata_map.get("domain"): - metadata["domain"] = str(domain) - if site_name := metadata_map.get("site_name"): - metadata["site_name"] = str(site_name) - if heading_hierarchy := metadata_map.get("heading_hierarchy"): - metadata["heading_hierarchy"] = ( - list(heading_hierarchy) if isinstance(heading_hierarchy, list) else [] - ) - if section_depth := metadata_map.get("section_depth"): - metadata["section_depth"] = _as_int(section_depth) - if has_code_blocks := metadata_map.get("has_code_blocks"): - metadata["has_code_blocks"] = bool(has_code_blocks) - if has_images := metadata_map.get("has_images"): - metadata["has_images"] = bool(has_images) - if has_links := metadata_map.get("has_links"): - metadata["has_links"] = bool(has_links) - if extraction_method := metadata_map.get("extraction_method"): - metadata["extraction_method"] = str(extraction_method) - if crawl_depth := metadata_map.get("crawl_depth"): - metadata["crawl_depth"] = _as_int(crawl_depth) - if last_modified := metadata_map.get("last_modified"): - metadata["last_modified"] = _as_datetime(last_modified) - if readability_score := metadata_map.get("readability_score"): - try: - metadata["readability_score"] = float(str(readability_score)) - except (ValueError, TypeError): - metadata["readability_score"] = None - if completeness_score := metadata_map.get("completeness_score"): - try: - metadata["completeness_score"] = float(str(completeness_score)) - except (ValueError, TypeError): - metadata["completeness_score"] = None - - source_value = str(metadata_map.get("ingestion_source", IngestionSource.WEB.value)) - try: - source_enum = IngestionSource(source_value) - except ValueError: - source_enum = IngestionSource.WEB - - content_value = doc_map.get("content", getattr(r2r_doc, "content", "")) - - return Document( - id=doc_uuid, - content=str(content_value), - metadata=metadata, - source=source_enum, - collection=collection_name or self.config.collection_name, - ) - - @override - async def search( - self, - query: str, - limit: int = 10, - threshold: float = 0.7, - *, - collection_name: str | None = None, - ) -> AsyncGenerator[Document, None]: - """Search documents using R2R.""" - try: - search_settings: dict[str, object] = { - "limit": limit, - "similarity_threshold": threshold, - } - - if collection_name: - collection_id = await self._ensure_collection(collection_name) - search_settings["collection_ids"] = [collection_id] - - search_response = await self.client.retrieval.search( - query=query, - search_settings=search_settings, - ) - - for result in _as_sequence(getattr(search_response, "results", ())): - result_map = _as_mapping(result) - document_id_value = result_map.get( - "document_id", getattr(result, "document_id", None) - ) - if document_id_value is None: - continue - document_id = str(document_id_value) - - try: - doc_response = await self.client.documents.retrieve(document_id) - except R2RException as exc: - import logging - - logging.warning( - f"Failed to retrieve document {document_id} during search: {exc}" - ) - continue - - document_payload = getattr(doc_response, "results", doc_response) - document = self._convert_to_document(document_payload, collection_name) - - score_value = result_map.get("score", getattr(result, "score", None)) - if score_value is not None: - try: - # Handle various score value types safely - if isinstance(score_value, (int, float, str)): - document.score = float(score_value) - else: - # For unknown types, try 
string conversion first - document.score = float(str(score_value)) - except (TypeError, ValueError) as e: - import logging - - logging.debug( - f"Invalid score value {score_value} for document {document_id}: {e}" - ) - document.score = None - - yield document - - except R2RException as exc: - raise StorageError(f"Search failed: {exc}") from exc - - @override - async def delete(self, document_id: str, *, collection_name: str | None = None) -> bool: - """Delete a document.""" - try: - _ = await self.client.documents.delete(document_id) - return True - except R2RException: - return False - - @override - async def count(self, *, collection_name: str | None = None) -> int: - """Get document count in collection.""" - try: - endpoint = self.endpoint - client = AsyncClient() - try: - # Get collections and find the count for the specific collection - response = await client.get(f"{endpoint}/v3/collections") - response.raise_for_status() - data: dict[str, object] = response.json() - - target_collection = collection_name or self.config.collection_name - results = cast(list[dict[str, object]], data.get("results", [])) - for collection in results: - if collection.get("name") == target_collection: - doc_count = collection.get("document_count", 0) - return _as_int(doc_count) - - return 0 - finally: - await client.aclose() - except Exception: - return 0 - - @override - async def close(self) -> None: - """Close R2R client.""" - try: - await self.client.close() - except Exception as e: - import logging - - logging.warning(f"Error closing R2R client: {e}") - - async def __aenter__(self) -> Self: - """Async context manager entry.""" - return self - - async def __aexit__( - self, - exc_type: type[BaseException] | None, - exc_val: BaseException | None, - exc_tb: object | None, - ) -> None: - """Async context manager exit with proper cleanup.""" - await self.close() - - # Additional R2R-specific comprehensive management methods - - async def create_collection(self, name: str, description: str | None = None) -> str: - """Create a new collection.""" - try: - response = await self.client.collections.create(name=name, description=description) - created = _as_mapping(getattr(response, "results", {})) - return str(created.get("id", name)) - except R2RException as exc: - raise StorageError(f"Failed to create collection {name}: {exc}") from exc - - async def delete_collection(self, collection_name: str) -> bool: - """Delete a collection.""" - try: - collection_id = await self._ensure_collection(collection_name) - _ = await self.client.collections.delete(collection_id) - return True - except R2RException: - return False - - @override - async def list_collections(self) -> list[str]: - """List all available collections.""" - try: - endpoint = self.endpoint - client = AsyncClient() - try: - response = await client.get(f"{endpoint}/v3/collections") - response.raise_for_status() - data: dict[str, object] = response.json() - - collection_names: list[str] = [] - results = cast(list[dict[str, object]], data.get("results", [])) - for entry in results: - if name := entry.get("name"): - collection_names.append(str(name)) - return collection_names - finally: - await client.aclose() - except Exception as e: - raise StorageError(f"Failed to list collections: {e}") from e - - async def list_collections_detailed(self) -> list[dict[str, object]]: - """List all available collections with detailed information.""" - try: - response = await self.client.collections.list() - collections: list[dict[str, object]] = [] - for entry in 
_as_sequence(getattr(response, "results", ())): - entry_map = _as_mapping(entry) - collections.append( - { - "id": str(entry_map.get("id", "")), - "name": str(entry_map.get("name", "")), - "description": entry_map.get("description"), - } - ) - return collections - except R2RException as exc: - raise StorageError(f"Failed to list collections: {exc}") from exc - - async def get_document_chunks(self, document_id: str) -> list[dict[str, object]]: - """Get all chunks for a specific document.""" - try: - response = await self.client.chunks.list(filters={"document_id": document_id}) - return [ - dict(_as_mapping(chunk)) for chunk in _as_sequence(getattr(response, "results", ())) - ] - except R2RException as exc: - raise StorageError(f"Failed to get chunks for document {document_id}: {exc}") from exc - - async def extract_entities(self, document_id: str) -> dict[str, object]: - """Extract entities and relationships from a document.""" - try: - response = await self.client.documents.extract(id=document_id) - return dict(_as_mapping(getattr(response, "results", {}))) - except R2RException as exc: - raise StorageError( - f"Failed to extract entities from document {document_id}: {exc}" - ) from exc - - async def get_document_overview(self, document_id: str) -> dict[str, object]: - """Get comprehensive document overview and statistics.""" - try: - doc_response = await self.client.documents.retrieve(document_id) - chunks_response = await self.client.chunks.list(filters={"document_id": document_id}) - document_payload = dict(_as_mapping(getattr(doc_response, "results", {}))) - chunk_payload = [ - dict(_as_mapping(chunk)) - for chunk in _as_sequence(getattr(chunks_response, "results", ())) - ] - return { - "document": document_payload, - "chunk_count": len(chunk_payload), - "chunks": chunk_payload, - } - except R2RException as exc: - raise StorageError(f"Failed to get overview for document {document_id}: {exc}") from exc - - @override - async def list_documents( - self, - limit: int = 100, - offset: int = 0, - *, - collection_name: str | None = None, - ) -> list[dict[str, object]]: - """ - List documents in R2R with pagination. 
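-
-        Example (illustrative; assumes an initialized R2RStorage instance and
-        a collection named "docs"):
-            docs = await storage.list_documents(limit=50, collection_name="docs")
-            titles = [str(doc["title"]) for doc in docs]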
- - Args: - limit: Maximum number of documents to return - offset: Number of documents to skip - collection_name: Collection name (optional) - - Returns: - List of document dictionaries with metadata - """ - try: - documents: list[dict[str, object]] = [] - - if collection_name: - # Get collection ID first - collection_id = await self._ensure_collection(collection_name) - # Use the collections API to list documents in a specific collection - endpoint = self.endpoint - client = AsyncClient() - try: - params = {"offset": offset, "limit": limit} - response = await client.get( - f"{endpoint}/v3/collections/{collection_id}/documents", params=params - ) - response.raise_for_status() - data: dict[str, object] = response.json() - finally: - await client.aclose() - - doc_sequence = _as_sequence(data.get("results", [])) - else: - # List all documents - r2r_response = await self.client.documents.list(offset=offset, limit=limit) - documents_data: list[object] | dict[str, object] = getattr( - r2r_response, "results", [] - ) - - doc_sequence = _as_sequence( - documents_data.get("results", []) - if isinstance(documents_data, dict) - else documents_data - ) - - for doc_data in doc_sequence: - doc_map = _as_mapping(doc_data) - - # Extract standard document fields - doc_id = str(doc_map.get("id", "")) - title = str(doc_map.get("title", "Untitled")) - metadata = _as_mapping(doc_map.get("metadata", {})) - - documents.append( - { - "id": doc_id, - "title": title, - "source_url": str(metadata.get("source_url", "")), - "description": str(metadata.get("description", "")), - "content_type": str(metadata.get("content_type", "text/plain")), - "content_preview": str(doc_map.get("content", ""))[:200] + "..." - if doc_map.get("content") - else "", - "word_count": _as_int(metadata.get("word_count", 0)), - "timestamp": str(doc_map.get("created_at", "")), - } - ) - - return documents - - except Exception as e: - raise StorageError(f"Failed to list documents: {e}") from e - - """Storage adapters for different backends.""" @@ -4588,1017 +2171,6 @@ __all__ = [ ] - -"""Main dashboard screen with collections overview.""" - -import logging -from datetime import datetime -from typing import TYPE_CHECKING, Final - -from textual import work -from textual.app import ComposeResult -from textual.binding import Binding -from textual.containers import Container, Grid, Horizontal -from textual.css.query import NoMatches -from textual.reactive import reactive, var -from textual.screen import Screen -from textual.widgets import ( - Button, - Footer, - Header, - LoadingIndicator, - Rule, - Static, - TabbedContent, - TabPane, -) -from typing_extensions import override - -from ....core.models import StorageBackend -from ....storage.base import BaseStorage -from ....storage.openwebui import OpenWebUIStorage -from ....storage.weaviate import WeaviateStorage -from ..models import CollectionInfo -from ..utils.storage_manager import StorageManager -from ..widgets import EnhancedDataTable, MetricsCard, StatusIndicator - -if TYPE_CHECKING: - from ....storage.r2r.storage import R2RStorage -else: # pragma: no cover - optional dependency fallback - R2RStorage = BaseStorage - - -LOGGER: Final[logging.Logger] = logging.getLogger(__name__) - - -class CollectionOverviewScreen(Screen[None]): - """Enhanced dashboard with modern design and metrics.""" - - total_documents: int = 0 - total_collections: int = 0 - active_backends: int = 0 - - BINDINGS = [ - Binding("q", "quit", "Quit"), - Binding("r", "refresh", "Refresh"), - Binding("i", "ingest", "Ingest"), - 
-
-"""Main dashboard screen with collections overview."""
-
-import logging
-from datetime import datetime
-from typing import TYPE_CHECKING, Final
-
-from textual import work
-from textual.app import ComposeResult
-from textual.binding import Binding
-from textual.containers import Container, Grid, Horizontal
-from textual.css.query import NoMatches
-from textual.reactive import reactive, var
-from textual.screen import Screen
-from textual.widgets import (
-    Button,
-    Footer,
-    Header,
-    LoadingIndicator,
-    Rule,
-    Static,
-    TabbedContent,
-    TabPane,
-)
-from typing_extensions import override
-
-from ....core.models import StorageBackend
-from ....storage.base import BaseStorage
-from ....storage.openwebui import OpenWebUIStorage
-from ....storage.weaviate import WeaviateStorage
-from ..models import CollectionInfo
-from ..utils.storage_manager import StorageManager
-from ..widgets import EnhancedDataTable, MetricsCard, StatusIndicator
-
-if TYPE_CHECKING:
-    from ....storage.r2r.storage import R2RStorage
-else:  # pragma: no cover - optional dependency fallback
-    R2RStorage = BaseStorage
-
-
-LOGGER: Final[logging.Logger] = logging.getLogger(__name__)
-
-
-class CollectionOverviewScreen(Screen[None]):
-    """Enhanced dashboard with modern design and metrics."""
-
-    total_documents: int = 0
-    total_collections: int = 0
-    active_backends: int = 0
-
-    BINDINGS = [
-        Binding("q", "quit", "Quit"),
-        Binding("r", "refresh", "Refresh"),
-        Binding("i", "ingest", "Ingest"),
-        Binding("m", "manage", "Manage"),
-        Binding("s", "search", "Search"),
-        Binding("ctrl+d", "delete", "Delete"),
-        Binding("ctrl+1", "tab_dashboard", "Dashboard"),
-        Binding("ctrl+2", "tab_collections", "Collections"),
-        Binding("ctrl+3", "tab_analytics", "Analytics"),
-        Binding("tab", "next_tab", "Next Tab"),
-        Binding("shift+tab", "prev_tab", "Prev Tab"),
-        Binding("f1", "help", "Help"),
-    ]
-
-    collections: var[list[CollectionInfo]] = var([])
-    is_loading: var[bool] = var(False)
-    selected_collection: reactive[CollectionInfo | None] = reactive(None)
-    storage_manager: StorageManager
-    weaviate: WeaviateStorage | None
-    openwebui: OpenWebUIStorage | None
-    r2r: R2RStorage | BaseStorage | None
-
-    def __init__(
-        self,
-        storage_manager: StorageManager,
-        weaviate: WeaviateStorage | None,
-        openwebui: OpenWebUIStorage | None,
-        r2r: R2RStorage | BaseStorage | None,
-    ) -> None:
-        super().__init__()
-        self.storage_manager = storage_manager
-        self.weaviate = weaviate
-        self.openwebui = openwebui
-        self.r2r = r2r
-        self.total_documents = 0
-        self.total_collections = 0
-        self.active_backends = 0
-
-    @override
-    def compose(self) -> ComposeResult:
-        yield Header(show_clock=True)
-
-        with TabbedContent():
-            # Dashboard Tab
-            with TabPane("Dashboard", id="dashboard"):
-                yield Container(
-                    Static("🚀 Collection Management System", classes="title"),
-                    Static("Modern document ingestion and management platform", classes="subtitle"),
-                    Rule(line_style="heavy"),
-                    # Metrics Grid
-                    Container(
-                        Grid(
-                            MetricsCard(
-                                "Collections", str(self.total_collections), "Active collections"
-                            ),
-                            MetricsCard("Documents", str(self.total_documents), "Total indexed"),
-                            MetricsCard(
-                                "Backends", str(self.active_backends), "Connected services"
-                            ),
-                            MetricsCard("Status", "Online", "System health"),
-                            classes="responsive-grid metrics-grid",
-                        ),
-                        classes="center",
-                    ),
-                    Rule(line_style="dashed"),
-                    # Quick Actions
-                    Container(
-                        Static("⚡ Quick Actions", classes="section-title"),
-                        Horizontal(
-                            Button("🔄 Refresh Data", id="quick_refresh", variant="primary"),
-                            Button("📥 New Ingestion", id="quick_ingest", variant="success"),
-                            Button("🔍 Search All", id="quick_search", variant="default"),
-                            Button("⚙️ Settings", id="quick_settings", variant="default"),
-                            classes="action_buttons",
-                        ),
-                        classes="card",
-                    ),
-                    # Recent Activity
-                    Container(
-                        Static("📊 Recent Activity", classes="section-title"),
-                        Static(
-                            "Loading recent activity...", id="activity_feed", classes="status-text"
-                        ),
-                        classes="card",
-                    ),
-                    classes="main_container",
-                )
-
-            # Collections Tab
-            with TabPane("Collections", id="collections"):
-                yield Container(
-                    Static("📚 Collection Overview", classes="title"),
-                    # Collection controls
-                    Horizontal(
-                        Button("🔄 Refresh", id="refresh_btn", variant="primary"),
-                        Button("📥 Ingest", id="ingest_btn", variant="success"),
-                        Button("🔧 Manage", id="manage_btn", variant="warning"),
-                        Button("🗑️ Delete", id="delete_btn", variant="error"),
-                        Button("🔍 Search", id="search_btn", variant="default"),
-                        classes="button_bar",
-                    ),
-                    # Collection table with enhanced navigation
-                    EnhancedDataTable(id="collections_table", classes="enhanced-table"),
-                    # Status bar
-                    Container(
-                        Static("Ready", id="status_text", classes="status-text"),
-                        StatusIndicator("Ready", id="connection_status"),
-                        classes="status-bar",
-                    ),
-                    LoadingIndicator(id="loading", classes="pulse"),
-                    classes="main_container",
-                )
-
-            # Analytics Tab
-            with TabPane("Analytics", id="analytics"):
-                yield Container(
-                    Static("📈 Analytics & Insights", classes="title"),
-                    # Analytics content
-                    Container(
-                        Static("🚧 Analytics Dashboard", classes="section-title"),
-                        Static("Advanced analytics and insights coming soon!", classes="subtitle"),
-                        # Placeholder charts area
-                        Container(
-                            Static("📊 Document Distribution", classes="chart-title"),
-                            Static(
-                                "Chart placeholder - integrate with visualization library",
-                                classes="chart-placeholder",
-                            ),
-                            classes="card",
-                        ),
-                        Container(
-                            Static("⏱️ Ingestion Timeline", classes="chart-title"),
-                            Static("Timeline chart placeholder", classes="chart-placeholder"),
-                            classes="card",
-                        ),
-                        classes="analytics-grid",
-                    ),
-                    classes="main_container",
-                )
-
-        yield Footer()
-
-    async def on_mount(self) -> None:
-        """Initialize the screen with enhanced loading."""
-        self.query_one("#loading").display = False
-        self.update_metrics()
-        self.refresh_collections()  # Don't await, let it run as a worker
-
-    def update_metrics(self) -> None:
-        """Update dashboard metrics with enhanced calculations."""
-        self._calculate_metrics()
-        self._update_metrics_cards()
-        self._update_activity_feed()
-
-    def _calculate_metrics(self) -> None:
-        """Calculate basic metrics from collections."""
-        self.total_collections = len(self.collections)
-        self.total_documents = sum(col["count"] for col in self.collections)
-        self.active_backends = sum([bool(self.weaviate), bool(self.openwebui), bool(self.r2r)])
-
-    def _update_metrics_cards(self) -> None:
-        """Update the metrics cards display."""
-        try:
-            dashboard_tab = self.query_one("#dashboard")
-            metrics_cards_query = dashboard_tab.query(MetricsCard)
-            if len(metrics_cards_query) >= 4:
-                metrics_cards = list(metrics_cards_query)
-                self._update_card_values(metrics_cards)
-                self._update_status_card(metrics_cards[3])
-        except NoMatches:
-            return
-        except Exception as exc:
-            LOGGER.exception("Failed to update dashboard metrics", exc_info=exc)
-
-    def _update_card_values(self, metrics_cards: list[MetricsCard]) -> None:
-        """Update individual metric card values."""
-        metrics_cards[0].query_one(".metrics-value", Static).update(f"{self.total_collections:,}")
-        metrics_cards[1].query_one(".metrics-value", Static).update(f"{self.total_documents:,}")
-        metrics_cards[2].query_one(".metrics-value", Static).update(str(self.active_backends))
-
-    def _update_status_card(self, status_card: MetricsCard) -> None:
-        """Update the system status card."""
-        if self.active_backends > 0 and self.total_collections > 0:
-            status_text, status_class = "🟢 Healthy", "status-active"
-        elif self.active_backends > 0:
-            status_text, status_class = "🟡 Ready", "status-warning"
-        else:
-            status_text, status_class = "🔴 Offline", "status-error"
-
-        status_card.query_one(".metrics-value", Static).update(status_text)
-        status_card.add_class(status_class)
-
-    def _update_activity_feed(self) -> None:
-        """Update the activity feed with collection data."""
-        try:
-            dashboard_tab = self.query_one("#dashboard")
-            activity_feed = dashboard_tab.query_one("#activity_feed", Static)
-            activity_text = self._generate_activity_text()
-            activity_feed.update(activity_text)
-        except NoMatches:
-            return
-        except Exception as exc:
-            LOGGER.exception("Failed to update dashboard activity feed", exc_info=exc)
-
-    def _generate_activity_text(self) -> str:
-        """Generate activity feed text from collections."""
-        if not self.collections:
-            return "🚀 No collections found. Start by creating your first ingestion!\n💡 Press 'I' to begin or use the Quick Actions above."
-
-        recent_activity = [self._format_collection_item(col) for col in self.collections[:3]]
-        activity_text = "\n".join(recent_activity)
-
-        if len(self.collections) > 3:
-            total_docs = sum(c["count"] for c in self.collections)
-            activity_text += (
-                f"\n📊 Total: {len(self.collections)} collections with {total_docs:,} documents"
-            )
-
-        return activity_text
-
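The metric roll-up above is simple enough to live in a pure function, which keeps it testable outside a running Screen. A minimal sketch, assuming only the `count` key used above (the TypedDict is a stand-in for the project's `CollectionInfo`):

```python
from typing import TypedDict


class _Collection(TypedDict):
    name: str
    count: int


def summarize_collections(collections: list[_Collection]) -> tuple[int, int]:
    """Return (total_collections, total_documents) for the metric cards."""
    return len(collections), sum(col["count"] for col in collections)


assert summarize_collections([{"name": "docs", "count": 3}, {"name": "web", "count": 7}]) == (2, 10)
```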
-    def _format_collection_item(self, col: CollectionInfo) -> str:
-        """Format a single collection item for the activity feed."""
-        content_type = self._get_content_type_icon(col["name"])
-        size_mb = col["size_mb"]
-        backend_info = col["backend"]
-
-        # Check if this represents a multi-backend ingestion result
-        if isinstance(backend_info, list):
-            if len(backend_info) > 1:
-                # Ensure all elements are strings for safe joining
-                backend_strings = [str(b) for b in backend_info if b is not None]
-                backend_list = " + ".join(backend_strings) if backend_strings else "unknown"
-                return f"{content_type} {col['name']}: {col['count']:,} docs ({size_mb:.1f} MB) → {backend_list}"
-            elif len(backend_info) == 1:
-                backend_name = str(backend_info[0]) if backend_info[0] is not None else "unknown"
-                return f"{content_type} {col['name']}: {col['count']:,} docs ({size_mb:.1f} MB) - {backend_name}"
-            else:
-                return f"{content_type} {col['name']}: {col['count']:,} docs ({size_mb:.1f} MB) - unknown"
-        else:
-            backend_display = str(backend_info) if backend_info is not None else "unknown"
-            return f"{content_type} {col['name']}: {col['count']:,} docs ({size_mb:.1f} MB) - {backend_display}"
-
-    def _get_content_type_icon(self, name: str) -> str:
-        """Get appropriate icon for collection content type."""
-        name_lower = name.lower()
-        if "web" in name_lower:
-            return "🌐"
-        elif "doc" in name_lower:
-            return "📖"
-        elif "repo" in name_lower:
-            return "📦"
-        return "📄"
-
-    @work(exclusive=True)
-    async def refresh_collections(self) -> None:
-        """Refresh collection data with enhanced multi-backend loading feedback."""
-        self.is_loading = True
-        loading_indicator = self.query_one("#loading")
-        status_text = self.query_one("#status_text", Static)
-
-        loading_indicator.display = True
-        status_text.update("🔄 Refreshing collections...")
-
-        try:
-            # Use storage manager for unified backend handling
-            if not self.storage_manager.is_initialized:
-                status_text.update("🔗 Initializing storage backends...")
-                backend_results = await self.storage_manager.initialize_all_backends()
-
-                # Report per-backend initialization status
-                success_count = sum(backend_results.values())
-                total_count = len(backend_results)
-                status_text.update(f"✅ Initialized {success_count}/{total_count} backends")
-
-            # Get collections from all backends via storage manager
-            status_text.update("📚 Loading collections from all backends...")
-            collections = await self.storage_manager.get_all_collections()
-
-            # Update metrics calculation for multi-backend support
-            self.active_backends = len(self.storage_manager.get_available_backends())
-
-            self.collections = collections
-            await self.update_collections_table()
-            self.update_metrics()
-
-            # Enhanced status reporting for multi-backend
-            backend_names = ", ".join(
-                backend.value for backend in self.storage_manager.get_available_backends()
-            )
-            status_text.update(f"✨ Ready - {len(collections)} collections from {backend_names}")
-
-            # Update connection status with multi-backend awareness
-            connection_status = self.query_one("#connection_status", StatusIndicator)
-            if collections and self.active_backends > 0:
-                connection_status.update_status(f"✓ {self.active_backends} Active")
-            else:
-                connection_status.update_status("No Data")
-
-        except Exception as e:
-            status_text.update(f"❌ Error: {e}")
-            self.notify(f"Failed to refresh: {e}", severity="error", markup=False)
-        finally:
-            self.is_loading = False
-            loading_indicator.display = False
-
-    async def list_weaviate_collections(self) -> list[CollectionInfo]:
-        """List Weaviate collections with enhanced metadata."""
-        if not self.weaviate:
-            return []
-
-        try:
-            overview = await self.weaviate.describe_collections()
-            collections: list[CollectionInfo] = []
-
-            for item in overview:
-                count_raw = item.get("count", 0)
-                count_val = int(count_raw) if isinstance(count_raw, (int, str)) else 0
-                size_mb_raw = item.get("size_mb", 0.0)
-                size_mb_val = float(size_mb_raw) if isinstance(size_mb_raw, (int, float, str)) else 0.0
-                collections.append(
-                    CollectionInfo(
-                        name=str(item.get("name", "Unknown")),
-                        type="weaviate",
-                        count=count_val,
-                        backend="🗄️ Weaviate",
-                        status="✓ Active",
-                        last_updated=datetime.now().strftime("%Y-%m-%d %H:%M"),
-                        size_mb=size_mb_val,
-                    )
-                )
-
-            return collections
-        except Exception as e:
-            self.notify(f"Error listing Weaviate collections: {e}", severity="error", markup=False)
-            return []
-
-    async def list_openwebui_collections(self) -> list[CollectionInfo]:
-        """List OpenWebUI collections with enhanced metadata."""
-        # Try to get OpenWebUI backend from storage manager if direct instance not available
-        openwebui_backend = self.openwebui or self.storage_manager.get_backend(StorageBackend.OPEN_WEBUI)
-        if not openwebui_backend:
-            return []
-
-        try:
-            overview = await openwebui_backend.describe_collections()
-            collections: list[CollectionInfo] = []
-
-            for item in overview:
-                count_raw = item.get("count", 0)
-                count_val = int(count_raw) if isinstance(count_raw, (int, str)) else 0
-                size_mb_raw = item.get("size_mb", 0.0)
-                size_mb_val = float(size_mb_raw) if isinstance(size_mb_raw, (int, float, str)) else 0.0
-                collection_name = str(item.get("name", "Unknown"))
-                collections.append(
-                    CollectionInfo(
-                        name=collection_name,
-                        type="openwebui",
-                        count=count_val,
-                        backend="🌐 OpenWebUI",
-                        status="✓ Active",
-                        last_updated=datetime.now().strftime("%Y-%m-%d %H:%M"),
-                        size_mb=size_mb_val,
-                    )
-                )
-
-            return collections
-        except Exception as e:
-            self.notify(f"Error listing OpenWebUI collections: {e}", severity="error", markup=False)
-            return []
-
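Both listing helpers above repeat the same defensive count/size coercion. A sketch of shared converters under the same fall-back-to-zero convention; note these also accept floats, which the inline `isinstance` checks did not, so treat the exact semantics as an assumption:

```python
def coerce_int(value: object, default: int = 0) -> int:
    """Best-effort int conversion for backend overview payloads."""
    try:
        return int(value)  # type: ignore[call-overload]
    except (TypeError, ValueError):
        return default


def coerce_float(value: object, default: float = 0.0) -> float:
    """Best-effort float conversion for backend overview payloads."""
    try:
        return float(value)  # type: ignore[arg-type]
    except (TypeError, ValueError):
        return default


assert coerce_int("42") == 42 and coerce_float(None) == 0.0
```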
-    async def update_collections_table(self) -> None:
-        """Update the collections table with enhanced formatting."""
-        table = self.query_one("#collections_table", EnhancedDataTable)
-        table.clear(columns=True)
-
-        # Add enhanced columns with more metadata
-        table.add_columns("Collection", "Backend", "Documents", "Size", "Type", "Status", "Updated")
-
-        # Add rows with enhanced formatting
-        for collection in self.collections:
-            # Format size
-            size_str = f"{collection['size_mb']:.1f} MB"
-            if collection["size_mb"] > 1000:
-                size_str = f"{collection['size_mb'] / 1000:.1f} GB"
-
-            # Format document count
-            doc_count = f"{collection['count']:,}"
-
-            # Determine content type based on collection name or other metadata
-            content_type = "📄 Mixed"
-            if "web" in collection["name"].lower():
-                content_type = "🌐 Web"
-            elif "doc" in collection["name"].lower():
-                content_type = "📖 Docs"
-            elif "repo" in collection["name"].lower():
-                content_type = "📦 Code"
-
-            table.add_row(
-                collection["name"],
-                collection["backend"],
-                doc_count,
-                size_str,
-                content_type,
-                collection["status"],
-                collection["last_updated"],
-            )
-
-        if self.collections:
-            table.move_cursor(row=0)
-
-        self.get_selected_collection()
-
-    def update_search_controls(self, collection: CollectionInfo | None) -> None:
-        """Enable or disable search controls based on backend support."""
-        try:
-            search_button = self.query_one("#search_btn", Button)
-            quick_search_button = self.query_one("#quick_search", Button)
-        except Exception:
-            return
-
-        is_weaviate = bool(collection and collection.get("type") == "weaviate")
-        search_button.disabled = not is_weaviate
-        quick_search_button.disabled = not is_weaviate
-
-    def get_selected_collection(self) -> CollectionInfo | None:
-        """Get the currently selected collection."""
-        table = self.query_one("#collections_table", EnhancedDataTable)
-        try:
-            row_index = table.cursor_coordinate.row
-        except (AttributeError, IndexError):
-            self.selected_collection = None
-            self.update_search_controls(None)
-            return None
-
-        if 0 <= row_index < len(self.collections):
-            collection = self.collections[row_index]
-            self.selected_collection = collection
-            self.update_search_controls(collection)
-            return collection
-
-        self.selected_collection = None
-        self.update_search_controls(None)
-        return None
-
-    # Action methods
-    def action_refresh(self) -> None:
-        """Refresh collections."""
-        self.refresh_collections()
-
-    def action_ingest(self) -> None:
-        """Show enhanced ingestion dialog."""
-        if selected := self.get_selected_collection():
-            from .ingestion import IngestionScreen
-
-            self.app.push_screen(IngestionScreen(selected, self.storage_manager))
-        else:
-            self.notify("🔍 Please select a collection first", severity="warning")
-
-    def action_manage(self) -> None:
-        """Manage documents in selected collection."""
-        if selected := self.get_selected_collection():
-            if storage_backend := self._get_storage_for_collection(selected):
-                from .documents import DocumentManagementScreen
-
-                self.app.push_screen(DocumentManagementScreen(selected, storage_backend))
-            else:
-                self.notify(
-                    "🚧 No storage backend available for this collection", severity="warning"
-                )
-        else:
-            self.notify("🔍 Please select a collection first", severity="warning")
-
-    def _get_storage_for_collection(self, collection: CollectionInfo) -> BaseStorage | None:
-        """Get the appropriate storage backend for a collection."""
-        collection_type = collection.get("type", "")
-
-        # Map collection types to storage backends (try direct instances first)
-        if collection_type == "weaviate" and self.weaviate:
-            return self.weaviate
-        elif collection_type == "openwebui" and self.openwebui:
-            return self.openwebui
-        elif collection_type == "r2r" and self.r2r:
-            return self.r2r
-
-        # Fall back to storage manager if direct instances not available
-        if collection_type == "weaviate":
-            return self.storage_manager.get_backend(StorageBackend.WEAVIATE)
-        elif collection_type == "openwebui":
-            return self.storage_manager.get_backend(StorageBackend.OPEN_WEBUI)
-        elif collection_type == "r2r":
-            return self.storage_manager.get_backend(StorageBackend.R2R)
-
-        # Fall back to checking available backends by backend name
-        backend_name = collection.get("backend", "")
-        if isinstance(backend_name, str):
-            if "weaviate" in backend_name.lower():
-                return self.weaviate or self.storage_manager.get_backend(StorageBackend.WEAVIATE)
-            elif "openwebui" in backend_name.lower():
-                return self.openwebui or self.storage_manager.get_backend(StorageBackend.OPEN_WEBUI)
-            elif "r2r" in backend_name.lower():
-                return self.r2r or self.storage_manager.get_backend(StorageBackend.R2R)
-
-        return None
-
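The fallback chain in `_get_storage_for_collection` can be read as a dispatch table from the collection's `type` string to the `StorageBackend` enum. A minimal sketch; the enum is redeclared here only so the snippet runs standalone, and its member values are assumptions:

```python
from enum import Enum


class StorageBackend(Enum):
    WEAVIATE = "weaviate"
    OPEN_WEBUI = "openwebui"
    R2R = "r2r"


_TYPE_TO_BACKEND = {
    "weaviate": StorageBackend.WEAVIATE,
    "openwebui": StorageBackend.OPEN_WEBUI,
    "r2r": StorageBackend.R2R,
}


def resolve_backend(collection_type: str) -> StorageBackend | None:
    """Map a CollectionInfo 'type' onto the storage manager's enum."""
    return _TYPE_TO_BACKEND.get(collection_type.lower())
```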
-    def action_search(self) -> None:
-        """Search in selected collection."""
-        if selected := self.get_selected_collection():
-            if selected["type"] != "weaviate":
-                self.notify(
-                    "🔍 Search is currently available only for Weaviate collections",
-                    severity="warning",
-                )
-                return
-            from .search import SearchScreen
-
-            self.app.push_screen(SearchScreen(selected, self.weaviate, self.openwebui))
-        else:
-            self.notify("🔍 Please select a collection first", severity="warning")
-
-    def action_delete(self) -> None:
-        """Delete selected collection."""
-        if selected := self.get_selected_collection():
-            from .dialogs import ConfirmDeleteScreen
-
-            self.app.push_screen(ConfirmDeleteScreen(selected, self))
-        else:
-            self.notify("🔍 Please select a collection first", severity="warning")
-
-    def action_tab_dashboard(self) -> None:
-        """Switch to dashboard tab."""
-        tabs = self.query_one(TabbedContent)
-        tabs.active = "dashboard"
-
-    def action_tab_collections(self) -> None:
-        """Switch to collections tab."""
-        tabs = self.query_one(TabbedContent)
-        tabs.active = "collections"
-
-    def action_tab_analytics(self) -> None:
-        """Switch to analytics tab."""
-        tabs = self.query_one(TabbedContent)
-        tabs.active = "analytics"
-
-    def action_next_tab(self) -> None:
-        """Switch to next tab."""
-        tabs = self.query_one(TabbedContent)
-        tab_ids = ["dashboard", "collections", "analytics"]
-        current = tabs.active
-        try:
-            current_index = tab_ids.index(current)
-            next_index = (current_index + 1) % len(tab_ids)
-            tabs.active = tab_ids[next_index]
-        except (ValueError, AttributeError):
-            tabs.active = tab_ids[0]
-
-    def action_prev_tab(self) -> None:
-        """Switch to previous tab."""
-        tabs = self.query_one(TabbedContent)
-        tab_ids = ["dashboard", "collections", "analytics"]
-        current = tabs.active
-        try:
-            current_index = tab_ids.index(current)
-            prev_index = (current_index - 1) % len(tab_ids)
-            tabs.active = tab_ids[prev_index]
-        except (ValueError, AttributeError):
-            tabs.active = tab_ids[0]
-
-    def action_help(self) -> None:
-        """Show help screen."""
-        from .help import HelpScreen
-
-        help_md = """
-# 🚀 Modern Collection Management System
-
-## Navigation
-- **Tab** / **Shift+Tab**: Switch between tabs
-- **Ctrl+1/2/3**: Direct tab access
-- **Enter**: Activate selected item
-- **Escape**: Go back/cancel
-- **Arrow Keys**: Navigate within tables
-- **Home/End**: Jump to first/last row
-- **Page Up/Down**: Scroll by page
-
-## Collections
-- **R**: Refresh collections
-- **I**: Start ingestion
-- **M**: Manage documents
-- **S**: Search collection
-- **Ctrl+D**: Delete collection
-
-## Table Navigation
-- **Up/Down** or **J/K**: Navigate rows
-- **Space**: Toggle selection
-- **Ctrl+A**: Select all
-- **Ctrl+Shift+A**: Clear selection
-
-## General
-- **Q** / **Ctrl+C**: Quit application
-- **F1**: Show this help
-
-Enjoy the enhanced interface! 🎉
-    """
-        self.app.push_screen(HelpScreen(help_md))
-
-    def on_button_pressed(self, event: Button.Pressed) -> None:
-        """Handle button presses with enhanced feedback."""
-        button_id = event.button.id
-
-        # Add visual feedback
-        event.button.add_class("pressed")
-        self.call_later(self.remove_pressed_class, event.button)
-
-        if getattr(event.button, "disabled", False):
-            self.notify(
-                "🔍 Search is currently limited to Weaviate collections",
-                severity="warning",
-            )
-            return
-
-        if button_id in ["refresh_btn", "quick_refresh"]:
-            self.action_refresh()
-        elif button_id in ["ingest_btn", "quick_ingest"]:
-            self.action_ingest()
-        elif button_id == "manage_btn":
-            self.action_manage()
-        elif button_id == "delete_btn":
-            self.action_delete()
-        elif button_id in ["search_btn", "quick_search"]:
-            self.action_search()
-        elif button_id == "quick_settings":
-            self.notify("⚙️ Settings panel coming soon!", severity="information")
-
-    def remove_pressed_class(self, button: Button) -> None:
-        """Remove pressed visual feedback class."""
-        button.remove_class("pressed")
-
-
-
-"""Dialog screens for confirmations and user interactions."""
-
-from pathlib import Path
-from typing import TYPE_CHECKING
-
-from textual.app import ComposeResult
-from textual.binding import Binding
-from textual.containers import Container, Horizontal
-from textual.screen import ModalScreen, Screen
-from textual.widgets import Button, Footer, Header, LoadingIndicator, RichLog, Static
-from typing_extensions import override
-
-from ..models import CollectionInfo
-
-if TYPE_CHECKING:
-    from .dashboard import CollectionOverviewScreen
-    from .documents import DocumentManagementScreen
-
-
-class ConfirmDeleteScreen(Screen[None]):
-    """Screen for confirming collection deletion."""
-
-    collection: CollectionInfo
-    parent_screen: "CollectionOverviewScreen"
-
-    BINDINGS = [
-        Binding("escape", "app.pop_screen", "Cancel"),
-        Binding("y", "confirm_delete", "Yes"),
-        Binding("n", "app.pop_screen", "No"),
-        Binding("enter", "confirm_delete", "Confirm"),
-    ]
-
-    def __init__(self, collection: CollectionInfo, parent_screen: "CollectionOverviewScreen"):
-        super().__init__()
-        self.collection = collection
-        self.parent_screen = parent_screen
-
-    @override
-    def compose(self) -> ComposeResult:
-        yield Header()
-        yield Container(
-            Static("⚠️ Confirm Deletion", classes="title warning"),
-            Static(f"Are you sure you want to delete collection '{self.collection['name']}'?"),
-            Static(f"Backend: {self.collection['backend']}"),
-            Static(f"Documents: {self.collection['count']:,}"),
-            Static("This action cannot be undone!", classes="warning"),
-            Static("Press Y to confirm, N or Escape to cancel", classes="subtitle"),
-            Horizontal(
-                Button("✅ Yes, Delete (Y)", id="yes_btn", variant="error"),
-                Button("❌ Cancel (N)", id="no_btn", variant="default"),
-                classes="action_buttons",
-            ),
-            classes="main_container center",
-        )
-        yield Footer()
-
-    def on_mount(self) -> None:
-        """Initialize the screen with focus on cancel button for safety."""
-        self.query_one("#no_btn").focus()
-
-    def on_button_pressed(self, event: Button.Pressed) -> None:
-        """Handle button presses."""
-        if event.button.id == "yes_btn":
-            self.action_confirm_delete()
-        elif event.button.id == "no_btn":
-            self.app.pop_screen()
-
-    def action_confirm_delete(self) -> None:
-        """Confirm deletion."""
-        self.run_worker(self.delete_collection())
-
-    async def delete_collection(self) -> None:
-        """Delete the collection."""
-        try:
-            if self.collection["type"] == "weaviate" and self.parent_screen.weaviate:
-                # Delete Weaviate collection
-                if self.parent_screen.weaviate.client:
-                    self.parent_screen.weaviate.client.collections.delete(self.collection["name"])
-                self.notify(
-                    f"Deleted Weaviate collection: {self.collection['name']}",
-                    severity="information",
-                )
-            else:
-                # Use the dashboard's method to get the appropriate storage backend
-                storage_backend = self.parent_screen._get_storage_for_collection(self.collection)
-                if not storage_backend:
-                    self.notify(
-                        f"❌ No storage backend available for {self.collection['type']} collection: {self.collection['name']}",
-                        severity="error",
-                    )
-                    self.app.pop_screen()
-                    return
-
-                # Check once that the storage backend supports collection deletion
-                if not hasattr(storage_backend, 'delete_collection'):
-                    self.notify(
-                        f"❌ Collection deletion not supported for {self.collection['type']} backend",
-                        severity="error",
-                    )
-                    self.app.pop_screen()
-                    return
-
-                # Delete the collection using the appropriate backend
-                # Ensure we use the exact collection name, not any default from storage config
-                collection_name = str(self.collection["name"])
-                collection_type = str(self.collection["type"])
-
-                self.notify(f"Deleting {collection_type} collection: {collection_name}...", severity="information")
-
-                success = await storage_backend.delete_collection(collection_name)
-                if success:
-                    self.notify(
-                        f"✅ Successfully deleted {self.collection['type']} collection: {self.collection['name']}",
-                        severity="information",
-                        timeout=3.0,
-                    )
-                else:
-                    self.notify(
-                        f"❌ Failed to delete {self.collection['type']} collection: {self.collection['name']}",
-                        severity="error",
-                    )
-                    # Don't refresh if deletion failed
-                    self.app.pop_screen()
-                    return
-
-            # Refresh parent screen after a short delay so the deletion settles;
-            # set_timer takes a delay, unlike call_later, which runs immediately
-            self.set_timer(0.5, self.parent_screen.refresh_collections)
-            self.app.pop_screen()
-
-        except Exception as e:
-            self.notify(f"Failed to delete collection: {e}", severity="error", markup=False)
-
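The corrected flow above reduces to: resolve a backend, delete, then refresh on a timer. A condensed sketch assuming the `delete_collection` coroutine and `refresh_collections` worker shown above (`set_timer` is Textual's delayed-callback API; the loose `Any` typing is illustrative):

```python
from typing import Any


async def delete_and_refresh(screen: Any, storage: Any, name: str) -> bool:
    """Delete `name` via `storage`, then schedule a dashboard refresh."""
    if not hasattr(storage, "delete_collection"):
        return False
    deleted = await storage.delete_collection(name)
    if deleted:
        # give the backend half a second to settle before re-querying
        screen.set_timer(0.5, screen.refresh_collections)
    return deleted
```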
-
-
-class ConfirmDocumentDeleteScreen(Screen[None]):
-    """Screen for confirming document deletion."""
-
-    doc_ids: list[str]
-    collection: CollectionInfo
-    parent_screen: "DocumentManagementScreen"
-
-    BINDINGS = [
-        Binding("escape", "app.pop_screen", "Cancel"),
-        Binding("y", "confirm_delete", "Yes"),
-        Binding("n", "app.pop_screen", "No"),
-        Binding("enter", "confirm_delete", "Confirm"),
-    ]
-
-    def __init__(
-        self,
-        doc_ids: list[str],
-        collection: CollectionInfo,
-        parent_screen: "DocumentManagementScreen",
-    ):
-        super().__init__()
-        self.doc_ids = doc_ids
-        self.collection = collection
-        self.parent_screen = parent_screen
-
-    @override
-    def compose(self) -> ComposeResult:
-        yield Header()
-        yield Container(
-            Static("⚠️ Confirm Document Deletion", classes="title warning"),
-            Static(
-                f"Are you sure you want to delete {len(self.doc_ids)} documents from '{self.collection['name']}'?"
-            ),
-            Static("This action cannot be undone!", classes="warning"),
-            Static("Press Y to confirm, N or Escape to cancel", classes="subtitle"),
-            Horizontal(
-                Button("✅ Yes, Delete (Y)", id="yes_btn", variant="error"),
-                Button("❌ Cancel (N)", id="no_btn", variant="default"),
-                classes="action_buttons",
-            ),
-            LoadingIndicator(id="loading"),
-            classes="main_container center",
-        )
-        yield Footer()
-
-    def on_mount(self) -> None:
-        """Initialize the screen with focus on cancel button for safety."""
-        self.query_one("#loading").display = False
-        self.query_one("#no_btn").focus()
-
-    def on_button_pressed(self, event: Button.Pressed) -> None:
-        """Handle button presses."""
-        if event.button.id == "yes_btn":
-            self.action_confirm_delete()
-        elif event.button.id == "no_btn":
-            self.app.pop_screen()
-
-    def action_confirm_delete(self) -> None:
-        """Confirm deletion."""
-        self.run_worker(self.delete_documents())
-
-    async def delete_documents(self) -> None:
-        """Delete the selected documents."""
-        loading = self.query_one("#loading")
-        loading.display = True
-
-        try:
-            if hasattr(self.parent_screen, 'storage') and self.parent_screen.storage:
-                # Delete documents via storage
-                # The storage should have delete_documents method for weaviate
-                storage = self.parent_screen.storage
-                if hasattr(storage, 'delete_documents'):
-                    results = await storage.delete_documents(
-                        self.doc_ids,
-                        collection_name=self.collection["name"],
-                    )
-
-                    # Count successful deletions
-                    successful = sum(bool(success) for success in results.values())
-                    failed = len(results) - successful
-
-                    if successful > 0:
-                        self.notify(f"Deleted {successful} documents", severity="information")
-                    if failed > 0:
-                        self.notify(f"Failed to delete {failed} documents", severity="error")
-
-            # Clear selection and refresh parent screen
-            self.parent_screen.selected_docs.clear()
-            await self.parent_screen.load_documents()
-            self.app.pop_screen()
-
-        except Exception as e:
-            self.notify(f"Failed to delete documents: {e}", severity="error", markup=False)
-        finally:
-            loading.display = False
-
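The log viewer below relies on the app exposing `attach_log_viewer`/`detach_log_viewer`. A sketch of the app-side contract, assuming a replay-on-attach design; the buffer size and the `publish` name are illustrative, while `replace_logs`/`append_logs` come from the viewer's API below:

```python
from collections import deque
from typing import Any


class LogFeed:
    """Minimal publisher that replays history to late-attaching viewers."""

    def __init__(self, max_lines: int = 2000) -> None:
        self._lines: deque[str] = deque(maxlen=max_lines)
        self._viewer: Any | None = None

    def attach_log_viewer(self, viewer: Any) -> None:
        self._viewer = viewer
        viewer.replace_logs(list(self._lines))  # replay history on attach

    def detach_log_viewer(self, viewer: Any) -> None:
        if self._viewer is viewer:
            self._viewer = None

    def publish(self, line: str) -> None:
        self._lines.append(line)
        if self._viewer is not None:
            self._viewer.append_logs([line])
```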
-
-class LogViewerScreen(ModalScreen[None]):
-    """Display live log output without disrupting the TUI."""
-
-    _log_widget: RichLog | None
-    _log_file: Path | None
-
-    BINDINGS = [
-        Binding("escape", "close", "Close"),
-        Binding("ctrl+l", "close", "Close"),
-        Binding("s", "show_path", "Log File"),
-    ]
-
-    def __init__(self) -> None:
-        super().__init__()
-        self._log_widget = None
-        self._log_file = None
-
-    @override
-    def compose(self) -> ComposeResult:
-        yield Header(show_clock=True)
-        yield Container(
-            Static("📜 Live Application Logs", classes="title"),
-            Static("Logs update in real time. Press S to reveal the log file path.", classes="subtitle"),
-            RichLog(id="log_stream", classes="log-stream", wrap=True, highlight=False),
-            Static("", id="log_file_path", classes="subtitle"),
-            classes="main_container log-viewer-container",
-        )
-        yield Footer()
-
-    def on_mount(self) -> None:
-        """Attach this viewer to the parent application once mounted."""
-        self._log_widget = self.query_one(RichLog)
-
-        if hasattr(self.app, 'attach_log_viewer'):
-            self.app.attach_log_viewer(self)
-
-    def on_unmount(self) -> None:
-        """Detach from the parent application when closed."""
-        if hasattr(self.app, 'detach_log_viewer'):
-            self.app.detach_log_viewer(self)
-
-    def _get_log_widget(self) -> RichLog:
-        if self._log_widget is None:
-            self._log_widget = self.query_one(RichLog)
-        if self._log_widget is None:
-            raise RuntimeError("RichLog widget not found")
-        return self._log_widget
-
-    def replace_logs(self, lines: list[str]) -> None:
-        """Replace rendered logs with the provided history."""
-        log_widget = self._get_log_widget()
-        log_widget.clear()
-        for line in lines:
-            log_widget.write(line)
-        log_widget.scroll_end(animate=False)
-
-    def append_logs(self, lines: list[str]) -> None:
-        """Append new log lines to the viewer."""
-        log_widget = self._get_log_widget()
-        for line in lines:
-            log_widget.write(line)
-        log_widget.scroll_end(animate=False)
-
-    def update_log_file(self, log_file: Path | None) -> None:
-        """Update the displayed log file path."""
-        self._log_file = log_file
-        label = self.query_one("#log_file_path", Static)
-        if log_file is None:
-            label.update("Logs are not currently being persisted to disk.")
-        else:
-            label.update(f"Log file: {log_file}")
-
-    def action_close(self) -> None:
-        """Close the log viewer."""
-        self.app.pop_screen()
-
-    def action_show_path(self) -> None:
-        """Reveal the log file location in a notification."""
-        if self._log_file is None:
-            self.notify("File logging is disabled for this session.", severity="warning")
-        else:
-            self.notify(f"Log file available at: {self._log_file}", severity="information", markup=False)
-
-
 """Document management screen with enhanced navigation."""
@@ -6809,6 +3381,1201 @@ def dashboard() -> None:
     asyncio.run(run_textual_tui())
 
+
+"""Firecrawl configuration widgets for advanced scraping options."""
+
+from __future__ import annotations
+
+import json
+from typing import cast
+
+from textual.app import ComposeResult
+from textual.containers import Container, Horizontal
+from textual.validation import Integer
+from textual.widget import Widget
+from textual.widgets import Button, Checkbox, Input, Label, Switch, TextArea
+from typing_extensions import override
+
+from ..models import FirecrawlOptions
+
+
+class ScrapeOptionsForm(Widget):
+    """Form for configuring Firecrawl scraping options."""
+
+    DEFAULT_CSS = """
+    ScrapeOptionsForm {
+        border: solid $border;
+        background: $surface;
+        padding: 1;
+        height: auto;
+    }
+
+    ScrapeOptionsForm .form-section {
+        margin-bottom: 2;
+        padding: 1;
+        border: solid $border-lighten-1;
+        background: $surface-lighten-1;
+    }
+
+    ScrapeOptionsForm .form-row {
+        layout: horizontal;
+        align-items: center;
+        height: auto;
+        margin-bottom: 1;
+    }
+
+    ScrapeOptionsForm .form-label {
+        width: 30%;
+        min-width: 15;
+        text-align: right;
+        padding-right: 2;
+    }
+
+    ScrapeOptionsForm .form-input {
+        width: 70%;
+    }
+
+    ScrapeOptionsForm .checkbox-row {
+        layout: horizontal;
+        align-items: center;
+        height: 3;
+        margin-bottom: 1;
+    }
+
+    ScrapeOptionsForm .checkbox-label {
+        margin-left: 2;
+    }
+    """
+
+    def __init__(
+        self,
+        *,
+        name: str | None = None,
+        id: str | None = None,
+        classes: str | None = None,
+        disabled: bool = False,
+        markup: bool = True,
+    ) -> None:
+        """Initialize scrape options form."""
+        super().__init__(name=name, id=id, classes=classes, disabled=disabled, markup=markup)
+
+    @override
+    def compose(self) -> ComposeResult:
+        """Compose scrape options form."""
+        yield Label("🔧 Scraping Configuration", classes="form-title")
+
+        # Output formats section
+        yield Container(
+            Label("Output Formats", classes="section-title"),
+            Horizontal(
+                Checkbox("Markdown", id="format_markdown", value=True, classes="checkbox"),
+                Label("Markdown", classes="checkbox-label"),
+                classes="checkbox-row",
+            ),
+            Horizontal(
+                Checkbox("HTML", id="format_html", value=False, classes="checkbox"),
+                Label("HTML", classes="checkbox-label"),
+                classes="checkbox-row",
+            ),
+            Horizontal(
+                Checkbox("Screenshot", id="format_screenshot", value=False, classes="checkbox"),
+                Label("Screenshot", classes="checkbox-label"),
+                classes="checkbox-row",
+            ),
+            classes="form-section",
+        )
+
+        # Content filtering section
+        yield Container(
+            Label("Content Filtering", classes="section-title"),
+            Horizontal(
+                Label("Only Main Content:", classes="form-label"),
+                Switch(id="only_main_content", value=True, classes="form-input"),
+                classes="form-row",
+            ),
+            Horizontal(
+                Label("Include Tags:", classes="form-label"),
+                Input(
+                    placeholder="p, div, article (comma-separated)",
+                    id="include_tags",
+                    classes="form-input",
+                ),
+                classes="form-row",
+            ),
+            Horizontal(
+                Label("Exclude Tags:", classes="form-label"),
+                Input(
+                    placeholder="nav, footer, script (comma-separated)",
+                    id="exclude_tags",
+                    classes="form-input",
+                ),
+                classes="form-row",
+            ),
+            classes="form-section",
+        )
+
+        # Performance settings section
+        yield Container(
+            Label("Performance Settings", classes="section-title"),
+            Horizontal(
+                Label("Wait Time (ms):", classes="form-label"),
+                Input(
+                    placeholder="0",
+                    id="wait_for",
+                    validators=[Integer(minimum=0, maximum=30000)],
+                    classes="form-input",
+                ),
+                classes="form-row",
+            ),
+            classes="form-section",
+        )
+
+    def get_scrape_options(self) -> dict[str, object]:
+        """Get scraping options from form."""
+        # Collect formats
+        formats: list[str] = []
+        if self.query_one("#format_markdown", Checkbox).value:
+            formats.append("markdown")
+        if self.query_one("#format_html", Checkbox).value:
+            formats.append("html")
+        if self.query_one("#format_screenshot", Checkbox).value:
+            formats.append("screenshot")
+        options: dict[str, object] = {
+            "formats": formats,
+            "only_main_content": self.query_one("#only_main_content", Switch).value,
+        }
+        include_tags_input = self.query_one("#include_tags", Input).value
+        if include_tags_input.strip():
+            options["include_tags"] = [tag.strip() for tag in include_tags_input.split(",")]
+
+        exclude_tags_input = self.query_one("#exclude_tags", Input).value
+        if exclude_tags_input.strip():
+            options["exclude_tags"] = [tag.strip() for tag in exclude_tags_input.split(",")]
+
+        # Performance
+        wait_for_input = self.query_one("#wait_for", Input).value
+        if wait_for_input.strip():
+            try:
+                options["wait_for"] = int(wait_for_input)
+            except ValueError:
+                pass
+
+        return options
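`get_scrape_options` splits comma-separated tag input inline; the same split as a small helper that also drops empty entries, so `"p,,div"` does not yield an empty tag (a purely illustrative refactoring, not part of the patch):

```python
def parse_tag_list(raw: str) -> list[str]:
    """Split 'p, div, article' style input into clean tag names."""
    return [tag.strip() for tag in raw.split(",") if tag.strip()]


assert parse_tag_list(" p, div,,article ") == ["p", "div", "article"]
```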
+
+    def set_scrape_options(self, options: dict[str, object]) -> None:
+        """Set form values from options."""
+        # Set formats
+        formats = options.get("formats", ["markdown"])
+        formats_list = formats if isinstance(formats, list) else []
+        self.query_one("#format_markdown", Checkbox).value = "markdown" in formats_list
+        self.query_one("#format_html", Checkbox).value = "html" in formats_list
+        self.query_one("#format_screenshot", Checkbox).value = "screenshot" in formats_list
+
+        # Set content filtering
+        main_content_val = options.get("only_main_content", True)
+        self.query_one("#only_main_content", Switch).value = bool(main_content_val)
+
+        if include_tags := options.get("include_tags", []):
+            include_list = include_tags if isinstance(include_tags, list) else []
+            self.query_one("#include_tags", Input).value = ", ".join(str(tag) for tag in include_list)
+
+        if exclude_tags := options.get("exclude_tags", []):
+            exclude_list = exclude_tags if isinstance(exclude_tags, list) else []
+            self.query_one("#exclude_tags", Input).value = ", ".join(str(tag) for tag in exclude_list)
+
+        # Set performance
+        wait_for = options.get("wait_for")
+        if wait_for is not None:
+            self.query_one("#wait_for", Input).value = str(wait_for)
+
+
+class MapOptionsForm(Widget):
+    """Form for configuring site mapping options."""
+
+    DEFAULT_CSS = """
+    MapOptionsForm {
+        border: solid $border;
+        background: $surface;
+        padding: 1;
+        height: auto;
+    }
+
+    MapOptionsForm .form-section {
+        margin-bottom: 2;
+        padding: 1;
+        border: solid $border-lighten-1;
+        background: $surface-lighten-1;
+    }
+
+    MapOptionsForm .form-row {
+        layout: horizontal;
+        align-items: center;
+        height: auto;
+        margin-bottom: 1;
+    }
+
+    MapOptionsForm .form-label {
+        width: 30%;
+        min-width: 15;
+        text-align: right;
+        padding-right: 2;
+    }
+
+    MapOptionsForm .form-input {
+        width: 70%;
+    }
+    """
+
+    def __init__(
+        self,
+        *,
+        name: str | None = None,
+        id: str | None = None,
+        classes: str | None = None,
+        disabled: bool = False,
+        markup: bool = True,
+    ) -> None:
+        """Initialize map options form."""
+        super().__init__(name=name, id=id, classes=classes, disabled=disabled, markup=markup)
+
+    @override
+    def compose(self) -> ComposeResult:
+        """Compose map options form."""
+        yield Label("🗺️ Site Mapping Configuration", classes="form-title")
+
+        # Discovery settings section
+        yield Container(
+            Label("Discovery Settings", classes="section-title"),
+            Horizontal(
+                Label("Search Pattern:", classes="form-label"),
+                Input(
+                    placeholder="docs, api, guide (optional)",
+                    id="search_pattern",
+                    classes="form-input",
+                ),
+                classes="form-row",
+            ),
+            Horizontal(
+                Label("Include Subdomains:", classes="form-label"),
+                Switch(id="include_subdomains", value=False, classes="form-input"),
+                classes="form-row",
+            ),
+            classes="form-section",
+        )
+
+        # Limits section
+        yield Container(
+            Label("Crawling Limits", classes="section-title"),
+            Horizontal(
+                Label("Max Pages:", classes="form-label"),
+                Input(
+                    placeholder="100",
+                    id="max_pages",
+                    validators=[Integer(minimum=1, maximum=1000)],
+                    classes="form-input",
+                ),
+                classes="form-row",
+            ),
+            Horizontal(
+                Label("Max Depth:", classes="form-label"),
+                Input(
+                    placeholder="5",
+                    id="max_depth",
+                    validators=[Integer(minimum=1, maximum=20)],
+                    classes="form-input",
+                ),
+                classes="form-row",
+            ),
+            classes="form-section",
+        )
+
+    def get_map_options(self) -> dict[str, object]:
+        """Get mapping options from form."""
+        options: dict[str, object] = {}
+
+        # Discovery settings
+        search_pattern = self.query_one("#search_pattern", Input).value
+        if search_pattern.strip():
+            options["search"] = search_pattern.strip()
+
+        options["include_subdomains"] = self.query_one("#include_subdomains", Switch).value
+
+        # Limits
+        max_pages_input = self.query_one("#max_pages", Input).value
+        if max_pages_input.strip():
+            try:
+                options["limit"] = int(max_pages_input)
+            except ValueError:
+                pass
+
+        max_depth_input = self.query_one("#max_depth", Input).value
+        if max_depth_input.strip():
+            try:
+                options["max_depth"] = int(max_depth_input)
+            except ValueError:
+                pass
+
+        return options
+
+    def set_map_options(self, options: dict[str, object]) -> None:
+        """Set form values from options."""
+        if search := options.get("search"):
+            self.query_one("#search_pattern", Input).value = str(search)
+
+        subdomains_val = options.get("include_subdomains", False)
+        self.query_one("#include_subdomains", Switch).value = bool(subdomains_val)
+
+        # Set limits
+        limit = options.get("limit")
+        if limit is not None:
+            self.query_one("#max_pages", Input).value = str(limit)
+
+        max_depth = options.get("max_depth")
+        if max_depth is not None:
+            self.query_one("#max_depth", Input).value = str(max_depth)
+
+
+class ExtractOptionsForm(Widget):
+    """Form for configuring data extraction options."""
+
+    DEFAULT_CSS = """
+    ExtractOptionsForm {
+        border: solid $border;
+        background: $surface;
+        padding: 1;
+        height: auto;
+    }
+
+    ExtractOptionsForm .form-section {
+        margin-bottom: 2;
+        padding: 1;
+        border: solid $border-lighten-1;
+        background: $surface-lighten-1;
+    }
+
+    ExtractOptionsForm .form-row {
+        layout: horizontal;
+        align-items: start;
+        height: auto;
+        margin-bottom: 1;
+    }
+
+    ExtractOptionsForm .form-label {
+        width: 30%;
+        min-width: 15;
+        text-align: right;
+        padding-right: 2;
+        padding-top: 1;
+    }
+
+    ExtractOptionsForm .form-input {
+        width: 70%;
+    }
+
+    ExtractOptionsForm .text-area {
+        height: 6;
+    }
+    """
+
+    def __init__(
+        self,
+        *,
+        name: str | None = None,
+        id: str | None = None,
+        classes: str | None = None,
+        disabled: bool = False,
+        markup: bool = True,
+    ) -> None:
+        """Initialize extract options form."""
+        super().__init__(name=name, id=id, classes=classes, disabled=disabled, markup=markup)
+
+    @override
+    def compose(self) -> ComposeResult:
+        """Compose extract options form."""
+        yield Label("🎯 Data Extraction Configuration", classes="form-title")
+
+        # Extraction prompt section
+        yield Container(
+            Label("AI-Powered Extraction", classes="section-title"),
+            Horizontal(
+                Label("Custom Prompt:", classes="form-label"),
+                TextArea(
+                    placeholder="Extract product names, prices, and descriptions...",
+                    id="extract_prompt",
+                    classes="form-input text-area",
+                ),
+                classes="form-row",
+            ),
+            classes="form-section",
+        )
+
+        # Schema definition section
+        yield Container(
+            Label("Structured Schema (JSON)", classes="section-title"),
+            Horizontal(
+                Label("Schema Definition:", classes="form-label"),
+                TextArea(
+                    placeholder='{"product_name": "string", "price": "number", "description": "string"}',
+                    id="extract_schema",
+                    classes="form-input text-area",
+                ),
+                classes="form-row",
+            ),
+            Container(
+                Label("💡 Tip: Define the structure of data you want to extract"),
+                classes="help-text",
+            ),
+            classes="form-section",
+        )
+
+        # Schema presets
+        yield Container(
+            Label("Quick Presets", classes="section-title"),
+            Horizontal(
+                Button("📄 Article", id="preset_article", variant="default"),
+                Button("🛍️ Product", id="preset_product", variant="default"),
+                Button("👤 Contact", id="preset_contact", variant="default"),
+                Button("📊 Data", id="preset_data", variant="default"),
+                classes="preset-buttons",
+            ),
+            classes="form-section",
+        )
+
+    def get_extract_options(self) -> dict[str, object]:
+        """Get extraction options from form."""
+        options: dict[str, object] = {}
+
+        # Extract prompt
+        prompt = self.query_one("#extract_prompt", TextArea).text
+        if prompt.strip():
+            options["extract_prompt"] = prompt.strip()
+
+        # Extract schema
+        schema_text = self.query_one("#extract_schema", TextArea).text
+        if schema_text.strip():
+            try:
+                schema = json.loads(schema_text)
+                options["extract_schema"] = schema
+            except json.JSONDecodeError:
+                # Invalid JSON, skip schema
+                pass
+
+        return options
+
+    def set_extract_options(self, options: dict[str, object]) -> None:
+        """Set form values from options."""
+        if prompt := options.get("extract_prompt"):
+            self.query_one("#extract_prompt", TextArea).text = str(prompt)
+
+        if schema := options.get("extract_schema"):
+            # json is imported at module level; no local re-import needed
+            self.query_one("#extract_schema", TextArea).text = json.dumps(schema, indent=2)
+
+    def on_button_pressed(self, event: Button.Pressed) -> None:
+        """Handle preset button presses."""
+        schema_widget = self.query_one("#extract_schema", TextArea)
+        prompt_widget = self.query_one("#extract_prompt", TextArea)
+
+        if event.button.id == "preset_article":
+            schema_widget.text = """{
+    "title": "string",
+    "author": "string",
+    "date": "string",
+    "content": "string",
+    "tags": ["string"]
+}"""
+            prompt_widget.text = "Extract article title, author, publication date, main content, and associated tags"
+
+        elif event.button.id == "preset_product":
+            schema_widget.text = """{
+    "name": "string",
+    "price": "number",
+    "description": "string",
+    "category": "string",
+    "availability": "string"
+}"""
+            prompt_widget.text = "Extract product name, price, description, category, and availability status"
+
+        elif event.button.id == "preset_contact":
+            schema_widget.text = """{
+    "name": "string",
+    "email": "string",
+    "phone": "string",
+    "company": "string",
+    "position": "string"
+}"""
+            prompt_widget.text = "Extract contact information including name, email, phone, company, and position"
+
+        elif event.button.id == "preset_data":
+            schema_widget.text = """{
+    "metrics": [{"name": "string", "value": "number", "unit": "string"}],
+    "tables": [{"headers": ["string"], "rows": [["string"]]}]
+}"""
+            prompt_widget.text = "Extract numerical data, metrics, and tabular information"
+
+
+class FirecrawlConfigWidget(Widget):
+    """Complete Firecrawl configuration widget with tabbed interface."""
+
+    DEFAULT_CSS = """
+    FirecrawlConfigWidget {
+        border: solid $border;
+        background: $surface;
+        height: 100%;
+        padding: 1;
+    }
+
+    FirecrawlConfigWidget .config-header {
+        dock: top;
+        height: 3;
+        background: $primary;
+        color: $text;
+        padding: 1;
+        margin: -1 -1 1 -1;
+    }
+
+    FirecrawlConfigWidget .tab-buttons {
+        dock: top;
+        height: 3;
+        layout: horizontal;
+        margin-bottom: 1;
+    }
+
+    FirecrawlConfigWidget .tab-button {
+        width: 1fr;
+        margin-right: 1;
+    }
+
+    FirecrawlConfigWidget .tab-content {
+        height: 1fr;
+        overflow: auto;
+    }
+
+    FirecrawlConfigWidget .actions {
+        dock: bottom;
+        height: 3;
+        layout: horizontal;
+        align: center;
+        margin-top: 1;
+    }
+    """
+
+    def __init__(
+        self,
+        *,
+        name: str | None = None,
+        id: str | None = None,
+        classes: str | None = None,
+        disabled: bool = False,
+        markup: bool = True,
+    ) -> None:
+        """Initialize Firecrawl config widget."""
+        super().__init__(name=name, id=id, classes=classes, disabled=disabled, markup=markup)
+        self.current_tab = "scrape"
+
+    @override
+    def compose(self) -> ComposeResult:
+        """Compose config widget layout."""
+        yield Container(
+            Label("🔥 Firecrawl Configuration", classes="config-header"),
+            Horizontal(
+                Button("🔧 Scraping", id="tab_scrape", variant="primary",
classes="tab-button"), + Button("๐Ÿ—บ๏ธ Mapping", id="tab_map", variant="default", classes="tab-button"), + Button("๐ŸŽฏ Extraction", id="tab_extract", variant="default", classes="tab-button"), + classes="tab-buttons", + ), + Container( + ScrapeOptionsForm(id="scrape_form"), + classes="tab-content", + ), + Horizontal( + Button("๐Ÿ“‹ Load Preset", id="load_preset", variant="default"), + Button("๐Ÿ’พ Save Preset", id="save_preset", variant="default"), + Button("๐Ÿ”„ Reset", id="reset_config", variant="default"), + classes="actions", + ), + ) + + def on_mount(self) -> None: + """Initialize widget.""" + self.show_tab("scrape") + + def show_tab(self, tab_name: str) -> None: + """Show specific configuration tab.""" + self.current_tab = tab_name + + # Update button states + for tab in ["scrape", "map", "extract"]: + button = self.query_one(f"#tab_{tab}", Button) + button.variant = "primary" if tab == tab_name else "default" + # Update tab content + content_container = self.query_one(".tab-content", Container) + content_container.remove_children() + + if tab_name == "extract": + content_container.mount(ExtractOptionsForm(id="extract_form")) + elif tab_name == "map": + content_container.mount(MapOptionsForm(id="map_form")) + elif tab_name == "scrape": + content_container.mount(ScrapeOptionsForm(id="scrape_form")) + + def on_button_pressed(self, event: Button.Pressed) -> None: + """Handle button presses.""" + if event.button.id and event.button.id.startswith("tab_"): + tab_name = event.button.id[4:] # Remove "tab_" prefix + self.show_tab(tab_name) + + def get_all_options(self) -> FirecrawlOptions: + """Get all configuration options.""" + options: FirecrawlOptions = {} + + # Try to get options from currently mounted form + if self.current_tab == "scrape": + try: + form = self.query_one("#scrape_form", ScrapeOptionsForm) + scrape_opts = form.get_scrape_options() + options.update(cast(FirecrawlOptions, scrape_opts)) + except Exception: + pass + elif self.current_tab == "map": + try: + map_form = self.query_one("#map_form", MapOptionsForm) + map_opts = map_form.get_map_options() + options.update(cast(FirecrawlOptions, map_opts)) + except Exception: + pass + elif self.current_tab == "extract": + try: + extract_form = self.query_one("#extract_form", ExtractOptionsForm) + extract_opts = extract_form.get_extract_options() + options.update(cast(FirecrawlOptions, extract_opts)) + except Exception: + pass + + return options + + + +"""R2R-specific widgets for chunk viewing and entity visualization.""" + +from __future__ import annotations + +from typing import Any + +from textual import work +from textual.app import ComposeResult +from textual.containers import Container, Horizontal, Vertical, VerticalScroll +from textual.widget import Widget +from textual.widgets import Button, DataTable, Label, Markdown, ProgressBar, Static, Tree +from typing_extensions import override + +from ....storage.r2r.storage import R2RStorage +from ..models import ChunkInfo, EntityInfo + + +class ChunkViewer(Widget): + """Widget for viewing document chunks with navigation.""" + + DEFAULT_CSS = """ + ChunkViewer { + border: solid $border; + background: $surface; + height: 100%; + } + + ChunkViewer .chunk-header { + dock: top; + height: 3; + background: $primary; + color: $text; + padding: 1; + } + + ChunkViewer .chunk-navigation { + dock: top; + height: 3; + background: $surface-lighten-1; + padding: 1; + } + + ChunkViewer .chunk-content { + height: 1fr; + padding: 1; + overflow: auto; + } + + ChunkViewer .chunk-footer { + dock: 
bottom; + height: 3; + background: $surface-darken-1; + padding: 1; + } + """ + + def __init__(self, r2r_storage: R2RStorage, document_id: str, **kwargs: Any) -> None: + """Initialize chunk viewer.""" + super().__init__(**kwargs) + self.r2r_storage: R2RStorage = r2r_storage + self.document_id: str = document_id + self.chunks: list[ChunkInfo] = [] + self.current_chunk_index: int = 0 + + @override + def compose(self) -> ComposeResult: + """Compose chunk viewer layout.""" + yield Container( + Static("๐Ÿ“„ Document Chunks", classes="chunk-header"), + Horizontal( + Button("โ—€ Previous", id="prev_chunk", variant="default"), + Static("Chunk 1 of 1", id="chunk_info"), + Button("Next โ–ถ", id="next_chunk", variant="default"), + classes="chunk-navigation", + ), + VerticalScroll( + Markdown("", id="chunk_content"), + classes="chunk-content", + ), + Container( + Static("Loading chunks...", id="chunk_status"), + classes="chunk-footer", + ), + ) + + def on_mount(self) -> None: + """Initialize chunk viewer.""" + self.load_chunks() + + @work(exclusive=True) + async def load_chunks(self) -> None: + """Load document chunks.""" + try: + chunks_data = await self.r2r_storage.get_document_chunks(self.document_id) + self.chunks = [] + + for chunk_data in chunks_data: + chunk_info: ChunkInfo = { + "id": str(chunk_data.get("id", "")), + "document_id": self.document_id, + "content": str(chunk_data.get("text", "")), + "start_index": (lambda si: int(si) if isinstance(si, (int, str)) else 0)(chunk_data.get("start_index", 0)), + "end_index": (lambda ei: int(ei) if isinstance(ei, (int, str)) else 0)(chunk_data.get("end_index", 0)), + "metadata": ( + dict(metadata_val) if (metadata_val := chunk_data.get("metadata")) and isinstance(metadata_val, dict) else {} + ), + } + self.chunks.append(chunk_info) + + if self.chunks: + self.current_chunk_index = 0 + self.update_chunk_display() + else: + self.query_one("#chunk_status", Static).update("No chunks found") + + except Exception as e: + self.query_one("#chunk_status", Static).update(f"Error loading chunks: {e}") + + def update_chunk_display(self) -> None: + """Update chunk display with current chunk.""" + if not self.chunks: + return + + chunk = self.chunks[self.current_chunk_index] + + # Update content + content_widget = self.query_one("#chunk_content", Markdown) + content_widget.update(chunk["content"]) + + # Update navigation info + chunk_info = self.query_one("#chunk_info", Static) + chunk_info.update(f"Chunk {self.current_chunk_index + 1} of {len(self.chunks)}") + + # Update status + status_widget = self.query_one("#chunk_status", Static) + status_widget.update( + f"Chunk {chunk['id']} | " + f"Range: {chunk['start_index']}-{chunk['end_index']} | " + f"Length: {len(chunk['content'])} chars" + ) + + # Update button states + prev_btn = self.query_one("#prev_chunk", Button) + next_btn = self.query_one("#next_chunk", Button) + prev_btn.disabled = self.current_chunk_index == 0 + next_btn.disabled = self.current_chunk_index >= len(self.chunks) - 1 + + def on_button_pressed(self, event: Button.Pressed) -> None: + """Handle button presses.""" + if event.button.id == "prev_chunk" and self.current_chunk_index > 0: + self.current_chunk_index -= 1 + self.update_chunk_display() + elif event.button.id == "next_chunk" and self.current_chunk_index < len(self.chunks) - 1: + self.current_chunk_index += 1 + self.update_chunk_display() + + +class EntityGraph(Widget): + """Widget for visualizing extracted entities and relationships.""" + + DEFAULT_CSS = """ + EntityGraph { + border: solid 
$border; + background: $surface; + height: 100%; + } + + EntityGraph .entity-header { + dock: top; + height: 3; + background: $primary; + color: $text; + padding: 1; + } + + EntityGraph .entity-tree { + height: 1fr; + overflow: auto; + } + + EntityGraph .entity-details { + dock: bottom; + height: 8; + background: $surface-lighten-1; + padding: 1; + border-top: solid $border; + } + """ + + def __init__(self, r2r_storage: R2RStorage, document_id: str, **kwargs: Any) -> None: + """Initialize entity graph.""" + super().__init__(**kwargs) + self.r2r_storage: R2RStorage = r2r_storage + self.document_id: str = document_id + self.entities: list[EntityInfo] = [] + + @override + def compose(self) -> ComposeResult: + """Compose entity graph layout.""" + yield Container( + Static("๐Ÿ•ธ๏ธ Entity Graph", classes="entity-header"), + Tree("Entities", id="entity_tree", classes="entity-tree"), + VerticalScroll( + Label("Entity Details"), + Static("Select an entity to view details", id="entity_details"), + classes="entity-details", + ), + ) + + def on_mount(self) -> None: + """Initialize entity graph.""" + self.load_entities() + + @work(exclusive=True) + async def load_entities(self) -> None: + """Load entities from document.""" + try: + entities_data = await self.r2r_storage.extract_entities(self.document_id) + self.entities = [] + + # Parse entities from R2R response + entities_list = entities_data.get("entities", []) + if not isinstance(entities_list, list): + entities_list = [] + for entity_data in entities_list: + entity_info: EntityInfo = { + "id": str(entity_data.get("id", "")), + "name": str(entity_data.get("name", "")), + "type": str(entity_data.get("type", "unknown")), + "confidence": float(entity_data.get("confidence", 0.0)), + "metadata": dict(entity_data.get("metadata", {})), + } + self.entities.append(entity_info) + + self.populate_entity_tree() + + except Exception as e: + details_widget = self.query_one("#entity_details", Static) + details_widget.update(f"Error loading entities: {e}") + + def populate_entity_tree(self) -> None: + """Populate the entity tree.""" + tree = self.query_one("#entity_tree", Tree) + tree.clear() + + if not self.entities: + tree.root.add_leaf("No entities found") + return + + # Group entities by type + entities_by_type: dict[str, list[EntityInfo]] = {} + for entity in self.entities: + entity_type = entity["type"] + if entity_type not in entities_by_type: + entities_by_type[entity_type] = [] + entities_by_type[entity_type].append(entity) + + # Add entities to tree grouped by type + for entity_type, type_entities in entities_by_type.items(): + type_node = tree.root.add(f"{entity_type.title()} ({len(type_entities)})") + for entity in type_entities: + confidence_pct = int(entity["confidence"] * 100) + entity_node = type_node.add_leaf(f"{entity['name']} ({confidence_pct}%)") + entity_node.data = entity + + tree.root.expand() + + def on_tree_node_selected(self, event: Tree.NodeSelected[EntityInfo]) -> None: + """Handle entity selection.""" + if hasattr(event.node, "data") and event.node.data: + entity = event.node.data + self.show_entity_details(entity) + + def show_entity_details(self, entity: EntityInfo) -> None: + """Show detailed information about an entity.""" + details_widget = self.query_one("#entity_details", Static) + + details_text = f"""**Entity:** {entity['name']} +**Type:** {entity['type']} +**Confidence:** {entity['confidence']:.2%} +**ID:** {entity['id']} + +**Metadata:** +""" + for key, value in entity["metadata"].items(): + details_text += f"- **{key}:** 
{value}\n" + + details_widget.update(details_text) + + +class CollectionStats(Widget): + """Widget for showing R2R-specific collection statistics.""" + + DEFAULT_CSS = """ + CollectionStats { + border: solid $border; + background: $surface; + height: 100%; + padding: 1; + } + + CollectionStats .stats-header { + dock: top; + height: 3; + background: $primary; + color: $text; + padding: 1; + margin: -1 -1 1 -1; + } + + CollectionStats .stats-grid { + layout: grid; + grid-size: 2; + grid-columns: 1fr 1fr; + grid-gutter: 1; + height: auto; + } + + CollectionStats .stat-card { + background: $surface-lighten-1; + border: solid $border; + padding: 1; + height: auto; + } + + CollectionStats .stat-value { + color: $primary; + text-style: bold; + text-align: center; + } + + CollectionStats .stat-label { + color: $text-muted; + text-align: center; + margin-top: 1; + } + + CollectionStats .progress-section { + margin-top: 2; + } + """ + + def __init__(self, r2r_storage: R2RStorage, collection_name: str, **kwargs: Any) -> None: + """Initialize collection stats.""" + super().__init__(**kwargs) + self.r2r_storage: R2RStorage = r2r_storage + self.collection_name: str = collection_name + + @override + def compose(self) -> ComposeResult: + """Compose stats layout.""" + yield Container( + Static(f"๐Ÿ“Š {self.collection_name} Statistics", classes="stats-header"), + Container( + Container( + Static("0", id="document_count", classes="stat-value"), + Static("Documents", classes="stat-label"), + classes="stat-card", + ), + Container( + Static("0", id="chunk_count", classes="stat-value"), + Static("Chunks", classes="stat-label"), + classes="stat-card", + ), + Container( + Static("0", id="entity_count", classes="stat-value"), + Static("Entities", classes="stat-label"), + classes="stat-card", + ), + Container( + Static("0 MB", id="storage_size", classes="stat-value"), + Static("Storage Used", classes="stat-label"), + classes="stat-card", + ), + classes="stats-grid", + ), + Container( + Label("Processing Progress"), + ProgressBar(id="processing_progress", total=100, show_eta=False), + Static("Idle", id="processing_status"), + classes="progress-section", + ), + ) + + def on_mount(self) -> None: + """Initialize stats display.""" + self.refresh_stats() + + @work(exclusive=True) + async def refresh_stats(self) -> None: + """Refresh collection statistics.""" + try: + # Get basic document count + doc_count = await self.r2r_storage.count(collection_name=self.collection_name) + self.query_one("#document_count", Static).update(str(doc_count)) + + # Estimate other stats (these would need real implementation) + estimated_chunks = doc_count * 5 # Rough estimate + estimated_entities = doc_count * 10 # Rough estimate + estimated_size_mb = doc_count * 0.05 # Rough estimate + + self.query_one("#chunk_count", Static).update(str(estimated_chunks)) + self.query_one("#entity_count", Static).update(str(estimated_entities)) + self.query_one("#storage_size", Static).update(f"{estimated_size_mb:.1f} MB") + + # Update progress (would be real-time in actual implementation) + progress_bar = self.query_one("#processing_progress", ProgressBar) + progress_bar.progress = 100 # Assume complete for now + + status_widget = self.query_one("#processing_status", Static) + status_widget.update("All documents processed") + + except Exception as e: + self.query_one("#processing_status", Static).update(f"Error: {e}") + + +class DocumentOverview(Widget): + """Widget for comprehensive document overview and statistics.""" + + DEFAULT_CSS = """ + 
DocumentOverview { + layout: vertical; + height: 100%; + } + + DocumentOverview .overview-header { + dock: top; + height: 3; + background: $primary; + color: $text; + padding: 1; + } + + DocumentOverview .overview-content { + height: 1fr; + layout: horizontal; + } + + DocumentOverview .overview-left { + width: 50%; + padding: 1; + } + + DocumentOverview .overview-right { + width: 50%; + padding: 1; + } + + DocumentOverview .info-table { + height: auto; + margin-bottom: 2; + } + """ + + def __init__(self, r2r_storage: R2RStorage, document_id: str, **kwargs: Any) -> None: + """Initialize document overview.""" + super().__init__(**kwargs) + self.r2r_storage: R2RStorage = r2r_storage + self.document_id: str = document_id + + @override + def compose(self) -> ComposeResult: + """Compose overview layout.""" + yield Container( + Static("๐Ÿ“‹ Document Overview", classes="overview-header"), + Horizontal( + Vertical( + Label("Document Information"), + DataTable[str](id="doc_info_table", classes="info-table"), + Label("Processing Statistics"), + DataTable[str](id="stats_table", classes="info-table"), + classes="overview-left", + ), + Vertical( + ChunkViewer(self.r2r_storage, self.document_id), + classes="overview-right", + ), + classes="overview-content", + ), + ) + + def on_mount(self) -> None: + """Initialize overview.""" + self.load_overview() + + @work(exclusive=True) + async def load_overview(self) -> None: + """Load comprehensive document overview.""" + try: + overview_data = await self.r2r_storage.get_document_overview(self.document_id) + + # Populate document info table + doc_table = self.query_one("#doc_info_table", DataTable) + doc_table.add_columns("Property", "Value") + + document_info_raw = overview_data.get("document", {}) + document_info = document_info_raw if isinstance(document_info_raw, dict) else {} + doc_table.add_row("ID", str(document_info.get("id", "N/A"))) + doc_table.add_row("Title", str(document_info.get("title", "N/A"))) + doc_table.add_row("Created", str(document_info.get("created_at", "N/A"))) + doc_table.add_row("Modified", str(document_info.get("updated_at", "N/A"))) + + # Populate stats table + stats_table = self.query_one("#stats_table", DataTable) + stats_table.add_columns("Metric", "Count") + + chunk_count = overview_data.get("chunk_count", 0) + stats_table.add_row("Chunks", str(chunk_count)) + stats_table.add_row("Characters", str(len(str(document_info.get("content", ""))))) + + except Exception as e: + # Handle error by showing minimal info + doc_table = self.query_one("#doc_info_table", DataTable) + doc_table.add_columns("Property", "Value") + doc_table.add_row("Error", str(e)) + + """Main TUI application with enhanced keyboard navigation.""" @@ -7126,6 +4893,422 @@ class CollectionManagementApp(App[None]): # No else clause needed - just handle our events + +"""Responsive layout system for TUI applications.""" + +from __future__ import annotations + +from typing import cast + +from textual.app import ComposeResult +from textual.containers import Container, VerticalScroll +from textual.widget import Widget +from textual.widgets import Static +from typing_extensions import override + + +class ResponsiveGrid(Container): + """Grid that auto-adjusts based on terminal size.""" + + DEFAULT_CSS: str = """ + ResponsiveGrid { + layout: grid; + grid-size: 1; + grid-columns: 1fr; + grid-rows: auto; + grid-gutter: 1; + padding: 1; + } + + ResponsiveGrid.two-column { + grid-size: 2; + grid-columns: 1fr 1fr; + } + + ResponsiveGrid.three-column { + grid-size: 3; + grid-columns: 
1fr 1fr 1fr; + } + + ResponsiveGrid.auto-fit { + grid-columns: repeat(auto-fit, minmax(20, 1fr)); + } + + ResponsiveGrid.compact { + grid-gutter: 0; + padding: 0; + } + """ + + def __init__( + self, + *children: Widget, + columns: int = 1, + auto_fit: bool = False, + compact: bool = False, + name: str | None = None, + id: str | None = None, + classes: str | None = None, + disabled: bool = False, + markup: bool = True, + ) -> None: + """Initialize responsive grid.""" + super().__init__(*children, name=name, id=id, classes=classes, disabled=disabled, markup=markup) + self._columns: int = columns + self._auto_fit: bool = auto_fit + self._compact: bool = compact + + def on_mount(self) -> None: + """Apply responsive classes based on configuration.""" + widget = cast(Widget, self) + if self._auto_fit: + widget.add_class("auto-fit") + elif self._columns == 2: + widget.add_class("two-column") + elif self._columns == 3: + widget.add_class("three-column") + + if self._compact: + widget.add_class("compact") + + def on_resize(self) -> None: + """Adjust layout based on terminal size.""" + if self._auto_fit: + # Let CSS handle auto-fit + return + + widget = cast(Widget, self) + terminal_width = widget.size.width + if terminal_width < 60: + # Force single column on narrow terminals + widget.remove_class("two-column", "three-column") + widget.styles.grid_size_columns = 1 + widget.styles.grid_columns = "1fr" + elif terminal_width < 100 and self._columns > 2: + # Force two columns on medium terminals + widget.remove_class("three-column") + widget.add_class("two-column") + widget.styles.grid_size_columns = 2 + widget.styles.grid_columns = "1fr 1fr" + elif self._columns == 2: + widget.add_class("two-column") + elif self._columns == 3: + widget.add_class("three-column") + + +class CollapsibleSidebar(Container): + """Sidebar that can be collapsed to save space.""" + + DEFAULT_CSS: str = """ + CollapsibleSidebar { + dock: left; + width: 25%; + min-width: 20; + max-width: 40; + background: $surface; + border-right: solid $border; + padding: 1; + transition: width 300ms; + } + + CollapsibleSidebar.collapsed { + width: 3; + min-width: 3; + overflow: hidden; + } + + CollapsibleSidebar.collapsed > * { + display: none; + } + + CollapsibleSidebar .sidebar-toggle { + dock: top; + height: 1; + background: $primary; + color: $text; + text-align: center; + margin-bottom: 1; + } + + CollapsibleSidebar .sidebar-content { + height: 1fr; + overflow-y: auto; + } + """ + + def __init__( + self, + *children: Widget, + collapsed: bool = False, + name: str | None = None, + id: str | None = None, + classes: str | None = None, + disabled: bool = False, + markup: bool = True, + ) -> None: + """Initialize collapsible sidebar.""" + super().__init__(name=name, id=id, classes=classes, disabled=disabled, markup=markup) + self._collapsed: bool = collapsed + self._children: tuple[Widget, ...] 
= children + + @override + def compose(self) -> ComposeResult: + """Compose sidebar with toggle and content.""" + yield Static("โ˜ฐ", classes="sidebar-toggle") + with VerticalScroll(classes="sidebar-content"): + yield from self._children + + def on_mount(self) -> None: + """Apply initial collapsed state.""" + if self._collapsed: + cast(Widget, self).add_class("collapsed") + + def on_click(self) -> None: + """Toggle sidebar when clicked.""" + self.toggle() + + def toggle(self) -> None: + """Toggle sidebar collapsed state.""" + self._collapsed = not self._collapsed + widget = cast(Widget, self) + if self._collapsed: + widget.add_class("collapsed") + else: + widget.remove_class("collapsed") + + def expand_sidebar(self) -> None: + """Expand sidebar.""" + if self._collapsed: + self.toggle() + + def collapse_sidebar(self) -> None: + """Collapse sidebar.""" + if not self._collapsed: + self.toggle() + + +class TabularLayout(Container): + """Optimized layout for data tables with optional sidebar.""" + + DEFAULT_CSS: str = """ + TabularLayout { + layout: horizontal; + height: 100%; + } + + TabularLayout .main-content { + width: 1fr; + height: 100%; + layout: vertical; + } + + TabularLayout .table-container { + height: 1fr; + overflow: auto; + border: solid $border; + background: $surface; + } + + TabularLayout .table-header { + dock: top; + height: 3; + background: $primary; + color: $text; + padding: 1; + } + + TabularLayout .table-footer { + dock: bottom; + height: 3; + background: $surface-lighten-1; + padding: 1; + border-top: solid $border; + } + """ + + def __init__( + self, + table_widget: Widget, + header_content: Widget | None = None, + footer_content: Widget | None = None, + sidebar_content: Widget | None = None, + name: str | None = None, + id: str | None = None, + classes: str | None = None, + disabled: bool = False, + markup: bool = True, + ) -> None: + """Initialize tabular layout.""" + super().__init__(name=name, id=id, classes=classes, disabled=disabled, markup=markup) + self.table_widget: Widget = table_widget + self.header_content: Widget | None = header_content + self.footer_content: Widget | None = footer_content + self.sidebar_content: Widget | None = sidebar_content + + @override + def compose(self) -> ComposeResult: + """Compose layout with optional sidebar.""" + if self.sidebar_content: + yield CollapsibleSidebar(self.sidebar_content) + + with Container(classes="main-content"): + if self.header_content: + yield Container(self.header_content, classes="table-header") + + yield Container(self.table_widget, classes="table-container") + + if self.footer_content: + yield Container(self.footer_content, classes="table-footer") + + +class CardLayout(ResponsiveGrid): + """Grid layout optimized for card-based content.""" + + DEFAULT_CSS: str = """ + CardLayout { + grid-gutter: 2; + padding: 2; + } + + CardLayout .card { + background: $surface; + border: solid $border; + border-radius: 1; + padding: 2; + height: auto; + min-height: 10; + } + + CardLayout .card:hover { + border: solid $accent; + background: $surface-lighten-1; + } + + CardLayout .card:focus { + border: solid $primary; + } + + CardLayout .card-header { + dock: top; + height: 3; + background: $primary-lighten-1; + color: $text; + padding: 1; + margin: -2 -2 1 -2; + border-radius: 1 1 0 0; + } + + CardLayout .card-content { + height: 1fr; + overflow: auto; + } + + CardLayout .card-footer { + dock: bottom; + height: 3; + background: $surface-darken-1; + padding: 1; + margin: 1 -2 -2 -2; + border-radius: 0 0 1 1; + } + """ + + 
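Taken together, these layout classes compose like ordinary Textual containers. A minimal usage sketch, assuming the classes above are importable; `LayoutDemo` and its contents are invented for illustration:

```python
from textual.app import App, ComposeResult
from textual.widgets import Static


class LayoutDemo(App[None]):
    """Two responsive cards beside a collapsible filter sidebar."""

    def compose(self) -> ComposeResult:
        yield CollapsibleSidebar(Static("filters"), collapsed=True)
        yield ResponsiveGrid(
            Static("card 1", classes="card"),
            Static("card 2", classes="card"),
            columns=2,  # on_resize drops this to one column below 60 cells
        )


if __name__ == "__main__":
    LayoutDemo().run()
```

Because `on_resize` forces narrow terminals back to a single column, the same screen degrades gracefully from wide to compact layouts.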
def __init__( + self, + name: str | None = None, + id: str | None = None, + classes: str | None = None, + disabled: bool = False, + markup: bool = True, + ) -> None: + """Initialize card layout with default settings for cards.""" + # Default to auto-fit cards with minimum width + super().__init__(auto_fit=True, name=name, id=id, classes=classes, disabled=disabled, markup=markup) + + +class SplitPane(Container): + """Resizable split pane layout.""" + + DEFAULT_CSS: str = """ + SplitPane { + layout: horizontal; + height: 100%; + } + + SplitPane.vertical { + layout: vertical; + } + + SplitPane .left-pane, + SplitPane .top-pane { + width: 50%; + height: 50%; + background: $surface; + border-right: solid $border; + border-bottom: solid $border; + } + + SplitPane .right-pane, + SplitPane .bottom-pane { + width: 50%; + height: 50%; + background: $surface; + } + + SplitPane .splitter { + width: 1; + height: 1; + background: $border; + } + + SplitPane.vertical .splitter { + width: 100%; + height: 1; + } + """ + + def __init__( + self, + left_content: Widget, + right_content: Widget, + vertical: bool = False, + split_ratio: float = 0.5, + name: str | None = None, + id: str | None = None, + classes: str | None = None, + disabled: bool = False, + markup: bool = True, + ) -> None: + """Initialize split pane.""" + super().__init__(name=name, id=id, classes=classes, disabled=disabled, markup=markup) + self._left_content: Widget = left_content + self._right_content: Widget = right_content + self._vertical: bool = vertical + self._split_ratio: float = split_ratio + + @override + def compose(self) -> ComposeResult: + """Compose split pane layout.""" + if self._vertical: + cast(Widget, self).add_class("vertical") + + pane_classes = ("top-pane", "bottom-pane") if self._vertical else ("left-pane", "right-pane") + + yield Container(self._left_content, classes=pane_classes[0]) + yield Static("", classes="splitter") + yield Container(self._right_content, classes=pane_classes[1]) + + def on_mount(self) -> None: + """Apply split ratio.""" + widget = cast(Widget, self) + if self._vertical: + widget.query_one(".top-pane").styles.height = f"{self._split_ratio * 100}%" + widget.query_one(".bottom-pane").styles.height = f"{(1 - self._split_ratio) * 100}%" + else: + widget.query_one(".left-pane").styles.width = f"{self._split_ratio * 100}%" + widget.query_one(".right-pane").styles.width = f"{(1 - self._split_ratio) * 100}%" + + """Comprehensive theming system for TUI applications with WCAG AA accessibility compliance.""" @@ -9584,1161 +7767,6 @@ def configure_prefect(settings: Settings) -> None: _prefect_settings_stack = new_stack - -"""Application settings and configuration.""" - -from functools import lru_cache -from typing import Annotated, Literal - -from prefect.variables import Variable -from pydantic import Field, HttpUrl, model_validator -from pydantic_settings import BaseSettings, SettingsConfigDict - - -class Settings(BaseSettings): - """Application settings.""" - - model_config = SettingsConfigDict( - env_file=".env", - env_file_encoding="utf-8", - case_sensitive=False, - extra="ignore", # Ignore extra environment variables - ) - - # API Keys - firecrawl_api_key: str | None = None - openwebui_api_key: str | None = None - weaviate_api_key: str | None = None - r2r_api_key: str | None = None - - # Endpoints - llm_endpoint: HttpUrl = HttpUrl("http://llm.lab") - weaviate_endpoint: HttpUrl = HttpUrl("http://weaviate.yo") - openwebui_endpoint: HttpUrl = HttpUrl("http://chat.lab") # This will be the API URL - 
firecrawl_endpoint: HttpUrl = HttpUrl("http://crawl.lab:30002") - r2r_endpoint: HttpUrl | None = Field(default=None, alias="r2r_api_url") - - # Model Configuration - embedding_model: str = "ollama/bge-m3:latest" - embedding_dimension: int = 1024 - - # Ingestion Settings - default_batch_size: Annotated[int, Field(gt=0, le=500)] = 50 - max_file_size: int = 1_000_000 - max_crawl_depth: Annotated[int, Field(ge=1, le=20)] = 5 - max_crawl_pages: Annotated[int, Field(ge=1, le=1000)] = 100 - - # Storage Settings - default_storage_backend: Literal["weaviate", "open_webui", "r2r"] = "weaviate" - default_collection_prefix: str = "docs" - - # Prefect Settings - prefect_api_url: HttpUrl | None = None - prefect_api_key: str | None = None - prefect_work_pool: str = "default" - - # Scheduling Defaults - default_schedule_interval: Annotated[int, Field(ge=1, le=10080)] = 60 # Max 1 week - - # Performance Settings - max_concurrent_tasks: Annotated[int, Field(ge=1, le=20)] = 5 - request_timeout: Annotated[int, Field(ge=10, le=300)] = 60 - - # Logging - log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR"] = "INFO" - - def get_storage_endpoint(self, backend: str) -> HttpUrl: - """ - Get endpoint for storage backend. - - Args: - backend: Storage backend name - - Returns: - Endpoint URL - - Raises: - ValueError: If backend is unknown or R2R endpoint not configured - """ - endpoints = { - "weaviate": self.weaviate_endpoint, - "open_webui": self.openwebui_endpoint, - } - - if backend in endpoints: - return endpoints[backend] - elif backend == "r2r": - if not self.r2r_endpoint: - raise ValueError( - "R2R_API_URL must be set in environment variables. " - "This should have been caught during settings validation." - ) - return self.r2r_endpoint - else: - raise ValueError(f"Unknown backend: {backend}. Supported: weaviate, open_webui, r2r") - - def get_api_key(self, service: str) -> str | None: - """ - Get API key for service. - - Args: - service: Service name - - Returns: - API key or None - """ - service_map = { - "firecrawl": self.firecrawl_api_key, - "openwebui": self.openwebui_api_key, - "weaviate": self.weaviate_api_key, - "r2r": self.r2r_api_key, - } - return service_map.get(service) - - @model_validator(mode="after") - def validate_backend_configuration(self) -> "Settings": - """Validate that required configuration is present for the default backend.""" - backend = self.default_storage_backend - - # Validate R2R backend configuration - if backend == "r2r" and not self.r2r_endpoint: - raise ValueError( - "R2R_API_URL must be set in environment variables when using R2R as default backend" - ) - - # Validate API key requirements (optional warning for missing keys) - required_keys = { - "weaviate": ("WEAVIATE_API_KEY", self.weaviate_api_key), - "open_webui": ("OPENWEBUI_API_KEY", self.openwebui_api_key), - "r2r": ("R2R_API_KEY", self.r2r_api_key), - } - - if backend in required_keys: - key_name, key_value = required_keys[backend] - if not key_value: - import warnings - warnings.warn( - f"{key_name} not set - authentication may fail for {backend} backend", - UserWarning, - stacklevel=2 - ) - - return self - - -@lru_cache -def get_settings() -> Settings: - """ - Get cached settings instance. 
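The settings helpers above centralize endpoint and API-key resolution per backend. A minimal usage sketch, assuming the documented defaults; the import path mirrors the repository layout:

```python
from ingest_pipeline.config.settings import get_settings

settings = get_settings()  # cached by lru_cache, so repeated calls are cheap
endpoint = settings.get_storage_endpoint("weaviate")  # HttpUrl("http://weaviate.yo")
api_key = settings.get_api_key("weaviate")            # WEAVIATE_API_KEY or None
# get_storage_endpoint("r2r") raises ValueError unless R2R_API_URL is set
```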
- - Returns: - Settings instance - """ - return Settings() - - -class PrefectVariableConfig: - """Helper class for managing Prefect variables with fallbacks to settings.""" - - def __init__(self) -> None: - self._settings = get_settings() - self._variable_names = [ - "default_batch_size", "max_file_size", "max_crawl_depth", "max_crawl_pages", - "default_storage_backend", "default_collection_prefix", "max_concurrent_tasks", - "request_timeout", "default_schedule_interval" - ] - - def _get_fallback_value(self, name: str, default_value: object = None) -> object: - """Get fallback value from settings or default.""" - return default_value or getattr(self._settings, name, default_value) - - def get_with_fallback(self, name: str, default_value: str | int | float | None = None) -> str | int | float | None: - """Get variable value with fallback synchronously.""" - fallback = self._get_fallback_value(name, default_value) - # Ensure fallback is a type that Variable expects - variable_fallback = str(fallback) if fallback is not None else None - try: - result = Variable.get(name, default=variable_fallback) - # Variable can return various types, convert to our expected types - if isinstance(result, (str, int, float)): - return result - elif result is None: - return None - else: - # Convert other types to string - return str(result) - except Exception: - # Return fallback with proper type - if isinstance(fallback, (str, int, float)) or fallback is None: - return fallback - return str(fallback) if fallback is not None else None - - async def get_with_fallback_async(self, name: str, default_value: str | int | float | None = None) -> str | int | float | None: - """Get variable value with fallback asynchronously.""" - fallback = self._get_fallback_value(name, default_value) - variable_fallback = str(fallback) if fallback is not None else None - try: - result = await Variable.aget(name, default=variable_fallback) - # Variable can return various types, convert to our expected types - if isinstance(result, (str, int, float)): - return result - elif result is None: - return None - else: - # Convert other types to string - return str(result) - except Exception: - # Return fallback with proper type - if isinstance(fallback, (str, int, float)) or fallback is None: - return fallback - return str(fallback) if fallback is not None else None - - def get_ingestion_config(self) -> dict[str, str | int | float | None]: - """Get all ingestion-related configuration variables synchronously.""" - return {name: self.get_with_fallback(name) for name in self._variable_names} - - async def get_ingestion_config_async(self) -> dict[str, str | int | float | None]: - """Get all ingestion-related configuration variables asynchronously.""" - result = {} - for name in self._variable_names: - result[name] = await self.get_with_fallback_async(name) - return result - - -@lru_cache -def get_prefect_config() -> PrefectVariableConfig: - """Get cached Prefect variable configuration helper.""" - return PrefectVariableConfig() - - - -"""Core data models with strict typing.""" - -from datetime import UTC, datetime -from enum import Enum -from typing import Annotated, TypedDict -from uuid import UUID, uuid4 - -from prefect.blocks.core import Block -from pydantic import BaseModel, Field, HttpUrl, SecretStr - - -class IngestionStatus(str, Enum): - """Status of an ingestion job.""" - - PENDING = "pending" - IN_PROGRESS = "in_progress" - COMPLETED = "completed" - PARTIAL = "partial" # Some documents succeeded, some failed - FAILED = "failed" - 
CANCELLED = "cancelled" - - -class StorageBackend(str, Enum): - """Available storage backends.""" - - WEAVIATE = "weaviate" - OPEN_WEBUI = "open_webui" - R2R = "r2r" - - -class IngestionSource(str, Enum): - """Types of ingestion sources.""" - - WEB = "web" - REPOSITORY = "repository" - DOCUMENTATION = "documentation" - - -class VectorConfig(BaseModel): - """Configuration for vectorization.""" - - model: str = Field(default="ollama/bge-m3:latest") - embedding_endpoint: HttpUrl = Field(default=HttpUrl("http://llm.lab")) - dimension: int = Field(default=1024) - batch_size: Annotated[int, Field(gt=0, le=1000)] = 100 - - -class StorageConfig(Block): - """Configuration for storage backend.""" - - _block_type_name = "Storage Configuration" - _block_type_slug = "storage-config" - _description = "Configures storage backend connections and settings for document ingestion" - - backend: StorageBackend - endpoint: HttpUrl - api_key: SecretStr | None = Field(default=None) - collection_name: str = Field(default="documents") - batch_size: Annotated[int, Field(gt=0, le=1000)] = 100 - - -class FirecrawlConfig(Block): - """Configuration for Firecrawl ingestion (operational parameters only).""" - - _block_type_name = "Firecrawl Configuration" - _block_type_slug = "firecrawl-config" - _description = "Configures Firecrawl web scraping and crawling parameters" - - formats: list[str] = Field(default_factory=lambda: ["markdown", "html"]) - max_depth: Annotated[int, Field(ge=1, le=20)] = 5 - limit: Annotated[int, Field(ge=1, le=1000)] = 100 - only_main_content: bool = Field(default=True) - include_subdomains: bool = Field(default=False) - - -class RepomixConfig(Block): - """Configuration for Repomix ingestion.""" - - _block_type_name = "Repomix Configuration" - _block_type_slug = "repomix-config" - _description = "Configures repository ingestion patterns and file processing settings" - - include_patterns: list[str] = Field( - default_factory=lambda: ["*.py", "*.js", "*.ts", "*.md", "*.yaml", "*.json"] - ) - exclude_patterns: list[str] = Field( - default_factory=lambda: ["**/node_modules/**", "**/__pycache__/**", "**/.git/**"] - ) - max_file_size: int = Field(default=1_000_000) # 1MB - respect_gitignore: bool = Field(default=True) - - -class R2RConfig(Block): - """Configuration for R2R ingestion.""" - - _block_type_name = "R2R Configuration" - _block_type_slug = "r2r-config" - _description = "Configures R2R-specific ingestion settings including chunking and graph enrichment" - - chunk_size: Annotated[int, Field(ge=100, le=8192)] = 1000 - chunk_overlap: Annotated[int, Field(ge=0, le=1000)] = 200 - enable_graph_enrichment: bool = Field(default=False) - graph_creation_settings: dict[str, object] | None = Field(default=None) - - -class DocumentMetadataRequired(TypedDict): - """Required metadata fields for a document.""" - source_url: str - timestamp: datetime - content_type: str - word_count: int - char_count: int - - -class DocumentMetadata(DocumentMetadataRequired, total=False): - """Rich metadata for a document with R2R-compatible fields.""" - - # Basic optional fields - title: str | None - description: str | None - - # Content categorization - tags: list[str] - category: str - section: str - language: str - - # Authorship and source info - author: str - domain: str - site_name: str - - # Document structure - heading_hierarchy: list[str] - section_depth: int - has_code_blocks: bool - has_images: bool - has_links: bool - - # Processing metadata - extraction_method: str - crawl_depth: int - last_modified: datetime | 
None - - # Content quality indicators - readability_score: float | None - completeness_score: float | None - - # Repository-specific fields - file_path: str | None - repository_name: str | None - branch_name: str | None - commit_hash: str | None - programming_language: str | None - - # Custom business metadata - importance_score: float | None - review_status: str | None - assigned_team: str | None - - -class Document(BaseModel): - """Represents a single document.""" - - id: UUID = Field(default_factory=uuid4) - content: str - metadata: DocumentMetadata - vector: list[float] | None = Field(default=None) - score: float | None = Field(default=None) - source: IngestionSource - collection: str = Field(default="documents") - - -class IngestionJob(BaseModel): - """Represents an ingestion job.""" - - id: UUID = Field(default_factory=uuid4) - source_type: IngestionSource - source_url: HttpUrl | str - status: IngestionStatus = Field(default=IngestionStatus.PENDING) - created_at: datetime = Field(default_factory=lambda: datetime.now(UTC)) - updated_at: datetime = Field(default_factory=lambda: datetime.now(UTC)) - completed_at: datetime | None = Field(default=None) - error_message: str | None = Field(default=None) - document_count: int = Field(default=0) - storage_backend: StorageBackend - - -class IngestionResult(BaseModel): - """Result of an ingestion operation.""" - - job_id: UUID - status: IngestionStatus - documents_processed: int - documents_failed: int - duration_seconds: float - error_messages: list[str] = Field(default_factory=list) - - - -"""Prefect flow for ingestion pipeline.""" - -from __future__ import annotations - -from collections.abc import Callable -from datetime import UTC, datetime -from typing import TYPE_CHECKING, Literal, TypeAlias, assert_never, cast - -from prefect import flow, get_run_logger, task -from prefect.blocks.core import Block -from prefect.variables import Variable -from pydantic.types import SecretStr - -from ..config.settings import Settings -from ..core.exceptions import IngestionError -from ..core.models import ( - Document, - FirecrawlConfig, - IngestionJob, - IngestionResult, - IngestionSource, - IngestionStatus, - RepomixConfig, - StorageBackend, - StorageConfig, -) -from ..ingestors import BaseIngestor, FirecrawlIngestor, FirecrawlPage, RepomixIngestor -from ..storage import OpenWebUIStorage, WeaviateStorage -from ..storage import R2RStorage as RuntimeR2RStorage -from ..storage.base import BaseStorage -from ..utils.metadata_tagger import MetadataTagger - -SourceTypeLiteral = Literal["web", "repository", "documentation"] -StorageBackendLiteral = Literal["weaviate", "open_webui", "r2r"] -SourceTypeLike: TypeAlias = IngestionSource | SourceTypeLiteral -StorageBackendLike: TypeAlias = StorageBackend | StorageBackendLiteral - - -def _safe_cache_key(prefix: str, params: dict[str, object], key: str) -> str: - """Create a type-safe cache key from task parameters.""" - value = params.get(key, "") - return f"{prefix}_{hash(str(value))}" - - -if TYPE_CHECKING: - from ..storage.r2r.storage import R2RStorage as R2RStorageType -else: - R2RStorageType = BaseStorage - - -@task(name="validate_source", retries=2, retry_delay_seconds=10, tags=["validation"]) -async def validate_source_task(source_url: str, source_type: IngestionSource) -> bool: - """ - Validate that a source is accessible. 
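Before the flow tasks below consume them, it is worth seeing the metadata contract the models above define in action. A minimal sketch; all values are invented:

```python
from datetime import UTC, datetime

from ingest_pipeline.core.models import Document, DocumentMetadata, IngestionSource

# Only the DocumentMetadataRequired keys are mandatory; the rest are total=False.
meta: DocumentMetadata = {
    "source_url": "https://example.com/page",
    "timestamp": datetime.now(UTC),
    "content_type": "text/markdown",
    "word_count": 120,
    "char_count": 780,
    "tags": ["example"],  # optional field
}
doc = Document(content="# Page\n...", metadata=meta, source=IngestionSource.WEB)
```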
- - Args: - source_url: URL or path to source - source_type: Type of source - - Returns: - True if valid - """ - if source_type == IngestionSource.WEB: - ingestor = FirecrawlIngestor() - elif source_type == IngestionSource.REPOSITORY: - ingestor = RepomixIngestor() - else: - raise ValueError(f"Unsupported source type: {source_type}") - - result = await ingestor.validate_source(source_url) - return bool(result) - - -@task(name="initialize_storage", retries=3, retry_delay_seconds=5, tags=["storage"]) -async def initialize_storage_task(config: StorageConfig | str) -> BaseStorage: - """ - Initialize storage backend. - - Args: - config: Storage configuration block or block name - - Returns: - Initialized storage adapter - """ - # Load block if string provided - if isinstance(config, str): - # Use Block.aload with type slug for better type inference - loaded_block = await Block.aload(f"storage-config/{config}") - config = cast(StorageConfig, loaded_block) - - if config.backend == StorageBackend.WEAVIATE: - storage = WeaviateStorage(config) - elif config.backend == StorageBackend.OPEN_WEBUI: - storage = OpenWebUIStorage(config) - elif config.backend == StorageBackend.R2R: - if RuntimeR2RStorage is None: - raise ValueError("R2R storage not available. Check dependencies.") - storage = RuntimeR2RStorage(config) - else: - raise ValueError(f"Unsupported backend: {config.backend}") - - await storage.initialize() - return storage - - -@task(name="map_firecrawl_site", retries=2, retry_delay_seconds=15, tags=["firecrawl", "map"], - cache_key_fn=lambda ctx, p: _safe_cache_key("firecrawl_map", p, "source_url")) -async def map_firecrawl_site_task(source_url: str, config: FirecrawlConfig | str) -> list[str]: - """Map a site using Firecrawl and return discovered URLs.""" - # Load block if string provided - if isinstance(config, str): - # Use Block.aload with type slug for better type inference - loaded_block = await Block.aload(f"firecrawl-config/{config}") - config = cast(FirecrawlConfig, loaded_block) - - ingestor = FirecrawlIngestor(config) - mapped = await ingestor.map_site(source_url) - return mapped or [source_url] - - -@task(name="filter_existing_documents", retries=1, retry_delay_seconds=5, tags=["dedup"], - cache_key_fn=lambda ctx, p: _safe_cache_key("filter_docs", p, "urls")) # Cache based on URL list -async def filter_existing_documents_task( - urls: list[str], - storage_client: BaseStorage, - stale_after_days: int = 30, - *, - collection_name: str | None = None, -) -> list[str]: - """Filter URLs to only those that need scraping (missing or stale in storage).""" - logger = get_run_logger() - eligible: list[str] = [] - - for url in urls: - document_id = str(FirecrawlIngestor.compute_document_id(url)) - exists = await storage_client.check_exists( - document_id, - collection_name=collection_name, - stale_after_days=stale_after_days - ) - - if not exists: - eligible.append(url) - - skipped = len(urls) - len(eligible) - if skipped > 0: - logger.info("Skipping %s up-to-date documents in %s", skipped, storage_client.display_name) - - return eligible - - -@task( - name="scrape_firecrawl_batch", retries=2, retry_delay_seconds=20, tags=["firecrawl", "scrape"] -) -async def scrape_firecrawl_batch_task( - batch_urls: list[str], config: FirecrawlConfig -) -> list[FirecrawlPage]: - """Scrape a batch of URLs via Firecrawl.""" - ingestor = FirecrawlIngestor(config) - result: list[FirecrawlPage] = await ingestor.scrape_pages(batch_urls) - return result - - -@task(name="annotate_firecrawl_metadata", retries=1, 
retry_delay_seconds=10, tags=["metadata"]) -async def annotate_firecrawl_metadata_task( - pages: list[FirecrawlPage], job: IngestionJob -) -> list[Document]: - """Annotate scraped pages with standardized metadata.""" - if not pages: - return [] - - ingestor = FirecrawlIngestor() - documents = [ingestor.create_document(page, job) for page in pages] - - try: - from ..config import get_settings - - settings = get_settings() - async with MetadataTagger(llm_endpoint=str(settings.llm_endpoint)) as tagger: - tagged_documents: list[Document] = await tagger.tag_batch(documents) - return tagged_documents - except IngestionError as exc: # pragma: no cover - logging side effect - logger = get_run_logger() - logger.warning("Metadata tagging failed: %s", exc) - return documents - except Exception as exc: # pragma: no cover - defensive - logger = get_run_logger() - logger.warning("Metadata tagging unavailable, using base metadata: %s", exc) - return documents - - -@task(name="upsert_r2r_documents", retries=2, retry_delay_seconds=20, tags=["storage", "r2r"]) -async def upsert_r2r_documents_task( - storage_client: R2RStorageType, - documents: list[Document], - collection_name: str | None, -) -> tuple[int, int]: - """Upsert documents into R2R storage.""" - if not documents: - return 0, 0 - - stored_ids: list[str] = await storage_client.store_batch( - documents, collection_name=collection_name - ) - processed = len(stored_ids) - failed = len(documents) - processed - - if failed: - logger = get_run_logger() - logger.warning("Failed to upsert %s documents to R2R", failed) - - return processed, failed - - -@task(name="ingest_documents", retries=2, retry_delay_seconds=30, tags=["ingestion"]) -async def ingest_documents_task( - job: IngestionJob, - collection_name: str | None = None, - batch_size: int | None = None, - storage_client: BaseStorage | None = None, - storage_block_name: str | None = None, - ingestor_config_block_name: str | None = None, - progress_callback: Callable[[int, str], None] | None = None, -) -> tuple[int, int]: - """ - Ingest documents from source with optional pre-initialized storage client. 
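`ingest_documents_task` resolves `batch_size` from a Prefect Variable with layered type coercion, since `Variable.aget` can hand back a str, int, float, or other JSON value. The same pattern, extracted as a standalone sketch; `resolve_batch_size` is hypothetical:

```python
from prefect.variables import Variable


async def resolve_batch_size(default: int = 50) -> int:
    """Fetch 'default_batch_size', coercing whatever type comes back."""
    try:
        raw = await Variable.aget("default_batch_size", default=str(default))
    except Exception:
        return default
    if isinstance(raw, int):
        return raw
    if isinstance(raw, (str, float)):
        try:
            return int(float(str(raw)))
        except ValueError:
            return default
    return default
```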
- - Args: - job: Ingestion job configuration - collection_name: Target collection name - batch_size: Number of documents per batch (uses Variable if None) - storage_client: Optional pre-initialized storage client - storage_block_name: Optional storage block name to load - ingestor_config_block_name: Optional ingestor config block name to load - progress_callback: Optional callback for progress updates - - Returns: - Tuple of (processed_count, failed_count) - """ - if progress_callback: - progress_callback(35, "Creating ingestor and storage clients...") - - # Use Variable for batch size if not provided - if batch_size is None: - try: - batch_size_var = await Variable.aget("default_batch_size", default="50") - # Convert Variable result to int, handling various types - if isinstance(batch_size_var, int): - batch_size = batch_size_var - elif isinstance(batch_size_var, (str, float)): - batch_size = int(float(str(batch_size_var))) - else: - batch_size = 50 - except Exception: - batch_size = 50 - - ingestor = await _create_ingestor(job, ingestor_config_block_name) - storage = storage_client or await _create_storage(job, collection_name, storage_block_name) - - if progress_callback: - progress_callback(40, "Starting document processing...") - - return await _process_documents(ingestor, storage, job, batch_size, collection_name, progress_callback) - - -async def _create_ingestor(job: IngestionJob, config_block_name: str | None = None) -> BaseIngestor: - """Create appropriate ingestor based on job source type.""" - if job.source_type == IngestionSource.WEB: - if config_block_name: - # Use Block.aload with type slug for better type inference - loaded_block = await Block.aload(f"firecrawl-config/{config_block_name}") - config = cast(FirecrawlConfig, loaded_block) - else: - # Fallback to default configuration - config = FirecrawlConfig() - return FirecrawlIngestor(config) - elif job.source_type == IngestionSource.REPOSITORY: - if config_block_name: - # Use Block.aload with type slug for better type inference - loaded_block = await Block.aload(f"repomix-config/{config_block_name}") - config = cast(RepomixConfig, loaded_block) - else: - # Fallback to default configuration - config = RepomixConfig() - return RepomixIngestor(config) - else: - raise ValueError(f"Unsupported source: {job.source_type}") - - -async def _create_storage(job: IngestionJob, collection_name: str | None, storage_block_name: str | None = None) -> BaseStorage: - """Create and initialize storage client.""" - if collection_name is None: - # Use variable for default collection prefix - prefix = await Variable.aget("default_collection_prefix", default="docs") - collection_name = f"{prefix}_{job.source_type.value}" - - if storage_block_name: - # Load storage config from block - loaded_block = await Block.aload(f"storage-config/{storage_block_name}") - storage_config = cast(StorageConfig, loaded_block) - # Override collection name if provided - storage_config.collection_name = collection_name - else: - # Fallback to building config from settings - from ..config import get_settings - settings = get_settings() - storage_config = _build_storage_config(job, settings, collection_name) - - storage = _instantiate_storage(job.storage_backend, storage_config) - await storage.initialize() - return storage - - -def _build_storage_config( - job: IngestionJob, settings: Settings, collection_name: str -) -> StorageConfig: - """Build storage configuration from job and settings.""" - storage_endpoints = { - StorageBackend.WEAVIATE: 
settings.weaviate_endpoint, - StorageBackend.OPEN_WEBUI: settings.openwebui_endpoint, - StorageBackend.R2R: settings.get_storage_endpoint("r2r"), - } - storage_api_keys: dict[StorageBackend, str | None] = { - StorageBackend.WEAVIATE: settings.get_api_key("weaviate"), - StorageBackend.OPEN_WEBUI: settings.get_api_key("openwebui"), - StorageBackend.R2R: None, # R2R is self-hosted, no API key needed - } - - api_key_raw: str | None = storage_api_keys[job.storage_backend] - api_key: SecretStr | None = SecretStr(api_key_raw) if api_key_raw is not None else None - - return StorageConfig( - backend=job.storage_backend, - endpoint=storage_endpoints[job.storage_backend], - api_key=api_key, - collection_name=collection_name, - ) - - -def _instantiate_storage(backend: StorageBackend, config: StorageConfig) -> BaseStorage: - """Instantiate storage based on backend type.""" - if backend == StorageBackend.WEAVIATE: - return WeaviateStorage(config) - elif backend == StorageBackend.OPEN_WEBUI: - return OpenWebUIStorage(config) - elif backend == StorageBackend.R2R: - if RuntimeR2RStorage is None: - raise ValueError("R2R storage not available. Check dependencies.") - return RuntimeR2RStorage(config) - - assert_never(backend) - - -def _chunk_urls(urls: list[str], chunk_size: int) -> list[list[str]]: - """Group URLs into fixed-size chunks for batch processing.""" - - if chunk_size <= 0: - raise ValueError("chunk_size must be greater than zero") - - return [urls[i : i + chunk_size] for i in range(0, len(urls), chunk_size)] - - -def _deduplicate_urls(urls: list[str]) -> list[str]: - """Return the URLs with order preserved and duplicates removed.""" - - seen: set[str] = set() - unique: list[str] = [] - for url in urls: - if url not in seen: - seen.add(url) - unique.append(url) - return unique - - -async def _process_documents( - ingestor: BaseIngestor, - storage: BaseStorage, - job: IngestionJob, - batch_size: int, - collection_name: str | None, - progress_callback: Callable[[int, str], None] | None = None, -) -> tuple[int, int]: - """Process documents in batches.""" - processed = 0 - failed = 0 - batch: list[Document] = [] - total_documents = 0 - batch_count = 0 - - if progress_callback: - progress_callback(45, "Ingesting documents from source...") - - # Use smart ingestion with deduplication if storage supports it - if hasattr(storage, 'check_exists'): - try: - # Try to use the smart ingestion method - document_generator = ingestor.ingest_with_dedup( - job, storage, collection_name=collection_name - ) - except Exception: - # Fall back to regular ingestion if smart method fails - document_generator = ingestor.ingest(job) - else: - document_generator = ingestor.ingest(job) - - async for document in document_generator: - batch.append(document) - total_documents += 1 - - if len(batch) >= batch_size: - batch_count += 1 - if progress_callback: - progress_callback( - 45 + min(35, (batch_count * 10)), - f"Processing batch {batch_count} ({total_documents} documents so far)..." 
- ) - - batch_processed, batch_failed = await _store_batch(storage, batch, collection_name) - processed += batch_processed - failed += batch_failed - batch = [] - - # Process remaining batch - if batch: - batch_count += 1 - if progress_callback: - progress_callback(80, f"Processing final batch ({total_documents} total documents)...") - - batch_processed, batch_failed = await _store_batch(storage, batch, collection_name) - processed += batch_processed - failed += batch_failed - - if progress_callback: - progress_callback(85, f"Completed processing {total_documents} documents") - - return processed, failed - - -async def _store_batch( - storage: BaseStorage, - batch: list[Document], - collection_name: str | None, -) -> tuple[int, int]: - """Store a batch of documents and return processed/failed counts.""" - try: - # Apply metadata tagging for backends that benefit from it - processed_batch = batch - if hasattr(storage, "config") and storage.config.backend in ( - StorageBackend.R2R, - StorageBackend.WEAVIATE, - ): - try: - from ..config import get_settings - - settings = get_settings() - async with MetadataTagger(llm_endpoint=str(settings.llm_endpoint)) as tagger: - processed_batch = await tagger.tag_batch(batch) - except Exception as exc: - print(f"Metadata tagging failed, using original documents: {exc}") - processed_batch = batch - - stored_ids = await storage.store_batch(processed_batch, collection_name=collection_name) - processed_count = len(stored_ids) - failed_count = len(processed_batch) - processed_count - - batch_type = ( - "final" if len(processed_batch) < 50 else "" - ) # Assume standard batch size is 50 - print(f"Successfully stored {processed_count} documents in {batch_type} batch".strip()) - - return processed_count, failed_count - except Exception as e: - batch_type = "Final" if len(batch) < 50 else "Batch" - print(f"{batch_type} storage failed: {e}") - return 0, len(batch) - - -@flow( - name="firecrawl_to_r2r", - description="Ingest Firecrawl pages into R2R with metadata annotation", - persist_result=False, - log_prints=True, -) -async def firecrawl_to_r2r_flow( - job: IngestionJob, collection_name: str | None = None, progress_callback: Callable[[int, str], None] | None = None -) -> tuple[int, int]: - """Specialized flow for Firecrawl ingestion into R2R.""" - logger = get_run_logger() - from ..config import get_settings - - if progress_callback: - progress_callback(35, "Initializing Firecrawl and R2R storage...") - - settings = get_settings() - firecrawl_config = FirecrawlConfig() - resolved_collection = collection_name or f"docs_{job.source_type.value}" - - storage_config = _build_storage_config(job, settings, resolved_collection) - storage_client = await initialize_storage_task(storage_config) - - if RuntimeR2RStorage is None or not isinstance(storage_client, RuntimeR2RStorage): - raise IngestionError("Firecrawl to R2R flow requires an R2R storage backend") - - r2r_storage = cast("R2RStorageType", storage_client) - - if progress_callback: - progress_callback(45, "Checking for existing content before mapping...") - - # Smart mapping: try single URL first to avoid expensive map operation - base_url = str(job.source_url) - single_url_id = str(FirecrawlIngestor.compute_document_id(base_url)) - base_exists = await r2r_storage.check_exists( - single_url_id, collection_name=resolved_collection, stale_after_days=30 - ) - - if base_exists: - # Check if this is a recent single-page update - logger.info("Base URL %s exists and is fresh, skipping expensive mapping", base_url) - if 
progress_callback: - progress_callback(100, "Content is up to date, no processing needed") - return 0, 0 - - if progress_callback: - progress_callback(50, "Discovering pages with Firecrawl...") - - discovered_urls = await map_firecrawl_site_task(base_url, firecrawl_config) - unique_urls = _deduplicate_urls(discovered_urls) - logger.info("Discovered %s unique URLs from Firecrawl map", len(unique_urls)) - - if progress_callback: - progress_callback(60, f"Found {len(unique_urls)} pages, filtering existing content...") - - eligible_urls = await filter_existing_documents_task( - unique_urls, r2r_storage, collection_name=resolved_collection - ) - - if not eligible_urls: - logger.info("All Firecrawl pages are up to date for %s", job.source_url) - if progress_callback: - progress_callback(100, "All pages are up to date, no processing needed") - return 0, 0 - - if progress_callback: - progress_callback(70, f"Scraping {len(eligible_urls)} new/updated pages...") - - batch_size = min(settings.default_batch_size, firecrawl_config.limit) - url_batches = _chunk_urls(eligible_urls, batch_size) - logger.info("Scraping %s batches of Firecrawl pages", len(url_batches)) - - # Use asyncio.gather for concurrent scraping - import asyncio - scrape_tasks = [ - scrape_firecrawl_batch_task(batch, firecrawl_config) - for batch in url_batches - ] - batch_results = await asyncio.gather(*scrape_tasks) - - scraped_pages: list[FirecrawlPage] = [] - for batch_pages in batch_results: - scraped_pages.extend(batch_pages) - - if progress_callback: - progress_callback(80, f"Processing {len(scraped_pages)} scraped pages...") - - documents = await annotate_firecrawl_metadata_task(scraped_pages, job) - - if not documents: - logger.warning("No documents produced after scraping for %s", job.source_url) - return 0, len(eligible_urls) - - if progress_callback: - progress_callback(90, f"Storing {len(documents)} documents in R2R...") - - processed, failed = await upsert_r2r_documents_task(r2r_storage, documents, resolved_collection) - - logger.info("Upserted %s documents into R2R (%s failed)", processed, failed) - - return processed, failed - - -@task(name="update_job_status", tags=["tracking"]) -async def update_job_status_task( - job: IngestionJob, - status: IngestionStatus, - processed: int = 0, - _failed: int = 0, - error: str | None = None, -) -> IngestionJob: - """ - Update job status. - - Args: - job: Ingestion job - status: New status - processed: Documents processed - _failed: Documents failed (currently unused) - error: Error message if any - - Returns: - Updated job - """ - job.status = status - job.updated_at = datetime.now(UTC) - job.document_count = processed - - if status == IngestionStatus.COMPLETED: - job.completed_at = datetime.now(UTC) - - if error: - job.error_message = error - - return job - - -@flow( - name="ingestion_pipeline", - description="Main ingestion pipeline for documents", - retries=1, - retry_delay_seconds=60, - persist_result=True, - log_prints=True, -) -async def create_ingestion_flow( - source_url: str, - source_type: SourceTypeLike, - storage_backend: StorageBackendLike = StorageBackend.WEAVIATE, - collection_name: str | None = None, - validate_first: bool = True, - progress_callback: Callable[[int, str], None] | None = None, -) -> IngestionResult: - """ - Main ingestion flow. 
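End to end, the flow is invoked with plain literals; web sources targeting R2R route through `firecrawl_to_r2r_flow`, everything else through `ingest_documents_task`. A minimal invocation sketch (the import path follows the repository layout; the URL is a placeholder):

```python
import asyncio

from ingest_pipeline.flows.ingestion import create_ingestion_flow

result = asyncio.run(
    create_ingestion_flow(
        source_url="https://docs.example.com",
        source_type="web",        # IngestionSource.WEB
        storage_backend="r2r",    # routes through firecrawl_to_r2r_flow
        collection_name="docs_web",
    )
)
print(result.status, result.documents_processed, result.documents_failed)
```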
- - Args: - source_url: URL or path to source - source_type: Type of source - storage_backend: Storage backend to use - validate_first: Whether to validate source first - progress_callback: Optional callback for progress updates - - Returns: - Ingestion result - """ - print(f"Starting ingestion from {source_url}") - - source_enum = IngestionSource(source_type) - backend_enum = StorageBackend(storage_backend) - - # Create job - job = IngestionJob( - source_url=source_url, - source_type=source_enum, - storage_backend=backend_enum, - status=IngestionStatus.PENDING, - ) - - start_time = datetime.now(UTC) - error_messages: list[str] = [] - processed = 0 - failed = 0 - - try: - # Validate source if requested - if validate_first: - if progress_callback: - progress_callback(10, "Validating source...") - print("Validating source...") - is_valid = await validate_source_task(source_url, job.source_type) - - if not is_valid: - raise IngestionError(f"Source validation failed: {source_url}") - - # Update status to in progress - if progress_callback: - progress_callback(20, "Initializing storage...") - job = await update_job_status_task(job, IngestionStatus.IN_PROGRESS) - - # Run ingestion - if progress_callback: - progress_callback(30, "Starting document ingestion...") - print("Ingesting documents...") - if job.source_type == IngestionSource.WEB and job.storage_backend == StorageBackend.R2R: - processed, failed = await firecrawl_to_r2r_flow(job, collection_name, progress_callback=progress_callback) - else: - processed, failed = await ingest_documents_task(job, collection_name, progress_callback=progress_callback) - - if progress_callback: - progress_callback(90, "Finalizing ingestion...") - - # Update final status - if failed > 0: - error_messages.append(f"{failed} documents failed to process") - - # Set status based on results - if processed == 0 and failed > 0: - final_status = IngestionStatus.FAILED - elif failed > 0: - final_status = IngestionStatus.PARTIAL - else: - final_status = IngestionStatus.COMPLETED - - job = await update_job_status_task(job, final_status, processed=processed, _failed=failed) - - print(f"Ingestion completed: {processed} processed, {failed} failed") - - except Exception as e: - print(f"Ingestion failed: {e}") - error_messages.append(str(e)) - - # Don't reset counts - keep whatever was processed before the error - job = await update_job_status_task( - job, IngestionStatus.FAILED, processed=processed, _failed=failed, error=str(e) - ) - - # Calculate duration - duration = (datetime.now(UTC) - start_time).total_seconds() - - return IngestionResult( - job_id=job.id, - status=job.status, - documents_processed=processed, - documents_failed=failed, - duration_seconds=duration, - error_messages=error_messages, - ) - - """Scheduler for Prefect deployments.""" @@ -11539,1949 +8567,6 @@ class FirecrawlIngestor(BaseIngestor): await self.close() - -"""Base storage interface.""" - -from abc import ABC, abstractmethod -from collections.abc import AsyncGenerator - -from ..core.models import Document, StorageConfig - - -class BaseStorage(ABC): - """Abstract base class for storage adapters.""" - - config: StorageConfig - - def __init__(self, config: StorageConfig): - """ - Initialize storage adapter. 
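`BaseStorage` keeps the abstract surface small: `initialize`, `store`, `store_batch`, and `delete` are required, while retrieval, search, counting, and listing degrade gracefully with defaults. A hypothetical in-memory backend, sketching the minimum a new adapter must supply:

```python
from typing_extensions import override

from ingest_pipeline.core.models import Document
from ingest_pipeline.storage.base import BaseStorage


class InMemoryStorage(BaseStorage):
    """Illustrative only; not part of the patch."""

    _docs: dict[str, Document]

    @override
    async def initialize(self) -> None:
        self._docs = {}

    @override
    async def store(self, document: Document, *, collection_name: str | None = None) -> str:
        self._docs[str(document.id)] = document
        return str(document.id)

    @override
    async def store_batch(
        self, documents: list[Document], *, collection_name: str | None = None
    ) -> list[str]:
        return [await self.store(doc) for doc in documents]

    @override
    async def delete(self, document_id: str, *, collection_name: str | None = None) -> bool:
        return self._docs.pop(document_id, None) is not None
```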
- - Args: - config: Storage configuration - """ - self.config = config - - @property - def display_name(self) -> str: - """Human-readable name for UI display.""" - return self.__class__.__name__.replace("Storage", "") - - @abstractmethod - async def initialize(self) -> None: - """Initialize the storage backend and create collections if needed.""" - pass # pragma: no cover - - @abstractmethod - async def store(self, document: Document, *, collection_name: str | None = None) -> str: - """ - Store a single document. - - Args: - document: Document to store - - Returns: - Document ID - """ - pass # pragma: no cover - - @abstractmethod - async def store_batch( - self, documents: list[Document], *, collection_name: str | None = None - ) -> list[str]: - """ - Store multiple documents in batch. - - Args: - documents: List of documents to store - - Returns: - List of document IDs - """ - pass # pragma: no cover - - async def retrieve( - self, document_id: str, *, collection_name: str | None = None - ) -> Document | None: - """ - Retrieve a document by ID (if supported by backend). - - Args: - document_id: Document ID - - Returns: - Document or None if not found - - Raises: - NotImplementedError: If backend doesn't support retrieval - """ - raise NotImplementedError(f"{self.__class__.__name__} doesn't support document retrieval") - - async def check_exists( - self, document_id: str, *, collection_name: str | None = None, stale_after_days: int = 30 - ) -> bool: - """ - Check if a document exists and is not stale. - - Args: - document_id: Document ID to check - collection_name: Collection to check in - stale_after_days: Consider document stale after this many days - - Returns: - True if document exists and is not stale, False otherwise - """ - try: - document = await self.retrieve(document_id, collection_name=collection_name) - if document is None: - return False - - # Check staleness if timestamp is available - if "timestamp" in document.metadata: - from datetime import UTC, datetime, timedelta - timestamp_obj = document.metadata["timestamp"] - if isinstance(timestamp_obj, datetime): - timestamp = timestamp_obj - cutoff = datetime.now(UTC) - timedelta(days=stale_after_days) - return timestamp >= cutoff - - # If no timestamp, assume it exists and is valid - return True - except Exception: - # Backend doesn't support retrieval, assume doesn't exist - return False - - def search( - self, - query: str, - limit: int = 10, - threshold: float = 0.7, - *, - collection_name: str | None = None, - ) -> AsyncGenerator[Document, None]: - """ - Search for documents (if supported by backend). - - Args: - query: Search query - limit: Maximum number of results - threshold: Similarity threshold - - Yields: - Matching documents - - Raises: - NotImplementedError: If backend doesn't support search - """ - raise NotImplementedError(f"{self.__class__.__name__} doesn't support search") - - @abstractmethod - async def delete(self, document_id: str, *, collection_name: str | None = None) -> bool: - """ - Delete a document. - - Args: - document_id: Document ID - - Returns: - True if deleted successfully - """ - pass # pragma: no cover - - async def count(self, *, collection_name: str | None = None) -> int: - """ - Get total document count (if supported by backend). - - Returns: - Number of documents, 0 if not supported - """ - return 0 - - async def list_collections(self) -> list[str]: - """ - List available collections (if supported by backend). 
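The staleness rule in `check_exists` reduces to a single datetime comparison; a worked sketch with an assumed 30-day window:

```python
from datetime import UTC, datetime, timedelta

stale_after_days = 30
stored_ts = datetime(2025, 8, 1, tzinfo=UTC)  # timestamp from document.metadata
cutoff = datetime.now(UTC) - timedelta(days=stale_after_days)
is_fresh = stored_ts >= cutoff  # False once 30 days have passed: re-scrape
```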
- - Returns: - List of collection names, empty list if not supported - """ - return [] - - async def describe_collections(self) -> list[dict[str, object]]: - """ - Describe available collections with metadata (if supported by backend). - - Returns: - List of collection metadata dictionaries, empty list if not supported - """ - return [] - - async def list_documents( - self, - limit: int = 100, - offset: int = 0, - *, - collection_name: str | None = None, - ) -> list[dict[str, object]]: - """ - List documents in the storage backend (if supported). - - Args: - limit: Maximum number of documents to return - offset: Number of documents to skip - collection_name: Collection to list documents from - - Returns: - List of document dictionaries with metadata - - Raises: - NotImplementedError: If backend doesn't support document listing - """ - raise NotImplementedError(f"{self.__class__.__name__} doesn't support document listing") - - async def close(self) -> None: - """ - Close storage connections and cleanup resources. - - Default implementation does nothing. - """ - # Default implementation - storage backends can override to cleanup connections - return None - - - -"""Open WebUI storage adapter.""" - -import asyncio -import logging -from typing import Final, TypedDict, cast - -import httpx -from typing_extensions import override - -from ..core.exceptions import StorageError -from ..core.models import Document, StorageConfig -from .base import BaseStorage - -LOGGER: Final[logging.Logger] = logging.getLogger(__name__) - - -class OpenWebUIStorage(BaseStorage): - """Storage adapter for Open WebUI knowledge endpoints.""" - - client: httpx.AsyncClient - _knowledge_cache: dict[str, str] - - def __init__(self, config: StorageConfig): - """ - Initialize Open WebUI storage. 
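Wiring the adapter starts from a `StorageConfig` block; a minimal sketch with placeholder values follows. One caveat worth flagging: `api_key` is declared as `SecretStr | None`, and interpolating a `SecretStr` straight into the `Bearer` header (as the constructor below does) appears to yield the masked `**********` string rather than the secret, so `get_secret_value()` may be needed there.

```python
from pydantic import HttpUrl, SecretStr

from ingest_pipeline.core.models import StorageBackend, StorageConfig
from ingest_pipeline.storage.openwebui import OpenWebUIStorage

config = StorageConfig(
    backend=StorageBackend.OPEN_WEBUI,
    endpoint=HttpUrl("http://chat.lab"),
    api_key=SecretStr("sk-placeholder"),  # surfaces as the Bearer token
    collection_name="docs_web",
)
storage = OpenWebUIStorage(config)
# await storage.initialize()  # resolves or creates the knowledge base
```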
- - Args: - config: Storage configuration - """ - super().__init__(config) - - headers: dict[str, str] = {} - if config.api_key: - headers["Authorization"] = f"Bearer {config.api_key}" - - self.client = httpx.AsyncClient( - base_url=str(config.endpoint), - headers=headers, - timeout=30.0, - ) - self._knowledge_cache = {} - - @override - async def initialize(self) -> None: - """Initialize Open WebUI connection.""" - try: - if self.config.collection_name: - await self._get_knowledge_id( - self.config.collection_name, - create=True, - ) - - except httpx.ConnectError as e: - raise StorageError(f"Connection to OpenWebUI failed: {e}") from e - except httpx.HTTPStatusError as e: - raise StorageError(f"OpenWebUI returned error {e.response.status_code}: {e}") from e - except httpx.RequestError as e: - raise StorageError(f"Request to OpenWebUI failed: {e}") from e - except Exception as e: - raise StorageError(f"Failed to initialize Open WebUI: {e}") from e - - async def _create_collection(self, name: str) -> str: - """Create knowledge base in Open WebUI.""" - try: - response = await self.client.post( - "/api/v1/knowledge/create", - json={ - "name": name, - "description": "Documents ingested from various sources", - "data": {}, - "access_control": None, - }, - ) - response.raise_for_status() - result = response.json() - knowledge_id = result.get("id") - - if not knowledge_id or not isinstance(knowledge_id, str): - raise StorageError("Knowledge base creation failed: no ID returned") - - return str(knowledge_id) - - except httpx.ConnectError as e: - raise StorageError(f"Connection to OpenWebUI failed during creation: {e}") from e - except httpx.HTTPStatusError as e: - raise StorageError( - f"OpenWebUI returned error {e.response.status_code} during creation: {e}" - ) from e - except httpx.RequestError as e: - raise StorageError(f"Request to OpenWebUI failed during creation: {e}") from e - except Exception as e: - raise StorageError(f"Failed to create knowledge base: {e}") from e - - async def _fetch_knowledge_bases(self) -> list[dict[str, object]]: - """Return the list of knowledge bases from the API.""" - response = await self.client.get("/api/v1/knowledge/list") - response.raise_for_status() - data = response.json() - if not isinstance(data, list): - return [] - normalized: list[dict[str, object]] = [] - for item in data: - if isinstance(item, dict): - item_dict: dict[str, object] = item - normalized.append({str(k): v for k, v in item_dict.items()}) - return normalized - - async def _get_knowledge_id( - self, - name: str | None, - *, - create: bool, - ) -> str | None: - """Retrieve (and optionally create) a knowledge base identifier.""" - target_raw = name or self.config.collection_name - target = str(target_raw) if target_raw else "" - if not target: - raise StorageError("Knowledge base name is required") - - if cached := self._knowledge_cache.get(target): - return cached - - knowledge_bases = await self._fetch_knowledge_bases() - for kb in knowledge_bases: - if kb.get("name") == target: - kb_id = kb.get("id") - if isinstance(kb_id, str): - self._knowledge_cache[target] = kb_id - return kb_id - - if not create: - return None - - knowledge_id = await self._create_collection(target) - self._knowledge_cache[target] = knowledge_id - return knowledge_id - - @override - async def store(self, document: Document, *, collection_name: str | None = None) -> str: - """ - Store a document in Open WebUI as a file. 
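-
-        Example (illustrative; `storage` is an initialized adapter):
-            file_id = await storage.store(document, collection_name="research")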
- - Args: - document: Document to store - - Returns: - File ID - """ - try: - knowledge_id = await self._get_knowledge_id( - collection_name, - create=True, - ) - if not knowledge_id: - raise StorageError("Knowledge base not initialized") - - # Step 1: Upload document as file - # Use document title from metadata if available, otherwise fall back to ID - filename = document.metadata.get("title") or f"doc_{document.id}" - # Ensure filename has proper extension - if not filename.endswith(('.txt', '.md', '.pdf', '.doc', '.docx')): - filename = f"{filename}.txt" - files = {"file": (filename, document.content.encode(), "text/plain")} - response = await self.client.post( - "/api/v1/files/", - files=files, - params={"process": True, "process_in_background": False}, - ) - response.raise_for_status() - - file_data = response.json() - file_id = file_data.get("id") - - if not file_id or not isinstance(file_id, str): - raise StorageError("File upload failed: no file ID returned") - - # Step 2: Add file to knowledge base - response = await self.client.post( - f"/api/v1/knowledge/{knowledge_id}/file/add", json={"file_id": file_id} - ) - response.raise_for_status() - - return str(file_id) - - except httpx.ConnectError as e: - raise StorageError(f"Connection to OpenWebUI failed: {e}") from e - except httpx.HTTPStatusError as e: - raise StorageError(f"OpenWebUI returned error {e.response.status_code}: {e}") from e - except httpx.RequestError as e: - raise StorageError(f"Request to OpenWebUI failed: {e}") from e - except Exception as e: - raise StorageError(f"Failed to store document: {e}") from e - - @override - async def store_batch( - self, documents: list[Document], *, collection_name: str | None = None - ) -> list[str]: - """ - Store multiple documents as files in batch. 
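-
-        Example (illustrative):
-            file_ids = await storage.store_batch(documents, collection_name="research")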
-
-        Args:
-            documents: List of documents
-
-        Returns:
-            List of file IDs
-        """
-        try:
-            knowledge_id = await self._get_knowledge_id(
-                collection_name,
-                create=True,
-            )
-            if not knowledge_id:
-                raise StorageError("Knowledge base not initialized")
-
-            async def upload_and_attach(doc: Document) -> str:
-                # Use document title from metadata if available, otherwise fall back to ID
-                filename = doc.metadata.get("title") or f"doc_{doc.id}"
-                # Ensure filename has proper extension
-                if not filename.endswith(('.txt', '.md', '.pdf', '.doc', '.docx')):
-                    filename = f"{filename}.txt"
-                files = {"file": (filename, doc.content.encode(), "text/plain")}
-                upload_response = await self.client.post(
-                    "/api/v1/files/",
-                    files=files,
-                    params={"process": True, "process_in_background": False},
-                )
-                upload_response.raise_for_status()
-
-                file_data = upload_response.json()
-                file_id = file_data.get("id")
-
-                if not file_id or not isinstance(file_id, str):
-                    raise StorageError(
-                        f"File upload failed for document {doc.id}: no file ID returned"
-                    )
-
-                attach_response = await self.client.post(
-                    f"/api/v1/knowledge/{knowledge_id}/file/add", json={"file_id": file_id}
-                )
-                attach_response.raise_for_status()
-
-                return str(file_id)
-
-            tasks = [upload_and_attach(doc) for doc in documents]
-            results = await asyncio.gather(*tasks, return_exceptions=True)
-
-            file_ids: list[str] = []
-            failures: list[str] = []
-
-            for index, result in enumerate(results):
-                doc = documents[index]
-                # gather(return_exceptions=True) can surface BaseException
-                # subclasses such as CancelledError, so filter on the wider type
-                if isinstance(result, BaseException):
-                    failures.append(f"{doc.id}: {result}")
-                else:
-                    file_ids.append(cast(str, result))
-
-            if failures:
-                LOGGER.warning(
-                    "OpenWebUI partial batch failure for knowledge base %s: %s",
-                    self.config.collection_name,
-                    ", ".join(failures),
-                )
-
-            return file_ids
-
-        except httpx.ConnectError as e:
-            raise StorageError(f"Connection to OpenWebUI failed during batch: {e}") from e
-        except httpx.HTTPStatusError as e:
-            raise StorageError(
-                f"OpenWebUI returned error {e.response.status_code} during batch: {e}"
-            ) from e
-        except httpx.RequestError as e:
-            raise StorageError(f"Request to OpenWebUI failed during batch: {e}") from e
-        except Exception as e:
-            raise StorageError(f"Failed to store batch: {e}") from e
-
-    @override
-    async def retrieve(
-        self, document_id: str, *, collection_name: str | None = None
-    ) -> Document | None:
-        """
-        OpenWebUI doesn't support document retrieval by ID.
-
-        Args:
-            document_id: File ID (not supported)
-            collection_name: Collection name (not used)
-
-        Raises:
-            NotImplementedError: Always. OpenWebUI exposes no retrieval API,
-                so callers such as the base check_exists treat every document
-                as absent.
-        """
-        # OpenWebUI uses file-based storage without direct document retrieval
-        # This will cause the base check_exists method to return False,
-        # which means documents will always be re-scraped for OpenWebUI
-        raise NotImplementedError("OpenWebUI doesn't support document retrieval by ID")
-
-    @override
-    async def delete(self, document_id: str, *, collection_name: str | None = None) -> bool:
-        """
-        Remove a file from Open WebUI knowledge base. 
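-
-        Example (illustrative):
-            removed = await storage.delete(file_id, collection_name="research")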
- - Args: - document_id: File ID to remove - - Returns: - True if removed successfully - """ - try: - knowledge_id = await self._get_knowledge_id( - collection_name, - create=False, - ) - if not knowledge_id: - return False - - # Remove file from knowledge base - response = await self.client.post( - f"/api/v1/knowledge/{knowledge_id}/file/remove", json={"file_id": document_id} - ) - response.raise_for_status() - - delete_response = await self.client.delete(f"/api/v1/files/{document_id}") - if delete_response.status_code == 404: - return True - delete_response.raise_for_status() - return True - - except httpx.ConnectError as exc: - LOGGER.error( - "Failed to reach OpenWebUI when deleting file %s", document_id, exc_info=exc - ) - return False - except httpx.HTTPStatusError as exc: - LOGGER.error( - "OpenWebUI returned status error %s when deleting file %s", - exc.response.status_code if exc.response else "unknown", - document_id, - exc_info=exc, - ) - return False - except httpx.RequestError as exc: - LOGGER.error("Request error deleting file %s from OpenWebUI", document_id, exc_info=exc) - return False - except Exception as exc: - LOGGER.error("Unexpected error deleting file %s", document_id, exc_info=exc) - return False - - async def list_collections(self) -> list[str]: - """ - List all available knowledge bases. - - Returns: - List of knowledge base names - """ - try: - knowledge_bases = await self._fetch_knowledge_bases() - - # Extract names from knowledge bases - return [ - str(kb.get("name", f"knowledge_{kb.get('id', 'unknown')}") or "") - for kb in knowledge_bases - ] - - except httpx.ConnectError as e: - raise StorageError(f"Connection to OpenWebUI failed: {e}") from e - except httpx.HTTPStatusError as e: - raise StorageError(f"OpenWebUI returned error {e.response.status_code}: {e}") from e - except httpx.RequestError as e: - raise StorageError(f"Request to OpenWebUI failed: {e}") from e - except Exception as e: - raise StorageError(f"Failed to list knowledge bases: {e}") from e - - async def delete_collection(self, collection_name: str) -> bool: - """ - Delete a knowledge base by name. 
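-
-        Example (illustrative):
-            gone = await storage.delete_collection("research")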
- - Args: - collection_name: Name of the knowledge base to delete - - Returns: - True if deleted successfully, False otherwise - """ - try: - knowledge_id = await self._get_knowledge_id(collection_name, create=False) - if not knowledge_id: - # Collection doesn't exist, consider it already deleted - return True - - # Delete the knowledge base using the OpenWebUI API - response = await self.client.delete(f"/api/v1/knowledge/{knowledge_id}/delete") - response.raise_for_status() - - # Remove from cache if it exists - if collection_name in self._knowledge_cache: - del self._knowledge_cache[collection_name] - - LOGGER.info("Successfully deleted knowledge base: %s", collection_name) - return True - - except httpx.HTTPStatusError as e: - # Handle 404 as success (already deleted) - if e.response.status_code == 404: - LOGGER.info("Knowledge base %s was already deleted or not found", collection_name) - return True - LOGGER.error( - "OpenWebUI returned error %s when deleting knowledge base %s", - e.response.status_code, - collection_name, - exc_info=e, - ) - return False - except httpx.ConnectError as e: - LOGGER.error( - "Failed to reach OpenWebUI when deleting knowledge base %s", - collection_name, - exc_info=e, - ) - return False - except httpx.RequestError as e: - LOGGER.error( - "Request error deleting knowledge base %s from OpenWebUI", - collection_name, - exc_info=e, - ) - return False - except Exception as e: - LOGGER.error("Unexpected error deleting knowledge base %s", collection_name, exc_info=e) - return False - - class CollectionSummary(TypedDict): - """Structure describing a knowledge base summary.""" - - name: str - count: int - size_mb: float - - - async def _get_knowledge_base_count(self, kb: dict[str, object]) -> int: - """Get the file count for a knowledge base.""" - kb_id = kb.get("id") - name = kb.get("name", "Unknown") - - if not kb_id: - return self._count_files_from_basic_info(kb) - - return await self._count_files_from_detailed_info(str(kb_id), str(name), kb) - - def _count_files_from_basic_info(self, kb: dict[str, object]) -> int: - """Count files from basic knowledge base info.""" - files = kb.get("files", []) - return len(files) if isinstance(files, list) and files is not None else 0 - - async def _count_files_from_detailed_info(self, kb_id: str, name: str, kb: dict[str, object]) -> int: - """Count files by fetching detailed knowledge base info.""" - try: - LOGGER.debug(f"Fetching detailed info for KB '{name}' from /api/v1/knowledge/{kb_id}") - detail_response = await self.client.get(f"/api/v1/knowledge/{kb_id}") - detail_response.raise_for_status() - detailed_kb = detail_response.json() - - files = detailed_kb.get("files", []) - count = len(files) if isinstance(files, list) and files is not None else 0 - - LOGGER.info(f"Knowledge base '{name}' (ID: {kb_id}): found {count} files") - return count - - except Exception as e: - LOGGER.warning(f"Failed to get detailed info for KB '{name}' (ID: {kb_id}): {e}") - return self._count_files_from_basic_info(kb) - - async def describe_collections(self) -> list[dict[str, object]]: - """Return metadata about each knowledge base.""" - try: - knowledge_bases = await self._fetch_knowledge_bases() - collections: list[dict[str, object]] = [] - - for kb in knowledge_bases: - if not isinstance(kb, dict): - continue - - count = await self._get_knowledge_base_count(kb) - name = kb.get("name", "Unknown") - size_mb = count * 0.5 # rough heuristic - - summary: dict[str, object] = { - "name": str(name), - "count": count, - "size_mb": float(size_mb), - 
} - collections.append(summary) - - return collections - - except Exception as e: - raise StorageError(f"Failed to describe knowledge bases: {e}") from e - - async def count(self, *, collection_name: str | None = None) -> int: - """ - Get document count for a specific collection (knowledge base). - - Args: - collection_name: Name of the knowledge base to count documents for - - Returns: - Number of documents in the collection, 0 if collection not found - """ - if not collection_name: - # If no collection name provided, return total across all collections - try: - collections = await self.describe_collections() - return sum( - int(collection["count"]) if isinstance(collection["count"], (int, str)) else 0 - for collection in collections - ) - except Exception: - return 0 - - try: - # Get knowledge base by name and return its file count - kb = await self.get_knowledge_by_name(collection_name) - if not kb: - return 0 - - kb_id = kb.get("id") - if not kb_id: - return 0 - - # Get detailed knowledge base information to get accurate file count - detail_response = await self.client.get(f"/api/v1/knowledge/{kb_id}") - detail_response.raise_for_status() - detailed_kb = detail_response.json() - - files = detailed_kb.get("files", []) - count = len(files) if isinstance(files, list) else 0 - - LOGGER.debug(f"Count for collection '{collection_name}': {count} files") - return count - - except Exception as e: - LOGGER.warning(f"Failed to get count for collection '{collection_name}': {e}") - return 0 - - async def get_knowledge_by_name(self, name: str) -> dict[str, object] | None: - """ - Get knowledge base details by name. - - Args: - name: Knowledge base name - - Returns: - Knowledge base details or None if not found - """ - try: - response = await self.client.get("/api/v1/knowledge/list") - response.raise_for_status() - knowledge_bases = response.json() - - return next( - ( - {str(k): v for k, v in kb.items()} - for kb in knowledge_bases - if isinstance(kb, dict) and kb.get("name") == name - ), - None, - ) - except Exception as e: - raise StorageError(f"Failed to get knowledge base by name: {e}") from e - - async def __aenter__(self) -> "OpenWebUIStorage": - """Async context manager entry.""" - await self.initialize() - return self - - async def __aexit__( - self, - exc_type: type[BaseException] | None, - exc_val: BaseException | None, - exc_tb: object | None, - ) -> None: - """Async context manager exit.""" - await self.close() - - async def list_documents( - self, - limit: int = 100, - offset: int = 0, - *, - collection_name: str | None = None, - ) -> list[dict[str, object]]: - """ - List documents (files) in a knowledge base. - - NOTE: This is a basic implementation that attempts to extract file information - from OpenWebUI knowledge bases. The actual file listing capabilities depend - on the OpenWebUI API version and may not include detailed file metadata. 
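-
-        Example (illustrative):
-            docs = await storage.list_documents(limit=20, collection_name="research")
-            titles = [doc["title"] for doc in docs]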
- - Args: - limit: Maximum number of documents to return - offset: Number of documents to skip - collection_name: Knowledge base name - - Returns: - List of document dictionaries with available metadata - """ - try: - # Use the knowledge base name or fall back to default - kb_name = collection_name or self.config.collection_name or "default" - - # Try to get knowledge base details - knowledge_base = await self.get_knowledge_by_name(kb_name) - if not knowledge_base: - # If specific KB not found, return empty list with a note - return [] - - # Extract files if available (API structure may vary) - files = knowledge_base.get("files", []) - - # Handle different possible API response structures - if not isinstance(files, list): - # Some API versions might structure this differently - # Try to handle gracefully - return [ - { - "id": "unknown", - "title": f"Knowledge Base: {kb_name}", - "source_url": "", - "description": "OpenWebUI knowledge base (file details not available)", - "content_type": "text/plain", - "content_preview": "Document listing not fully supported for OpenWebUI", - "word_count": 0, - "timestamp": "", - } - ] - - # Apply pagination - paginated_files = files[offset : offset + limit] - - # Convert to document format with safe field access - documents: list[dict[str, object]] = [] - for i, file_info in enumerate(paginated_files): - if not isinstance(file_info, dict): - continue - - # Safely extract fields with fallbacks - doc_id = str(file_info.get("id", f"file_{i}")) - - # Try multiple ways to get filename from OpenWebUI API response - filename = None - # Check direct filename field - if "filename" in file_info: - filename = file_info["filename"] - # Check name field - elif "name" in file_info: - filename = file_info["name"] - # Check meta.name (from FileModelResponse schema) - elif isinstance(file_info.get("meta"), dict): - filename = file_info["meta"].get("name") - - # Final fallback - if not filename: - filename = f"file_{i}" - - filename = str(filename) - - # Extract size from meta if available - size = 0 - if isinstance(file_info.get("meta"), dict): - size = file_info["meta"].get("size", 0) - else: - size = file_info.get("size", 0) - - # Estimate word count from file size (very rough approximation) - word_count = max(1, int(size / 6)) if isinstance(size, (int, float)) else 0 - - documents.append( - { - "id": doc_id, - "title": filename, - "source_url": "", # OpenWebUI files don't typically have source URLs - "description": f"File: {filename}", - "content_type": str(file_info.get("content_type", "text/plain")), - "content_preview": f"File uploaded to OpenWebUI: {filename}", - "word_count": word_count, - "timestamp": str( - file_info.get("created_at") or file_info.get("timestamp", "") - ), - } - ) - - return documents - - except Exception as e: - # Since OpenWebUI file listing API structure is not guaranteed, - # we gracefully fall back rather than raise an error - import logging - - logging.warning(f"OpenWebUI document listing failed: {e}") - - # Return a placeholder entry indicating limited support - return [ - { - "id": "api_error", - "title": f"Knowledge Base: {collection_name or 'default'}", - "source_url": "", - "description": "Document listing encountered an error - API compatibility issue", - "content_type": "text/plain", - "content_preview": f"Error: {str(e)[:100]}...", - "word_count": 0, - "timestamp": "", - } - ] - - async def close(self) -> None: - """Close client connection.""" - if hasattr(self, "client") and self.client: - try: - await self.client.aclose() - 
except Exception as e: - import logging - - logging.warning(f"Error closing OpenWebUI client: {e}") - - - -"""Weaviate storage adapter.""" - -from collections.abc import AsyncGenerator, Mapping, Sequence -from datetime import UTC, datetime -from typing import Literal, Self, TypeAlias, cast, overload -from uuid import UUID - -import weaviate -from typing_extensions import override -from weaviate.classes.config import Configure, DataType, Property -from weaviate.classes.data import DataObject -from weaviate.classes.query import Filter -from weaviate.collections import Collection -from weaviate.exceptions import ( - WeaviateBatchError, - WeaviateConnectionError, - WeaviateQueryError, -) - -from ..core.exceptions import StorageError -from ..core.models import Document, DocumentMetadata, IngestionSource, StorageConfig -from ..utils.vectorizer import Vectorizer -from .base import BaseStorage - -VectorContainer: TypeAlias = Mapping[str, object] | Sequence[object] | None - - -class WeaviateStorage(BaseStorage): - """Storage adapter for Weaviate.""" - - client: weaviate.WeaviateClient | None - vectorizer: Vectorizer - _default_collection: str - - def __init__(self, config: StorageConfig): - """ - Initialize Weaviate storage. - - Args: - config: Storage configuration - """ - super().__init__(config) - self.client = None - self.vectorizer = Vectorizer(config) - self._default_collection = self._normalize_collection_name(config.collection_name) - - @override - async def initialize(self) -> None: - """Initialize Weaviate client and create collection if needed.""" - try: - # Let Weaviate client handle URL parsing - self.client = weaviate.WeaviateClient( - connection_params=weaviate.connect.ConnectionParams.from_url( - url=str(self.config.endpoint), - grpc_port=50051, # Default gRPC port - ), - additional_config=weaviate.classes.init.AdditionalConfig( - timeout=weaviate.classes.init.Timeout(init=30, query=60, insert=120), - ), - ) - - # Connect to the client - self.client.connect() - - # Ensure the default collection exists - await self._ensure_collection(self._default_collection) - - except WeaviateConnectionError as e: - raise StorageError(f"Failed to connect to Weaviate: {e}") from e - except Exception as e: - raise StorageError(f"Failed to initialize Weaviate: {e}") from e - - async def _create_collection(self, collection_name: str) -> None: - """Create Weaviate collection with schema.""" - if not self.client: - raise StorageError("Weaviate client not initialized") - try: - self.client.collections.create( - name=collection_name, - properties=[ - Property( - name="content", data_type=DataType.TEXT, description="Document content" - ), - Property(name="source_url", data_type=DataType.TEXT, description="Source URL"), - Property(name="title", data_type=DataType.TEXT, description="Document title"), - Property( - name="description", - data_type=DataType.TEXT, - description="Document description", - ), - Property( - name="timestamp", data_type=DataType.DATE, description="Ingestion timestamp" - ), - Property( - name="content_type", data_type=DataType.TEXT, description="Content type" - ), - Property(name="word_count", data_type=DataType.INT, description="Word count"), - Property( - name="char_count", data_type=DataType.INT, description="Character count" - ), - Property( - name="source", data_type=DataType.TEXT, description="Ingestion source" - ), - ], - vectorizer_config=Configure.Vectorizer.none(), - ) - except Exception as e: - raise StorageError(f"Failed to create collection: {e}") from e - - @staticmethod - 
def _extract_vector(vector_raw: VectorContainer) -> list[float] | None: - """Normalize vector payloads returned by Weaviate into a float list.""" - if isinstance(vector_raw, Mapping): - default_vector = vector_raw.get("default") - return WeaviateStorage._extract_vector( - cast(VectorContainer, default_vector) - ) - - if not isinstance(vector_raw, Sequence) or isinstance( - vector_raw, (str, bytes, bytearray) - ): - return None - - items = list(vector_raw) - if not items: - return None - - first_item = items[0] - if isinstance(first_item, (int, float)): - numeric_items = cast(list[int | float], items) - try: - return [float(value) for value in numeric_items] - except (TypeError, ValueError): - return None - - if isinstance(first_item, Sequence) and not isinstance( - first_item, (str, bytes, bytearray) - ): - inner_items = list(first_item) - if all(isinstance(item, (int, float)) for item in inner_items): - try: - numeric_inner = cast(list[int | float], inner_items) - return [float(item) for item in numeric_inner] - except (TypeError, ValueError): - return None - - return None - - @staticmethod - def _parse_source(source_raw: object) -> IngestionSource: - """Safely normalize persistence source values into enum instances.""" - if isinstance(source_raw, IngestionSource): - return source_raw - - if isinstance(source_raw, str): - try: - return IngestionSource(source_raw) - except ValueError: - return IngestionSource.WEB - - return IngestionSource.WEB - - @staticmethod - @overload - def _coerce_properties( - properties: object, - *, - context: str, - ) -> Mapping[str, object]: - ... - - @staticmethod - @overload - def _coerce_properties( - properties: object, - *, - context: str, - allow_missing: Literal[False], - ) -> Mapping[str, object]: - ... - - @staticmethod - @overload - def _coerce_properties( - properties: object, - *, - context: str, - allow_missing: Literal[True], - ) -> Mapping[str, object] | None: - ... 
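-
-    # A call-shape sketch for the overloads above: only allow_missing=True can
-    # produce None, so most call sites get a guaranteed Mapping back.
-    #
-    #     props = self._coerce_properties(obj.properties, context="query")
-    #     maybe = self._coerce_properties(obj.properties, context="list", allow_missing=True)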
- - @staticmethod - def _coerce_properties( - properties: object, - *, - context: str, - allow_missing: bool = False, - ) -> Mapping[str, object] | None: - """Ensure Weaviate properties payloads are mappings.""" - if properties is None: - if allow_missing: - return None - raise StorageError(f"{context} returned object without properties") - - if not isinstance(properties, Mapping): - raise StorageError( - f"{context} returned invalid properties payload of type {type(properties)!r}" - ) - - return cast(Mapping[str, object], properties) - - def _normalize_collection_name(self, collection_name: str | None) -> str: - """Return a canonicalized collection name, defaulting to configured value.""" - candidate = collection_name or self.config.collection_name - if not candidate: - raise StorageError("Collection name is required") - - if normalized := candidate.strip(): - return normalized[0].upper() + normalized[1:] - else: - raise StorageError("Collection name cannot be empty") - - async def _ensure_collection(self, collection_name: str) -> None: - """Create the collection if missing.""" - if not self.client: - raise StorageError("Weaviate client not initialized") - - existing = self.client.collections.list_all() - if collection_name not in existing: - await self._create_collection(collection_name) - - async def _prepare_collection( - self, - collection_name: str | None, - *, - ensure_exists: bool, - ) -> tuple[Collection, str]: - """Return a ready collection handle and normalized name.""" - normalized = self._normalize_collection_name(collection_name) - - if not self.client: - raise StorageError("Weaviate client not initialized") - - if ensure_exists: - await self._ensure_collection(normalized) - - return self.client.collections.get(normalized), normalized - - @override - async def store(self, document: Document, *, collection_name: str | None = None) -> str: - """ - Store a document in Weaviate. - - Args: - document: Document to store - - Returns: - Document ID - """ - try: - # Vectorize content if no vector provided - if document.vector is None: - document.vector = await self.vectorizer.vectorize(document.content) - - collection, resolved_name = await self._prepare_collection( - collection_name, ensure_exists=True - ) - - # Prepare properties - properties = { - "content": document.content, - "source_url": document.metadata["source_url"], - "title": document.metadata.get("title", ""), - "description": document.metadata.get("description", ""), - "timestamp": document.metadata["timestamp"].isoformat(), - "content_type": document.metadata["content_type"], - "word_count": document.metadata["word_count"], - "char_count": document.metadata["char_count"], - "source": document.source.value, - } - - # Insert with vector - result = collection.data.insert( - properties=properties, vector=document.vector, uuid=str(document.id) - ) - - return str(result) - - except Exception as e: - raise StorageError(f"Failed to store document: {e}") from e - - @override - async def store_batch( - self, documents: list[Document], *, collection_name: str | None = None - ) -> list[str]: - """ - Store multiple documents using proper batch operations. 
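-
-        Example (illustrative; names are normalized, so "research" resolves to
-        the "Research" collection):
-            stored_ids = await storage.store_batch(documents, collection_name="research")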
-
-        Args:
-            documents: List of documents
-
-        Returns:
-            List of successfully stored document IDs
-        """
-        try:
-            collection, resolved_name = await self._prepare_collection(
-                collection_name, ensure_exists=True
-            )
-
-            # Vectorize documents without vectors
-            for doc in documents:
-                if doc.vector is None:
-                    doc.vector = await self.vectorizer.vectorize(doc.content)
-
-            # Prepare batch data for insert_many
-            batch_objects = []
-            for doc in documents:
-                properties = {
-                    "content": doc.content,
-                    "source_url": doc.metadata["source_url"],
-                    "title": doc.metadata.get("title", ""),
-                    "description": doc.metadata.get("description", ""),
-                    "timestamp": doc.metadata["timestamp"].isoformat(),
-                    "content_type": doc.metadata["content_type"],
-                    "word_count": doc.metadata["word_count"],
-                    "char_count": doc.metadata["char_count"],
-                    "source": doc.source.value,
-                }
-
-                batch_objects.append(
-                    DataObject(properties=properties, vector=doc.vector, uuid=str(doc.id))
-                )
-
-            # Insert batch using insert_many
-            response = collection.data.insert_many(batch_objects)
-
-            successful_ids: list[str] = []
-            error_indices = set(response.errors.keys()) if response else set()
-
-            for index, doc in enumerate(documents):
-                if index in error_indices:
-                    continue
-
-                uuid_value = response.uuids.get(index) if response else None
-                successful_ids.append(str(uuid_value) if uuid_value is not None else str(doc.id))
-
-            if error_indices:
-                error_messages = ", ".join(
-                    f"{documents[i].id}: {response.errors[i].message}"
-                    for i in error_indices
-                    if hasattr(response.errors[i], "message")
-                )
-                import logging
-                logging.warning(
-                    "Weaviate partial batch failure for collection %s: %s",
-                    resolved_name,
-                    error_messages,
-                )
-
-            return successful_ids
-
-        except WeaviateBatchError as e:
-            raise StorageError(f"Batch operation failed: {e}") from e
-        except WeaviateConnectionError as e:
-            raise StorageError(f"Connection to Weaviate failed: {e}") from e
-        except Exception as e:
-            raise StorageError(f"Failed to store batch: {e}") from e
-
-    @override
-    async def retrieve(
-        self, document_id: str, *, collection_name: str | None = None
-    ) -> Document | None:
-        """
-        Retrieve a document from Weaviate. 
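-
-        Example (illustrative):
-            doc = await storage.retrieve(str(document_id), collection_name="Research")
-            if doc is None:
-                ...  # not found, or a connection error that was logged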
- - Args: - document_id: Document ID - - Returns: - Document or None - """ - try: - collection, resolved_name = await self._prepare_collection( - collection_name, ensure_exists=False - ) - result = collection.query.fetch_object_by_id(document_id) - - if not result: - return None - - # Reconstruct document - props = self._coerce_properties( - result.properties, - context="fetch_object_by_id", - ) - metadata_dict = { - "source_url": str(props["source_url"]), - "title": str(props.get("title")) if props.get("title") else None, - "description": str(props.get("description")) - if props.get("description") - else None, - "timestamp": str(props["timestamp"]), - "content_type": str(props["content_type"]), - "word_count": int(str(props["word_count"])), - "char_count": int(str(props["char_count"])), - } - metadata = cast(DocumentMetadata, cast(object, metadata_dict)) - - vector = self._extract_vector(cast(VectorContainer, result.vector)) - - return Document( - id=UUID(document_id), - content=str(props["content"]), - metadata=metadata, - vector=vector, - source=self._parse_source(props.get("source")), - collection=resolved_name, - ) - - except WeaviateQueryError as e: - raise StorageError(f"Query failed: {e}") from e - except WeaviateConnectionError as e: - # Connection issues should be logged and return None - import logging - logging.warning(f"Weaviate connection error retrieving document {document_id}: {e}") - return None - except Exception as e: - # Log unexpected errors for debugging - import logging - logging.warning(f"Unexpected error retrieving document {document_id}: {e}") - return None - - def _build_search_metadata(self, props: Mapping[str, object]) -> DocumentMetadata: - """Build metadata dictionary from Weaviate properties.""" - metadata_dict = { - "source_url": str(props["source_url"]), - "title": str(props.get("title")) if props.get("title") else None, - "description": str(props.get("description")) - if props.get("description") - else None, - "timestamp": str(props["timestamp"]), - "content_type": str(props["content_type"]), - "word_count": int(str(props["word_count"])), - "char_count": int(str(props["char_count"])), - } - return cast(DocumentMetadata, cast(object, metadata_dict)) - - def _extract_search_score(self, result: object) -> float | None: - """Extract and convert search score from result metadata.""" - metadata_obj = getattr(result, "metadata", None) - if metadata_obj is None: - return None - - raw_distance = getattr(metadata_obj, "distance", None) - if raw_distance is None: - return None - - try: - distance_value = float(raw_distance) - return max(0.0, 1.0 - distance_value) - except (TypeError, ValueError) as e: - import logging - logging.debug(f"Invalid distance value {raw_distance}: {e}") - return None - - def _build_search_document( - self, - result: object, - resolved_name: str, - ) -> Document: - """Build Document from Weaviate search result.""" - props = self._coerce_properties( - getattr(result, "properties", None), - context="search result", - ) - metadata = self._build_search_metadata(props) - - vector_attr = getattr(result, "vector", None) - vector = self._extract_vector(cast(VectorContainer, vector_attr)) - score_value = self._extract_search_score(result) - - uuid_raw = getattr(result, "uuid", None) - if uuid_raw is None: - raise StorageError("Weaviate search result missing uuid") - uuid_value = uuid_raw if isinstance(uuid_raw, UUID) else UUID(str(uuid_raw)) - - return Document( - id=uuid_value, - content=str(props["content"]), - metadata=metadata, - vector=vector, 
- source=self._parse_source(props.get("source")), - collection=resolved_name, - score=score_value, - ) - - @override - async def search( - self, - query: str, - limit: int = 10, - threshold: float = 0.7, - *, - collection_name: str | None = None, - ) -> AsyncGenerator[Document, None]: - """ - Search for documents in Weaviate. - - Args: - query: Search query - limit: Maximum results - threshold: Similarity threshold - - Yields: - Matching documents - """ - try: - query_vector = await self.vectorizer.vectorize(query) - collection, resolved_name = await self._prepare_collection( - collection_name, ensure_exists=False - ) - - results = collection.query.near_vector( - near_vector=query_vector, - limit=limit, - distance=1 - threshold, - return_metadata=["distance"], - ) - - for result in results.objects: - yield self._build_search_document(result, resolved_name) - - except WeaviateQueryError as e: - raise StorageError(f"Search query failed: {e}") from e - except WeaviateConnectionError as e: - raise StorageError(f"Connection to Weaviate failed during search: {e}") from e - except Exception as e: - raise StorageError(f"Search failed: {e}") from e - - @override - async def delete(self, document_id: str, *, collection_name: str | None = None) -> bool: - """ - Delete a document from Weaviate. - - Args: - document_id: Document ID - - Returns: - True if deleted - """ - try: - collection, _ = await self._prepare_collection(collection_name, ensure_exists=False) - collection.data.delete_by_id(document_id) - return True - except WeaviateQueryError as e: - raise StorageError(f"Delete operation failed: {e}") from e - except Exception: - return False - - @override - async def count(self, *, collection_name: str | None = None) -> int: - """ - Get document count in collection. - - Returns: - Number of documents - """ - try: - if not self.client: - return 0 - collection, _ = await self._prepare_collection(collection_name, ensure_exists=False) - result = collection.aggregate.over_all(total_count=True) - return result.total_count or 0 - except WeaviateQueryError as e: - raise StorageError(f"Count query failed: {e}") from e - except Exception: - return 0 - - async def list_collections(self) -> list[str]: - """ - List all available collections. - - Returns: - List of collection names - """ - try: - if not self.client: - raise StorageError("Weaviate client not initialized") - - return list(self.client.collections.list_all()) - - except Exception as e: - raise StorageError(f"Failed to list collections: {e}") from e - - async def describe_collections(self) -> list[dict[str, object]]: - """Return metadata for each Weaviate collection.""" - if not self.client: - raise StorageError("Weaviate client not initialized") - - try: - collections: list[dict[str, object]] = [] - for name in self.client.collections.list_all(): - collection_obj = self.client.collections.get(name) - if not collection_obj: - continue - - count = collection_obj.aggregate.over_all(total_count=True).total_count or 0 - size_mb = count * 0.01 - collections.append( - { - "name": name, - "count": count, - "size_mb": size_mb, - } - ) - - return collections - except Exception as e: - raise StorageError(f"Failed to describe collections: {e}") from e - - async def sample_documents( - self, limit: int = 5, *, collection_name: str | None = None - ) -> list[Document]: - """ - Get sample documents from the collection. 
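-
-        Example (illustrative):
-            preview = await storage.sample_documents(limit=3, collection_name="Research")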
- - Args: - limit: Maximum number of documents to return - - Returns: - List of sample documents - """ - try: - collection, resolved_name = await self._prepare_collection( - collection_name, ensure_exists=False - ) - - # Query for sample documents - response = collection.query.fetch_objects(limit=limit) - - documents = [] - for obj in response.objects: - # Convert back to Document format - props = self._coerce_properties( - getattr(obj, "properties", None), - context="sample_documents", - allow_missing=True, - ) - if props is None: - continue - uuid_raw = getattr(obj, "uuid", None) - if uuid_raw is None: - continue - document_id = uuid_raw if isinstance(uuid_raw, UUID) else UUID(str(uuid_raw)) - # Safely convert WeaviateField values - word_count_val = props.get("word_count") - if isinstance(word_count_val, (int, float)): - word_count = int(word_count_val) - elif word_count_val: - word_count = int(str(word_count_val)) - else: - word_count = 0 - - char_count_val = props.get("char_count") - if isinstance(char_count_val, (int, float)): - char_count = int(char_count_val) - elif char_count_val: - char_count = int(str(char_count_val)) - else: - char_count = 0 - - doc = Document( - id=document_id, - content=str(props.get("content", "")), - source=self._parse_source(props.get("source")), - metadata={ - "source_url": str(props.get("source_url", "")), - "title": str(props.get("title", "")) if props.get("title") else None, - "description": str(props.get("description", "")) - if props.get("description") - else None, - "timestamp": datetime.fromisoformat( - str(props.get("timestamp", datetime.now(UTC).isoformat())) - ), - "content_type": str(props.get("content_type", "text/plain")), - "word_count": word_count, - "char_count": char_count, - }, - collection=resolved_name, - ) - documents.append(doc) - - return documents - - except Exception as e: - raise StorageError(f"Failed to sample documents: {e}") from e - - def _safe_convert_count(self, value: object) -> int: - """Safely convert a value to integer count.""" - if isinstance(value, (int, float)): - return int(value) - elif value: - return int(str(value)) - else: - return 0 - - def _build_document_metadata(self, props: Mapping[str, object]) -> DocumentMetadata: - """Build metadata from search document properties.""" - return { - "source_url": str(props.get("source_url", "")), - "title": str(props.get("title", "")) if props.get("title") else None, - "description": str(props.get("description", "")) - if props.get("description") - else None, - "timestamp": datetime.fromisoformat( - str(props.get("timestamp", datetime.now(UTC).isoformat())) - ), - "content_type": str(props.get("content_type", "text/plain")), - "word_count": self._safe_convert_count(props.get("word_count")), - "char_count": self._safe_convert_count(props.get("char_count")), - } - - def _extract_document_score(self, obj: object) -> float | None: - """Extract score from document search result.""" - metadata_obj = getattr(obj, "metadata", None) - if metadata_obj is None: - return None - - raw_score = getattr(metadata_obj, "score", None) - if raw_score is None: - return None - - try: - return float(raw_score) - except (TypeError, ValueError) as e: - import logging - logging.debug(f"Invalid score value {raw_score}: {e}") - return None - - def _build_document_from_search( - self, - obj: object, - resolved_name: str, - ) -> Document: - """Build Document from search document result.""" - props = self._coerce_properties( - getattr(obj, "properties", None), - context="document search result", - ) - 
metadata = self._build_document_metadata(props) - score_value = self._extract_document_score(obj) - - uuid_raw = getattr(obj, "uuid", None) - if uuid_raw is None: - raise StorageError("Weaviate search document result missing uuid") - uuid_value = uuid_raw if isinstance(uuid_raw, UUID) else UUID(str(uuid_raw)) - - return Document( - id=uuid_value, - content=str(props.get("content", "")), - source=self._parse_source(props.get("source")), - metadata=metadata, - collection=resolved_name, - score=score_value, - ) - - async def search_documents( - self, query: str, limit: int = 10, *, collection_name: str | None = None - ) -> list[Document]: - """ - Search documents in the collection. - - Args: - query: Search query - limit: Maximum number of results - - Returns: - List of matching documents - """ - try: - if not self.client: - raise StorageError("Weaviate client not initialized") - - collection, resolved_name = await self._prepare_collection( - collection_name, ensure_exists=False - ) - - # Try hybrid search first, fall back to BM25 keyword search - try: - response = collection.query.hybrid( - query=query, limit=limit, return_metadata=["score"] - ) - except Exception: - response = collection.query.bm25( - query=query, limit=limit, return_metadata=["score"] - ) - - return [ - self._build_document_from_search(obj, resolved_name) - for obj in response.objects - ] - - except Exception as e: - raise StorageError(f"Failed to search documents: {e}") from e - - async def list_documents( - self, - limit: int = 100, - offset: int = 0, - *, - collection_name: str | None = None, - ) -> list[dict[str, object]]: - """ - List documents in the collection with pagination. - - Args: - limit: Maximum number of documents to return - offset: Number of documents to skip - - Returns: - List of document dictionaries with id, title, source_url, and content preview - """ - try: - if not self.client: - raise StorageError("Weaviate client not initialized") - - collection, _ = await self._prepare_collection(collection_name, ensure_exists=False) - - # Query documents with pagination - response = collection.query.fetch_objects( - limit=limit, offset=offset, return_metadata=["creation_time"] - ) - - documents: list[dict[str, object]] = [] - for obj in response.objects: - props = self._coerce_properties( - obj.properties, - context="list_documents", - allow_missing=True, - ) - if props is None: - continue - content = str(props.get("content", "")) - word_count_value = props.get("word_count", 0) - # Convert WeaviateField to int - if isinstance(word_count_value, (int, float)): - word_count = int(word_count_value) - elif word_count_value: - word_count = int(str(word_count_value)) - else: - word_count = 0 - - doc_info: dict[str, object] = { - "id": str(obj.uuid), - "title": str(props.get("title", "Untitled")), - "source_url": str(props.get("source_url", "")), - "description": str(props.get("description", "")), - "content_type": str(props.get("content_type", "text/plain")), - "content_preview": (f"{content[:200]}..." if len(content) > 200 else content), - "word_count": word_count, - "timestamp": str(props.get("timestamp", "")), - } - documents.append(doc_info) - - return documents - - except Exception as e: - raise StorageError(f"Failed to list documents: {e}") from e - - async def delete_documents( - self, document_ids: list[str], *, collection_name: str | None = None - ) -> dict[str, bool]: - """ - Delete multiple documents from Weaviate. 
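-
-        Example (illustrative):
-            status = await storage.delete_documents([str(u) for u in uuids])
-            failed = [doc_id for doc_id, ok in status.items() if not ok]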
- - Args: - document_ids: List of document IDs to delete - - Returns: - Dictionary mapping document IDs to deletion success status - """ - results: dict[str, bool] = {} - - try: - if not self.client: - raise StorageError("Weaviate client not initialized") - - if not document_ids: - return results - - collection, resolved_name = await self._prepare_collection( - collection_name, ensure_exists=False - ) - - delete_filter = Filter.by_id().contains_any(document_ids) - response = collection.data.delete_many(where=delete_filter, verbose=True) - - if objects := getattr(response, "objects", None): - for result_obj in objects: - if doc_uuid := str(getattr(result_obj, "uuid", "")): - results[doc_uuid] = bool(getattr(result_obj, "successful", False)) - - if len(results) < len(document_ids): - default_success = getattr(response, "failed", 0) == 0 - for doc_id in document_ids: - _ = results.setdefault(doc_id, default_success) - - return results - - except Exception as e: - raise StorageError(f"Failed to delete documents: {e}") from e - - async def delete_by_filter( - self, filter_dict: dict[str, str], *, collection_name: str | None = None - ) -> int: - """ - Delete documents matching a filter. - - Args: - filter_dict: Filter criteria (e.g., {"source_url": "example.com"}) - - Returns: - Number of documents deleted - """ - try: - if not self.client: - raise StorageError("Weaviate client not initialized") - - collection, _ = await self._prepare_collection(collection_name, ensure_exists=False) - - # Build where filter - where_filter = None - if "source_url" in filter_dict: - where_filter = Filter.by_property("source_url").equal(filter_dict["source_url"]) - - # Get documents matching filter - if where_filter: - response = collection.query.fetch_objects( - filters=where_filter, - limit=1000, # Max batch size - ) - else: - response = collection.query.fetch_objects( - limit=1000 # Max batch size - ) - - # Delete matching documents - deleted_count = 0 - for obj in response.objects: - try: - collection.data.delete_by_id(obj.uuid) - deleted_count += 1 - except Exception: - continue - - return deleted_count - - except Exception as e: - raise StorageError(f"Failed to delete by filter: {e}") from e - - async def delete_collection(self, collection_name: str | None = None) -> bool: - """ - Delete the entire collection. 
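-
-        Example (illustrative):
-            dropped = await storage.delete_collection("Research")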
- - Returns: - True if successful - """ - try: - if not self.client: - raise StorageError("Weaviate client not initialized") - - target = self._normalize_collection_name(collection_name) - - # Delete the collection using the client's collections API - self.client.collections.delete(target) - - return True - - except Exception as e: - raise StorageError(f"Failed to delete collection: {e}") from e - - async def __aenter__(self) -> Self: - """Async context manager entry.""" - return self - - async def __aexit__( - self, - exc_type: type[BaseException] | None, - exc_val: BaseException | None, - exc_tb: object | None, - ) -> None: - """Async context manager exit with proper cleanup.""" - await self.close() - - async def close(self) -> None: - """Close client connection.""" - if self.client: - try: - self.client.close() - except Exception as e: - import logging - logging.warning(f"Error closing Weaviate client: {e}") - - def __del__(self) -> None: - """Clean up client connection as fallback.""" - if self.client: - try: - self.client.close() - except Exception: - pass # Ignore errors in destructor - - """Metadata tagger for enriching documents with AI-generated tags and metadata.""" @@ -14041,4 +9126,5032 @@ class Vectorizer: await self.client.aclose() + +"""Main dashboard screen with collections overview.""" + +import logging +from datetime import datetime +from typing import TYPE_CHECKING, Final + +from textual import work +from textual.app import ComposeResult +from textual.binding import Binding +from textual.containers import Container, Grid, Horizontal +from textual.css.query import NoMatches +from textual.reactive import reactive, var +from textual.screen import Screen +from textual.widgets import ( + Button, + Footer, + Header, + LoadingIndicator, + Rule, + Static, + TabbedContent, + TabPane, +) +from typing_extensions import override + +from ....core.models import StorageBackend +from ....storage.base import BaseStorage +from ....storage.openwebui import OpenWebUIStorage +from ....storage.weaviate import WeaviateStorage +from ..models import CollectionInfo +from ..utils.storage_manager import StorageManager +from ..widgets import EnhancedDataTable, MetricsCard, StatusIndicator + +if TYPE_CHECKING: + from ....storage.r2r.storage import R2RStorage +else: # pragma: no cover - optional dependency fallback + R2RStorage = BaseStorage + + +LOGGER: Final[logging.Logger] = logging.getLogger(__name__) + + +class CollectionOverviewScreen(Screen[None]): + """Enhanced dashboard with modern design and metrics.""" + + total_documents: int = 0 + total_collections: int = 0 + active_backends: int = 0 + + BINDINGS = [ + Binding("q", "quit", "Quit"), + Binding("r", "refresh", "Refresh"), + Binding("i", "ingest", "Ingest"), + Binding("m", "manage", "Manage"), + Binding("s", "search", "Search"), + Binding("ctrl+d", "delete", "Delete"), + Binding("ctrl+1", "tab_dashboard", "Dashboard"), + Binding("ctrl+2", "tab_collections", "Collections"), + Binding("ctrl+3", "tab_analytics", "Analytics"), + Binding("tab", "next_tab", "Next Tab"), + Binding("shift+tab", "prev_tab", "Prev Tab"), + Binding("f1", "help", "Help"), + ] + + collections: var[list[CollectionInfo]] = var([]) + is_loading: var[bool] = var(False) + selected_collection: reactive[CollectionInfo | None] = reactive(None) + storage_manager: StorageManager + weaviate: WeaviateStorage | None + openwebui: OpenWebUIStorage | None + r2r: R2RStorage | BaseStorage | None + + def __init__( + self, + storage_manager: StorageManager, + weaviate: WeaviateStorage | 
None,
+        openwebui: OpenWebUIStorage | None,
+        r2r: R2RStorage | BaseStorage | None,
+    ) -> None:
+        super().__init__()
+        self.storage_manager = storage_manager
+        self.weaviate = weaviate
+        self.openwebui = openwebui
+        self.r2r = r2r
+        self.total_documents = 0
+        self.total_collections = 0
+        self.active_backends = 0
+
+    @override
+    def compose(self) -> ComposeResult:
+        yield Header(show_clock=True)
+
+        with TabbedContent():
+            # Dashboard Tab
+            with TabPane("Dashboard", id="dashboard"):
+                yield Container(
+                    Static("🚀 Collection Management System", classes="title"),
+                    Static("Modern document ingestion and management platform", classes="subtitle"),
+                    Rule(line_style="heavy"),
+                    # Metrics Grid
+                    Container(
+                        Grid(
+                            MetricsCard(
+                                "Collections", str(self.total_collections), "Active collections"
+                            ),
+                            MetricsCard("Documents", str(self.total_documents), "Total indexed"),
+                            MetricsCard(
+                                "Backends", str(self.active_backends), "Connected services"
+                            ),
+                            MetricsCard("Status", "Online", "System health"),
+                            classes="responsive-grid metrics-grid",
+                        ),
+                        classes="center",
+                    ),
+                    Rule(line_style="dashed"),
+                    # Quick Actions
+                    Container(
+                        Static("⚡ Quick Actions", classes="section-title"),
+                        Horizontal(
+                            Button("🔄 Refresh Data", id="quick_refresh", variant="primary"),
+                            Button("📥 New Ingestion", id="quick_ingest", variant="success"),
+                            Button("🔍 Search All", id="quick_search", variant="default"),
+                            Button("⚙️ Settings", id="quick_settings", variant="default"),
+                            classes="action_buttons",
+                        ),
+                        classes="card",
+                    ),
+                    # Recent Activity
+                    Container(
+                        Static("📊 Recent Activity", classes="section-title"),
+                        Static(
+                            "Loading recent activity...", id="activity_feed", classes="status-text"
+                        ),
+                        classes="card",
+                    ),
+                    classes="main_container",
+                )
+
+            # Collections Tab
+            with TabPane("Collections", id="collections"):
+                yield Container(
+                    Static("📚 Collection Overview", classes="title"),
+                    # Collection controls
+                    Horizontal(
+                        Button("🔄 Refresh", id="refresh_btn", variant="primary"),
+                        Button("📥 Ingest", id="ingest_btn", variant="success"),
+                        Button("🔧 Manage", id="manage_btn", variant="warning"),
+                        Button("🗑️ Delete", id="delete_btn", variant="error"),
+                        Button("🔍 Search", id="search_btn", variant="default"),
+                        classes="button_bar",
+                    ),
+                    # Collection table with enhanced navigation
+                    EnhancedDataTable(id="collections_table", classes="enhanced-table"),
+                    # Status bar
+                    Container(
+                        Static("Ready", id="status_text", classes="status-text"),
+                        StatusIndicator("Ready", id="connection_status"),
+                        classes="status-bar",
+                    ),
+                    LoadingIndicator(id="loading", classes="pulse"),
+                    classes="main_container",
+                )
+
+            # Analytics Tab
+            with TabPane("Analytics", id="analytics"):
+                yield Container(
+                    Static("📈 Analytics & Insights", classes="title"),
+                    # Analytics content
+                    Container(
+                        Static("🚧 Analytics Dashboard", classes="section-title"),
+                        Static("Advanced analytics and insights coming soon!", classes="subtitle"),
+                        # Placeholder charts area
+                        Container(
+                            Static("📊 Document Distribution", classes="chart-title"),
+                            Static(
+                                "Chart placeholder - integrate with visualization library",
+                                classes="chart-placeholder",
+                            ),
+                            classes="card",
+                        ),
+                        Container(
+                            Static("⏱️ Ingestion Timeline", classes="chart-title"),
+                            Static("Timeline chart placeholder", classes="chart-placeholder"),
+                            classes="card",
+                        ),
+                        classes="analytics-grid",
+                    ),
+                    classes="main_container",
+                )
+
+        yield Footer()
+
+    async def on_mount(self) -> None:
+        """Initialize the screen with enhanced loading."""
+        self.query_one("#loading").display = False
+        self.update_metrics()
+        self.refresh_collections()  # Don't await, let it run as a worker
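+
+    # Scheduling sketch: refresh_collections below is decorated with
+    # @work(exclusive=True), so the bare call in on_mount starts a Textual
+    # worker rather than returning a coroutine to await, roughly:
+    #
+    #     @work(exclusive=True)
+    #     async def refresh_collections(self) -> None: ...
+    #
+    #     self.refresh_collections()  # returns a Worker handle immediately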
+
+    def update_metrics(self) -> None:
+        """Update dashboard metrics with enhanced calculations."""
+        self._calculate_metrics()
+        self._update_metrics_cards()
+        self._update_activity_feed()
+
+    def _calculate_metrics(self) -> None:
+        """Calculate basic metrics from collections."""
+        self.total_collections = len(self.collections)
+        self.total_documents = sum(col["count"] for col in self.collections)
+        self.active_backends = sum([bool(self.weaviate), bool(self.openwebui), bool(self.r2r)])
+
+    def _update_metrics_cards(self) -> None:
+        """Update the metrics cards display."""
+        try:
+            dashboard_tab = self.query_one("#dashboard")
+            metrics_cards_query = dashboard_tab.query(MetricsCard)
+            if len(metrics_cards_query) >= 4:
+                metrics_cards = list(metrics_cards_query)
+                self._update_card_values(metrics_cards)
+                self._update_status_card(metrics_cards[3])
+        except NoMatches:
+            return
+        except Exception as exc:
+            LOGGER.exception("Failed to update dashboard metrics", exc_info=exc)
+
+    def _update_card_values(self, metrics_cards: list[MetricsCard]) -> None:
+        """Update individual metric card values."""
+        metrics_cards[0].query_one(".metrics-value", Static).update(f"{self.total_collections:,}")
+        metrics_cards[1].query_one(".metrics-value", Static).update(f"{self.total_documents:,}")
+        metrics_cards[2].query_one(".metrics-value", Static).update(str(self.active_backends))
+
+    def _update_status_card(self, status_card: MetricsCard) -> None:
+        """Update the system status card."""
+        if self.active_backends > 0 and self.total_collections > 0:
+            status_text, status_class = "🟢 Healthy", "status-active"
+        elif self.active_backends > 0:
+            status_text, status_class = "🟡 Ready", "status-warning"
+        else:
+            status_text, status_class = "🔴 Offline", "status-error"
+
+        status_card.query_one(".metrics-value", Static).update(status_text)
+        status_card.add_class(status_class)
+
+    def _update_activity_feed(self) -> None:
+        """Update the activity feed with collection data."""
+        try:
+            dashboard_tab = self.query_one("#dashboard")
+            activity_feed = dashboard_tab.query_one("#activity_feed", Static)
+            activity_text = self._generate_activity_text()
+            activity_feed.update(activity_text)
+        except NoMatches:
+            return
+        except Exception as exc:
+            LOGGER.exception("Failed to update dashboard activity feed", exc_info=exc)
+
+    def _generate_activity_text(self) -> str:
+        """Generate activity feed text from collections."""
+        if not self.collections:
+            return "🚀 No collections found. Start by creating your first ingestion!\n💡 Press 'I' to begin or use the Quick Actions above."
+
+        recent_activity = [self._format_collection_item(col) for col in self.collections[:3]]
+        activity_text = "\n".join(recent_activity)
+
+        if len(self.collections) > 3:
+            total_docs = sum(c["count"] for c in self.collections)
+            activity_text += (
+                f"\n📊 Total: {len(self.collections)} collections with {total_docs:,} documents"
+            )
+
+        return activity_text
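+
+    # Feed-line sketch for the formatter below (values are illustrative):
+    #
+    #     col = CollectionInfo(name="web_docs", type="weaviate", count=1204,
+    #                          backend="🗄️ Weaviate", status="✓ Active",
+    #                          last_updated="2025-09-19 13:34", size_mb=12.5)
+    #     self._format_collection_item(col)  # -> "🌐 web_docs: 1,204 docs (12.5 MB) - 🗄️ Weaviate"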
+
+        recent_activity = [self._format_collection_item(col) for col in self.collections[:3]]
+        activity_text = "\n".join(recent_activity)
+
+        if len(self.collections) > 3:
+            total_docs = sum(c["count"] for c in self.collections)
+            activity_text += (
+                f"\n📊 Total: {len(self.collections)} collections with {total_docs:,} documents"
+            )
+
+        return activity_text
+
+    def _format_collection_item(self, col: CollectionInfo) -> str:
+        """Format a single collection item for the activity feed."""
+        content_type = self._get_content_type_icon(col["name"])
+        size_mb = col["size_mb"]
+        backend_info = col["backend"]
+
+        # Check if this represents a multi-backend ingestion result
+        if isinstance(backend_info, list):
+            if len(backend_info) > 1:
+                # Ensure all elements are strings for safe joining
+                backend_strings = [str(b) for b in backend_info if b is not None]
+                backend_list = " + ".join(backend_strings) if backend_strings else "unknown"
+                return f"{content_type} {col['name']}: {col['count']:,} docs ({size_mb:.1f} MB) → {backend_list}"
+            elif len(backend_info) == 1:
+                backend_name = str(backend_info[0]) if backend_info[0] is not None else "unknown"
+                return f"{content_type} {col['name']}: {col['count']:,} docs ({size_mb:.1f} MB) - {backend_name}"
+            else:
+                return f"{content_type} {col['name']}: {col['count']:,} docs ({size_mb:.1f} MB) - unknown"
+        else:
+            backend_display = str(backend_info) if backend_info is not None else "unknown"
+            return f"{content_type} {col['name']}: {col['count']:,} docs ({size_mb:.1f} MB) - {backend_display}"
+
+    def _get_content_type_icon(self, name: str) -> str:
+        """Get appropriate icon for collection content type."""
+        name_lower = name.lower()
+        if "web" in name_lower:
+            return "🌐"
+        elif "doc" in name_lower:
+            return "📖"
+        elif "repo" in name_lower:
+            return "📦"
+        return "📄"
+
+    @work(exclusive=True)
+    async def refresh_collections(self) -> None:
+        """Refresh collection data with enhanced multi-backend loading feedback."""
+        self.is_loading = True
+        loading_indicator = self.query_one("#loading")
+        status_text = self.query_one("#status_text", Static)
+
+        loading_indicator.display = True
+        status_text.update("🔄 Refreshing collections...")
+
+        try:
+            # Use storage manager for unified backend handling
+            if not self.storage_manager.is_initialized:
+                status_text.update("🔗 Initializing storage backends...")
+                backend_results = await self.storage_manager.initialize_all_backends()
+
+                # Report per-backend initialization status
+                success_count = sum(backend_results.values())
+                total_count = len(backend_results)
+                status_text.update(f"✅ Initialized {success_count}/{total_count} backends")
+
+            # Get collections from all backends via storage manager
+            status_text.update("📚 Loading collections from all backends...")
+            collections = await self.storage_manager.get_all_collections()
+
+            # Update metrics calculation for multi-backend support
+            self.active_backends = len(self.storage_manager.get_available_backends())
+
+            self.collections = collections
+            await self.update_collections_table()
+            self.update_metrics()
+
+            # Enhanced status reporting for multi-backend
+            backend_names = ", ".join(
+                backend.value for backend in self.storage_manager.get_available_backends()
+            )
+            status_text.update(f"✨ Ready - {len(collections)} collections from {backend_names}")
+
+            # Update connection status with multi-backend awareness
+            connection_status = self.query_one("#connection_status", StatusIndicator)
+            if collections and self.active_backends > 0:
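+                # The indicator reflects how many backends returned data; an
+                # empty result set downgrades it to "No Data".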
+                connection_status.update_status(f"✓ {self.active_backends} Active")
+            else:
+                connection_status.update_status("No Data")
+
+        except Exception as e:
+            status_text.update(f"❌ Error: {e}")
+            self.notify(f"Failed to refresh: {e}", severity="error", markup=False)
+        finally:
+            self.is_loading = False
+            loading_indicator.display = False
+
+    async def list_weaviate_collections(self) -> list[CollectionInfo]:
+        """List Weaviate collections with enhanced metadata."""
+        if not self.weaviate:
+            return []
+
+        try:
+            overview = await self.weaviate.describe_collections()
+            collections: list[CollectionInfo] = []
+
+            for item in overview:
+                count_raw = item.get("count", 0)
+                count_val = int(count_raw) if isinstance(count_raw, (int, str)) else 0
+                size_mb_raw = item.get("size_mb", 0.0)
+                size_mb_val = float(size_mb_raw) if isinstance(size_mb_raw, (int, float, str)) else 0.0
+                collections.append(
+                    CollectionInfo(
+                        name=str(item.get("name", "Unknown")),
+                        type="weaviate",
+                        count=count_val,
+                        backend="🗄️ Weaviate",
+                        status="✓ Active",
+                        last_updated=datetime.now().strftime("%Y-%m-%d %H:%M"),
+                        size_mb=size_mb_val,
+                    )
+                )
+
+            return collections
+        except Exception as e:
+            self.notify(f"Error listing Weaviate collections: {e}", severity="error", markup=False)
+            return []
+
+    async def list_openwebui_collections(self) -> list[CollectionInfo]:
+        """List OpenWebUI collections with enhanced metadata."""
+        # Try to get OpenWebUI backend from storage manager if direct instance not available
+        openwebui_backend = self.openwebui
+        if not openwebui_backend:
+            backend = self.storage_manager.get_backend(StorageBackend.OPEN_WEBUI)
+            if not isinstance(backend, OpenWebUIStorage):
+                return []
+            openwebui_backend = backend
+        if not openwebui_backend:
+            return []
+
+        try:
+            overview = await openwebui_backend.describe_collections()
+            collections: list[CollectionInfo] = []
+
+            for item in overview:
+                count_raw = item.get("count", 0)
+                count_val = int(count_raw) if isinstance(count_raw, (int, str)) else 0
+                size_mb_raw = item.get("size_mb", 0.0)
+                size_mb_val = float(size_mb_raw) if isinstance(size_mb_raw, (int, float, str)) else 0.0
+                collection_name = str(item.get("name", "Unknown"))
+                collections.append(
+                    CollectionInfo(
+                        name=collection_name,
+                        type="openwebui",
+                        count=count_val,
+                        backend="🌐 OpenWebUI",
+                        status="✓ Active",
+                        last_updated=datetime.now().strftime("%Y-%m-%d %H:%M"),
+                        size_mb=size_mb_val,
+                    )
+                )
+
+            return collections
+        except Exception as e:
+            self.notify(f"Error listing OpenWebUI collections: {e}", severity="error", markup=False)
+            return []
+
+    async def update_collections_table(self) -> None:
+        """Update the collections table with enhanced formatting."""
+        table = self.query_one("#collections_table", EnhancedDataTable)
+        table.clear(columns=True)
+
+        # Add enhanced columns with more metadata
+        table.add_columns("Collection", "Backend", "Documents", "Size", "Type", "Status", "Updated")
+
+        # Add rows with enhanced formatting
+        for collection in self.collections:
+            # Format size
+            size_str = f"{collection['size_mb']:.1f} MB"
+            if collection["size_mb"] > 1000:
+                size_str = f"{collection['size_mb'] / 1000:.1f} GB"
+
+            # Format document count
+            doc_count = f"{collection['count']:,}"
+
+            # Determine content type based on collection name or other metadata
+            content_type = "📄 Mixed"
+            if "web" in collection["name"].lower():
+                content_type = "🌐 Web"
+            elif "doc" in collection["name"].lower():
+                content_type = "📖 Docs"
+            elif "repo" in collection["name"].lower():
+                content_type = "📦 Code"
+
+            table.add_row(
+                collection["name"],
+                collection["backend"],
+                doc_count,
+                size_str,
+                content_type,
+                collection["status"],
+                collection["last_updated"],
+            )
+
+        if self.collections:
+            table.move_cursor(row=0)
+
+        self.get_selected_collection()
+
+    def update_search_controls(self, collection: CollectionInfo | None) -> None:
+        """Enable or disable search controls based on backend support."""
+        try:
+            search_button = self.query_one("#search_btn", Button)
+            quick_search_button = self.query_one("#quick_search", Button)
+        except Exception:
+            return
+
+        is_weaviate = bool(collection and collection.get("type") == "weaviate")
+        search_button.disabled = not is_weaviate
+        quick_search_button.disabled = not is_weaviate
+
+    def get_selected_collection(self) -> CollectionInfo | None:
+        """Get the currently selected collection."""
+        table = self.query_one("#collections_table", EnhancedDataTable)
+        try:
+            row_index = table.cursor_coordinate.row
+        except (AttributeError, IndexError):
+            self.selected_collection = None
+            self.update_search_controls(None)
+            return None
+
+        if 0 <= row_index < len(self.collections):
+            collection = self.collections[row_index]
+            self.selected_collection = collection
+            self.update_search_controls(collection)
+            return collection
+
+        self.selected_collection = None
+        self.update_search_controls(None)
+        return None
+
+    # Action methods
+    def action_refresh(self) -> None:
+        """Refresh collections."""
+        self.refresh_collections()
+
+    def action_ingest(self) -> None:
+        """Show enhanced ingestion dialog."""
+        if selected := self.get_selected_collection():
+            from .ingestion import IngestionScreen
+
+            self.app.push_screen(IngestionScreen(selected, self.storage_manager))
+        else:
+            self.notify("🔍 Please select a collection first", severity="warning")
+
+    def action_manage(self) -> None:
+        """Manage documents in selected collection."""
+        if selected := self.get_selected_collection():
+            if storage_backend := self._get_storage_for_collection(selected):
+                from .documents import DocumentManagementScreen
+
+                self.app.push_screen(DocumentManagementScreen(selected, storage_backend))
+            else:
+                self.notify(
+                    "🚧 No storage backend available for this collection", severity="warning"
+                )
+        else:
+            self.notify("🔍 Please select a collection first", severity="warning")
+
+    def _get_storage_for_collection(self, collection: CollectionInfo) -> BaseStorage | None:
+        """Get the appropriate storage backend for a collection."""
+        collection_type = collection.get("type", "")
+
+        # Map collection types to storage backends (try direct instances first)
+        if collection_type == "weaviate" and self.weaviate:
+            return self.weaviate
+        elif collection_type == "openwebui" and self.openwebui:
+            return self.openwebui
+        elif collection_type == "r2r" and self.r2r:
+            return self.r2r
+
+        # Fall back to storage manager if direct instances not available
+        if collection_type == "weaviate":
+            return self.storage_manager.get_backend(StorageBackend.WEAVIATE)
+        elif collection_type == "openwebui":
+            return self.storage_manager.get_backend(StorageBackend.OPEN_WEBUI)
+        elif collection_type == "r2r":
+            return self.storage_manager.get_backend(StorageBackend.R2R)
+
+        # Fall back to checking available backends by backend name
+        backend_name = collection.get("backend", "")
+        if isinstance(backend_name, str):
+            if "weaviate" in backend_name.lower():
+                return self.weaviate or self.storage_manager.get_backend(StorageBackend.WEAVIATE)
+            elif "openwebui" in backend_name.lower():
+                return self.openwebui or self.storage_manager.get_backend(StorageBackend.OPEN_WEBUI)
+            elif "r2r" in backend_name.lower():
+                return self.r2r or self.storage_manager.get_backend(StorageBackend.R2R)
+
+        return None
+
+    def action_search(self) -> None:
+        """Search in selected collection."""
+        if selected := self.get_selected_collection():
+            if selected["type"] != "weaviate":
+                self.notify(
+                    "🔍 Search is currently available only for Weaviate collections",
+                    severity="warning",
+                )
+                return
+            from .search import SearchScreen
+
+            self.app.push_screen(SearchScreen(selected, self.weaviate, self.openwebui))
+        else:
+            self.notify("🔍 Please select a collection first", severity="warning")
+
+    def action_delete(self) -> None:
+        """Delete selected collection."""
+        if selected := self.get_selected_collection():
+            from .dialogs import ConfirmDeleteScreen
+
+            self.app.push_screen(ConfirmDeleteScreen(selected, self))
+        else:
+            self.notify("🔍 Please select a collection first", severity="warning")
+
+    def action_tab_dashboard(self) -> None:
+        """Switch to dashboard tab."""
+        tabbed_content: TabbedContent = self.query_one(TabbedContent)
+        tabbed_content.active = "dashboard"
+
+    def action_tab_collections(self) -> None:
+        """Switch to collections tab."""
+        tabbed_content: TabbedContent = self.query_one(TabbedContent)
+        tabbed_content.active = "collections"
+
+    def action_tab_analytics(self) -> None:
+        """Switch to analytics tab."""
+        tabbed_content: TabbedContent = self.query_one(TabbedContent)
+        tabbed_content.active = "analytics"
+
+    def action_next_tab(self) -> None:
+        """Switch to next tab."""
+        tabbed_content: TabbedContent = self.query_one(TabbedContent)
+        tab_ids = ["dashboard", "collections", "analytics"]
+        current = tabbed_content.active
+        try:
+            current_index = tab_ids.index(current)
+            next_index = (current_index + 1) % len(tab_ids)
+            tabbed_content.active = tab_ids[next_index]
+        except (ValueError, AttributeError):
+            tabbed_content.active = tab_ids[0]
+
+    def action_prev_tab(self) -> None:
+        """Switch to previous tab."""
+        tabbed_content: TabbedContent = self.query_one(TabbedContent)
+        tab_ids = ["dashboard", "collections", "analytics"]
+        current = tabbed_content.active
+        try:
+            current_index = tab_ids.index(current)
+            prev_index = (current_index - 1) % len(tab_ids)
+            tabbed_content.active = tab_ids[prev_index]
+        except (ValueError, AttributeError):
+            tabbed_content.active = tab_ids[0]
+
+    def action_help(self) -> None:
+        """Show help screen."""
+        from .help import HelpScreen
+
+        help_md = """
+# 🚀 Modern Collection Management System
+
+## Navigation
+- **Tab** / **Shift+Tab**: Switch between tabs
+- **Ctrl+1/2/3**: Direct tab access
+- **Enter**: Activate selected item
+- **Escape**: Go back/cancel
+- **Arrow Keys**: Navigate within tables
+- **Home/End**: Jump to first/last row
+- **Page Up/Down**: Scroll by page
+
+## Collections
+- **R**: Refresh collections
+- **I**: Start ingestion
+- **M**: Manage documents
+- **S**: Search collection
+- **Ctrl+D**: Delete collection
+
+## Table Navigation
+- **Up/Down** or **J/K**: Navigate rows
+- **Space**: Toggle selection
+- **Ctrl+A**: Select all
+- **Ctrl+Shift+A**: Clear selection
+
+## General
+- **Q** / **Ctrl+C**: Quit application
+- **F1**: Show this help
+
+Enjoy the enhanced interface! 🎉
+        """
+        self.app.push_screen(HelpScreen(help_md))
+
+    def on_button_pressed(self, event: Button.Pressed) -> None:
+        """Handle button presses with enhanced feedback."""
+        button_id = event.button.id
+
+        # Add visual feedback
+        event.button.add_class("pressed")
+        self.call_later(self.remove_pressed_class, event.button)
+
+        if getattr(event.button, "disabled", False):
+            self.notify(
+                "🔍 Search is currently limited to Weaviate collections",
+                severity="warning",
+            )
+            return
+
+        if button_id in ["refresh_btn", "quick_refresh"]:
+            self.action_refresh()
+        elif button_id in ["ingest_btn", "quick_ingest"]:
+            self.action_ingest()
+        elif button_id == "manage_btn":
+            self.action_manage()
+        elif button_id == "delete_btn":
+            self.action_delete()
+        elif button_id in ["search_btn", "quick_search"]:
+            self.action_search()
+        elif button_id == "quick_settings":
+            self.notify("⚙️ Settings panel coming soon!", severity="information")
+
+    def remove_pressed_class(self, button: Button) -> None:
+        """Remove pressed visual feedback class."""
+        button.remove_class("pressed")
+
+
+
+"""Dialog screens for confirmations and user interactions."""
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import TYPE_CHECKING, ClassVar
+
+from textual.app import ComposeResult
+from textual.binding import Binding
+from textual.containers import Container, Horizontal
+from textual.screen import ModalScreen, Screen
+from textual.widgets import Button, Footer, Header, LoadingIndicator, RichLog, Static
+from typing_extensions import override
+
+from ..models import CollectionInfo
+
+if TYPE_CHECKING:
+    from .dashboard import CollectionOverviewScreen
+    from .documents import DocumentManagementScreen
+
+
+class ConfirmDeleteScreen(Screen[None]):
+    """Screen for confirming collection deletion."""
+
+    collection: CollectionInfo
+    parent_screen: CollectionOverviewScreen
+
+    BINDINGS: list[Binding] = [
+        Binding("escape", "app.pop_screen", "Cancel"),
+        Binding("y", "confirm_delete", "Yes"),
+        Binding("n", "app.pop_screen", "No"),
+        Binding("enter", "confirm_delete", "Confirm"),
+    ]
+
+    def __init__(self, collection: CollectionInfo, parent_screen: CollectionOverviewScreen):
+        super().__init__()
+        self.collection = collection
+        self.parent_screen = parent_screen
+
+    @override
+    def compose(self) -> ComposeResult:
+        yield Header()
+        yield Container(
+            Static("⚠️ Confirm Deletion", classes="title warning"),
+            Static(f"Are you sure you want to delete collection '{self.collection['name']}'?"),
+            Static(f"Backend: {self.collection['backend']}"),
+            Static(f"Documents: {self.collection['count']:,}"),
+            Static("This action cannot be undone!", classes="warning"),
+            Static("Press Y to confirm, N or Escape to cancel", classes="subtitle"),
+            Horizontal(
+                Button("✅ Yes, Delete (Y)", id="yes_btn", variant="error"),
+                Button("❌ Cancel (N)", id="no_btn", variant="default"),
+                classes="action_buttons",
+            ),
+            classes="main_container center",
+        )
+        yield Footer()
+
+    def on_mount(self) -> None:
+        """Initialize the screen with focus on cancel button for safety."""
+        self.query_one("#no_btn").focus()
+
+    def on_button_pressed(self, event: Button.Pressed) -> None:
+        """Handle button presses."""
+        if event.button.id == "yes_btn":
+            self.action_confirm_delete()
+        elif event.button.id == "no_btn":
+            self.app.pop_screen()
+
+    def action_confirm_delete(self) -> None:
+        """Confirm deletion."""
+        self.run_worker(self.delete_collection())
+
+    async def delete_collection(self) -> None:
+        """Delete the collection."""
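+        # Weaviate collections are dropped directly through the client API;
+        # any other backend is resolved via the dashboard's storage lookup and
+        # deleted through its delete_collection() coroutine.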
+        try:
+            if self.collection["type"] == "weaviate" and self.parent_screen.weaviate:
+                # Delete Weaviate collection
+                if self.parent_screen.weaviate.client and self.parent_screen.weaviate.client.collections:
+                    self.parent_screen.weaviate.client.collections.delete(self.collection["name"])
+                self.notify(
+                    f"Deleted Weaviate collection: {self.collection['name']}",
+                    severity="information",
+                )
+            else:
+                # Use the dashboard's method to get the appropriate storage backend
+                storage_backend = self.parent_screen._get_storage_for_collection(self.collection)
+                if not storage_backend:
+                    self.notify(
+                        f"❌ No storage backend available for {self.collection['type']} collection: {self.collection['name']}",
+                        severity="error",
+                    )
+                    self.app.pop_screen()
+                    return
+
+                # Check if the storage backend supports collection deletion
+                if not hasattr(storage_backend, 'delete_collection'):
+                    self.notify(
+                        f"❌ Collection deletion not supported for {self.collection['type']} backend",
+                        severity="error",
+                    )
+                    self.app.pop_screen()
+                    return
+
+                # Delete the collection using the appropriate backend.
+                # Ensure we use the exact collection name, not any default from storage config.
+                collection_name = str(self.collection["name"])
+                collection_type = str(self.collection["type"])
+
+                self.notify(f"Deleting {collection_type} collection: {collection_name}...", severity="information")
+
+                success = await storage_backend.delete_collection(collection_name)
+                if success:
+                    self.notify(
+                        f"✅ Successfully deleted {collection_type} collection: {collection_name}",
+                        severity="information",
+                        timeout=3.0,
+                    )
+                else:
+                    self.notify(
+                        f"❌ Failed to delete {collection_type} collection: {collection_name}",
+                        severity="error",
+                    )
+                    # Don't refresh if deletion failed
+                    self.app.pop_screen()
+                    return
+
+            # Refresh parent screen after a short delay to ensure deletion is processed
+            self.set_timer(0.5, self.parent_screen.refresh_collections)
+            self.app.pop_screen()
+
+        except Exception as e:
+            self.notify(f"Failed to delete collection: {e}", severity="error", markup=False)
+
+
+
+class ConfirmDocumentDeleteScreen(Screen[None]):
+    """Screen for confirming document deletion."""
+
+    doc_ids: list[str]
+    collection: CollectionInfo
+    parent_screen: "DocumentManagementScreen"
+
+    BINDINGS: list[Binding] = [
+        Binding("escape", "app.pop_screen", "Cancel"),
+        Binding("y", "confirm_delete", "Yes"),
+        Binding("n", "app.pop_screen", "No"),
+        Binding("enter", "confirm_delete", "Confirm"),
+    ]
+
+    def __init__(
+        self,
+        doc_ids: list[str],
+        collection: CollectionInfo,
+        parent_screen: "DocumentManagementScreen",
+    ):
+        super().__init__()
+        self.doc_ids = doc_ids
+        self.collection = collection
+        self.parent_screen = parent_screen
+
+    @override
+    def compose(self) -> ComposeResult:
+        yield Header()
+        yield Container(
+            Static("⚠️ Confirm Document Deletion", classes="title warning"),
+            Static(
+                f"Are you sure you want to delete {len(self.doc_ids)} documents from '{self.collection['name']}'?"
+            ),
+            Static("This action cannot be undone!", classes="warning"),
+            Static("Press Y to confirm, N or Escape to cancel", classes="subtitle"),
+            Horizontal(
+                Button("✅ Yes, Delete (Y)", id="yes_btn", variant="error"),
+                Button("❌ Cancel (N)", id="no_btn", variant="default"),
+                classes="action_buttons",
+            ),
+            LoadingIndicator(id="loading"),
+            classes="main_container center",
+        )
+        yield Footer()
+
+    def on_mount(self) -> None:
+        """Initialize the screen with focus on cancel button for safety."""
+        self.query_one("#loading").display = False
+        self.query_one("#no_btn").focus()
+
+    def on_button_pressed(self, event: Button.Pressed) -> None:
+        """Handle button presses."""
+        if event.button.id == "yes_btn":
+            self.action_confirm_delete()
+        elif event.button.id == "no_btn":
+            self.app.pop_screen()
+
+    def action_confirm_delete(self) -> None:
+        """Confirm deletion."""
+        self.run_worker(self.delete_documents())
+
+    async def delete_documents(self) -> None:
+        """Delete the selected documents."""
+        loading = self.query_one("#loading")
+        loading.display = True
+
+        try:
+            results: dict[str, bool] = {}
+            if hasattr(self.parent_screen, 'storage') and self.parent_screen.storage:
+                # Delete documents via storage
+                # The storage should have delete_documents method for weaviate
+                storage = self.parent_screen.storage
+                if hasattr(storage, 'delete_documents'):
+                    results = await storage.delete_documents(
+                        self.doc_ids,
+                        collection_name=self.collection["name"],
+                    )
+
+            # Count successful deletions
+            successful = sum(bool(success) for success in results.values())
+            failed = len(results) - successful
+
+            if successful > 0:
+                self.notify(f"Deleted {successful} documents", severity="information")
+            if failed > 0:
+                self.notify(f"Failed to delete {failed} documents", severity="error")
+
+            # Clear selection and refresh parent screen
+            self.parent_screen.selected_docs.clear()
+            await self.parent_screen.load_documents()
+            self.app.pop_screen()
+
+        except Exception as e:
+            self.notify(f"Failed to delete documents: {e}", severity="error", markup=False)
+        finally:
+            loading.display = False
+
+
+class LogViewerScreen(ModalScreen[None]):
+    """Display live log output without disrupting the TUI."""
+
+    _log_widget: RichLog | None
+    _log_file: Path | None
+
+    BINDINGS: list[Binding] = [
+        Binding("escape", "close", "Close"),
+        Binding("ctrl+l", "close", "Close"),
+        Binding("s", "show_path", "Log File"),
+    ]
+
+    def __init__(self) -> None:
+        super().__init__()
+        self._log_widget = None
+        self._log_file = None
+
+    @override
+    def compose(self) -> ComposeResult:
+        yield Header(show_clock=True)
+        yield Container(
+            Static("📜 Live Application Logs", classes="title"),
+            Static("Logs update in real time. Press S to reveal the log file path.", classes="subtitle"),
+            RichLog(id="log_stream", classes="log-stream", wrap=True, highlight=False),
+            Static("", id="log_file_path", classes="subtitle"),
+            classes="main_container log-viewer-container",
+        )
+        yield Footer()
+
+    def on_mount(self) -> None:
+        """Attach this viewer to the parent application once mounted."""
+        self._log_widget = self.query_one(RichLog)
+
+        if hasattr(self.app, 'attach_log_viewer'):
+            self.app.attach_log_viewer(self)
+
+    def on_unmount(self) -> None:
+        """Detach from the parent application when closed."""
+
+        if hasattr(self.app, 'detach_log_viewer'):
+            self.app.detach_log_viewer(self)
+
+    def _get_log_widget(self) -> RichLog:
+        if self._log_widget is None:
+            self._log_widget = self.query_one(RichLog)
+        if self._log_widget is None:
+            raise RuntimeError("RichLog widget not found")
+        return self._log_widget
+
+    def replace_logs(self, lines: list[str]) -> None:
+        """Replace rendered logs with the provided history."""
+        log_widget = self._get_log_widget()
+        log_widget.clear()
+        for line in lines:
+            log_widget.write(line)
+        log_widget.scroll_end(animate=False)
+
+    def append_logs(self, lines: list[str]) -> None:
+        """Append new log lines to the viewer."""
+        log_widget = self._get_log_widget()
+        for line in lines:
+            log_widget.write(line)
+        log_widget.scroll_end(animate=False)
+
+    def update_log_file(self, log_file: Path | None) -> None:
+        """Update the displayed log file path."""
+        self._log_file = log_file
+        label = self.query_one("#log_file_path", Static)
+        if log_file is None:
+            label.update("Logs are not currently being persisted to disk.")
+        else:
+            label.update(f"Log file: {log_file}")
+
+    def action_close(self) -> None:
+        """Close the log viewer."""
+        self.app.pop_screen()
+
+    def action_show_path(self) -> None:
+        """Reveal the log file location in a notification."""
+        if self._log_file is None:
+            self.notify("File logging is disabled for this session.", severity="warning")
+        else:
+            self.notify(f"Log file available at: {self._log_file}", severity="information", markup=False)
+
+
+
+"""Application settings and configuration."""
+
+from functools import lru_cache
+from typing import Annotated, ClassVar, Literal
+
+from prefect.variables import Variable
+from pydantic import Field, HttpUrl, model_validator
+from pydantic_settings import BaseSettings, SettingsConfigDict
+
+
+class Settings(BaseSettings):
+    """Application settings."""
+
+    model_config: ClassVar[SettingsConfigDict] = SettingsConfigDict(
+        env_file=".env",
+        env_file_encoding="utf-8",
+        case_sensitive=False,
+        extra="ignore",  # Ignore extra environment variables
+    )
+
+    # API Keys
+    firecrawl_api_key: str | None = None
+    openwebui_api_key: str | None = None
+    weaviate_api_key: str | None = None
+    r2r_api_key: str | None = None
+
+    # Endpoints
+    llm_endpoint: HttpUrl = HttpUrl("http://llm.lab")
+    weaviate_endpoint: HttpUrl = HttpUrl("http://weaviate.yo")
+    openwebui_endpoint: HttpUrl = HttpUrl("http://chat.lab")  # This will be the API URL
+    firecrawl_endpoint: HttpUrl = HttpUrl("http://crawl.lab:30002")
+    r2r_endpoint: HttpUrl | None = Field(default=None, alias="r2r_api_url")
+
+    # Model Configuration
+    embedding_model: str = "ollama/bge-m3:latest"
+    embedding_dimension: int = 1024
+
+    # Ingestion Settings
+    default_batch_size: Annotated[int, Field(gt=0, le=500)] = 50
+    max_file_size: int = 1_000_000
+    max_crawl_depth: Annotated[int, Field(ge=1, le=20)] = 5
+    max_crawl_pages: Annotated[int, Field(ge=1, le=1000)] = 100
+
+    # Storage Settings
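+    # The default backend chosen here is cross-checked by
+    # validate_backend_configuration() below: R2R additionally requires
+    # R2R_API_URL, and a missing API key only raises a warning.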
default_storage_backend: Literal["weaviate", "open_webui", "r2r"] = "weaviate" + default_collection_prefix: str = "docs" + + # Prefect Settings + prefect_api_url: HttpUrl | None = None + prefect_api_key: str | None = None + prefect_work_pool: str = "default" + + # Scheduling Defaults + default_schedule_interval: Annotated[int, Field(ge=1, le=10080)] = 60 # Max 1 week + + # Performance Settings + max_concurrent_tasks: Annotated[int, Field(ge=1, le=20)] = 5 + request_timeout: Annotated[int, Field(ge=10, le=300)] = 60 + + # Logging + log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR"] = "INFO" + + def get_storage_endpoint(self, backend: str) -> HttpUrl: + """ + Get endpoint for storage backend. + + Args: + backend: Storage backend name + + Returns: + Endpoint URL + + Raises: + ValueError: If backend is unknown or R2R endpoint not configured + """ + endpoints = { + "weaviate": self.weaviate_endpoint, + "open_webui": self.openwebui_endpoint, + } + + if backend in endpoints: + return endpoints[backend] + elif backend == "r2r": + if not self.r2r_endpoint: + raise ValueError( + "R2R_API_URL must be set in environment variables. " + "This should have been caught during settings validation." + ) + return self.r2r_endpoint + else: + raise ValueError(f"Unknown backend: {backend}. Supported: weaviate, open_webui, r2r") + + def get_api_key(self, service: str) -> str | None: + """ + Get API key for service. + + Args: + service: Service name + + Returns: + API key or None + """ + service_map = { + "firecrawl": self.firecrawl_api_key, + "openwebui": self.openwebui_api_key, + "weaviate": self.weaviate_api_key, + "r2r": self.r2r_api_key, + } + return service_map.get(service) + + @model_validator(mode="after") + def validate_backend_configuration(self) -> "Settings": + """Validate that required configuration is present for the default backend.""" + backend = self.default_storage_backend + + # Validate R2R backend configuration + if backend == "r2r" and not self.r2r_endpoint: + raise ValueError( + "R2R_API_URL must be set in environment variables when using R2R as default backend" + ) + + # Validate API key requirements (optional warning for missing keys) + required_keys = { + "weaviate": ("WEAVIATE_API_KEY", self.weaviate_api_key), + "open_webui": ("OPENWEBUI_API_KEY", self.openwebui_api_key), + "r2r": ("R2R_API_KEY", self.r2r_api_key), + } + + if backend in required_keys: + key_name, key_value = required_keys[backend] + if not key_value: + import warnings + warnings.warn( + f"{key_name} not set - authentication may fail for {backend} backend", + UserWarning, + stacklevel=2 + ) + + return self + + +@lru_cache +def get_settings() -> Settings: + """ + Get cached settings instance. 
+ + Returns: + Settings instance + """ + return Settings() + + +class PrefectVariableConfig: + """Helper class for managing Prefect variables with fallbacks to settings.""" + + def __init__(self) -> None: + self._settings: Settings = get_settings() + self._variable_names: list[str] = [ + "default_batch_size", "max_file_size", "max_crawl_depth", "max_crawl_pages", + "default_storage_backend", "default_collection_prefix", "max_concurrent_tasks", + "request_timeout", "default_schedule_interval" + ] + + def _get_fallback_value(self, name: str, default_value: object = None) -> object: + """Get fallback value from settings or default.""" + return default_value or getattr(self._settings, name, default_value) + + def get_with_fallback(self, name: str, default_value: str | int | float | None = None) -> str | int | float | None: + """Get variable value with fallback synchronously.""" + fallback = self._get_fallback_value(name, default_value) + # Ensure fallback is a type that Variable expects + variable_fallback = str(fallback) if fallback is not None else None + try: + result = Variable.get(name, default=variable_fallback) + # Variable can return various types, convert to our expected types + if isinstance(result, (str, int, float)): + return result + elif result is None: + return None + else: + # Convert other types to string + return str(result) + except Exception: + # Return fallback with proper type + if isinstance(fallback, (str, int, float)) or fallback is None: + return fallback + return str(fallback) if fallback is not None else None + + async def get_with_fallback_async(self, name: str, default_value: str | int | float | None = None) -> str | int | float | None: + """Get variable value with fallback asynchronously.""" + fallback = self._get_fallback_value(name, default_value) + variable_fallback = str(fallback) if fallback is not None else None + try: + result = await Variable.aget(name, default=variable_fallback) + # Variable can return various types, convert to our expected types + if isinstance(result, (str, int, float)): + return result + elif result is None: + return None + else: + # Convert other types to string + return str(result) + except Exception: + # Return fallback with proper type + if isinstance(fallback, (str, int, float)) or fallback is None: + return fallback + return str(fallback) if fallback is not None else None + + def get_ingestion_config(self) -> dict[str, str | int | float | None]: + """Get all ingestion-related configuration variables synchronously.""" + return {name: self.get_with_fallback(name) for name in self._variable_names} + + async def get_ingestion_config_async(self) -> dict[str, str | int | float | None]: + """Get all ingestion-related configuration variables asynchronously.""" + result: dict[str, str | int | float | None] = {} + for name in self._variable_names: + result[name] = await self.get_with_fallback_async(name) + return result + + +@lru_cache +def get_prefect_config() -> PrefectVariableConfig: + """Get cached Prefect variable configuration helper.""" + return PrefectVariableConfig() + + + +"""Core data models with strict typing.""" + +from datetime import UTC, datetime +from enum import Enum +from typing import Annotated, ClassVar, TypedDict +from uuid import UUID, uuid4 + +from prefect.blocks.core import Block +from pydantic import BaseModel, Field, HttpUrl, SecretStr + + +class IngestionStatus(str, Enum): + """Status of an ingestion job.""" + + PENDING = "pending" + IN_PROGRESS = "in_progress" + COMPLETED = "completed" + PARTIAL = "partial" # 
Some documents succeeded, some failed + FAILED = "failed" + CANCELLED = "cancelled" + + +class StorageBackend(str, Enum): + """Available storage backends.""" + + WEAVIATE = "weaviate" + OPEN_WEBUI = "open_webui" + R2R = "r2r" + + +class IngestionSource(str, Enum): + """Types of ingestion sources.""" + + WEB = "web" + REPOSITORY = "repository" + DOCUMENTATION = "documentation" + + +class VectorConfig(BaseModel): + """Configuration for vectorization.""" + + model: str = Field(default="ollama/bge-m3:latest") + embedding_endpoint: HttpUrl = Field(default=HttpUrl("http://llm.lab")) + dimension: int = Field(default=1024) + batch_size: Annotated[int, Field(gt=0, le=1000)] = 100 + + +class StorageConfig(Block): + """Configuration for storage backend.""" + + _block_type_name: ClassVar[str] = "Storage Configuration" + _block_type_slug: ClassVar[str] = "storage-config" + _description: ClassVar[str] = "Configures storage backend connections and settings for document ingestion" + + backend: StorageBackend + endpoint: HttpUrl + api_key: SecretStr | None = Field(default=None) + collection_name: str = Field(default="documents") + batch_size: Annotated[int, Field(gt=0, le=1000)] = 100 + + +class FirecrawlConfig(Block): + """Configuration for Firecrawl ingestion (operational parameters only).""" + + _block_type_name: ClassVar[str] = "Firecrawl Configuration" + _block_type_slug: ClassVar[str] = "firecrawl-config" + _description: ClassVar[str] = "Configures Firecrawl web scraping and crawling parameters" + + formats: list[str] = Field(default_factory=lambda: ["markdown", "html"]) + max_depth: Annotated[int, Field(ge=1, le=20)] = 5 + limit: Annotated[int, Field(ge=1, le=1000)] = 100 + only_main_content: bool = Field(default=True) + include_subdomains: bool = Field(default=False) + + +class RepomixConfig(Block): + """Configuration for Repomix ingestion.""" + + _block_type_name: ClassVar[str] = "Repomix Configuration" + _block_type_slug: ClassVar[str] = "repomix-config" + _description: ClassVar[str] = "Configures repository ingestion patterns and file processing settings" + + include_patterns: list[str] = Field( + default_factory=lambda: ["*.py", "*.js", "*.ts", "*.md", "*.yaml", "*.json"] + ) + exclude_patterns: list[str] = Field( + default_factory=lambda: ["**/node_modules/**", "**/__pycache__/**", "**/.git/**"] + ) + max_file_size: int = Field(default=1_000_000) # 1MB + respect_gitignore: bool = Field(default=True) + + +class R2RConfig(Block): + """Configuration for R2R ingestion.""" + + _block_type_name: ClassVar[str] = "R2R Configuration" + _block_type_slug: ClassVar[str] = "r2r-config" + _description: ClassVar[str] = "Configures R2R-specific ingestion settings including chunking and graph enrichment" + + chunk_size: Annotated[int, Field(ge=100, le=8192)] = 1000 + chunk_overlap: Annotated[int, Field(ge=0, le=1000)] = 200 + enable_graph_enrichment: bool = Field(default=False) + graph_creation_settings: dict[str, object] | None = Field(default=None) + + +class DocumentMetadataRequired(TypedDict): + """Required metadata fields for a document.""" + source_url: str + timestamp: datetime + content_type: str + word_count: int + char_count: int + + +class DocumentMetadata(DocumentMetadataRequired, total=False): + """Rich metadata for a document with R2R-compatible fields.""" + + # Basic optional fields + title: str | None + description: str | None + + # Content categorization + tags: list[str] + category: str + section: str + language: str + + # Authorship and source info + author: str + domain: str + site_name: 
str + + # Document structure + heading_hierarchy: list[str] + section_depth: int + has_code_blocks: bool + has_images: bool + has_links: bool + + # Processing metadata + extraction_method: str + crawl_depth: int + last_modified: datetime | None + + # Content quality indicators + readability_score: float | None + completeness_score: float | None + + # Repository-specific fields + file_path: str | None + repository_name: str | None + branch_name: str | None + commit_hash: str | None + programming_language: str | None + + # Custom business metadata + importance_score: float | None + review_status: str | None + assigned_team: str | None + + +class Document(BaseModel): + """Represents a single document.""" + + id: UUID = Field(default_factory=uuid4) + content: str + metadata: DocumentMetadata + vector: list[float] | None = Field(default=None) + score: float | None = Field(default=None) + source: IngestionSource + collection: str = Field(default="documents") + + +class IngestionJob(BaseModel): + """Represents an ingestion job.""" + + id: UUID = Field(default_factory=uuid4) + source_type: IngestionSource + source_url: HttpUrl | str + status: IngestionStatus = Field(default=IngestionStatus.PENDING) + created_at: datetime = Field(default_factory=lambda: datetime.now(UTC)) + updated_at: datetime = Field(default_factory=lambda: datetime.now(UTC)) + completed_at: datetime | None = Field(default=None) + error_message: str | None = Field(default=None) + document_count: int = Field(default=0) + storage_backend: StorageBackend + + +class IngestionResult(BaseModel): + """Result of an ingestion operation.""" + + job_id: UUID + status: IngestionStatus + documents_processed: int + documents_failed: int + duration_seconds: float + error_messages: list[str] = Field(default_factory=list) + + + +"""Prefect flow for ingestion pipeline.""" + +from __future__ import annotations + +from collections.abc import Callable +from datetime import UTC, datetime +from typing import TYPE_CHECKING, Literal, TypeAlias, assert_never, cast + +from prefect import flow, get_run_logger, task +from prefect.blocks.core import Block +from prefect.variables import Variable +from pydantic import SecretStr + +from ..config.settings import Settings +from ..core.exceptions import IngestionError +from ..core.models import ( + Document, + FirecrawlConfig, + IngestionJob, + IngestionResult, + IngestionSource, + IngestionStatus, + RepomixConfig, + StorageBackend, + StorageConfig, +) +from ..ingestors import BaseIngestor, FirecrawlIngestor, FirecrawlPage, RepomixIngestor +from ..storage import OpenWebUIStorage, WeaviateStorage +from ..storage import R2RStorage as RuntimeR2RStorage +from ..storage.base import BaseStorage +from ..utils.metadata_tagger import MetadataTagger + +SourceTypeLiteral = Literal["web", "repository", "documentation"] +StorageBackendLiteral = Literal["weaviate", "open_webui", "r2r"] +SourceTypeLike: TypeAlias = IngestionSource | SourceTypeLiteral +StorageBackendLike: TypeAlias = StorageBackend | StorageBackendLiteral + + +def _safe_cache_key(prefix: str, params: dict[str, object], key: str) -> str: + """Create a type-safe cache key from task parameters.""" + value = params.get(key, "") + return f"{prefix}_{hash(str(value))}" + + +if TYPE_CHECKING: + from ..storage.r2r.storage import R2RStorage as R2RStorageType +else: + R2RStorageType = BaseStorage + + +@task(name="validate_source", retries=2, retry_delay_seconds=10, tags=["validation"]) +async def validate_source_task(source_url: str, source_type: IngestionSource) -> bool: + 
""" + Validate that a source is accessible. + + Args: + source_url: URL or path to source + source_type: Type of source + + Returns: + True if valid + """ + if source_type == IngestionSource.WEB: + ingestor = FirecrawlIngestor() + elif source_type == IngestionSource.REPOSITORY: + ingestor = RepomixIngestor() + else: + raise ValueError(f"Unsupported source type: {source_type}") + + result = await ingestor.validate_source(source_url) + return bool(result) + + +@task(name="initialize_storage", retries=3, retry_delay_seconds=5, tags=["storage"]) +async def initialize_storage_task(config: StorageConfig | str) -> BaseStorage: + """ + Initialize storage backend. + + Args: + config: Storage configuration block or block name + + Returns: + Initialized storage adapter + """ + # Load block if string provided + if isinstance(config, str): + # Use Block.aload with type slug for better type inference + loaded_block = await Block.aload(f"storage-config/{config}") + config = cast(StorageConfig, loaded_block) + + if config.backend == StorageBackend.WEAVIATE: + storage = WeaviateStorage(config) + elif config.backend == StorageBackend.OPEN_WEBUI: + storage = OpenWebUIStorage(config) + elif config.backend == StorageBackend.R2R: + if RuntimeR2RStorage is None: + raise ValueError("R2R storage not available. Check dependencies.") + storage = RuntimeR2RStorage(config) + else: + raise ValueError(f"Unsupported backend: {config.backend}") + + await storage.initialize() + return storage + + +@task(name="map_firecrawl_site", retries=2, retry_delay_seconds=15, tags=["firecrawl", "map"], + cache_key_fn=lambda ctx, p: _safe_cache_key("firecrawl_map", p, "source_url")) +async def map_firecrawl_site_task(source_url: str, config: FirecrawlConfig | str) -> list[str]: + """Map a site using Firecrawl and return discovered URLs.""" + # Load block if string provided + if isinstance(config, str): + # Use Block.aload with type slug for better type inference + loaded_block = await Block.aload(f"firecrawl-config/{config}") + config = cast(FirecrawlConfig, loaded_block) + + ingestor = FirecrawlIngestor(config) + mapped = await ingestor.map_site(source_url) + return mapped or [source_url] + + +@task(name="filter_existing_documents", retries=1, retry_delay_seconds=5, tags=["dedup"], + cache_key_fn=lambda ctx, p: _safe_cache_key("filter_docs", p, "urls")) # Cache based on URL list +async def filter_existing_documents_task( + urls: list[str], + storage_client: BaseStorage, + stale_after_days: int = 30, + *, + collection_name: str | None = None, +) -> list[str]: + """Filter URLs to only those that need scraping (missing or stale in storage).""" + logger = get_run_logger() + eligible: list[str] = [] + + for url in urls: + document_id = str(FirecrawlIngestor.compute_document_id(url)) + exists = await storage_client.check_exists( + document_id, + collection_name=collection_name, + stale_after_days=stale_after_days + ) + + if not exists: + eligible.append(url) + + skipped = len(urls) - len(eligible) + if skipped > 0: + logger.info("Skipping %s up-to-date documents in %s", skipped, storage_client.display_name) + + return eligible + + +@task( + name="scrape_firecrawl_batch", retries=2, retry_delay_seconds=20, tags=["firecrawl", "scrape"] +) +async def scrape_firecrawl_batch_task( + batch_urls: list[str], config: FirecrawlConfig +) -> list[FirecrawlPage]: + """Scrape a batch of URLs via Firecrawl.""" + ingestor = FirecrawlIngestor(config) + result: list[FirecrawlPage] = await ingestor.scrape_pages(batch_urls) + return result + + 
+@task(name="annotate_firecrawl_metadata", retries=1, retry_delay_seconds=10, tags=["metadata"]) +async def annotate_firecrawl_metadata_task( + pages: list[FirecrawlPage], job: IngestionJob +) -> list[Document]: + """Annotate scraped pages with standardized metadata.""" + if not pages: + return [] + + ingestor = FirecrawlIngestor() + documents = [ingestor.create_document(page, job) for page in pages] + + try: + from ..config import get_settings + + settings = get_settings() + async with MetadataTagger(llm_endpoint=str(settings.llm_endpoint)) as tagger: + tagged_documents: list[Document] = await tagger.tag_batch(documents) + return tagged_documents + except IngestionError as exc: # pragma: no cover - logging side effect + logger = get_run_logger() + logger.warning("Metadata tagging failed: %s", exc) + return documents + except Exception as exc: # pragma: no cover - defensive + logger = get_run_logger() + logger.warning("Metadata tagging unavailable, using base metadata: %s", exc) + return documents + + +@task(name="upsert_r2r_documents", retries=2, retry_delay_seconds=20, tags=["storage", "r2r"]) +async def upsert_r2r_documents_task( + storage_client: R2RStorageType, + documents: list[Document], + collection_name: str | None, +) -> tuple[int, int]: + """Upsert documents into R2R storage.""" + if not documents: + return 0, 0 + + stored_ids: list[str] = await storage_client.store_batch( + documents, collection_name=collection_name + ) + processed = len(stored_ids) + failed = len(documents) - processed + + if failed: + logger = get_run_logger() + logger.warning("Failed to upsert %s documents to R2R", failed) + + return processed, failed + + +@task(name="ingest_documents", retries=2, retry_delay_seconds=30, tags=["ingestion"]) +async def ingest_documents_task( + job: IngestionJob, + collection_name: str | None = None, + batch_size: int | None = None, + storage_client: BaseStorage | None = None, + storage_block_name: str | None = None, + ingestor_config_block_name: str | None = None, + progress_callback: Callable[[int, str], None] | None = None, +) -> tuple[int, int]: + """ + Ingest documents from source with optional pre-initialized storage client. 
+ + Args: + job: Ingestion job configuration + collection_name: Target collection name + batch_size: Number of documents per batch (uses Variable if None) + storage_client: Optional pre-initialized storage client + storage_block_name: Optional storage block name to load + ingestor_config_block_name: Optional ingestor config block name to load + progress_callback: Optional callback for progress updates + + Returns: + Tuple of (processed_count, failed_count) + """ + if progress_callback: + progress_callback(35, "Creating ingestor and storage clients...") + + # Use Variable for batch size if not provided + if batch_size is None: + try: + batch_size_var = await Variable.aget("default_batch_size", default="50") + # Convert Variable result to int, handling various types + if isinstance(batch_size_var, int): + batch_size = batch_size_var + elif isinstance(batch_size_var, (str, float)): + batch_size = int(float(str(batch_size_var))) + else: + batch_size = 50 + except Exception: + batch_size = 50 + + ingestor = await _create_ingestor(job, ingestor_config_block_name) + storage = storage_client or await _create_storage(job, collection_name, storage_block_name) + + if progress_callback: + progress_callback(40, "Starting document processing...") + + return await _process_documents(ingestor, storage, job, batch_size, collection_name, progress_callback) + + +async def _create_ingestor(job: IngestionJob, config_block_name: str | None = None) -> BaseIngestor: + """Create appropriate ingestor based on job source type.""" + if job.source_type == IngestionSource.WEB: + if config_block_name: + # Use Block.aload with type slug for better type inference + loaded_block = await Block.aload(f"firecrawl-config/{config_block_name}") + config = cast(FirecrawlConfig, loaded_block) + else: + # Fallback to default configuration + config = FirecrawlConfig() + return FirecrawlIngestor(config) + elif job.source_type == IngestionSource.REPOSITORY: + if config_block_name: + # Use Block.aload with type slug for better type inference + loaded_block = await Block.aload(f"repomix-config/{config_block_name}") + config = cast(RepomixConfig, loaded_block) + else: + # Fallback to default configuration + config = RepomixConfig() + return RepomixIngestor(config) + else: + raise ValueError(f"Unsupported source: {job.source_type}") + + +async def _create_storage(job: IngestionJob, collection_name: str | None, storage_block_name: str | None = None) -> BaseStorage: + """Create and initialize storage client.""" + if collection_name is None: + # Use variable for default collection prefix + prefix = await Variable.aget("default_collection_prefix", default="docs") + collection_name = f"{prefix}_{job.source_type.value}" + + if storage_block_name: + # Load storage config from block + loaded_block = await Block.aload(f"storage-config/{storage_block_name}") + storage_config = cast(StorageConfig, loaded_block) + # Override collection name if provided + storage_config.collection_name = collection_name + else: + # Fallback to building config from settings + from ..config import get_settings + settings = get_settings() + storage_config = _build_storage_config(job, settings, collection_name) + + storage = _instantiate_storage(job.storage_backend, storage_config) + await storage.initialize() + return storage + + +def _build_storage_config( + job: IngestionJob, settings: Settings, collection_name: str +) -> StorageConfig: + """Build storage configuration from job and settings.""" + storage_endpoints = { + StorageBackend.WEAVIATE: 
settings.weaviate_endpoint, + StorageBackend.OPEN_WEBUI: settings.openwebui_endpoint, + StorageBackend.R2R: settings.get_storage_endpoint("r2r"), + } + storage_api_keys: dict[StorageBackend, str | None] = { + StorageBackend.WEAVIATE: settings.get_api_key("weaviate"), + StorageBackend.OPEN_WEBUI: settings.get_api_key("openwebui"), + StorageBackend.R2R: None, # R2R is self-hosted, no API key needed + } + + api_key_raw: str | None = storage_api_keys[job.storage_backend] + api_key: SecretStr | None = SecretStr(api_key_raw) if api_key_raw is not None else None + + return StorageConfig( + backend=job.storage_backend, + endpoint=storage_endpoints[job.storage_backend], + api_key=api_key, + collection_name=collection_name, + ) + + +def _instantiate_storage(backend: StorageBackend, config: StorageConfig) -> BaseStorage: + """Instantiate storage based on backend type.""" + if backend == StorageBackend.WEAVIATE: + return WeaviateStorage(config) + elif backend == StorageBackend.OPEN_WEBUI: + return OpenWebUIStorage(config) + elif backend == StorageBackend.R2R: + if RuntimeR2RStorage is None: + raise ValueError("R2R storage not available. Check dependencies.") + return RuntimeR2RStorage(config) + + assert_never(backend) + + +def _chunk_urls(urls: list[str], chunk_size: int) -> list[list[str]]: + """Group URLs into fixed-size chunks for batch processing.""" + + if chunk_size <= 0: + raise ValueError("chunk_size must be greater than zero") + + return [urls[i : i + chunk_size] for i in range(0, len(urls), chunk_size)] + + +def _deduplicate_urls(urls: list[str]) -> list[str]: + """Return the URLs with order preserved and duplicates removed.""" + + seen: set[str] = set() + unique: list[str] = [] + for url in urls: + if url not in seen: + seen.add(url) + unique.append(url) + return unique + + +async def _process_documents( + ingestor: BaseIngestor, + storage: BaseStorage, + job: IngestionJob, + batch_size: int, + collection_name: str | None, + progress_callback: Callable[[int, str], None] | None = None, +) -> tuple[int, int]: + """Process documents in batches.""" + processed = 0 + failed = 0 + batch: list[Document] = [] + total_documents = 0 + batch_count = 0 + + if progress_callback: + progress_callback(45, "Ingesting documents from source...") + + # Use smart ingestion with deduplication if storage supports it + if hasattr(storage, 'check_exists'): + try: + # Try to use the smart ingestion method + document_generator = ingestor.ingest_with_dedup( + job, storage, collection_name=collection_name + ) + except Exception: + # Fall back to regular ingestion if smart method fails + document_generator = ingestor.ingest(job) + else: + document_generator = ingestor.ingest(job) + + async for document in document_generator: + batch.append(document) + total_documents += 1 + + if len(batch) >= batch_size: + batch_count += 1 + if progress_callback: + progress_callback( + 45 + min(35, (batch_count * 10)), + f"Processing batch {batch_count} ({total_documents} documents so far)..." 
+ ) + + batch_processed, batch_failed = await _store_batch(storage, batch, collection_name) + processed += batch_processed + failed += batch_failed + batch = [] + + # Process remaining batch + if batch: + batch_count += 1 + if progress_callback: + progress_callback(80, f"Processing final batch ({total_documents} total documents)...") + + batch_processed, batch_failed = await _store_batch(storage, batch, collection_name) + processed += batch_processed + failed += batch_failed + + if progress_callback: + progress_callback(85, f"Completed processing {total_documents} documents") + + return processed, failed + + +async def _store_batch( + storage: BaseStorage, + batch: list[Document], + collection_name: str | None, +) -> tuple[int, int]: + """Store a batch of documents and return processed/failed counts.""" + try: + # Apply metadata tagging for backends that benefit from it + processed_batch = batch + if hasattr(storage, "config") and storage.config.backend in ( + StorageBackend.R2R, + StorageBackend.WEAVIATE, + ): + try: + from ..config import get_settings + + settings = get_settings() + async with MetadataTagger(llm_endpoint=str(settings.llm_endpoint)) as tagger: + processed_batch = await tagger.tag_batch(batch) + except Exception as exc: + print(f"Metadata tagging failed, using original documents: {exc}") + processed_batch = batch + + stored_ids = await storage.store_batch(processed_batch, collection_name=collection_name) + processed_count = len(stored_ids) + failed_count = len(processed_batch) - processed_count + + batch_type = ( + "final" if len(processed_batch) < 50 else "" + ) # Assume standard batch size is 50 + print(f"Successfully stored {processed_count} documents in {batch_type} batch".strip()) + + return processed_count, failed_count + except Exception as e: + batch_type = "Final" if len(batch) < 50 else "Batch" + print(f"{batch_type} storage failed: {e}") + return 0, len(batch) + + +@flow( + name="firecrawl_to_r2r", + description="Ingest Firecrawl pages into R2R with metadata annotation", + persist_result=False, + log_prints=True, +) +async def firecrawl_to_r2r_flow( + job: IngestionJob, collection_name: str | None = None, progress_callback: Callable[[int, str], None] | None = None +) -> tuple[int, int]: + """Specialized flow for Firecrawl ingestion into R2R.""" + logger = get_run_logger() + from ..config import get_settings + + if progress_callback: + progress_callback(35, "Initializing Firecrawl and R2R storage...") + + settings = get_settings() + firecrawl_config = FirecrawlConfig() + resolved_collection = collection_name or f"docs_{job.source_type.value}" + + storage_config = _build_storage_config(job, settings, resolved_collection) + storage_client = await initialize_storage_task(storage_config) + + if RuntimeR2RStorage is None or not isinstance(storage_client, RuntimeR2RStorage): + raise IngestionError("Firecrawl to R2R flow requires an R2R storage backend") + + r2r_storage = cast("R2RStorageType", storage_client) + + if progress_callback: + progress_callback(45, "Checking for existing content before mapping...") + + # Smart mapping: try single URL first to avoid expensive map operation + base_url = str(job.source_url) + single_url_id = str(FirecrawlIngestor.compute_document_id(base_url)) + base_exists = await r2r_storage.check_exists( + single_url_id, collection_name=resolved_collection, stale_after_days=30 + ) + + if base_exists: + # Check if this is a recent single-page update + logger.info("Base URL %s exists and is fresh, skipping expensive mapping", base_url) + if 
progress_callback: + progress_callback(100, "Content is up to date, no processing needed") + return 0, 0 + + if progress_callback: + progress_callback(50, "Discovering pages with Firecrawl...") + + discovered_urls = await map_firecrawl_site_task(base_url, firecrawl_config) + unique_urls = _deduplicate_urls(discovered_urls) + logger.info("Discovered %s unique URLs from Firecrawl map", len(unique_urls)) + + if progress_callback: + progress_callback(60, f"Found {len(unique_urls)} pages, filtering existing content...") + + eligible_urls = await filter_existing_documents_task( + unique_urls, r2r_storage, collection_name=resolved_collection + ) + + if not eligible_urls: + logger.info("All Firecrawl pages are up to date for %s", job.source_url) + if progress_callback: + progress_callback(100, "All pages are up to date, no processing needed") + return 0, 0 + + if progress_callback: + progress_callback(70, f"Scraping {len(eligible_urls)} new/updated pages...") + + batch_size = min(settings.default_batch_size, firecrawl_config.limit) + url_batches = _chunk_urls(eligible_urls, batch_size) + logger.info("Scraping %s batches of Firecrawl pages", len(url_batches)) + + # Use asyncio.gather for concurrent scraping + import asyncio + scrape_tasks = [ + scrape_firecrawl_batch_task(batch, firecrawl_config) + for batch in url_batches + ] + batch_results = await asyncio.gather(*scrape_tasks) + + scraped_pages: list[FirecrawlPage] = [] + for batch_pages in batch_results: + scraped_pages.extend(batch_pages) + + if progress_callback: + progress_callback(80, f"Processing {len(scraped_pages)} scraped pages...") + + documents = await annotate_firecrawl_metadata_task(scraped_pages, job) + + if not documents: + logger.warning("No documents produced after scraping for %s", job.source_url) + return 0, len(eligible_urls) + + if progress_callback: + progress_callback(90, f"Storing {len(documents)} documents in R2R...") + + processed, failed = await upsert_r2r_documents_task(r2r_storage, documents, resolved_collection) + + logger.info("Upserted %s documents into R2R (%s failed)", processed, failed) + + return processed, failed + + +@task(name="update_job_status", tags=["tracking"]) +async def update_job_status_task( + job: IngestionJob, + status: IngestionStatus, + processed: int = 0, + _failed: int = 0, + error: str | None = None, +) -> IngestionJob: + """ + Update job status. + + Args: + job: Ingestion job + status: New status + processed: Documents processed + _failed: Documents failed (currently unused) + error: Error message if any + + Returns: + Updated job + """ + job.status = status + job.updated_at = datetime.now(UTC) + job.document_count = processed + + if status == IngestionStatus.COMPLETED: + job.completed_at = datetime.now(UTC) + + if error: + job.error_message = error + + return job + + +@flow( + name="ingestion_pipeline", + description="Main ingestion pipeline for documents", + retries=1, + retry_delay_seconds=60, + persist_result=True, + log_prints=True, +) +async def create_ingestion_flow( + source_url: str, + source_type: SourceTypeLike, + storage_backend: StorageBackendLike = StorageBackend.WEAVIATE, + collection_name: str | None = None, + validate_first: bool = True, + progress_callback: Callable[[int, str], None] | None = None, +) -> IngestionResult: + """ + Main ingestion flow. 
+ + Args: + source_url: URL or path to source + source_type: Type of source + storage_backend: Storage backend to use + validate_first: Whether to validate source first + progress_callback: Optional callback for progress updates + + Returns: + Ingestion result + """ + print(f"Starting ingestion from {source_url}") + + source_enum = IngestionSource(source_type) + backend_enum = StorageBackend(storage_backend) + + # Create job + job = IngestionJob( + source_url=source_url, + source_type=source_enum, + storage_backend=backend_enum, + status=IngestionStatus.PENDING, + ) + + start_time = datetime.now(UTC) + error_messages: list[str] = [] + processed = 0 + failed = 0 + + try: + # Validate source if requested + if validate_first: + if progress_callback: + progress_callback(10, "Validating source...") + print("Validating source...") + is_valid = await validate_source_task(source_url, job.source_type) + + if not is_valid: + raise IngestionError(f"Source validation failed: {source_url}") + + # Update status to in progress + if progress_callback: + progress_callback(20, "Initializing storage...") + job = await update_job_status_task(job, IngestionStatus.IN_PROGRESS) + + # Run ingestion + if progress_callback: + progress_callback(30, "Starting document ingestion...") + print("Ingesting documents...") + if job.source_type == IngestionSource.WEB and job.storage_backend == StorageBackend.R2R: + processed, failed = await firecrawl_to_r2r_flow(job, collection_name, progress_callback=progress_callback) + else: + processed, failed = await ingest_documents_task(job, collection_name, progress_callback=progress_callback) + + if progress_callback: + progress_callback(90, "Finalizing ingestion...") + + # Update final status + if failed > 0: + error_messages.append(f"{failed} documents failed to process") + + # Set status based on results + if processed == 0 and failed > 0: + final_status = IngestionStatus.FAILED + elif failed > 0: + final_status = IngestionStatus.PARTIAL + else: + final_status = IngestionStatus.COMPLETED + + job = await update_job_status_task(job, final_status, processed=processed, _failed=failed) + + print(f"Ingestion completed: {processed} processed, {failed} failed") + + except Exception as e: + print(f"Ingestion failed: {e}") + error_messages.append(str(e)) + + # Don't reset counts - keep whatever was processed before the error + job = await update_job_status_task( + job, IngestionStatus.FAILED, processed=processed, _failed=failed, error=str(e) + ) + + # Calculate duration + duration = (datetime.now(UTC) - start_time).total_seconds() + + return IngestionResult( + job_id=job.id, + status=job.status, + documents_processed=processed, + documents_failed=failed, + duration_seconds=duration, + error_messages=error_messages, + ) + + + +"""R2R storage implementation using the official R2R SDK.""" + +from __future__ import annotations + +import asyncio +import contextlib +from collections.abc import AsyncGenerator, Iterable, Mapping, Sequence +from datetime import UTC, datetime +from typing import Self, TypeVar, cast +from uuid import UUID, uuid4 + +# Direct imports for runtime and type checking +# Note: Some type checkers (basedpyright/Pyrefly) may report import issues +# but these work correctly at runtime and with mypy +from httpx import AsyncClient, HTTPStatusError +from r2r import R2RAsyncClient, R2RException +from typing_extensions import override + +from ...core.exceptions import StorageError +from ...core.models import Document, DocumentMetadata, IngestionSource, StorageConfig +from ..base 
import BaseStorage + +T = TypeVar("T") + + +def _as_mapping(value: object) -> dict[str, object]: + if isinstance(value, Mapping): + return dict(cast(Mapping[str, object], value)) + if hasattr(value, "__dict__"): + return dict(cast(Mapping[str, object], value.__dict__)) + return {} + + +def _as_sequence(value: object) -> tuple[object, ...]: + """Convert value to a tuple of objects.""" + if isinstance(value, Sequence): + return tuple(value) + return tuple(value) if isinstance(value, Iterable) else () + + +def _extract_id(source: object, fallback: str) -> str: + mapping = _as_mapping(source) + identifier = mapping.get("id") if mapping else None + if identifier is None and hasattr(source, "id"): + identifier = getattr(source, "id", None) + return fallback if identifier is None else str(identifier) + + +def _as_datetime(value: object) -> datetime: + if isinstance(value, datetime): + return value + if isinstance(value, str): + with contextlib.suppress(ValueError): + return datetime.fromisoformat(value) + return datetime.now(UTC) + + +def _as_int(value: object, default: int = 0) -> int: + if isinstance(value, bool): + return int(value) + if isinstance(value, int): + return value + if isinstance(value, float): + return int(value) + if isinstance(value, str): + try: + return int(float(value)) if "." in value else int(value) + except ValueError: + return default + return default + + +class R2RStorage(BaseStorage): + """R2R storage implementation using the official R2R SDK.""" + + def __init__(self, config: StorageConfig) -> None: + """Initialize R2R storage with SDK client.""" + super().__init__(config) + self.endpoint: str = str(config.endpoint).rstrip("/") + self.client: R2RAsyncClient = R2RAsyncClient(self.endpoint) + self.default_collection_id: str | None = None + + @override + async def initialize(self) -> None: + """Initialize R2R connection and ensure default collection exists.""" + try: + # Ensure we have an event loop + try: + _ = asyncio.get_running_loop() + except RuntimeError: + # No event loop running, this should not happen in async context + # but let's be defensive + import logging + + logging.warning("No event loop found during R2R initialization") + + # Test connection using direct HTTP call to v3 API + endpoint = self.endpoint + client = AsyncClient() + try: + response = await client.get(f"{endpoint}/v3/collections") + response.raise_for_status() + finally: + await client.aclose() + _ = await self._ensure_collection(self.config.collection_name) + except Exception as e: + raise StorageError(f"Failed to initialize R2R: {e}") from e + + async def _ensure_collection(self, collection_name: str) -> str: + """Get or create collection by name.""" + endpoint = self.endpoint + client = AsyncClient() + try: + # List collections and find by name + response = await client.get(f"{endpoint}/v3/collections") + response.raise_for_status() + data: dict[str, object] = response.json() + + results = cast(list[dict[str, object]], data.get("results", [])) + for collection in results: + if collection.get("name") == collection_name: + collection_id_raw = collection.get("id") + if collection_id_raw is None: + raise StorageError(f"Collection '{collection_name}' exists but has no ID") + collection_id = str(collection_id_raw) + if collection_name == self.config.collection_name: + self.default_collection_id = collection_id + return collection_id + + # Create if not found + create_response = await client.post( + f"{endpoint}/v3/collections", + json={ + "name": collection_name, + "description": f"Auto-created 
collection: {collection_name}", + }, + ) + create_response.raise_for_status() + created: dict[str, object] = create_response.json() + created_results = cast(dict[str, object], created.get("results", {})) + collection_id_raw = created_results.get("id") + if collection_id_raw is None: + raise StorageError("Failed to get collection ID from creation response") + collection_id = str(collection_id_raw) + + if collection_name == self.config.collection_name: + self.default_collection_id = collection_id + + return collection_id + except Exception as e: + raise StorageError(f"Failed to ensure collection '{collection_name}': {e}") from e + finally: + await client.aclose() + + @override + async def store(self, document: Document, *, collection_name: str | None = None) -> str: + """Store a single document.""" + return (await self.store_batch([document], collection_name=collection_name))[0] + + @override + async def store_batch( + self, documents: list[Document], *, collection_name: str | None = None + ) -> list[str]: + """Store multiple documents.""" + collection_id = await self._resolve_collection_id(collection_name) + print( + f"Using collection ID: {collection_id} for collection: {collection_name or self.config.collection_name}" + ) + + stored_ids: list[str] = [] + for document in documents: + if not self._is_document_valid(document): + continue + + stored_id = await self._store_single_document(document, collection_id) + if stored_id: + stored_ids.append(stored_id) + + return stored_ids + + async def _resolve_collection_id(self, collection_name: str | None) -> str: + """Resolve collection ID from name or use default.""" + if collection_name: + return await self._ensure_collection(collection_name) + + if self.default_collection_id: + return self.default_collection_id + + collection_id = await self._ensure_collection(self.config.collection_name) + self.default_collection_id = collection_id + return collection_id + + def _is_document_valid(self, document: Document) -> bool: + """Validate document content and size.""" + requested_id = str(document.id) + + if not document.content or not document.content.strip(): + print(f"Skipping document {requested_id}: empty content") + return False + + if len(document.content) > 1_000_000: # 1MB limit + print( + f"Skipping document {requested_id}: content too large ({len(document.content)} chars)" + ) + return False + + return True + + async def _store_single_document(self, document: Document, collection_id: str) -> str | None: + """Store a single document with retry logic.""" + requested_id = str(document.id) + print(f"Creating document with ID: {requested_id}") + + max_retries = 3 + retry_delay = 1.0 + + for attempt in range(max_retries): + try: + doc_response = await self._attempt_document_creation(document, collection_id) + if doc_response: + return self._process_document_response(doc_response, requested_id, collection_id) + except (TimeoutError, OSError) as e: + if not await self._should_retry_timeout(e, attempt, max_retries, requested_id, retry_delay): + break + retry_delay *= 2 + except HTTPStatusError as e: + if not await self._should_retry_http_error(e, attempt, max_retries, requested_id, retry_delay): + break + retry_delay *= 2 + except Exception as exc: + self._log_document_error(document.id, exc) + break + + return None + + async def _attempt_document_creation(self, document: Document, collection_id: str) -> dict[str, object] | None: + """Attempt to create a document via HTTP API.""" + import json + + requested_id = str(document.id) + metadata = 
self._build_metadata(document) + print(f"Built metadata for document {requested_id}: {metadata}") + + files = { + "raw_text": (None, document.content), + "metadata": (None, json.dumps(metadata)), + "id": (None, requested_id), + "ingestion_mode": (None, "hi-res"), + } + + if collection_id: + files["collection_ids"] = (None, json.dumps([collection_id])) + print(f"Creating document {requested_id} with collection_ids: [{collection_id}]") + + print(f"Sending to R2R - files keys: {list(files.keys())}") + print(f"Metadata JSON: {files['metadata'][1]}") + + async with AsyncClient() as http_client: + response = await http_client.post(f"{self.endpoint}/v3/documents", files=files) + + if response.status_code == 422: + self._handle_validation_error(response, requested_id, metadata) + return None + + response.raise_for_status() + return response.json() + + def _handle_validation_error(self, response: object, requested_id: str, metadata: dict[str, object]) -> None: + """Handle validation errors from R2R API.""" + try: + error_detail = getattr(response, 'json', lambda: {})() if hasattr(response, 'json') else {} + print(f"R2R validation error for document {requested_id}: {error_detail}") + print(f"Document metadata sent: {metadata}") + print(f"Response status: {getattr(response, 'status_code', 'unknown')}") + print(f"Response headers: {dict(getattr(response, 'headers', {}))}") + except Exception: + print(f"R2R validation error for document {requested_id}: {getattr(response, 'text', 'unknown error')}") + print(f"Document metadata sent: {metadata}") + + def _process_document_response(self, doc_response: dict[str, object], requested_id: str, collection_id: str) -> str: + """Process successful document creation response.""" + response_payload = doc_response.get("results", doc_response) + doc_id = _extract_id(response_payload, requested_id) + + print(f"R2R returned document ID: {doc_id}") + + if doc_id != requested_id: + print(f"Warning: Requested ID {requested_id} but got {doc_id}") + + if collection_id: + print(f"Document {doc_id} should be assigned to collection {collection_id} via creation API") + + return doc_id + + async def _should_retry_timeout(self, error: Exception, attempt: int, max_retries: int, requested_id: str, retry_delay: float) -> bool: + """Determine if timeout error should be retried.""" + if attempt >= max_retries - 1: + return False + + print(f"Timeout for document {requested_id}, retrying in {retry_delay}s...") + await asyncio.sleep(retry_delay) + return True + + async def _should_retry_http_error(self, error: HTTPStatusError, attempt: int, max_retries: int, requested_id: str, retry_delay: float) -> bool: + """Determine if HTTP error should be retried.""" + if error.response.status_code < 500 or attempt >= max_retries - 1: + return False + + print(f"Server error {error.response.status_code} for document {requested_id}, retrying in {retry_delay}s...") + await asyncio.sleep(retry_delay) + return True + + def _log_document_error(self, document_id: object, exc: Exception) -> None: + """Log document storage errors with specific categorization.""" + print(f"Failed to store document {document_id}: {exc}") + + exc_str = str(exc) + if "422" in exc_str: + print(" โ†’ Data validation issue - check document content and metadata format") + elif "timeout" in exc_str.lower(): + print(" โ†’ Network timeout - R2R may be overloaded") + elif "500" in exc_str: + print(" โ†’ Server error - R2R internal issue") + else: + import traceback + traceback.print_exc() + + def _build_metadata(self, document: 
Document) -> dict[str, object]:
+        """Convert document metadata to enriched R2R format."""
+        metadata = document.metadata
+
+        # Core required fields
+        result: dict[str, object] = {
+            "source_url": metadata["source_url"],
+            "content_type": metadata["content_type"],
+            "word_count": metadata["word_count"],
+            "char_count": metadata["char_count"],
+            "timestamp": metadata["timestamp"].isoformat(),
+            "ingestion_source": document.source.value,
+        }
+
+        # NOTE: the walrus checks below skip falsy values (0, False, empty
+        # strings/lists), so only truthy optional fields are persisted.
+
+        # Basic optional fields
+        if title := metadata.get("title"):
+            result["title"] = title
+        if description := metadata.get("description"):
+            result["description"] = description
+
+        # Content categorization
+        if tags := metadata.get("tags"):
+            result["tags"] = tags
+        if category := metadata.get("category"):
+            result["category"] = category
+        if section := metadata.get("section"):
+            result["section"] = section
+        if language := metadata.get("language"):
+            result["language"] = language
+
+        # Authorship and source info
+        if author := metadata.get("author"):
+            result["author"] = author
+        if domain := metadata.get("domain"):
+            result["domain"] = domain
+        if site_name := metadata.get("site_name"):
+            result["site_name"] = site_name
+
+        # Document structure
+        if heading_hierarchy := metadata.get("heading_hierarchy"):
+            result["heading_hierarchy"] = heading_hierarchy
+        if section_depth := metadata.get("section_depth"):
+            result["section_depth"] = section_depth
+        if has_code_blocks := metadata.get("has_code_blocks"):
+            result["has_code_blocks"] = has_code_blocks
+        if has_images := metadata.get("has_images"):
+            result["has_images"] = has_images
+        if has_links := metadata.get("has_links"):
+            result["has_links"] = has_links
+
+        # Processing metadata
+        if extraction_method := metadata.get("extraction_method"):
+            result["extraction_method"] = extraction_method
+        if crawl_depth := metadata.get("crawl_depth"):
+            result["crawl_depth"] = crawl_depth
+        if last_modified := metadata.get("last_modified"):
+            result["last_modified"] = last_modified.isoformat()
+
+        # Content quality indicators
+        if readability_score := metadata.get("readability_score"):
+            result["readability_score"] = readability_score
+        if completeness_score := metadata.get("completeness_score"):
+            result["completeness_score"] = completeness_score
+
+        # Repository-specific fields
+        if file_path := metadata.get("file_path"):
+            result["file_path"] = file_path
+        if repository_name := metadata.get("repository_name"):
+            result["repository_name"] = repository_name
+        if branch_name := metadata.get("branch_name"):
+            result["branch_name"] = branch_name
+        if commit_hash := metadata.get("commit_hash"):
+            result["commit_hash"] = commit_hash
+        if programming_language := metadata.get("programming_language"):
+            result["programming_language"] = programming_language
+
+        # Custom business metadata
+        if importance_score := metadata.get("importance_score"):
+            result["importance_score"] = importance_score
+        if review_status := metadata.get("review_status"):
+            result["review_status"] = review_status
+        if assigned_team := metadata.get("assigned_team"):
+            result["assigned_team"] = assigned_team
+
+        return result
+
+    @override
+    async def retrieve(
+        self, document_id: str, *, collection_name: str | None = None
+    ) -> Document | None:
+        """Retrieve a document by ID."""
+        try:
+            response = await self.client.documents.retrieve(document_id)
+        except R2RException as exc:
+            status_code = getattr(exc, "status_code", None)
+            if status_code == 404:
+                return None
+            import logging
+
+            logging.warning(f"Unexpected error 
retrieving document {document_id}: {exc}") + return None + except Exception as error: + import logging + + logging.warning(f"Unexpected error retrieving document {document_id}: {error}") + return None + payload = getattr(response, "results", response) + return self._convert_to_document(payload, collection_name) + + def _convert_to_document(self, r2r_doc: object, collection_name: str | None = None) -> Document: + """Convert R2R document payload to our Document model.""" + doc_map = _as_mapping(r2r_doc) + metadata_map = _as_mapping(doc_map.get("metadata", {})) + + doc_uuid = self._extract_document_uuid(r2r_doc) + timestamp = _as_datetime(doc_map.get("created_at", metadata_map.get("timestamp"))) + + metadata = self._build_core_metadata(metadata_map, timestamp) + self._add_optional_metadata_fields(metadata, doc_map, metadata_map) + + source_enum = self._extract_ingestion_source(metadata_map) + content_value = doc_map.get("content", getattr(r2r_doc, "content", "")) + + return Document( + id=doc_uuid, + content=str(content_value), + metadata=metadata, + source=source_enum, + collection=collection_name or self.config.collection_name, + ) + + def _extract_document_uuid(self, r2r_doc: object) -> UUID: + """Extract and validate document UUID.""" + doc_id_str = _extract_id(r2r_doc, str(uuid4())) + try: + return UUID(doc_id_str) + except ValueError: + return uuid4() + + def _build_core_metadata(self, metadata_map: dict[str, object], timestamp: datetime) -> DocumentMetadata: + """Build core required metadata fields.""" + return { + "source_url": str(metadata_map.get("source_url", "")), + "timestamp": timestamp, + "content_type": str(metadata_map.get("content_type", "text/plain")), + "word_count": _as_int(metadata_map.get("word_count")), + "char_count": _as_int(metadata_map.get("char_count")), + } + + def _add_optional_metadata_fields(self, metadata: DocumentMetadata, doc_map: dict[str, object], metadata_map: dict[str, object]) -> None: + """Add optional metadata fields if present.""" + self._add_title_and_description(metadata, doc_map, metadata_map) + self._add_content_categorization(metadata, metadata_map) + self._add_authorship_fields(metadata, metadata_map) + self._add_structure_fields(metadata, metadata_map) + self._add_processing_fields(metadata, metadata_map) + self._add_quality_scores(metadata, metadata_map) + + def _add_title_and_description(self, metadata: DocumentMetadata, doc_map: dict[str, object], metadata_map: dict[str, object]) -> None: + """Add title and description fields.""" + if title := (doc_map.get("title") or metadata_map.get("title")): + metadata["title"] = cast(str | None, title) + + if summary := (doc_map.get("summary") or metadata_map.get("summary")): + metadata["description"] = cast(str | None, summary) + elif description := metadata_map.get("description"): + metadata["description"] = cast(str | None, description) + + def _add_content_categorization(self, metadata: DocumentMetadata, metadata_map: dict[str, object]) -> None: + """Add content categorization fields.""" + if tags := metadata_map.get("tags"): + metadata["tags"] = [str(tag) for tag in tags] if isinstance(tags, list) else [] + if category := metadata_map.get("category"): + metadata["category"] = str(category) + if section := metadata_map.get("section"): + metadata["section"] = str(section) + if language := metadata_map.get("language"): + metadata["language"] = str(language) + + def _add_authorship_fields(self, metadata: DocumentMetadata, metadata_map: dict[str, object]) -> None: + """Add authorship and source 
information fields.""" + if author := metadata_map.get("author"): + metadata["author"] = str(author) + if domain := metadata_map.get("domain"): + metadata["domain"] = str(domain) + if site_name := metadata_map.get("site_name"): + metadata["site_name"] = str(site_name) + + def _add_structure_fields(self, metadata: DocumentMetadata, metadata_map: dict[str, object]) -> None: + """Add document structure fields.""" + if heading_hierarchy := metadata_map.get("heading_hierarchy"): + metadata["heading_hierarchy"] = ( + list(heading_hierarchy) if isinstance(heading_hierarchy, list) else [] + ) + if section_depth := metadata_map.get("section_depth"): + metadata["section_depth"] = _as_int(section_depth) + if has_code_blocks := metadata_map.get("has_code_blocks"): + metadata["has_code_blocks"] = bool(has_code_blocks) + if has_images := metadata_map.get("has_images"): + metadata["has_images"] = bool(has_images) + if has_links := metadata_map.get("has_links"): + metadata["has_links"] = bool(has_links) + + def _add_processing_fields(self, metadata: DocumentMetadata, metadata_map: dict[str, object]) -> None: + """Add processing-related metadata fields.""" + if extraction_method := metadata_map.get("extraction_method"): + metadata["extraction_method"] = str(extraction_method) + if crawl_depth := metadata_map.get("crawl_depth"): + metadata["crawl_depth"] = _as_int(crawl_depth) + if last_modified := metadata_map.get("last_modified"): + metadata["last_modified"] = _as_datetime(last_modified) + + def _add_quality_scores(self, metadata: DocumentMetadata, metadata_map: dict[str, object]) -> None: + """Add quality score fields with safe float conversion.""" + if readability_score := metadata_map.get("readability_score"): + try: + metadata["readability_score"] = float(str(readability_score)) + except (ValueError, TypeError): + metadata["readability_score"] = None + if completeness_score := metadata_map.get("completeness_score"): + try: + metadata["completeness_score"] = float(str(completeness_score)) + except (ValueError, TypeError): + metadata["completeness_score"] = None + + def _extract_ingestion_source(self, metadata_map: dict[str, object]) -> IngestionSource: + """Extract and validate ingestion source.""" + source_value = str(metadata_map.get("ingestion_source", IngestionSource.WEB.value)) + try: + return IngestionSource(source_value) + except ValueError: + return IngestionSource.WEB + + @override + async def search( + self, + query: str, + limit: int = 10, + threshold: float = 0.7, + *, + collection_name: str | None = None, + ) -> AsyncGenerator[Document, None]: + """Search documents using R2R.""" + try: + search_settings: dict[str, object] = { + "limit": limit, + "similarity_threshold": threshold, + } + + if collection_name: + collection_id = await self._ensure_collection(collection_name) + search_settings["collection_ids"] = [collection_id] + + search_response = await self.client.retrieval.search( + query=query, + search_settings=search_settings, + ) + + for result in _as_sequence(getattr(search_response, "results", ())): + result_map = _as_mapping(result) + document_id_value = result_map.get( + "document_id", getattr(result, "document_id", None) + ) + if document_id_value is None: + continue + document_id = str(document_id_value) + + try: + doc_response = await self.client.documents.retrieve(document_id) + except R2RException as exc: + import logging + + logging.warning( + f"Failed to retrieve document {document_id} during search: {exc}" + ) + continue + + document_payload = getattr(doc_response, 
"results", doc_response) + document = self._convert_to_document(document_payload, collection_name) + + score_value = result_map.get("score", getattr(result, "score", None)) + if score_value is not None: + try: + # Handle various score value types safely + if isinstance(score_value, (int, float, str)): + document.score = float(score_value) + else: + # For unknown types, try string conversion first + document.score = float(str(score_value)) + except (TypeError, ValueError) as e: + import logging + + logging.debug( + f"Invalid score value {score_value} for document {document_id}: {e}" + ) + document.score = None + + yield document + + except R2RException as exc: + raise StorageError(f"Search failed: {exc}") from exc + + @override + async def delete(self, document_id: str, *, collection_name: str | None = None) -> bool: + """Delete a document.""" + try: + _ = await self.client.documents.delete(document_id) + return True + except R2RException: + return False + + @override + async def count(self, *, collection_name: str | None = None) -> int: + """Get document count in collection.""" + endpoint = self.endpoint + client = AsyncClient() + try: + # Get collections and find the count for the specific collection + response = await client.get(f"{endpoint}/v3/collections") + response.raise_for_status() + data: dict[str, object] = response.json() + + target_collection = collection_name or self.config.collection_name + results = cast(list[dict[str, object]], data.get("results", [])) + for collection in results: + if collection.get("name") == target_collection: + doc_count = collection.get("document_count", 0) + return _as_int(doc_count) + + # Collection not found + return 0 + except Exception: + return 0 + finally: + await client.aclose() + + @override + async def close(self) -> None: + """Close R2R client.""" + try: + await self.client.close() + except Exception as e: + import logging + + logging.warning(f"Error closing R2R client: {e}") + + async def __aenter__(self) -> Self: + """Async context manager entry.""" + return self + + async def __aexit__( + self, + exc_type: type[BaseException] | None, + exc_val: BaseException | None, + exc_tb: object | None, + ) -> None: + """Async context manager exit with proper cleanup.""" + await self.close() + + # Additional R2R-specific comprehensive management methods + + async def create_collection(self, name: str, description: str | None = None) -> str: + """Create a new collection.""" + try: + response = await self.client.collections.create(name=name, description=description) + created = _as_mapping(getattr(response, "results", {})) + return str(created.get("id", name)) + except R2RException as exc: + raise StorageError(f"Failed to create collection {name}: {exc}") from exc + + async def delete_collection(self, collection_name: str) -> bool: + """Delete a collection.""" + try: + collection_id = await self._ensure_collection(collection_name) + _ = await self.client.collections.delete(collection_id) + return True + except R2RException: + return False + + @override + async def list_collections(self) -> list[str]: + """List all available collections.""" + endpoint = self.endpoint + client = AsyncClient() + try: + response = await client.get(f"{endpoint}/v3/collections") + response.raise_for_status() + data: dict[str, object] = response.json() + + collection_names: list[str] = [] + results = cast(list[dict[str, object]], data.get("results", [])) + for entry in results: + if name := entry.get("name"): + collection_names.append(str(name)) + return collection_names + 
except Exception as e: + raise StorageError(f"Failed to list collections: {e}") from e + finally: + await client.aclose() + + async def list_collections_detailed(self) -> list[dict[str, object]]: + """List all available collections with detailed information.""" + try: + response = await self.client.collections.list() + collections: list[dict[str, object]] = [] + for entry in _as_sequence(getattr(response, "results", ())): + entry_map = _as_mapping(entry) + collections.append( + { + "id": str(entry_map.get("id", "")), + "name": str(entry_map.get("name", "")), + "description": entry_map.get("description"), + } + ) + return collections + except R2RException as exc: + raise StorageError(f"Failed to list collections: {exc}") from exc + + async def get_document_chunks(self, document_id: str) -> list[dict[str, object]]: + """Get all chunks for a specific document.""" + try: + response = await self.client.chunks.list(filters={"document_id": document_id}) + return [ + dict(_as_mapping(chunk)) for chunk in _as_sequence(getattr(response, "results", ())) + ] + except R2RException as exc: + raise StorageError(f"Failed to get chunks for document {document_id}: {exc}") from exc + + async def extract_entities(self, document_id: str) -> dict[str, object]: + """Extract entities and relationships from a document.""" + try: + response = await self.client.documents.extract(id=document_id) + return dict(_as_mapping(getattr(response, "results", {}))) + except R2RException as exc: + raise StorageError( + f"Failed to extract entities from document {document_id}: {exc}" + ) from exc + + async def get_document_overview(self, document_id: str) -> dict[str, object]: + """Get comprehensive document overview and statistics.""" + try: + doc_response = await self.client.documents.retrieve(document_id) + chunks_response = await self.client.chunks.list(filters={"document_id": document_id}) + document_payload = dict(_as_mapping(getattr(doc_response, "results", {}))) + chunk_payload = [ + dict(_as_mapping(chunk)) + for chunk in _as_sequence(getattr(chunks_response, "results", ())) + ] + return { + "document": document_payload, + "chunk_count": len(chunk_payload), + "chunks": chunk_payload, + } + except R2RException as exc: + raise StorageError(f"Failed to get overview for document {document_id}: {exc}") from exc + + @override + async def list_documents( + self, + limit: int = 100, + offset: int = 0, + *, + collection_name: str | None = None, + ) -> list[dict[str, object]]: + """ + List documents in R2R with pagination. 
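+
+        When collection_name is given, documents are fetched from the
+        collection-scoped /v3/collections/{id}/documents endpoint; otherwise
+        all documents are listed through the SDK's documents.list call.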
+ + Args: + limit: Maximum number of documents to return + offset: Number of documents to skip + collection_name: Collection name (optional) + + Returns: + List of document dictionaries with metadata + """ + try: + documents: list[dict[str, object]] = [] + + if collection_name: + # Get collection ID first + collection_id = await self._ensure_collection(collection_name) + # Use the collections API to list documents in a specific collection + endpoint = self.endpoint + client = AsyncClient() + try: + params = {"offset": offset, "limit": limit} + response = await client.get( + f"{endpoint}/v3/collections/{collection_id}/documents", params=params + ) + response.raise_for_status() + data: dict[str, object] = response.json() + finally: + await client.aclose() + + doc_sequence = _as_sequence(data.get("results", [])) + else: + # List all documents + r2r_response = await self.client.documents.list(offset=offset, limit=limit) + documents_data: list[object] | dict[str, object] = getattr( + r2r_response, "results", [] + ) + + doc_sequence = _as_sequence( + documents_data.get("results", []) + if isinstance(documents_data, dict) + else documents_data + ) + + for doc_data in doc_sequence: + doc_map = _as_mapping(doc_data) + + # Extract standard document fields + doc_id = str(doc_map.get("id", "")) + title = str(doc_map.get("title", "Untitled")) + metadata = _as_mapping(doc_map.get("metadata", {})) + + documents.append( + { + "id": doc_id, + "title": title, + "source_url": str(metadata.get("source_url", "")), + "description": str(metadata.get("description", "")), + "content_type": str(metadata.get("content_type", "text/plain")), + "content_preview": str(doc_map.get("content", ""))[:200] + "..." + if doc_map.get("content") + else "", + "word_count": _as_int(metadata.get("word_count", 0)), + "timestamp": str(doc_map.get("created_at", "")), + } + ) + + return documents + + except Exception as e: + raise StorageError(f"Failed to list documents: {e}") from e + + + +"""Base storage interface.""" + +from abc import ABC, abstractmethod +from collections.abc import AsyncGenerator + +from ..core.models import Document, StorageConfig + + +class BaseStorage(ABC): + """Abstract base class for storage adapters.""" + + config: StorageConfig + + def __init__(self, config: StorageConfig): + """ + Initialize storage adapter. + + Args: + config: Storage configuration + """ + self.config = config + + @property + def display_name(self) -> str: + """Human-readable name for UI display.""" + return self.__class__.__name__.replace("Storage", "") + + @abstractmethod + async def initialize(self) -> None: + """Initialize the storage backend and create collections if needed.""" + pass # pragma: no cover + + @abstractmethod + async def store(self, document: Document, *, collection_name: str | None = None) -> str: + """ + Store a single document. + + Args: + document: Document to store + + Returns: + Document ID + """ + pass # pragma: no cover + + @abstractmethod + async def store_batch( + self, documents: list[Document], *, collection_name: str | None = None + ) -> list[str]: + """ + Store multiple documents in batch. + + Args: + documents: List of documents to store + + Returns: + List of document IDs + """ + pass # pragma: no cover + + async def retrieve( + self, document_id: str, *, collection_name: str | None = None + ) -> Document | None: + """ + Retrieve a document by ID (if supported by backend). 
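+
+        Backends without per-document retrieval should leave this default in
+        place; check_exists() treats the resulting NotImplementedError as
+        "document absent", which forces the source to be re-ingested.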
+ + Args: + document_id: Document ID + + Returns: + Document or None if not found + + Raises: + NotImplementedError: If backend doesn't support retrieval + """ + raise NotImplementedError(f"{self.__class__.__name__} doesn't support document retrieval") + + async def check_exists( + self, document_id: str, *, collection_name: str | None = None, stale_after_days: int = 30 + ) -> bool: + """ + Check if a document exists and is not stale. + + Args: + document_id: Document ID to check + collection_name: Collection to check in + stale_after_days: Consider document stale after this many days + + Returns: + True if document exists and is not stale, False otherwise + """ + try: + document = await self.retrieve(document_id, collection_name=collection_name) + if document is None: + return False + + # Check staleness if timestamp is available + if "timestamp" in document.metadata: + from datetime import UTC, datetime, timedelta + timestamp_obj = document.metadata["timestamp"] + if isinstance(timestamp_obj, datetime): + timestamp = timestamp_obj + cutoff = datetime.now(UTC) - timedelta(days=stale_after_days) + return timestamp >= cutoff + + # If no timestamp, assume it exists and is valid + return True + except Exception: + # Backend doesn't support retrieval, assume doesn't exist + return False + + def search( + self, + query: str, + limit: int = 10, + threshold: float = 0.7, + *, + collection_name: str | None = None, + ) -> AsyncGenerator[Document, None]: + """ + Search for documents (if supported by backend). + + Args: + query: Search query + limit: Maximum number of results + threshold: Similarity threshold + + Yields: + Matching documents + + Raises: + NotImplementedError: If backend doesn't support search + """ + raise NotImplementedError(f"{self.__class__.__name__} doesn't support search") + + @abstractmethod + async def delete(self, document_id: str, *, collection_name: str | None = None) -> bool: + """ + Delete a document. + + Args: + document_id: Document ID + + Returns: + True if deleted successfully + """ + pass # pragma: no cover + + async def count(self, *, collection_name: str | None = None) -> int: + """ + Get total document count (if supported by backend). + + Returns: + Number of documents, 0 if not supported + """ + return 0 + + async def list_collections(self) -> list[str]: + """ + List available collections (if supported by backend). + + Returns: + List of collection names, empty list if not supported + """ + return [] + + async def describe_collections(self) -> list[dict[str, object]]: + """ + Describe available collections with metadata (if supported by backend). + + Returns: + List of collection metadata dictionaries, empty list if not supported + """ + return [] + + async def delete_collection(self, collection_name: str) -> bool: + """ + Delete a collection (if supported by backend). + + Args: + collection_name: Name of collection to delete + + Returns: + True if deleted successfully, False if not supported + """ + return False + + async def delete_documents( + self, document_ids: list[str], *, collection_name: str | None = None + ) -> dict[str, bool]: + """ + Delete documents by IDs (if supported by backend). 
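+
+        Example (illustrative): a backend that deleted doc-1 but failed on
+        doc-2 would return {"doc-1": True, "doc-2": False}.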
+ + Args: + document_ids: List of document IDs to delete + collection_name: Collection to delete from + + Returns: + Dict mapping document IDs to success status, empty if not supported + """ + return {} + + async def list_documents( + self, + limit: int = 100, + offset: int = 0, + *, + collection_name: str | None = None, + ) -> list[dict[str, object]]: + """ + List documents in the storage backend (if supported). + + Args: + limit: Maximum number of documents to return + offset: Number of documents to skip + collection_name: Collection to list documents from + + Returns: + List of document dictionaries with metadata + + Raises: + NotImplementedError: If backend doesn't support document listing + """ + raise NotImplementedError(f"{self.__class__.__name__} doesn't support document listing") + + async def close(self) -> None: + """ + Close storage connections and cleanup resources. + + Default implementation does nothing. + """ + # Default implementation - storage backends can override to cleanup connections + return None + + + +"""Open WebUI storage adapter.""" + +import asyncio +import logging +from typing import TYPE_CHECKING, Final, TypedDict, cast + +import httpx +from typing_extensions import override + +if TYPE_CHECKING: + # Type checking imports - these will be ignored at runtime + from httpx import AsyncClient, ConnectError, HTTPStatusError, RequestError +else: + # Runtime imports that work properly + AsyncClient = httpx.AsyncClient + ConnectError = httpx.ConnectError + HTTPStatusError = httpx.HTTPStatusError + RequestError = httpx.RequestError + +from ..core.exceptions import StorageError +from ..core.models import Document, StorageConfig +from .base import BaseStorage + +LOGGER: Final[logging.Logger] = logging.getLogger(__name__) + + +class OpenWebUIStorage(BaseStorage): + """Storage adapter for Open WebUI knowledge endpoints.""" + + client: AsyncClient + _knowledge_cache: dict[str, str] + + def __init__(self, config: StorageConfig): + """ + Initialize Open WebUI storage. 
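+
+        A Bearer Authorization header is attached when config.api_key is
+        set; all requests share one AsyncClient with a 30-second timeout.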
+ + Args: + config: Storage configuration + """ + super().__init__(config) + + headers: dict[str, str] = {} + if config.api_key: + headers["Authorization"] = f"Bearer {config.api_key}" + + self.client = AsyncClient( + base_url=str(config.endpoint), + headers=headers, + timeout=30.0, + ) + self._knowledge_cache = {} + + @override + async def initialize(self) -> None: + """Initialize Open WebUI connection.""" + try: + if self.config.collection_name: + await self._get_knowledge_id( + self.config.collection_name, + create=True, + ) + + except ConnectError as e: + raise StorageError(f"Connection to OpenWebUI failed: {e}") from e + except HTTPStatusError as e: + raise StorageError(f"OpenWebUI returned error {e.response.status_code}: {e}") from e + except RequestError as e: + raise StorageError(f"Request to OpenWebUI failed: {e}") from e + except Exception as e: + raise StorageError(f"Failed to initialize Open WebUI: {e}") from e + + async def _create_collection(self, name: str) -> str: + """Create knowledge base in Open WebUI.""" + try: + response = await self.client.post( + "/api/v1/knowledge/create", + json={ + "name": name, + "description": "Documents ingested from various sources", + "data": {}, + "access_control": None, + }, + ) + response.raise_for_status() + result = response.json() + knowledge_id = result.get("id") + + if not knowledge_id or not isinstance(knowledge_id, str): + raise StorageError("Knowledge base creation failed: no ID returned") + + return str(knowledge_id) + + except ConnectError as e: + raise StorageError(f"Connection to OpenWebUI failed during creation: {e}") from e + except HTTPStatusError as e: + raise StorageError( + f"OpenWebUI returned error {e.response.status_code} during creation: {e}" + ) from e + except RequestError as e: + raise StorageError(f"Request to OpenWebUI failed during creation: {e}") from e + except Exception as e: + raise StorageError(f"Failed to create knowledge base: {e}") from e + + async def _fetch_knowledge_bases(self) -> list[dict[str, object]]: + """Return the list of knowledge bases from the API.""" + response = await self.client.get("/api/v1/knowledge/list") + response.raise_for_status() + data = response.json() + if not isinstance(data, list): + return [] + normalized: list[dict[str, object]] = [] + for item in data: + if isinstance(item, dict): + item_dict: dict[str, object] = item + normalized.append({str(k): v for k, v in item_dict.items()}) + return normalized + + async def _get_knowledge_id( + self, + name: str | None, + *, + create: bool, + ) -> str | None: + """Retrieve (and optionally create) a knowledge base identifier.""" + target_raw = name or self.config.collection_name + target = str(target_raw) if target_raw else "" + if not target: + raise StorageError("Knowledge base name is required") + + if cached := self._knowledge_cache.get(target): + return cached + + knowledge_bases = await self._fetch_knowledge_bases() + for kb in knowledge_bases: + if kb.get("name") == target: + kb_id = kb.get("id") + if isinstance(kb_id, str): + self._knowledge_cache[target] = kb_id + return kb_id + + if not create: + return None + + knowledge_id = await self._create_collection(target) + self._knowledge_cache[target] = knowledge_id + return knowledge_id + + @override + async def store(self, document: Document, *, collection_name: str | None = None) -> str: + """ + Store a document in Open WebUI as a file. 
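+
+        Storage is a two-step process: the document is first uploaded via
+        /api/v1/files/, and the resulting file is then attached to the
+        knowledge base via /api/v1/knowledge/{id}/file/add.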
+ + Args: + document: Document to store + + Returns: + File ID + """ + try: + knowledge_id = await self._get_knowledge_id( + collection_name, + create=True, + ) + if not knowledge_id: + raise StorageError("Knowledge base not initialized") + + # Step 1: Upload document as file + # Use document title from metadata if available, otherwise fall back to ID + filename = document.metadata.get("title") or f"doc_{document.id}" + # Ensure filename has proper extension + if not filename.endswith(('.txt', '.md', '.pdf', '.doc', '.docx')): + filename = f"{filename}.txt" + files = {"file": (filename, document.content.encode(), "text/plain")} + response = await self.client.post( + "/api/v1/files/", + files=files, + params={"process": True, "process_in_background": False}, + ) + response.raise_for_status() + + file_data = response.json() + file_id = file_data.get("id") + + if not file_id or not isinstance(file_id, str): + raise StorageError("File upload failed: no file ID returned") + + # Step 2: Add file to knowledge base + response = await self.client.post( + f"/api/v1/knowledge/{knowledge_id}/file/add", json={"file_id": file_id} + ) + response.raise_for_status() + + return str(file_id) + + except ConnectError as e: + raise StorageError(f"Connection to OpenWebUI failed: {e}") from e + except HTTPStatusError as e: + raise StorageError(f"OpenWebUI returned error {e.response.status_code}: {e}") from e + except RequestError as e: + raise StorageError(f"Request to OpenWebUI failed: {e}") from e + except Exception as e: + raise StorageError(f"Failed to store document: {e}") from e + + @override + async def store_batch( + self, documents: list[Document], *, collection_name: str | None = None + ) -> list[str]: + """ + Store multiple documents as files in batch. + + Args: + documents: List of documents + + Returns: + List of file IDs + """ + try: + knowledge_id = await self._get_knowledge_id( + collection_name, + create=True, + ) + if not knowledge_id: + raise StorageError("Knowledge base not initialized") + + async def upload_and_attach(doc: Document) -> str: + # Use document title from metadata if available, otherwise fall back to ID + filename = doc.metadata.get("title") or f"doc_{doc.id}" + # Ensure filename has proper extension + if not filename.endswith(('.txt', '.md', '.pdf', '.doc', '.docx')): + filename = f"{filename}.txt" + files = {"file": (filename, doc.content.encode(), "text/plain")} + upload_response = await self.client.post( + "/api/v1/files/", + files=files, + params={"process": True, "process_in_background": False}, + ) + upload_response.raise_for_status() + + file_data = upload_response.json() + file_id = file_data.get("id") + + if not file_id or not isinstance(file_id, str): + raise StorageError( + f"File upload failed for document {doc.id}: no file ID returned" + ) + + attach_response = await self.client.post( + f"/api/v1/knowledge/{knowledge_id}/file/add", json={"file_id": file_id} + ) + attach_response.raise_for_status() + + return str(file_id) + + tasks = [upload_and_attach(doc) for doc in documents] + results = await asyncio.gather(*tasks, return_exceptions=True) + + file_ids: list[str] = [] + failures: list[str] = [] + + for index, result in enumerate(results): + doc = documents[index] + if isinstance(result, Exception): + failures.append(f"{doc.id}: {result}") + else: + file_ids.append(cast(str, result)) + + if failures: + LOGGER.warning( + "OpenWebUI partial batch failure for knowledge base %s: %s", + self.config.collection_name, + ", ".join(failures), + ) + + return file_ids + + 
except ConnectError as e:
+            raise StorageError(f"Connection to OpenWebUI failed during batch: {e}") from e
+        except HTTPStatusError as e:
+            raise StorageError(
+                f"OpenWebUI returned error {e.response.status_code} during batch: {e}"
+            ) from e
+        except RequestError as e:
+            raise StorageError(f"Request to OpenWebUI failed during batch: {e}") from e
+        except Exception as e:
+            raise StorageError(f"Failed to store batch: {e}") from e
+
+    @override
+    async def retrieve(
+        self, document_id: str, *, collection_name: str | None = None
+    ) -> Document | None:
+        """
+        OpenWebUI doesn't support document retrieval by ID.
+
+        Args:
+            document_id: File ID (not supported)
+            collection_name: Collection name (not used)
+
+        Raises:
+            NotImplementedError: Always - OpenWebUI stores opaque files and
+                cannot retrieve document content by ID.
+        """
+        # OpenWebUI uses file-based storage without direct document retrieval
+        # This will cause the base check_exists method to return False,
+        # which means documents will always be re-scraped for OpenWebUI
+        raise NotImplementedError("OpenWebUI doesn't support document retrieval by ID")
+
+    @override
+    async def delete(self, document_id: str, *, collection_name: str | None = None) -> bool:
+        """
+        Remove a file from Open WebUI knowledge base.
+
+        Args:
+            document_id: File ID to remove
+
+        Returns:
+            True if removed successfully
+        """
+        try:
+            knowledge_id = await self._get_knowledge_id(
+                collection_name,
+                create=False,
+            )
+            if not knowledge_id:
+                return False
+
+            # Remove file from knowledge base
+            response = await self.client.post(
+                f"/api/v1/knowledge/{knowledge_id}/file/remove", json={"file_id": document_id}
+            )
+            response.raise_for_status()
+
+            delete_response = await self.client.delete(f"/api/v1/files/{document_id}")
+            if delete_response.status_code == 404:
+                return True
+            delete_response.raise_for_status()
+            return True
+
+        except ConnectError as exc:
+            LOGGER.error(
+                "Failed to reach OpenWebUI when deleting file %s", document_id, exc_info=exc
+            )
+            return False
+        except HTTPStatusError as exc:
+            LOGGER.error(
+                "OpenWebUI returned status error %s when deleting file %s",
+                exc.response.status_code if exc.response else "unknown",
+                document_id,
+                exc_info=exc,
+            )
+            return False
+        except RequestError as exc:
+            LOGGER.error("Request error deleting file %s from OpenWebUI", document_id, exc_info=exc)
+            return False
+        except Exception as exc:
+            LOGGER.error("Unexpected error deleting file %s", document_id, exc_info=exc)
+            return False
+
+    async def list_collections(self) -> list[str]:
+        """
+        List all available knowledge bases.
+
+        Returns:
+            List of knowledge base names
+        """
+        try:
+            knowledge_bases = await self._fetch_knowledge_bases()
+
+            # Extract names from knowledge bases
+            return [
+                str(kb.get("name", f"knowledge_{kb.get('id', 'unknown')}") or "")
+                for kb in knowledge_bases
+            ]
+
+        except ConnectError as e:
+            raise StorageError(f"Connection to OpenWebUI failed: {e}") from e
+        except HTTPStatusError as e:
+            raise StorageError(f"OpenWebUI returned error {e.response.status_code}: {e}") from e
+        except RequestError as e:
+            raise StorageError(f"Request to OpenWebUI failed: {e}") from e
+        except Exception as e:
+            raise StorageError(f"Failed to list knowledge bases: {e}") from e
+
+    async def delete_collection(self, collection_name: str) -> bool:
+        """
+        Delete a knowledge base by name. 
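+
+        A 404 from the API is treated as success (the knowledge base is
+        already gone), and the name is evicted from the local cache.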
+ + Args: + collection_name: Name of the knowledge base to delete + + Returns: + True if deleted successfully, False otherwise + """ + try: + knowledge_id = await self._get_knowledge_id(collection_name, create=False) + if not knowledge_id: + # Collection doesn't exist, consider it already deleted + return True + + # Delete the knowledge base using the OpenWebUI API + response = await self.client.delete(f"/api/v1/knowledge/{knowledge_id}/delete") + response.raise_for_status() + + # Remove from cache if it exists + if collection_name in self._knowledge_cache: + del self._knowledge_cache[collection_name] + + LOGGER.info("Successfully deleted knowledge base: %s", collection_name) + return True + + except HTTPStatusError as e: + # Handle 404 as success (already deleted) + if e.response.status_code == 404: + LOGGER.info("Knowledge base %s was already deleted or not found", collection_name) + return True + LOGGER.error( + "OpenWebUI returned error %s when deleting knowledge base %s", + e.response.status_code, + collection_name, + exc_info=e, + ) + return False + except ConnectError as e: + LOGGER.error( + "Failed to reach OpenWebUI when deleting knowledge base %s", + collection_name, + exc_info=e, + ) + return False + except RequestError as e: + LOGGER.error( + "Request error deleting knowledge base %s from OpenWebUI", + collection_name, + exc_info=e, + ) + return False + except Exception as e: + LOGGER.error("Unexpected error deleting knowledge base %s", collection_name, exc_info=e) + return False + + class CollectionSummary(TypedDict): + """Structure describing a knowledge base summary.""" + + name: str + count: int + size_mb: float + + + async def _get_knowledge_base_count(self, kb: dict[str, object]) -> int: + """Get the file count for a knowledge base.""" + kb_id = kb.get("id") + name = kb.get("name", "Unknown") + + if not kb_id: + return self._count_files_from_basic_info(kb) + + return await self._count_files_from_detailed_info(str(kb_id), str(name), kb) + + def _count_files_from_basic_info(self, kb: dict[str, object]) -> int: + """Count files from basic knowledge base info.""" + files = kb.get("files", []) + return len(files) if isinstance(files, list) and files is not None else 0 + + async def _count_files_from_detailed_info(self, kb_id: str, name: str, kb: dict[str, object]) -> int: + """Count files by fetching detailed knowledge base info.""" + try: + LOGGER.debug(f"Fetching detailed info for KB '{name}' from /api/v1/knowledge/{kb_id}") + detail_response = await self.client.get(f"/api/v1/knowledge/{kb_id}") + detail_response.raise_for_status() + detailed_kb = detail_response.json() + + files = detailed_kb.get("files", []) + count = len(files) if isinstance(files, list) and files is not None else 0 + + LOGGER.info(f"Knowledge base '{name}' (ID: {kb_id}): found {count} files") + return count + + except Exception as e: + LOGGER.warning(f"Failed to get detailed info for KB '{name}' (ID: {kb_id}): {e}") + return self._count_files_from_basic_info(kb) + + async def describe_collections(self) -> list[dict[str, object]]: + """Return metadata about each knowledge base.""" + try: + knowledge_bases = await self._fetch_knowledge_bases() + collections: list[dict[str, object]] = [] + + for kb in knowledge_bases: + if not isinstance(kb, dict): + continue + + count = await self._get_knowledge_base_count(kb) + name = kb.get("name", "Unknown") + size_mb = count * 0.5 # rough heuristic + + summary: dict[str, object] = { + "name": str(name), + "count": count, + "size_mb": float(size_mb), + } + 
collections.append(summary) + + return collections + + except Exception as e: + raise StorageError(f"Failed to describe knowledge bases: {e}") from e + + async def count(self, *, collection_name: str | None = None) -> int: + """ + Get document count for a specific collection (knowledge base). + + Args: + collection_name: Name of the knowledge base to count documents for + + Returns: + Number of documents in the collection, 0 if collection not found + """ + if not collection_name: + # If no collection name provided, return total across all collections + try: + collections = await self.describe_collections() + return sum( + int(collection["count"]) if isinstance(collection["count"], (int, str)) else 0 + for collection in collections + ) + except Exception: + return 0 + + try: + # Get knowledge base by name and return its file count + kb = await self.get_knowledge_by_name(collection_name) + if not kb: + return 0 + + kb_id = kb.get("id") + if not kb_id: + return 0 + + # Get detailed knowledge base information to get accurate file count + detail_response = await self.client.get(f"/api/v1/knowledge/{kb_id}") + detail_response.raise_for_status() + detailed_kb = detail_response.json() + + files = detailed_kb.get("files", []) + count = len(files) if isinstance(files, list) else 0 + + LOGGER.debug(f"Count for collection '{collection_name}': {count} files") + return count + + except Exception as e: + LOGGER.warning(f"Failed to get count for collection '{collection_name}': {e}") + return 0 + + async def get_knowledge_by_name(self, name: str) -> dict[str, object] | None: + """ + Get knowledge base details by name. + + Args: + name: Knowledge base name + + Returns: + Knowledge base details or None if not found + """ + try: + response = await self.client.get("/api/v1/knowledge/list") + response.raise_for_status() + knowledge_bases = response.json() + + return next( + ( + {str(k): v for k, v in kb.items()} + for kb in knowledge_bases + if isinstance(kb, dict) and kb.get("name") == name + ), + None, + ) + except Exception as e: + raise StorageError(f"Failed to get knowledge base by name: {e}") from e + + async def __aenter__(self) -> "OpenWebUIStorage": + """Async context manager entry.""" + await self.initialize() + return self + + async def __aexit__( + self, + exc_type: type[BaseException] | None, + exc_val: BaseException | None, + exc_tb: object | None, + ) -> None: + """Async context manager exit.""" + await self.close() + + async def list_documents( + self, + limit: int = 100, + offset: int = 0, + *, + collection_name: str | None = None, + ) -> list[dict[str, object]]: + """ + List documents (files) in a knowledge base. + + NOTE: This is a basic implementation that attempts to extract file information + from OpenWebUI knowledge bases. The actual file listing capabilities depend + on the OpenWebUI API version and may not include detailed file metadata. 
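+
+        Filenames are resolved from the first available of: filename, name,
+        or meta.name on each file entry; sizes come from meta.size when
+        present, and word counts are rough estimates derived from file size.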
+ + Args: + limit: Maximum number of documents to return + offset: Number of documents to skip + collection_name: Knowledge base name + + Returns: + List of document dictionaries with available metadata + """ + try: + # Use the knowledge base name or fall back to default + kb_name = collection_name or self.config.collection_name or "default" + + # Try to get knowledge base details + knowledge_base = await self.get_knowledge_by_name(kb_name) + if not knowledge_base: + # If specific KB not found, return empty list with a note + return [] + + # Extract files if available (API structure may vary) + files = knowledge_base.get("files", []) + + # Handle different possible API response structures + if not isinstance(files, list): + # Some API versions might structure this differently + # Try to handle gracefully + return [ + { + "id": "unknown", + "title": f"Knowledge Base: {kb_name}", + "source_url": "", + "description": "OpenWebUI knowledge base (file details not available)", + "content_type": "text/plain", + "content_preview": "Document listing not fully supported for OpenWebUI", + "word_count": 0, + "timestamp": "", + } + ] + + # Apply pagination + paginated_files = files[offset : offset + limit] + + # Convert to document format with safe field access + documents: list[dict[str, object]] = [] + for i, file_info in enumerate(paginated_files): + if not isinstance(file_info, dict): + continue + + # Safely extract fields with fallbacks + doc_id = str(file_info.get("id", f"file_{i}")) + + # Try multiple ways to get filename from OpenWebUI API response + filename = None + # Check direct filename field + if "filename" in file_info: + filename = file_info["filename"] + # Check name field + elif "name" in file_info: + filename = file_info["name"] + # Check meta.name (from FileModelResponse schema) + elif isinstance(file_info.get("meta"), dict): + filename = file_info["meta"].get("name") + + # Final fallback + if not filename: + filename = f"file_{i}" + + filename = str(filename) + + # Extract size from meta if available + size = 0 + if isinstance(file_info.get("meta"), dict): + size = file_info["meta"].get("size", 0) + else: + size = file_info.get("size", 0) + + # Estimate word count from file size (very rough approximation) + word_count = max(1, int(size / 6)) if isinstance(size, (int, float)) else 0 + + documents.append( + { + "id": doc_id, + "title": filename, + "source_url": "", # OpenWebUI files don't typically have source URLs + "description": f"File: {filename}", + "content_type": str(file_info.get("content_type", "text/plain")), + "content_preview": f"File uploaded to OpenWebUI: {filename}", + "word_count": word_count, + "timestamp": str( + file_info.get("created_at") or file_info.get("timestamp", "") + ), + } + ) + + return documents + + except Exception as e: + # Since OpenWebUI file listing API structure is not guaranteed, + # we gracefully fall back rather than raise an error + import logging + + logging.warning(f"OpenWebUI document listing failed: {e}") + + # Return a placeholder entry indicating limited support + return [ + { + "id": "api_error", + "title": f"Knowledge Base: {collection_name or 'default'}", + "source_url": "", + "description": "Document listing encountered an error - API compatibility issue", + "content_type": "text/plain", + "content_preview": f"Error: {str(e)[:100]}...", + "word_count": 0, + "timestamp": "", + } + ] + + async def close(self) -> None: + """Close client connection.""" + if hasattr(self, "client") and self.client: + try: + await self.client.aclose() + 
except Exception as e:
+                import logging
+
+                logging.warning(f"Error closing OpenWebUI client: {e}")
+
+
diff --git a/ingest_pipeline/storage/weaviate.py b/ingest_pipeline/storage/weaviate.py
--- a/ingest_pipeline/storage/weaviate.py
+++ b/ingest_pipeline/storage/weaviate.py
+"""Weaviate storage adapter."""
+
+from collections.abc import AsyncGenerator, Mapping, Sequence
+from datetime import UTC, datetime
+from typing import Literal, Self, TypeAlias, cast, overload
+from uuid import UUID
+
+import weaviate
+from typing_extensions import override
+from weaviate.classes.config import Configure, DataType, Property
+from weaviate.classes.data import DataObject
+from weaviate.classes.query import Filter
+from weaviate.collections import Collection
+from weaviate.exceptions import (
+    WeaviateBatchError,
+    WeaviateConnectionError,
+    WeaviateQueryError,
+)
+
+from ..core.exceptions import StorageError
+from ..core.models import Document, DocumentMetadata, IngestionSource, StorageConfig
+from ..utils.vectorizer import Vectorizer
+from .base import BaseStorage
+
+VectorContainer: TypeAlias = Mapping[str, object] | Sequence[object] | None
+
+
+class WeaviateStorage(BaseStorage):
+    """Storage adapter for Weaviate."""
+
+    client: weaviate.WeaviateClient | None
+    vectorizer: Vectorizer
+    _default_collection: str
+
+    def __init__(self, config: StorageConfig):
+        """
+        Initialize Weaviate storage.
+
+        Args:
+            config: Storage configuration
+        """
+        super().__init__(config)
+        self.client = None
+        self.vectorizer = Vectorizer(config)
+        self._default_collection = self._normalize_collection_name(config.collection_name)
+
+    @override
+    async def initialize(self) -> None:
+        """Initialize Weaviate client and create collection if needed."""
+        try:
+            # Let Weaviate client handle URL parsing
+            self.client = weaviate.WeaviateClient(
+                connection_params=weaviate.connect.ConnectionParams.from_url(
+                    url=str(self.config.endpoint),
+                    grpc_port=50051,  # Default gRPC port
+                ),
+                additional_config=weaviate.classes.init.AdditionalConfig(
+                    timeout=weaviate.classes.init.Timeout(init=30, query=60, insert=120),
+                ),
+            )
+
+            # Connect to the client
+            self.client.connect()
+
+            # Ensure the default collection exists
+            await self._ensure_collection(self._default_collection)
+
+        except WeaviateConnectionError as e:
+            raise StorageError(f"Failed to connect to Weaviate: {e}") from e
+        except Exception as e:
+            raise StorageError(f"Failed to initialize Weaviate: {e}") from e
+
+    async def _create_collection(self, collection_name: str) -> None:
+        """Create Weaviate collection with schema."""
+        if not self.client:
+            raise StorageError("Weaviate client not initialized")
+        try:
+            client = cast(weaviate.WeaviateClient, self.client)
+            client.collections.create(
+                name=collection_name,
+                properties=[
+                    Property(
+                        name="content", data_type=DataType.TEXT, description="Document content"
+                    ),
+                    Property(name="source_url", data_type=DataType.TEXT, description="Source URL"),
+                    Property(name="title", data_type=DataType.TEXT, description="Document title"),
+                    Property(
+                        name="description",
+                        data_type=DataType.TEXT,
+                        description="Document description",
+                    ),
+                    Property(
+                        name="timestamp", data_type=DataType.DATE, description="Ingestion timestamp"
+                    ),
+                    Property(
+                        name="content_type", data_type=DataType.TEXT, description="Content type"
+                    ),
+                    Property(name="word_count", data_type=DataType.INT, description="Word count"),
+                    Property(
+                        name="char_count", data_type=DataType.INT, description="Character count"
+                    ),
+                    Property(
+                        name="source", data_type=DataType.TEXT, description="Ingestion source"
+                    ),
+                ],
+                vectorizer_config=Configure.Vectorizer.none(),
+            )
+        except Exception as e:
+            raise StorageError(f"Failed to
create collection: {e}") from e + + @staticmethod + def _extract_vector(vector_raw: VectorContainer) -> list[float] | None: + """Normalize vector payloads returned by Weaviate into a float list.""" + if isinstance(vector_raw, Mapping): + default_vector = vector_raw.get("default") + return WeaviateStorage._extract_vector( + cast(VectorContainer, default_vector) + ) + + if not isinstance(vector_raw, Sequence) or isinstance( + vector_raw, (str, bytes, bytearray) + ): + return None + + items = list(vector_raw) + if not items: + return None + + first_item = items[0] + if isinstance(first_item, (int, float)): + numeric_items = cast(list[int | float], items) + try: + return [float(value) for value in numeric_items] + except (TypeError, ValueError): + return None + + if isinstance(first_item, Sequence) and not isinstance( + first_item, (str, bytes, bytearray) + ): + inner_items = list(first_item) + if all(isinstance(item, (int, float)) for item in inner_items): + try: + numeric_inner = cast(list[int | float], inner_items) + return [float(item) for item in numeric_inner] + except (TypeError, ValueError): + return None + + return None + + @staticmethod + def _parse_source(source_raw: object) -> IngestionSource: + """Safely normalize persistence source values into enum instances.""" + if isinstance(source_raw, IngestionSource): + return source_raw + + if isinstance(source_raw, str): + try: + return IngestionSource(source_raw) + except ValueError: + return IngestionSource.WEB + + return IngestionSource.WEB + + @staticmethod + @overload + def _coerce_properties( + properties: object, + *, + context: str, + ) -> Mapping[str, object]: + ... + + @staticmethod + @overload + def _coerce_properties( + properties: object, + *, + context: str, + allow_missing: Literal[False], + ) -> Mapping[str, object]: + ... + + @staticmethod + @overload + def _coerce_properties( + properties: object, + *, + context: str, + allow_missing: Literal[True], + ) -> Mapping[str, object] | None: + ... 
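+    # Note: the three @overload stubs above exist only for type checkers. The
+    # Literal[True]/Literal[False] flag lets callers passing allow_missing=True
+    # get Mapping[str, object] | None back, while the default call and
+    # allow_missing=False return a non-optional Mapping; only the
+    # implementation below carries a runtime body.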
+ + @staticmethod + def _coerce_properties( + properties: object, + *, + context: str, + allow_missing: bool = False, + ) -> Mapping[str, object] | None: + """Ensure Weaviate properties payloads are mappings.""" + if properties is None: + if allow_missing: + return None + raise StorageError(f"{context} returned object without properties") + + if not isinstance(properties, Mapping): + raise StorageError( + f"{context} returned invalid properties payload of type {type(properties)!r}" + ) + + return cast(Mapping[str, object], properties) + + def _normalize_collection_name(self, collection_name: str | None) -> str: + """Return a canonicalized collection name, defaulting to configured value.""" + candidate = collection_name or self.config.collection_name + if not candidate: + raise StorageError("Collection name is required") + + if normalized := candidate.strip(): + return normalized[0].upper() + normalized[1:] + else: + raise StorageError("Collection name cannot be empty") + + async def _ensure_collection(self, collection_name: str) -> None: + """Create the collection if missing.""" + if not self.client: + raise StorageError("Weaviate client not initialized") + + client = cast(weaviate.WeaviateClient, self.client) + existing = client.collections.list_all() + if collection_name not in existing: + await self._create_collection(collection_name) + + async def _prepare_collection( + self, + collection_name: str | None, + *, + ensure_exists: bool, + ) -> tuple[Collection, str]: + """Return a ready collection handle and normalized name.""" + normalized = self._normalize_collection_name(collection_name) + + if not self.client: + raise StorageError("Weaviate client not initialized") + + if ensure_exists: + await self._ensure_collection(normalized) + + client = cast(weaviate.WeaviateClient, self.client) + return client.collections.get(normalized), normalized + + @override + async def store(self, document: Document, *, collection_name: str | None = None) -> str: + """ + Store a document in Weaviate. + + Args: + document: Document to store + + Returns: + Document ID + """ + try: + # Vectorize content if no vector provided + if document.vector is None: + document.vector = await self.vectorizer.vectorize(document.content) + + collection, resolved_name = await self._prepare_collection( + collection_name, ensure_exists=True + ) + + # Prepare properties + properties = { + "content": document.content, + "source_url": document.metadata["source_url"], + "title": document.metadata.get("title", ""), + "description": document.metadata.get("description", ""), + "timestamp": document.metadata["timestamp"].isoformat(), + "content_type": document.metadata["content_type"], + "word_count": document.metadata["word_count"], + "char_count": document.metadata["char_count"], + "source": document.source.value, + } + + # Insert with vector + result = collection.data.insert( + properties=properties, vector=document.vector, uuid=str(document.id) + ) + + return str(result) + + except Exception as e: + raise StorageError(f"Failed to store document: {e}") from e + + @override + async def store_batch( + self, documents: list[Document], *, collection_name: str | None = None + ) -> list[str]: + """ + Store multiple documents using proper batch operations. 
+
+        Args:
+            documents: List of documents
+
+        Returns:
+            List of successfully stored document IDs
+        """
+        try:
+            collection, resolved_name = await self._prepare_collection(
+                collection_name, ensure_exists=True
+            )
+
+            # Vectorize documents without vectors
+            for doc in documents:
+                if doc.vector is None:
+                    doc.vector = await self.vectorizer.vectorize(doc.content)
+
+            # Prepare batch data for insert_many
+            batch_objects: list[DataObject] = []
+            for doc in documents:
+                properties = {
+                    "content": doc.content,
+                    "source_url": doc.metadata["source_url"],
+                    "title": doc.metadata.get("title", ""),
+                    "description": doc.metadata.get("description", ""),
+                    "timestamp": doc.metadata["timestamp"].isoformat(),
+                    "content_type": doc.metadata["content_type"],
+                    "word_count": doc.metadata["word_count"],
+                    "char_count": doc.metadata["char_count"],
+                    "source": doc.source.value,
+                }
+
+                batch_objects.append(
+                    DataObject(properties=properties, vector=doc.vector, uuid=str(doc.id))
+                )
+
+            # Insert batch using insert_many
+            response = collection.data.insert_many(batch_objects)
+
+            successful_ids: list[str] = []
+            error_indices = set(response.errors.keys()) if response else set()
+
+            for index, doc in enumerate(documents):
+                if index in error_indices:
+                    continue
+
+                uuid_value = response.uuids.get(index) if response else None
+                successful_ids.append(str(uuid_value) if uuid_value is not None else str(doc.id))
+
+            if error_indices:
+                error_messages = ", ".join(
+                    f"{documents[i].id}: {response.errors[i].message}"
+                    for i in error_indices
+                    if hasattr(response.errors[i], "message")
+                )
+                # Log rather than print: partial batch failures should surface
+                # in logs, matching the error handling used elsewhere here.
+                import logging
+
+                logging.warning(
+                    "Weaviate partial batch failure for collection "
+                    f"{resolved_name}: {error_messages}"
+                )
+
+            return successful_ids
+
+        except WeaviateBatchError as e:
+            raise StorageError(f"Batch operation failed: {e}") from e
+        except WeaviateConnectionError as e:
+            raise StorageError(f"Connection to Weaviate failed: {e}") from e
+        except Exception as e:
+            raise StorageError(f"Failed to store batch: {e}") from e
+
+    @override
+    async def retrieve(
+        self, document_id: str, *, collection_name: str | None = None
+    ) -> Document | None:
+        """
+        Retrieve a document from Weaviate.
+ + Args: + document_id: Document ID + + Returns: + Document or None + """ + try: + collection, resolved_name = await self._prepare_collection( + collection_name, ensure_exists=False + ) + result = collection.query.fetch_object_by_id(document_id) + + if not result: + return None + + # Reconstruct document + props = self._coerce_properties( + result.properties, + context="fetch_object_by_id", + ) + metadata_dict = { + "source_url": str(props["source_url"]), + "title": str(props.get("title")) if props.get("title") else None, + "description": str(props.get("description")) + if props.get("description") + else None, + "timestamp": str(props["timestamp"]), + "content_type": str(props["content_type"]), + "word_count": int(str(props["word_count"])), + "char_count": int(str(props["char_count"])), + } + metadata = cast(DocumentMetadata, cast(object, metadata_dict)) + + vector = self._extract_vector(cast(VectorContainer, result.vector)) + + return Document( + id=UUID(document_id), + content=str(props["content"]), + metadata=metadata, + vector=vector, + source=self._parse_source(props.get("source")), + collection=resolved_name, + ) + + except WeaviateQueryError as e: + raise StorageError(f"Query failed: {e}") from e + except WeaviateConnectionError as e: + # Connection issues should be logged and return None + import logging + logging.warning(f"Weaviate connection error retrieving document {document_id}: {e}") + return None + except Exception as e: + # Log unexpected errors for debugging + import logging + logging.warning(f"Unexpected error retrieving document {document_id}: {e}") + return None + + def _build_search_metadata(self, props: Mapping[str, object]) -> DocumentMetadata: + """Build metadata dictionary from Weaviate properties.""" + metadata_dict = { + "source_url": str(props["source_url"]), + "title": str(props.get("title")) if props.get("title") else None, + "description": str(props.get("description")) + if props.get("description") + else None, + "timestamp": str(props["timestamp"]), + "content_type": str(props["content_type"]), + "word_count": int(str(props["word_count"])), + "char_count": int(str(props["char_count"])), + } + return cast(DocumentMetadata, cast(object, metadata_dict)) + + def _extract_search_score(self, result: object) -> float | None: + """Extract and convert search score from result metadata.""" + metadata_obj = getattr(result, "metadata", None) + if metadata_obj is None: + return None + + raw_distance = getattr(metadata_obj, "distance", None) + if raw_distance is None: + return None + + try: + distance_value = float(raw_distance) + return max(0.0, 1.0 - distance_value) + except (TypeError, ValueError) as e: + import logging + logging.debug(f"Invalid distance value {raw_distance}: {e}") + return None + + def _build_search_document( + self, + result: object, + resolved_name: str, + ) -> Document: + """Build Document from Weaviate search result.""" + props = self._coerce_properties( + getattr(result, "properties", None), + context="search result", + ) + metadata = self._build_search_metadata(props) + + vector_attr = getattr(result, "vector", None) + vector = self._extract_vector(cast(VectorContainer, vector_attr)) + score_value = self._extract_search_score(result) + + uuid_raw = getattr(result, "uuid", None) + if uuid_raw is None: + raise StorageError("Weaviate search result missing uuid") + uuid_value = uuid_raw if isinstance(uuid_raw, UUID) else UUID(str(uuid_raw)) + + return Document( + id=uuid_value, + content=str(props["content"]), + metadata=metadata, + vector=vector, 
+ source=self._parse_source(props.get("source")), + collection=resolved_name, + score=score_value, + ) + + @override + async def search( + self, + query: str, + limit: int = 10, + threshold: float = 0.7, + *, + collection_name: str | None = None, + ) -> AsyncGenerator[Document, None]: + """ + Search for documents in Weaviate. + + Args: + query: Search query + limit: Maximum results + threshold: Similarity threshold + + Yields: + Matching documents + """ + try: + query_vector = await self.vectorizer.vectorize(query) + collection, resolved_name = await self._prepare_collection( + collection_name, ensure_exists=False + ) + + results = collection.query.near_vector( + near_vector=query_vector, + limit=limit, + distance=1 - threshold, + return_metadata=["distance"], + ) + + for result in results.objects: + yield self._build_search_document(result, resolved_name) + + except WeaviateQueryError as e: + raise StorageError(f"Search query failed: {e}") from e + except WeaviateConnectionError as e: + raise StorageError(f"Connection to Weaviate failed during search: {e}") from e + except Exception as e: + raise StorageError(f"Search failed: {e}") from e + + @override + async def delete(self, document_id: str, *, collection_name: str | None = None) -> bool: + """ + Delete a document from Weaviate. + + Args: + document_id: Document ID + + Returns: + True if deleted + """ + try: + collection, _ = await self._prepare_collection(collection_name, ensure_exists=False) + collection.data.delete_by_id(document_id) + return True + except WeaviateQueryError as e: + raise StorageError(f"Delete operation failed: {e}") from e + except Exception: + return False + + @override + async def count(self, *, collection_name: str | None = None) -> int: + """ + Get document count in collection. + + Returns: + Number of documents + """ + try: + if not self.client: + return 0 + collection, _ = await self._prepare_collection(collection_name, ensure_exists=False) + result = collection.aggregate.over_all(total_count=True) + return result.total_count or 0 + except WeaviateQueryError as e: + raise StorageError(f"Count query failed: {e}") from e + except Exception: + return 0 + + async def list_collections(self) -> list[str]: + """ + List all available collections. + + Returns: + List of collection names + """ + try: + if not self.client: + raise StorageError("Weaviate client not initialized") + + client = cast(weaviate.WeaviateClient, self.client) + return list(client.collections.list_all()) + + except Exception as e: + raise StorageError(f"Failed to list collections: {e}") from e + + async def describe_collections(self) -> list[dict[str, object]]: + """Return metadata for each Weaviate collection.""" + if not self.client: + raise StorageError("Weaviate client not initialized") + + try: + client = cast(weaviate.WeaviateClient, self.client) + collections: list[dict[str, object]] = [] + for name in client.collections.list_all(): + collection_obj = client.collections.get(name) + if not collection_obj: + continue + + count = collection_obj.aggregate.over_all(total_count=True).total_count or 0 + size_mb = count * 0.01 + collections.append( + { + "name": name, + "count": count, + "size_mb": size_mb, + } + ) + + return collections + except Exception as e: + raise StorageError(f"Failed to describe collections: {e}") from e + + async def sample_documents( + self, limit: int = 5, *, collection_name: str | None = None + ) -> list[Document]: + """ + Get sample documents from the collection. 
+ + Args: + limit: Maximum number of documents to return + + Returns: + List of sample documents + """ + try: + collection, resolved_name = await self._prepare_collection( + collection_name, ensure_exists=False + ) + + # Query for sample documents + response = collection.query.fetch_objects(limit=limit) + + documents = [] + for obj in response.objects: + # Convert back to Document format + props = self._coerce_properties( + getattr(obj, "properties", None), + context="sample_documents", + allow_missing=True, + ) + if props is None: + continue + uuid_raw = getattr(obj, "uuid", None) + if uuid_raw is None: + continue + document_id = uuid_raw if isinstance(uuid_raw, UUID) else UUID(str(uuid_raw)) + # Safely convert WeaviateField values + word_count_val = props.get("word_count") + if isinstance(word_count_val, (int, float)): + word_count = int(word_count_val) + elif word_count_val: + word_count = int(str(word_count_val)) + else: + word_count = 0 + + char_count_val = props.get("char_count") + if isinstance(char_count_val, (int, float)): + char_count = int(char_count_val) + elif char_count_val: + char_count = int(str(char_count_val)) + else: + char_count = 0 + + doc = Document( + id=document_id, + content=str(props.get("content", "")), + source=self._parse_source(props.get("source")), + metadata={ + "source_url": str(props.get("source_url", "")), + "title": str(props.get("title", "")) if props.get("title") else None, + "description": str(props.get("description", "")) + if props.get("description") + else None, + "timestamp": datetime.fromisoformat( + str(props.get("timestamp", datetime.now(UTC).isoformat())) + ), + "content_type": str(props.get("content_type", "text/plain")), + "word_count": word_count, + "char_count": char_count, + }, + collection=resolved_name, + ) + documents.append(doc) + + return documents + + except Exception as e: + raise StorageError(f"Failed to sample documents: {e}") from e + + def _safe_convert_count(self, value: object) -> int: + """Safely convert a value to integer count.""" + if isinstance(value, (int, float)): + return int(value) + elif value: + return int(str(value)) + else: + return 0 + + def _build_document_metadata(self, props: Mapping[str, object]) -> DocumentMetadata: + """Build metadata from search document properties.""" + return { + "source_url": str(props.get("source_url", "")), + "title": str(props.get("title", "")) if props.get("title") else None, + "description": str(props.get("description", "")) + if props.get("description") + else None, + "timestamp": datetime.fromisoformat( + str(props.get("timestamp", datetime.now(UTC).isoformat())) + ), + "content_type": str(props.get("content_type", "text/plain")), + "word_count": self._safe_convert_count(props.get("word_count")), + "char_count": self._safe_convert_count(props.get("char_count")), + } + + def _extract_document_score(self, obj: object) -> float | None: + """Extract score from document search result.""" + metadata_obj = getattr(obj, "metadata", None) + if metadata_obj is None: + return None + + raw_score = getattr(metadata_obj, "score", None) + if raw_score is None: + return None + + try: + return float(raw_score) + except (TypeError, ValueError) as e: + import logging + logging.debug(f"Invalid score value {raw_score}: {e}") + return None + + def _build_document_from_search( + self, + obj: object, + resolved_name: str, + ) -> Document: + """Build Document from search document result.""" + props = self._coerce_properties( + getattr(obj, "properties", None), + context="document search result", + ) + 
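+        # Hybrid/BM25 search reports relevance via result.metadata.score
+        # (higher is better), unlike near_vector, which reports a distance
+        # that _extract_search_score converts to 1 - distance; the two
+        # helpers above read the attribute appropriate to each query type.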
metadata = self._build_document_metadata(props) + score_value = self._extract_document_score(obj) + + uuid_raw = getattr(obj, "uuid", None) + if uuid_raw is None: + raise StorageError("Weaviate search document result missing uuid") + uuid_value = uuid_raw if isinstance(uuid_raw, UUID) else UUID(str(uuid_raw)) + + return Document( + id=uuid_value, + content=str(props.get("content", "")), + source=self._parse_source(props.get("source")), + metadata=metadata, + collection=resolved_name, + score=score_value, + ) + + async def search_documents( + self, query: str, limit: int = 10, *, collection_name: str | None = None + ) -> list[Document]: + """ + Search documents in the collection. + + Args: + query: Search query + limit: Maximum number of results + + Returns: + List of matching documents + """ + try: + if not self.client: + raise StorageError("Weaviate client not initialized") + + collection, resolved_name = await self._prepare_collection( + collection_name, ensure_exists=False + ) + + # Try hybrid search first, fall back to BM25 keyword search + try: + response = collection.query.hybrid( + query=query, limit=limit, return_metadata=["score"] + ) + except Exception: + response = collection.query.bm25( + query=query, limit=limit, return_metadata=["score"] + ) + + return [ + self._build_document_from_search(obj, resolved_name) + for obj in response.objects + ] + + except Exception as e: + raise StorageError(f"Failed to search documents: {e}") from e + + async def list_documents( + self, + limit: int = 100, + offset: int = 0, + *, + collection_name: str | None = None, + ) -> list[dict[str, object]]: + """ + List documents in the collection with pagination. + + Args: + limit: Maximum number of documents to return + offset: Number of documents to skip + + Returns: + List of document dictionaries with id, title, source_url, and content preview + """ + try: + if not self.client: + raise StorageError("Weaviate client not initialized") + + collection, _ = await self._prepare_collection(collection_name, ensure_exists=False) + + # Query documents with pagination + response = collection.query.fetch_objects( + limit=limit, offset=offset, return_metadata=["creation_time"] + ) + + documents: list[dict[str, object]] = [] + for obj in response.objects: + props = self._coerce_properties( + obj.properties, + context="list_documents", + allow_missing=True, + ) + if props is None: + continue + content = str(props.get("content", "")) + word_count_value = props.get("word_count", 0) + # Convert WeaviateField to int + if isinstance(word_count_value, (int, float)): + word_count = int(word_count_value) + elif word_count_value: + word_count = int(str(word_count_value)) + else: + word_count = 0 + + doc_info: dict[str, object] = { + "id": str(obj.uuid), + "title": str(props.get("title", "Untitled")), + "source_url": str(props.get("source_url", "")), + "description": str(props.get("description", "")), + "content_type": str(props.get("content_type", "text/plain")), + "content_preview": (f"{content[:200]}..." if len(content) > 200 else content), + "word_count": word_count, + "timestamp": str(props.get("timestamp", "")), + } + documents.append(doc_info) + + return documents + + except Exception as e: + raise StorageError(f"Failed to list documents: {e}") from e + + async def delete_documents( + self, document_ids: list[str], *, collection_name: str | None = None + ) -> dict[str, bool]: + """ + Delete multiple documents from Weaviate. 
+ + Args: + document_ids: List of document IDs to delete + + Returns: + Dictionary mapping document IDs to deletion success status + """ + results: dict[str, bool] = {} + + try: + if not self.client: + raise StorageError("Weaviate client not initialized") + + if not document_ids: + return results + + collection, resolved_name = await self._prepare_collection( + collection_name, ensure_exists=False + ) + + delete_filter = Filter.by_id().contains_any(document_ids) + response = collection.data.delete_many(where=delete_filter, verbose=True) + + if objects := getattr(response, "objects", None): + for result_obj in objects: + if doc_uuid := str(getattr(result_obj, "uuid", "")): + results[doc_uuid] = bool(getattr(result_obj, "successful", False)) + + if len(results) < len(document_ids): + default_success = getattr(response, "failed", 0) == 0 + for doc_id in document_ids: + _ = results.setdefault(doc_id, default_success) + + return results + + except Exception as e: + raise StorageError(f"Failed to delete documents: {e}") from e + + async def delete_by_filter( + self, filter_dict: dict[str, str], *, collection_name: str | None = None + ) -> int: + """ + Delete documents matching a filter. + + Args: + filter_dict: Filter criteria (e.g., {"source_url": "example.com"}) + + Returns: + Number of documents deleted + """ + try: + if not self.client: + raise StorageError("Weaviate client not initialized") + + collection, _ = await self._prepare_collection(collection_name, ensure_exists=False) + + # Build where filter + where_filter = None + if "source_url" in filter_dict: + where_filter = Filter.by_property("source_url").equal(filter_dict["source_url"]) + + # Get documents matching filter + if where_filter: + response = collection.query.fetch_objects( + filters=where_filter, + limit=1000, # Max batch size + ) + else: + response = collection.query.fetch_objects( + limit=1000 # Max batch size + ) + + # Delete matching documents + deleted_count = 0 + for obj in response.objects: + try: + collection.data.delete_by_id(obj.uuid) + deleted_count += 1 + except Exception: + continue + + return deleted_count + + except Exception as e: + raise StorageError(f"Failed to delete by filter: {e}") from e + + async def delete_collection(self, collection_name: str | None = None) -> bool: + """ + Delete the entire collection. 
+
+        Returns:
+            True if successful
+        """
+        try:
+            if not self.client:
+                raise StorageError("Weaviate client not initialized")
+
+            target = self._normalize_collection_name(collection_name)
+
+            # Delete the collection using the client's collections API
+            client = cast(weaviate.WeaviateClient, self.client)
+            client.collections.delete(target)
+
+            return True
+
+        except Exception as e:
+            raise StorageError(f"Failed to delete collection: {e}") from e
+
+    async def __aenter__(self) -> Self:
+        """Async context manager entry."""
+        return self
+
+    async def __aexit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc_val: BaseException | None,
+        exc_tb: object | None,
+    ) -> None:
+        """Async context manager exit with proper cleanup."""
+        await self.close()
+
+    async def close(self) -> None:
+        """Close client connection."""
+        if self.client:
+            try:
+                client = cast(weaviate.WeaviateClient, self.client)
+                client.close()
+            except Exception as e:
+                import logging
+                logging.warning(f"Error closing Weaviate client: {e}")
+
+    def __del__(self) -> None:
+        """Clean up client connection as fallback."""
+        if self.client:
+            try:
+                client = cast(weaviate.WeaviateClient, self.client)
+                client.close()
+            except Exception:
+                pass  # Ignore errors in destructor
+
+
diff --git a/tests/__pycache__/conftest.cpython-312-pytest-8.4.2.pyc b/tests/__pycache__/conftest.cpython-312-pytest-8.4.2.pyc
index e4ecd5a05ba14bc4509a6b106e0056610b91fd10..8047a3d8c967a60ec89cc7b42189b8d2e354b73b 100644
Binary files a/tests/__pycache__/conftest.cpython-312-pytest-8.4.2.pyc and b/tests/__pycache__/conftest.cpython-312-pytest-8.4.2.pyc differ
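For orientation, here is a minimal, hypothetical driver for the `WeaviateStorage` adapter above. It assumes `StorageConfig` accepts `endpoint` and `collection_name` keyword arguments and that a Weaviate instance is reachable at the placeholder URL; the metadata keys mirror the ones the adapter itself reads. Note that `__aenter__` returns `self` without connecting, so the sketch calls `initialize()` explicitly.

```python
import asyncio
from datetime import UTC, datetime
from uuid import uuid4

from ingest_pipeline.core.models import Document, IngestionSource, StorageConfig
from ingest_pipeline.storage.weaviate import WeaviateStorage


async def main() -> None:
    # Placeholder endpoint and collection; adjust to your deployment.
    config = StorageConfig(endpoint="http://weaviate.lab:8080", collection_name="docs")
    storage = WeaviateStorage(config)
    await storage.initialize()  # __aenter__ does not initialize; call explicitly
    try:
        doc = Document(
            id=uuid4(),
            content="hello world",
            source=IngestionSource.WEB,
            metadata={
                "source_url": "https://example.com/",
                "title": "Example",
                "description": None,
                "timestamp": datetime.now(UTC),
                "content_type": "text/html",
                "word_count": 2,
                "char_count": 11,
            },
        )
        await storage.store(doc)  # vectorizes content when doc.vector is None
        async for hit in storage.search("hello", limit=5, threshold=0.7):
            print(hit.id, hit.score)
    finally:
        await storage.close()


asyncio.run(main())
```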
z;E&Nz2rcr_G8)y;6h#l=fOx21JkiKNUX2*`PzD+<(bP4tg`P;z1)2XEzNVi?q=Y6_ zCt@yIpXU0yQGp^&0WQR2CVDmf)-#d=qIBAXFLX9RaWonTRe(&MMmPx1r3W_%Z;EkE zG1{MruX-SE_?I}3fc-7};RTTI(E_sn)h;d54;lIp&m^*D&b_vmElwN+pt)usF?rWO_b~iH zSJ!a&5ddfLY`{VqUN2Bw!6>DOTy8Rx{@1%RXhn$_?YT(ob(iF)64ExiSV23ompnB4 zXu~mRA%wa+u*LLR$6?G6uX@I$BYZ#Kk3|DW;?-)uy-pI8%pCu=pDm~1)DRZ-V!iZB zm@dqvL24(dd81N6L&9FN?+s28mCRgvqg}#2O61>j?!j;#G@_K4+C4PF_qcX$-^@RV z^+%DwN5=3E0RgU5U`(T4DxVH6jCG+QM)VRj=o)$Kdz&P`k<3KC_ohTk4SgF6`^oc{ zE$je!?Q*$~8ZAp};v=XEb7^C5(T9!^+e0;b(H^y^kB4+jDgNP^o5sE|HTDev=#VLD z#oFk_m?0U*4wHS~-!J)CGBf-A)vT5F7Rw(;x#{h26XRQLteWh6t0%#8@^+p_N$9P6 z-~zSvho7*%Pm(U6n<^aG^ybT$rtWwHY|r>^_?v`BF_&&TRB77|>PhNTG=oPyY2kmy z7YhIBM@VCPbPF%4$JAvK`0;XuUJ7IM2D$IaQj&4yN^by-rD|!;H9n!%{Q*^{mqRd{h;%&1pT{5+f)3-**gjM` zjD&U;4oI=b9>df)l45K~YKk*MUSf{x!Ez)&l#rwEoU6p7Gbg6$>N-5)8R1=BT|D$u z{CMvbOywi_0jYm?<+^p4#4nDg9d;g5vqsbX`glyyF2kIhijNtZ`nW@N Oh!uJMjiD*3?f(G>xneK? diff --git a/tests/conftest.py b/tests/conftest.py index 70cef40..8b77ee4 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -218,6 +218,46 @@ class AsyncClientStub: files=None, ) + async def request( + self, + method: str, + url: str, + *, + json: dict[str, object] | None = None, + data: dict[str, object] | None = None, + files: dict[str, tuple[str, bytes, str]] | None = None, + params: dict[str, str | bool] | None = None, + ) -> StubbedResponse: + """Generic request method that delegates to specific HTTP methods.""" + # Convert params to the format expected by other methods + converted_params: dict[str, object] | None = None + if params: + converted_params = {k: v for k, v in params.items()} + + method_upper = method.upper() + if method_upper == "GET": + return await self.get(url, params=converted_params) + elif method_upper == "POST": + return await self.post(url, json=json, files=files, params=converted_params) + elif method_upper == "DELETE": + return await self.delete(url, json=json, params=converted_params) + else: + # For other methods, use the consume/record pattern directly + normalized = self._record( + method=method_upper, + url=url, + json=json or data, + params=converted_params, + files=files, + ) + return self._consume( + method=method_upper, + url=normalized, + json=json or data, + params=converted_params, + files=files, + ) + async def aclose(self) -> None: return None diff --git a/tests/unit/flows/__pycache__/test_scheduler.cpython-312-pytest-8.4.2.pyc b/tests/unit/flows/__pycache__/test_scheduler.cpython-312-pytest-8.4.2.pyc index d386fd940fc62706bdb90391cd9e11d399ddba58..f1851be00c460b1fb312b63ec7fdc38748f56694 100644 GIT binary patch delta 1224 zcma)*TSydP6vyY=%j~Z1ZtCu;xvTDaNvC$ZnNez5re>u^6ctp+>Q1HY?$|eLVG8-s zLj?)((S@FTh+tp3;6oG%y@yZ`6hjZuTTcmwNf34Zvse-ZeK5Z{bI$oL=YPJr)%>x^ z`_kjdlW6oDn>$nPy5W7mvX5ui+haB4yUA`#S1l9lqF<7(SuzZH7zs8cO|W}xiXD=& zmcFbcn&i3GAVroYr4MfZ9r?Ap;!5{>U)5`0bovP4l{>nCnSEIsFQB&}YXigTV8)QS zni=J3E{i>mwXHr1Wf?D|myX>gc@dIb9iyq#SXWZJz;RYDJqu34(q(DUVs*uMfIfJ| z%tMEZv0Cxe849i=k6|4d+w#pD*L9U=!nO4yV_OxS$HcsIoH<0&6=79k%5_Js!0bj5 z%@eFpyvQq+%}r~?!@`L8k!Oz_qkKcz6Rccp>#&&=i|x#)KGdfqw0JY zX5b(SGaSoKMcntP1<;vCfo;Hc5%h;InfV*>y$NUlV!&~N;^j@S9Y8a146p$$zz(1l zXa{x!9Y7nf6WBnYbvj|^x(BNZ*ds~enLot#inso7eGgo{z&^mdSWr5=a_r=NT<<9X}An|WR}Y+5=wrZ8@{N5PVR5$_YOz9MsT-x4HYbHhsA;-#=`eOg*oTr1R- z#pkk;qko9{OGQdG1suj&jesejDH*A_Qnw-c)Ln#S*&x};VAOt&d~ISXSS`0b+V{rR zCY}UmohVVU@lG)jDwT0Gu8F&$2np~!)WBNAe5j1cBEP(Py#qn~UL4wUtY)g2ySVrb jzd2h{T4FS*w(@atqP)&m%r-A~S!R@*flJ}K%6^95+kr-0Xi-QsRSzbNx}U_A}& z1d6~Zf)VAju%SN3H1c-% zLsR4y582j=**Q{itIP5Lyd} None: captured: dict[str, object] = {} - - class 
diff --git a/tests/unit/flows/__pycache__/test_scheduler.cpython-312-pytest-8.4.2.pyc b/tests/unit/flows/__pycache__/test_scheduler.cpython-312-pytest-8.4.2.pyc
index d386fd940fc62706bdb90391cd9e11d399ddba58..f1851be00c460b1fb312b63ec7fdc38748f56694 100644
Binary files a/tests/unit/flows/__pycache__/test_scheduler.cpython-312-pytest-8.4.2.pyc and b/tests/unit/flows/__pycache__/test_scheduler.cpython-312-pytest-8.4.2.pyc differ
diff --git a/tests/unit/flows/test_scheduler.py b/tests/unit/flows/test_scheduler.py
--- a/tests/unit/flows/test_scheduler.py
+++ b/tests/unit/flows/test_scheduler.py
@@ ... @@ ... -> None:
     captured: dict[str, object] = {}
-
-
     class DummyFlow:
         def to_deployment(self, **kwargs: object) -> SimpleNamespace:
+            nonlocal captured
             captured |= kwargs
             return SimpleNamespace(**kwargs)
@@ -37,10 +36,9 @@
 
 def test_create_scheduled_deployment_interval(monkeypatch: pytest.MonkeyPatch) -> None:
     captured: dict[str, object] = {}
-
-
     class DummyFlow:
         def to_deployment(self, **kwargs: object) -> SimpleNamespace:
+            nonlocal captured
             captured |= kwargs
             return SimpleNamespace(**kwargs)
@@ -69,7 +67,7 @@
         called["deployments"] = deployments
         called["limit"] = limit
 
-    monkeypatch.setattr(scheduler, "serve", fake_serve)
+    monkeypatch.setattr(scheduler, "prefect_serve", fake_serve)
 
     deployment = SimpleNamespace(name="only")
     scheduler.serve_deployments([deployment])
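Both `DummyFlow` hunks add `nonlocal captured` for the same reason: `captured |= kwargs` is an augmented assignment, so Python treats `captured` as local to `to_deployment` unless it is declared nonlocal, and the method would raise UnboundLocalError before `dict.__ior__` ever merged the kwargs. A self-contained sketch of the pattern, independent of the real scheduler fixtures:

from types import SimpleNamespace


def capture_deployment_kwargs() -> dict[str, object]:
    captured: dict[str, object] = {}

    class DummyFlow:
        def to_deployment(self, **kwargs: object) -> SimpleNamespace:
            # Without this declaration, `captured |= kwargs` would bind a
            # new local name and raise UnboundLocalError at call time.
            nonlocal captured
            captured |= kwargs
            return SimpleNamespace(**kwargs)

    DummyFlow().to_deployment(name="ingest", interval=300)
    return captured


print(capture_deployment_kwargs())  # {'name': 'ingest', 'interval': 300}

The last hunk is a separate fix: the test now monkeypatches the `prefect_serve` attribute on the scheduler module rather than `serve`, which suggests the module imports Prefect's `serve` under that alias; the name is taken from the hunk itself, not verified against the module.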
diff --git a/tests/unit/storage/__pycache__/test_openwebui.cpython-312-pytest-8.4.2.pyc b/tests/unit/storage/__pycache__/test_openwebui.cpython-312-pytest-8.4.2.pyc
index 52b6cd563d176d61c7af92e46589aa7b99a8abbc..051c1f3ed8874548c60614c38b79399192c10efe 100644
Binary files a/tests/unit/storage/__pycache__/test_openwebui.cpython-312-pytest-8.4.2.pyc and b/tests/unit/storage/__pycache__/test_openwebui.cpython-312-pytest-8.4.2.pyc differ
[remaining binary delta payloads for compiled test artifacts omitted]
z@cQMLU*R-EB0_g)rieO}?2D!7I7%^U=lCe@?VKNSzBjK-q?EyUw2giRQVcXI_PQhy zLM{qbe-+?|ou?fag_19X+jyrikYov6GPbS0q6!yu``rjbatC9$7do!0XCJVj=Rh Z5BeSHJZ?C8LlvOIMS@(#Wu$mS{WqjiQ*HnN diff --git a/tests/unit/utils/__pycache__/test_vectorizer.cpython-312-pytest-8.4.2.pyc b/tests/unit/utils/__pycache__/test_vectorizer.cpython-312-pytest-8.4.2.pyc index 1c566abcd49c2213e67357ac4fcd18a7da851fcf..d3d034ca051d67db74670fd2ae0e5188b2ae04d6 100644 GIT binary patch delta 126 zcmcZ-dM=drG%qg~0}#lBpUs%Dk$0*HYY``qyLqEX86)G(%^G6-j6Q)EtOGBw1YY1a q2Qn^l2VQ3Zp{pXdL@+iG%qg~0}x!CcqU`wM&79+{8ha9IXQ{BiTX+Dsk*two0p4}F*07=EGNd# z=;5`){sN2F1#a^l_7}OmuCsv9Rj2@jcNxrsurILy)kB!bBv8rZMjp}4ec}$n07?2U A$N&HU diff --git a/tests/unit/utils/test_vectorizer.py b/tests/unit/utils/test_vectorizer.py index 5247e64..10f4543 100644 --- a/tests/unit/utils/test_vectorizer.py +++ b/tests/unit/utils/test_vectorizer.py @@ -48,7 +48,7 @@ async def test_vectorizer_storage_config_uses_defaults( vector = await vectorizer.vectorize("repo content") assert len(vector) == 1024 - assert httpx_stub.requests[0]["json_body"]["model"] == "ollama/bge-m3" + assert httpx_stub.requests[0]["json_body"]["model"] == "ollama/bge-m3:latest" assert httpx_stub.requests[0]["url"] == "http://llm.lab/v1/embeddings" diff --git a/uv.lock b/uv.lock index 5a3c29a..61adac4 100644 --- a/uv.lock +++ b/uv.lock @@ -236,18 +236,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/25/2f/efa9d26dbb612b774990741fd8f13c7cf4cfd085b870e4a5af5c82eaf5f1/authlib-1.6.3-py2.py3-none-any.whl", hash = "sha256:7ea0f082edd95a03b7b72edac65ec7f8f68d703017d7e37573aee4fc603f2a48", size = 240105, upload-time = "2025-08-26T12:13:23.889Z" }, ] -[[package]] -name = "basedpyright" -version = "1.31.4" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "nodejs-wheel-binaries" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/0b/53/570b03ec0445a9b2cc69788482c1d12902a9b88a9b159e449c4c537c4e3a/basedpyright-1.31.4.tar.gz", hash = "sha256:2450deb16530f7c88c1a7da04530a079f9b0b18ae1c71cb6f812825b3b82d0b1", size = 22494467, upload-time = "2025-09-03T13:05:55.817Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/e5/40/d1047a5addcade9291685d06ef42a63c1347517018bafd82747af9da0294/basedpyright-1.31.4-py3-none-any.whl", hash = "sha256:055e4a38024bd653be12d6216c1cfdbee49a1096d342b4d5f5b4560f7714b6fc", size = 11731440, upload-time = "2025-09-03T13:05:52.308Z" }, -] - [[package]] name = "cachetools" version = "6.2.0" @@ -989,8 +977,8 @@ dependencies = [ [package.dev-dependencies] dev = [ - { name = "basedpyright" }, { name = "mypy" }, + { name = "pylance" }, { name = "pyrefly" }, { name = "pytest" }, { name = "pytest-asyncio" }, @@ -1019,8 +1007,8 @@ requires-dist = [ [package.metadata.requires-dev] dev = [ - { name = "basedpyright", specifier = ">=1.31.4" }, { name = "mypy", specifier = ">=1.7.0" }, + { name = "pylance", specifier = ">=0.36.0" }, { name = "pyrefly", specifier = ">=0.33.0" }, { name = "pytest", specifier = ">=7.4.0" }, { name = "pytest-asyncio", specifier = ">=0.21.0" }, @@ -1432,19 +1420,84 @@ wheels = [ ] [[package]] -name = "nodejs-wheel-binaries" -version = "22.19.0" +name = "numpy" +version = "2.3.3" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/bd/ca/6033f80b7aebc23cb31ed8b09608b6308c5273c3522aedd043e8a0644d83/nodejs_wheel_binaries-22.19.0.tar.gz", hash = "sha256:e69b97ef443d36a72602f7ed356c6a36323873230f894799f4270a853932fdb3", size = 8060, upload-time = 
"2025-09-12T10:33:46.935Z" } +sdist = { url = "https://files.pythonhosted.org/packages/d0/19/95b3d357407220ed24c139018d2518fab0a61a948e68286a25f1a4d049ff/numpy-2.3.3.tar.gz", hash = "sha256:ddc7c39727ba62b80dfdbedf400d1c10ddfa8eefbd7ec8dcb118be8b56d31029", size = 20576648, upload-time = "2025-09-09T16:54:12.543Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/93/a2/0d055fd1d8c9a7a971c4db10cf42f3bba57c964beb6cf383ca053f2cdd20/nodejs_wheel_binaries-22.19.0-py2.py3-none-macosx_11_0_arm64.whl", hash = "sha256:43eca1526455a1fb4cb777095198f7ebe5111a4444749c87f5c2b84645aaa72a", size = 50902454, upload-time = "2025-09-12T10:33:18.3Z" }, - { url = "https://files.pythonhosted.org/packages/b5/f5/446f7b3c5be1d2f5145ffa3c9aac3496e06cdf0f436adeb21a1f95dd79a7/nodejs_wheel_binaries-22.19.0-py2.py3-none-macosx_11_0_x86_64.whl", hash = "sha256:feb06709e1320790d34babdf71d841ec7f28e4c73217d733e7f5023060a86bfc", size = 51837860, upload-time = "2025-09-12T10:33:21.599Z" }, - { url = "https://files.pythonhosted.org/packages/1e/4e/d0a036f04fd0f5dc3ae505430657044b8d9853c33be6b2d122bb171aaca3/nodejs_wheel_binaries-22.19.0-py2.py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:db9f5777292491430457c99228d3a267decf12a09d31246f0692391e3513285e", size = 57841528, upload-time = "2025-09-12T10:33:25.433Z" }, - { url = "https://files.pythonhosted.org/packages/e2/11/4811d27819f229cc129925c170db20c12d4f01ad366a0066f06d6eb833cf/nodejs_wheel_binaries-22.19.0-py2.py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1392896f1a05a88a8a89b26e182d90fdf3020b4598a047807b91b65731e24c00", size = 58368815, upload-time = "2025-09-12T10:33:29.083Z" }, - { url = "https://files.pythonhosted.org/packages/6e/94/df41416856b980e38a7ff280cfb59f142a77955ccdbec7cc4260d8ab2e78/nodejs_wheel_binaries-22.19.0-py2.py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:9164c876644f949cad665e3ada00f75023e18f381e78a1d7b60ccbbfb4086e73", size = 59690937, upload-time = "2025-09-12T10:33:32.771Z" }, - { url = "https://files.pythonhosted.org/packages/d1/39/8d0d5f84b7616bdc4eca725f5d64a1cfcac3d90cf3f30cae17d12f8e987f/nodejs_wheel_binaries-22.19.0-py2.py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:6b4b75166134010bc9cfebd30dc57047796a27049fef3fc22316216d76bc0af7", size = 60751996, upload-time = "2025-09-12T10:33:36.962Z" }, - { url = "https://files.pythonhosted.org/packages/41/93/2d66b5b60055dd1de6e37e35bef563c15e4cafa5cfe3a6990e0ab358e515/nodejs_wheel_binaries-22.19.0-py2.py3-none-win_amd64.whl", hash = "sha256:3f271f5abfc71b052a6b074225eca8c1223a0f7216863439b86feaca814f6e5a", size = 40026140, upload-time = "2025-09-12T10:33:40.33Z" }, - { url = "https://files.pythonhosted.org/packages/a3/46/c9cf7ff7e3c71f07ca8331c939afd09b6e59fc85a2944ea9411e8b29ce50/nodejs_wheel_binaries-22.19.0-py2.py3-none-win_arm64.whl", hash = "sha256:666a355fe0c9bde44a9221cd543599b029045643c8196b8eedb44f28dc192e06", size = 38804500, upload-time = "2025-09-12T10:33:43.302Z" }, + { url = "https://files.pythonhosted.org/packages/7a/45/e80d203ef6b267aa29b22714fb558930b27960a0c5ce3c19c999232bb3eb/numpy-2.3.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0ffc4f5caba7dfcbe944ed674b7eef683c7e94874046454bb79ed7ee0236f59d", size = 21259253, upload-time = "2025-09-09T15:56:02.094Z" }, + { url = "https://files.pythonhosted.org/packages/52/18/cf2c648fccf339e59302e00e5f2bc87725a3ce1992f30f3f78c9044d7c43/numpy-2.3.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e7e946c7170858a0295f79a60214424caac2ffdb0063d4d79cb681f9aa0aa569", 
size = 14450980, upload-time = "2025-09-09T15:56:05.926Z" }, + { url = "https://files.pythonhosted.org/packages/93/fb/9af1082bec870188c42a1c239839915b74a5099c392389ff04215dcee812/numpy-2.3.3-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:cd4260f64bc794c3390a63bf0728220dd1a68170c169088a1e0dfa2fde1be12f", size = 5379709, upload-time = "2025-09-09T15:56:07.95Z" }, + { url = "https://files.pythonhosted.org/packages/75/0f/bfd7abca52bcbf9a4a65abc83fe18ef01ccdeb37bfb28bbd6ad613447c79/numpy-2.3.3-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:f0ddb4b96a87b6728df9362135e764eac3cfa674499943ebc44ce96c478ab125", size = 6913923, upload-time = "2025-09-09T15:56:09.443Z" }, + { url = "https://files.pythonhosted.org/packages/79/55/d69adad255e87ab7afda1caf93ca997859092afeb697703e2f010f7c2e55/numpy-2.3.3-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:afd07d377f478344ec6ca2b8d4ca08ae8bd44706763d1efb56397de606393f48", size = 14589591, upload-time = "2025-09-09T15:56:11.234Z" }, + { url = "https://files.pythonhosted.org/packages/10/a2/010b0e27ddeacab7839957d7a8f00e91206e0c2c47abbb5f35a2630e5387/numpy-2.3.3-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bc92a5dedcc53857249ca51ef29f5e5f2f8c513e22cfb90faeb20343b8c6f7a6", size = 16938714, upload-time = "2025-09-09T15:56:14.637Z" }, + { url = "https://files.pythonhosted.org/packages/1c/6b/12ce8ede632c7126eb2762b9e15e18e204b81725b81f35176eac14dc5b82/numpy-2.3.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:7af05ed4dc19f308e1d9fc759f36f21921eb7bbfc82843eeec6b2a2863a0aefa", size = 16370592, upload-time = "2025-09-09T15:56:17.285Z" }, + { url = "https://files.pythonhosted.org/packages/b4/35/aba8568b2593067bb6a8fe4c52babb23b4c3b9c80e1b49dff03a09925e4a/numpy-2.3.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:433bf137e338677cebdd5beac0199ac84712ad9d630b74eceeb759eaa45ddf30", size = 18884474, upload-time = "2025-09-09T15:56:20.943Z" }, + { url = "https://files.pythonhosted.org/packages/45/fa/7f43ba10c77575e8be7b0138d107e4f44ca4a1ef322cd16980ea3e8b8222/numpy-2.3.3-cp311-cp311-win32.whl", hash = "sha256:eb63d443d7b4ffd1e873f8155260d7f58e7e4b095961b01c91062935c2491e57", size = 6599794, upload-time = "2025-09-09T15:56:23.258Z" }, + { url = "https://files.pythonhosted.org/packages/0a/a2/a4f78cb2241fe5664a22a10332f2be886dcdea8784c9f6a01c272da9b426/numpy-2.3.3-cp311-cp311-win_amd64.whl", hash = "sha256:ec9d249840f6a565f58d8f913bccac2444235025bbb13e9a4681783572ee3caa", size = 13088104, upload-time = "2025-09-09T15:56:25.476Z" }, + { url = "https://files.pythonhosted.org/packages/79/64/e424e975adbd38282ebcd4891661965b78783de893b381cbc4832fb9beb2/numpy-2.3.3-cp311-cp311-win_arm64.whl", hash = "sha256:74c2a948d02f88c11a3c075d9733f1ae67d97c6bdb97f2bb542f980458b257e7", size = 10460772, upload-time = "2025-09-09T15:56:27.679Z" }, + { url = "https://files.pythonhosted.org/packages/51/5d/bb7fc075b762c96329147799e1bcc9176ab07ca6375ea976c475482ad5b3/numpy-2.3.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:cfdd09f9c84a1a934cde1eec2267f0a43a7cd44b2cca4ff95b7c0d14d144b0bf", size = 20957014, upload-time = "2025-09-09T15:56:29.966Z" }, + { url = "https://files.pythonhosted.org/packages/6b/0e/c6211bb92af26517acd52125a237a92afe9c3124c6a68d3b9f81b62a0568/numpy-2.3.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:cb32e3cf0f762aee47ad1ddc6672988f7f27045b0783c887190545baba73aa25", size = 14185220, upload-time = "2025-09-09T15:56:32.175Z" }, + { url = 
"https://files.pythonhosted.org/packages/22/f2/07bb754eb2ede9073f4054f7c0286b0d9d2e23982e090a80d478b26d35ca/numpy-2.3.3-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:396b254daeb0a57b1fe0ecb5e3cff6fa79a380fa97c8f7781a6d08cd429418fe", size = 5113918, upload-time = "2025-09-09T15:56:34.175Z" }, + { url = "https://files.pythonhosted.org/packages/81/0a/afa51697e9fb74642f231ea36aca80fa17c8fb89f7a82abd5174023c3960/numpy-2.3.3-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:067e3d7159a5d8f8a0b46ee11148fc35ca9b21f61e3c49fbd0a027450e65a33b", size = 6647922, upload-time = "2025-09-09T15:56:36.149Z" }, + { url = "https://files.pythonhosted.org/packages/5d/f5/122d9cdb3f51c520d150fef6e87df9279e33d19a9611a87c0d2cf78a89f4/numpy-2.3.3-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1c02d0629d25d426585fb2e45a66154081b9fa677bc92a881ff1d216bc9919a8", size = 14281991, upload-time = "2025-09-09T15:56:40.548Z" }, + { url = "https://files.pythonhosted.org/packages/51/64/7de3c91e821a2debf77c92962ea3fe6ac2bc45d0778c1cbe15d4fce2fd94/numpy-2.3.3-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d9192da52b9745f7f0766531dcfa978b7763916f158bb63bdb8a1eca0068ab20", size = 16641643, upload-time = "2025-09-09T15:56:43.343Z" }, + { url = "https://files.pythonhosted.org/packages/30/e4/961a5fa681502cd0d68907818b69f67542695b74e3ceaa513918103b7e80/numpy-2.3.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:cd7de500a5b66319db419dc3c345244404a164beae0d0937283b907d8152e6ea", size = 16056787, upload-time = "2025-09-09T15:56:46.141Z" }, + { url = "https://files.pythonhosted.org/packages/99/26/92c912b966e47fbbdf2ad556cb17e3a3088e2e1292b9833be1dfa5361a1a/numpy-2.3.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:93d4962d8f82af58f0b2eb85daaf1b3ca23fe0a85d0be8f1f2b7bb46034e56d7", size = 18579598, upload-time = "2025-09-09T15:56:49.844Z" }, + { url = "https://files.pythonhosted.org/packages/17/b6/fc8f82cb3520768718834f310c37d96380d9dc61bfdaf05fe5c0b7653e01/numpy-2.3.3-cp312-cp312-win32.whl", hash = "sha256:5534ed6b92f9b7dca6c0a19d6df12d41c68b991cef051d108f6dbff3babc4ebf", size = 6320800, upload-time = "2025-09-09T15:56:52.499Z" }, + { url = "https://files.pythonhosted.org/packages/32/ee/de999f2625b80d043d6d2d628c07d0d5555a677a3cf78fdf868d409b8766/numpy-2.3.3-cp312-cp312-win_amd64.whl", hash = "sha256:497d7cad08e7092dba36e3d296fe4c97708c93daf26643a1ae4b03f6294d30eb", size = 12786615, upload-time = "2025-09-09T15:56:54.422Z" }, + { url = "https://files.pythonhosted.org/packages/49/6e/b479032f8a43559c383acb20816644f5f91c88f633d9271ee84f3b3a996c/numpy-2.3.3-cp312-cp312-win_arm64.whl", hash = "sha256:ca0309a18d4dfea6fc6262a66d06c26cfe4640c3926ceec90e57791a82b6eee5", size = 10195936, upload-time = "2025-09-09T15:56:56.541Z" }, + { url = "https://files.pythonhosted.org/packages/7d/b9/984c2b1ee61a8b803bf63582b4ac4242cf76e2dbd663efeafcb620cc0ccb/numpy-2.3.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f5415fb78995644253370985342cd03572ef8620b934da27d77377a2285955bf", size = 20949588, upload-time = "2025-09-09T15:56:59.087Z" }, + { url = "https://files.pythonhosted.org/packages/a6/e4/07970e3bed0b1384d22af1e9912527ecbeb47d3b26e9b6a3bced068b3bea/numpy-2.3.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:d00de139a3324e26ed5b95870ce63be7ec7352171bc69a4cf1f157a48e3eb6b7", size = 14177802, upload-time = "2025-09-09T15:57:01.73Z" }, + { url = 
"https://files.pythonhosted.org/packages/35/c7/477a83887f9de61f1203bad89cf208b7c19cc9fef0cebef65d5a1a0619f2/numpy-2.3.3-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:9dc13c6a5829610cc07422bc74d3ac083bd8323f14e2827d992f9e52e22cd6a6", size = 5106537, upload-time = "2025-09-09T15:57:03.765Z" }, + { url = "https://files.pythonhosted.org/packages/52/47/93b953bd5866a6f6986344d045a207d3f1cfbad99db29f534ea9cee5108c/numpy-2.3.3-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:d79715d95f1894771eb4e60fb23f065663b2298f7d22945d66877aadf33d00c7", size = 6640743, upload-time = "2025-09-09T15:57:07.921Z" }, + { url = "https://files.pythonhosted.org/packages/23/83/377f84aaeb800b64c0ef4de58b08769e782edcefa4fea712910b6f0afd3c/numpy-2.3.3-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:952cfd0748514ea7c3afc729a0fc639e61655ce4c55ab9acfab14bda4f402b4c", size = 14278881, upload-time = "2025-09-09T15:57:11.349Z" }, + { url = "https://files.pythonhosted.org/packages/9a/a5/bf3db6e66c4b160d6ea10b534c381a1955dfab34cb1017ea93aa33c70ed3/numpy-2.3.3-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5b83648633d46f77039c29078751f80da65aa64d5622a3cd62aaef9d835b6c93", size = 16636301, upload-time = "2025-09-09T15:57:14.245Z" }, + { url = "https://files.pythonhosted.org/packages/a2/59/1287924242eb4fa3f9b3a2c30400f2e17eb2707020d1c5e3086fe7330717/numpy-2.3.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:b001bae8cea1c7dfdb2ae2b017ed0a6f2102d7a70059df1e338e307a4c78a8ae", size = 16053645, upload-time = "2025-09-09T15:57:16.534Z" }, + { url = "https://files.pythonhosted.org/packages/e6/93/b3d47ed882027c35e94ac2320c37e452a549f582a5e801f2d34b56973c97/numpy-2.3.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8e9aced64054739037d42fb84c54dd38b81ee238816c948c8f3ed134665dcd86", size = 18578179, upload-time = "2025-09-09T15:57:18.883Z" }, + { url = "https://files.pythonhosted.org/packages/20/d9/487a2bccbf7cc9d4bfc5f0f197761a5ef27ba870f1e3bbb9afc4bbe3fcc2/numpy-2.3.3-cp313-cp313-win32.whl", hash = "sha256:9591e1221db3f37751e6442850429b3aabf7026d3b05542d102944ca7f00c8a8", size = 6312250, upload-time = "2025-09-09T15:57:21.296Z" }, + { url = "https://files.pythonhosted.org/packages/1b/b5/263ebbbbcede85028f30047eab3d58028d7ebe389d6493fc95ae66c636ab/numpy-2.3.3-cp313-cp313-win_amd64.whl", hash = "sha256:f0dadeb302887f07431910f67a14d57209ed91130be0adea2f9793f1a4f817cf", size = 12783269, upload-time = "2025-09-09T15:57:23.034Z" }, + { url = "https://files.pythonhosted.org/packages/fa/75/67b8ca554bbeaaeb3fac2e8bce46967a5a06544c9108ec0cf5cece559b6c/numpy-2.3.3-cp313-cp313-win_arm64.whl", hash = "sha256:3c7cf302ac6e0b76a64c4aecf1a09e51abd9b01fc7feee80f6c43e3ab1b1dbc5", size = 10195314, upload-time = "2025-09-09T15:57:25.045Z" }, + { url = "https://files.pythonhosted.org/packages/11/d0/0d1ddec56b162042ddfafeeb293bac672de9b0cfd688383590090963720a/numpy-2.3.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:eda59e44957d272846bb407aad19f89dc6f58fecf3504bd144f4c5cf81a7eacc", size = 21048025, upload-time = "2025-09-09T15:57:27.257Z" }, + { url = "https://files.pythonhosted.org/packages/36/9e/1996ca6b6d00415b6acbdd3c42f7f03ea256e2c3f158f80bd7436a8a19f3/numpy-2.3.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:823d04112bc85ef5c4fda73ba24e6096c8f869931405a80aa8b0e604510a26bc", size = 14301053, upload-time = "2025-09-09T15:57:30.077Z" }, + { url = 
"https://files.pythonhosted.org/packages/05/24/43da09aa764c68694b76e84b3d3f0c44cb7c18cdc1ba80e48b0ac1d2cd39/numpy-2.3.3-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:40051003e03db4041aa325da2a0971ba41cf65714e65d296397cc0e32de6018b", size = 5229444, upload-time = "2025-09-09T15:57:32.733Z" }, + { url = "https://files.pythonhosted.org/packages/bc/14/50ffb0f22f7218ef8af28dd089f79f68289a7a05a208db9a2c5dcbe123c1/numpy-2.3.3-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:6ee9086235dd6ab7ae75aba5662f582a81ced49f0f1c6de4260a78d8f2d91a19", size = 6738039, upload-time = "2025-09-09T15:57:34.328Z" }, + { url = "https://files.pythonhosted.org/packages/55/52/af46ac0795e09657d45a7f4db961917314377edecf66db0e39fa7ab5c3d3/numpy-2.3.3-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:94fcaa68757c3e2e668ddadeaa86ab05499a70725811e582b6a9858dd472fb30", size = 14352314, upload-time = "2025-09-09T15:57:36.255Z" }, + { url = "https://files.pythonhosted.org/packages/a7/b1/dc226b4c90eb9f07a3fff95c2f0db3268e2e54e5cce97c4ac91518aee71b/numpy-2.3.3-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:da1a74b90e7483d6ce5244053399a614b1d6b7bc30a60d2f570e5071f8959d3e", size = 16701722, upload-time = "2025-09-09T15:57:38.622Z" }, + { url = "https://files.pythonhosted.org/packages/9d/9d/9d8d358f2eb5eced14dba99f110d83b5cd9a4460895230f3b396ad19a323/numpy-2.3.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:2990adf06d1ecee3b3dcbb4977dfab6e9f09807598d647f04d385d29e7a3c3d3", size = 16132755, upload-time = "2025-09-09T15:57:41.16Z" }, + { url = "https://files.pythonhosted.org/packages/b6/27/b3922660c45513f9377b3fb42240bec63f203c71416093476ec9aa0719dc/numpy-2.3.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:ed635ff692483b8e3f0fcaa8e7eb8a75ee71aa6d975388224f70821421800cea", size = 18651560, upload-time = "2025-09-09T15:57:43.459Z" }, + { url = "https://files.pythonhosted.org/packages/5b/8e/3ab61a730bdbbc201bb245a71102aa609f0008b9ed15255500a99cd7f780/numpy-2.3.3-cp313-cp313t-win32.whl", hash = "sha256:a333b4ed33d8dc2b373cc955ca57babc00cd6f9009991d9edc5ddbc1bac36bcd", size = 6442776, upload-time = "2025-09-09T15:57:45.793Z" }, + { url = "https://files.pythonhosted.org/packages/1c/3a/e22b766b11f6030dc2decdeff5c2fb1610768055603f9f3be88b6d192fb2/numpy-2.3.3-cp313-cp313t-win_amd64.whl", hash = "sha256:4384a169c4d8f97195980815d6fcad04933a7e1ab3b530921c3fef7a1c63426d", size = 12927281, upload-time = "2025-09-09T15:57:47.492Z" }, + { url = "https://files.pythonhosted.org/packages/7b/42/c2e2bc48c5e9b2a83423f99733950fbefd86f165b468a3d85d52b30bf782/numpy-2.3.3-cp313-cp313t-win_arm64.whl", hash = "sha256:75370986cc0bc66f4ce5110ad35aae6d182cc4ce6433c40ad151f53690130bf1", size = 10265275, upload-time = "2025-09-09T15:57:49.647Z" }, + { url = "https://files.pythonhosted.org/packages/6b/01/342ad585ad82419b99bcf7cebe99e61da6bedb89e213c5fd71acc467faee/numpy-2.3.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:cd052f1fa6a78dee696b58a914b7229ecfa41f0a6d96dc663c1220a55e137593", size = 20951527, upload-time = "2025-09-09T15:57:52.006Z" }, + { url = "https://files.pythonhosted.org/packages/ef/d8/204e0d73fc1b7a9ee80ab1fe1983dd33a4d64a4e30a05364b0208e9a241a/numpy-2.3.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:414a97499480067d305fcac9716c29cf4d0d76db6ebf0bf3cbce666677f12652", size = 14186159, upload-time = "2025-09-09T15:57:54.407Z" }, + { url = 
"https://files.pythonhosted.org/packages/22/af/f11c916d08f3a18fb8ba81ab72b5b74a6e42ead4c2846d270eb19845bf74/numpy-2.3.3-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:50a5fe69f135f88a2be9b6ca0481a68a136f6febe1916e4920e12f1a34e708a7", size = 5114624, upload-time = "2025-09-09T15:57:56.5Z" }, + { url = "https://files.pythonhosted.org/packages/fb/11/0ed919c8381ac9d2ffacd63fd1f0c34d27e99cab650f0eb6f110e6ae4858/numpy-2.3.3-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:b912f2ed2b67a129e6a601e9d93d4fa37bef67e54cac442a2f588a54afe5c67a", size = 6642627, upload-time = "2025-09-09T15:57:58.206Z" }, + { url = "https://files.pythonhosted.org/packages/ee/83/deb5f77cb0f7ba6cb52b91ed388b47f8f3c2e9930d4665c600408d9b90b9/numpy-2.3.3-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9e318ee0596d76d4cb3d78535dc005fa60e5ea348cd131a51e99d0bdbe0b54fe", size = 14296926, upload-time = "2025-09-09T15:58:00.035Z" }, + { url = "https://files.pythonhosted.org/packages/77/cc/70e59dcb84f2b005d4f306310ff0a892518cc0c8000a33d0e6faf7ca8d80/numpy-2.3.3-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ce020080e4a52426202bdb6f7691c65bb55e49f261f31a8f506c9f6bc7450421", size = 16638958, upload-time = "2025-09-09T15:58:02.738Z" }, + { url = "https://files.pythonhosted.org/packages/b6/5a/b2ab6c18b4257e099587d5b7f903317bd7115333ad8d4ec4874278eafa61/numpy-2.3.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:e6687dc183aa55dae4a705b35f9c0f8cb178bcaa2f029b241ac5356221d5c021", size = 16071920, upload-time = "2025-09-09T15:58:05.029Z" }, + { url = "https://files.pythonhosted.org/packages/b8/f1/8b3fdc44324a259298520dd82147ff648979bed085feeacc1250ef1656c0/numpy-2.3.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:d8f3b1080782469fdc1718c4ed1d22549b5fb12af0d57d35e992158a772a37cf", size = 18577076, upload-time = "2025-09-09T15:58:07.745Z" }, + { url = "https://files.pythonhosted.org/packages/f0/a1/b87a284fb15a42e9274e7fcea0dad259d12ddbf07c1595b26883151ca3b4/numpy-2.3.3-cp314-cp314-win32.whl", hash = "sha256:cb248499b0bc3be66ebd6578b83e5acacf1d6cb2a77f2248ce0e40fbec5a76d0", size = 6366952, upload-time = "2025-09-09T15:58:10.096Z" }, + { url = "https://files.pythonhosted.org/packages/70/5f/1816f4d08f3b8f66576d8433a66f8fa35a5acfb3bbd0bf6c31183b003f3d/numpy-2.3.3-cp314-cp314-win_amd64.whl", hash = "sha256:691808c2b26b0f002a032c73255d0bd89751425f379f7bcd22d140db593a96e8", size = 12919322, upload-time = "2025-09-09T15:58:12.138Z" }, + { url = "https://files.pythonhosted.org/packages/8c/de/072420342e46a8ea41c324a555fa90fcc11637583fb8df722936aed1736d/numpy-2.3.3-cp314-cp314-win_arm64.whl", hash = "sha256:9ad12e976ca7b10f1774b03615a2a4bab8addce37ecc77394d8e986927dc0dfe", size = 10478630, upload-time = "2025-09-09T15:58:14.64Z" }, + { url = "https://files.pythonhosted.org/packages/d5/df/ee2f1c0a9de7347f14da5dd3cd3c3b034d1b8607ccb6883d7dd5c035d631/numpy-2.3.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:9cc48e09feb11e1db00b320e9d30a4151f7369afb96bd0e48d942d09da3a0d00", size = 21047987, upload-time = "2025-09-09T15:58:16.889Z" }, + { url = "https://files.pythonhosted.org/packages/d6/92/9453bdc5a4e9e69cf4358463f25e8260e2ffc126d52e10038b9077815989/numpy-2.3.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:901bf6123879b7f251d3631967fd574690734236075082078e0571977c6a8e6a", size = 14301076, upload-time = "2025-09-09T15:58:20.343Z" }, + { url = 
"https://files.pythonhosted.org/packages/13/77/1447b9eb500f028bb44253105bd67534af60499588a5149a94f18f2ca917/numpy-2.3.3-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:7f025652034199c301049296b59fa7d52c7e625017cae4c75d8662e377bf487d", size = 5229491, upload-time = "2025-09-09T15:58:22.481Z" }, + { url = "https://files.pythonhosted.org/packages/3d/f9/d72221b6ca205f9736cb4b2ce3b002f6e45cd67cd6a6d1c8af11a2f0b649/numpy-2.3.3-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:533ca5f6d325c80b6007d4d7fb1984c303553534191024ec6a524a4c92a5935a", size = 6737913, upload-time = "2025-09-09T15:58:24.569Z" }, + { url = "https://files.pythonhosted.org/packages/3c/5f/d12834711962ad9c46af72f79bb31e73e416ee49d17f4c797f72c96b6ca5/numpy-2.3.3-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0edd58682a399824633b66885d699d7de982800053acf20be1eaa46d92009c54", size = 14352811, upload-time = "2025-09-09T15:58:26.416Z" }, + { url = "https://files.pythonhosted.org/packages/a1/0d/fdbec6629d97fd1bebed56cd742884e4eead593611bbe1abc3eb40d304b2/numpy-2.3.3-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:367ad5d8fbec5d9296d18478804a530f1191e24ab4d75ab408346ae88045d25e", size = 16702689, upload-time = "2025-09-09T15:58:28.831Z" }, + { url = "https://files.pythonhosted.org/packages/9b/09/0a35196dc5575adde1eb97ddfbc3e1687a814f905377621d18ca9bc2b7dd/numpy-2.3.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:8f6ac61a217437946a1fa48d24c47c91a0c4f725237871117dea264982128097", size = 16133855, upload-time = "2025-09-09T15:58:31.349Z" }, + { url = "https://files.pythonhosted.org/packages/7a/ca/c9de3ea397d576f1b6753eaa906d4cdef1bf97589a6d9825a349b4729cc2/numpy-2.3.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:179a42101b845a816d464b6fe9a845dfaf308fdfc7925387195570789bb2c970", size = 18652520, upload-time = "2025-09-09T15:58:33.762Z" }, + { url = "https://files.pythonhosted.org/packages/fd/c2/e5ed830e08cd0196351db55db82f65bc0ab05da6ef2b72a836dcf1936d2f/numpy-2.3.3-cp314-cp314t-win32.whl", hash = "sha256:1250c5d3d2562ec4174bce2e3a1523041595f9b651065e4a4473f5f48a6bc8a5", size = 6515371, upload-time = "2025-09-09T15:58:36.04Z" }, + { url = "https://files.pythonhosted.org/packages/47/c7/b0f6b5b67f6788a0725f744496badbb604d226bf233ba716683ebb47b570/numpy-2.3.3-cp314-cp314t-win_amd64.whl", hash = "sha256:b37a0b2e5935409daebe82c1e42274d30d9dd355852529eab91dab8dcca7419f", size = 13112576, upload-time = "2025-09-09T15:58:37.927Z" }, + { url = "https://files.pythonhosted.org/packages/06/b9/33bba5ff6fb679aa0b1f8a07e853f002a6b04b9394db3069a1270a7784ca/numpy-2.3.3-cp314-cp314t-win_arm64.whl", hash = "sha256:78c9f6560dc7e6b3990e32df7ea1a50bbd0e2a111e05209963f5ddcab7073b0b", size = 10545953, upload-time = "2025-09-09T15:58:40.576Z" }, + { url = "https://files.pythonhosted.org/packages/b8/f2/7e0a37cfced2644c9563c529f29fa28acbd0960dde32ece683aafa6f4949/numpy-2.3.3-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:1e02c7159791cd481e1e6d5ddd766b62a4d5acf8df4d4d1afe35ee9c5c33a41e", size = 21131019, upload-time = "2025-09-09T15:58:42.838Z" }, + { url = "https://files.pythonhosted.org/packages/1a/7e/3291f505297ed63831135a6cc0f474da0c868a1f31b0dd9a9f03a7a0d2ed/numpy-2.3.3-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:dca2d0fc80b3893ae72197b39f69d55a3cd8b17ea1b50aa4c62de82419936150", size = 14376288, upload-time = "2025-09-09T15:58:45.425Z" }, + { url = 
"https://files.pythonhosted.org/packages/bf/4b/ae02e985bdeee73d7b5abdefeb98aef1207e96d4c0621ee0cf228ddfac3c/numpy-2.3.3-pp311-pypy311_pp73-macosx_14_0_arm64.whl", hash = "sha256:99683cbe0658f8271b333a1b1b4bb3173750ad59c0c61f5bbdc5b318918fffe3", size = 5305425, upload-time = "2025-09-09T15:58:48.6Z" }, + { url = "https://files.pythonhosted.org/packages/8b/eb/9df215d6d7250db32007941500dc51c48190be25f2401d5b2b564e467247/numpy-2.3.3-pp311-pypy311_pp73-macosx_14_0_x86_64.whl", hash = "sha256:d9d537a39cc9de668e5cd0e25affb17aec17b577c6b3ae8a3d866b479fbe88d0", size = 6819053, upload-time = "2025-09-09T15:58:50.401Z" }, + { url = "https://files.pythonhosted.org/packages/57/62/208293d7d6b2a8998a4a1f23ac758648c3c32182d4ce4346062018362e29/numpy-2.3.3-pp311-pypy311_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8596ba2f8af5f93b01d97563832686d20206d303024777f6dfc2e7c7c3f1850e", size = 14420354, upload-time = "2025-09-09T15:58:52.704Z" }, + { url = "https://files.pythonhosted.org/packages/ed/0c/8e86e0ff7072e14a71b4c6af63175e40d1e7e933ce9b9e9f765a95b4e0c3/numpy-2.3.3-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e1ec5615b05369925bd1125f27df33f3b6c8bc10d788d5999ecd8769a1fa04db", size = 16760413, upload-time = "2025-09-09T15:58:55.027Z" }, + { url = "https://files.pythonhosted.org/packages/af/11/0cc63f9f321ccf63886ac203336777140011fb669e739da36d8db3c53b98/numpy-2.3.3-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:2e267c7da5bf7309670523896df97f93f6e469fb931161f483cd6882b3b1a5dc", size = 12971844, upload-time = "2025-09-09T15:58:57.359Z" }, ] [[package]] @@ -1835,6 +1888,42 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5a/dd/464bd739bacb3b745a1c93bc15f20f0b1e27f0a64ec693367794b398673b/psycopg_binary-3.2.10-cp314-cp314-win_amd64.whl", hash = "sha256:d5c6a66a76022af41970bf19f51bc6bf87bd10165783dd1d40484bfd87d6b382", size = 2973554, upload-time = "2025-09-08T09:12:05.884Z" }, ] +[[package]] +name = "pyarrow" +version = "21.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ef/c2/ea068b8f00905c06329a3dfcd40d0fcc2b7d0f2e355bdb25b65e0a0e4cd4/pyarrow-21.0.0.tar.gz", hash = "sha256:5051f2dccf0e283ff56335760cbc8622cf52264d67e359d5569541ac11b6d5bc", size = 1133487, upload-time = "2025-07-18T00:57:31.761Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/94/dc/80564a3071a57c20b7c32575e4a0120e8a330ef487c319b122942d665960/pyarrow-21.0.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:c077f48aab61738c237802836fc3844f85409a46015635198761b0d6a688f87b", size = 31243234, upload-time = "2025-07-18T00:55:03.812Z" }, + { url = "https://files.pythonhosted.org/packages/ea/cc/3b51cb2db26fe535d14f74cab4c79b191ed9a8cd4cbba45e2379b5ca2746/pyarrow-21.0.0-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:689f448066781856237eca8d1975b98cace19b8dd2ab6145bf49475478bcaa10", size = 32714370, upload-time = "2025-07-18T00:55:07.495Z" }, + { url = "https://files.pythonhosted.org/packages/24/11/a4431f36d5ad7d83b87146f515c063e4d07ef0b7240876ddb885e6b44f2e/pyarrow-21.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:479ee41399fcddc46159a551705b89c05f11e8b8cb8e968f7fec64f62d91985e", size = 41135424, upload-time = "2025-07-18T00:55:11.461Z" }, + { url = "https://files.pythonhosted.org/packages/74/dc/035d54638fc5d2971cbf1e987ccd45f1091c83bcf747281cf6cc25e72c88/pyarrow-21.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = 
"sha256:40ebfcb54a4f11bcde86bc586cbd0272bac0d516cfa539c799c2453768477569", size = 42823810, upload-time = "2025-07-18T00:55:16.301Z" }, + { url = "https://files.pythonhosted.org/packages/2e/3b/89fced102448a9e3e0d4dded1f37fa3ce4700f02cdb8665457fcc8015f5b/pyarrow-21.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8d58d8497814274d3d20214fbb24abcad2f7e351474357d552a8d53bce70c70e", size = 43391538, upload-time = "2025-07-18T00:55:23.82Z" }, + { url = "https://files.pythonhosted.org/packages/fb/bb/ea7f1bd08978d39debd3b23611c293f64a642557e8141c80635d501e6d53/pyarrow-21.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:585e7224f21124dd57836b1530ac8f2df2afc43c861d7bf3d58a4870c42ae36c", size = 45120056, upload-time = "2025-07-18T00:55:28.231Z" }, + { url = "https://files.pythonhosted.org/packages/6e/0b/77ea0600009842b30ceebc3337639a7380cd946061b620ac1a2f3cb541e2/pyarrow-21.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:555ca6935b2cbca2c0e932bedd853e9bc523098c39636de9ad4693b5b1df86d6", size = 26220568, upload-time = "2025-07-18T00:55:32.122Z" }, + { url = "https://files.pythonhosted.org/packages/ca/d4/d4f817b21aacc30195cf6a46ba041dd1be827efa4a623cc8bf39a1c2a0c0/pyarrow-21.0.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:3a302f0e0963db37e0a24a70c56cf91a4faa0bca51c23812279ca2e23481fccd", size = 31160305, upload-time = "2025-07-18T00:55:35.373Z" }, + { url = "https://files.pythonhosted.org/packages/a2/9c/dcd38ce6e4b4d9a19e1d36914cb8e2b1da4e6003dd075474c4cfcdfe0601/pyarrow-21.0.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:b6b27cf01e243871390474a211a7922bfbe3bda21e39bc9160daf0da3fe48876", size = 32684264, upload-time = "2025-07-18T00:55:39.303Z" }, + { url = "https://files.pythonhosted.org/packages/4f/74/2a2d9f8d7a59b639523454bec12dba35ae3d0a07d8ab529dc0809f74b23c/pyarrow-21.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:e72a8ec6b868e258a2cd2672d91f2860ad532d590ce94cdf7d5e7ec674ccf03d", size = 41108099, upload-time = "2025-07-18T00:55:42.889Z" }, + { url = "https://files.pythonhosted.org/packages/ad/90/2660332eeb31303c13b653ea566a9918484b6e4d6b9d2d46879a33ab0622/pyarrow-21.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:b7ae0bbdc8c6674259b25bef5d2a1d6af5d39d7200c819cf99e07f7dfef1c51e", size = 42829529, upload-time = "2025-07-18T00:55:47.069Z" }, + { url = "https://files.pythonhosted.org/packages/33/27/1a93a25c92717f6aa0fca06eb4700860577d016cd3ae51aad0e0488ac899/pyarrow-21.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:58c30a1729f82d201627c173d91bd431db88ea74dcaa3885855bc6203e433b82", size = 43367883, upload-time = "2025-07-18T00:55:53.069Z" }, + { url = "https://files.pythonhosted.org/packages/05/d9/4d09d919f35d599bc05c6950095e358c3e15148ead26292dfca1fb659b0c/pyarrow-21.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:072116f65604b822a7f22945a7a6e581cfa28e3454fdcc6939d4ff6090126623", size = 45133802, upload-time = "2025-07-18T00:55:57.714Z" }, + { url = "https://files.pythonhosted.org/packages/71/30/f3795b6e192c3ab881325ffe172e526499eb3780e306a15103a2764916a2/pyarrow-21.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:cf56ec8b0a5c8c9d7021d6fd754e688104f9ebebf1bf4449613c9531f5346a18", size = 26203175, upload-time = "2025-07-18T00:56:01.364Z" }, + { url = "https://files.pythonhosted.org/packages/16/ca/c7eaa8e62db8fb37ce942b1ea0c6d7abfe3786ca193957afa25e71b81b66/pyarrow-21.0.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:e99310a4ebd4479bcd1964dff9e14af33746300cb014aa4a3781738ac63baf4a", size = 31154306, upload-time = 
"2025-07-18T00:56:04.42Z" }, + { url = "https://files.pythonhosted.org/packages/ce/e8/e87d9e3b2489302b3a1aea709aaca4b781c5252fcb812a17ab6275a9a484/pyarrow-21.0.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:d2fe8e7f3ce329a71b7ddd7498b3cfac0eeb200c2789bd840234f0dc271a8efe", size = 32680622, upload-time = "2025-07-18T00:56:07.505Z" }, + { url = "https://files.pythonhosted.org/packages/84/52/79095d73a742aa0aba370c7942b1b655f598069489ab387fe47261a849e1/pyarrow-21.0.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:f522e5709379d72fb3da7785aa489ff0bb87448a9dc5a75f45763a795a089ebd", size = 41104094, upload-time = "2025-07-18T00:56:10.994Z" }, + { url = "https://files.pythonhosted.org/packages/89/4b/7782438b551dbb0468892a276b8c789b8bbdb25ea5c5eb27faadd753e037/pyarrow-21.0.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:69cbbdf0631396e9925e048cfa5bce4e8c3d3b41562bbd70c685a8eb53a91e61", size = 42825576, upload-time = "2025-07-18T00:56:15.569Z" }, + { url = "https://files.pythonhosted.org/packages/b3/62/0f29de6e0a1e33518dec92c65be0351d32d7ca351e51ec5f4f837a9aab91/pyarrow-21.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:731c7022587006b755d0bdb27626a1a3bb004bb56b11fb30d98b6c1b4718579d", size = 43368342, upload-time = "2025-07-18T00:56:19.531Z" }, + { url = "https://files.pythonhosted.org/packages/90/c7/0fa1f3f29cf75f339768cc698c8ad4ddd2481c1742e9741459911c9ac477/pyarrow-21.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:dc56bc708f2d8ac71bd1dcb927e458c93cec10b98eb4120206a4091db7b67b99", size = 45131218, upload-time = "2025-07-18T00:56:23.347Z" }, + { url = "https://files.pythonhosted.org/packages/01/63/581f2076465e67b23bc5a37d4a2abff8362d389d29d8105832e82c9c811c/pyarrow-21.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:186aa00bca62139f75b7de8420f745f2af12941595bbbfa7ed3870ff63e25636", size = 26087551, upload-time = "2025-07-18T00:56:26.758Z" }, + { url = "https://files.pythonhosted.org/packages/c9/ab/357d0d9648bb8241ee7348e564f2479d206ebe6e1c47ac5027c2e31ecd39/pyarrow-21.0.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:a7a102574faa3f421141a64c10216e078df467ab9576684d5cd696952546e2da", size = 31290064, upload-time = "2025-07-18T00:56:30.214Z" }, + { url = "https://files.pythonhosted.org/packages/3f/8a/5685d62a990e4cac2043fc76b4661bf38d06efed55cf45a334b455bd2759/pyarrow-21.0.0-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:1e005378c4a2c6db3ada3ad4c217b381f6c886f0a80d6a316fe586b90f77efd7", size = 32727837, upload-time = "2025-07-18T00:56:33.935Z" }, + { url = "https://files.pythonhosted.org/packages/fc/de/c0828ee09525c2bafefd3e736a248ebe764d07d0fd762d4f0929dbc516c9/pyarrow-21.0.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:65f8e85f79031449ec8706b74504a316805217b35b6099155dd7e227eef0d4b6", size = 41014158, upload-time = "2025-07-18T00:56:37.528Z" }, + { url = "https://files.pythonhosted.org/packages/6e/26/a2865c420c50b7a3748320b614f3484bfcde8347b2639b2b903b21ce6a72/pyarrow-21.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:3a81486adc665c7eb1a2bde0224cfca6ceaba344a82a971ef059678417880eb8", size = 42667885, upload-time = "2025-07-18T00:56:41.483Z" }, + { url = "https://files.pythonhosted.org/packages/0a/f9/4ee798dc902533159250fb4321267730bc0a107d8c6889e07c3add4fe3a5/pyarrow-21.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:fc0d2f88b81dcf3ccf9a6ae17f89183762c8a94a5bdcfa09e05cfe413acf0503", size = 43276625, upload-time = "2025-07-18T00:56:48.002Z" }, + { url = 
"https://files.pythonhosted.org/packages/5a/da/e02544d6997037a4b0d22d8e5f66bc9315c3671371a8b18c79ade1cefe14/pyarrow-21.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:6299449adf89df38537837487a4f8d3bd91ec94354fdd2a7d30bc11c48ef6e79", size = 44951890, upload-time = "2025-07-18T00:56:52.568Z" }, + { url = "https://files.pythonhosted.org/packages/e5/4e/519c1bc1876625fe6b71e9a28287c43ec2f20f73c658b9ae1d485c0c206e/pyarrow-21.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:222c39e2c70113543982c6b34f3077962b44fca38c0bd9e68bb6781534425c10", size = 26371006, upload-time = "2025-07-18T00:56:56.379Z" }, +] + [[package]] name = "pycparser" version = "2.23" @@ -1960,6 +2049,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" }, ] +[[package]] +name = "pylance" +version = "0.36.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, + { name = "pyarrow" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/09/13/f7f029d12a3dfdc9f3059d77b3999d40f9cc064ba85fef885a08bf65dcb2/pylance-0.36.0-cp39-abi3-macosx_10_15_x86_64.whl", hash = "sha256:160ed088dc5fb63a71c8c96640d43ea58464f64bca8aa23b0337b1a96fd47b79", size = 43403867, upload-time = "2025-09-12T20:29:25.507Z" }, + { url = "https://files.pythonhosted.org/packages/95/95/defad18786260653b33d5ef8223736c0e481861c8d33311756bd471468ad/pylance-0.36.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:ce43ad002b4e67ffb1a33925d05d472bbde77c57a5e84aca1728faa9ace0c086", size = 39777498, upload-time = "2025-09-12T20:27:02.906Z" }, + { url = "https://files.pythonhosted.org/packages/19/33/7080ed4e45648d8c803a49cd5a206eb95176ef9dc06bff26748ec2109c65/pylance-0.36.0-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6ad7b168b0d4b7864be6040bebaf6d9a3959e76a190ff401a84b165b75eade96", size = 41819489, upload-time = "2025-09-12T20:17:06.37Z" }, + { url = "https://files.pythonhosted.org/packages/29/9a/0c572994d96e03e70481dafb2b062033a9ce24beb5ac6045f00f013ca57c/pylance-0.36.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:353deeb7b19be505db490258b5f2fc897efd4a45255fa0d51455662e01ad59ab", size = 45366480, upload-time = "2025-09-12T20:19:53.924Z" }, + { url = "https://files.pythonhosted.org/packages/fe/82/a74f0436b6a983c2798d1f44699352cd98c42bc335781ece98a878cf63fb/pylance-0.36.0-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:9cd963fc22257591d1daf281fa2369e05299d78950cb11980aa099d7cbacdf00", size = 41833322, upload-time = "2025-09-12T20:17:40.784Z" }, + { url = "https://files.pythonhosted.org/packages/a8/f2/d28fa3487992c3bd46af6838da13cf9a00be24fcf4cf928f77feec52d8d6/pylance-0.36.0-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:40117569a87379e08ed12eccac658999158f81df946f2ed02693b77776b57597", size = 45347065, upload-time = "2025-09-12T20:19:26.435Z" }, + { url = "https://files.pythonhosted.org/packages/ff/ab/e7fc302950f1c6815a6e832d052d0860130374bfe4bd482b075299dc8384/pylance-0.36.0-cp39-abi3-win_amd64.whl", hash = "sha256:a2930738192e5075220bc38c8a58ff4e48a71d53b3ca2a577ffce0318609cac0", size = 46348996, upload-time = "2025-09-12T20:36:04.663Z" }, +] + [[package]] name = "pyrefly" version = "0.33.0"