.env (1 change)

```diff
@@ -25,6 +25,7 @@ FIRECRAWL_ENDPOINT=http://crawl.lab:30002
 
 # Model Configuration
 EMBEDDING_MODEL=ollama/bge-m3:latest
 EMBEDDING_DIMENSION=1024
+METADATA_MODEL=fireworks/glm-4p5-air
 
 # Ingestion Settings
 BATCH_SIZE=50
```

```diff
@@ -2,6 +2,8 @@
 FIRECRAWL_API_KEY=
 OPENWEBUI_API_KEY=
 WEAVIATE_API_KEY=
+LLM_API_KEY=
+OPENAI_API_KEY=
 
 # Endpoints
 LLM_ENDPOINT=http://llm.lab
@@ -12,6 +14,7 @@ FIRECRAWL_ENDPOINT=http://crawl.lab:30002
 
 # Model Configuration
 EMBEDDING_MODEL=ollama/bge-m3:latest
 EMBEDDING_DIMENSION=1024
+METADATA_MODEL=fireworks/glm-4p5-air
 
 # Ingestion Settings
 BATCH_SIZE=50
```
.vscode/settings.json (vendored, 41 changes)

```diff
@@ -7,14 +7,23 @@
   "python.linting.mypyPath": "./.venv/bin/mypy",
   "python.linting.pylintEnabled": false,
   "python.linting.flake8Enabled": false,
-  "python.analysis.typeCheckingMode": "basic",
+  "python.analysis.typeCheckingMode": "strict",
   "python.analysis.autoImportCompletions": true,
   "python.analysis.stubPath": "./.venv/lib/python3.12/site-packages",
+  "python.analysis.memory.keepLibraryAst": false,
+  "python.analysis.indexing": true,
+  "python.analysis.packageIndexDepths": [
+    {
+      "name": "",
+      "depth": 2
+    }
+  ],
   "basedpyright.analysis.typeCheckingMode": "standard",
   "basedpyright.analysis.autoSearchPaths": true,
   "basedpyright.analysis.autoImportCompletions": true,
   "basedpyright.analysis.diagnosticMode": "workspace",
   "basedpyright.analysis.stubPath": "./.venv/lib/python3.12/site-packages",
   "basedpyright.analysis.useLibraryCodeForTypes": false,
   "basedpyright.analysis.extraPaths": [
     "./ingest_pipeline",
     "./.venv/lib/python3.12/site-packages"
@@ -29,9 +38,33 @@
     "./.venv/lib/python3.12/site-packages"
   ],
   "files.exclude": {
     ".mypy_cache": true,
     "**/__pycache__": true,
     "**/.pytest_cache": true,
-    "**/node_modules": true,
-    ".mypy_cache": true
-  }
+    "**/.ruff": true,
+    "**/.uv**": true,
+    "**/.venv": true,
+    "**/node_modules": true
+  },
+  "python.analysis.enableTroubleshootMissingImports": true,
+  "python.analysis.generateWithTypeAnnotation": true,
+  "python.analysis.inlayHints.callArgumentNames": "partial",
+  "python.analysis.languageServerMode": "full",
+  "python.analysis.regenerateStdLibIndices": true,
+  "python.analysis.typeEvaluation.enableExperimentalFeatures": true,
+  "python.analysis.typeEvaluation.strictDictionaryInference": true,
+  "python.analysis.typeEvaluation.strictListInference": true,
+  "python.analysis.typeEvaluation.strictSetInference": true,
+  "python.terminal.activateEnvInCurrentTerminal": true,
+  "python.testing.pytestEnabled": true,
+  "python.useEnvironmentsExtension": true,
+  "editor.formatOnSave": true,
+  "mcp": {},
+  "python.pyrefly.displayTypeErrors": "force-on",
+  "python-envs.defaultEnvManager": "ms-python.python:venv",
+  "python-envs.defaultPackageManager": "charliermarsh.ruff:uv",
+  "python-envs.pythonProjects": [],
+  "python.analysis.fixAll": [],
+  "python.analysis.includeAliasesFromUserFiles": true,
+  "python.analysis.showOnlyDirectDependenciesInAutoImport": true
+}
```
docs/feeds.md (307 changes)

**Removed** - the previous content of `docs/feeds.md`:

# TUI Feeds

This guide explains how the terminal dashboard surfaces collection activity and status signals so new backends can plug in without duplicating UI logic.

## Activity Feed

- **Primary surface:** the `#activity_feed` widget inside `DashboardScreen` (`ingest_pipeline/cli/tui/screens/dashboard.py`).
- **Data source:** `self.collections`, populated by `refresh_collections()` after gathering payloads from Weaviate and OpenWebUI via `describe_collections()`.
- **Selection logic:** `_generate_activity_text()` formats the three most recent `CollectionInfo` entries and appends an aggregate line when additional collections exist.
- **Empty state:** presents the call-to-action _"🚀 No collections found…"_ encouraging the user to launch an ingestion run.
- **Icons:** `_get_content_type_icon()` maps collection names containing `web`, `doc`, or `repo` to 🌐/📖/📦 respectively, and falls back to 📄. Update this helper when introducing new naming conventions; its shape is sketched below.
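A minimal sketch of that mapping (written as a standalone function for illustration; in the codebase it is a `DashboardScreen` method):

```python
def get_content_type_icon(collection_name: str) -> str:
    """Pick a feed icon from naming conventions; extend the checks for new sources."""
    name = collection_name.lower()
    if "web" in name:
        return "🌐"  # crawled web content
    if "doc" in name:
        return "📖"  # documentation sets
    if "repo" in name:
        return "📦"  # repository ingests
    return "📄"  # generic fallback
```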
### When it refreshes

1. `refresh_collections()` loads data for each connected backend and caches it in `self.collections`.
2. `_update_activity_feed()` is triggered from `update_metrics()` immediately after the metrics cards recompute.
3. The Static widget updates with a newline-delimited summary, keeping the dashboard reactive without rerendering the entire layout.

To surface a new backend, extend either `list_weaviate_collections()` or `list_openwebui_collections()` with the additional source (or introduce a new list helper) and ensure the resulting dictionaries match the `CollectionInfo` contract.

***

## Status Ticker

- **Widget:** the `#status_text` Static component under the metrics card cluster.
- **Lifecycle:** `refresh_collections()` pushes human-readable messages as each backend initializes, succeeds, or fails, ending with a ready state.
- **Problem reporting:** failures bubble into rich notifications via `self.notify` and remain visible in the ticker until the next refresh attempt.
- **System health badge:** `_update_status_card()` converts backend counts into 🟢/🟡/🔴 badges so operators can judge connectivity at a glance.

When adding a backend integration, hook into the progress text updates inside `refresh_collections()` so the ticker narrates each stage consistently.

***

## Notifications & Progress

- **Toast notifications:** all feed-relevant exceptions use `self.notify` with severity hints, keeping the activity feed focused on successful runs.
- **Ingestion progress:** `IngestionScreen.perform_ingestion()` (same module) drives the animated progress bar and sends celebratory/failure messages that complement the dashboard feed once control returns to the main screen.

***

## Extending the Feed System

1. Return a fully populated `CollectionInfo` (name, type, backend label, status, last_updated, size_mb, count); a sketch follows this list.
2. Call `update_metrics()` after mutating `self.collections` so both the metrics cards and the activity feed stay in sync.
3. Adjust `_get_content_type_icon()` or `_format_collection_item()` if the new source warrants distinct labeling.
4. Update end-to-end tests or manual runbooks to verify that the ticker, notifications, and activity feed stay coherent after integration.
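A hedged sketch of step 1 for a hypothetical `acme` backend. The `CollectionInfo` fields mirror how the existing Weaviate/OpenWebUI loaders populate them; `AcmeStorage` and its `describe_collections()` payload are assumptions:

```python
from datetime import datetime


async def list_acme_collections(storage: "AcmeStorage") -> list["CollectionInfo"]:
    """Hypothetical loader returning fully populated CollectionInfo entries."""
    collections: list[CollectionInfo] = []
    for item in await storage.describe_collections():
        collections.append(
            CollectionInfo(
                name=str(item.get("name", "Unknown")),
                type="acme",  # drives _get_content_type_icon() / _format_collection_item()
                count=int(item.get("count", 0)),
                backend="🧪 Acme",  # label shown in the activity feed
                status="✓ Active",
                last_updated=datetime.now().strftime("%Y-%m-%d %H:%M"),
                size_mb=float(item.get("size_mb", 0.0)),
            )
        )
    return collections
```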
## Implementation Status (September 17, 2025)

| Component | Responsibility | Location |
| --- | --- | --- |
| Activity feed rendering | `_update_activity_feed`, `_generate_activity_text`, `_format_collection_item` | `ingest_pipeline/cli/tui/screens/dashboard.py` |
| Backend loaders | `list_weaviate_collections`, `list_openwebui_collections` | `ingest_pipeline/cli/tui/screens/dashboard.py` |
| Status ticker & health badge | `_update_status_card`, `refresh_collections` progress updates | `ingest_pipeline/cli/tui/screens/dashboard.py` |
| Ingestion progress hand-off | `perform_ingestion` success/error notifications | `ingest_pipeline/cli/tui/screens/ingestion.py` |

***

## Multi-Storage Ingestion Refactor Plan

### 0. Guardrails and Baseline

- Activate the virtual environment (`source .venv/bin/activate`) before running any tooling.
- Capture the current lint, type, and test status (`uv run basedpyright`, `uv run ruff check`, `uv run pytest`) to compare against after the refactor.
- Record the existing ingestion modal behaviour (screenshots or a short `textual run --dev ingest_pipeline/cli/tui` demo) to verify UX parity later.

### 1. Storage Layer Enhancements

- Graduate `MultiStorageAdapter` into `ingest_pipeline/storage/` so it can be reused outside the TUI package.
- Extend `BaseStorage` with a descriptive `display_name` property that downstream UIs can show without hard-coding labels.
- Harden the adapter: aggregate per-backend failures, short-circuit `close()` safely, and surface a structured result containing `success_ids` and `failed_targets` (one possible shape is sketched below).
- Add `StorageManager.build_multi_adapter(backends: Sequence[StorageBackend])` that returns an initialised adapter (invoking `initialize()` on each child) and memoises singletons for reuse inside the session.
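One possible shape for that structured result, as a sketch (the names `MultiStoreResult` and `store_to_all` are illustrative, and `store_batch` is assumed to return the stored document IDs):

```python
from dataclasses import dataclass, field


@dataclass
class MultiStoreResult:
    """Aggregated outcome of fanning one batch out to several backends."""

    success_ids: list[str] = field(default_factory=list)
    failed_targets: dict[str, str] = field(default_factory=dict)  # backend label -> error

    @property
    def fully_succeeded(self) -> bool:
        return not self.failed_targets


async def store_to_all(storages: list["BaseStorage"], documents: list["Document"]) -> MultiStoreResult:
    """Aggregate per-backend failures instead of aborting the whole fan-out."""
    result = MultiStoreResult()
    for storage in storages:
        try:
            result.success_ids.extend(await storage.store_batch(documents))
        except Exception as exc:
            result.failed_targets[type(storage).__name__] = str(exc)
    return result
```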
### 2. Application Wiring

- Refactor `CollectionManagementApp` to accept a `StorageManager` plus optional cached clients, removing the direct constructor parameters for Weaviate/OpenWebUI.
- Update all screens (`dashboard.py`, `documents.py`, `search.py`, dialogs) to pull storages through the shared manager instead of owning bespoke references.
- Expose a capability flag (e.g., `StorageCapabilities.REPLICATION`) so the dashboard can badge backends that support multi-target ingestion.

### 3. Ingestion Modal UX

- Replace the single-backend select with a checkbox group generated from `StorageManager.get_available_backends()`; preserve the keyboard shortcuts (`1`, `2`, `3`, plus `ctrl+shift+<n>` for toggling if feasible).
- Default the selection to the collection's current backend, but allow "Select All"/"Clear" convenience buttons.
- Persist the latest selection inside a lightweight config file (for example `~/.config/rag-manager/tui.json`) to improve repeated runs; a sketch follows this list.
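A stdlib-only sketch of that persistence (the JSON layout is an assumption; only the path comes from the plan above):

```python
import json
from pathlib import Path

CONFIG_PATH = Path.home() / ".config" / "rag-manager" / "tui.json"


def save_backend_selection(backends: list[str]) -> None:
    """Remember the checkboxes the user last ticked in the ingestion modal."""
    CONFIG_PATH.parent.mkdir(parents=True, exist_ok=True)
    CONFIG_PATH.write_text(json.dumps({"last_backends": backends}, indent=2))


def load_backend_selection(default: list[str]) -> list[str]:
    """Restore the previous selection, defaulting to the collection's backend."""
    try:
        data = json.loads(CONFIG_PATH.read_text())
    except (FileNotFoundError, json.JSONDecodeError):
        return default
    return list(data.get("last_backends", default))
```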
### 4. Flow Integration

- Update `IngestionScreen.perform_ingestion()` to build the multi-adapter, pass it to `ingest_documents_task`, and capture per-backend success/failure counts for feed reporting.
- Teach the `ingest_pipeline/flows/ingestion.py` helpers to recognise the adapter (inspect for `fanout_targets`) and log progress per backend, while keeping the Firecrawl→R2R flow single-target until replication lands there.
- Ensure partial failures propagate as `IngestionStatus.PARTIAL` with an error message enumerating the failing targets.

### 5. Feeds, Ticker, and Notifications

- Extend `_generate_activity_text()` to append the backend list (e.g., `→ weaviate + open_webui`) when a multi-target run finishes.
- Add per-backend status lines to the progress ticker so operators know which replication stage is executing.
- Emit granular toast notifications: a success summary plus warning toasts for any backend that failed to store documents.

### 6. Validation

- Add unit coverage for `MultiStorageAdapter` (full success, partial failure, close semantics) under `ingest_pipeline/tests/storage/`.
- Create a focused TUI smoke test that opens the ingestion modal, toggles multiple checkboxes, and asserts the resulting progress copy.
- Re-run `uv run basedpyright`, `uv run ruff check`, and the targeted pytest suite before and after changes; address new diagnostics immediately.
- Optionally script a headless `textual run` that simulates ingestion across two mock storages to guard against regressions.

### 7. Documentation and Rollout

- Update this document and `README.md` with refreshed screenshots/GIFs demonstrating multi-backend ingestion.
- Draft release notes covering the required configuration (API keys for every backend) and outline rollback instructions (git tag + revert steps).
- Brief support/playbook owners on interpreting the enriched feed/ticker signals so incidents can be triaged quickly.

***

**Added** - the new content of `docs/feeds.md`:

# Codebase Analysis Report: RAG Manager Ingestion Pipeline

**Status:** Validated against the current codebase implementation
**Target:** Enhanced implementation guidance for efficient agent execution

***

This analysis has been validated against the actual codebase structure and provides implementation-specific details for executing the recommended improvements. The codebase demonstrates solid architecture with a clear separation of concerns between ingestion flows, storage adapters, and TUI components.

### Architecture Overview

- **Storage Backends:** Weaviate, OpenWebUI, and R2R behind a unified `BaseStorage` interface
- **TUI Framework:** Textual-based, with reactive components and async worker patterns
- **Orchestration:** Prefect flows with retry logic and progress callbacks
- **Configuration:** Pydantic-based settings with environment variable support

### Validated Implementation Analysis

### 1. Bug Fixes & Potential Issues

These are areas where the code may not function as intended or could lead to errors.

* <details>
  <summary><b>HIGH PRIORITY: `R2RStorage.store_batch` inefficient looping (Lines 161-179)</b></summary>

  * **File:** `ingest_pipeline/storage/r2r/storage.py:161-179`
  * **Issue:** CONFIRMED - the method loops through documents, calling `_store_single_document` one at a time
  * **Impact:** ~5-10x performance degradation for batch operations
  * **Implementation:** Check the R2R v3 API for bulk endpoints; the current implementation hits `/v3/documents` once per document (an interim concurrent fallback is sketched below)
  * **Effort:** Medium (API research + refactor)
  * **Priority:** High - affects all R2R ingestion workflows
  </details>
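If the API research finds no bulk endpoint, an interim improvement is to keep the per-document call but issue it with bounded concurrency. A method sketch under that assumption (`_store_single_document` is the existing helper; the semaphore limit is illustrative):

```python
import asyncio


async def store_batch(self, documents: list["Document"]) -> list[str]:
    """Store documents concurrently instead of awaiting each one in sequence."""
    semaphore = asyncio.Semaphore(10)  # illustrative cap so R2R is not flooded

    async def store_one(doc: "Document") -> str:
        async with semaphore:
            return await self._store_single_document(doc)

    return list(await asyncio.gather(*(store_one(doc) for doc in documents)))
```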
* <details>
  <summary><b>MEDIUM PRIORITY: Mixed HTTP client usage in `R2RStorage` (Lines 80, 99, 258)</b></summary>

  * **File:** `ingest_pipeline/storage/r2r/storage.py:80,99,258`
  * **Issue:** VALIDATED - mixes `R2RAsyncClient` (line 80) with a direct `httpx.AsyncClient` (lines 99, 258)
  * **Specific Methods:** `initialize()`, `_ensure_collection()`, `_attempt_document_creation()`
  * **Impact:** Inconsistent auth/header handling and connection-pooling inefficiency
  * **Implementation:** Extend `R2RAsyncClient` or create an adapter for the missing endpoints
  * **Test Coverage:** Check whether the affected methods have unit tests before refactoring
  * **Effort:** Medium (requires SDK analysis)
  </details>

* <details>
  <summary><b>MEDIUM PRIORITY: TUI blocking during storage init (Line 91)</b></summary>

  * **File:** `ingest_pipeline/cli/tui/utils/runners.py:91`
  * **Issue:** CONFIRMED - `await storage_manager.initialize_all_backends()` blocks TUI startup
  * **Current Implementation:** 30s timeout per backend in `StorageManager.initialize_all_backends()`
  * **User Impact:** A frozen terminal for up to 90s if all backends time out
  * **Solution:** Move the call into `CollectionOverviewScreen.on_mount()` as a `@work` task (sketched below)
  * **Dependencies:** `dashboard.py:304` already has a worker pattern for `refresh_collections`
  * **Implementation:** Use the existing loading indicators and status updates (lines 308-312)
  * **Effort:** Low (the pattern exists; it only needs relocation)
  </details>
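A sketch of that relocation using Textual's worker decorator (the widget ID and method names come from the surrounding analysis; treat the wiring as illustrative):

```python
from textual import work
from textual.screen import Screen
from textual.widgets import Static


class CollectionOverviewScreen(Screen[None]):
    def on_mount(self) -> None:
        self.connect_backends()  # returns immediately; the worker runs in the background

    @work(exclusive=True)
    async def connect_backends(self) -> None:
        """Initialize storages off the startup path so the terminal never freezes."""
        self.query_one("#status_text", Static).update("Connecting to backends…")
        await self.storage_manager.initialize_all_backends()
        await self.refresh_collections()
```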
* <details>
  <summary><b>LOW PRIORITY: Weak URL validation in `IngestionScreen` (Lines 240-260)</b></summary>

  * **File:** `ingest_pipeline/cli/tui/screens/ingestion.py:240-260`
  * **Issue:** CONFIRMED - the method accepts `foo/bar` as valid (line 258)
  * **Security Risk:** Medium - malicious URLs could be passed to ingestors
  * **Current Logic:** Basic prefix checks only (http/https/file://)
  * **Enhancement:** Add `pathlib.Path.exists()` for `file://` paths and a `.git` directory check for repos (a stdlib-only sketch follows)
  * **Dependencies:** Import `pathlib` and add proper regex validation
  * **Alternative:** Use the `validators` library (not currently imported)
  * **Effort:** Low (validation logic only)
  </details>
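A stdlib-only sketch of the tightened check (the scheme set and the `.git` heuristic follow the bullets above; the host regex is an assumption about what counts as plausible):

```python
import re
from pathlib import Path
from urllib.parse import urlparse


def validate_source(raw: str) -> bool:
    """Reject inputs like 'foo/bar' while keeping the currently accepted schemes."""
    parsed = urlparse(raw)
    if parsed.scheme in {"http", "https"}:
        # Require a plausible host, not just a scheme prefix.
        return re.fullmatch(r"[A-Za-z0-9.-]+(:\d+)?", parsed.netloc) is not None
    if parsed.scheme == "file":
        return Path(parsed.path).exists()
    # Bare paths only pass when they point at a local git repository.
    return (Path(raw) / ".git").is_dir()
```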
***

### 2. Code Redundancy & Refactoring Opportunities

These suggestions aim to make the code more concise, maintainable, and reusable (DRY - Don't Repeat Yourself).

* <details>
  <summary><b>HIGH IMPACT: Redundant collection logic in dashboard (Lines 356-424)</b></summary>

  * **File:** `ingest_pipeline/cli/tui/screens/dashboard.py:356-424`
  * **Issue:** CONFIRMED - `list_weaviate_collections()` and `list_openwebui_collections()` duplicate `StorageManager.get_all_collections()`
  * **Code Duplication:** ~70 lines of redundant collection-listing logic
  * **Architecture Violation:** The UI layer is coupled to specific storage implementations
  * **Current Usage:** `refresh_collections()` calls `get_all_collections()` (line 327), making the methods obsolete
  * **Action:** DELETE the methods `list_weaviate_collections` and `list_openwebui_collections`
  * **Impact:** ~70 fewer lines and improved maintainability
  * **Risk:** Low - the methods appear unused in the current flow
  * **Effort:** Low (deletion only)
  </details>

* <details>
  <summary><b>MEDIUM IMPACT: Repetitive backend init pattern (Lines 255-291)</b></summary>

  * **File:** `ingest_pipeline/cli/tui/utils/storage_manager.py:255-291`
  * **Issue:** CONFIRMED - the same pattern is repeated once per backend type
  * **Code Structure:** Check settings → create config → add task (12 lines × 3 backends)
  * **Current Backends:** Weaviate (258-267), OpenWebUI (270-279), R2R (282-291)
  * **Refactor Pattern:** Create a `BackendConfig` dataclass with `(backend_type, endpoint_setting, api_key_setting, storage_class)` - sketched below
  * **Implementation:** Loop over a config list, reducing ~36 lines to ~15 lines
  * **Extensibility:** Adding a new backend becomes a one-line config addition
  * **Testing:** Ensure the `asyncio.gather()` behavior is unchanged (line 296)
  * **Effort:** Medium (requires dataclass design + testing)
  </details>
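A sketch of that refactor. `BackendConfig` is the name proposed above; the `StorageConfig` construction mirrors the existing code in `initialize_all_backends()`:

```python
from dataclasses import dataclass

from pydantic import SecretStr


@dataclass(frozen=True)
class BackendConfig:
    """One row per backend; adding a backend becomes a single new entry."""

    backend: "StorageBackend"
    endpoint: str | None
    api_key: str | None
    storage_cls: type["BaseStorage"]


def backend_configs(settings: "Settings") -> list[BackendConfig]:
    return [
        BackendConfig(StorageBackend.WEAVIATE, settings.weaviate_endpoint, settings.weaviate_api_key, WeaviateStorage),
        BackendConfig(StorageBackend.OPEN_WEBUI, settings.openwebui_endpoint, settings.openwebui_api_key, OpenWebUIStorage),
        BackendConfig(StorageBackend.R2R, settings.r2r_endpoint, settings.r2r_api_key, R2RStorage),
    ]


# Inside initialize_all_backends(), the three near-identical blocks collapse to one loop:
for spec in backend_configs(self.settings):
    if not spec.endpoint:
        continue
    config = StorageConfig(
        backend=spec.backend,
        endpoint=spec.endpoint,
        api_key=SecretStr(spec.api_key) if spec.api_key else None,
        collection_name="default",
    )
    tasks.append((spec.backend, init_backend(spec.backend, config, spec.storage_cls)))
```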
* <details>
  <summary><b>MEDIUM IMPACT: Repeated Prefect block loading pattern (Lines 266-311)</b></summary>

  * **File:** `ingest_pipeline/flows/ingestion.py:266-311`
  * **Issue:** CONFIRMED - the pattern appears in both the `_create_ingestor()` and `_create_storage()` methods
  * **Duplication:** `Block.aload()` + fallback logic is repeated 4x across the two methods
  * **Variable Resolution:** The batch-size logic (lines 244-255) also needs abstraction
  * **Helper Functions Needed (the first is sketched below):**
    - `load_block_with_fallback(block_slug: str, default_config: T) -> T`
    - `resolve_prefect_variable(var_name: str, default: T, type_cast: Type[T]) -> T`
  * **Impact:** Cleaner flow logic, better error handling, type safety
  * **Lines Reduced:** ~20 lines of repetitive code
  * **Effort:** Medium (requires generic typing)
  </details>
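A sketch of the first helper, adapted so the block class carries the return type. It assumes Prefect raises `ValueError` when a block document is missing; verify against the Prefect version in use:

```python
from typing import TypeVar

from prefect.blocks.core import Block

B = TypeVar("B", bound=Block)


async def load_block_with_fallback(block_cls: type[B], name: str, default: B) -> B:
    """Load a named configuration block, falling back to an in-code default."""
    try:
        loaded = await block_cls.aload(name)
    except ValueError:
        # Block document not found (or never registered) - use the default.
        return default
    return loaded  # type: ignore[return-value]
```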
***

### 3. User Experience (UX) Enhancements

These are suggestions to make the TUI more powerful, intuitive, and enjoyable for the user.

* <details>
  <summary><b>HIGH IMPACT: Document content viewer modal (Add to documents.py)</b></summary>

  * **Target File:** `ingest_pipeline/cli/tui/screens/documents.py`
  * **Current State:** READY - `DocumentManagementScreen` has table selection (line 212)
  * **Implementation:**
    - Add `Binding("v", "view_document", "View")` to BINDINGS (line 27)
    - Create `DocumentContentModal(ModalScreen)` with `ScrollableContainer` + `Markdown`
    - Use the existing `get_current_document()` method (line 212)
    - Fetch the full content via `storage.retrieve(document_id)`
  * **Dependencies:** Import `ModalScreen`, `ScrollableContainer`, and `Markdown` from Textual
  * **User Value:** HIGH - essential for the content-inspection workflow
  * **Effort:** Low-Medium (~50 lines of modal code)
  * **Pattern:** Follow the existing modal patterns in the codebase
  </details>

* <details>
  <summary><b>HIGH IMPACT: Analytics tab visualization (Lines 164-189)</b></summary>

  * **Target File:** `ingest_pipeline/cli/tui/screens/dashboard.py:164-189`
  * **Current State:** PLACEHOLDER - Static widgets with dummy content
  * **Data Source:** Use the existing `self.collections` (line 65) populated by `refresh_collections()`
  * **Implementation Options:**
    1. **Simple Text Chart:** an ASCII bar chart using the existing collections data (sketched below)
    2. **textual-plotext:** add the dependency + a bar chart widget
    3. **Custom Widget:** a simple bar visualization built from Static widgets
  * **Metrics to Show:**
    - Documents per collection (data available)
    - Storage usage per backend (calculated in `_calculate_metrics()`)
    - Ingestion timeline (requires timestamp tracking)
  * **Effort:** Low-Medium (depends on visualization complexity)
  * **Dependencies:** Consider `textual-plotext` or a pure-ASCII approach
  </details>
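A dependency-free sketch of option 1 (feed it `{c["name"]: c["count"] for c in self.collections}` and render the string into a `Static`):

```python
def render_bar_chart(counts: dict[str, int], width: int = 40) -> str:
    """Render documents-per-collection as a plain-text bar chart."""
    if not counts:
        return "No collections yet"
    peak = max(counts.values()) or 1
    label_width = max(len(name) for name in counts)
    lines: list[str] = []
    for name, count in sorted(counts.items(), key=lambda kv: -kv[1]):
        bar = "█" * max(1, round(width * count / peak))  # scale to the busiest collection
        lines.append(f"{name:<{label_width}} {bar} {count:,}")
    return "\n".join(lines)
```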
* <details>
  <summary><b>MEDIUM IMPACT: Global search implementation (Button exists, needs screen)</b></summary>

  * **Target File:** `ingest_pipeline/cli/tui/screens/dashboard.py`
  * **Current State:** READY - a "Search All" button exists (line 122) with a stubbed handler
  * **Backend Support:** the `StorageManager.search_across_backends()` method exists (lines 413-441)
  * **Implementation:**
    - Create `GlobalSearchScreen(ModalScreen)` with a search input + results table
    - Use the existing `search_across_backends()` method for data
    - Add a "Backend" column to the results table showing the data source
    - Handle async search with loading indicators
  * **Current Limitation:** Search only works for Weaviate (line 563) and needs extending
  * **Data Flow:** Input → `storage_manager.search_across_backends()` → results display
  * **Effort:** Medium (~100 lines for the new screen + search logic)
  </details>

* <details>
  <summary><b>MEDIUM IMPACT: R2R advanced features integration (Widgets ready)</b></summary>

  * **Target File:** `ingest_pipeline/cli/tui/screens/documents.py`
  * **Available Widgets:** CONFIRMED - `ChunkViewer`, `EntityGraph`, `CollectionStats`, and `DocumentOverview` in `r2r_widgets.py`
  * **Current Implementation:** a basic document table only; the R2R-specific features are unused
  * **Integration Points:**
    - Add an "R2R Details" button when `collection["type"] == "r2r"` (conditional UI)
    - Create an `R2RDocumentDetailsScreen` using the existing widgets
    - Use the `StorageManager.get_r2r_storage()` method (exists at line 442)
  * **R2R Methods Available:**
    - `get_document_chunks()`, `extract_entities()`, `get_document_overview()`
  * **User Value:** Medium-High for R2R users; showcases advanced features
  * **Effort:** Low-Medium (the widgets exist and only need screen integration)
  </details>

* <details>
  <summary><b>LOW IMPACT: Create collection dialog (Backend methods exist)</b></summary>

  * **Target File:** `ingest_pipeline/cli/tui/screens/dashboard.py`
  * **Backend Support:** CONFIRMED - a `create_collection()` method exists for R2R storage (line 690)
  * **Current State:** no "Create Collection" button in the existing UI
  * **Implementation:**
    - Add a "New Collection" button to the dashboard action buttons
    - Create a `CreateCollectionModal` with a name input + backend checkboxes
    - Iterate over `storage_manager.get_available_backends()` for backend selection
    - Call `storage.create_collection()` on the selected backends
  * **Backend Compatibility:** Check which storage backends support collection creation
  * **User Value:** Low-Medium (a manual workflow, not critical)
  * **Effort:** Low-Medium (~75 lines for the modal + integration)
  </details>

## Implementation Priority Matrix

### Quick Wins (High Impact, Low Effort)
1. **Delete redundant collection methods** (dashboard.py:356-424) - 5 min
2. **Fix TUI startup blocking** (runners.py:91) - 15 min
3. **Document content viewer modal** (documents.py) - 30 min

### High Impact Fixes (Medium Effort)
1. **R2R batch operation optimization** (storage.py:161-179) - research the R2R v3 API + implementation
2. **Analytics tab visualization** (dashboard.py:164-189) - choose a visualization approach + implement
3. **Backend initialization refactoring** (storage_manager.py:255-291) - dataclass design + testing

### Technical Debt (Long-term)
1. **R2R client consistency** (storage.py) - SDK analysis + refactoring
2. **Prefect block loading helpers** (ingestion.py:266-311) - generic typing + testing
3. **URL validation enhancement** (ingestion.py:240-260) - security + validation logic

### Feature Enhancements (User Value)
1. **Global search implementation** - medium effort; requires extending the search backend
2. **R2R advanced features integration** - showcase existing widget capabilities
3. **Create collection dialog** - a nice-to-have administrative feature

## Agent Execution Notes

**Context Efficiency Tips:**
- Focus on one priority tier at a time
- Read the specific file ranges cited by line number above
- Use existing patterns (worker decorators, modal screens, async methods)
- Test changes incrementally, especially async operations
- Verify import dependencies before implementation

**Architecture Constraints:**
- Maintain async/await patterns throughout
- Follow Textual's reactive widget patterns
- Preserve the Prefect flow structure for orchestration
- Keep the storage backend abstraction intact

The codebase demonstrates excellent architectural foundations - these enhancements build upon existing strengths rather than requiring structural changes.
Binary files not shown (4).
ingest_pipeline/cli/tui/screens/dashboard.py

```diff
@@ -206,7 +206,11 @@ class CollectionOverviewScreen(Screen[None]):
         """Calculate basic metrics from collections."""
         self.total_collections = len(self.collections)
         self.total_documents = sum(col["count"] for col in self.collections)
-        self.active_backends = sum([bool(self.weaviate), bool(self.openwebui), bool(self.r2r)])
+        # Calculate active backends from storage manager if individual storages are None
+        if self.weaviate is None and self.openwebui is None and self.r2r is None:
+            self.active_backends = len(self.storage_manager.get_available_backends())
+        else:
+            self.active_backends = sum([bool(self.weaviate), bool(self.openwebui), bool(self.r2r)])
 
     def _update_metrics_cards(self) -> None:
         """Update the metrics cards display."""
@@ -353,75 +357,6 @@ class CollectionOverviewScreen(Screen[None]):
         self.is_loading = False
         loading_indicator.display = False
 
-    async def list_weaviate_collections(self) -> list[CollectionInfo]:
-        """List Weaviate collections with enhanced metadata."""
-        if not self.weaviate:
-            return []
-
-        try:
-            overview = await self.weaviate.describe_collections()
-            collections: list[CollectionInfo] = []
-
-            for item in overview:
-                count_raw = item.get("count", 0)
-                count_val = int(count_raw) if isinstance(count_raw, (int, str)) else 0
-                size_mb_raw = item.get("size_mb", 0.0)
-                size_mb_val = float(size_mb_raw) if isinstance(size_mb_raw, (int, float, str)) else 0.0
-                collections.append(
-                    CollectionInfo(
-                        name=str(item.get("name", "Unknown")),
-                        type="weaviate",
-                        count=count_val,
-                        backend="🗄️ Weaviate",
-                        status="✓ Active",
-                        last_updated=datetime.now().strftime("%Y-%m-%d %H:%M"),
-                        size_mb=size_mb_val,
-                    )
-                )
-
-            return collections
-        except Exception as e:
-            self.notify(f"Error listing Weaviate collections: {e}", severity="error", markup=False)
-            return []
-
-    async def list_openwebui_collections(self) -> list[CollectionInfo]:
-        """List OpenWebUI collections with enhanced metadata."""
-        # Try to get OpenWebUI backend from storage manager if direct instance not available
-        openwebui_backend = self.openwebui
-        if not openwebui_backend:
-            backend = self.storage_manager.get_backend(StorageBackend.OPEN_WEBUI)
-            if not isinstance(backend, OpenWebUIStorage):
-                return []
-            openwebui_backend = backend
-        if not openwebui_backend:
-            return []
-
-        try:
-            overview = await openwebui_backend.describe_collections()
-            collections: list[CollectionInfo] = []
-
-            for item in overview:
-                count_raw = item.get("count", 0)
-                count_val = int(count_raw) if isinstance(count_raw, (int, str)) else 0
-                size_mb_raw = item.get("size_mb", 0.0)
-                size_mb_val = float(size_mb_raw) if isinstance(size_mb_raw, (int, float, str)) else 0.0
-                collection_name = str(item.get("name", "Unknown"))
-                collections.append(
-                    CollectionInfo(
-                        name=collection_name,
-                        type="openwebui",
-                        count=count_val,
-                        backend="🌐 OpenWebUI",
-                        status="✓ Active",
-                        last_updated=datetime.now().strftime("%Y-%m-%d %H:%M"),
-                        size_mb=size_mb_val,
-                    )
-                )
-
-            return collections
-        except Exception as e:
-            self.notify(f"Error listing OpenWebUI collections: {e}", severity="error", markup=False)
-            return []
-
     async def update_collections_table(self) -> None:
         """Update the collections table with enhanced formatting."""
```
```diff
@@ -3,7 +3,7 @@
 from __future__ import annotations
 
 from pathlib import Path
-from typing import TYPE_CHECKING, ClassVar
+from typing import TYPE_CHECKING
 
 from textual.app import ComposeResult
 from textual.binding import Binding
@@ -15,6 +15,7 @@ from typing_extensions import override
 from ..models import CollectionInfo
 
 if TYPE_CHECKING:
+    from ..app import CollectionManagementApp
     from .dashboard import CollectionOverviewScreen
     from .documents import DocumentManagementScreen
 
@@ -25,7 +26,12 @@ class ConfirmDeleteScreen(Screen[None]):
     collection: CollectionInfo
     parent_screen: CollectionOverviewScreen
 
-    BINDINGS: list[Binding] = [
+    @property
+    def app(self) -> CollectionManagementApp:  # type: ignore[override]
+        """Return the typed app instance."""
+        return super().app  # type: ignore[return-value]
+
+    BINDINGS = [
         Binding("escape", "app.pop_screen", "Cancel"),
         Binding("y", "confirm_delete", "Yes"),
         Binding("n", "app.pop_screen", "No"),
@@ -132,12 +138,16 @@ class ConfirmDeleteScreen(Screen[None]):
                 return
 
             # Refresh parent screen after a short delay to ensure deletion is processed
-            self.call_later(lambda _: self.parent_screen.refresh_collections(), 0.5)  # 500ms delay
+            self.call_later(self._refresh_parent_collections, 0.5)  # 500ms delay
             self.app.pop_screen()
 
         except Exception as e:
             self.notify(f"Failed to delete collection: {e}", severity="error", markup=False)
 
+    def _refresh_parent_collections(self) -> None:
+        """Helper method to refresh parent collections."""
+        self.parent_screen.refresh_collections()
+
 
 class ConfirmDocumentDeleteScreen(Screen[None]):
@@ -145,9 +155,14 @@ class ConfirmDocumentDeleteScreen(Screen[None]):
 
     doc_ids: list[str]
     collection: CollectionInfo
-    parent_screen: "DocumentManagementScreen"
+    parent_screen: DocumentManagementScreen
 
-    BINDINGS: list[Binding] = [
+    @property
+    def app(self) -> CollectionManagementApp:  # type: ignore[override]
+        """Return the typed app instance."""
+        return super().app  # type: ignore[return-value]
+
+    BINDINGS = [
         Binding("escape", "app.pop_screen", "Cancel"),
         Binding("y", "confirm_delete", "Yes"),
         Binding("n", "app.pop_screen", "No"),
@@ -158,7 +173,7 @@ class ConfirmDocumentDeleteScreen(Screen[None]):
         self,
         doc_ids: list[str],
         collection: CollectionInfo,
-        parent_screen: "DocumentManagementScreen",
+        parent_screen: DocumentManagementScreen,
     ):
         super().__init__()
         self.doc_ids = doc_ids
@@ -244,7 +259,12 @@ class LogViewerScreen(ModalScreen[None]):
     _log_widget: RichLog | None
     _log_file: Path | None
 
-    BINDINGS: list[Binding] = [
+    @property
+    def app(self) -> CollectionManagementApp:  # type: ignore[override]
+        """Return the typed app instance."""
+        return super().app  # type: ignore[return-value]
+
+    BINDINGS = [
         Binding("escape", "close", "Close"),
         Binding("ctrl+l", "close", "Close"),
         Binding("s", "show_path", "Log File"),
@@ -272,13 +292,13 @@ class LogViewerScreen(ModalScreen[None]):
         self._log_widget = self.query_one(RichLog)
 
         if hasattr(self.app, 'attach_log_viewer'):
-            self.app.attach_log_viewer(self)
+            self.app.attach_log_viewer(self)  # type: ignore[arg-type]
 
     def on_unmount(self) -> None:
         """Detach from the parent application when closed."""
 
         if hasattr(self.app, 'detach_log_viewer'):
-            self.app.detach_log_viewer(self)
+            self.app.detach_log_viewer(self)  # type: ignore[arg-type]
 
     def _get_log_widget(self) -> RichLog:
         if self._log_widget is None:
```
ingest_pipeline/cli/tui/screens/documents.py

```diff
@@ -4,9 +4,9 @@ from datetime import datetime
 
 from textual.app import ComposeResult
 from textual.binding import Binding
-from textual.containers import Container, Horizontal
-from textual.screen import Screen
-from textual.widgets import Button, Footer, Header, Label, LoadingIndicator, Static
+from textual.containers import Container, Horizontal, ScrollableContainer
+from textual.screen import ModalScreen, Screen
+from textual.widgets import Button, Footer, Header, Label, LoadingIndicator, Markdown, Static
 from typing_extensions import override
 
 from ....storage.base import BaseStorage
@@ -27,6 +27,7 @@ class DocumentManagementScreen(Screen[None]):
     BINDINGS = [
         Binding("escape", "app.pop_screen", "Back"),
         Binding("r", "refresh", "Refresh"),
+        Binding("v", "view_document", "View"),
         Binding("delete", "delete_selected", "Delete Selected"),
         Binding("a", "select_all", "Select All"),
         Binding("ctrl+a", "select_all", "Select All"),
@@ -324,3 +325,112 @@ class DocumentManagementScreen(Screen[None]):
     ) -> None:
         """Handle clear selection from enhanced table."""
         self.action_select_none()
+
+    def action_view_document(self) -> None:
+        """View the content of the currently selected document."""
+        if doc := self.get_current_document():
+            if self.storage:
+                self.app.push_screen(DocumentContentModal(doc, self.storage, self.collection["name"]))
+            else:
+                self.notify("No storage backend available", severity="error")
+        else:
+            self.notify("No document selected", severity="warning")
+
+
+class DocumentContentModal(ModalScreen[None]):
+    """Modal screen for viewing document content."""
+
+    DEFAULT_CSS = """
+    DocumentContentModal {
+        align: center middle;
+    }
+
+    DocumentContentModal > Container {
+        width: 90%;
+        height: 85%;
+        background: $surface;
+        border: thick $primary;
+    }
+
+    DocumentContentModal .modal-header {
+        background: $primary;
+        color: $text;
+        padding: 1;
+        dock: top;
+        height: 3;
+    }
+
+    DocumentContentModal .modal-content {
+        padding: 1;
+        height: 1fr;
+    }
+    """
+
+    BINDINGS = [
+        Binding("escape", "app.pop_screen", "Close"),
+        Binding("q", "app.pop_screen", "Close"),
+    ]
+
+    def __init__(self, document: DocumentInfo, storage: BaseStorage, collection_name: str):
+        super().__init__()
+        self.document = document
+        self.storage = storage
+        self.collection_name = collection_name
+
+    def compose(self) -> ComposeResult:
+        yield Container(
+            Static(
+                f"📄 Document: {self.document['title'][:60]}{'...' if len(self.document['title']) > 60 else ''}",
+                classes="modal-header"
+            ),
+            ScrollableContainer(
+                Markdown("Loading document content...", id="document_content"),
+                LoadingIndicator(id="content_loading"),
+                classes="modal-content"
+            )
+        )
+
+    async def on_mount(self) -> None:
+        """Load and display the document content."""
+        content_widget = self.query_one("#document_content", Markdown)
+        loading = self.query_one("#content_loading")
+
+        try:
+            # Get full document content
+            doc_content = await self.storage.retrieve(
+                self.document["id"],
+                collection_name=self.collection_name
+            )
+
+            # Format content for display
+            if isinstance(doc_content, str):
+                formatted_content = f"""# {self.document['title']}
+
+**Source:** {self.document.get('source_url', 'N/A')}
+**Type:** {self.document.get('content_type', 'text/plain')}
+**Words:** {self.document.get('word_count', 0):,}
+**Timestamp:** {self.document.get('timestamp', 'N/A')}
+
+---
+
+{doc_content}
+"""
+            else:
+                formatted_content = f"""# {self.document['title']}
+
+**Source:** {self.document.get('source_url', 'N/A')}
+**Type:** {self.document.get('content_type', 'text/plain')}
+**Words:** {self.document.get('word_count', 0):,}
+**Timestamp:** {self.document.get('timestamp', 'N/A')}
+
+---
+
+*Content format not supported for display*
+"""
+
+            content_widget.update(formatted_content)
+
+        except Exception as e:
+            content_widget.update(f"# Error Loading Document\n\nFailed to load document content: {e}")
+        finally:
+            loading.display = False
```
```diff
@@ -2,7 +2,20 @@
 
 from dataclasses import dataclass
 from enum import Enum
-from typing import Any
+from typing import Protocol
+
+from textual.app import App
+
+# Type alias for Textual apps with unknown return type
+TextualApp = App[object]
+
+
+class AppProtocol(Protocol):
+    """Protocol for apps that support CSS and refresh."""
+
+    def refresh(self) -> None:
+        """Refresh the app."""
+        ...
 
 
 class ThemeType(Enum):
@@ -181,8 +194,8 @@ class ThemeManager:
     """Manages theme selection and CSS generation."""
 
     def __init__(self, default_theme: ThemeType = ThemeType.DARK):
-        self.current_theme = default_theme
-        self._themes = {
+        self.current_theme: ThemeType = default_theme
+        self._themes: dict[ThemeType, ColorPalette] = {
             ThemeType.DARK: ThemeRegistry.get_enhanced_dark(),
             ThemeType.LIGHT: ThemeRegistry.get_light(),
             ThemeType.HIGH_CONTRAST: ThemeRegistry.get_high_contrast(),
@@ -1106,18 +1119,16 @@ def get_css_for_theme(theme_type: ThemeType) -> str:
     return css
 
 
-def apply_theme_to_app(app: object, theme_type: ThemeType) -> None:
+def apply_theme_to_app(app: TextualApp | AppProtocol, theme_type: ThemeType) -> None:
     """Apply a theme to a Textual app instance."""
     try:
         css = set_theme(theme_type)
-        if hasattr(app, "stylesheet"):
-            app.stylesheet.clear()
-            app.stylesheet.parse(css)
-        elif hasattr(app, "CSS"):
+        # Set CSS using the standard Textual approach
+        if hasattr(app, "CSS") or isinstance(app, App):
             setattr(app, "CSS", css)
-        elif hasattr(app, "refresh"):
-            # Fallback: try to refresh the app with new CSS
-            app.refresh()
+        # Refresh the app to apply new CSS
+        if hasattr(app, "refresh"):
+            app.refresh()
     except Exception as e:
         # Graceful fallback - log but don't crash the UI
         import logging
@@ -1127,9 +1138,9 @@ def apply_theme_to_app(app: object, theme_type: ThemeType) -> None:
 class ThemeSwitcher:
     """Helper class for managing theme switching in TUI applications."""
 
-    def __init__(self, app: object | None = None) -> None:
-        self.app = app
-        self.theme_history = [ThemeType.DARK]
+    def __init__(self, app: TextualApp | AppProtocol | None = None) -> None:
+        self.app: TextualApp | AppProtocol | None = app
+        self.theme_history: list[ThemeType] = [ThemeType.DARK]
 
     def switch_theme(self, theme_type: ThemeType) -> str:
         """Switch to a new theme and apply it to the app if available."""
@@ -1157,7 +1168,7 @@ class ThemeSwitcher:
         next_theme = themes[(current_index + 1) % len(themes)]
         return self.switch_theme(next_theme)
 
-    def get_theme_info(self) -> dict[str, Any]:
+    def get_theme_info(self) -> dict[str, str | list[str] | dict[str, str]]:
         """Get information about the current theme."""
         palette = get_theme_palette()
         return {
```
Binary files not shown (2).
ingest_pipeline/cli/tui/utils/runners.py

```diff
@@ -86,49 +86,18 @@ async def run_textual_tui() -> None:
     LOGGER.info("Initializing collection management TUI")
     LOGGER.info("Scanning available storage backends")
 
-    # Initialize storage manager
+    # Create storage manager without initialization - let TUI handle it asynchronously
     storage_manager = StorageManager(settings)
-    backend_status = await storage_manager.initialize_all_backends()
-
-    # Report initialization results
-    for backend, success in backend_status.items():
-        if success:
-            LOGGER.info("%s connected successfully", backend.value)
-        else:
-            LOGGER.warning("%s connection failed", backend.value)
-
-    available_backends = storage_manager.get_available_backends()
-    if not available_backends:
-        LOGGER.error("Could not connect to any storage backend")
-        LOGGER.info("Please check your configuration and try again")
-        LOGGER.info("Supported backends: Weaviate, OpenWebUI, R2R")
-        return
-
-    LOGGER.info(
-        "Launching TUI with %d backend(s): %s",
-        len(available_backends),
-        ", ".join(backend.value for backend in available_backends),
-    )
-
-    # Get individual storage instances for backward compatibility
-    from ....storage.openwebui import OpenWebUIStorage
-    from ....storage.weaviate import WeaviateStorage
-
-    weaviate_backend = storage_manager.get_backend(StorageBackend.WEAVIATE)
-    openwebui_backend = storage_manager.get_backend(StorageBackend.OPEN_WEBUI)
-    r2r_backend = storage_manager.get_backend(StorageBackend.R2R)
-
-    # Type-safe casting to specific storage types
-    weaviate = weaviate_backend if isinstance(weaviate_backend, WeaviateStorage) else None
-    openwebui = openwebui_backend if isinstance(openwebui_backend, OpenWebUIStorage) else None
+    LOGGER.info("Launching TUI - storage backends will initialize in background")
 
     # Import here to avoid circular import
     from ..app import CollectionManagementApp
     app = CollectionManagementApp(
         storage_manager,
-        weaviate,
-        openwebui,
-        r2r_backend,
+        None,  # weaviate - will be available after initialization
+        None,  # openwebui - will be available after initialization
+        None,  # r2r_backend - will be available after initialization
         log_queue=logging_context.queue,
         log_formatter=logging_context.formatter,
         log_file=logging_context.log_file,
```
ingest_pipeline/cli/tui/utils/storage_manager.py

```diff
@@ -4,9 +4,11 @@
 from __future__ import annotations
 
 import asyncio
-from collections.abc import AsyncGenerator, Sequence
+from collections.abc import AsyncGenerator, Coroutine, Sequence
 from typing import TYPE_CHECKING, Protocol
 
+from pydantic import SecretStr
+
 from ....core.exceptions import StorageError
 from ....core.models import Document, StorageBackend, StorageConfig
 from ..models import CollectionInfo, StorageCapabilities
@@ -54,8 +56,8 @@ class MultiStorageAdapter(BaseStorage):
             seen_ids.add(storage_id)
             unique.append(storage)
 
-        self._storages = unique
-        self._primary = unique[0]
+        self._storages: list[BaseStorage] = unique
+        self._primary: BaseStorage = unique[0]
         super().__init__(self._primary.config)
 
     async def initialize(self) -> None:
@@ -226,10 +228,10 @@ class StorageManager:
 
     def __init__(self, settings: Settings) -> None:
         """Initialize storage manager with application settings."""
-        self.settings = settings
+        self.settings: Settings = settings
         self.backends: dict[StorageBackend, BaseStorage] = {}
         self.capabilities: dict[StorageBackend, StorageCapabilities] = {}
-        self._initialized = False
+        self._initialized: bool = False
 
     async def initialize_all_backends(self) -> dict[StorageBackend, bool]:
         """Initialize all available storage backends with timeout protection."""
@@ -252,14 +254,14 @@ class StorageManager:
             return False
 
         # Initialize backends concurrently with timeout protection
-        tasks = []
+        tasks: list[tuple[StorageBackend, Coroutine[None, None, bool]]] = []
 
         # Try Weaviate
         if self.settings.weaviate_endpoint:
             config = StorageConfig(
                 backend=StorageBackend.WEAVIATE,
                 endpoint=self.settings.weaviate_endpoint,
-                api_key=self.settings.weaviate_api_key,
+                api_key=SecretStr(self.settings.weaviate_api_key) if self.settings.weaviate_api_key else None,
                 collection_name="default",
             )
             tasks.append((StorageBackend.WEAVIATE, init_backend(StorageBackend.WEAVIATE, config, WeaviateStorage)))
@@ -271,7 +273,7 @@ class StorageManager:
             config = StorageConfig(
                 backend=StorageBackend.OPEN_WEBUI,
                 endpoint=self.settings.openwebui_endpoint,
-                api_key=self.settings.openwebui_api_key,
+                api_key=SecretStr(self.settings.openwebui_api_key) if self.settings.openwebui_api_key else None,
                 collection_name="default",
             )
             tasks.append((StorageBackend.OPEN_WEBUI, init_backend(StorageBackend.OPEN_WEBUI, config, OpenWebUIStorage)))
@@ -283,7 +285,7 @@ class StorageManager:
             config = StorageConfig(
                 backend=StorageBackend.R2R,
                 endpoint=self.settings.r2r_endpoint,
-                api_key=self.settings.r2r_api_key,
+                api_key=SecretStr(self.settings.r2r_api_key) if self.settings.r2r_api_key else None,
                 collection_name="default",
             )
             tasks.append((StorageBackend.R2R, init_backend(StorageBackend.R2R, config, R2RStorage)))
@@ -293,7 +295,7 @@ class StorageManager:
         # Execute initialization tasks concurrently
         if tasks:
             backend_types, task_coroutines = zip(*tasks, strict=False)
-            task_results = await asyncio.gather(*task_coroutines, return_exceptions=True)
+            task_results: Sequence[bool | BaseException] = await asyncio.gather(*task_coroutines, return_exceptions=True)
 
             for backend_type, task_result in zip(backend_types, task_results, strict=False):
                 results[backend_type] = task_result if isinstance(task_result, bool) else False
@@ -426,7 +428,7 @@ class StorageManager:
         storage = self.backends.get(backend_type)
         if storage:
             try:
-                documents = []
+                documents: list[Document] = []
                 async for doc in storage.search(query, limit=limit):
                     documents.append(doc)
                 results[backend_type] = documents
@@ -455,7 +457,7 @@ class StorageManager:
             for collection in collections:
                 total_docs += await storage.count(collection_name=collection)
 
-            backend_status = {
+            backend_status: dict[str, str | int | bool | StorageCapabilities] = {
                 "available": True,
                 "collections": len(collections),
                 "total_documents": total_docs,
```
Binary file not shown.
```diff
@@ -1,7 +1,7 @@
 """Application settings and configuration."""
 
 from functools import lru_cache
-from typing import Annotated, ClassVar, Literal
+from typing import Annotated, ClassVar, Final, Literal
 
 from prefect.variables import Variable
 from pydantic import Field, HttpUrl, model_validator
@@ -20,6 +20,8 @@ class Settings(BaseSettings):
 
     # API Keys
     firecrawl_api_key: str | None = None
+    llm_api_key: str | None = None
+    openai_api_key: str | None = None
     openwebui_api_key: str | None = None
     weaviate_api_key: str | None = None
     r2r_api_key: str | None = None
@@ -33,6 +35,7 @@ class Settings(BaseSettings):
 
     # Model Configuration
     embedding_model: str = "ollama/bge-m3:latest"
+    metadata_model: str = "fireworks/glm-4p5-air"
     embedding_dimension: int = 1024
 
     # Ingestion Settings
@@ -100,14 +103,20 @@ class Settings(BaseSettings):
         Returns:
             API key or None
         """
-        service_map = {
+        service_map: Final[dict[str, str | None]] = {
             "firecrawl": self.firecrawl_api_key,
             "openwebui": self.openwebui_api_key,
             "weaviate": self.weaviate_api_key,
             "r2r": self.r2r_api_key,
+            "llm": self.get_llm_api_key(),
+            "openai": self.openai_api_key,
         }
         return service_map.get(service)
 
+    def get_llm_api_key(self) -> str | None:
+        """Get API key for LLM services with OpenAI fallback."""
+        return self.llm_api_key or (self.openai_api_key or None)
+
     @model_validator(mode="after")
     def validate_backend_configuration(self) -> "Settings":
         """Validate that required configuration is present for the default backend."""
```
Binary file not shown.
@@ -8,6 +8,40 @@ from uuid import UUID, uuid4
|
||||
from prefect.blocks.core import Block
|
||||
from pydantic import BaseModel, Field, HttpUrl, SecretStr
|
||||
|
||||
from ..config import get_settings
|
||||
|
||||
|
||||
def _default_embedding_model() -> str:
|
||||
return get_settings().embedding_model
|
||||
|
||||
|
||||
def _default_embedding_endpoint() -> HttpUrl:
|
||||
return get_settings().llm_endpoint
|
||||
|
||||
|
||||
def _default_embedding_dimension() -> int:
|
||||
return get_settings().embedding_dimension
|
||||
|
||||
|
||||
def _default_batch_size() -> int:
|
||||
return get_settings().default_batch_size
|
||||
|
||||
|
||||
def _default_collection_name() -> str:
|
||||
return get_settings().default_collection_prefix
|
||||
|
||||
|
||||
def _default_max_crawl_depth() -> int:
|
||||
return get_settings().max_crawl_depth
|
||||
|
||||
|
||||
def _default_max_crawl_pages() -> int:
|
||||
return get_settings().max_crawl_pages
|
||||
|
||||
|
||||
def _default_max_file_size() -> int:
|
||||
return get_settings().max_file_size
|
||||
|
||||
|
||||
class IngestionStatus(str, Enum):
|
||||
"""Status of an ingestion job."""
|
||||
@@ -39,36 +73,36 @@ class IngestionSource(str, Enum):
|
||||
class VectorConfig(BaseModel):
|
||||
"""Configuration for vectorization."""
|
||||
|
||||
model: str = Field(default="ollama/bge-m3:latest")
|
||||
embedding_endpoint: HttpUrl = Field(default=HttpUrl("http://llm.lab"))
|
||||
dimension: int = Field(default=1024)
|
||||
batch_size: Annotated[int, Field(gt=0, le=1000)] = 100
|
||||
model: str = Field(default_factory=_default_embedding_model)
|
||||
embedding_endpoint: HttpUrl = Field(default_factory=_default_embedding_endpoint)
|
||||
dimension: int = Field(default_factory=_default_embedding_dimension)
|
||||
batch_size: Annotated[int, Field(gt=0, le=1000)] = Field(default_factory=_default_batch_size)
|
||||
|
||||
|
||||
class StorageConfig(Block):
|
||||
"""Configuration for storage backend."""
|
||||
|
||||
_block_type_name: ClassVar[str] = "Storage Configuration"
|
||||
_block_type_slug: ClassVar[str] = "storage-config"
|
||||
_description: ClassVar[str] = "Configures storage backend connections and settings for document ingestion"
|
||||
_block_type_name: ClassVar[str | None] = "Storage Configuration"
|
||||
_block_type_slug: ClassVar[str | None] = "storage-config"
|
||||
    _description: ClassVar[str | None] = "Configures storage backend connections and settings for document ingestion"

    backend: StorageBackend
    endpoint: HttpUrl
    api_key: SecretStr | None = Field(default=None)
    collection_name: str = Field(default="documents")
    batch_size: Annotated[int, Field(gt=0, le=1000)] = 100
    collection_name: str = Field(default_factory=_default_collection_name)
    batch_size: Annotated[int, Field(gt=0, le=1000)] = Field(default_factory=_default_batch_size)


class FirecrawlConfig(Block):
    """Configuration for Firecrawl ingestion (operational parameters only)."""

    _block_type_name: ClassVar[str] = "Firecrawl Configuration"
    _block_type_slug: ClassVar[str] = "firecrawl-config"
    _description: ClassVar[str] = "Configures Firecrawl web scraping and crawling parameters"
    _block_type_name: ClassVar[str | None] = "Firecrawl Configuration"
    _block_type_slug: ClassVar[str | None] = "firecrawl-config"
    _description: ClassVar[str | None] = "Configures Firecrawl web scraping and crawling parameters"

    formats: list[str] = Field(default_factory=lambda: ["markdown", "html"])
    max_depth: Annotated[int, Field(ge=1, le=20)] = 5
    limit: Annotated[int, Field(ge=1, le=1000)] = 100
    max_depth: Annotated[int, Field(ge=1, le=20)] = Field(default_factory=_default_max_crawl_depth)
    limit: Annotated[int, Field(ge=1, le=1000)] = Field(default_factory=_default_max_crawl_pages)
    only_main_content: bool = Field(default=True)
    include_subdomains: bool = Field(default=False)

@@ -76,9 +110,9 @@ class FirecrawlConfig(Block):
class RepomixConfig(Block):
    """Configuration for Repomix ingestion."""

    _block_type_name: ClassVar[str] = "Repomix Configuration"
    _block_type_slug: ClassVar[str] = "repomix-config"
    _description: ClassVar[str] = "Configures repository ingestion patterns and file processing settings"
    _block_type_name: ClassVar[str | None] = "Repomix Configuration"
    _block_type_slug: ClassVar[str | None] = "repomix-config"
    _description: ClassVar[str | None] = "Configures repository ingestion patterns and file processing settings"

    include_patterns: list[str] = Field(
        default_factory=lambda: ["*.py", "*.js", "*.ts", "*.md", "*.yaml", "*.json"]
@@ -86,16 +120,16 @@ class RepomixConfig(Block):
    exclude_patterns: list[str] = Field(
        default_factory=lambda: ["**/node_modules/**", "**/__pycache__/**", "**/.git/**"]
    )
    max_file_size: int = Field(default=1_000_000)  # 1MB
    max_file_size: int = Field(default_factory=_default_max_file_size)  # 1MB
    respect_gitignore: bool = Field(default=True)


class R2RConfig(Block):
    """Configuration for R2R ingestion."""

    _block_type_name: ClassVar[str] = "R2R Configuration"
    _block_type_slug: ClassVar[str] = "r2r-config"
    _description: ClassVar[str] = "Configures R2R-specific ingestion settings including chunking and graph enrichment"
    _block_type_name: ClassVar[str | None] = "R2R Configuration"
    _block_type_slug: ClassVar[str | None] = "r2r-config"
    _description: ClassVar[str | None] = "Configures R2R-specific ingestion settings including chunking and graph enrichment"

    chunk_size: Annotated[int, Field(ge=100, le=8192)] = 1000
    chunk_overlap: Annotated[int, Field(ge=0, le=1000)] = 200
@@ -168,7 +202,7 @@ class Document(BaseModel):
    vector: list[float] | None = Field(default=None)
    score: float | None = Field(default=None)
    source: IngestionSource
    collection: str = Field(default="documents")
    collection: str = Field(default_factory=_default_collection_name)


class IngestionJob(BaseModel):
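The recurring change in this file swaps literal defaults for Field(default_factory=...) so defaults are resolved at model-instantiation time rather than at import time. The _default_* helpers themselves are outside these hunks; a minimal, purely hypothetical sketch of what such a helper could look like, assuming it falls back from an environment variable to the old literal:

import os

def _default_collection_name() -> str:
    # Hypothetical: prefer a runtime-configured value, fall back to the old default.
    return os.environ.get("COLLECTION_NAME", "documents")

def _default_batch_size() -> int:
    return int(os.environ.get("BATCH_SIZE", "100"))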
Binary file not shown.
Binary file not shown.
@@ -3,8 +3,8 @@
from datetime import timedelta
from typing import Literal, Protocol, cast

from prefect import serve
from prefect.deployments.runner import RunnerDeployment
from prefect.flows import serve as prefect_serve
from prefect.schedules import Cron, Interval
from prefect.variables import Variable

@@ -82,7 +82,7 @@ def create_scheduled_deployment(
    tags = [source_enum.value, backend_enum.value]

    # Create deployment parameters with block support
    parameters = {
    parameters: dict[str, str | bool] = {
        "source_url": source_url,
        "source_type": source_enum.value,
        "storage_backend": backend_enum.value,
@@ -97,8 +97,8 @@ def create_scheduled_deployment(

    # Create deployment
    # The flow decorator adds the to_deployment method at runtime
    to_deployment = create_ingestion_flow.to_deployment
    deployment = to_deployment(
    flow_with_deployment = cast(FlowWithDeployment, create_ingestion_flow)
    return flow_with_deployment.to_deployment(
        name=name,
        schedule=schedule,
        parameters=parameters,
@@ -106,8 +106,6 @@ def create_scheduled_deployment(
        description=f"Scheduled ingestion from {source_url}",
    )

    return cast("RunnerDeployment", deployment)


def serve_deployments(deployments: list[RunnerDeployment]) -> None:
    """
@@ -116,4 +114,4 @@ def serve_deployments(deployments: list[RunnerDeployment]) -> None:
    Args:
        deployments: List of deployment configurations
    """
    serve(*deployments, limit=10)
    prefect_serve(*deployments, limit=10)
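The cast(FlowWithDeployment, ...) above gives the type checker a nominal surface for a method that the @flow decorator only attaches at runtime. The FlowWithDeployment protocol itself is defined outside these hunks; a minimal sketch of what it likely declares (names and signature here are assumptions, not the file's exact code):

from typing import Protocol
from prefect.deployments.runner import RunnerDeployment

class FlowWithDeployment(Protocol):
    """Static view of a Prefect flow that exposes to_deployment."""

    def to_deployment(self, name: str, **kwargs: object) -> RunnerDeployment: ...

Returning the deployment directly also removes the need for the trailing cast("RunnerDeployment", ...) that the old code carried.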
Binary file not shown.
@@ -6,7 +6,7 @@ import re
from collections.abc import AsyncGenerator, Awaitable, Callable
from dataclasses import dataclass
from datetime import UTC, datetime
from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, Protocol, cast
from urllib.parse import urlparse
from uuid import NAMESPACE_URL, UUID, uuid5

@@ -28,9 +28,70 @@ if TYPE_CHECKING:
    from ..storage.base import BaseStorage


class FirecrawlMetadata(Protocol):
    """Protocol for Firecrawl metadata objects."""

    title: str | None
    description: str | None
    author: str | None
    language: str | None
    sitemap_last_modified: str | None
    sourceURL: str | None
    keywords: str | list[str] | None
    robots: str | None
    ogTitle: str | None
    ogDescription: str | None
    ogUrl: str | None
    ogImage: str | None
    twitterCard: str | None
    twitterSite: str | None
    twitterCreator: str | None
    favicon: str | None
    statusCode: int | None


class FirecrawlResult(Protocol):
    """Protocol for Firecrawl scrape result objects."""

    metadata: FirecrawlMetadata | None
    markdown: str | None


class FirecrawlMapLink(Protocol):
    """Protocol for Firecrawl map link objects."""

    url: str


class FirecrawlMapResult(Protocol):
    """Protocol for Firecrawl map result objects."""

    links: list[FirecrawlMapLink] | None


class AsyncFirecrawlSession(Protocol):
    """Protocol for AsyncFirecrawl session objects."""

    async def close(self) -> None: ...


class AsyncFirecrawlClient(Protocol):
    """Protocol for AsyncFirecrawl client objects."""

    _session: AsyncFirecrawlSession | None

    async def close(self) -> None: ...

    async def scrape(self, url: str, formats: list[str]) -> FirecrawlResult: ...

    async def map(self, url: str, limit: int | None = None) -> "FirecrawlMapResult": ...


class FirecrawlError(IngestionError):
    """Base exception for Firecrawl-related errors."""

    status_code: int | None

    def __init__(self, message: str, status_code: int | None = None) -> None:
        super().__init__(message)
        self.status_code = status_code
@@ -64,7 +125,7 @@ async def retry_with_backoff(
        except Exception as e:
            if attempt == max_retries - 1:
                raise e
            delay = 1.0 * (2**attempt)
            delay: float = 1.0 * (2**attempt)
            logging.warning(
                f"Firecrawl operation failed (attempt {attempt + 1}/{max_retries}): {e}. Retrying in {delay:.1f}s..."
            )
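Only the except-branch of retry_with_backoff is visible in the hunk above. For reference, a self-contained sketch of the helper's overall shape, consistent with the visible lines but with an assumed signature and default max_retries (the 1.0 * 2**attempt delay gives 1s, 2s, 4s, ...):

import asyncio
import logging
from collections.abc import Awaitable, Callable
from typing import TypeVar

T = TypeVar("T")

async def retry_with_backoff(operation: Callable[[], Awaitable[T]], max_retries: int = 3) -> T:
    """Retry an async operation, doubling the delay after each failed attempt."""
    for attempt in range(max_retries):
        try:
            return await operation()
        except Exception as e:
            if attempt == max_retries - 1:
                raise e
            delay: float = 1.0 * (2**attempt)
            logging.warning(f"Attempt {attempt + 1}/{max_retries} failed: {e}. Retrying in {delay:.1f}s...")
            await asyncio.sleep(delay)
    raise AssertionError("unreachable")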
@@ -104,7 +165,7 @@ class FirecrawlIngestor(BaseIngestor):
    """Ingestor for web and documentation sites using Firecrawl."""

    config: FirecrawlConfig
    client: AsyncFirecrawl
    client: AsyncFirecrawlClient

    def __init__(self, config: FirecrawlConfig | None = None):
        """
@@ -130,15 +191,15 @@ class FirecrawlIngestor(BaseIngestor):
                "http://localhost"
            ):
                # Self-hosted instance - try with api_url if supported
                self.client = AsyncFirecrawl(
                self.client = cast(AsyncFirecrawlClient, AsyncFirecrawl(
                    api_key=api_key, api_url=str(settings.firecrawl_endpoint)
                )
                ))
            else:
                # Cloud instance - use standard initialization
                self.client = AsyncFirecrawl(api_key=api_key)
                self.client = cast(AsyncFirecrawlClient, AsyncFirecrawl(api_key=api_key))
        except Exception:
            # Fallback to standard initialization
            self.client = AsyncFirecrawl(api_key=api_key)
            self.client = cast(AsyncFirecrawlClient, AsyncFirecrawl(api_key=api_key))

    @override
    async def ingest(self, job: IngestionJob) -> AsyncGenerator[Document, None]:
@@ -277,11 +338,11 @@ class FirecrawlIngestor(BaseIngestor):
        """
        try:
            # Use SDK v2 map endpoint following official pattern
            result = await self.client.map(url=url, limit=self.config.limit)
            result: FirecrawlMapResult = await self.client.map(url=url, limit=self.config.limit)

            if result and getattr(result, "links", None):
            if result and result.links:
                # Extract URLs from the result following official pattern
                return [getattr(link, "url", str(link)) for link in result.links]
                return [link.url for link in result.links]
            return []
        except Exception as e:
            # If map fails (might not be available in all versions), fall back to single URL
@@ -324,43 +385,43 @@ class FirecrawlIngestor(BaseIngestor):
        try:
            # Use SDK v2 scrape endpoint following official pattern with retry
            async def scrape_operation() -> FirecrawlPage | None:
                result = await self.client.scrape(url, formats=self.config.formats)
                result: FirecrawlResult = await self.client.scrape(url, formats=self.config.formats)

                # Extract data from the result following official response handling
                if result:
                    # The SDK returns a ScrapeData object with typed metadata
                    metadata = getattr(result, "metadata", None)
                    metadata: FirecrawlMetadata | None = getattr(result, "metadata", None)

                    # Extract basic metadata
                    title = getattr(metadata, "title", None) if metadata else None
                    description = getattr(metadata, "description", None) if metadata else None
                    title: str | None = getattr(metadata, "title", None) if metadata else None
                    description: str | None = getattr(metadata, "description", None) if metadata else None

                    # Extract enhanced metadata if available
                    author = getattr(metadata, "author", None) if metadata else None
                    language = getattr(metadata, "language", None) if metadata else None
                    sitemap_last_modified = (
                    author: str | None = getattr(metadata, "author", None) if metadata else None
                    language: str | None = getattr(metadata, "language", None) if metadata else None
                    sitemap_last_modified: str | None = (
                        getattr(metadata, "sitemap_last_modified", None) if metadata else None
                    )
                    source_url = getattr(metadata, "sourceURL", None) if metadata else None
                    keywords = getattr(metadata, "keywords", None) if metadata else None
                    robots = getattr(metadata, "robots", None) if metadata else None
                    source_url: str | None = getattr(metadata, "sourceURL", None) if metadata else None
                    keywords: str | list[str] | None = getattr(metadata, "keywords", None) if metadata else None
                    robots: str | None = getattr(metadata, "robots", None) if metadata else None

                    # Open Graph metadata
                    og_title = getattr(metadata, "ogTitle", None) if metadata else None
                    og_description = getattr(metadata, "ogDescription", None) if metadata else None
                    og_url = getattr(metadata, "ogUrl", None) if metadata else None
                    og_image = getattr(metadata, "ogImage", None) if metadata else None
                    og_title: str | None = getattr(metadata, "ogTitle", None) if metadata else None
                    og_description: str | None = getattr(metadata, "ogDescription", None) if metadata else None
                    og_url: str | None = getattr(metadata, "ogUrl", None) if metadata else None
                    og_image: str | None = getattr(metadata, "ogImage", None) if metadata else None

                    # Twitter metadata
                    twitter_card = getattr(metadata, "twitterCard", None) if metadata else None
                    twitter_site = getattr(metadata, "twitterSite", None) if metadata else None
                    twitter_creator = (
                    twitter_card: str | None = getattr(metadata, "twitterCard", None) if metadata else None
                    twitter_site: str | None = getattr(metadata, "twitterSite", None) if metadata else None
                    twitter_creator: str | None = (
                        getattr(metadata, "twitterCreator", None) if metadata else None
                    )

                    # Additional metadata
                    favicon = getattr(metadata, "favicon", None) if metadata else None
                    status_code = getattr(metadata, "statusCode", None) if metadata else None
                    favicon: str | None = getattr(metadata, "favicon", None) if metadata else None
                    status_code: int | None = getattr(metadata, "statusCode", None) if metadata else None

                    return FirecrawlPage(
                        url=url,
@@ -373,7 +434,7 @@ class FirecrawlIngestor(BaseIngestor):
                        source_url=source_url,
                        keywords=keywords.split(",")
                        if keywords and isinstance(keywords, str)
                        else keywords,
                        else (keywords if isinstance(keywords, list) else None),
                        robots=robots,
                        og_title=og_title,
                        og_description=og_description,
@@ -399,11 +460,11 @@ class FirecrawlIngestor(BaseIngestor):
        return uuid5(NAMESPACE_URL, source_url)

    @staticmethod
    def _analyze_content_structure(content: str) -> dict[str, object]:
    def _analyze_content_structure(content: str) -> dict[str, str | int | bool | list[str]]:
        """Analyze markdown content to extract structural information."""
        # Extract heading hierarchy
        heading_pattern = r"^(#{1,6})\s+(.+)$"
        headings = []
        headings: list[str] = []
        for match in re.finditer(heading_pattern, content, re.MULTILINE):
            level = len(match.group(1))
            text = match.group(2).strip()
@@ -418,7 +479,8 @@ class FirecrawlIngestor(BaseIngestor):
        max_depth = 0
        if headings:
            for heading in headings:
                depth = (len(heading) - len(heading.lstrip())) // 2 + 1
                heading_str: str = str(heading)
                depth = (len(heading_str) - len(heading_str.lstrip())) // 2 + 1
                max_depth = max(max_depth, depth)

        return {
@@ -570,7 +632,7 @@ class FirecrawlIngestor(BaseIngestor):
                await self.client.close()
            except Exception as e:
                logging.debug(f"Error closing Firecrawl client: {e}")
        elif hasattr(self.client, "_session") and hasattr(self.client._session, "close"):
        elif hasattr(self.client, "_session") and self.client._session and hasattr(self.client._session, "close"):
            try:
                await self.client._session.close()
            except Exception as e:
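The pattern running through this file is to describe the third-party client with local Protocol classes and cast once at construction, so call sites are checked against exactly the attributes the pipeline uses instead of the SDK's own stubs. A standalone sketch of the idea, with hypothetical names (not this file's code):

from typing import Protocol, cast

class MinimalClient(Protocol):
    async def scrape(self, url: str, formats: list[str]) -> object: ...

def as_minimal_client(real_client: object) -> MinimalClient:
    # cast() is a type-level assertion only; nothing is checked at runtime,
    # so the protocol must list every member that call sites rely on.
    return cast(MinimalClient, real_client)

The trade-off is visible in the map/scrape hunks above: once the client is protocol-typed, defensive getattr(...) probes can become plain attribute access.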
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1,9 +1,136 @@
"""Base storage interface."""

import logging
from abc import ABC, abstractmethod
from collections.abc import AsyncGenerator
from typing import Final
from types import TracebackType

import httpx
from pydantic import SecretStr

from ..core.exceptions import StorageError
from ..core.models import Document, StorageConfig
from .types import CollectionSummary, DocumentInfo

LOGGER: Final[logging.Logger] = logging.getLogger(__name__)


class TypedHttpClient:
    """
    A properly typed HTTP client wrapper for HTTPX.

    Provides consistent exception handling and type annotations
    for storage adapters that use HTTP APIs.

    Note: Some type checkers (Pylance) may report warnings about HTTPX types
    due to library compatibility issues. The code functions correctly at runtime.
    """

    client: httpx.AsyncClient
    _base_url: str

    def __init__(
        self,
        base_url: str,
        *,
        api_key: SecretStr | None = None,
        timeout: float = 30.0,
        headers: dict[str, str] | None = None,
    ):
        """
        Initialize the typed HTTP client.

        Args:
            base_url: Base URL for all requests
            api_key: Optional API key for authentication
            timeout: Request timeout in seconds
            headers: Additional headers to include with requests
        """
        self._base_url = base_url

        # Build headers with optional authentication
        client_headers: dict[str, str] = headers or {}
        if api_key:
            client_headers["Authorization"] = f"Bearer {api_key.get_secret_value()}"

        # Note: Pylance incorrectly reports "No parameter named 'base_url'"
        # but base_url is a valid AsyncClient parameter (see HTTPX docs)
        client_kwargs: dict[str, str | dict[str, str] | float] = {
            "base_url": base_url,
            "headers": client_headers,
            "timeout": timeout,
        }
        self.client = httpx.AsyncClient(**client_kwargs)  # type: ignore

    async def request(
        self,
        method: str,
        path: str,
        *,
        allow_404: bool = False,
        json: dict[str, object] | None = None,
        data: dict[str, object] | None = None,
        files: dict[str, tuple[str, bytes, str]] | None = None,
        params: dict[str, str | bool] | None = None,
    ) -> httpx.Response | None:
        """
        Perform an HTTP request with consistent error handling.

        Args:
            method: HTTP method (GET, POST, DELETE, etc.)
            path: URL path relative to base_url
            allow_404: If True, return None for 404 responses instead of raising
            json, data, files, params: Optional payloads forwarded to the underlying httpx request

        Returns:
            HTTP response object, or None if allow_404=True and status is 404

        Raises:
            StorageError: If request fails
        """
        try:
            response = await self.client.request(  # type: ignore
                method, path, json=json, data=data, files=files, params=params
            )
            response.raise_for_status()  # type: ignore
            return response  # type: ignore
        except Exception as e:
            # Handle 404 as special case if requested
            if allow_404 and hasattr(e, 'response') and getattr(e.response, 'status_code', None) == 404:  # type: ignore
                LOGGER.debug("Resource not found (404): %s %s", method, path)
                return None

            # Convert all HTTP-related exceptions to StorageError
            error_name = e.__class__.__name__
            if 'HTTP' in error_name or 'Connect' in error_name or 'Request' in error_name:
                if hasattr(e, 'response') and hasattr(e.response, 'status_code'):  # type: ignore
                    status_code = getattr(e.response, 'status_code', 'unknown')  # type: ignore
                    raise StorageError(f"HTTP {status_code} error from {self._base_url}: {e}") from e
                else:
                    raise StorageError(f"Request failed to {self._base_url}: {e}") from e
            # Re-raise non-HTTP exceptions
            raise

    async def close(self) -> None:
        """Close the HTTP client and cleanup resources."""
        try:
            await self.client.aclose()
        except Exception as e:
            LOGGER.warning("Error closing HTTP client: %s", e)

    async def __aenter__(self) -> "TypedHttpClient":
        """Async context manager entry."""
        return self

    async def __aexit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: TracebackType | None
    ) -> None:
        """Async context manager exit."""
        await self.close()

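Because TypedHttpClient implements __aenter__/__aexit__, adapters (or ad-hoc scripts) can scope it with async with and get guaranteed cleanup. A small usage sketch; the endpoint and path here are illustrative, not taken from the file:

import asyncio

async def main() -> None:
    async with TypedHttpClient("http://openwebui.lab") as http:
        # Raises StorageError on transport or HTTP errors; returns None on 404.
        response = await http.request("GET", "/api/v1/knowledge/list", allow_404=True)
        if response is not None:
            print(response.json())

asyncio.run(main())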
class BaseStorage(ABC):
@@ -164,12 +291,12 @@ class BaseStorage(ABC):
        """
        return []

    async def describe_collections(self) -> list[dict[str, object]]:
    async def describe_collections(self) -> list[CollectionSummary]:
        """
        Describe available collections with metadata (if supported by backend).

        Returns:
            List of collection metadata dictionaries, empty list if not supported
            List of collection metadata, empty list if not supported
        """
        return []

@@ -206,7 +333,7 @@ class BaseStorage(ABC):
        offset: int = 0,
        *,
        collection_name: str | None = None,
    ) -> list[dict[str, object]]:
    ) -> list[DocumentInfo]:
        """
        List documents in the storage backend (if supported).

@@ -216,7 +343,7 @@ class BaseStorage(ABC):
            collection_name: Collection to list documents from

        Returns:
            List of document dictionaries with metadata
            List of document information with metadata

        Raises:
            NotImplementedError: If backend doesn't support document listing
@@ -1,33 +1,49 @@
"""Open WebUI storage adapter."""


import asyncio
import contextlib
import logging
from typing import TYPE_CHECKING, Final, TypedDict, cast
from typing import Final, TypedDict, cast

import httpx
from typing_extensions import override

if TYPE_CHECKING:
    # Type checking imports - these will be ignored at runtime
    from httpx import AsyncClient, ConnectError, HTTPStatusError, RequestError
else:
    # Runtime imports that work properly
    AsyncClient = httpx.AsyncClient
    ConnectError = httpx.ConnectError
    HTTPStatusError = httpx.HTTPStatusError
    RequestError = httpx.RequestError

from ..core.exceptions import StorageError
from ..core.models import Document, StorageConfig
from .base import BaseStorage
from .base import BaseStorage, TypedHttpClient
from .types import CollectionSummary, DocumentInfo

LOGGER: Final[logging.Logger] = logging.getLogger(__name__)


class OpenWebUIFileResponse(TypedDict, total=False):
    """OpenWebUI API file response structure."""
    id: str
    filename: str
    name: str
    content_type: str
    size: int
    created_at: str
    meta: dict[str, str | int]


class OpenWebUIKnowledgeBase(TypedDict, total=False):
    """OpenWebUI knowledge base response structure."""
    id: str
    name: str
    description: str
    files: list[OpenWebUIFileResponse]
    data: dict[str, str]
    created_at: str
    updated_at: str

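With total=False, every key on these TypedDicts is optional, which matches API payloads where fields may be absent; cast() then layers key-level typing over the raw dict without any runtime validation. A small sketch of the lookup pattern this enables (the payload contents are illustrative):

from typing import cast

payload: object = {"id": "kb-1", "name": "documents", "files": []}
if isinstance(payload, dict):
    kb = cast(OpenWebUIKnowledgeBase, payload)
    # .get() stays mandatory because any key may be missing under total=False.
    print(kb.get("name", "Unknown"), len(kb.get("files", [])))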
class OpenWebUIStorage(BaseStorage):
    """Storage adapter for Open WebUI knowledge endpoints."""

    client: AsyncClient
    http_client: TypedHttpClient
    _knowledge_cache: dict[str, str]

    def __init__(self, config: StorageConfig):
@@ -39,13 +55,9 @@ class OpenWebUIStorage(BaseStorage):
        """
        super().__init__(config)

        headers: dict[str, str] = {}
        if config.api_key:
            headers["Authorization"] = f"Bearer {config.api_key}"

        self.client = AsyncClient(
        self.http_client = TypedHttpClient(
            base_url=str(config.endpoint),
            headers=headers,
            api_key=config.api_key,
            timeout=30.0,
        )
        self._knowledge_cache = {}
@@ -59,60 +71,45 @@ class OpenWebUIStorage(BaseStorage):
                self.config.collection_name,
                create=True,
            )

        except ConnectError as e:
            raise StorageError(f"Connection to OpenWebUI failed: {e}") from e
        except HTTPStatusError as e:
            raise StorageError(f"OpenWebUI returned error {e.response.status_code}: {e}") from e
        except RequestError as e:
            raise StorageError(f"Request to OpenWebUI failed: {e}") from e
        except Exception as e:
            raise StorageError(f"Failed to initialize Open WebUI: {e}") from e

    async def _create_collection(self, name: str) -> str:
        """Create knowledge base in Open WebUI."""
        try:
            response = await self.client.post(
                "/api/v1/knowledge/create",
                json={
                    "name": name,
                    "description": "Documents ingested from various sources",
                    "data": {},
                    "access_control": None,
                },
            )
            response.raise_for_status()
            result = response.json()
            knowledge_id = result.get("id")
            response = await self.http_client.request(
                "POST",
                "/api/v1/knowledge/create",
                json={
                    "name": name,
                    "description": "Documents ingested from various sources",
                    "data": {},
                    "access_control": None,
                },
            )
            if response is None:
                raise StorageError("Unexpected None response from knowledge base creation")
            result = response.json()
            knowledge_id = result.get("id")

            if not knowledge_id or not isinstance(knowledge_id, str):
                raise StorageError("Knowledge base creation failed: no ID returned")
            if not knowledge_id or not isinstance(knowledge_id, str):
                raise StorageError("Knowledge base creation failed: no ID returned")

            return str(knowledge_id)
            return str(knowledge_id)

        except ConnectError as e:
            raise StorageError(f"Connection to OpenWebUI failed during creation: {e}") from e
        except HTTPStatusError as e:
            raise StorageError(
                f"OpenWebUI returned error {e.response.status_code} during creation: {e}"
            ) from e
        except RequestError as e:
            raise StorageError(f"Request to OpenWebUI failed during creation: {e}") from e
        except Exception as e:
            raise StorageError(f"Failed to create knowledge base: {e}") from e

    async def _fetch_knowledge_bases(self) -> list[dict[str, object]]:
    async def _fetch_knowledge_bases(self) -> list[OpenWebUIKnowledgeBase]:
        """Return the list of knowledge bases from the API."""
        response = await self.client.get("/api/v1/knowledge/list")
        response.raise_for_status()
        response = await self.http_client.request("GET", "/api/v1/knowledge/list")
        if response is None:
            return []
        data = response.json()
        if not isinstance(data, list):
            return []
        normalized: list[dict[str, object]] = []
        normalized: list[OpenWebUIKnowledgeBase] = []
        for item in data:
            if isinstance(item, dict):
                item_dict: dict[str, object] = item
                normalized.append({str(k): v for k, v in item_dict.items()})
                # Cast to our expected structure
                kb_item = cast(OpenWebUIKnowledgeBase, item)
                normalized.append(kb_item)
        return normalized

    async def _get_knowledge_id(
@@ -171,12 +168,14 @@ class OpenWebUIStorage(BaseStorage):
        if not filename.endswith(('.txt', '.md', '.pdf', '.doc', '.docx')):
            filename = f"{filename}.txt"
        files = {"file": (filename, document.content.encode(), "text/plain")}
        response = await self.client.post(
        response = await self.http_client.request(
            "POST",
            "/api/v1/files/",
            files=files,
            params={"process": True, "process_in_background": False},
        )
        response.raise_for_status()
        if response is None:
            raise StorageError("Unexpected None response from file upload")

        file_data = response.json()
        file_id = file_data.get("id")
@@ -185,19 +184,14 @@ class OpenWebUIStorage(BaseStorage):
            raise StorageError("File upload failed: no file ID returned")

        # Step 2: Add file to knowledge base
        response = await self.client.post(
            f"/api/v1/knowledge/{knowledge_id}/file/add", json={"file_id": file_id}
        response = await self.http_client.request(
            "POST",
            f"/api/v1/knowledge/{knowledge_id}/file/add",
            json={"file_id": file_id}
        )
        response.raise_for_status()


        return str(file_id)

        except ConnectError as e:
            raise StorageError(f"Connection to OpenWebUI failed: {e}") from e
        except HTTPStatusError as e:
            raise StorageError(f"OpenWebUI returned error {e.response.status_code}: {e}") from e
        except RequestError as e:
            raise StorageError(f"Request to OpenWebUI failed: {e}") from e
        except Exception as e:
            raise StorageError(f"Failed to store document: {e}") from e

@@ -229,12 +223,14 @@ class OpenWebUIStorage(BaseStorage):
        if not filename.endswith(('.txt', '.md', '.pdf', '.doc', '.docx')):
            filename = f"{filename}.txt"
        files = {"file": (filename, doc.content.encode(), "text/plain")}
        upload_response = await self.client.post(
        upload_response = await self.http_client.request(
            "POST",
            "/api/v1/files/",
            files=files,
            params={"process": True, "process_in_background": False},
        )
        upload_response.raise_for_status()
        if upload_response is None:
            raise StorageError(f"Unexpected None response from file upload for document {doc.id}")

        file_data = upload_response.json()
        file_id = file_data.get("id")
@@ -244,10 +240,11 @@ class OpenWebUIStorage(BaseStorage):
                f"File upload failed for document {doc.id}: no file ID returned"
            )

        attach_response = await self.client.post(
            f"/api/v1/knowledge/{knowledge_id}/file/add", json={"file_id": file_id}
        await self.http_client.request(
            "POST",
            f"/api/v1/knowledge/{knowledge_id}/file/add",
            json={"file_id": file_id}
        )
        attach_response.raise_for_status()

        return str(file_id)

@@ -273,14 +270,6 @@ class OpenWebUIStorage(BaseStorage):

        return file_ids

        except ConnectError as e:
            raise StorageError(f"Connection to OpenWebUI failed during batch: {e}") from e
        except HTTPStatusError as e:
            raise StorageError(
                f"OpenWebUI returned error {e.response.status_code} during batch: {e}"
            ) from e
        except RequestError as e:
            raise StorageError(f"Request to OpenWebUI failed during batch: {e}") from e
        except Exception as e:
            raise StorageError(f"Failed to store batch: {e}") from e

@@ -298,6 +287,7 @@ class OpenWebUIStorage(BaseStorage):
        Returns:
            Always None - retrieval not supported
        """
        _ = document_id, collection_name  # Mark as used
        # OpenWebUI uses file-based storage without direct document retrieval
        # This will cause the base check_exists method to return False,
        # which means documents will always be re-scraped for OpenWebUI
@@ -323,35 +313,20 @@ class OpenWebUIStorage(BaseStorage):
                return False

            # Remove file from knowledge base
            response = await self.client.post(
                f"/api/v1/knowledge/{knowledge_id}/file/remove", json={"file_id": document_id}
            await self.http_client.request(
                "POST",
                f"/api/v1/knowledge/{knowledge_id}/file/remove",
                json={"file_id": document_id}
            )
            response.raise_for_status()

            delete_response = await self.client.delete(f"/api/v1/files/{document_id}")
            if delete_response.status_code == 404:
                return True
            delete_response.raise_for_status()
            await self.http_client.request(
                "DELETE",
                f"/api/v1/files/{document_id}",
                allow_404=True
            )
            return True

        except ConnectError as exc:
            LOGGER.error(
                "Failed to reach OpenWebUI when deleting file %s", document_id, exc_info=exc
            )
            return False
        except HTTPStatusError as exc:
            LOGGER.error(
                "OpenWebUI returned status error %s when deleting file %s",
                exc.response.status_code if exc.response else "unknown",
                document_id,
                exc_info=exc,
            )
            return False
        except RequestError as exc:
            LOGGER.error("Request error deleting file %s from OpenWebUI", document_id, exc_info=exc)
            return False
        except Exception as exc:
            LOGGER.error("Unexpected error deleting file %s", document_id, exc_info=exc)
            LOGGER.error("Error deleting file %s from OpenWebUI", document_id, exc_info=exc)
            return False

    async def list_collections(self) -> list[str]:
@@ -370,12 +345,6 @@ class OpenWebUIStorage(BaseStorage):
                for kb in knowledge_bases
            ]

        except ConnectError as e:
            raise StorageError(f"Connection to OpenWebUI failed: {e}") from e
        except HTTPStatusError as e:
            raise StorageError(f"OpenWebUI returned error {e.response.status_code}: {e}") from e
        except RequestError as e:
            raise StorageError(f"Request to OpenWebUI failed: {e}") from e
        except Exception as e:
            raise StorageError(f"Failed to list knowledge bases: {e}") from e

@@ -396,8 +365,11 @@ class OpenWebUIStorage(BaseStorage):
                return True

            # Delete the knowledge base using the OpenWebUI API
            response = await self.client.delete(f"/api/v1/knowledge/{knowledge_id}/delete")
            response.raise_for_status()
            await self.http_client.request(
                "DELETE",
                f"/api/v1/knowledge/{knowledge_id}/delete",
                allow_404=True
            )

            # Remove from cache if it exists
            if collection_name in self._knowledge_cache:
@@ -406,45 +378,25 @@ class OpenWebUIStorage(BaseStorage):
            LOGGER.info("Successfully deleted knowledge base: %s", collection_name)
            return True

        except HTTPStatusError as e:
            # Handle 404 as success (already deleted)
            if e.response.status_code == 404:
                LOGGER.info("Knowledge base %s was already deleted or not found", collection_name)
                return True
            LOGGER.error(
                "OpenWebUI returned error %s when deleting knowledge base %s",
                e.response.status_code,
                collection_name,
                exc_info=e,
            )
            return False
        except ConnectError as e:
            LOGGER.error(
                "Failed to reach OpenWebUI when deleting knowledge base %s",
                collection_name,
                exc_info=e,
            )
            return False
        except RequestError as e:
            LOGGER.error(
                "Request error deleting knowledge base %s from OpenWebUI",
                collection_name,
                exc_info=e,
            )
            return False
        except Exception as e:
            LOGGER.error("Unexpected error deleting knowledge base %s", collection_name, exc_info=e)
            if hasattr(e, 'response'):
                response_attr = getattr(e, 'response', None)
                if response_attr is not None and hasattr(response_attr, 'status_code'):
                    with contextlib.suppress(Exception):
                        status_code = response_attr.status_code  # type: ignore[attr-defined]
                        if status_code == 404:
                            LOGGER.info("Knowledge base %s was already deleted or not found", collection_name)
                            return True
            LOGGER.error(
                "Error deleting knowledge base %s from OpenWebUI",
                collection_name,
                exc_info=e,
            )
            return False
class CollectionSummary(TypedDict):
    """Structure describing a knowledge base summary."""

    name: str
    count: int
    size_mb: float


    async def _get_knowledge_base_count(self, kb: dict[str, object]) -> int:
    async def _get_knowledge_base_count(self, kb: OpenWebUIKnowledgeBase) -> int:
        """Get the file count for a knowledge base."""
        kb_id = kb.get("id")
        name = kb.get("name", "Unknown")
@@ -454,17 +406,22 @@ class OpenWebUIStorage(BaseStorage):

        return await self._count_files_from_detailed_info(str(kb_id), str(name), kb)

    def _count_files_from_basic_info(self, kb: dict[str, object]) -> int:
    def _count_files_from_basic_info(self, kb: OpenWebUIKnowledgeBase) -> int:
        """Count files from basic knowledge base info."""
        files = kb.get("files", [])
        return len(files) if isinstance(files, list) and files is not None else 0

    async def _count_files_from_detailed_info(self, kb_id: str, name: str, kb: dict[str, object]) -> int:
    async def _count_files_from_detailed_info(self, kb_id: str, name: str, kb: OpenWebUIKnowledgeBase) -> int:
        """Count files by fetching detailed knowledge base info."""
        try:
            LOGGER.debug(f"Fetching detailed info for KB '{name}' from /api/v1/knowledge/{kb_id}")
            detail_response = await self.client.get(f"/api/v1/knowledge/{kb_id}")
            detail_response.raise_for_status()
            detail_response = await self.http_client.request(
                "GET",
                f"/api/v1/knowledge/{kb_id}"
            )
            if detail_response is None:
                LOGGER.warning(f"Knowledge base '{name}' (ID: {kb_id}) not found")
                return self._count_files_from_basic_info(kb)
            detailed_kb = detail_response.json()

            files = detailed_kb.get("files", [])
@@ -477,21 +434,18 @@ class OpenWebUIStorage(BaseStorage):
            LOGGER.warning(f"Failed to get detailed info for KB '{name}' (ID: {kb_id}): {e}")
            return self._count_files_from_basic_info(kb)

    async def describe_collections(self) -> list[dict[str, object]]:
    async def describe_collections(self) -> list[CollectionSummary]:
        """Return metadata about each knowledge base."""
        try:
            knowledge_bases = await self._fetch_knowledge_bases()
            collections: list[dict[str, object]] = []
            collections: list[CollectionSummary] = []

            for kb in knowledge_bases:
                if not isinstance(kb, dict):
                    continue

                count = await self._get_knowledge_base_count(kb)
                name = kb.get("name", "Unknown")
                size_mb = count * 0.5  # rough heuristic

                summary: dict[str, object] = {
                summary: CollectionSummary = {
                    "name": str(name),
                    "count": count,
                    "size_mb": float(size_mb),
@@ -535,8 +489,13 @@ class OpenWebUIStorage(BaseStorage):
            return 0

            # Get detailed knowledge base information to get accurate file count
            detail_response = await self.client.get(f"/api/v1/knowledge/{kb_id}")
            detail_response.raise_for_status()
            detail_response = await self.http_client.request(
                "GET",
                f"/api/v1/knowledge/{kb_id}"
            )
            if detail_response is None:
                LOGGER.warning(f"Knowledge base '{collection_name}' (ID: {kb_id}) not found")
                return self._count_files_from_basic_info(kb)
            detailed_kb = detail_response.json()

            files = detailed_kb.get("files", [])
@@ -549,7 +508,7 @@ class OpenWebUIStorage(BaseStorage):
            LOGGER.warning(f"Failed to get count for collection '{collection_name}': {e}")
            return 0

    async def get_knowledge_by_name(self, name: str) -> dict[str, object] | None:
    async def get_knowledge_by_name(self, name: str) -> OpenWebUIKnowledgeBase | None:
        """
        Get knowledge base details by name.

@@ -560,13 +519,14 @@ class OpenWebUIStorage(BaseStorage):
            Knowledge base details or None if not found
        """
        try:
            response = await self.client.get("/api/v1/knowledge/list")
            response.raise_for_status()
            response = await self.http_client.request("GET", "/api/v1/knowledge/list")
            if response is None:
                return None
            knowledge_bases = response.json()

            return next(
                (
                    {str(k): v for k, v in kb.items()}
                    cast(OpenWebUIKnowledgeBase, kb)
                    for kb in knowledge_bases
                    if isinstance(kb, dict) and kb.get("name") == name
                ),
@@ -587,6 +547,7 @@ class OpenWebUIStorage(BaseStorage):
        exc_tb: object | None,
    ) -> None:
        """Async context manager exit."""
        _ = exc_type, exc_val, exc_tb  # Mark as used
        await self.close()

    async def list_documents(
@@ -595,7 +556,7 @@ class OpenWebUIStorage(BaseStorage):
        offset: int = 0,
        *,
        collection_name: str | None = None,
    ) -> list[dict[str, object]]:
    ) -> list[DocumentInfo]:
        """
        List documents (files) in a knowledge base.

@@ -645,11 +606,8 @@ class OpenWebUIStorage(BaseStorage):
        paginated_files = files[offset : offset + limit]

        # Convert to document format with safe field access
        documents: list[dict[str, object]] = []
        documents: list[DocumentInfo] = []
        for i, file_info in enumerate(paginated_files):
            if not isinstance(file_info, dict):
                continue

            # Safely extract fields with fallbacks
            doc_id = str(file_info.get("id", f"file_{i}"))

@@ -663,7 +621,9 @@ class OpenWebUIStorage(BaseStorage):
                filename = file_info["name"]
            # Check meta.name (from FileModelResponse schema)
            elif isinstance(file_info.get("meta"), dict):
                filename = file_info["meta"].get("name")
                meta = file_info.get("meta")
                if isinstance(meta, dict):
                    filename = meta.get("name")

            # Final fallback
            if not filename:
@@ -673,28 +633,28 @@ class OpenWebUIStorage(BaseStorage):

            # Extract size from meta if available
            size = 0
            if isinstance(file_info.get("meta"), dict):
                size = file_info["meta"].get("size", 0)
            meta = file_info.get("meta")
            if isinstance(meta, dict):
                size = meta.get("size", 0)
            else:
                size = file_info.get("size", 0)

            # Estimate word count from file size (very rough approximation)
            word_count = max(1, int(size / 6)) if isinstance(size, (int, float)) else 0

            documents.append(
                {
                    "id": doc_id,
                    "title": filename,
                    "source_url": "",  # OpenWebUI files don't typically have source URLs
                    "description": f"File: {filename}",
                    "content_type": str(file_info.get("content_type", "text/plain")),
                    "content_preview": f"File uploaded to OpenWebUI: {filename}",
                    "word_count": word_count,
                    "timestamp": str(
                        file_info.get("created_at") or file_info.get("timestamp", "")
                    ),
                }
            )
            doc_info: DocumentInfo = {
                "id": doc_id,
                "title": filename,
                "source_url": "",  # OpenWebUI files don't typically have source URLs
                "description": f"File: {filename}",
                "content_type": str(file_info.get("content_type", "text/plain")),
                "content_preview": f"File uploaded to OpenWebUI: {filename}",
                "word_count": word_count,
                "timestamp": str(
                    file_info.get("created_at") or file_info.get("timestamp", "")
                ),
            }
            documents.append(doc_info)

        return documents

@@ -721,10 +681,5 @@ class OpenWebUIStorage(BaseStorage):

    async def close(self) -> None:
        """Close client connection."""
        if hasattr(self, "client") and self.client:
            try:
                await self.client.aclose()
            except Exception as e:
                import logging

                logging.warning(f"Error closing OpenWebUI client: {e}")
        if hasattr(self, "http_client"):
            await self.http_client.close()
Binary file not shown.
@@ -10,15 +10,14 @@ from typing import Self, TypeVar, cast
from uuid import UUID, uuid4

# Direct imports for runtime and type checking
# Note: Some type checkers (basedpyright/Pyrefly) may report import issues
# but these work correctly at runtime and with mypy
from httpx import AsyncClient, HTTPStatusError
from r2r import R2RAsyncClient, R2RException
from httpx import AsyncClient, HTTPStatusError  # type: ignore
from r2r import R2RAsyncClient, R2RException  # type: ignore
from typing_extensions import override

from ...core.exceptions import StorageError
from ...core.models import Document, DocumentMetadata, IngestionSource, StorageConfig
from ..base import BaseStorage
from ..types import DocumentInfo

T = TypeVar("T")

@@ -80,6 +79,24 @@ class R2RStorage(BaseStorage):
        self.client: R2RAsyncClient = R2RAsyncClient(self.endpoint)
        self.default_collection_id: str | None = None

    def _get_http_client_headers(self) -> dict[str, str]:
        """Get consistent HTTP headers for direct API calls."""
        headers = {"Content-Type": "application/json"}

        # Add authentication headers if available
        # Note: R2R SDK may handle auth internally, so we extract it if possible
        if hasattr(self.client, "_get_headers"):
            with contextlib.suppress(Exception):
                sdk_headers = self.client._get_headers()  # type: ignore[attr-defined]
                if isinstance(sdk_headers, dict):
                    headers |= sdk_headers
        return headers

    def _create_http_client(self) -> AsyncClient:
        """Create a properly configured HTTP client for direct API calls."""
        headers = self._get_http_client_headers()
        return AsyncClient(headers=headers, timeout=30.0)

    @override
    async def initialize(self) -> None:
        """Initialize R2R connection and ensure default collection exists."""
@@ -96,7 +113,7 @@ class R2RStorage(BaseStorage):

        # Test connection using direct HTTP call to v3 API
        endpoint = self.endpoint
        client = AsyncClient()
        client = self._create_http_client()
        try:
            response = await client.get(f"{endpoint}/v3/collections")
            response.raise_for_status()
@@ -109,7 +126,7 @@ class R2RStorage(BaseStorage):
    async def _ensure_collection(self, collection_name: str) -> str:
        """Get or create collection by name."""
        endpoint = self.endpoint
        client = AsyncClient()
        client = self._create_http_client()
        try:
            # List collections and find by name
            response = await client.get(f"{endpoint}/v3/collections")
@@ -152,6 +169,9 @@ class R2RStorage(BaseStorage):
        finally:
            await client.aclose()

        # This should never be reached, but satisfies static analyzer
        raise StorageError(f"Unexpected code path in _ensure_collection for '{collection_name}'")

    @override
    async def store(self, document: Document, *, collection_name: str | None = None) -> str:
        """Store a single document."""
@@ -161,20 +181,44 @@ class R2RStorage(BaseStorage):
    async def store_batch(
        self, documents: list[Document], *, collection_name: str | None = None
    ) -> list[str]:
        """Store multiple documents."""
        """Store multiple documents efficiently with connection reuse."""
        collection_id = await self._resolve_collection_id(collection_name)
        print(
            f"Using collection ID: {collection_id} for collection: {collection_name or self.config.collection_name}"
        )

        stored_ids: list[str] = []
        for document in documents:
            if not self._is_document_valid(document):
                continue
        # Filter valid documents upfront
        valid_documents = [doc for doc in documents if self._is_document_valid(doc)]
        if not valid_documents:
            return []

            stored_id = await self._store_single_document(document, collection_id)
            if stored_id:
                stored_ids.append(stored_id)
        stored_ids: list[str] = []

        # Use a single HTTP client for all requests
        http_client = AsyncClient()
        async with http_client:  # type: ignore
            # Process documents with controlled concurrency
            import asyncio

            semaphore = asyncio.Semaphore(5)  # Limit concurrent uploads

            async def store_single_with_client(document: Document) -> str | None:
                async with semaphore:
                    return await self._store_single_document_with_client(
                        document, collection_id, http_client
                    )

            # Execute all uploads concurrently
            results = await asyncio.gather(
                *[store_single_with_client(doc) for doc in valid_documents], return_exceptions=True
            )

            # Collect successful IDs
            for result in results:
                if isinstance(result, str):
                    stored_ids.append(result)
                elif isinstance(result, Exception):
                    print(f"Document upload failed: {result}")

        return stored_ids
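The reworked store_batch above combines one shared AsyncClient, an asyncio.Semaphore that caps in-flight uploads at 5, and asyncio.gather(..., return_exceptions=True) so one failed document cannot abort the whole batch. The same pattern in isolation, where upload() is a stand-in for the real per-document call:

import asyncio

async def upload(doc: str) -> str:
    await asyncio.sleep(0.1)  # placeholder for one HTTP upload
    return f"id-{doc}"

async def upload_all(docs: list[str], limit: int = 5) -> list[str]:
    semaphore = asyncio.Semaphore(limit)  # at most `limit` uploads run concurrently

    async def guarded(doc: str) -> str:
        async with semaphore:
            return await upload(doc)

    # return_exceptions=True turns failures into values instead of raising,
    # so successes can still be collected afterwards.
    results = await asyncio.gather(*[guarded(d) for d in docs], return_exceptions=True)
    return [r for r in results if isinstance(r, str)]

print(asyncio.run(upload_all(["a", "b", "c"])))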
@@ -208,6 +252,16 @@ class R2RStorage(BaseStorage):
|
||||
|
||||
async def _store_single_document(self, document: Document, collection_id: str) -> str | None:
|
||||
"""Store a single document with retry logic."""
|
||||
http_client = AsyncClient()
|
||||
async with http_client: # type: ignore
|
||||
return await self._store_single_document_with_client(
|
||||
document, collection_id, http_client
|
||||
)
|
||||
|
||||
async def _store_single_document_with_client(
|
||||
self, document: Document, collection_id: str, http_client: AsyncClient
|
||||
) -> str | None:
|
||||
"""Store a single document with retry logic using provided HTTP client."""
|
||||
requested_id = str(document.id)
|
||||
print(f"Creating document with ID: {requested_id}")
|
||||
|
||||
@@ -216,15 +270,23 @@ class R2RStorage(BaseStorage):
|
||||
|
||||
for attempt in range(max_retries):
|
||||
try:
|
||||
doc_response = await self._attempt_document_creation(document, collection_id)
|
||||
doc_response = await self._attempt_document_creation_with_client(
|
||||
document, collection_id, http_client
|
||||
)
|
||||
if doc_response:
|
||||
return self._process_document_response(doc_response, requested_id, collection_id)
|
||||
return self._process_document_response(
|
||||
doc_response, requested_id, collection_id
|
||||
)
|
||||
except (TimeoutError, OSError) as e:
|
||||
if not await self._should_retry_timeout(e, attempt, max_retries, requested_id, retry_delay):
|
||||
if not await self._should_retry_timeout(
|
||||
e, attempt, max_retries, requested_id, retry_delay
|
||||
):
|
||||
break
|
||||
retry_delay *= 2
|
||||
except HTTPStatusError as e:
|
||||
if not await self._should_retry_http_error(e, attempt, max_retries, requested_id, retry_delay):
|
||||
if not await self._should_retry_http_error(
|
||||
e, attempt, max_retries, requested_id, retry_delay
|
||||
):
|
||||
break
|
||||
retry_delay *= 2
|
||||
except Exception as exc:
|
||||
@@ -233,8 +295,20 @@ class R2RStorage(BaseStorage):
|
||||
|
||||
return None
|
||||
|
||||
async def _attempt_document_creation(self, document: Document, collection_id: str) -> dict[str, object] | None:
|
||||
async def _attempt_document_creation(
|
||||
self, document: Document, collection_id: str
|
||||
) -> dict[str, object] | None:
|
||||
"""Attempt to create a document via HTTP API."""
|
||||
http_client = AsyncClient()
|
||||
async with http_client: # type: ignore
|
||||
return await self._attempt_document_creation_with_client(
|
||||
document, collection_id, http_client
|
||||
)
|
||||
|
||||
async def _attempt_document_creation_with_client(
|
||||
self, document: Document, collection_id: str, http_client: AsyncClient
|
||||
) -> dict[str, object] | None:
|
||||
"""Attempt to create a document via HTTP API using provided client."""
|
||||
import json
|
||||
|
||||
requested_id = str(document.id)
|
||||
@@ -255,29 +329,36 @@ class R2RStorage(BaseStorage):
|
||||
print(f"Sending to R2R - files keys: {list(files.keys())}")
|
||||
print(f"Metadata JSON: {files['metadata'][1]}")
|
||||
|
||||
async with AsyncClient() as http_client:
|
||||
response = await http_client.post(f"{self.endpoint}/v3/documents", files=files)
|
||||
response = await http_client.post(f"{self.endpoint}/v3/documents", files=files) # type: ignore[call-arg]
|
||||
|
||||
if response.status_code == 422:
|
||||
self._handle_validation_error(response, requested_id, metadata)
|
||||
return None
|
||||
if response.status_code == 422:
|
||||
self._handle_validation_error(response, requested_id, metadata)
|
||||
return None
|
||||
|
||||
response.raise_for_status()
|
||||
return response.json()
|
||||
response.raise_for_status()
|
||||
return response.json()
|
||||
|
||||
def _handle_validation_error(self, response: object, requested_id: str, metadata: dict[str, object]) -> None:
|
||||
def _handle_validation_error(
|
||||
self, response: object, requested_id: str, metadata: dict[str, object]
|
||||
) -> None:
|
||||
"""Handle validation errors from R2R API."""
|
||||
try:
|
||||
error_detail = getattr(response, 'json', lambda: {})() if hasattr(response, 'json') else {}
|
||||
error_detail = (
|
||||
getattr(response, "json", lambda: {})() if hasattr(response, "json") else {}
|
||||
)
|
||||
print(f"R2R validation error for document {requested_id}: {error_detail}")
|
||||
print(f"Document metadata sent: {metadata}")
|
||||
print(f"Response status: {getattr(response, 'status_code', 'unknown')}")
|
||||
print(f"Response headers: {dict(getattr(response, 'headers', {}))}")
|
||||
except Exception:
|
||||
print(f"R2R validation error for document {requested_id}: {getattr(response, 'text', 'unknown error')}")
|
||||
print(
|
||||
f"R2R validation error for document {requested_id}: {getattr(response, 'text', 'unknown error')}"
|
||||
)
|
||||
print(f"Document metadata sent: {metadata}")
|
||||
|
||||
def _process_document_response(self, doc_response: dict[str, object], requested_id: str, collection_id: str) -> str:
|
||||
def _process_document_response(
|
||||
self, doc_response: dict[str, object], requested_id: str, collection_id: str
|
||||
) -> str:
|
||||
"""Process successful document creation response."""
|
||||
response_payload = doc_response.get("results", doc_response)
|
||||
doc_id = _extract_id(response_payload, requested_id)
|
||||
@@ -288,11 +369,20 @@ class R2RStorage(BaseStorage):
|
||||
print(f"Warning: Requested ID {requested_id} but got {doc_id}")
|
||||
|
||||
if collection_id:
|
||||
print(f"Document {doc_id} should be assigned to collection {collection_id} via creation API")
|
||||
print(
|
||||
f"Document {doc_id} should be assigned to collection {collection_id} via creation API"
|
||||
)
|
||||
|
||||
return doc_id
|
||||
|
||||
async def _should_retry_timeout(self, error: Exception, attempt: int, max_retries: int, requested_id: str, retry_delay: float) -> bool:
|
||||
async def _should_retry_timeout(
|
||||
self,
|
||||
error: Exception,
|
||||
attempt: int,
|
||||
max_retries: int,
|
||||
requested_id: str,
|
||||
retry_delay: float,
|
||||
) -> bool:
|
||||
"""Determine if timeout error should be retried."""
|
||||
if attempt >= max_retries - 1:
|
||||
return False
|
||||
@@ -301,12 +391,22 @@ class R2RStorage(BaseStorage):
|
||||
await asyncio.sleep(retry_delay)
|
||||
return True
|
||||
|
||||
async def _should_retry_http_error(self, error: HTTPStatusError, attempt: int, max_retries: int, requested_id: str, retry_delay: float) -> bool:
|
||||
async def _should_retry_http_error(
|
||||
self,
|
||||
error: HTTPStatusError,
|
||||
attempt: int,
|
||||
max_retries: int,
|
||||
requested_id: str,
|
||||
retry_delay: float,
|
||||
) -> bool:
|
||||
"""Determine if HTTP error should be retried."""
|
||||
if error.response.status_code < 500 or attempt >= max_retries - 1:
|
||||
status_code = error.response.status_code
|
||||
if status_code < 500 or attempt >= max_retries - 1:
|
||||
return False
|
||||
|
||||
print(f"Server error {error.response.status_code} for document {requested_id}, retrying in {retry_delay}s...")
|
||||
print(
|
||||
f"Server error {status_code} for document {requested_id}, retrying in {retry_delay}s..."
|
||||
)
|
||||
await asyncio.sleep(retry_delay)
|
||||
return True
|
||||
|
||||
@@ -323,13 +423,13 @@ class R2RStorage(BaseStorage):
|
||||
print(" → Server error - R2R internal issue")
|
||||
else:
|
||||
import traceback
|
||||
|
||||
traceback.print_exc()
|
||||
|
||||
def _build_metadata(self, document: Document) -> dict[str, object]:
|
||||
"""Convert document metadata to enriched R2R format."""
|
||||
metadata = document.metadata
|
||||
|
||||
|
||||
# Core required fields
|
||||
result: dict[str, object] = {
|
||||
"source_url": metadata["source_url"],
|
||||
@@ -465,7 +565,9 @@ class R2RStorage(BaseStorage):
|
||||
except ValueError:
|
||||
return uuid4()
|
||||
|
||||
def _build_core_metadata(self, metadata_map: dict[str, object], timestamp: datetime) -> DocumentMetadata:
|
||||
def _build_core_metadata(
|
||||
self, metadata_map: dict[str, object], timestamp: datetime
|
||||
) -> DocumentMetadata:
|
||||
"""Build core required metadata fields."""
|
||||
return {
|
||||
"source_url": str(metadata_map.get("source_url", "")),
|
||||
@@ -475,7 +577,12 @@ class R2RStorage(BaseStorage):
|
||||
"char_count": _as_int(metadata_map.get("char_count")),
|
||||
}
|
||||
|
||||
def _add_optional_metadata_fields(self, metadata: DocumentMetadata, doc_map: dict[str, object], metadata_map: dict[str, object]) -> None:
|
||||
def _add_optional_metadata_fields(
|
||||
self,
|
||||
metadata: DocumentMetadata,
|
||||
doc_map: dict[str, object],
|
||||
metadata_map: dict[str, object],
|
||||
) -> None:
|
||||
"""Add optional metadata fields if present."""
|
||||
self._add_title_and_description(metadata, doc_map, metadata_map)
|
||||
self._add_content_categorization(metadata, metadata_map)
|
||||
@@ -484,7 +591,12 @@ class R2RStorage(BaseStorage):
|
||||
self._add_processing_fields(metadata, metadata_map)
|
||||
self._add_quality_scores(metadata, metadata_map)
|
||||
|
||||
def _add_title_and_description(self, metadata: DocumentMetadata, doc_map: dict[str, object], metadata_map: dict[str, object]) -> None:
|
||||
def _add_title_and_description(
|
||||
self,
|
||||
metadata: DocumentMetadata,
|
||||
doc_map: dict[str, object],
|
||||
metadata_map: dict[str, object],
|
||||
) -> None:
|
||||
"""Add title and description fields."""
|
||||
if title := (doc_map.get("title") or metadata_map.get("title")):
|
||||
metadata["title"] = cast(str | None, title)
|
||||
@@ -494,7 +606,9 @@ class R2RStorage(BaseStorage):
|
||||
elif description := metadata_map.get("description"):
|
||||
metadata["description"] = cast(str | None, description)
|
||||
|
||||
def _add_content_categorization(self, metadata: DocumentMetadata, metadata_map: dict[str, object]) -> None:
|
||||
def _add_content_categorization(
|
||||
self, metadata: DocumentMetadata, metadata_map: dict[str, object]
|
||||
) -> None:
|
||||
"""Add content categorization fields."""
|
||||
if tags := metadata_map.get("tags"):
|
||||
metadata["tags"] = [str(tag) for tag in tags] if isinstance(tags, list) else []
|
||||
@@ -505,7 +619,9 @@ class R2RStorage(BaseStorage):
|
||||
if language := metadata_map.get("language"):
|
||||
metadata["language"] = str(language)
|
||||
|
||||
def _add_authorship_fields(self, metadata: DocumentMetadata, metadata_map: dict[str, object]) -> None:
|
||||
def _add_authorship_fields(
|
||||
self, metadata: DocumentMetadata, metadata_map: dict[str, object]
|
||||
) -> None:
|
||||
"""Add authorship and source information fields."""
|
||||
if author := metadata_map.get("author"):
|
||||
metadata["author"] = str(author)
|
||||
@@ -514,7 +630,9 @@ class R2RStorage(BaseStorage):
        if site_name := metadata_map.get("site_name"):
            metadata["site_name"] = str(site_name)

    def _add_structure_fields(self, metadata: DocumentMetadata, metadata_map: dict[str, object]) -> None:
    def _add_structure_fields(
        self, metadata: DocumentMetadata, metadata_map: dict[str, object]
    ) -> None:
        """Add document structure fields."""
        if heading_hierarchy := metadata_map.get("heading_hierarchy"):
            metadata["heading_hierarchy"] = (
@@ -529,7 +647,9 @@ class R2RStorage(BaseStorage):
        if has_links := metadata_map.get("has_links"):
            metadata["has_links"] = bool(has_links)

    def _add_processing_fields(self, metadata: DocumentMetadata, metadata_map: dict[str, object]) -> None:
    def _add_processing_fields(
        self, metadata: DocumentMetadata, metadata_map: dict[str, object]
    ) -> None:
        """Add processing-related metadata fields."""
        if extraction_method := metadata_map.get("extraction_method"):
            metadata["extraction_method"] = str(extraction_method)
@@ -538,7 +658,9 @@ class R2RStorage(BaseStorage):
        if last_modified := metadata_map.get("last_modified"):
            metadata["last_modified"] = _as_datetime(last_modified)

    def _add_quality_scores(self, metadata: DocumentMetadata, metadata_map: dict[str, object]) -> None:
    def _add_quality_scores(
        self, metadata: DocumentMetadata, metadata_map: dict[str, object]
    ) -> None:
        """Add quality score fields with safe float conversion."""
        if readability_score := metadata_map.get("readability_score"):
            try:
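Note on the hunks above: every _add_* helper follows the same assign-and-test pattern with the walrus operator. A minimal self-contained sketch of that pattern (hypothetical names, not code from this diff); one caveat it carries is that falsy-but-valid values such as 0 or "" are silently skipped:

def add_optional(target: dict[str, object], source: dict[str, object], key: str) -> None:
    # Walrus test: only copy the field when it is present and truthy.
    if value := source.get(key):
        target[key] = str(value)

meta: dict[str, object] = {}
add_optional(meta, {"author": "Ada", "site_name": ""}, "author")
add_optional(meta, {"author": "Ada", "site_name": ""}, "site_name")
assert meta == {"author": "Ada"}  # the empty site_name was dropped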
@@ -641,7 +763,7 @@ class R2RStorage(BaseStorage):
    async def count(self, *, collection_name: str | None = None) -> int:
        """Get document count in collection."""
        endpoint = self.endpoint
        client = AsyncClient()
        client = self._create_http_client()
        try:
            # Get collections and find the count for the specific collection
            response = await client.get(f"{endpoint}/v3/collections")
@@ -662,6 +784,9 @@ class R2RStorage(BaseStorage):
        finally:
            await client.aclose()

        # This should never be reached, but satisfies static analyzer
        return 0

    @override
    async def close(self) -> None:
        """Close R2R client."""
@@ -709,7 +834,7 @@ class R2RStorage(BaseStorage):
    async def list_collections(self) -> list[str]:
        """List all available collections."""
        endpoint = self.endpoint
        client = AsyncClient()
        client = self._create_http_client()
        try:
            response = await client.get(f"{endpoint}/v3/collections")
            response.raise_for_status()
@@ -726,6 +851,9 @@ class R2RStorage(BaseStorage):
        finally:
            await client.aclose()

        # This should never be reached, but satisfies static analyzer
        return []

    async def list_collections_detailed(self) -> list[dict[str, object]]:
        """List all available collections with detailed information."""
        try:
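The count and list_collections hunks replace bare AsyncClient() construction with self._create_http_client(). The helper's body is outside this diff; a minimal sketch consistent with these call sites, assuming httpx and a configurable timeout:

import httpx


class R2RClientFactorySketch:
    """Illustration only: centralizing HTTP client construction in one factory method."""

    def __init__(self, endpoint: str, timeout: float = 60.0) -> None:
        self.endpoint = endpoint
        self.timeout = timeout

    def _create_http_client(self) -> httpx.AsyncClient:
        # One place to configure timeout (and auth headers, if needed) for every
        # call site, and one seam for tests to patch, which is why conftest's stub
        # below changes from lambda: ... to lambda **kwargs: ...
        return httpx.AsyncClient(timeout=self.timeout)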
@@ -789,7 +917,7 @@ class R2RStorage(BaseStorage):
        offset: int = 0,
        *,
        collection_name: str | None = None,
    ) -> list[dict[str, object]]:
    ) -> list[DocumentInfo]:
        """
        List documents in R2R with pagination.

@@ -802,14 +930,14 @@ class R2RStorage(BaseStorage):
            List of document dictionaries with metadata
        """
        try:
            documents: list[dict[str, object]] = []
            documents: list[DocumentInfo] = []

            if collection_name:
                # Get collection ID first
                collection_id = await self._ensure_collection(collection_name)
                # Use the collections API to list documents in a specific collection
                endpoint = self.endpoint
                client = AsyncClient()
                client = self._create_http_client()
                try:
                    params = {"offset": offset, "limit": limit}
                    response = await client.get(
@@ -842,20 +970,19 @@ class R2RStorage(BaseStorage):
                    title = str(doc_map.get("title", "Untitled"))
                    metadata = _as_mapping(doc_map.get("metadata", {}))

                    documents.append(
                        {
                            "id": doc_id,
                            "title": title,
                            "source_url": str(metadata.get("source_url", "")),
                            "description": str(metadata.get("description", "")),
                            "content_type": str(metadata.get("content_type", "text/plain")),
                            "content_preview": str(doc_map.get("content", ""))[:200] + "..."
                            if doc_map.get("content")
                            else "",
                            "word_count": _as_int(metadata.get("word_count", 0)),
                            "timestamp": str(doc_map.get("created_at", "")),
                        }
                    )
                    document_info: DocumentInfo = {
                        "id": doc_id,
                        "title": title,
                        "source_url": str(metadata.get("source_url", "")),
                        "description": str(metadata.get("description", "")),
                        "content_type": str(metadata.get("content_type", "text/plain")),
                        "content_preview": str(doc_map.get("content", ""))[:200] + "..."
                        if doc_map.get("content")
                        else "",
                        "word_count": _as_int(metadata.get("word_count", 0)),
                        "timestamp": str(doc_map.get("created_at", "")),
                    }
                    documents.append(document_info)

            return documents

22
ingest_pipeline/storage/types.py
Normal file
@@ -0,0 +1,22 @@
"""Shared types for storage adapters."""

from typing import TypedDict


class CollectionSummary(TypedDict):
    """Collection metadata for describe_collections."""
    name: str
    count: int
    size_mb: float


class DocumentInfo(TypedDict):
    """Document information for list_documents."""
    id: str
    title: str
    source_url: str
    description: str
    content_type: str
    content_preview: str
    word_count: int
    timestamp: str
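Moving list_documents and describe_collections from dict[str, object] to these TypedDicts lets type checkers verify key names and value types at each access. A small usage sketch (the helper function below is hypothetical, not from this commit):

from ingest_pipeline.storage.types import DocumentInfo


def newest_document(docs: list[DocumentInfo]) -> DocumentInfo | None:
    # "timestamp" is declared as str in DocumentInfo, so this key access
    # type-checks; a misspelled key would now be flagged statically.
    return max(docs, key=lambda doc: doc["timestamp"]) if docs else None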
@@ -21,6 +21,7 @@ from ..core.exceptions import StorageError
from ..core.models import Document, DocumentMetadata, IngestionSource, StorageConfig
from ..utils.vectorizer import Vectorizer
from .base import BaseStorage
from .types import CollectionSummary, DocumentInfo

VectorContainer: TypeAlias = Mapping[str, object] | Sequence[object] | None

@@ -594,14 +595,14 @@ class WeaviateStorage(BaseStorage):
        except Exception as e:
            raise StorageError(f"Failed to list collections: {e}") from e

    async def describe_collections(self) -> list[dict[str, object]]:
    async def describe_collections(self) -> list[CollectionSummary]:
        """Return metadata for each Weaviate collection."""
        if not self.client:
            raise StorageError("Weaviate client not initialized")

        try:
            client = cast(weaviate.WeaviateClient, self.client)
            collections: list[dict[str, object]] = []
            collections: list[CollectionSummary] = []
            for name in client.collections.list_all():
                collection_obj = client.collections.get(name)
                if not collection_obj:
@@ -609,13 +610,12 @@ class WeaviateStorage(BaseStorage):

                count = collection_obj.aggregate.over_all(total_count=True).total_count or 0
                size_mb = count * 0.01
                collections.append(
                    {
                        "name": name,
                        "count": count,
                        "size_mb": size_mb,
                    }
                )
                collection_summary: CollectionSummary = {
                    "name": name,
                    "count": count,
                    "size_mb": size_mb,
                }
                collections.append(collection_summary)

            return collections
        except Exception as e:
@@ -812,7 +812,7 @@ class WeaviateStorage(BaseStorage):
        offset: int = 0,
        *,
        collection_name: str | None = None,
    ) -> list[dict[str, object]]:
    ) -> list[DocumentInfo]:
        """
        List documents in the collection with pagination.

@@ -834,7 +834,7 @@ class WeaviateStorage(BaseStorage):
                limit=limit, offset=offset, return_metadata=["creation_time"]
            )

            documents: list[dict[str, object]] = []
            documents: list[DocumentInfo] = []
            for obj in response.objects:
                props = self._coerce_properties(
                    obj.properties,
@@ -853,7 +853,7 @@ class WeaviateStorage(BaseStorage):
            else:
                word_count = 0

            doc_info: dict[str, object] = {
            doc_info: DocumentInfo = {
                "id": str(obj.uuid),
                "title": str(props.get("title", "Untitled")),
                "source_url": str(props.get("source_url", "")),

Binary file not shown.
@@ -2,13 +2,17 @@

import json
from datetime import UTC, datetime
from typing import Protocol, TypedDict, cast
from typing import Final, Protocol, TypedDict, cast

import httpx

from ..core.exceptions import IngestionError
from ..core.models import Document

JSON_CONTENT_TYPE: Final[str] = "application/json"
AUTHORIZATION_HEADER: Final[str] = "Authorization"
from ..config import get_settings


class HttpResponse(Protocol):
    """Protocol for HTTP response."""
@@ -29,6 +33,15 @@ class AsyncHttpClient(Protocol):

    async def aclose(self) -> None: ...

    async def __aenter__(self) -> "AsyncHttpClient": ...

    async def __aexit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: object | None,
    ) -> None: ...


class LlmResponse(TypedDict):
    """Type for LLM API response structure."""
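Adding __aenter__/__aexit__ to the AsyncHttpClient protocol means conforming clients (httpx.AsyncClient already fits this shape) can be driven with async with, which guarantees cleanup on exit. A minimal sketch, not taken from this repo:

import httpx


async def fetch_status(url: str) -> int:
    # __aenter__/__aexit__ in action: the client is closed even if the GET raises.
    async with httpx.AsyncClient(timeout=10.0) as client:
        response = await client.get(url)
        return response.status_code

# e.g. asyncio.run(fetch_status("https://example.com"))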
@@ -66,8 +79,11 @@ class MetadataTagger:

    def __init__(
        self,
        llm_endpoint: str = "http://llm.lab",
        model: str = "fireworks/glm-4p5-air",
        llm_endpoint: str | None = None,
        model: str | None = None,
        api_key: str | None = None,
        *,
        timeout: float | None = None,
    ):
        """
        Initialize metadata tagger.
@@ -75,30 +91,26 @@ class MetadataTagger:

        Args:
            llm_endpoint: LLM API endpoint
            model: Model to use for tagging
            api_key: Explicit API key override
            timeout: Optional request timeout override in seconds
        """
        self.endpoint = llm_endpoint.rstrip('/')
        self.model = model
        settings = get_settings()
        endpoint_value = llm_endpoint or str(settings.llm_endpoint)
        self.endpoint = endpoint_value.rstrip('/')
        self.model = model or settings.metadata_model

        # Get API key from environment
        import os
        from pathlib import Path
        resolved_timeout = timeout if timeout is not None else float(settings.request_timeout)
        resolved_api_key = api_key or settings.get_llm_api_key() or ""

        from dotenv import load_dotenv

        # Load .env from the project root
        env_path = Path(__file__).parent.parent.parent / ".env"
        _ = load_dotenv(env_path)

        api_key = os.getenv("LLM_API_KEY") or os.getenv("OPENAI_API_KEY") or ""

        headers = {"Content-Type": "application/json"}
        if api_key:
            headers["Authorization"] = f"Bearer {api_key}"
        headers: dict[str, str] = {"Content-Type": JSON_CONTENT_TYPE}
        if resolved_api_key:
            headers[AUTHORIZATION_HEADER] = f"Bearer {resolved_api_key}"

        # Create client with proper typing - httpx.AsyncClient implements AsyncHttpClient protocol
        AsyncClientClass = getattr(httpx, "AsyncClient")
        raw_client = AsyncClientClass(timeout=60.0, headers=headers)
        self.client = cast(AsyncHttpClient, raw_client)
        self.client = cast(
            AsyncHttpClient,
            httpx.AsyncClient(timeout=resolved_timeout, headers=headers),
        )

    async def tag_document(
        self, document: Document, custom_instructions: str | None = None

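The new __init__ resolves every option as explicit argument first, then get_settings() fallback, replacing the old hard-coded defaults and ad-hoc dotenv loading. The shape of that pattern, with a stand-in Settings class (names mirror the diff; this is not the project's config module):

from dataclasses import dataclass


@dataclass
class Settings:
    llm_endpoint: str = "http://llm.lab"
    metadata_model: str = "fireworks/glm-4p5-air"
    request_timeout: float = 60.0


settings = Settings()
llm_endpoint: str | None = None       # caller passed nothing: fall back to settings
model: str | None = "custom-model"    # explicit caller override wins

endpoint = (llm_endpoint or settings.llm_endpoint).rstrip("/")
resolved_model = model or settings.metadata_model
assert endpoint == "http://llm.lab"
assert resolved_model == "custom-model"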
@@ -1,7 +1,7 @@
"""Vectorizer utility for generating embeddings."""

from types import TracebackType
from typing import Self, cast
from typing import Final, Self, cast

import httpx

@@ -9,6 +9,10 @@ from typings import EmbeddingResponse

from ..core.exceptions import VectorizationError
from ..core.models import StorageConfig, VectorConfig
from ..config import get_settings

JSON_CONTENT_TYPE: Final[str] = "application/json"
AUTHORIZATION_HEADER: Final[str] = "Authorization"


class Vectorizer:
@@ -25,33 +29,24 @@ class Vectorizer:

        Args:
            config: Configuration with embedding details
        """
        settings = get_settings()
        if isinstance(config, StorageConfig):
            # Extract vector config from storage config
            self.endpoint = "http://llm.lab"
            self.model = "ollama/bge-m3"
            self.dimension = 1024
            # Extract vector config from global settings when storage config is provided
            self.endpoint = str(settings.llm_endpoint).rstrip("/")
            self.model = settings.embedding_model
            self.dimension = settings.embedding_dimension
        else:
            self.endpoint = str(config.embedding_endpoint)
            self.endpoint = str(config.embedding_endpoint).rstrip("/")
            self.model = config.model
            self.dimension = config.dimension

        # Get API key from environment
        import os
        from pathlib import Path
        resolved_api_key = settings.get_llm_api_key() or ""
        headers: dict[str, str] = {"Content-Type": JSON_CONTENT_TYPE}
        if resolved_api_key:
            headers[AUTHORIZATION_HEADER] = f"Bearer {resolved_api_key}"

        from dotenv import load_dotenv

        # Load .env from the project root
        env_path = Path(__file__).parent.parent.parent / ".env"
        _ = load_dotenv(env_path)

        api_key = os.getenv("LLM_API_KEY") or os.getenv("OPENAI_API_KEY") or ""

        headers = {"Content-Type": "application/json"}
        if api_key:
            headers["Authorization"] = f"Bearer {api_key}"

        self.client: httpx.AsyncClient = httpx.AsyncClient(timeout=60.0, headers=headers)
        timeout_seconds = float(settings.request_timeout)
        self.client = httpx.AsyncClient(timeout=timeout_seconds, headers=headers)

    async def vectorize(self, text: str) -> list[float]:
        """

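Downstream, the configured client posts to the embeddings endpoint; the updated vectorizer test later in this commit expects POST {endpoint}/v1/embeddings with a "model" field of "ollama/bge-m3:latest". A sketch of that call under an OpenAI-style request/response layout (the "input" field and response shape are assumptions, not confirmed by this diff):

import httpx


async def embed(client: httpx.AsyncClient, endpoint: str, model: str, text: str) -> list[float]:
    response = await client.post(
        f"{endpoint}/v1/embeddings",
        json={"model": model, "input": text},  # "input" assumed (OpenAI-style)
    )
    response.raise_for_status()
    payload = response.json()
    return payload["data"][0]["embedding"]  # response shape assumed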
@@ -38,9 +38,9 @@ dev-dependencies = [
    "pytest-cov>=4.1.0",
    "mypy>=1.7.0",
    "ruff>=0.1.0",
    "basedpyright>=1.31.4",
    "pyrefly>=0.33.0",
    "sourcery>=1.37.0",
    "pylance>=0.36.0",
]

[tool.ruff]

13165
repomix-output.xml
File diff suppressed because it is too large
Binary file not shown.
@@ -218,6 +218,46 @@ class AsyncClientStub:
            files=None,
        )

    async def request(
        self,
        method: str,
        url: str,
        *,
        json: dict[str, object] | None = None,
        data: dict[str, object] | None = None,
        files: dict[str, tuple[str, bytes, str]] | None = None,
        params: dict[str, str | bool] | None = None,
    ) -> StubbedResponse:
        """Generic request method that delegates to specific HTTP methods."""
        # Convert params to the format expected by other methods
        converted_params: dict[str, object] | None = None
        if params:
            converted_params = {k: v for k, v in params.items()}

        method_upper = method.upper()
        if method_upper == "GET":
            return await self.get(url, params=converted_params)
        elif method_upper == "POST":
            return await self.post(url, json=json, files=files, params=converted_params)
        elif method_upper == "DELETE":
            return await self.delete(url, json=json, params=converted_params)
        else:
            # For other methods, use the consume/record pattern directly
            normalized = self._record(
                method=method_upper,
                url=url,
                json=json or data,
                params=converted_params,
                files=files,
            )
            return self._consume(
                method=method_upper,
                url=normalized,
                json=json or data,
                params=converted_params,
                files=files,
            )

    async def aclose(self) -> None:
        return None

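The new request method is a thin dispatcher over the stub's existing get/post/delete handlers, so code under test can use either calling style. The pattern in isolation (toy class, not the stub itself):

import asyncio


class TinyStub:
    async def get(self, url: str) -> str:
        return f"GET {url}"

    async def post(self, url: str) -> str:
        return f"POST {url}"

    async def request(self, method: str, url: str) -> str:
        # Same fan-out idea as AsyncClientStub.request, minus the params/json/files plumbing.
        handler = {"GET": self.get, "POST": self.post}[method.upper()]
        return await handler(url)


print(asyncio.run(TinyStub().request("get", "/v3/collections")))  # GET /v3/collections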
Binary file not shown.
@@ -11,10 +11,9 @@ from ingest_pipeline.flows import scheduler
def test_create_scheduled_deployment_cron(monkeypatch: pytest.MonkeyPatch) -> None:
    captured: dict[str, object] = {}

    class DummyFlow:
        def to_deployment(self, **kwargs: object) -> SimpleNamespace:
            nonlocal captured
            captured |= kwargs
            return SimpleNamespace(**kwargs)

@@ -37,10 +36,9 @@ def test_create_scheduled_deployment_cron(monkeypatch: pytest.MonkeyPatch) -> No
def test_create_scheduled_deployment_interval(monkeypatch: pytest.MonkeyPatch) -> None:
    captured: dict[str, object] = {}

    class DummyFlow:
        def to_deployment(self, **kwargs: object) -> SimpleNamespace:
            nonlocal captured
            captured |= kwargs
            return SimpleNamespace(**kwargs)

@@ -69,7 +67,7 @@ def test_serve_deployments_invokes_prefect(monkeypatch: pytest.MonkeyPatch) -> N
        called["deployments"] = deployments
        called["limit"] = limit

    monkeypatch.setattr(scheduler, "serve", fake_serve)
    monkeypatch.setattr(scheduler, "prefect_serve", fake_serve)

    deployment = SimpleNamespace(name="only")
    scheduler.serve_deployments([deployment])

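The test now patches scheduler.prefect_serve instead of scheduler.serve, which implies the scheduler module imports Prefect's serve under an alias so the deployment helper and the Prefect entry point no longer share a name. Presumably something like this on the scheduler side (not shown in this diff):

from prefect import serve as prefect_serve


def serve_deployments(deployments: list[object], limit: int | None = None) -> None:
    # A module-level alias gives tests a stable attribute to monkeypatch.
    prefect_serve(*deployments, limit=limit)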
Binary file not shown.
@@ -31,7 +31,7 @@ async def test_get_knowledge_id_returns_existing(
    assert knowledge_id == "kb-123"
    urls = [request["url"] for request in httpx_stub.requests]
    assert "http://storage.local/api/v1/knowledge/list" in urls
    await storage.client.aclose()
    await storage.http_client.client.aclose()


@pytest.mark.asyncio
@@ -54,7 +54,7 @@ async def test_get_knowledge_id_creates_when_missing(
        url.startswith("http://storage.local/api/v1/knowledge/") and url.endswith("/create")
        for url in urls
    )
    await storage.client.aclose()
    await storage.http_client.client.aclose()


@pytest.mark.asyncio
@@ -80,7 +80,7 @@ async def test_store_uploads_and_attaches_document(
    _, knowledge = knowledge_entry
    assert len(knowledge.get("files", [])) == 1
    assert knowledge["files"][0]["id"] == file_id
    await storage.client.aclose()
    await storage.http_client.client.aclose()


@pytest.mark.asyncio
@@ -105,7 +105,7 @@ async def test_store_batch_handles_multiple_documents(
    assert knowledge_entry is not None
    _, knowledge = knowledge_entry
    assert {meta["id"] for meta in knowledge.get("files", [])} == set(file_ids)
    await storage.client.aclose()
    await storage.http_client.client.aclose()


@pytest.mark.asyncio
@@ -133,4 +133,4 @@ async def test_delete_removes_file(
    knowledge = openwebui_service.get_knowledge("kb-55")
    assert knowledge is not None
    assert knowledge.get("files", []) == []
    await storage.client.aclose()
    await storage.http_client.client.aclose()

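Every teardown in these tests now reaches through storage.http_client.client, so the OpenWebUI storage evidently wraps its httpx client in an http_client helper object rather than exposing it directly. A stand-in for that shape (the wrapper class here is hypothetical):

import httpx


class HttpClientWrapper:
    """Hypothetical stand-in for the http_client attribute the updated tests use."""

    def __init__(self) -> None:
        self.client = httpx.AsyncClient()


class OpenWebUIStorageSketch:
    def __init__(self) -> None:
        self.http_client = HttpClientWrapper()

# teardown path exercised by the tests:
#     await storage.http_client.client.aclose()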
@@ -207,7 +207,7 @@ def r2r_client_stub(
    mock_async_client = MockAsyncClient(r2r_service)
    monkeypatch.setattr(
        "ingest_pipeline.storage.r2r.storage.AsyncClient",
        lambda: mock_async_client,
        lambda **kwargs: mock_async_client,
    )

    client = DummyClient(r2r_service)

Binary file not shown.
@@ -48,7 +48,7 @@ async def test_vectorizer_storage_config_uses_defaults(
    vector = await vectorizer.vectorize("repo content")

    assert len(vector) == 1024
    assert httpx_stub.requests[0]["json_body"]["model"] == "ollama/bge-m3"
    assert httpx_stub.requests[0]["json_body"]["model"] == "ollama/bge-m3:latest"
    assert httpx_stub.requests[0]["url"] == "http://llm.lab/v1/embeddings"

157
uv.lock
generated
@@ -236,18 +236,6 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/25/2f/efa9d26dbb612b774990741fd8f13c7cf4cfd085b870e4a5af5c82eaf5f1/authlib-1.6.3-py2.py3-none-any.whl", hash = "sha256:7ea0f082edd95a03b7b72edac65ec7f8f68d703017d7e37573aee4fc603f2a48", size = 240105, upload-time = "2025-08-26T12:13:23.889Z" },
]

[[package]]
name = "basedpyright"
version = "1.31.4"
source = { registry = "https://pypi.org/simple" }
dependencies = [
    { name = "nodejs-wheel-binaries" },
]
sdist = { url = "https://files.pythonhosted.org/packages/0b/53/570b03ec0445a9b2cc69788482c1d12902a9b88a9b159e449c4c537c4e3a/basedpyright-1.31.4.tar.gz", hash = "sha256:2450deb16530f7c88c1a7da04530a079f9b0b18ae1c71cb6f812825b3b82d0b1", size = 22494467, upload-time = "2025-09-03T13:05:55.817Z" }
wheels = [
    { url = "https://files.pythonhosted.org/packages/e5/40/d1047a5addcade9291685d06ef42a63c1347517018bafd82747af9da0294/basedpyright-1.31.4-py3-none-any.whl", hash = "sha256:055e4a38024bd653be12d6216c1cfdbee49a1096d342b4d5f5b4560f7714b6fc", size = 11731440, upload-time = "2025-09-03T13:05:52.308Z" },
]

[[package]]
name = "cachetools"
version = "6.2.0"
@@ -989,8 +977,8 @@ dependencies = [

[package.dev-dependencies]
dev = [
    { name = "basedpyright" },
    { name = "mypy" },
    { name = "pylance" },
    { name = "pyrefly" },
    { name = "pytest" },
    { name = "pytest-asyncio" },
@@ -1019,8 +1007,8 @@ requires-dist = [

[package.metadata.requires-dev]
dev = [
    { name = "basedpyright", specifier = ">=1.31.4" },
    { name = "mypy", specifier = ">=1.7.0" },
    { name = "pylance", specifier = ">=0.36.0" },
    { name = "pyrefly", specifier = ">=0.33.0" },
    { name = "pytest", specifier = ">=7.4.0" },
    { name = "pytest-asyncio", specifier = ">=0.21.0" },
@@ -1432,19 +1420,84 @@ wheels = [
]

[[package]]
name = "nodejs-wheel-binaries"
version = "22.19.0"
name = "numpy"
version = "2.3.3"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/bd/ca/6033f80b7aebc23cb31ed8b09608b6308c5273c3522aedd043e8a0644d83/nodejs_wheel_binaries-22.19.0.tar.gz", hash = "sha256:e69b97ef443d36a72602f7ed356c6a36323873230f894799f4270a853932fdb3", size = 8060, upload-time = "2025-09-12T10:33:46.935Z" }
sdist = { url = "https://files.pythonhosted.org/packages/d0/19/95b3d357407220ed24c139018d2518fab0a61a948e68286a25f1a4d049ff/numpy-2.3.3.tar.gz", hash = "sha256:ddc7c39727ba62b80dfdbedf400d1c10ddfa8eefbd7ec8dcb118be8b56d31029", size = 20576648, upload-time = "2025-09-09T16:54:12.543Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/93/a2/0d055fd1d8c9a7a971c4db10cf42f3bba57c964beb6cf383ca053f2cdd20/nodejs_wheel_binaries-22.19.0-py2.py3-none-macosx_11_0_arm64.whl", hash = "sha256:43eca1526455a1fb4cb777095198f7ebe5111a4444749c87f5c2b84645aaa72a", size = 50902454, upload-time = "2025-09-12T10:33:18.3Z" },
{ url = "https://files.pythonhosted.org/packages/b5/f5/446f7b3c5be1d2f5145ffa3c9aac3496e06cdf0f436adeb21a1f95dd79a7/nodejs_wheel_binaries-22.19.0-py2.py3-none-macosx_11_0_x86_64.whl", hash = "sha256:feb06709e1320790d34babdf71d841ec7f28e4c73217d733e7f5023060a86bfc", size = 51837860, upload-time = "2025-09-12T10:33:21.599Z" },
{ url = "https://files.pythonhosted.org/packages/1e/4e/d0a036f04fd0f5dc3ae505430657044b8d9853c33be6b2d122bb171aaca3/nodejs_wheel_binaries-22.19.0-py2.py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:db9f5777292491430457c99228d3a267decf12a09d31246f0692391e3513285e", size = 57841528, upload-time = "2025-09-12T10:33:25.433Z" },
{ url = "https://files.pythonhosted.org/packages/e2/11/4811d27819f229cc129925c170db20c12d4f01ad366a0066f06d6eb833cf/nodejs_wheel_binaries-22.19.0-py2.py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1392896f1a05a88a8a89b26e182d90fdf3020b4598a047807b91b65731e24c00", size = 58368815, upload-time = "2025-09-12T10:33:29.083Z" },
{ url = "https://files.pythonhosted.org/packages/6e/94/df41416856b980e38a7ff280cfb59f142a77955ccdbec7cc4260d8ab2e78/nodejs_wheel_binaries-22.19.0-py2.py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:9164c876644f949cad665e3ada00f75023e18f381e78a1d7b60ccbbfb4086e73", size = 59690937, upload-time = "2025-09-12T10:33:32.771Z" },
{ url = "https://files.pythonhosted.org/packages/d1/39/8d0d5f84b7616bdc4eca725f5d64a1cfcac3d90cf3f30cae17d12f8e987f/nodejs_wheel_binaries-22.19.0-py2.py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:6b4b75166134010bc9cfebd30dc57047796a27049fef3fc22316216d76bc0af7", size = 60751996, upload-time = "2025-09-12T10:33:36.962Z" },
{ url = "https://files.pythonhosted.org/packages/41/93/2d66b5b60055dd1de6e37e35bef563c15e4cafa5cfe3a6990e0ab358e515/nodejs_wheel_binaries-22.19.0-py2.py3-none-win_amd64.whl", hash = "sha256:3f271f5abfc71b052a6b074225eca8c1223a0f7216863439b86feaca814f6e5a", size = 40026140, upload-time = "2025-09-12T10:33:40.33Z" },
{ url = "https://files.pythonhosted.org/packages/a3/46/c9cf7ff7e3c71f07ca8331c939afd09b6e59fc85a2944ea9411e8b29ce50/nodejs_wheel_binaries-22.19.0-py2.py3-none-win_arm64.whl", hash = "sha256:666a355fe0c9bde44a9221cd543599b029045643c8196b8eedb44f28dc192e06", size = 38804500, upload-time = "2025-09-12T10:33:43.302Z" },
{ url = "https://files.pythonhosted.org/packages/7a/45/e80d203ef6b267aa29b22714fb558930b27960a0c5ce3c19c999232bb3eb/numpy-2.3.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0ffc4f5caba7dfcbe944ed674b7eef683c7e94874046454bb79ed7ee0236f59d", size = 21259253, upload-time = "2025-09-09T15:56:02.094Z" },
{ url = "https://files.pythonhosted.org/packages/52/18/cf2c648fccf339e59302e00e5f2bc87725a3ce1992f30f3f78c9044d7c43/numpy-2.3.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e7e946c7170858a0295f79a60214424caac2ffdb0063d4d79cb681f9aa0aa569", size = 14450980, upload-time = "2025-09-09T15:56:05.926Z" },
{ url = "https://files.pythonhosted.org/packages/93/fb/9af1082bec870188c42a1c239839915b74a5099c392389ff04215dcee812/numpy-2.3.3-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:cd4260f64bc794c3390a63bf0728220dd1a68170c169088a1e0dfa2fde1be12f", size = 5379709, upload-time = "2025-09-09T15:56:07.95Z" },
{ url = "https://files.pythonhosted.org/packages/75/0f/bfd7abca52bcbf9a4a65abc83fe18ef01ccdeb37bfb28bbd6ad613447c79/numpy-2.3.3-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:f0ddb4b96a87b6728df9362135e764eac3cfa674499943ebc44ce96c478ab125", size = 6913923, upload-time = "2025-09-09T15:56:09.443Z" },
{ url = "https://files.pythonhosted.org/packages/79/55/d69adad255e87ab7afda1caf93ca997859092afeb697703e2f010f7c2e55/numpy-2.3.3-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:afd07d377f478344ec6ca2b8d4ca08ae8bd44706763d1efb56397de606393f48", size = 14589591, upload-time = "2025-09-09T15:56:11.234Z" },
{ url = "https://files.pythonhosted.org/packages/10/a2/010b0e27ddeacab7839957d7a8f00e91206e0c2c47abbb5f35a2630e5387/numpy-2.3.3-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bc92a5dedcc53857249ca51ef29f5e5f2f8c513e22cfb90faeb20343b8c6f7a6", size = 16938714, upload-time = "2025-09-09T15:56:14.637Z" },
{ url = "https://files.pythonhosted.org/packages/1c/6b/12ce8ede632c7126eb2762b9e15e18e204b81725b81f35176eac14dc5b82/numpy-2.3.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:7af05ed4dc19f308e1d9fc759f36f21921eb7bbfc82843eeec6b2a2863a0aefa", size = 16370592, upload-time = "2025-09-09T15:56:17.285Z" },
{ url = "https://files.pythonhosted.org/packages/b4/35/aba8568b2593067bb6a8fe4c52babb23b4c3b9c80e1b49dff03a09925e4a/numpy-2.3.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:433bf137e338677cebdd5beac0199ac84712ad9d630b74eceeb759eaa45ddf30", size = 18884474, upload-time = "2025-09-09T15:56:20.943Z" },
{ url = "https://files.pythonhosted.org/packages/45/fa/7f43ba10c77575e8be7b0138d107e4f44ca4a1ef322cd16980ea3e8b8222/numpy-2.3.3-cp311-cp311-win32.whl", hash = "sha256:eb63d443d7b4ffd1e873f8155260d7f58e7e4b095961b01c91062935c2491e57", size = 6599794, upload-time = "2025-09-09T15:56:23.258Z" },
{ url = "https://files.pythonhosted.org/packages/0a/a2/a4f78cb2241fe5664a22a10332f2be886dcdea8784c9f6a01c272da9b426/numpy-2.3.3-cp311-cp311-win_amd64.whl", hash = "sha256:ec9d249840f6a565f58d8f913bccac2444235025bbb13e9a4681783572ee3caa", size = 13088104, upload-time = "2025-09-09T15:56:25.476Z" },
{ url = "https://files.pythonhosted.org/packages/79/64/e424e975adbd38282ebcd4891661965b78783de893b381cbc4832fb9beb2/numpy-2.3.3-cp311-cp311-win_arm64.whl", hash = "sha256:74c2a948d02f88c11a3c075d9733f1ae67d97c6bdb97f2bb542f980458b257e7", size = 10460772, upload-time = "2025-09-09T15:56:27.679Z" },
{ url = "https://files.pythonhosted.org/packages/51/5d/bb7fc075b762c96329147799e1bcc9176ab07ca6375ea976c475482ad5b3/numpy-2.3.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:cfdd09f9c84a1a934cde1eec2267f0a43a7cd44b2cca4ff95b7c0d14d144b0bf", size = 20957014, upload-time = "2025-09-09T15:56:29.966Z" },
{ url = "https://files.pythonhosted.org/packages/6b/0e/c6211bb92af26517acd52125a237a92afe9c3124c6a68d3b9f81b62a0568/numpy-2.3.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:cb32e3cf0f762aee47ad1ddc6672988f7f27045b0783c887190545baba73aa25", size = 14185220, upload-time = "2025-09-09T15:56:32.175Z" },
{ url = "https://files.pythonhosted.org/packages/22/f2/07bb754eb2ede9073f4054f7c0286b0d9d2e23982e090a80d478b26d35ca/numpy-2.3.3-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:396b254daeb0a57b1fe0ecb5e3cff6fa79a380fa97c8f7781a6d08cd429418fe", size = 5113918, upload-time = "2025-09-09T15:56:34.175Z" },
{ url = "https://files.pythonhosted.org/packages/81/0a/afa51697e9fb74642f231ea36aca80fa17c8fb89f7a82abd5174023c3960/numpy-2.3.3-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:067e3d7159a5d8f8a0b46ee11148fc35ca9b21f61e3c49fbd0a027450e65a33b", size = 6647922, upload-time = "2025-09-09T15:56:36.149Z" },
{ url = "https://files.pythonhosted.org/packages/5d/f5/122d9cdb3f51c520d150fef6e87df9279e33d19a9611a87c0d2cf78a89f4/numpy-2.3.3-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1c02d0629d25d426585fb2e45a66154081b9fa677bc92a881ff1d216bc9919a8", size = 14281991, upload-time = "2025-09-09T15:56:40.548Z" },
{ url = "https://files.pythonhosted.org/packages/51/64/7de3c91e821a2debf77c92962ea3fe6ac2bc45d0778c1cbe15d4fce2fd94/numpy-2.3.3-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d9192da52b9745f7f0766531dcfa978b7763916f158bb63bdb8a1eca0068ab20", size = 16641643, upload-time = "2025-09-09T15:56:43.343Z" },
{ url = "https://files.pythonhosted.org/packages/30/e4/961a5fa681502cd0d68907818b69f67542695b74e3ceaa513918103b7e80/numpy-2.3.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:cd7de500a5b66319db419dc3c345244404a164beae0d0937283b907d8152e6ea", size = 16056787, upload-time = "2025-09-09T15:56:46.141Z" },
{ url = "https://files.pythonhosted.org/packages/99/26/92c912b966e47fbbdf2ad556cb17e3a3088e2e1292b9833be1dfa5361a1a/numpy-2.3.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:93d4962d8f82af58f0b2eb85daaf1b3ca23fe0a85d0be8f1f2b7bb46034e56d7", size = 18579598, upload-time = "2025-09-09T15:56:49.844Z" },
{ url = "https://files.pythonhosted.org/packages/17/b6/fc8f82cb3520768718834f310c37d96380d9dc61bfdaf05fe5c0b7653e01/numpy-2.3.3-cp312-cp312-win32.whl", hash = "sha256:5534ed6b92f9b7dca6c0a19d6df12d41c68b991cef051d108f6dbff3babc4ebf", size = 6320800, upload-time = "2025-09-09T15:56:52.499Z" },
{ url = "https://files.pythonhosted.org/packages/32/ee/de999f2625b80d043d6d2d628c07d0d5555a677a3cf78fdf868d409b8766/numpy-2.3.3-cp312-cp312-win_amd64.whl", hash = "sha256:497d7cad08e7092dba36e3d296fe4c97708c93daf26643a1ae4b03f6294d30eb", size = 12786615, upload-time = "2025-09-09T15:56:54.422Z" },
{ url = "https://files.pythonhosted.org/packages/49/6e/b479032f8a43559c383acb20816644f5f91c88f633d9271ee84f3b3a996c/numpy-2.3.3-cp312-cp312-win_arm64.whl", hash = "sha256:ca0309a18d4dfea6fc6262a66d06c26cfe4640c3926ceec90e57791a82b6eee5", size = 10195936, upload-time = "2025-09-09T15:56:56.541Z" },
{ url = "https://files.pythonhosted.org/packages/7d/b9/984c2b1ee61a8b803bf63582b4ac4242cf76e2dbd663efeafcb620cc0ccb/numpy-2.3.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f5415fb78995644253370985342cd03572ef8620b934da27d77377a2285955bf", size = 20949588, upload-time = "2025-09-09T15:56:59.087Z" },
{ url = "https://files.pythonhosted.org/packages/a6/e4/07970e3bed0b1384d22af1e9912527ecbeb47d3b26e9b6a3bced068b3bea/numpy-2.3.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:d00de139a3324e26ed5b95870ce63be7ec7352171bc69a4cf1f157a48e3eb6b7", size = 14177802, upload-time = "2025-09-09T15:57:01.73Z" },
{ url = "https://files.pythonhosted.org/packages/35/c7/477a83887f9de61f1203bad89cf208b7c19cc9fef0cebef65d5a1a0619f2/numpy-2.3.3-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:9dc13c6a5829610cc07422bc74d3ac083bd8323f14e2827d992f9e52e22cd6a6", size = 5106537, upload-time = "2025-09-09T15:57:03.765Z" },
{ url = "https://files.pythonhosted.org/packages/52/47/93b953bd5866a6f6986344d045a207d3f1cfbad99db29f534ea9cee5108c/numpy-2.3.3-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:d79715d95f1894771eb4e60fb23f065663b2298f7d22945d66877aadf33d00c7", size = 6640743, upload-time = "2025-09-09T15:57:07.921Z" },
{ url = "https://files.pythonhosted.org/packages/23/83/377f84aaeb800b64c0ef4de58b08769e782edcefa4fea712910b6f0afd3c/numpy-2.3.3-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:952cfd0748514ea7c3afc729a0fc639e61655ce4c55ab9acfab14bda4f402b4c", size = 14278881, upload-time = "2025-09-09T15:57:11.349Z" },
{ url = "https://files.pythonhosted.org/packages/9a/a5/bf3db6e66c4b160d6ea10b534c381a1955dfab34cb1017ea93aa33c70ed3/numpy-2.3.3-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5b83648633d46f77039c29078751f80da65aa64d5622a3cd62aaef9d835b6c93", size = 16636301, upload-time = "2025-09-09T15:57:14.245Z" },
{ url = "https://files.pythonhosted.org/packages/a2/59/1287924242eb4fa3f9b3a2c30400f2e17eb2707020d1c5e3086fe7330717/numpy-2.3.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:b001bae8cea1c7dfdb2ae2b017ed0a6f2102d7a70059df1e338e307a4c78a8ae", size = 16053645, upload-time = "2025-09-09T15:57:16.534Z" },
{ url = "https://files.pythonhosted.org/packages/e6/93/b3d47ed882027c35e94ac2320c37e452a549f582a5e801f2d34b56973c97/numpy-2.3.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8e9aced64054739037d42fb84c54dd38b81ee238816c948c8f3ed134665dcd86", size = 18578179, upload-time = "2025-09-09T15:57:18.883Z" },
{ url = "https://files.pythonhosted.org/packages/20/d9/487a2bccbf7cc9d4bfc5f0f197761a5ef27ba870f1e3bbb9afc4bbe3fcc2/numpy-2.3.3-cp313-cp313-win32.whl", hash = "sha256:9591e1221db3f37751e6442850429b3aabf7026d3b05542d102944ca7f00c8a8", size = 6312250, upload-time = "2025-09-09T15:57:21.296Z" },
{ url = "https://files.pythonhosted.org/packages/1b/b5/263ebbbbcede85028f30047eab3d58028d7ebe389d6493fc95ae66c636ab/numpy-2.3.3-cp313-cp313-win_amd64.whl", hash = "sha256:f0dadeb302887f07431910f67a14d57209ed91130be0adea2f9793f1a4f817cf", size = 12783269, upload-time = "2025-09-09T15:57:23.034Z" },
{ url = "https://files.pythonhosted.org/packages/fa/75/67b8ca554bbeaaeb3fac2e8bce46967a5a06544c9108ec0cf5cece559b6c/numpy-2.3.3-cp313-cp313-win_arm64.whl", hash = "sha256:3c7cf302ac6e0b76a64c4aecf1a09e51abd9b01fc7feee80f6c43e3ab1b1dbc5", size = 10195314, upload-time = "2025-09-09T15:57:25.045Z" },
{ url = "https://files.pythonhosted.org/packages/11/d0/0d1ddec56b162042ddfafeeb293bac672de9b0cfd688383590090963720a/numpy-2.3.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:eda59e44957d272846bb407aad19f89dc6f58fecf3504bd144f4c5cf81a7eacc", size = 21048025, upload-time = "2025-09-09T15:57:27.257Z" },
{ url = "https://files.pythonhosted.org/packages/36/9e/1996ca6b6d00415b6acbdd3c42f7f03ea256e2c3f158f80bd7436a8a19f3/numpy-2.3.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:823d04112bc85ef5c4fda73ba24e6096c8f869931405a80aa8b0e604510a26bc", size = 14301053, upload-time = "2025-09-09T15:57:30.077Z" },
{ url = "https://files.pythonhosted.org/packages/05/24/43da09aa764c68694b76e84b3d3f0c44cb7c18cdc1ba80e48b0ac1d2cd39/numpy-2.3.3-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:40051003e03db4041aa325da2a0971ba41cf65714e65d296397cc0e32de6018b", size = 5229444, upload-time = "2025-09-09T15:57:32.733Z" },
{ url = "https://files.pythonhosted.org/packages/bc/14/50ffb0f22f7218ef8af28dd089f79f68289a7a05a208db9a2c5dcbe123c1/numpy-2.3.3-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:6ee9086235dd6ab7ae75aba5662f582a81ced49f0f1c6de4260a78d8f2d91a19", size = 6738039, upload-time = "2025-09-09T15:57:34.328Z" },
{ url = "https://files.pythonhosted.org/packages/55/52/af46ac0795e09657d45a7f4db961917314377edecf66db0e39fa7ab5c3d3/numpy-2.3.3-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:94fcaa68757c3e2e668ddadeaa86ab05499a70725811e582b6a9858dd472fb30", size = 14352314, upload-time = "2025-09-09T15:57:36.255Z" },
{ url = "https://files.pythonhosted.org/packages/a7/b1/dc226b4c90eb9f07a3fff95c2f0db3268e2e54e5cce97c4ac91518aee71b/numpy-2.3.3-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:da1a74b90e7483d6ce5244053399a614b1d6b7bc30a60d2f570e5071f8959d3e", size = 16701722, upload-time = "2025-09-09T15:57:38.622Z" },
{ url = "https://files.pythonhosted.org/packages/9d/9d/9d8d358f2eb5eced14dba99f110d83b5cd9a4460895230f3b396ad19a323/numpy-2.3.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:2990adf06d1ecee3b3dcbb4977dfab6e9f09807598d647f04d385d29e7a3c3d3", size = 16132755, upload-time = "2025-09-09T15:57:41.16Z" },
{ url = "https://files.pythonhosted.org/packages/b6/27/b3922660c45513f9377b3fb42240bec63f203c71416093476ec9aa0719dc/numpy-2.3.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:ed635ff692483b8e3f0fcaa8e7eb8a75ee71aa6d975388224f70821421800cea", size = 18651560, upload-time = "2025-09-09T15:57:43.459Z" },
{ url = "https://files.pythonhosted.org/packages/5b/8e/3ab61a730bdbbc201bb245a71102aa609f0008b9ed15255500a99cd7f780/numpy-2.3.3-cp313-cp313t-win32.whl", hash = "sha256:a333b4ed33d8dc2b373cc955ca57babc00cd6f9009991d9edc5ddbc1bac36bcd", size = 6442776, upload-time = "2025-09-09T15:57:45.793Z" },
{ url = "https://files.pythonhosted.org/packages/1c/3a/e22b766b11f6030dc2decdeff5c2fb1610768055603f9f3be88b6d192fb2/numpy-2.3.3-cp313-cp313t-win_amd64.whl", hash = "sha256:4384a169c4d8f97195980815d6fcad04933a7e1ab3b530921c3fef7a1c63426d", size = 12927281, upload-time = "2025-09-09T15:57:47.492Z" },
{ url = "https://files.pythonhosted.org/packages/7b/42/c2e2bc48c5e9b2a83423f99733950fbefd86f165b468a3d85d52b30bf782/numpy-2.3.3-cp313-cp313t-win_arm64.whl", hash = "sha256:75370986cc0bc66f4ce5110ad35aae6d182cc4ce6433c40ad151f53690130bf1", size = 10265275, upload-time = "2025-09-09T15:57:49.647Z" },
{ url = "https://files.pythonhosted.org/packages/6b/01/342ad585ad82419b99bcf7cebe99e61da6bedb89e213c5fd71acc467faee/numpy-2.3.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:cd052f1fa6a78dee696b58a914b7229ecfa41f0a6d96dc663c1220a55e137593", size = 20951527, upload-time = "2025-09-09T15:57:52.006Z" },
{ url = "https://files.pythonhosted.org/packages/ef/d8/204e0d73fc1b7a9ee80ab1fe1983dd33a4d64a4e30a05364b0208e9a241a/numpy-2.3.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:414a97499480067d305fcac9716c29cf4d0d76db6ebf0bf3cbce666677f12652", size = 14186159, upload-time = "2025-09-09T15:57:54.407Z" },
{ url = "https://files.pythonhosted.org/packages/22/af/f11c916d08f3a18fb8ba81ab72b5b74a6e42ead4c2846d270eb19845bf74/numpy-2.3.3-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:50a5fe69f135f88a2be9b6ca0481a68a136f6febe1916e4920e12f1a34e708a7", size = 5114624, upload-time = "2025-09-09T15:57:56.5Z" },
{ url = "https://files.pythonhosted.org/packages/fb/11/0ed919c8381ac9d2ffacd63fd1f0c34d27e99cab650f0eb6f110e6ae4858/numpy-2.3.3-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:b912f2ed2b67a129e6a601e9d93d4fa37bef67e54cac442a2f588a54afe5c67a", size = 6642627, upload-time = "2025-09-09T15:57:58.206Z" },
{ url = "https://files.pythonhosted.org/packages/ee/83/deb5f77cb0f7ba6cb52b91ed388b47f8f3c2e9930d4665c600408d9b90b9/numpy-2.3.3-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9e318ee0596d76d4cb3d78535dc005fa60e5ea348cd131a51e99d0bdbe0b54fe", size = 14296926, upload-time = "2025-09-09T15:58:00.035Z" },
{ url = "https://files.pythonhosted.org/packages/77/cc/70e59dcb84f2b005d4f306310ff0a892518cc0c8000a33d0e6faf7ca8d80/numpy-2.3.3-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ce020080e4a52426202bdb6f7691c65bb55e49f261f31a8f506c9f6bc7450421", size = 16638958, upload-time = "2025-09-09T15:58:02.738Z" },
{ url = "https://files.pythonhosted.org/packages/b6/5a/b2ab6c18b4257e099587d5b7f903317bd7115333ad8d4ec4874278eafa61/numpy-2.3.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:e6687dc183aa55dae4a705b35f9c0f8cb178bcaa2f029b241ac5356221d5c021", size = 16071920, upload-time = "2025-09-09T15:58:05.029Z" },
{ url = "https://files.pythonhosted.org/packages/b8/f1/8b3fdc44324a259298520dd82147ff648979bed085feeacc1250ef1656c0/numpy-2.3.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:d8f3b1080782469fdc1718c4ed1d22549b5fb12af0d57d35e992158a772a37cf", size = 18577076, upload-time = "2025-09-09T15:58:07.745Z" },
{ url = "https://files.pythonhosted.org/packages/f0/a1/b87a284fb15a42e9274e7fcea0dad259d12ddbf07c1595b26883151ca3b4/numpy-2.3.3-cp314-cp314-win32.whl", hash = "sha256:cb248499b0bc3be66ebd6578b83e5acacf1d6cb2a77f2248ce0e40fbec5a76d0", size = 6366952, upload-time = "2025-09-09T15:58:10.096Z" },
{ url = "https://files.pythonhosted.org/packages/70/5f/1816f4d08f3b8f66576d8433a66f8fa35a5acfb3bbd0bf6c31183b003f3d/numpy-2.3.3-cp314-cp314-win_amd64.whl", hash = "sha256:691808c2b26b0f002a032c73255d0bd89751425f379f7bcd22d140db593a96e8", size = 12919322, upload-time = "2025-09-09T15:58:12.138Z" },
{ url = "https://files.pythonhosted.org/packages/8c/de/072420342e46a8ea41c324a555fa90fcc11637583fb8df722936aed1736d/numpy-2.3.3-cp314-cp314-win_arm64.whl", hash = "sha256:9ad12e976ca7b10f1774b03615a2a4bab8addce37ecc77394d8e986927dc0dfe", size = 10478630, upload-time = "2025-09-09T15:58:14.64Z" },
{ url = "https://files.pythonhosted.org/packages/d5/df/ee2f1c0a9de7347f14da5dd3cd3c3b034d1b8607ccb6883d7dd5c035d631/numpy-2.3.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:9cc48e09feb11e1db00b320e9d30a4151f7369afb96bd0e48d942d09da3a0d00", size = 21047987, upload-time = "2025-09-09T15:58:16.889Z" },
{ url = "https://files.pythonhosted.org/packages/d6/92/9453bdc5a4e9e69cf4358463f25e8260e2ffc126d52e10038b9077815989/numpy-2.3.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:901bf6123879b7f251d3631967fd574690734236075082078e0571977c6a8e6a", size = 14301076, upload-time = "2025-09-09T15:58:20.343Z" },
{ url = "https://files.pythonhosted.org/packages/13/77/1447b9eb500f028bb44253105bd67534af60499588a5149a94f18f2ca917/numpy-2.3.3-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:7f025652034199c301049296b59fa7d52c7e625017cae4c75d8662e377bf487d", size = 5229491, upload-time = "2025-09-09T15:58:22.481Z" },
{ url = "https://files.pythonhosted.org/packages/3d/f9/d72221b6ca205f9736cb4b2ce3b002f6e45cd67cd6a6d1c8af11a2f0b649/numpy-2.3.3-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:533ca5f6d325c80b6007d4d7fb1984c303553534191024ec6a524a4c92a5935a", size = 6737913, upload-time = "2025-09-09T15:58:24.569Z" },
{ url = "https://files.pythonhosted.org/packages/3c/5f/d12834711962ad9c46af72f79bb31e73e416ee49d17f4c797f72c96b6ca5/numpy-2.3.3-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0edd58682a399824633b66885d699d7de982800053acf20be1eaa46d92009c54", size = 14352811, upload-time = "2025-09-09T15:58:26.416Z" },
{ url = "https://files.pythonhosted.org/packages/a1/0d/fdbec6629d97fd1bebed56cd742884e4eead593611bbe1abc3eb40d304b2/numpy-2.3.3-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:367ad5d8fbec5d9296d18478804a530f1191e24ab4d75ab408346ae88045d25e", size = 16702689, upload-time = "2025-09-09T15:58:28.831Z" },
{ url = "https://files.pythonhosted.org/packages/9b/09/0a35196dc5575adde1eb97ddfbc3e1687a814f905377621d18ca9bc2b7dd/numpy-2.3.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:8f6ac61a217437946a1fa48d24c47c91a0c4f725237871117dea264982128097", size = 16133855, upload-time = "2025-09-09T15:58:31.349Z" },
{ url = "https://files.pythonhosted.org/packages/7a/ca/c9de3ea397d576f1b6753eaa906d4cdef1bf97589a6d9825a349b4729cc2/numpy-2.3.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:179a42101b845a816d464b6fe9a845dfaf308fdfc7925387195570789bb2c970", size = 18652520, upload-time = "2025-09-09T15:58:33.762Z" },
{ url = "https://files.pythonhosted.org/packages/fd/c2/e5ed830e08cd0196351db55db82f65bc0ab05da6ef2b72a836dcf1936d2f/numpy-2.3.3-cp314-cp314t-win32.whl", hash = "sha256:1250c5d3d2562ec4174bce2e3a1523041595f9b651065e4a4473f5f48a6bc8a5", size = 6515371, upload-time = "2025-09-09T15:58:36.04Z" },
{ url = "https://files.pythonhosted.org/packages/47/c7/b0f6b5b67f6788a0725f744496badbb604d226bf233ba716683ebb47b570/numpy-2.3.3-cp314-cp314t-win_amd64.whl", hash = "sha256:b37a0b2e5935409daebe82c1e42274d30d9dd355852529eab91dab8dcca7419f", size = 13112576, upload-time = "2025-09-09T15:58:37.927Z" },
{ url = "https://files.pythonhosted.org/packages/06/b9/33bba5ff6fb679aa0b1f8a07e853f002a6b04b9394db3069a1270a7784ca/numpy-2.3.3-cp314-cp314t-win_arm64.whl", hash = "sha256:78c9f6560dc7e6b3990e32df7ea1a50bbd0e2a111e05209963f5ddcab7073b0b", size = 10545953, upload-time = "2025-09-09T15:58:40.576Z" },
{ url = "https://files.pythonhosted.org/packages/b8/f2/7e0a37cfced2644c9563c529f29fa28acbd0960dde32ece683aafa6f4949/numpy-2.3.3-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:1e02c7159791cd481e1e6d5ddd766b62a4d5acf8df4d4d1afe35ee9c5c33a41e", size = 21131019, upload-time = "2025-09-09T15:58:42.838Z" },
{ url = "https://files.pythonhosted.org/packages/1a/7e/3291f505297ed63831135a6cc0f474da0c868a1f31b0dd9a9f03a7a0d2ed/numpy-2.3.3-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:dca2d0fc80b3893ae72197b39f69d55a3cd8b17ea1b50aa4c62de82419936150", size = 14376288, upload-time = "2025-09-09T15:58:45.425Z" },
{ url = "https://files.pythonhosted.org/packages/bf/4b/ae02e985bdeee73d7b5abdefeb98aef1207e96d4c0621ee0cf228ddfac3c/numpy-2.3.3-pp311-pypy311_pp73-macosx_14_0_arm64.whl", hash = "sha256:99683cbe0658f8271b333a1b1b4bb3173750ad59c0c61f5bbdc5b318918fffe3", size = 5305425, upload-time = "2025-09-09T15:58:48.6Z" },
{ url = "https://files.pythonhosted.org/packages/8b/eb/9df215d6d7250db32007941500dc51c48190be25f2401d5b2b564e467247/numpy-2.3.3-pp311-pypy311_pp73-macosx_14_0_x86_64.whl", hash = "sha256:d9d537a39cc9de668e5cd0e25affb17aec17b577c6b3ae8a3d866b479fbe88d0", size = 6819053, upload-time = "2025-09-09T15:58:50.401Z" },
{ url = "https://files.pythonhosted.org/packages/57/62/208293d7d6b2a8998a4a1f23ac758648c3c32182d4ce4346062018362e29/numpy-2.3.3-pp311-pypy311_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8596ba2f8af5f93b01d97563832686d20206d303024777f6dfc2e7c7c3f1850e", size = 14420354, upload-time = "2025-09-09T15:58:52.704Z" },
{ url = "https://files.pythonhosted.org/packages/ed/0c/8e86e0ff7072e14a71b4c6af63175e40d1e7e933ce9b9e9f765a95b4e0c3/numpy-2.3.3-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e1ec5615b05369925bd1125f27df33f3b6c8bc10d788d5999ecd8769a1fa04db", size = 16760413, upload-time = "2025-09-09T15:58:55.027Z" },
{ url = "https://files.pythonhosted.org/packages/af/11/0cc63f9f321ccf63886ac203336777140011fb669e739da36d8db3c53b98/numpy-2.3.3-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:2e267c7da5bf7309670523896df97f93f6e469fb931161f483cd6882b3b1a5dc", size = 12971844, upload-time = "2025-09-09T15:58:57.359Z" },
]

[[package]]
@@ -1835,6 +1888,42 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/5a/dd/464bd739bacb3b745a1c93bc15f20f0b1e27f0a64ec693367794b398673b/psycopg_binary-3.2.10-cp314-cp314-win_amd64.whl", hash = "sha256:d5c6a66a76022af41970bf19f51bc6bf87bd10165783dd1d40484bfd87d6b382", size = 2973554, upload-time = "2025-09-08T09:12:05.884Z" },
]

[[package]]
name = "pyarrow"
version = "21.0.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/ef/c2/ea068b8f00905c06329a3dfcd40d0fcc2b7d0f2e355bdb25b65e0a0e4cd4/pyarrow-21.0.0.tar.gz", hash = "sha256:5051f2dccf0e283ff56335760cbc8622cf52264d67e359d5569541ac11b6d5bc", size = 1133487, upload-time = "2025-07-18T00:57:31.761Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/94/dc/80564a3071a57c20b7c32575e4a0120e8a330ef487c319b122942d665960/pyarrow-21.0.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:c077f48aab61738c237802836fc3844f85409a46015635198761b0d6a688f87b", size = 31243234, upload-time = "2025-07-18T00:55:03.812Z" },
{ url = "https://files.pythonhosted.org/packages/ea/cc/3b51cb2db26fe535d14f74cab4c79b191ed9a8cd4cbba45e2379b5ca2746/pyarrow-21.0.0-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:689f448066781856237eca8d1975b98cace19b8dd2ab6145bf49475478bcaa10", size = 32714370, upload-time = "2025-07-18T00:55:07.495Z" },
{ url = "https://files.pythonhosted.org/packages/24/11/a4431f36d5ad7d83b87146f515c063e4d07ef0b7240876ddb885e6b44f2e/pyarrow-21.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:479ee41399fcddc46159a551705b89c05f11e8b8cb8e968f7fec64f62d91985e", size = 41135424, upload-time = "2025-07-18T00:55:11.461Z" },
{ url = "https://files.pythonhosted.org/packages/74/dc/035d54638fc5d2971cbf1e987ccd45f1091c83bcf747281cf6cc25e72c88/pyarrow-21.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:40ebfcb54a4f11bcde86bc586cbd0272bac0d516cfa539c799c2453768477569", size = 42823810, upload-time = "2025-07-18T00:55:16.301Z" },
{ url = "https://files.pythonhosted.org/packages/2e/3b/89fced102448a9e3e0d4dded1f37fa3ce4700f02cdb8665457fcc8015f5b/pyarrow-21.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8d58d8497814274d3d20214fbb24abcad2f7e351474357d552a8d53bce70c70e", size = 43391538, upload-time = "2025-07-18T00:55:23.82Z" },
{ url = "https://files.pythonhosted.org/packages/fb/bb/ea7f1bd08978d39debd3b23611c293f64a642557e8141c80635d501e6d53/pyarrow-21.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:585e7224f21124dd57836b1530ac8f2df2afc43c861d7bf3d58a4870c42ae36c", size = 45120056, upload-time = "2025-07-18T00:55:28.231Z" },
{ url = "https://files.pythonhosted.org/packages/6e/0b/77ea0600009842b30ceebc3337639a7380cd946061b620ac1a2f3cb541e2/pyarrow-21.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:555ca6935b2cbca2c0e932bedd853e9bc523098c39636de9ad4693b5b1df86d6", size = 26220568, upload-time = "2025-07-18T00:55:32.122Z" },
{ url = "https://files.pythonhosted.org/packages/ca/d4/d4f817b21aacc30195cf6a46ba041dd1be827efa4a623cc8bf39a1c2a0c0/pyarrow-21.0.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:3a302f0e0963db37e0a24a70c56cf91a4faa0bca51c23812279ca2e23481fccd", size = 31160305, upload-time = "2025-07-18T00:55:35.373Z" },
{ url = "https://files.pythonhosted.org/packages/a2/9c/dcd38ce6e4b4d9a19e1d36914cb8e2b1da4e6003dd075474c4cfcdfe0601/pyarrow-21.0.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:b6b27cf01e243871390474a211a7922bfbe3bda21e39bc9160daf0da3fe48876", size = 32684264, upload-time = "2025-07-18T00:55:39.303Z" },
{ url = "https://files.pythonhosted.org/packages/4f/74/2a2d9f8d7a59b639523454bec12dba35ae3d0a07d8ab529dc0809f74b23c/pyarrow-21.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:e72a8ec6b868e258a2cd2672d91f2860ad532d590ce94cdf7d5e7ec674ccf03d", size = 41108099, upload-time = "2025-07-18T00:55:42.889Z" },
{ url = "https://files.pythonhosted.org/packages/ad/90/2660332eeb31303c13b653ea566a9918484b6e4d6b9d2d46879a33ab0622/pyarrow-21.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:b7ae0bbdc8c6674259b25bef5d2a1d6af5d39d7200c819cf99e07f7dfef1c51e", size = 42829529, upload-time = "2025-07-18T00:55:47.069Z" },
{ url = "https://files.pythonhosted.org/packages/33/27/1a93a25c92717f6aa0fca06eb4700860577d016cd3ae51aad0e0488ac899/pyarrow-21.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:58c30a1729f82d201627c173d91bd431db88ea74dcaa3885855bc6203e433b82", size = 43367883, upload-time = "2025-07-18T00:55:53.069Z" },
{ url = "https://files.pythonhosted.org/packages/05/d9/4d09d919f35d599bc05c6950095e358c3e15148ead26292dfca1fb659b0c/pyarrow-21.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:072116f65604b822a7f22945a7a6e581cfa28e3454fdcc6939d4ff6090126623", size = 45133802, upload-time = "2025-07-18T00:55:57.714Z" },
{ url = "https://files.pythonhosted.org/packages/71/30/f3795b6e192c3ab881325ffe172e526499eb3780e306a15103a2764916a2/pyarrow-21.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:cf56ec8b0a5c8c9d7021d6fd754e688104f9ebebf1bf4449613c9531f5346a18", size = 26203175, upload-time = "2025-07-18T00:56:01.364Z" },
{ url = "https://files.pythonhosted.org/packages/16/ca/c7eaa8e62db8fb37ce942b1ea0c6d7abfe3786ca193957afa25e71b81b66/pyarrow-21.0.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:e99310a4ebd4479bcd1964dff9e14af33746300cb014aa4a3781738ac63baf4a", size = 31154306, upload-time = "2025-07-18T00:56:04.42Z" },
{ url = "https://files.pythonhosted.org/packages/ce/e8/e87d9e3b2489302b3a1aea709aaca4b781c5252fcb812a17ab6275a9a484/pyarrow-21.0.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:d2fe8e7f3ce329a71b7ddd7498b3cfac0eeb200c2789bd840234f0dc271a8efe", size = 32680622, upload-time = "2025-07-18T00:56:07.505Z" },
{ url = "https://files.pythonhosted.org/packages/84/52/79095d73a742aa0aba370c7942b1b655f598069489ab387fe47261a849e1/pyarrow-21.0.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:f522e5709379d72fb3da7785aa489ff0bb87448a9dc5a75f45763a795a089ebd", size = 41104094, upload-time = "2025-07-18T00:56:10.994Z" },
{ url = "https://files.pythonhosted.org/packages/89/4b/7782438b551dbb0468892a276b8c789b8bbdb25ea5c5eb27faadd753e037/pyarrow-21.0.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:69cbbdf0631396e9925e048cfa5bce4e8c3d3b41562bbd70c685a8eb53a91e61", size = 42825576, upload-time = "2025-07-18T00:56:15.569Z" },
{ url = "https://files.pythonhosted.org/packages/b3/62/0f29de6e0a1e33518dec92c65be0351d32d7ca351e51ec5f4f837a9aab91/pyarrow-21.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:731c7022587006b755d0bdb27626a1a3bb004bb56b11fb30d98b6c1b4718579d", size = 43368342, upload-time = "2025-07-18T00:56:19.531Z" },
{ url = "https://files.pythonhosted.org/packages/90/c7/0fa1f3f29cf75f339768cc698c8ad4ddd2481c1742e9741459911c9ac477/pyarrow-21.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:dc56bc708f2d8ac71bd1dcb927e458c93cec10b98eb4120206a4091db7b67b99", size = 45131218, upload-time = "2025-07-18T00:56:23.347Z" },
{ url = "https://files.pythonhosted.org/packages/01/63/581f2076465e67b23bc5a37d4a2abff8362d389d29d8105832e82c9c811c/pyarrow-21.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:186aa00bca62139f75b7de8420f745f2af12941595bbbfa7ed3870ff63e25636", size = 26087551, upload-time = "2025-07-18T00:56:26.758Z" },
{ url = "https://files.pythonhosted.org/packages/c9/ab/357d0d9648bb8241ee7348e564f2479d206ebe6e1c47ac5027c2e31ecd39/pyarrow-21.0.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:a7a102574faa3f421141a64c10216e078df467ab9576684d5cd696952546e2da", size = 31290064, upload-time = "2025-07-18T00:56:30.214Z" },
{ url = "https://files.pythonhosted.org/packages/3f/8a/5685d62a990e4cac2043fc76b4661bf38d06efed55cf45a334b455bd2759/pyarrow-21.0.0-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:1e005378c4a2c6db3ada3ad4c217b381f6c886f0a80d6a316fe586b90f77efd7", size = 32727837, upload-time = "2025-07-18T00:56:33.935Z" },
{ url = "https://files.pythonhosted.org/packages/fc/de/c0828ee09525c2bafefd3e736a248ebe764d07d0fd762d4f0929dbc516c9/pyarrow-21.0.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:65f8e85f79031449ec8706b74504a316805217b35b6099155dd7e227eef0d4b6", size = 41014158, upload-time = "2025-07-18T00:56:37.528Z" },
{ url = "https://files.pythonhosted.org/packages/6e/26/a2865c420c50b7a3748320b614f3484bfcde8347b2639b2b903b21ce6a72/pyarrow-21.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:3a81486adc665c7eb1a2bde0224cfca6ceaba344a82a971ef059678417880eb8", size = 42667885, upload-time = "2025-07-18T00:56:41.483Z" },
{ url = "https://files.pythonhosted.org/packages/0a/f9/4ee798dc902533159250fb4321267730bc0a107d8c6889e07c3add4fe3a5/pyarrow-21.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:fc0d2f88b81dcf3ccf9a6ae17f89183762c8a94a5bdcfa09e05cfe413acf0503", size = 43276625, upload-time = "2025-07-18T00:56:48.002Z" },
{ url = "https://files.pythonhosted.org/packages/5a/da/e02544d6997037a4b0d22d8e5f66bc9315c3671371a8b18c79ade1cefe14/pyarrow-21.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:6299449adf89df38537837487a4f8d3bd91ec94354fdd2a7d30bc11c48ef6e79", size = 44951890, upload-time = "2025-07-18T00:56:52.568Z" },
{ url = "https://files.pythonhosted.org/packages/e5/4e/519c1bc1876625fe6b71e9a28287c43ec2f20f73c658b9ae1d485c0c206e/pyarrow-21.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:222c39e2c70113543982c6b34f3077962b44fca38c0bd9e68bb6781534425c10", size = 26371006, upload-time = "2025-07-18T00:56:56.379Z" },
]
[[package]]
name = "pycparser"
version = "2.23"
@@ -1960,6 +2049,24 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" },
]
[[package]]
name = "pylance"
version = "0.36.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "numpy" },
{ name = "pyarrow" },
]
wheels = [
{ url = "https://files.pythonhosted.org/packages/09/13/f7f029d12a3dfdc9f3059d77b3999d40f9cc064ba85fef885a08bf65dcb2/pylance-0.36.0-cp39-abi3-macosx_10_15_x86_64.whl", hash = "sha256:160ed088dc5fb63a71c8c96640d43ea58464f64bca8aa23b0337b1a96fd47b79", size = 43403867, upload-time = "2025-09-12T20:29:25.507Z" },
{ url = "https://files.pythonhosted.org/packages/95/95/defad18786260653b33d5ef8223736c0e481861c8d33311756bd471468ad/pylance-0.36.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:ce43ad002b4e67ffb1a33925d05d472bbde77c57a5e84aca1728faa9ace0c086", size = 39777498, upload-time = "2025-09-12T20:27:02.906Z" },
{ url = "https://files.pythonhosted.org/packages/19/33/7080ed4e45648d8c803a49cd5a206eb95176ef9dc06bff26748ec2109c65/pylance-0.36.0-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6ad7b168b0d4b7864be6040bebaf6d9a3959e76a190ff401a84b165b75eade96", size = 41819489, upload-time = "2025-09-12T20:17:06.37Z" },
{ url = "https://files.pythonhosted.org/packages/29/9a/0c572994d96e03e70481dafb2b062033a9ce24beb5ac6045f00f013ca57c/pylance-0.36.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:353deeb7b19be505db490258b5f2fc897efd4a45255fa0d51455662e01ad59ab", size = 45366480, upload-time = "2025-09-12T20:19:53.924Z" },
{ url = "https://files.pythonhosted.org/packages/fe/82/a74f0436b6a983c2798d1f44699352cd98c42bc335781ece98a878cf63fb/pylance-0.36.0-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:9cd963fc22257591d1daf281fa2369e05299d78950cb11980aa099d7cbacdf00", size = 41833322, upload-time = "2025-09-12T20:17:40.784Z" },
{ url = "https://files.pythonhosted.org/packages/a8/f2/d28fa3487992c3bd46af6838da13cf9a00be24fcf4cf928f77feec52d8d6/pylance-0.36.0-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:40117569a87379e08ed12eccac658999158f81df946f2ed02693b77776b57597", size = 45347065, upload-time = "2025-09-12T20:19:26.435Z" },
{ url = "https://files.pythonhosted.org/packages/ff/ab/e7fc302950f1c6815a6e832d052d0860130374bfe4bd482b075299dc8384/pylance-0.36.0-cp39-abi3-win_amd64.whl", hash = "sha256:a2930738192e5075220bc38c8a58ff4e48a71d53b3ca2a577ffce0318609cac0", size = 46348996, upload-time = "2025-09-12T20:36:04.663Z" },
]
[[package]]
name = "pyrefly"
version = "0.33.0"