ee
This commit is contained in:
@@ -46,7 +46,9 @@ uv run ruff check .
|
||||
uv run ruff format .
|
||||
|
||||
# Type checking
|
||||
uv run mypy ingest_pipeline
|
||||
uv run pyrefly check ingest_pipeline/ tests/
|
||||
uv run basedpyright
|
||||
uv run sourcery review ingest_pipeline/ tests/ --fix
|
||||
|
||||
# Install dev dependencies
|
||||
uv sync --dev
|
||||
|
||||
@@ -2,10 +2,11 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
from typing import cast
|
||||
|
||||
from textual.app import ComposeResult
|
||||
from textual.containers import Container, VerticalScroll
|
||||
from textual.widget import Widget
|
||||
from textual.widgets import Static
|
||||
from typing_extensions import override
|
||||
|
||||
@@ -13,7 +14,7 @@ from typing_extensions import override
|
||||
class ResponsiveGrid(Container):
|
||||
"""Grid that auto-adjusts based on terminal size."""
|
||||
|
||||
DEFAULT_CSS = """
|
||||
DEFAULT_CSS: str = """
|
||||
ResponsiveGrid {
|
||||
layout: grid;
|
||||
grid-size: 1;
|
||||
@@ -45,58 +46,64 @@ class ResponsiveGrid(Container):
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
*children: Any,
|
||||
*children: Widget,
|
||||
columns: int = 1,
|
||||
auto_fit: bool = False,
|
||||
compact: bool = False,
|
||||
**kwargs: Any,
|
||||
name: str | None = None,
|
||||
id: str | None = None,
|
||||
classes: str | None = None,
|
||||
disabled: bool = False,
|
||||
markup: bool = True,
|
||||
) -> None:
|
||||
"""Initialize responsive grid."""
|
||||
super().__init__(*children, **kwargs)
|
||||
self.columns = columns
|
||||
self.auto_fit = auto_fit
|
||||
self.compact = compact
|
||||
super().__init__(*children, name=name, id=id, classes=classes, disabled=disabled, markup=markup)
|
||||
self._columns: int = columns
|
||||
self._auto_fit: bool = auto_fit
|
||||
self._compact: bool = compact
|
||||
|
||||
def on_mount(self) -> None:
|
||||
"""Apply responsive classes based on configuration."""
|
||||
if self.auto_fit:
|
||||
_ = self.add_class("auto-fit")
|
||||
elif self.columns == 2:
|
||||
_ = self.add_class("two-column")
|
||||
elif self.columns == 3:
|
||||
_ = self.add_class("three-column")
|
||||
widget = cast(Widget, self)
|
||||
if self._auto_fit:
|
||||
widget.add_class("auto-fit")
|
||||
elif self._columns == 2:
|
||||
widget.add_class("two-column")
|
||||
elif self._columns == 3:
|
||||
widget.add_class("three-column")
|
||||
|
||||
if self.compact:
|
||||
_ = self.add_class("compact")
|
||||
if self._compact:
|
||||
widget.add_class("compact")
|
||||
|
||||
def on_resize(self) -> None:
|
||||
"""Adjust layout based on terminal size."""
|
||||
if self.auto_fit:
|
||||
if self._auto_fit:
|
||||
# Let CSS handle auto-fit
|
||||
return
|
||||
|
||||
terminal_width = self.size.width
|
||||
widget = cast(Widget, self)
|
||||
terminal_width = widget.size.width
|
||||
if terminal_width < 60:
|
||||
# Force single column on narrow terminals
|
||||
_ = self.remove_class("two-column", "three-column")
|
||||
self.styles.grid_size_columns = 1
|
||||
self.styles.grid_columns = "1fr"
|
||||
elif terminal_width < 100 and self.columns > 2:
|
||||
widget.remove_class("two-column", "three-column")
|
||||
widget.styles.grid_size_columns = 1
|
||||
widget.styles.grid_columns = "1fr"
|
||||
elif terminal_width < 100 and self._columns > 2:
|
||||
# Force two columns on medium terminals
|
||||
_ = self.remove_class("three-column")
|
||||
_ = self.add_class("two-column")
|
||||
self.styles.grid_size_columns = 2
|
||||
self.styles.grid_columns = "1fr 1fr"
|
||||
elif self.columns == 2:
|
||||
_ = self.add_class("two-column")
|
||||
elif self.columns == 3:
|
||||
_ = self.add_class("three-column")
|
||||
widget.remove_class("three-column")
|
||||
widget.add_class("two-column")
|
||||
widget.styles.grid_size_columns = 2
|
||||
widget.styles.grid_columns = "1fr 1fr"
|
||||
elif self._columns == 2:
|
||||
widget.add_class("two-column")
|
||||
elif self._columns == 3:
|
||||
widget.add_class("three-column")
|
||||
|
||||
|
||||
class CollapsibleSidebar(Container):
|
||||
"""Sidebar that can be collapsed to save space."""
|
||||
|
||||
DEFAULT_CSS = """
|
||||
DEFAULT_CSS: str = """
|
||||
CollapsibleSidebar {
|
||||
dock: left;
|
||||
width: 25%;
|
||||
@@ -133,11 +140,20 @@ class CollapsibleSidebar(Container):
|
||||
}
|
||||
"""
|
||||
|
||||
def __init__(self, *children: Any, collapsed: bool = False, **kwargs: Any) -> None:
|
||||
def __init__(
|
||||
self,
|
||||
*children: Widget,
|
||||
collapsed: bool = False,
|
||||
name: str | None = None,
|
||||
id: str | None = None,
|
||||
classes: str | None = None,
|
||||
disabled: bool = False,
|
||||
markup: bool = True,
|
||||
) -> None:
|
||||
"""Initialize collapsible sidebar."""
|
||||
super().__init__(**kwargs)
|
||||
self.collapsed = collapsed
|
||||
self._children = children
|
||||
super().__init__(name=name, id=id, classes=classes, disabled=disabled, markup=markup)
|
||||
self._collapsed: bool = collapsed
|
||||
self._children: tuple[Widget, ...] = children
|
||||
|
||||
@override
|
||||
def compose(self) -> ComposeResult:
|
||||
@@ -148,8 +164,8 @@ class CollapsibleSidebar(Container):
|
||||
|
||||
def on_mount(self) -> None:
|
||||
"""Apply initial collapsed state."""
|
||||
if self.collapsed:
|
||||
_ = self.add_class("collapsed")
|
||||
if self._collapsed:
|
||||
cast(Widget, self).add_class("collapsed")
|
||||
|
||||
def on_click(self) -> None:
|
||||
"""Toggle sidebar when clicked."""
|
||||
@@ -157,27 +173,28 @@ class CollapsibleSidebar(Container):
|
||||
|
||||
def toggle(self) -> None:
|
||||
"""Toggle sidebar collapsed state."""
|
||||
self.collapsed = not self.collapsed
|
||||
if self.collapsed:
|
||||
_ = self.add_class("collapsed")
|
||||
self._collapsed = not self._collapsed
|
||||
widget = cast(Widget, self)
|
||||
if self._collapsed:
|
||||
widget.add_class("collapsed")
|
||||
else:
|
||||
_ = self.remove_class("collapsed")
|
||||
widget.remove_class("collapsed")
|
||||
|
||||
def expand_sidebar(self) -> None:
|
||||
"""Expand sidebar."""
|
||||
if self.collapsed:
|
||||
if self._collapsed:
|
||||
self.toggle()
|
||||
|
||||
def collapse_sidebar(self) -> None:
|
||||
"""Collapse sidebar."""
|
||||
if not self.collapsed:
|
||||
if not self._collapsed:
|
||||
self.toggle()
|
||||
|
||||
|
||||
class TabularLayout(Container):
|
||||
"""Optimized layout for data tables with optional sidebar."""
|
||||
|
||||
DEFAULT_CSS = """
|
||||
DEFAULT_CSS: str = """
|
||||
TabularLayout {
|
||||
layout: horizontal;
|
||||
height: 100%;
|
||||
@@ -215,18 +232,22 @@ class TabularLayout(Container):
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
table_widget: Any,
|
||||
header_content: Any | None = None,
|
||||
footer_content: Any | None = None,
|
||||
sidebar_content: Any | None = None,
|
||||
**kwargs: Any,
|
||||
table_widget: Widget,
|
||||
header_content: Widget | None = None,
|
||||
footer_content: Widget | None = None,
|
||||
sidebar_content: Widget | None = None,
|
||||
name: str | None = None,
|
||||
id: str | None = None,
|
||||
classes: str | None = None,
|
||||
disabled: bool = False,
|
||||
markup: bool = True,
|
||||
) -> None:
|
||||
"""Initialize tabular layout."""
|
||||
super().__init__(**kwargs)
|
||||
self.table_widget = table_widget
|
||||
self.header_content = header_content
|
||||
self.footer_content = footer_content
|
||||
self.sidebar_content = sidebar_content
|
||||
super().__init__(name=name, id=id, classes=classes, disabled=disabled, markup=markup)
|
||||
self.table_widget: Widget = table_widget
|
||||
self.header_content: Widget | None = header_content
|
||||
self.footer_content: Widget | None = footer_content
|
||||
self.sidebar_content: Widget | None = sidebar_content
|
||||
|
||||
@override
|
||||
def compose(self) -> ComposeResult:
|
||||
@@ -247,7 +268,7 @@ class TabularLayout(Container):
|
||||
class CardLayout(ResponsiveGrid):
|
||||
"""Grid layout optimized for card-based content."""
|
||||
|
||||
DEFAULT_CSS = """
|
||||
DEFAULT_CSS: str = """
|
||||
CardLayout {
|
||||
grid-gutter: 2;
|
||||
padding: 2;
|
||||
@@ -296,16 +317,23 @@ class CardLayout(ResponsiveGrid):
|
||||
}
|
||||
"""
|
||||
|
||||
def __init__(self, **kwargs: Any) -> None:
|
||||
def __init__(
|
||||
self,
|
||||
name: str | None = None,
|
||||
id: str | None = None,
|
||||
classes: str | None = None,
|
||||
disabled: bool = False,
|
||||
markup: bool = True,
|
||||
) -> None:
|
||||
"""Initialize card layout with default settings for cards."""
|
||||
# Default to auto-fit cards with minimum width
|
||||
super().__init__(auto_fit=True, **kwargs)
|
||||
super().__init__(auto_fit=True, name=name, id=id, classes=classes, disabled=disabled, markup=markup)
|
||||
|
||||
|
||||
class SplitPane(Container):
|
||||
"""Resizable split pane layout."""
|
||||
|
||||
DEFAULT_CSS = """
|
||||
DEFAULT_CSS: str = """
|
||||
SplitPane {
|
||||
layout: horizontal;
|
||||
height: 100%;
|
||||
@@ -345,36 +373,41 @@ class SplitPane(Container):
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
left_content: Any,
|
||||
right_content: Any,
|
||||
left_content: Widget,
|
||||
right_content: Widget,
|
||||
vertical: bool = False,
|
||||
split_ratio: float = 0.5,
|
||||
**kwargs: Any,
|
||||
name: str | None = None,
|
||||
id: str | None = None,
|
||||
classes: str | None = None,
|
||||
disabled: bool = False,
|
||||
markup: bool = True,
|
||||
) -> None:
|
||||
"""Initialize split pane."""
|
||||
super().__init__(**kwargs)
|
||||
self.left_content = left_content
|
||||
self.right_content = right_content
|
||||
self.vertical = vertical
|
||||
self.split_ratio = split_ratio
|
||||
super().__init__(name=name, id=id, classes=classes, disabled=disabled, markup=markup)
|
||||
self._left_content: Widget = left_content
|
||||
self._right_content: Widget = right_content
|
||||
self._vertical: bool = vertical
|
||||
self._split_ratio: float = split_ratio
|
||||
|
||||
@override
|
||||
def compose(self) -> ComposeResult:
|
||||
"""Compose split pane layout."""
|
||||
if self.vertical:
|
||||
_ = self.add_class("vertical")
|
||||
if self._vertical:
|
||||
cast(Widget, self).add_class("vertical")
|
||||
|
||||
pane_classes = ("top-pane", "bottom-pane") if self.vertical else ("left-pane", "right-pane")
|
||||
pane_classes = ("top-pane", "bottom-pane") if self._vertical else ("left-pane", "right-pane")
|
||||
|
||||
yield Container(self.left_content, classes=pane_classes[0])
|
||||
yield Container(self._left_content, classes=pane_classes[0])
|
||||
yield Static("", classes="splitter")
|
||||
yield Container(self.right_content, classes=pane_classes[1])
|
||||
yield Container(self._right_content, classes=pane_classes[1])
|
||||
|
||||
def on_mount(self) -> None:
|
||||
"""Apply split ratio."""
|
||||
if self.vertical:
|
||||
self.query_one(f".{self.__class__.__name__} .top-pane").styles.height = f"{self.split_ratio * 100}%"
|
||||
self.query_one(f".{self.__class__.__name__} .bottom-pane").styles.height = f"{(1 - self.split_ratio) * 100}%"
|
||||
widget = cast(Widget, self)
|
||||
if self._vertical:
|
||||
widget.query_one(".top-pane").styles.height = f"{self._split_ratio * 100}%"
|
||||
widget.query_one(".bottom-pane").styles.height = f"{(1 - self._split_ratio) * 100}%"
|
||||
else:
|
||||
self.query_one(f".{self.__class__.__name__} .left-pane").styles.width = f"{self.split_ratio * 100}%"
|
||||
self.query_one(f".{self.__class__.__name__} .right-pane").styles.width = f"{(1 - self.split_ratio) * 100}%"
|
||||
widget.query_one(".left-pane").styles.width = f"{self._split_ratio * 100}%"
|
||||
widget.query_one(".right-pane").styles.width = f"{(1 - self._split_ratio) * 100}%"
|
||||
|
||||
@@ -387,7 +387,12 @@ class CollectionOverviewScreen(Screen[None]):
|
||||
async def list_openwebui_collections(self) -> list[CollectionInfo]:
|
||||
"""List OpenWebUI collections with enhanced metadata."""
|
||||
# Try to get OpenWebUI backend from storage manager if direct instance not available
|
||||
openwebui_backend = self.openwebui or self.storage_manager.get_backend(StorageBackend.OPEN_WEBUI)
|
||||
openwebui_backend = self.openwebui
|
||||
if not openwebui_backend:
|
||||
backend = self.storage_manager.get_backend(StorageBackend.OPEN_WEBUI)
|
||||
if not isinstance(backend, OpenWebUIStorage):
|
||||
return []
|
||||
openwebui_backend = backend
|
||||
if not openwebui_backend:
|
||||
return []
|
||||
|
||||
@@ -578,42 +583,42 @@ class CollectionOverviewScreen(Screen[None]):
|
||||
|
||||
def action_tab_dashboard(self) -> None:
|
||||
"""Switch to dashboard tab."""
|
||||
tabs = self.query_one(TabbedContent)
|
||||
tabs.active = "dashboard"
|
||||
tabbed_content: TabbedContent = self.query_one(TabbedContent)
|
||||
tabbed_content.active = "dashboard"
|
||||
|
||||
def action_tab_collections(self) -> None:
|
||||
"""Switch to collections tab."""
|
||||
tabs = self.query_one(TabbedContent)
|
||||
tabs.active = "collections"
|
||||
tabbed_content: TabbedContent = self.query_one(TabbedContent)
|
||||
tabbed_content.active = "collections"
|
||||
|
||||
def action_tab_analytics(self) -> None:
|
||||
"""Switch to analytics tab."""
|
||||
tabs = self.query_one(TabbedContent)
|
||||
tabs.active = "analytics"
|
||||
tabbed_content: TabbedContent = self.query_one(TabbedContent)
|
||||
tabbed_content.active = "analytics"
|
||||
|
||||
def action_next_tab(self) -> None:
|
||||
"""Switch to next tab."""
|
||||
tabs = self.query_one(TabbedContent)
|
||||
tabbed_content: TabbedContent = self.query_one(TabbedContent)
|
||||
tab_ids = ["dashboard", "collections", "analytics"]
|
||||
current = tabs.active
|
||||
current = tabbed_content.active
|
||||
try:
|
||||
current_index = tab_ids.index(current)
|
||||
next_index = (current_index + 1) % len(tab_ids)
|
||||
tabs.active = tab_ids[next_index]
|
||||
tabbed_content.active = tab_ids[next_index]
|
||||
except (ValueError, AttributeError):
|
||||
tabs.active = tab_ids[0]
|
||||
tabbed_content.active = tab_ids[0]
|
||||
|
||||
def action_prev_tab(self) -> None:
|
||||
"""Switch to previous tab."""
|
||||
tabs = self.query_one(TabbedContent)
|
||||
tabbed_content: TabbedContent = self.query_one(TabbedContent)
|
||||
tab_ids = ["dashboard", "collections", "analytics"]
|
||||
current = tabs.active
|
||||
current = tabbed_content.active
|
||||
try:
|
||||
current_index = tab_ids.index(current)
|
||||
prev_index = (current_index - 1) % len(tab_ids)
|
||||
tabs.active = tab_ids[prev_index]
|
||||
tabbed_content.active = tab_ids[prev_index]
|
||||
except (ValueError, AttributeError):
|
||||
tabs.active = tab_ids[0]
|
||||
tabbed_content.active = tab_ids[0]
|
||||
|
||||
def action_help(self) -> None:
|
||||
"""Show help screen."""
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
"""Dialog screens for confirmations and user interactions."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
from typing import TYPE_CHECKING, ClassVar
|
||||
|
||||
from textual.app import ComposeResult
|
||||
from textual.binding import Binding
|
||||
@@ -21,16 +23,16 @@ class ConfirmDeleteScreen(Screen[None]):
|
||||
"""Screen for confirming collection deletion."""
|
||||
|
||||
collection: CollectionInfo
|
||||
parent_screen: "CollectionOverviewScreen"
|
||||
parent_screen: CollectionOverviewScreen
|
||||
|
||||
BINDINGS = [
|
||||
BINDINGS: list[Binding] = [
|
||||
Binding("escape", "app.pop_screen", "Cancel"),
|
||||
Binding("y", "confirm_delete", "Yes"),
|
||||
Binding("n", "app.pop_screen", "No"),
|
||||
Binding("enter", "confirm_delete", "Confirm"),
|
||||
]
|
||||
|
||||
def __init__(self, collection: CollectionInfo, parent_screen: "CollectionOverviewScreen"):
|
||||
def __init__(self, collection: CollectionInfo, parent_screen: CollectionOverviewScreen):
|
||||
super().__init__()
|
||||
self.collection = collection
|
||||
self.parent_screen = parent_screen
|
||||
@@ -74,7 +76,7 @@ class ConfirmDeleteScreen(Screen[None]):
|
||||
try:
|
||||
if self.collection["type"] == "weaviate" and self.parent_screen.weaviate:
|
||||
# Delete Weaviate collection
|
||||
if self.parent_screen.weaviate.client:
|
||||
if self.parent_screen.weaviate.client and self.parent_screen.weaviate.client.collections:
|
||||
self.parent_screen.weaviate.client.collections.delete(self.collection["name"])
|
||||
self.notify(
|
||||
f"Deleted Weaviate collection: {self.collection['name']}",
|
||||
@@ -145,7 +147,7 @@ class ConfirmDocumentDeleteScreen(Screen[None]):
|
||||
collection: CollectionInfo
|
||||
parent_screen: "DocumentManagementScreen"
|
||||
|
||||
BINDINGS = [
|
||||
BINDINGS: list[Binding] = [
|
||||
Binding("escape", "app.pop_screen", "Cancel"),
|
||||
Binding("y", "confirm_delete", "Yes"),
|
||||
Binding("n", "app.pop_screen", "No"),
|
||||
@@ -205,15 +207,16 @@ class ConfirmDocumentDeleteScreen(Screen[None]):
|
||||
loading.display = True
|
||||
|
||||
try:
|
||||
results: dict[str, bool] = {}
|
||||
if hasattr(self.parent_screen, 'storage') and self.parent_screen.storage:
|
||||
# Delete documents via storage
|
||||
# The storage should have delete_documents method for weaviate
|
||||
storage = self.parent_screen.storage
|
||||
if hasattr(storage, 'delete_documents'):
|
||||
results = await storage.delete_documents(
|
||||
self.doc_ids,
|
||||
collection_name=self.collection["name"],
|
||||
)
|
||||
self.doc_ids,
|
||||
collection_name=self.collection["name"],
|
||||
)
|
||||
|
||||
# Count successful deletions
|
||||
successful = sum(bool(success) for success in results.values())
|
||||
@@ -241,7 +244,7 @@ class LogViewerScreen(ModalScreen[None]):
|
||||
_log_widget: RichLog | None
|
||||
_log_file: Path | None
|
||||
|
||||
BINDINGS = [
|
||||
BINDINGS: list[Binding] = [
|
||||
Binding("escape", "close", "Close"),
|
||||
Binding("ctrl+l", "close", "Close"),
|
||||
Binding("s", "show_path", "Log File"),
|
||||
|
||||
@@ -8,13 +8,14 @@ from typing import cast
|
||||
from textual.app import ComposeResult
|
||||
from textual.containers import Container, Horizontal
|
||||
from textual.validation import Integer
|
||||
from textual.widget import Widget
|
||||
from textual.widgets import Button, Checkbox, Input, Label, Switch, TextArea
|
||||
from typing_extensions import override
|
||||
|
||||
from ..models import FirecrawlOptions
|
||||
|
||||
|
||||
class ScrapeOptionsForm(Container):
|
||||
class ScrapeOptionsForm(Widget):
|
||||
"""Form for configuring Firecrawl scraping options."""
|
||||
|
||||
DEFAULT_CSS = """
|
||||
@@ -206,7 +207,7 @@ class ScrapeOptionsForm(Container):
|
||||
self.query_one("#wait_for", Input).value = str(wait_for)
|
||||
|
||||
|
||||
class MapOptionsForm(Container):
|
||||
class MapOptionsForm(Widget):
|
||||
"""Form for configuring site mapping options."""
|
||||
|
||||
DEFAULT_CSS = """
|
||||
@@ -352,7 +353,7 @@ class MapOptionsForm(Container):
|
||||
self.query_one("#max_depth", Input).value = str(max_depth)
|
||||
|
||||
|
||||
class ExtractOptionsForm(Container):
|
||||
class ExtractOptionsForm(Widget):
|
||||
"""Form for configuring data extraction options."""
|
||||
|
||||
DEFAULT_CSS = """
|
||||
@@ -532,7 +533,7 @@ class ExtractOptionsForm(Container):
|
||||
prompt_widget.text = "Extract numerical data, metrics, and tabular information"
|
||||
|
||||
|
||||
class FirecrawlConfigWidget(Container):
|
||||
class FirecrawlConfigWidget(Widget):
|
||||
"""Complete Firecrawl configuration widget with tabbed interface."""
|
||||
|
||||
DEFAULT_CSS = """
|
||||
|
||||
@@ -7,6 +7,7 @@ from typing import Any
|
||||
from textual import work
|
||||
from textual.app import ComposeResult
|
||||
from textual.containers import Container, Horizontal, Vertical, VerticalScroll
|
||||
from textual.widget import Widget
|
||||
from textual.widgets import Button, DataTable, Label, Markdown, ProgressBar, Static, Tree
|
||||
from typing_extensions import override
|
||||
|
||||
@@ -14,7 +15,7 @@ from ....storage.r2r.storage import R2RStorage
|
||||
from ..models import ChunkInfo, EntityInfo
|
||||
|
||||
|
||||
class ChunkViewer(Container):
|
||||
class ChunkViewer(Widget):
|
||||
"""Widget for viewing document chunks with navigation."""
|
||||
|
||||
DEFAULT_CSS = """
|
||||
@@ -56,10 +57,10 @@ class ChunkViewer(Container):
|
||||
def __init__(self, r2r_storage: R2RStorage, document_id: str, **kwargs: Any) -> None:
|
||||
"""Initialize chunk viewer."""
|
||||
super().__init__(**kwargs)
|
||||
self.r2r_storage = r2r_storage
|
||||
self.document_id = document_id
|
||||
self.r2r_storage: R2RStorage = r2r_storage
|
||||
self.document_id: str = document_id
|
||||
self.chunks: list[ChunkInfo] = []
|
||||
self.current_chunk_index = 0
|
||||
self.current_chunk_index: int = 0
|
||||
|
||||
@override
|
||||
def compose(self) -> ComposeResult:
|
||||
@@ -154,7 +155,7 @@ class ChunkViewer(Container):
|
||||
self.update_chunk_display()
|
||||
|
||||
|
||||
class EntityGraph(Container):
|
||||
class EntityGraph(Widget):
|
||||
"""Widget for visualizing extracted entities and relationships."""
|
||||
|
||||
DEFAULT_CSS = """
|
||||
@@ -189,8 +190,8 @@ class EntityGraph(Container):
|
||||
def __init__(self, r2r_storage: R2RStorage, document_id: str, **kwargs: Any) -> None:
|
||||
"""Initialize entity graph."""
|
||||
super().__init__(**kwargs)
|
||||
self.r2r_storage = r2r_storage
|
||||
self.document_id = document_id
|
||||
self.r2r_storage: R2RStorage = r2r_storage
|
||||
self.document_id: str = document_id
|
||||
self.entities: list[EntityInfo] = []
|
||||
|
||||
@override
|
||||
@@ -287,7 +288,7 @@ class EntityGraph(Container):
|
||||
details_widget.update(details_text)
|
||||
|
||||
|
||||
class CollectionStats(Container):
|
||||
class CollectionStats(Widget):
|
||||
"""Widget for showing R2R-specific collection statistics."""
|
||||
|
||||
DEFAULT_CSS = """
|
||||
@@ -342,8 +343,8 @@ class CollectionStats(Container):
|
||||
def __init__(self, r2r_storage: R2RStorage, collection_name: str, **kwargs: Any) -> None:
|
||||
"""Initialize collection stats."""
|
||||
super().__init__(**kwargs)
|
||||
self.r2r_storage = r2r_storage
|
||||
self.collection_name = collection_name
|
||||
self.r2r_storage: R2RStorage = r2r_storage
|
||||
self.collection_name: str = collection_name
|
||||
|
||||
@override
|
||||
def compose(self) -> ComposeResult:
|
||||
@@ -413,7 +414,7 @@ class CollectionStats(Container):
|
||||
self.query_one("#processing_status", Static).update(f"Error: {e}")
|
||||
|
||||
|
||||
class DocumentOverview(Container):
|
||||
class DocumentOverview(Widget):
|
||||
"""Widget for comprehensive document overview and statistics."""
|
||||
|
||||
DEFAULT_CSS = """
|
||||
@@ -454,8 +455,8 @@ class DocumentOverview(Container):
|
||||
def __init__(self, r2r_storage: R2RStorage, document_id: str, **kwargs: Any) -> None:
|
||||
"""Initialize document overview."""
|
||||
super().__init__(**kwargs)
|
||||
self.r2r_storage = r2r_storage
|
||||
self.document_id = document_id
|
||||
self.r2r_storage: R2RStorage = r2r_storage
|
||||
self.document_id: str = document_id
|
||||
|
||||
@override
|
||||
def compose(self) -> ComposeResult:
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
"""Application settings and configuration."""
|
||||
|
||||
from functools import lru_cache
|
||||
from typing import Annotated, Literal
|
||||
from typing import Annotated, ClassVar, Literal
|
||||
|
||||
from prefect.variables import Variable
|
||||
from pydantic import Field, HttpUrl, model_validator
|
||||
@@ -11,7 +11,7 @@ from pydantic_settings import BaseSettings, SettingsConfigDict
|
||||
class Settings(BaseSettings):
|
||||
"""Application settings."""
|
||||
|
||||
model_config = SettingsConfigDict(
|
||||
model_config: ClassVar[SettingsConfigDict] = SettingsConfigDict(
|
||||
env_file=".env",
|
||||
env_file_encoding="utf-8",
|
||||
case_sensitive=False,
|
||||
@@ -154,8 +154,8 @@ class PrefectVariableConfig:
|
||||
"""Helper class for managing Prefect variables with fallbacks to settings."""
|
||||
|
||||
def __init__(self) -> None:
|
||||
self._settings = get_settings()
|
||||
self._variable_names = [
|
||||
self._settings: Settings = get_settings()
|
||||
self._variable_names: list[str] = [
|
||||
"default_batch_size", "max_file_size", "max_crawl_depth", "max_crawl_pages",
|
||||
"default_storage_backend", "default_collection_prefix", "max_concurrent_tasks",
|
||||
"request_timeout", "default_schedule_interval"
|
||||
@@ -212,7 +212,7 @@ class PrefectVariableConfig:
|
||||
|
||||
async def get_ingestion_config_async(self) -> dict[str, str | int | float | None]:
|
||||
"""Get all ingestion-related configuration variables asynchronously."""
|
||||
result = {}
|
||||
result: dict[str, str | int | float | None] = {}
|
||||
for name in self._variable_names:
|
||||
result[name] = await self.get_with_fallback_async(name)
|
||||
return result
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
from datetime import UTC, datetime
|
||||
from enum import Enum
|
||||
from typing import Annotated, TypedDict
|
||||
from typing import Annotated, ClassVar, TypedDict
|
||||
from uuid import UUID, uuid4
|
||||
|
||||
from prefect.blocks.core import Block
|
||||
@@ -48,9 +48,9 @@ class VectorConfig(BaseModel):
|
||||
class StorageConfig(Block):
|
||||
"""Configuration for storage backend."""
|
||||
|
||||
_block_type_name = "Storage Configuration"
|
||||
_block_type_slug = "storage-config"
|
||||
_description = "Configures storage backend connections and settings for document ingestion"
|
||||
_block_type_name: ClassVar[str] = "Storage Configuration"
|
||||
_block_type_slug: ClassVar[str] = "storage-config"
|
||||
_description: ClassVar[str] = "Configures storage backend connections and settings for document ingestion"
|
||||
|
||||
backend: StorageBackend
|
||||
endpoint: HttpUrl
|
||||
@@ -62,9 +62,9 @@ class StorageConfig(Block):
|
||||
class FirecrawlConfig(Block):
|
||||
"""Configuration for Firecrawl ingestion (operational parameters only)."""
|
||||
|
||||
_block_type_name = "Firecrawl Configuration"
|
||||
_block_type_slug = "firecrawl-config"
|
||||
_description = "Configures Firecrawl web scraping and crawling parameters"
|
||||
_block_type_name: ClassVar[str] = "Firecrawl Configuration"
|
||||
_block_type_slug: ClassVar[str] = "firecrawl-config"
|
||||
_description: ClassVar[str] = "Configures Firecrawl web scraping and crawling parameters"
|
||||
|
||||
formats: list[str] = Field(default_factory=lambda: ["markdown", "html"])
|
||||
max_depth: Annotated[int, Field(ge=1, le=20)] = 5
|
||||
@@ -76,9 +76,9 @@ class FirecrawlConfig(Block):
|
||||
class RepomixConfig(Block):
|
||||
"""Configuration for Repomix ingestion."""
|
||||
|
||||
_block_type_name = "Repomix Configuration"
|
||||
_block_type_slug = "repomix-config"
|
||||
_description = "Configures repository ingestion patterns and file processing settings"
|
||||
_block_type_name: ClassVar[str] = "Repomix Configuration"
|
||||
_block_type_slug: ClassVar[str] = "repomix-config"
|
||||
_description: ClassVar[str] = "Configures repository ingestion patterns and file processing settings"
|
||||
|
||||
include_patterns: list[str] = Field(
|
||||
default_factory=lambda: ["*.py", "*.js", "*.ts", "*.md", "*.yaml", "*.json"]
|
||||
@@ -93,9 +93,9 @@ class RepomixConfig(Block):
|
||||
class R2RConfig(Block):
|
||||
"""Configuration for R2R ingestion."""
|
||||
|
||||
_block_type_name = "R2R Configuration"
|
||||
_block_type_slug = "r2r-config"
|
||||
_description = "Configures R2R-specific ingestion settings including chunking and graph enrichment"
|
||||
_block_type_name: ClassVar[str] = "R2R Configuration"
|
||||
_block_type_slug: ClassVar[str] = "r2r-config"
|
||||
_description: ClassVar[str] = "Configures R2R-specific ingestion settings including chunking and graph enrichment"
|
||||
|
||||
chunk_size: Annotated[int, Field(ge=100, le=8192)] = 1000
|
||||
chunk_overlap: Annotated[int, Field(ge=0, le=1000)] = 200
|
||||
|
||||
@@ -9,7 +9,7 @@ from typing import TYPE_CHECKING, Literal, TypeAlias, assert_never, cast
|
||||
from prefect import flow, get_run_logger, task
|
||||
from prefect.blocks.core import Block
|
||||
from prefect.variables import Variable
|
||||
from pydantic.types import SecretStr
|
||||
from pydantic import SecretStr
|
||||
|
||||
from ..config.settings import Settings
|
||||
from ..core.exceptions import IngestionError
|
||||
|
||||
Binary file not shown.
@@ -173,6 +173,33 @@ class BaseStorage(ABC):
|
||||
"""
|
||||
return []
|
||||
|
||||
async def delete_collection(self, collection_name: str) -> bool:
|
||||
"""
|
||||
Delete a collection (if supported by backend).
|
||||
|
||||
Args:
|
||||
collection_name: Name of collection to delete
|
||||
|
||||
Returns:
|
||||
True if deleted successfully, False if not supported
|
||||
"""
|
||||
return False
|
||||
|
||||
async def delete_documents(
|
||||
self, document_ids: list[str], *, collection_name: str | None = None
|
||||
) -> dict[str, bool]:
|
||||
"""
|
||||
Delete documents by IDs (if supported by backend).
|
||||
|
||||
Args:
|
||||
document_ids: List of document IDs to delete
|
||||
collection_name: Collection to delete from
|
||||
|
||||
Returns:
|
||||
Dict mapping document IDs to success status, empty if not supported
|
||||
"""
|
||||
return {}
|
||||
|
||||
async def list_documents(
|
||||
self,
|
||||
limit: int = 100,
|
||||
|
||||
@@ -2,11 +2,21 @@
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
from typing import Final, TypedDict, cast
|
||||
from typing import TYPE_CHECKING, Final, TypedDict, cast
|
||||
|
||||
import httpx
|
||||
from typing_extensions import override
|
||||
|
||||
if TYPE_CHECKING:
|
||||
# Type checking imports - these will be ignored at runtime
|
||||
from httpx import AsyncClient, ConnectError, HTTPStatusError, RequestError
|
||||
else:
|
||||
# Runtime imports that work properly
|
||||
AsyncClient = httpx.AsyncClient
|
||||
ConnectError = httpx.ConnectError
|
||||
HTTPStatusError = httpx.HTTPStatusError
|
||||
RequestError = httpx.RequestError
|
||||
|
||||
from ..core.exceptions import StorageError
|
||||
from ..core.models import Document, StorageConfig
|
||||
from .base import BaseStorage
|
||||
@@ -17,7 +27,7 @@ LOGGER: Final[logging.Logger] = logging.getLogger(__name__)
|
||||
class OpenWebUIStorage(BaseStorage):
|
||||
"""Storage adapter for Open WebUI knowledge endpoints."""
|
||||
|
||||
client: httpx.AsyncClient
|
||||
client: AsyncClient
|
||||
_knowledge_cache: dict[str, str]
|
||||
|
||||
def __init__(self, config: StorageConfig):
|
||||
@@ -33,7 +43,7 @@ class OpenWebUIStorage(BaseStorage):
|
||||
if config.api_key:
|
||||
headers["Authorization"] = f"Bearer {config.api_key}"
|
||||
|
||||
self.client = httpx.AsyncClient(
|
||||
self.client = AsyncClient(
|
||||
base_url=str(config.endpoint),
|
||||
headers=headers,
|
||||
timeout=30.0,
|
||||
@@ -50,11 +60,11 @@ class OpenWebUIStorage(BaseStorage):
|
||||
create=True,
|
||||
)
|
||||
|
||||
except httpx.ConnectError as e:
|
||||
except ConnectError as e:
|
||||
raise StorageError(f"Connection to OpenWebUI failed: {e}") from e
|
||||
except httpx.HTTPStatusError as e:
|
||||
except HTTPStatusError as e:
|
||||
raise StorageError(f"OpenWebUI returned error {e.response.status_code}: {e}") from e
|
||||
except httpx.RequestError as e:
|
||||
except RequestError as e:
|
||||
raise StorageError(f"Request to OpenWebUI failed: {e}") from e
|
||||
except Exception as e:
|
||||
raise StorageError(f"Failed to initialize Open WebUI: {e}") from e
|
||||
@@ -80,13 +90,13 @@ class OpenWebUIStorage(BaseStorage):
|
||||
|
||||
return str(knowledge_id)
|
||||
|
||||
except httpx.ConnectError as e:
|
||||
except ConnectError as e:
|
||||
raise StorageError(f"Connection to OpenWebUI failed during creation: {e}") from e
|
||||
except httpx.HTTPStatusError as e:
|
||||
except HTTPStatusError as e:
|
||||
raise StorageError(
|
||||
f"OpenWebUI returned error {e.response.status_code} during creation: {e}"
|
||||
) from e
|
||||
except httpx.RequestError as e:
|
||||
except RequestError as e:
|
||||
raise StorageError(f"Request to OpenWebUI failed during creation: {e}") from e
|
||||
except Exception as e:
|
||||
raise StorageError(f"Failed to create knowledge base: {e}") from e
|
||||
@@ -182,11 +192,11 @@ class OpenWebUIStorage(BaseStorage):
|
||||
|
||||
return str(file_id)
|
||||
|
||||
except httpx.ConnectError as e:
|
||||
except ConnectError as e:
|
||||
raise StorageError(f"Connection to OpenWebUI failed: {e}") from e
|
||||
except httpx.HTTPStatusError as e:
|
||||
except HTTPStatusError as e:
|
||||
raise StorageError(f"OpenWebUI returned error {e.response.status_code}: {e}") from e
|
||||
except httpx.RequestError as e:
|
||||
except RequestError as e:
|
||||
raise StorageError(f"Request to OpenWebUI failed: {e}") from e
|
||||
except Exception as e:
|
||||
raise StorageError(f"Failed to store document: {e}") from e
|
||||
@@ -263,13 +273,13 @@ class OpenWebUIStorage(BaseStorage):
|
||||
|
||||
return file_ids
|
||||
|
||||
except httpx.ConnectError as e:
|
||||
except ConnectError as e:
|
||||
raise StorageError(f"Connection to OpenWebUI failed during batch: {e}") from e
|
||||
except httpx.HTTPStatusError as e:
|
||||
except HTTPStatusError as e:
|
||||
raise StorageError(
|
||||
f"OpenWebUI returned error {e.response.status_code} during batch: {e}"
|
||||
) from e
|
||||
except httpx.RequestError as e:
|
||||
except RequestError as e:
|
||||
raise StorageError(f"Request to OpenWebUI failed during batch: {e}") from e
|
||||
except Exception as e:
|
||||
raise StorageError(f"Failed to store batch: {e}") from e
|
||||
@@ -324,12 +334,12 @@ class OpenWebUIStorage(BaseStorage):
|
||||
delete_response.raise_for_status()
|
||||
return True
|
||||
|
||||
except httpx.ConnectError as exc:
|
||||
except ConnectError as exc:
|
||||
LOGGER.error(
|
||||
"Failed to reach OpenWebUI when deleting file %s", document_id, exc_info=exc
|
||||
)
|
||||
return False
|
||||
except httpx.HTTPStatusError as exc:
|
||||
except HTTPStatusError as exc:
|
||||
LOGGER.error(
|
||||
"OpenWebUI returned status error %s when deleting file %s",
|
||||
exc.response.status_code if exc.response else "unknown",
|
||||
@@ -337,7 +347,7 @@ class OpenWebUIStorage(BaseStorage):
|
||||
exc_info=exc,
|
||||
)
|
||||
return False
|
||||
except httpx.RequestError as exc:
|
||||
except RequestError as exc:
|
||||
LOGGER.error("Request error deleting file %s from OpenWebUI", document_id, exc_info=exc)
|
||||
return False
|
||||
except Exception as exc:
|
||||
@@ -360,11 +370,11 @@ class OpenWebUIStorage(BaseStorage):
|
||||
for kb in knowledge_bases
|
||||
]
|
||||
|
||||
except httpx.ConnectError as e:
|
||||
except ConnectError as e:
|
||||
raise StorageError(f"Connection to OpenWebUI failed: {e}") from e
|
||||
except httpx.HTTPStatusError as e:
|
||||
except HTTPStatusError as e:
|
||||
raise StorageError(f"OpenWebUI returned error {e.response.status_code}: {e}") from e
|
||||
except httpx.RequestError as e:
|
||||
except RequestError as e:
|
||||
raise StorageError(f"Request to OpenWebUI failed: {e}") from e
|
||||
except Exception as e:
|
||||
raise StorageError(f"Failed to list knowledge bases: {e}") from e
|
||||
@@ -396,7 +406,7 @@ class OpenWebUIStorage(BaseStorage):
|
||||
LOGGER.info("Successfully deleted knowledge base: %s", collection_name)
|
||||
return True
|
||||
|
||||
except httpx.HTTPStatusError as e:
|
||||
except HTTPStatusError as e:
|
||||
# Handle 404 as success (already deleted)
|
||||
if e.response.status_code == 404:
|
||||
LOGGER.info("Knowledge base %s was already deleted or not found", collection_name)
|
||||
@@ -408,14 +418,14 @@ class OpenWebUIStorage(BaseStorage):
|
||||
exc_info=e,
|
||||
)
|
||||
return False
|
||||
except httpx.ConnectError as e:
|
||||
except ConnectError as e:
|
||||
LOGGER.error(
|
||||
"Failed to reach OpenWebUI when deleting knowledge base %s",
|
||||
collection_name,
|
||||
exc_info=e,
|
||||
)
|
||||
return False
|
||||
except httpx.RequestError as e:
|
||||
except RequestError as e:
|
||||
LOGGER.error(
|
||||
"Request error deleting knowledge base %s from OpenWebUI",
|
||||
collection_name,
|
||||
|
||||
@@ -110,45 +110,49 @@ class R2RStorage(BaseStorage):
|
||||
|
||||
async def _ensure_collection(self, collection_name: str) -> str:
|
||||
"""Get or create collection by name."""
|
||||
endpoint = self.endpoint
|
||||
client = AsyncClient()
|
||||
try:
|
||||
endpoint = self.endpoint
|
||||
client = AsyncClient()
|
||||
try:
|
||||
# List collections and find by name
|
||||
response = await client.get(f"{endpoint}/v3/collections")
|
||||
response.raise_for_status()
|
||||
data: dict[str, object] = response.json()
|
||||
# List collections and find by name
|
||||
response = await client.get(f"{endpoint}/v3/collections")
|
||||
response.raise_for_status()
|
||||
data: dict[str, object] = response.json()
|
||||
|
||||
results = cast(list[dict[str, object]], data.get("results", []))
|
||||
for collection in results:
|
||||
if collection.get("name") == collection_name:
|
||||
collection_id = str(collection.get("id"))
|
||||
if collection_name == self.config.collection_name:
|
||||
self.default_collection_id = collection_id
|
||||
return collection_id
|
||||
results = cast(list[dict[str, object]], data.get("results", []))
|
||||
for collection in results:
|
||||
if collection.get("name") == collection_name:
|
||||
collection_id_raw = collection.get("id")
|
||||
if collection_id_raw is None:
|
||||
raise StorageError(f"Collection '{collection_name}' exists but has no ID")
|
||||
collection_id = str(collection_id_raw)
|
||||
if collection_name == self.config.collection_name:
|
||||
self.default_collection_id = collection_id
|
||||
return collection_id
|
||||
|
||||
# Create if not found
|
||||
create_response = await client.post(
|
||||
f"{endpoint}/v3/collections",
|
||||
json={
|
||||
"name": collection_name,
|
||||
"description": f"Auto-created collection: {collection_name}",
|
||||
},
|
||||
)
|
||||
create_response.raise_for_status()
|
||||
created: dict[str, object] = create_response.json()
|
||||
created_results = cast(dict[str, object], created.get("results", {}))
|
||||
collection_id = str(created_results.get("id"))
|
||||
# Create if not found
|
||||
create_response = await client.post(
|
||||
f"{endpoint}/v3/collections",
|
||||
json={
|
||||
"name": collection_name,
|
||||
"description": f"Auto-created collection: {collection_name}",
|
||||
},
|
||||
)
|
||||
create_response.raise_for_status()
|
||||
created: dict[str, object] = create_response.json()
|
||||
created_results = cast(dict[str, object], created.get("results", {}))
|
||||
collection_id_raw = created_results.get("id")
|
||||
if collection_id_raw is None:
|
||||
raise StorageError("Failed to get collection ID from creation response")
|
||||
collection_id = str(collection_id_raw)
|
||||
|
||||
if collection_name == self.config.collection_name:
|
||||
self.default_collection_id = collection_id
|
||||
|
||||
return collection_id
|
||||
finally:
|
||||
await client.aclose()
|
||||
if collection_name == self.config.collection_name:
|
||||
self.default_collection_id = collection_id
|
||||
|
||||
return collection_id
|
||||
except Exception as e:
|
||||
raise StorageError(f"Failed to ensure collection '{collection_name}': {e}") from e
|
||||
finally:
|
||||
await client.aclose()
|
||||
|
||||
@override
|
||||
async def store(self, document: Document, *, collection_name: str | None = None) -> str:
|
||||
@@ -614,27 +618,27 @@ class R2RStorage(BaseStorage):
|
||||
@override
|
||||
async def count(self, *, collection_name: str | None = None) -> int:
|
||||
"""Get document count in collection."""
|
||||
endpoint = self.endpoint
|
||||
client = AsyncClient()
|
||||
try:
|
||||
endpoint = self.endpoint
|
||||
client = AsyncClient()
|
||||
try:
|
||||
# Get collections and find the count for the specific collection
|
||||
response = await client.get(f"{endpoint}/v3/collections")
|
||||
response.raise_for_status()
|
||||
data: dict[str, object] = response.json()
|
||||
# Get collections and find the count for the specific collection
|
||||
response = await client.get(f"{endpoint}/v3/collections")
|
||||
response.raise_for_status()
|
||||
data: dict[str, object] = response.json()
|
||||
|
||||
target_collection = collection_name or self.config.collection_name
|
||||
results = cast(list[dict[str, object]], data.get("results", []))
|
||||
for collection in results:
|
||||
if collection.get("name") == target_collection:
|
||||
doc_count = collection.get("document_count", 0)
|
||||
return _as_int(doc_count)
|
||||
target_collection = collection_name or self.config.collection_name
|
||||
results = cast(list[dict[str, object]], data.get("results", []))
|
||||
for collection in results:
|
||||
if collection.get("name") == target_collection:
|
||||
doc_count = collection.get("document_count", 0)
|
||||
return _as_int(doc_count)
|
||||
|
||||
return 0
|
||||
finally:
|
||||
await client.aclose()
|
||||
# Collection not found
|
||||
return 0
|
||||
except Exception:
|
||||
return 0
|
||||
finally:
|
||||
await client.aclose()
|
||||
|
||||
@override
|
||||
async def close(self) -> None:
|
||||
@@ -682,24 +686,23 @@ class R2RStorage(BaseStorage):
|
||||
@override
|
||||
async def list_collections(self) -> list[str]:
|
||||
"""List all available collections."""
|
||||
endpoint = self.endpoint
|
||||
client = AsyncClient()
|
||||
try:
|
||||
endpoint = self.endpoint
|
||||
client = AsyncClient()
|
||||
try:
|
||||
response = await client.get(f"{endpoint}/v3/collections")
|
||||
response.raise_for_status()
|
||||
data: dict[str, object] = response.json()
|
||||
response = await client.get(f"{endpoint}/v3/collections")
|
||||
response.raise_for_status()
|
||||
data: dict[str, object] = response.json()
|
||||
|
||||
collection_names: list[str] = []
|
||||
results = cast(list[dict[str, object]], data.get("results", []))
|
||||
for entry in results:
|
||||
if name := entry.get("name"):
|
||||
collection_names.append(str(name))
|
||||
return collection_names
|
||||
finally:
|
||||
await client.aclose()
|
||||
collection_names: list[str] = []
|
||||
results = cast(list[dict[str, object]], data.get("results", []))
|
||||
for entry in results:
|
||||
if name := entry.get("name"):
|
||||
collection_names.append(str(name))
|
||||
return collection_names
|
||||
except Exception as e:
|
||||
raise StorageError(f"Failed to list collections: {e}") from e
|
||||
finally:
|
||||
await client.aclose()
|
||||
|
||||
async def list_collections_detailed(self) -> list[dict[str, object]]:
|
||||
"""List all available collections with detailed information."""
|
||||
|
||||
@@ -75,7 +75,8 @@ class WeaviateStorage(BaseStorage):
|
||||
if not self.client:
|
||||
raise StorageError("Weaviate client not initialized")
|
||||
try:
|
||||
self.client.collections.create(
|
||||
client = cast(weaviate.WeaviateClient, self.client)
|
||||
client.collections.create(
|
||||
name=collection_name,
|
||||
properties=[
|
||||
Property(
|
||||
@@ -225,7 +226,8 @@ class WeaviateStorage(BaseStorage):
|
||||
if not self.client:
|
||||
raise StorageError("Weaviate client not initialized")
|
||||
|
||||
existing = self.client.collections.list_all()
|
||||
client = cast(weaviate.WeaviateClient, self.client)
|
||||
existing = client.collections.list_all()
|
||||
if collection_name not in existing:
|
||||
await self._create_collection(collection_name)
|
||||
|
||||
@@ -244,7 +246,8 @@ class WeaviateStorage(BaseStorage):
|
||||
if ensure_exists:
|
||||
await self._ensure_collection(normalized)
|
||||
|
||||
return self.client.collections.get(normalized), normalized
|
||||
client = cast(weaviate.WeaviateClient, self.client)
|
||||
return client.collections.get(normalized), normalized
|
||||
|
||||
@override
|
||||
async def store(self, document: Document, *, collection_name: str | None = None) -> str:
|
||||
@@ -585,7 +588,8 @@ class WeaviateStorage(BaseStorage):
|
||||
if not self.client:
|
||||
raise StorageError("Weaviate client not initialized")
|
||||
|
||||
return list(self.client.collections.list_all())
|
||||
client = cast(weaviate.WeaviateClient, self.client)
|
||||
return list(client.collections.list_all())
|
||||
|
||||
except Exception as e:
|
||||
raise StorageError(f"Failed to list collections: {e}") from e
|
||||
@@ -596,9 +600,10 @@ class WeaviateStorage(BaseStorage):
|
||||
raise StorageError("Weaviate client not initialized")
|
||||
|
||||
try:
|
||||
client = cast(weaviate.WeaviateClient, self.client)
|
||||
collections: list[dict[str, object]] = []
|
||||
for name in self.client.collections.list_all():
|
||||
collection_obj = self.client.collections.get(name)
|
||||
for name in client.collections.list_all():
|
||||
collection_obj = client.collections.get(name)
|
||||
if not collection_obj:
|
||||
continue
|
||||
|
||||
@@ -970,7 +975,8 @@ class WeaviateStorage(BaseStorage):
|
||||
target = self._normalize_collection_name(collection_name)
|
||||
|
||||
# Delete the collection using the client's collections API
|
||||
self.client.collections.delete(target)
|
||||
client = cast(weaviate.WeaviateClient, self.client)
|
||||
client.collections.delete(target)
|
||||
|
||||
return True
|
||||
|
||||
@@ -994,7 +1000,8 @@ class WeaviateStorage(BaseStorage):
|
||||
"""Close client connection."""
|
||||
if self.client:
|
||||
try:
|
||||
self.client.close()
|
||||
client = cast(weaviate.WeaviateClient, self.client)
|
||||
client.close()
|
||||
except Exception as e:
|
||||
import logging
|
||||
logging.warning(f"Error closing Weaviate client: {e}")
|
||||
@@ -1003,6 +1010,7 @@ class WeaviateStorage(BaseStorage):
|
||||
"""Clean up client connection as fallback."""
|
||||
if self.client:
|
||||
try:
|
||||
self.client.close()
|
||||
client = cast(weaviate.WeaviateClient, self.client)
|
||||
client.close()
|
||||
except Exception:
|
||||
pass # Ignore errors in destructor
|
||||
|
||||
1996
repomix-output.xml
1996
repomix-output.xml
File diff suppressed because it is too large
Load Diff
@@ -1,17 +1,40 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, Protocol
|
||||
|
||||
import pytest
|
||||
from pydantic import HttpUrl
|
||||
from textual.app import App
|
||||
|
||||
from ingest_pipeline.cli.tui.models import CollectionInfo
|
||||
from ingest_pipeline.cli.tui.screens.dashboard import CollectionOverviewScreen
|
||||
from ingest_pipeline.cli.tui.screens.documents import DocumentManagementScreen
|
||||
from ingest_pipeline.cli.tui.widgets.tables import EnhancedDataTable
|
||||
from ingest_pipeline.core.models import StorageBackend, StorageConfig
|
||||
from ingest_pipeline.core.models import Document, StorageBackend, StorageConfig
|
||||
from ingest_pipeline.storage.base import BaseStorage
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from ingest_pipeline.cli.tui.utils.storage_manager import StorageManager
|
||||
|
||||
|
||||
class StorageManagerProtocol(Protocol):
|
||||
"""Protocol for storage manager interface used in tests."""
|
||||
|
||||
is_initialized: bool
|
||||
|
||||
async def initialize_all_backends(self) -> dict[StorageBackend, bool]: ...
|
||||
async def get_all_collections(self) -> list[CollectionInfo]: ...
|
||||
def get_available_backends(self) -> list[StorageBackend]: ...
|
||||
def get_backend(self, backend: StorageBackend) -> BaseStorage | None: ...
|
||||
async def close_all(self) -> None: ...
|
||||
|
||||
|
||||
class StorageManagerStub:
|
||||
_collections: list[CollectionInfo]
|
||||
_available: list[StorageBackend]
|
||||
backends: dict[StorageBackend, BaseStorage]
|
||||
is_initialized: bool
|
||||
|
||||
def __init__(self, collections: list[CollectionInfo], backends: dict[StorageBackend, BaseStorage]) -> None:
|
||||
self._collections = collections
|
||||
self._available = list(backends.keys())
|
||||
@@ -40,7 +63,7 @@ class PassiveStorage(BaseStorage):
|
||||
super().__init__(
|
||||
StorageConfig(
|
||||
backend=backend,
|
||||
endpoint="http://example.com",
|
||||
endpoint=HttpUrl("http://example.com"),
|
||||
collection_name="collection",
|
||||
)
|
||||
)
|
||||
@@ -48,10 +71,10 @@ class PassiveStorage(BaseStorage):
|
||||
async def initialize(self) -> None:
|
||||
return None
|
||||
|
||||
async def store(self, document, *, collection_name: str | None = None) -> str: # pragma: no cover - unused
|
||||
async def store(self, document: Document, *, collection_name: str | None = None) -> str: # pragma: no cover - unused
|
||||
raise NotImplementedError
|
||||
|
||||
async def store_batch(self, documents, *, collection_name: str | None = None): # pragma: no cover - unused
|
||||
async def store_batch(self, documents: list[Document], *, collection_name: str | None = None) -> list[str]: # pragma: no cover - unused
|
||||
raise NotImplementedError
|
||||
|
||||
async def delete(self, document_id: str, *, collection_name: str | None = None) -> bool: # pragma: no cover - unused
|
||||
@@ -59,6 +82,8 @@ class PassiveStorage(BaseStorage):
|
||||
|
||||
|
||||
class ScreenTestApp(App[None]):
|
||||
_screen: CollectionOverviewScreen
|
||||
|
||||
def __init__(self, screen: CollectionOverviewScreen) -> None:
|
||||
super().__init__()
|
||||
self._screen = screen
|
||||
@@ -68,7 +93,7 @@ class ScreenTestApp(App[None]):
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_collection_overview_table_reflects_collections(monkeypatch: pytest.MonkeyPatch) -> None:
|
||||
async def test_collection_overview_table_reflects_collections() -> None:
|
||||
collections: list[CollectionInfo] = [
|
||||
{
|
||||
"name": "docs_example_com",
|
||||
@@ -98,10 +123,13 @@ async def test_collection_overview_table_reflects_collections(monkeypatch: pytes
|
||||
},
|
||||
)
|
||||
|
||||
# Type cast for the protocol - storage_manager implements the required interface
|
||||
storage_manager_typed: StorageManager = storage_manager # type: ignore[assignment]
|
||||
|
||||
screen = CollectionOverviewScreen(
|
||||
storage_manager,
|
||||
weaviate=storage_manager.get_backend(StorageBackend.WEAVIATE),
|
||||
openwebui=storage_manager.get_backend(StorageBackend.OPEN_WEBUI),
|
||||
storage_manager_typed,
|
||||
weaviate=None, # Use None and let the screen get it from storage_manager
|
||||
openwebui=None, # Use None and let the screen get it from storage_manager
|
||||
r2r=None,
|
||||
)
|
||||
|
||||
@@ -142,7 +170,7 @@ class DocumentStorageStub(BaseStorage):
|
||||
super().__init__(
|
||||
StorageConfig(
|
||||
backend=StorageBackend.WEAVIATE,
|
||||
endpoint="http://example.com",
|
||||
endpoint=HttpUrl("http://example.com"),
|
||||
collection_name="docs",
|
||||
)
|
||||
)
|
||||
@@ -150,16 +178,16 @@ class DocumentStorageStub(BaseStorage):
|
||||
async def initialize(self) -> None:
|
||||
return None
|
||||
|
||||
async def store(self, document, *, collection_name: str | None = None) -> str: # pragma: no cover - unused
|
||||
async def store(self, document: Document, *, collection_name: str | None = None) -> str: # pragma: no cover - unused
|
||||
raise NotImplementedError
|
||||
|
||||
async def store_batch(self, documents, *, collection_name: str | None = None): # pragma: no cover - unused
|
||||
async def store_batch(self, documents: list[Document], *, collection_name: str | None = None) -> list[str]: # pragma: no cover - unused
|
||||
raise NotImplementedError
|
||||
|
||||
async def delete(self, document_id: str, *, collection_name: str | None = None) -> bool: # pragma: no cover - unused
|
||||
raise NotImplementedError
|
||||
|
||||
async def list_documents(self, *, limit: int = 100, offset: int = 0, collection_name: str | None = None):
|
||||
async def list_documents(self, limit: int = 100, offset: int = 0, *, collection_name: str | None = None) -> list[dict[str, object]]:
|
||||
return [
|
||||
{
|
||||
"id": "doc-1234567890",
|
||||
@@ -187,7 +215,9 @@ def build_collection(count: int) -> CollectionInfo:
|
||||
|
||||
|
||||
class DocumentScreenTestApp(App[None]):
|
||||
def __init__(self, screen) -> None:
|
||||
_screen: DocumentManagementScreen
|
||||
|
||||
def __init__(self, screen: DocumentManagementScreen) -> None:
|
||||
super().__init__()
|
||||
self._screen = screen
|
||||
|
||||
@@ -208,8 +238,10 @@ async def test_document_management_screen_formats_rows() -> None:
|
||||
|
||||
assert row[1] == "Deep Dive into Markdown Rendering"
|
||||
assert row[2] == "https://example.com/post"
|
||||
assert row[3].startswith("Extensive article")
|
||||
description: str = str(row[3])
|
||||
doc_id: str = str(row[7])
|
||||
assert description.startswith("Extensive article")
|
||||
assert row[4] == "📝 md"
|
||||
assert row[5] == "321"
|
||||
assert row[6] == "03/04 10:15"
|
||||
assert row[7].startswith("doc-1234")
|
||||
assert doc_id.startswith("doc-1234")
|
||||
|
||||
Reference in New Issue
Block a user