Files
rag-manager/ingest_pipeline/cli/tui/screens/documents.py
2025-09-19 06:56:19 +00:00

327 lines
13 KiB
Python

"""Document management screen with enhanced navigation."""
from datetime import datetime
from textual.app import ComposeResult
from textual.binding import Binding
from textual.containers import Container, Horizontal
from textual.screen import Screen
from textual.widgets import Button, Footer, Header, Label, LoadingIndicator, Static
from typing_extensions import override
from ....storage.base import BaseStorage
from ..models import CollectionInfo, DocumentInfo
from ..widgets import EnhancedDataTable
class DocumentManagementScreen(Screen[None]):
    """Screen for managing documents within a collection with enhanced keyboard navigation.

    The screen shows one page of documents at a time in an ``EnhancedDataTable``,
    tracks a set of selected document ids (which persists while paging), and
    offers refresh, selection, deletion and paging via key bindings and buttons.
    """

    collection: CollectionInfo
    storage: BaseStorage | None
    documents: list[DocumentInfo]
    selected_docs: set[str]
    current_offset: int
    page_size: int

    BINDINGS = [
        Binding("escape", "app.pop_screen", "Back"),
        Binding("r", "refresh", "Refresh"),
        Binding("delete", "delete_selected", "Delete Selected"),
        Binding("a", "select_all", "Select All"),
        Binding("ctrl+a", "select_all", "Select All"),
        Binding("n", "select_none", "Clear Selection"),
        Binding("ctrl+shift+a", "select_none", "Clear Selection"),
        Binding("space", "toggle_selection", "Toggle Selection"),
        Binding("ctrl+d", "delete_selected", "Delete Selected"),
        Binding("pageup", "prev_page", "Previous Page"),
        Binding("pagedown", "next_page", "Next Page"),
        Binding("home", "first_page", "First Page"),
        Binding("end", "last_page", "Last Page"),
    ]

    # Column headers of the documents table; the first (unnamed) column holds
    # the selection marker.
    _TABLE_COLUMNS = (
        "", "Title", "Source URL", "Description", "Type", "Words", "Timestamp", "ID"
    )
    # Marker rendered in the first column for documents in ``selected_docs``.
    _SELECTED_MARK = "✓"

    def __init__(self, collection: CollectionInfo, storage: BaseStorage | None) -> None:
        """Create the screen for ``collection``.

        Args:
            collection: Collection whose documents are listed; its ``name`` and
                ``count`` entries are read by this screen.
            storage: Backend used to list documents, or ``None`` to show an
                empty list.
        """
        super().__init__()
        self.collection = collection
        self.storage = storage
        self.documents = []
        self.selected_docs = set()
        self.current_offset = 0
        self.page_size = 50

    @override
    def compose(self) -> ComposeResult:
        """Build the widget tree: header, main container and footer."""
        yield Header()
        yield Container(
            Static(f"📄 Document Management: {self.collection['name']}", classes="title"),
            Static(
                f"Total Documents: {self.collection['count']:,} | Use Space to select, Delete to remove",
                classes="subtitle",
            ),
            Label(f"Page size: {self.page_size} documents"),
            EnhancedDataTable(id="documents_table", classes="enhanced-table"),
            Horizontal(
                Button("🔄 Refresh", id="refresh_docs_btn", variant="primary"),
                Button("🗑️ Delete Selected", id="delete_selected_btn", variant="error"),
                Button("✅ Select All", id="select_all_btn", variant="default"),
                Button("❌ Clear Selection", id="clear_selection_btn", variant="default"),
                Button("⬅️ Previous Page", id="prev_page_btn", variant="default"),
                Button("➡️ Next Page", id="next_page_btn", variant="default"),
                classes="button_bar",
            ),
            Label("", id="selection_status"),
            Static("", id="page_info", classes="status-text"),
            LoadingIndicator(id="loading"),
            classes="main_container",
        )
        yield Footer()

    async def on_mount(self) -> None:
        """Initialize the screen: hide the spinner, set up the table, load page one."""
        self.query_one("#loading").display = False

        table = self.query_one("#documents_table", EnhancedDataTable)
        table.add_columns(*self._TABLE_COLUMNS)
        table.can_focus = True

        await self.load_documents()

    # ------------------------------------------------------------------
    # Data loading
    # ------------------------------------------------------------------

    @staticmethod
    def _as_text(value: object, default: str = "") -> str:
        """Return ``value`` as a string, or ``default`` when it is ``None``.

        Prevents a stored ``None`` from being displayed as the text ``"None"``.
        """
        return default if value is None else str(value)

    @staticmethod
    def _parse_word_count(value: object) -> int:
        """Return ``value`` as a non-negative word count, or 0 if it is not one.

        Accepts non-negative ints and strings made only of decimal digits.
        ``str.isdecimal`` is used rather than ``str.isdigit`` because the latter
        also accepts characters such as ``"²"`` that ``int()`` rejects.
        """
        if isinstance(value, bool):
            return 0
        if isinstance(value, int):
            return value if value >= 0 else 0
        if isinstance(value, str) and value.isdecimal():
            return int(value)
        return 0

    async def load_documents(self) -> None:
        """Load the current page of documents from storage and refresh the view.

        Shows the loading indicator while working. Backends that raise
        ``NotImplementedError`` from ``list_documents`` produce an informational
        notification and an empty list; any other error is reported as an error
        notification and leaves the table untouched.
        """
        loading = self.query_one("#loading")
        loading.display = True
        try:
            if self.storage is not None:
                try:
                    raw_docs = await self.storage.list_documents(
                        limit=self.page_size,
                        offset=self.current_offset,
                        collection_name=self.collection["name"],
                    )
                except NotImplementedError:
                    # For storage backends that don't support document listing, show a message
                    self.notify(
                        f"Document listing not supported for {self.storage.__class__.__name__}",
                        severity="information",
                    )
                    self.documents = []
                else:
                    as_text = self._as_text
                    self.documents = [
                        DocumentInfo(
                            # Offset-based fallback keeps synthetic ids unique
                            # across pages so selections do not collide.
                            id=as_text(doc.get("id"), f"doc_{self.current_offset + i}"),
                            title=as_text(doc.get("title"), "Untitled Document"),
                            source_url=as_text(doc.get("source_url")),
                            description=as_text(doc.get("description")),
                            content_type=as_text(doc.get("content_type"), "text/plain"),
                            content_preview=as_text(doc.get("content_preview")),
                            word_count=self._parse_word_count(doc.get("word_count", 0)),
                            timestamp=as_text(doc.get("timestamp")),
                        )
                        for i, doc in enumerate(raw_docs)
                    ]

            # A (re)loaded page starts with the cursor at the top.
            await self.update_table(preserve_cursor=False)
            self.update_selection_status()
            self.update_page_info()
        except Exception as e:
            # UI boundary: report the failure instead of crashing the app.
            self.notify(f"Error loading documents: {e}", severity="error", markup=False)
        finally:
            loading.display = False

    # ------------------------------------------------------------------
    # Table rendering
    # ------------------------------------------------------------------

    @staticmethod
    def _format_description(description: object) -> str:
        """Return the description cell: at most 40 characters plus an ellipsis."""
        text = str(description or "").strip()
        if not text:
            return "[dim]No description[/dim]"
        return f"{text[:40]}..." if len(text) > 40 else text

    @staticmethod
    def _format_content_type(content_type: str) -> str:
        """Return an icon and short label for a content type."""
        lowered = content_type.lower()
        if "markdown" in lowered:
            return "📝 md"
        if "html" in lowered:
            return "🌐 html"
        if "text" in lowered:
            return "📄 txt"
        return f"📄 {content_type.split('/')[-1][:5]}"

    @staticmethod
    def _format_timestamp(timestamp: object) -> str:
        """Return an ISO timestamp as ``MM/DD HH:MM``.

        Text that cannot be parsed as ISO format is shown truncated to 16
        characters; an empty value yields an empty cell.
        """
        if not timestamp:
            return ""
        text = str(timestamp)
        try:
            # A trailing "Z" is rewritten as an explicit UTC offset before parsing.
            parsed = datetime.fromisoformat(text.replace("Z", "+00:00"))
        except ValueError:
            return text[:16]  # Fallback
        return parsed.strftime("%m/%d %H:%M")

    @staticmethod
    def _format_id(doc_id: str) -> str:
        """Return the id cell, shortened to 8 characters plus an ellipsis if longer."""
        return f"{doc_id[:8]}..." if len(doc_id) > 8 else doc_id

    async def update_table(self, *, preserve_cursor: bool = True) -> None:
        """Rebuild the documents table from ``self.documents``.

        Args:
            preserve_cursor: When true (the default), the cursor is moved back to
                the row it was on before the rebuild, clamped to the last row,
                so toggling a selection does not jump to the top of the table.
        """
        table = self.query_one("#documents_table", EnhancedDataTable)
        previous_row = table.cursor_coordinate.row

        table.clear(columns=True)
        table.add_columns(*self._TABLE_COLUMNS)

        for doc in self.documents:
            doc_id = doc["id"]
            table.add_row(
                self._SELECTED_MARK if doc_id in self.selected_docs else "",
                doc.get("title", "Untitled")[:40],
                doc.get("source_url", "")[:35],
                self._format_description(doc.get("description")),
                self._format_content_type(doc.get("content_type", "text/plain")),
                str(doc.get("word_count", 0)),
                self._format_timestamp(doc.get("timestamp", "")),
                self._format_id(doc_id),
            )

        if preserve_cursor and self.documents:
            # NOTE(review): assumes EnhancedDataTable exposes Textual's
            # DataTable.move_cursor - confirm against the widget definition.
            table.move_cursor(row=min(previous_row, len(self.documents) - 1))

    def update_selection_status(self) -> None:
        """Update the selection status label with the number of selected ids."""
        status_label = self.query_one("#selection_status", Label)
        status_label.update(f"Selected: {len(self.selected_docs)} documents")

    def update_page_info(self) -> None:
        """Update the page information line.

        An empty page is reported as ``0-0`` and the total page count is never
        shown as less than 1.
        """
        page_info = self.query_one("#page_info", Static)
        total_docs = self.collection["count"]
        shown = len(self.documents)

        if shown:
            start = self.current_offset + 1
            end = min(self.current_offset + shown, total_docs)
        else:
            start = end = 0

        page_num = (self.current_offset // self.page_size) + 1
        total_pages = max(1, (total_docs + self.page_size - 1) // self.page_size)
        page_info.update(
            f"Showing {start:,}-{end:,} of {total_docs:,} documents (Page {page_num} of {total_pages})"
        )

    def get_current_document(self) -> DocumentInfo | None:
        """Return the document under the table cursor, or ``None`` if there is none."""
        table = self.query_one("#documents_table", EnhancedDataTable)
        try:
            row = table.cursor_coordinate.row
            if 0 <= row < len(self.documents):
                return self.documents[row]
        except (AttributeError, IndexError):
            pass
        return None

    # ------------------------------------------------------------------
    # Selection helpers
    # ------------------------------------------------------------------

    def _refresh_selection_view(self) -> None:
        """Redraw the table and the selection counter after a selection change."""
        self.run_worker(self.update_table())
        self.update_selection_status()

    def _toggle_document(self, doc_id: str) -> None:
        """Add ``doc_id`` to the selection, or remove it if already selected."""
        if doc_id in self.selected_docs:
            self.selected_docs.remove(doc_id)
        else:
            self.selected_docs.add(doc_id)
        self._refresh_selection_view()

    # ------------------------------------------------------------------
    # Action methods
    # ------------------------------------------------------------------

    def action_refresh(self) -> None:
        """Refresh the document list."""
        self.run_worker(self.load_documents())

    def action_toggle_selection(self) -> None:
        """Toggle selection of current row."""
        if doc := self.get_current_document():
            self._toggle_document(doc["id"])

    def action_select_all(self) -> None:
        """Select all documents on current page."""
        self.selected_docs.update(doc["id"] for doc in self.documents)
        self._refresh_selection_view()

    def action_select_none(self) -> None:
        """Clear all selections."""
        self.selected_docs.clear()
        self._refresh_selection_view()

    def action_delete_selected(self) -> None:
        """Open the delete confirmation dialog, or warn when nothing is selected."""
        if not self.selected_docs:
            self.notify("No documents selected", severity="warning")
            return

        # Imported at call time, as in the original code.
        from .dialogs import ConfirmDocumentDeleteScreen

        self.app.push_screen(
            ConfirmDocumentDeleteScreen(list(self.selected_docs), self.collection, self)
        )

    def action_next_page(self) -> None:
        """Go to next page."""
        if self.current_offset + self.page_size < self.collection["count"]:
            self.current_offset += self.page_size
            self.run_worker(self.load_documents())

    def action_prev_page(self) -> None:
        """Go to previous page."""
        if self.current_offset >= self.page_size:
            self.current_offset -= self.page_size
            self.run_worker(self.load_documents())

    def action_first_page(self) -> None:
        """Go to first page."""
        if self.current_offset > 0:
            self.current_offset = 0
            self.run_worker(self.load_documents())

    def action_last_page(self) -> None:
        """Go to last page."""
        total_docs = self.collection["count"]
        # Clamp so an empty collection maps to offset 0 rather than -page_size.
        last_offset = max(0, (total_docs - 1) // self.page_size) * self.page_size
        if self.current_offset != last_offset:
            self.current_offset = last_offset
            self.run_worker(self.load_documents())

    # ------------------------------------------------------------------
    # Event handlers
    # ------------------------------------------------------------------

    def on_button_pressed(self, event: Button.Pressed) -> None:
        """Handle button presses by running the action matching the button id."""
        actions = {
            "refresh_docs_btn": self.action_refresh,
            "delete_selected_btn": self.action_delete_selected,
            "select_all_btn": self.action_select_all,
            "clear_selection_btn": self.action_select_none,
            "next_page_btn": self.action_next_page,
            "prev_page_btn": self.action_prev_page,
        }
        action = actions.get(event.button.id or "")
        if action is not None:
            action()

    def on_enhanced_data_table_row_toggled(self, event: EnhancedDataTable.RowToggled) -> None:
        """Handle row toggle from enhanced table."""
        if 0 <= event.row_index < len(self.documents):
            self._toggle_document(self.documents[event.row_index]["id"])

    def on_enhanced_data_table_select_all(self, event: EnhancedDataTable.SelectAll) -> None:
        """Handle select all from enhanced table."""
        self.action_select_all()

    def on_enhanced_data_table_clear_selection(
        self, event: EnhancedDataTable.ClearSelection
    ) -> None:
        """Handle clear selection from enhanced table."""
        self.action_select_none()