234 lines
6.6 KiB
Python
234 lines
6.6 KiB
Python
"""Base storage interface."""
|
|
|
|
from abc import ABC, abstractmethod
|
|
from collections.abc import AsyncGenerator
|
|
|
|
from ..core.models import Document, StorageConfig
|
|
|
|
|
|
class BaseStorage(ABC):
    """
    Abstract base class for storage adapters.

    Subclasses must implement ``initialize``, ``store``, ``store_batch`` and
    ``delete``. Every other method has a default that either raises
    ``NotImplementedError`` or returns an empty/neutral value, so a backend
    only overrides the capabilities it actually supports.
    """

    # Configuration object handed to the adapter at construction time.
    config: StorageConfig

    def __init__(self, config: StorageConfig):
        """
        Initialize storage adapter.

        Args:
            config: Storage configuration
        """
        self.config = config

    @property
    def display_name(self) -> str:
        """Human-readable name for UI display (class name without "Storage")."""
        return self.__class__.__name__.replace("Storage", "")

    @abstractmethod
    async def initialize(self) -> None:
        """Initialize the storage backend and create collections if needed."""
        pass  # pragma: no cover

    @abstractmethod
    async def store(self, document: Document, *, collection_name: str | None = None) -> str:
        """
        Store a single document.

        Args:
            document: Document to store
            collection_name: Optional collection to store into (interpretation
                of None is left to the backend)

        Returns:
            Document ID
        """
        pass  # pragma: no cover

    @abstractmethod
    async def store_batch(
        self, documents: list[Document], *, collection_name: str | None = None
    ) -> list[str]:
        """
        Store multiple documents in batch.

        Args:
            documents: List of documents to store
            collection_name: Optional collection to store into (interpretation
                of None is left to the backend)

        Returns:
            List of document IDs
        """
        pass  # pragma: no cover

    async def retrieve(
        self, document_id: str, *, collection_name: str | None = None
    ) -> Document | None:
        """
        Retrieve a document by ID (if supported by backend).

        Args:
            document_id: Document ID
            collection_name: Optional collection to look in (unused by this
                default implementation)

        Returns:
            Document or None if not found

        Raises:
            NotImplementedError: If backend doesn't support retrieval
        """
        raise NotImplementedError(f"{self.__class__.__name__} doesn't support document retrieval")

    async def check_exists(
        self, document_id: str, *, collection_name: str | None = None, stale_after_days: int = 30
    ) -> bool:
        """
        Check if a document exists and is not stale.

        The document is fetched through ``retrieve``. When its metadata holds
        a ``datetime`` under the ``"timestamp"`` key, that value is compared
        against ``now - stale_after_days``. Timezone-naive timestamps are
        assumed to be UTC. A missing or non-``datetime`` timestamp is treated
        as "exists and is valid".

        Args:
            document_id: Document ID to check
            collection_name: Collection to check in
            stale_after_days: Consider document stale after this many days

        Returns:
            True if document exists and is not stale, False otherwise
            (including when retrieval is unsupported or fails)
        """
        from datetime import UTC, datetime, timedelta

        try:
            document = await self.retrieve(document_id, collection_name=collection_name)
            if document is None:
                return False

            # Check staleness if timestamp is available
            if "timestamp" in document.metadata:
                timestamp = document.metadata["timestamp"]
                if isinstance(timestamp, datetime):
                    # Comparing a naive datetime with the aware cutoff raises
                    # TypeError, which would be swallowed below and misreport
                    # a fresh document as missing. Normalize to UTC first
                    # (assumption: naive timestamps were recorded in UTC).
                    if timestamp.utcoffset() is None:
                        timestamp = timestamp.replace(tzinfo=UTC)
                    cutoff = datetime.now(UTC) - timedelta(days=stale_after_days)
                    return timestamp >= cutoff

            # If no usable timestamp, assume it exists and is valid
            return True
        except Exception:
            # Deliberate best-effort: the backend may not support retrieval
            # (NotImplementedError) or the lookup may fail for another reason;
            # in every such case report the document as not existing.
            return False

    def search(
        self,
        query: str,
        limit: int = 10,
        threshold: float = 0.7,
        *,
        collection_name: str | None = None,
    ) -> AsyncGenerator[Document, None]:
        """
        Search for documents (if supported by backend).

        This is a plain ``def`` annotated as returning an async generator.
        The default implementation raises as soon as it is called, not on
        first iteration.

        Args:
            query: Search query
            limit: Maximum number of results
            threshold: Similarity threshold
            collection_name: Optional collection to search in (unused by this
                default implementation)

        Yields:
            Matching documents

        Raises:
            NotImplementedError: If backend doesn't support search
        """
        raise NotImplementedError(f"{self.__class__.__name__} doesn't support search")

    @abstractmethod
    async def delete(self, document_id: str, *, collection_name: str | None = None) -> bool:
        """
        Delete a document.

        Args:
            document_id: Document ID
            collection_name: Optional collection to delete from (interpretation
                of None is left to the backend)

        Returns:
            True if deleted successfully
        """
        pass  # pragma: no cover

    async def count(self, *, collection_name: str | None = None) -> int:
        """
        Get total document count (if supported by backend).

        Args:
            collection_name: Optional collection to count (unused by this
                default implementation)

        Returns:
            Number of documents, 0 if not supported
        """
        return 0

    async def list_collections(self) -> list[str]:
        """
        List available collections (if supported by backend).

        Returns:
            List of collection names, empty list if not supported
        """
        return []

    async def describe_collections(self) -> list[dict[str, object]]:
        """
        Describe available collections with metadata (if supported by backend).

        Returns:
            List of collection metadata dictionaries, empty list if not supported
        """
        return []

    async def delete_collection(self, collection_name: str) -> bool:
        """
        Delete a collection (if supported by backend).

        Args:
            collection_name: Name of collection to delete

        Returns:
            True if deleted successfully, False if not supported
        """
        return False

    async def delete_documents(
        self, document_ids: list[str], *, collection_name: str | None = None
    ) -> dict[str, bool]:
        """
        Delete documents by IDs (if supported by backend).

        Args:
            document_ids: List of document IDs to delete
            collection_name: Collection to delete from

        Returns:
            Dict mapping document IDs to success status, empty if not supported
        """
        return {}

    async def list_documents(
        self,
        limit: int = 100,
        offset: int = 0,
        *,
        collection_name: str | None = None,
    ) -> list[dict[str, object]]:
        """
        List documents in the storage backend (if supported).

        Args:
            limit: Maximum number of documents to return
            offset: Number of documents to skip
            collection_name: Collection to list documents from

        Returns:
            List of document dictionaries with metadata

        Raises:
            NotImplementedError: If backend doesn't support document listing
        """
        raise NotImplementedError(f"{self.__class__.__name__} doesn't support document listing")

    async def close(self) -> None:
        """
        Close storage connections and cleanup resources.

        Default implementation does nothing.
        """
        # Default implementation - storage backends can override to cleanup connections
        return None
|