Files
rag-manager/tests/unit/tui/test_storage_manager.py
2025-09-21 03:00:57 +00:00

477 lines
16 KiB
Python

from __future__ import annotations
from types import SimpleNamespace
import pytest
from ingest_pipeline.cli.tui.utils.storage_manager import (
MultiStorageAdapter,
StorageCapabilities,
StorageManager,
)
from typing import cast
from ingest_pipeline.config.settings import Settings
from ingest_pipeline.core.exceptions import StorageError
from ingest_pipeline.core.models import Document, StorageBackend, StorageConfig
from ingest_pipeline.storage.base import BaseStorage
class StubStorage(BaseStorage):
    """In-memory ``BaseStorage`` test double.

    ``documents`` seeds what ``search`` yields and what ``count`` reports;
    ``stored`` records everything passed to ``store``/``store_batch``; and
    ``fail`` makes the write and delete operations raise ``RuntimeError``.
    """

    def __init__(
        self, config: StorageConfig, *, documents: list[Document] | None = None, fail: bool = False
    ) -> None:
        super().__init__(config)
        self.documents = documents if documents else []
        self.fail = fail
        self.stored: list[Document] = []

    async def initialize(self) -> None:
        """No-op initialization."""
        return None

    async def store(self, document: Document, *, collection_name: str | None = None) -> str:
        """Record ``document`` and return a synthetic id, or raise when ``fail`` is set."""
        # The document is recorded before the failure check, so a failing
        # stub still exposes what it was asked to store.
        self.stored.append(document)
        if not self.fail:
            return f"{self.config.backend.value}-single"
        raise RuntimeError("store failed")

    async def store_batch(
        self, documents: list[Document], *, collection_name: str | None = None
    ) -> list[str]:
        """Record every document and return one synthetic id per position."""
        self.stored.extend(documents)
        if not self.fail:
            return [f"{self.config.backend.value}-{position}" for position, _ in enumerate(documents)]
        raise RuntimeError("batch failed")

    async def delete(self, document_id: str, *, collection_name: str | None = None) -> bool:
        """Return ``True``, or raise ``RuntimeError`` when ``fail`` is set."""
        if not self.fail:
            return True
        raise RuntimeError("delete failed")

    async def count(self, *, collection_name: str | None = None) -> int:
        """Return the number of seeded documents, ignoring ``collection_name``."""
        seeded = self.documents
        return len(seeded)

    async def list_collections(self) -> list[str]:
        """Return a single fixed collection name."""
        return ["collection"]

    async def search(
        self,
        query: str,
        limit: int = 10,
        threshold: float = 0.7,
        *,
        collection_name: str | None = None,
    ):
        """Yield every seeded document; the query arguments are not consulted."""
        for seeded_document in self.documents:
            yield seeded_document

    async def close(self) -> None:
        """No-op close."""
        return None
class CollectionStubStorage(StubStorage):
    """Stub whose collection names and per-collection counts are supplied by the test."""

    def __init__(
        self,
        config: StorageConfig,
        *,
        collections: list[str],
        counts: dict[str, int],
    ) -> None:
        super().__init__(config)
        self.collections = collections
        self.counts = counts

    async def list_collections(self) -> list[str]:
        """Return the collection names given at construction."""
        return self.collections

    async def count(self, *, collection_name: str | None = None) -> int:
        """Return the configured count for ``collection_name``.

        Raises ``ValueError`` when no collection name is given; an unknown
        name propagates the ``KeyError`` from the ``counts`` lookup.
        """
        if collection_name is not None:
            return self.counts[collection_name]
        raise ValueError("collection name required")
class FailingStatusStorage(StubStorage):
    """Stub whose collection listing always fails."""

    async def list_collections(self) -> list[str]:
        """Always raise ``RuntimeError``."""
        raise RuntimeError("status unavailable")
class ClosableStubStorage(StubStorage):
    """Stub that records whether ``close`` has been awaited."""

    def __init__(self, config: StorageConfig) -> None:
        super().__init__(config)
        # Flipped to True by close(); tests inspect it afterwards.
        self.closed = False

    async def close(self) -> None:
        """Mark this storage as closed."""
        self.closed = True
class FailingCloseStorage(StubStorage):
    """Stub whose ``close`` always fails."""

    async def close(self) -> None:
        """Always raise ``RuntimeError``."""
        raise RuntimeError("close failure")
@pytest.mark.asyncio
async def test_multi_storage_adapter_reports_replication_failure(document_factory) -> None:
    """A failing secondary store is expected to surface as ``StorageError``.

    The primary stub must still have recorded the document.
    """
    healthy = StubStorage(
        StorageConfig(
            backend=StorageBackend.WEAVIATE,
            endpoint="http://weaviate.local",
            collection_name="primary",
        )
    )
    broken = StubStorage(
        StorageConfig(
            backend=StorageBackend.OPEN_WEBUI,
            endpoint="http://chat.local",
            collection_name="secondary",
        ),
        fail=True,
    )
    adapter = MultiStorageAdapter([healthy, broken])

    with pytest.raises(StorageError):
        await adapter.store(document_factory(content="payload"))

    recorded = healthy.stored[0]
    assert recorded.content == "payload"
def test_storage_manager_build_multi_storage_adapter_deduplicates(document_factory) -> None:
    """Requesting the same backend twice must yield one storage per backend, in order."""
    # NOTE(review): the document_factory fixture is requested but never used here.
    fake_settings = SimpleNamespace(
        weaviate_endpoint="http://weaviate.local",
        weaviate_api_key=None,
        openwebui_endpoint="http://chat.local",
        openwebui_api_key=None,
        r2r_endpoint=None,
        r2r_api_key=None,
    )
    manager = StorageManager(cast(Settings, fake_settings))

    weaviate_stub = StubStorage(
        StorageConfig(
            backend=StorageBackend.WEAVIATE,
            endpoint="http://weaviate.local",
            collection_name="primary",
        )
    )
    openwebui_stub = StubStorage(
        StorageConfig(
            backend=StorageBackend.OPEN_WEBUI,
            endpoint="http://chat.local",
            collection_name="secondary",
        )
    )
    manager.backends[StorageBackend.WEAVIATE] = weaviate_stub
    manager.backends[StorageBackend.OPEN_WEBUI] = openwebui_stub

    requested = [StorageBackend.WEAVIATE, StorageBackend.WEAVIATE, StorageBackend.OPEN_WEBUI]
    adapter = manager.build_multi_storage_adapter(requested)

    selected_backends = [storage.config.backend for storage in adapter._storages]
    assert selected_backends == [StorageBackend.WEAVIATE, StorageBackend.OPEN_WEBUI]
def test_storage_manager_build_multi_storage_adapter_missing_backend() -> None:
    """Building an adapter is expected to raise ``ValueError`` here.

    No stub is added to ``manager.backends`` before the call.
    """
    fake_settings = SimpleNamespace(
        weaviate_endpoint="http://weaviate.local",
        weaviate_api_key=None,
        openwebui_endpoint="http://chat.local",
        openwebui_api_key=None,
        r2r_endpoint=None,
        r2r_api_key=None,
    )
    manager = StorageManager(cast(Settings, fake_settings))

    requested = [StorageBackend.WEAVIATE]
    with pytest.raises(ValueError):
        manager.build_multi_storage_adapter(requested)
@pytest.mark.asyncio
async def test_storage_manager_search_across_backends_groups_results(document_factory) -> None:
    """Search results must come back keyed by the backend that produced them."""
    fake_settings = SimpleNamespace(
        weaviate_endpoint="http://weaviate.local",
        weaviate_api_key=None,
        openwebui_endpoint="http://chat.local",
        openwebui_api_key=None,
        r2r_endpoint=None,
        r2r_api_key=None,
    )
    manager = StorageManager(cast(Settings, fake_settings))

    # Each backend is seeded with one distinguishable document.
    alpha_document = document_factory(
        content="alpha", metadata_updates={"source_url": "https://alpha"}
    )
    beta_document = document_factory(
        content="beta", metadata_updates={"source_url": "https://beta"}
    )

    weaviate_stub = StubStorage(
        StorageConfig(
            backend=StorageBackend.WEAVIATE,
            endpoint="http://weaviate.local",
            collection_name="primary",
        ),
        documents=[alpha_document],
    )
    openwebui_stub = StubStorage(
        StorageConfig(
            backend=StorageBackend.OPEN_WEBUI,
            endpoint="http://chat.local",
            collection_name="secondary",
        ),
        documents=[beta_document],
    )
    manager.backends[StorageBackend.WEAVIATE] = weaviate_stub
    manager.backends[StorageBackend.OPEN_WEBUI] = openwebui_stub

    searched_backends = [StorageBackend.WEAVIATE, StorageBackend.OPEN_WEBUI]
    results = await manager.search_across_backends(
        "query",
        limit=5,
        backends=searched_backends,
    )

    weaviate_hits = results[StorageBackend.WEAVIATE]
    openwebui_hits = results[StorageBackend.OPEN_WEBUI]
    assert weaviate_hits[0].content == "alpha"
    assert openwebui_hits[0].content == "beta"
@pytest.mark.asyncio
async def test_multi_storage_adapter_store_batch_replicates_to_all_backends(
    document_factory,
) -> None:
    """A batch store must reach both stubs and return the primary's ids."""
    primary = StubStorage(
        StorageConfig(
            backend=StorageBackend.WEAVIATE,
            endpoint="http://weaviate.local",
            collection_name="primary",
        )
    )
    secondary = StubStorage(
        StorageConfig(
            backend=StorageBackend.OPEN_WEBUI,
            endpoint="http://chat.local",
            collection_name="secondary",
        )
    )
    # The secondary stub is deliberately passed twice.
    adapter = MultiStorageAdapter([primary, secondary, secondary])

    batch = [document_factory(content="first"), document_factory(content="second")]
    document_ids = await adapter.store_batch(batch)

    assert document_ids == ["weaviate-0", "weaviate-1"]
    assert adapter._storages[0] is primary
    assert primary.stored[0].content == "first"
    assert secondary.stored[1].content == "second"
@pytest.mark.asyncio
async def test_multi_storage_adapter_delete_reports_secondary_failures() -> None:
    """A failing secondary delete must raise ``StorageError`` naming that backend."""
    healthy = StubStorage(
        StorageConfig(
            backend=StorageBackend.WEAVIATE,
            endpoint="http://weaviate.local",
            collection_name="primary",
        )
    )
    broken = StubStorage(
        StorageConfig(
            backend=StorageBackend.OPEN_WEBUI,
            endpoint="http://chat.local",
            collection_name="secondary",
        ),
        fail=True,
    )
    adapter = MultiStorageAdapter([healthy, broken])

    with pytest.raises(StorageError) as exc_info:
        await adapter.delete("identifier")

    error_text = str(exc_info.value)
    assert "open_webui" in error_text
@pytest.mark.asyncio
async def test_storage_manager_initialize_all_backends_registers_capabilities(monkeypatch) -> None:
    """With every backend configured, all three must initialize and report capabilities."""
    fake_settings = SimpleNamespace(
        weaviate_endpoint="http://weaviate.local",
        weaviate_api_key="key",
        openwebui_endpoint="http://chat.local",
        openwebui_api_key="token",
        r2r_endpoint="http://r2r.local",
        r2r_api_key="secret",
    )
    manager = StorageManager(cast(Settings, fake_settings))

    # Replace the storage classes named in the storage_manager module with the stub.
    patched_targets = (
        "ingest_pipeline.cli.tui.utils.storage_manager.WeaviateStorage",
        "ingest_pipeline.cli.tui.utils.storage_manager.OpenWebUIStorage",
        "ingest_pipeline.cli.tui.utils.storage_manager.R2RStorage",
    )
    for target in patched_targets:
        monkeypatch.setattr(target, StubStorage)

    results = await manager.initialize_all_backends()

    all_backends = [
        StorageBackend.WEAVIATE,
        StorageBackend.OPEN_WEBUI,
        StorageBackend.R2R,
    ]
    for backend in all_backends:
        assert results[backend] is True
    assert manager.get_available_backends() == all_backends

    expected_capabilities = (
        (StorageBackend.WEAVIATE, StorageCapabilities.VECTOR_SEARCH),
        (StorageBackend.OPEN_WEBUI, StorageCapabilities.KNOWLEDGE_BASE),
        (StorageBackend.R2R, StorageCapabilities.FULL_FEATURED),
    )
    for backend, capability in expected_capabilities:
        assert manager.capabilities[backend] == capability

    assert manager.supports_advanced_features(StorageBackend.R2R) is True
    assert manager.supports_advanced_features(StorageBackend.WEAVIATE) is False
    assert manager.is_initialized is True
    assert isinstance(manager.get_backend(StorageBackend.R2R), StubStorage)
@pytest.mark.asyncio
async def test_storage_manager_initialize_all_backends_handles_missing_config() -> None:
    """With these settings no backend may initialize, yet the manager is marked initialized."""
    # Only the Open WebUI endpoint is set; its API key and everything else is None.
    fake_settings = SimpleNamespace(
        weaviate_endpoint=None,
        weaviate_api_key=None,
        openwebui_endpoint="http://chat.local",
        openwebui_api_key=None,
        r2r_endpoint=None,
        r2r_api_key=None,
    )
    manager = StorageManager(cast(Settings, fake_settings))

    results = await manager.initialize_all_backends()

    for backend in (StorageBackend.WEAVIATE, StorageBackend.OPEN_WEBUI, StorageBackend.R2R):
        assert results[backend] is False
    assert manager.get_available_backends() == []
    assert manager.is_initialized is True
@pytest.mark.asyncio
async def test_storage_manager_get_all_collections_merges_counts_and_backends() -> None:
    """A collection name shared by two backends must be reported as one merged entry."""
    fake_settings = SimpleNamespace(
        weaviate_endpoint="http://weaviate.local",
        weaviate_api_key=None,
        openwebui_endpoint="http://chat.local",
        openwebui_api_key=None,
        r2r_endpoint=None,
        r2r_api_key=None,
    )
    manager = StorageManager(cast(Settings, fake_settings))

    # Weaviate also lists an empty-string name; the single-entry assertion
    # below only holds if that name is not reported.
    weaviate_stub = CollectionStubStorage(
        StorageConfig(
            backend=StorageBackend.WEAVIATE,
            endpoint="http://weaviate.local",
            collection_name="shared",
        ),
        collections=["shared", ""],
        counts={"shared": 2},
    )
    # Open WebUI reports a negative count for the same collection name.
    openwebui_stub = CollectionStubStorage(
        StorageConfig(
            backend=StorageBackend.OPEN_WEBUI,
            endpoint="http://chat.local",
            collection_name="secondary",
        ),
        collections=["shared"],
        counts={"shared": -1},
    )
    manager.backends = {
        StorageBackend.WEAVIATE: weaviate_stub,
        StorageBackend.OPEN_WEBUI: openwebui_stub,
    }

    collections = await manager.get_all_collections()

    assert len(collections) == 1
    merged = collections[0]
    assert merged["name"] == "shared"
    assert merged["count"] == 2
    assert merged["backend"] == ["weaviate", "open_webui"]
    assert merged["type"] == "weaviate"
    assert merged["size_mb"] == pytest.approx(0.02)
@pytest.mark.asyncio
async def test_storage_manager_get_backend_status_reports_failures() -> None:
    """Status must describe the healthy backend and flag the failing one as unavailable."""
    fake_settings = SimpleNamespace(
        weaviate_endpoint="http://weaviate.local",
        weaviate_api_key=None,
        openwebui_endpoint="http://chat.local",
        openwebui_api_key=None,
        r2r_endpoint=None,
        r2r_api_key=None,
    )
    manager = StorageManager(cast(Settings, fake_settings))

    healthy_stub = CollectionStubStorage(
        StorageConfig(
            backend=StorageBackend.WEAVIATE,
            endpoint="http://weaviate.local",
            collection_name="primary",
        ),
        collections=["collection", "archive"],
        counts={"collection": 2, "archive": 1},
    )
    failing_stub = FailingStatusStorage(
        StorageConfig(
            backend=StorageBackend.OPEN_WEBUI,
            endpoint="http://chat.local",
            collection_name="secondary",
        )
    )
    manager.backends = {
        StorageBackend.WEAVIATE: healthy_stub,
        StorageBackend.OPEN_WEBUI: failing_stub,
    }
    # Only Weaviate gets a capability entry; none is set for Open WebUI.
    manager.capabilities[StorageBackend.WEAVIATE] = StorageCapabilities.VECTOR_SEARCH

    status = await manager.get_backend_status()

    weaviate_status = status[StorageBackend.WEAVIATE]
    assert weaviate_status["available"] is True
    assert weaviate_status["collections"] == 2
    assert weaviate_status["total_documents"] == 3
    assert weaviate_status["capabilities"] == StorageCapabilities.VECTOR_SEARCH
    assert str(weaviate_status["endpoint"]) == "http://weaviate.local/"

    openwebui_status = status[StorageBackend.OPEN_WEBUI]
    assert openwebui_status["available"] is False
    assert openwebui_status["capabilities"] == StorageCapabilities.NONE
    assert "status unavailable" in str(openwebui_status["error"])
@pytest.mark.asyncio
async def test_storage_manager_close_all_clears_state() -> None:
    """``close_all`` must close backends and reset state even if one ``close`` raises."""
    fake_settings = SimpleNamespace(
        weaviate_endpoint="http://weaviate.local",
        weaviate_api_key=None,
        openwebui_endpoint="http://chat.local",
        openwebui_api_key=None,
        r2r_endpoint=None,
        r2r_api_key=None,
    )
    manager = StorageManager(cast(Settings, fake_settings))

    closable_stub = ClosableStubStorage(
        StorageConfig(
            backend=StorageBackend.WEAVIATE,
            endpoint="http://weaviate.local",
            collection_name="primary",
        )
    )
    # This stub's close() raises RuntimeError.
    exploding_stub = FailingCloseStorage(
        StorageConfig(
            backend=StorageBackend.OPEN_WEBUI,
            endpoint="http://chat.local",
            collection_name="secondary",
        )
    )
    manager.backends = {
        StorageBackend.WEAVIATE: closable_stub,
        StorageBackend.OPEN_WEBUI: exploding_stub,
    }
    manager.capabilities[StorageBackend.WEAVIATE] = StorageCapabilities.VECTOR_SEARCH
    manager.capabilities[StorageBackend.OPEN_WEBUI] = StorageCapabilities.KNOWLEDGE_BASE
    # Set the private flag directly so the reset can be observed below.
    manager._initialized = True

    await manager.close_all()

    assert closable_stub.closed is True
    assert manager.backends == {}
    assert manager.capabilities == {}
    assert manager.is_initialized is False