477 lines
16 KiB
Python
477 lines
16 KiB
Python
from __future__ import annotations
|
|
|
|
from types import SimpleNamespace
|
|
|
|
import pytest
|
|
|
|
from ingest_pipeline.cli.tui.utils.storage_manager import (
|
|
MultiStorageAdapter,
|
|
StorageCapabilities,
|
|
StorageManager,
|
|
)
|
|
from typing import cast
|
|
|
|
from ingest_pipeline.config.settings import Settings
|
|
from ingest_pipeline.core.exceptions import StorageError
|
|
from ingest_pipeline.core.models import Document, StorageBackend, StorageConfig
|
|
from ingest_pipeline.storage.base import BaseStorage
|
|
|
|
|
|
class StubStorage(BaseStorage):
    """In-memory ``BaseStorage`` double for the storage-manager tests.

    Documents handed to ``store``/``store_batch`` are recorded in ``stored``;
    ``documents`` seeds what ``count`` and ``search`` report. With ``fail=True``
    the ``store``, ``store_batch`` and ``delete`` calls raise ``RuntimeError``.
    """

    def __init__(
        self,
        config: StorageConfig,
        *,
        documents: list[Document] | None = None,
        fail: bool = False,
    ) -> None:
        super().__init__(config)
        # A missing (or empty) seed list is replaced by a fresh empty list.
        self.documents = documents if documents else []
        self.fail = fail
        self.stored: list[Document] = []

    async def initialize(self) -> None:
        """Do nothing; the stub needs no setup."""
        return None

    async def store(self, document: Document, *, collection_name: str | None = None) -> str:
        """Record ``document`` and return ``"<backend value>-single"``.

        Raises:
            RuntimeError: if the stub was built with ``fail=True``; the
                document is recorded in ``stored`` before the error is raised.
        """
        self.stored.append(document)
        if not self.fail:
            return f"{self.config.backend.value}-single"
        raise RuntimeError("store failed")

    async def store_batch(
        self, documents: list[Document], *, collection_name: str | None = None
    ) -> list[str]:
        """Record every document and return one ``"<backend value>-<index>"`` id each.

        Raises:
            RuntimeError: if the stub was built with ``fail=True``; the
                documents are recorded in ``stored`` before the error is raised.
        """
        self.stored.extend(documents)
        if not self.fail:
            return [f"{self.config.backend.value}-{position}" for position, _ in enumerate(documents)]
        raise RuntimeError("batch failed")

    async def delete(self, document_id: str, *, collection_name: str | None = None) -> bool:
        """Return ``True``, or raise ``RuntimeError`` when ``fail`` is set."""
        if not self.fail:
            return True
        raise RuntimeError("delete failed")

    async def count(self, *, collection_name: str | None = None) -> int:
        """Return the number of seeded documents; ``collection_name`` is ignored."""
        seeded = self.documents
        return len(seeded)

    async def list_collections(self) -> list[str]:
        """Return a single fixed collection name."""
        return ["collection"]

    async def search(
        self,
        query: str,
        limit: int = 10,
        threshold: float = 0.7,
        *,
        collection_name: str | None = None,
    ):
        """Yield every seeded document; none of the arguments are consulted."""
        for seeded_document in self.documents:
            yield seeded_document

    async def close(self) -> None:
        """Do nothing; the stub holds no resources."""
        return None
|
|
|
|
|
|
class CollectionStubStorage(StubStorage):
    """Stub whose collection names and per-collection counts are supplied by the test."""

    def __init__(
        self,
        config: StorageConfig,
        *,
        collections: list[str],
        counts: dict[str, int],
    ) -> None:
        super().__init__(config)
        self.counts = counts
        self.collections = collections

    async def list_collections(self) -> list[str]:
        """Return the collection names given at construction."""
        return self.collections

    async def count(self, *, collection_name: str | None = None) -> int:
        """Look up the configured count for ``collection_name``.

        Raises:
            ValueError: if ``collection_name`` is ``None``.
            KeyError: if no count was configured for the name (plain dict lookup).
        """
        if collection_name is not None:
            return self.counts[collection_name]
        raise ValueError("collection name required")
|
|
|
|
|
|
class FailingStatusStorage(StubStorage):
    """Stub whose ``list_collections`` always raises."""

    async def list_collections(self) -> list[str]:
        """Raise ``RuntimeError("status unavailable")`` unconditionally."""
        failure = RuntimeError("status unavailable")
        raise failure
|
|
|
|
|
|
class ClosableStubStorage(StubStorage):
    """Stub that records whether ``close`` has been awaited."""

    def __init__(self, config: StorageConfig) -> None:
        super().__init__(config)
        # Set to True by close(); tests read this flag afterwards.
        self.closed = False

    async def close(self) -> None:
        """Mark the stub as closed."""
        self.closed = True
|
|
|
|
|
|
class FailingCloseStorage(StubStorage):
    """Stub whose ``close`` always raises."""

    async def close(self) -> None:
        """Raise ``RuntimeError("close failure")`` unconditionally."""
        failure = RuntimeError("close failure")
        raise failure
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_multi_storage_adapter_reports_replication_failure(document_factory) -> None:
    """A failing secondary surfaces ``StorageError``; the primary still records the document."""
    healthy_primary = StubStorage(
        StorageConfig(
            backend=StorageBackend.WEAVIATE,
            endpoint="http://weaviate.local",
            collection_name="primary",
        )
    )
    broken_secondary = StubStorage(
        StorageConfig(
            backend=StorageBackend.OPEN_WEBUI,
            endpoint="http://chat.local",
            collection_name="secondary",
        ),
        fail=True,
    )
    adapter = MultiStorageAdapter([healthy_primary, broken_secondary])

    with pytest.raises(StorageError):
        await adapter.store(document_factory(content="payload"))

    # The write to the primary must have happened despite the secondary failure.
    first_stored = healthy_primary.stored[0]
    assert first_stored.content == "payload"
|
|
|
|
|
|
def test_storage_manager_build_multi_storage_adapter_deduplicates(document_factory) -> None:
    """Requesting WEAVIATE twice yields an adapter holding each backend once, in request order."""
    fake_settings = SimpleNamespace(
        weaviate_endpoint="http://weaviate.local",
        weaviate_api_key=None,
        openwebui_endpoint="http://chat.local",
        openwebui_api_key=None,
        r2r_endpoint=None,
        r2r_api_key=None,
    )
    manager = StorageManager(cast(Settings, fake_settings))

    # Register the stubs directly instead of going through backend initialisation.
    manager.backends[StorageBackend.WEAVIATE] = StubStorage(
        StorageConfig(
            backend=StorageBackend.WEAVIATE,
            endpoint="http://weaviate.local",
            collection_name="primary",
        )
    )
    manager.backends[StorageBackend.OPEN_WEBUI] = StubStorage(
        StorageConfig(
            backend=StorageBackend.OPEN_WEBUI,
            endpoint="http://chat.local",
            collection_name="secondary",
        )
    )

    requested = [StorageBackend.WEAVIATE, StorageBackend.WEAVIATE, StorageBackend.OPEN_WEBUI]
    adapter = manager.build_multi_storage_adapter(requested)

    wrapped = adapter._storages
    assert len(wrapped) == 2
    assert wrapped[0].config.backend == StorageBackend.WEAVIATE
    assert wrapped[1].config.backend == StorageBackend.OPEN_WEBUI
|
|
|
|
|
|
def test_storage_manager_build_multi_storage_adapter_missing_backend() -> None:
    """Building an adapter for a backend that was never registered raises ``ValueError``."""
    fake_settings = SimpleNamespace(
        weaviate_endpoint="http://weaviate.local",
        weaviate_api_key=None,
        openwebui_endpoint="http://chat.local",
        openwebui_api_key=None,
        r2r_endpoint=None,
        r2r_api_key=None,
    )
    manager = StorageManager(cast(Settings, fake_settings))

    # No backend has been added to the manager at this point.
    unregistered = [StorageBackend.WEAVIATE]
    with pytest.raises(ValueError):
        manager.build_multi_storage_adapter(unregistered)
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_storage_manager_search_across_backends_groups_results(document_factory) -> None:
    """Search results come back keyed by backend, each holding that backend's documents."""
    fake_settings = SimpleNamespace(
        weaviate_endpoint="http://weaviate.local",
        weaviate_api_key=None,
        openwebui_endpoint="http://chat.local",
        openwebui_api_key=None,
        r2r_endpoint=None,
        r2r_api_key=None,
    )
    manager = StorageManager(cast(Settings, fake_settings))

    alpha_document = document_factory(
        content="alpha", metadata_updates={"source_url": "https://alpha"}
    )
    beta_document = document_factory(
        content="beta", metadata_updates={"source_url": "https://beta"}
    )

    # (backend, endpoint, collection, seeded document) for each registered stub.
    seeded_backends = (
        (StorageBackend.WEAVIATE, "http://weaviate.local", "primary", alpha_document),
        (StorageBackend.OPEN_WEBUI, "http://chat.local", "secondary", beta_document),
    )
    for backend, endpoint, collection, seeded_document in seeded_backends:
        manager.backends[backend] = StubStorage(
            StorageConfig(
                backend=backend,
                endpoint=endpoint,
                collection_name=collection,
            ),
            documents=[seeded_document],
        )

    results = await manager.search_across_backends(
        "query",
        limit=5,
        backends=[StorageBackend.WEAVIATE, StorageBackend.OPEN_WEBUI],
    )

    weaviate_hits = results[StorageBackend.WEAVIATE]
    assert weaviate_hits[0].content == "alpha"
    openwebui_hits = results[StorageBackend.OPEN_WEBUI]
    assert openwebui_hits[0].content == "beta"
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_multi_storage_adapter_store_batch_replicates_to_all_backends(
    document_factory,
) -> None:
    """A batch store returns the primary's ids and reaches the secondary as well."""
    primary = StubStorage(
        StorageConfig(
            backend=StorageBackend.WEAVIATE,
            endpoint="http://weaviate.local",
            collection_name="primary",
        )
    )
    secondary = StubStorage(
        StorageConfig(
            backend=StorageBackend.OPEN_WEBUI,
            endpoint="http://chat.local",
            collection_name="secondary",
        )
    )
    # The secondary is deliberately passed twice.
    adapter = MultiStorageAdapter([primary, secondary, secondary])

    batch = [document_factory(content="first"), document_factory(content="second")]

    returned_ids = await adapter.store_batch(batch)

    # Ids carry the primary's backend value.
    assert returned_ids == ["weaviate-0", "weaviate-1"]
    assert adapter._storages[0] is primary
    assert primary.stored[0].content == "first"
    assert secondary.stored[1].content == "second"
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_multi_storage_adapter_delete_reports_secondary_failures() -> None:
    """A delete that fails on the secondary raises ``StorageError`` naming that backend."""
    healthy_primary = StubStorage(
        StorageConfig(
            backend=StorageBackend.WEAVIATE,
            endpoint="http://weaviate.local",
            collection_name="primary",
        )
    )
    broken_secondary = StubStorage(
        StorageConfig(
            backend=StorageBackend.OPEN_WEBUI,
            endpoint="http://chat.local",
            collection_name="secondary",
        ),
        fail=True,
    )
    adapter = MultiStorageAdapter([healthy_primary, broken_secondary])

    with pytest.raises(StorageError) as exc_info:
        await adapter.delete("identifier")

    # The error text must identify which backend failed.
    error_text = str(exc_info.value)
    assert "open_webui" in error_text
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_storage_manager_initialize_all_backends_registers_capabilities(monkeypatch) -> None:
    """With all three backends configured, each initialises and gets its capability level."""
    fake_settings = SimpleNamespace(
        weaviate_endpoint="http://weaviate.local",
        weaviate_api_key="key",
        openwebui_endpoint="http://chat.local",
        openwebui_api_key="token",
        r2r_endpoint="http://r2r.local",
        r2r_api_key="secret",
    )
    manager = StorageManager(cast(Settings, fake_settings))

    # Replace the storage classes referenced by the manager module with the stub.
    patch_targets = (
        "ingest_pipeline.cli.tui.utils.storage_manager.WeaviateStorage",
        "ingest_pipeline.cli.tui.utils.storage_manager.OpenWebUIStorage",
        "ingest_pipeline.cli.tui.utils.storage_manager.R2RStorage",
    )
    for target in patch_targets:
        monkeypatch.setattr(target, StubStorage)

    results = await manager.initialize_all_backends()

    all_backends = [
        StorageBackend.WEAVIATE,
        StorageBackend.OPEN_WEBUI,
        StorageBackend.R2R,
    ]
    for backend in all_backends:
        assert results[backend] is True
    assert manager.get_available_backends() == all_backends

    expected_capabilities = {
        StorageBackend.WEAVIATE: StorageCapabilities.VECTOR_SEARCH,
        StorageBackend.OPEN_WEBUI: StorageCapabilities.KNOWLEDGE_BASE,
        StorageBackend.R2R: StorageCapabilities.FULL_FEATURED,
    }
    for backend, capability in expected_capabilities.items():
        assert manager.capabilities[backend] == capability

    assert manager.supports_advanced_features(StorageBackend.R2R) is True
    assert manager.supports_advanced_features(StorageBackend.WEAVIATE) is False
    assert manager.is_initialized is True
    assert isinstance(manager.get_backend(StorageBackend.R2R), StubStorage)
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_storage_manager_initialize_all_backends_handles_missing_config() -> None:
    """No backend comes up, yet the manager still reports itself as initialised.

    NOTE(review): OpenWebUI has an endpoint configured here (no API key) and is
    still expected to report ``False``; the reason is not visible in this file —
    confirm against ``StorageManager.initialize_all_backends``.
    """
    fake_settings = SimpleNamespace(
        weaviate_endpoint=None,
        weaviate_api_key=None,
        openwebui_endpoint="http://chat.local",
        openwebui_api_key=None,
        r2r_endpoint=None,
        r2r_api_key=None,
    )
    manager = StorageManager(cast(Settings, fake_settings))

    results = await manager.initialize_all_backends()

    for backend in (StorageBackend.WEAVIATE, StorageBackend.OPEN_WEBUI, StorageBackend.R2R):
        assert results[backend] is False
    assert manager.get_available_backends() == []
    assert manager.is_initialized is True
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_storage_manager_get_all_collections_merges_counts_and_backends() -> None:
    """A collection present on two backends is reported once, listing both backends."""
    fake_settings = SimpleNamespace(
        weaviate_endpoint="http://weaviate.local",
        weaviate_api_key=None,
        openwebui_endpoint="http://chat.local",
        openwebui_api_key=None,
        r2r_endpoint=None,
        r2r_api_key=None,
    )
    manager = StorageManager(cast(Settings, fake_settings))

    weaviate_config = StorageConfig(
        backend=StorageBackend.WEAVIATE,
        endpoint="http://weaviate.local",
        collection_name="shared",
    )
    # Also lists an empty collection name, which has no configured count.
    weaviate_storage = CollectionStubStorage(
        weaviate_config,
        collections=["shared", ""],
        counts={"shared": 2},
    )

    openwebui_config = StorageConfig(
        backend=StorageBackend.OPEN_WEBUI,
        endpoint="http://chat.local",
        collection_name="secondary",
    )
    # Reports a negative count for the shared collection.
    openwebui_storage = CollectionStubStorage(
        openwebui_config,
        collections=["shared"],
        counts={"shared": -1},
    )

    manager.backends = {
        StorageBackend.WEAVIATE: weaviate_storage,
        StorageBackend.OPEN_WEBUI: openwebui_storage,
    }

    collections = await manager.get_all_collections()

    assert len(collections) == 1
    merged = collections[0]
    assert merged["name"] == "shared"
    assert merged["count"] == 2
    assert merged["backend"] == ["weaviate", "open_webui"]
    assert merged["type"] == "weaviate"
    assert merged["size_mb"] == pytest.approx(0.02)
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_storage_manager_get_backend_status_reports_failures() -> None:
    """Status covers a healthy backend in full and marks a raising backend as unavailable."""
    fake_settings = SimpleNamespace(
        weaviate_endpoint="http://weaviate.local",
        weaviate_api_key=None,
        openwebui_endpoint="http://chat.local",
        openwebui_api_key=None,
        r2r_endpoint=None,
        r2r_api_key=None,
    )
    manager = StorageManager(cast(Settings, fake_settings))

    weaviate_config = StorageConfig(
        backend=StorageBackend.WEAVIATE,
        endpoint="http://weaviate.local",
        collection_name="primary",
    )
    healthy_storage = CollectionStubStorage(
        weaviate_config,
        collections=["collection", "archive"],
        counts={"collection": 2, "archive": 1},
    )

    openwebui_config = StorageConfig(
        backend=StorageBackend.OPEN_WEBUI,
        endpoint="http://chat.local",
        collection_name="secondary",
    )
    # list_collections() on this stub raises RuntimeError("status unavailable").
    failing_storage = FailingStatusStorage(openwebui_config)

    manager.backends = {
        StorageBackend.WEAVIATE: healthy_storage,
        StorageBackend.OPEN_WEBUI: failing_storage,
    }
    # Only the healthy backend gets a capability entry.
    manager.capabilities[StorageBackend.WEAVIATE] = StorageCapabilities.VECTOR_SEARCH

    status = await manager.get_backend_status()

    weaviate_status = status[StorageBackend.WEAVIATE]
    assert weaviate_status["available"] is True
    assert weaviate_status["collections"] == 2
    assert weaviate_status["total_documents"] == 3
    assert weaviate_status["capabilities"] == StorageCapabilities.VECTOR_SEARCH
    assert str(weaviate_status["endpoint"]) == "http://weaviate.local/"

    openwebui_status = status[StorageBackend.OPEN_WEBUI]
    assert openwebui_status["available"] is False
    assert openwebui_status["capabilities"] == StorageCapabilities.NONE
    assert "status unavailable" in str(openwebui_status["error"])
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_storage_manager_close_all_clears_state() -> None:
    """``close_all`` closes backends, tolerates one whose close raises, and resets the manager."""
    fake_settings = SimpleNamespace(
        weaviate_endpoint="http://weaviate.local",
        weaviate_api_key=None,
        openwebui_endpoint="http://chat.local",
        openwebui_api_key=None,
        r2r_endpoint=None,
        r2r_api_key=None,
    )
    manager = StorageManager(cast(Settings, fake_settings))

    weaviate_config = StorageConfig(
        backend=StorageBackend.WEAVIATE,
        endpoint="http://weaviate.local",
        collection_name="primary",
    )
    closable_storage = ClosableStubStorage(weaviate_config)

    openwebui_config = StorageConfig(
        backend=StorageBackend.OPEN_WEBUI,
        endpoint="http://chat.local",
        collection_name="secondary",
    )
    # close() on this stub raises RuntimeError("close failure").
    failing_close_storage = FailingCloseStorage(openwebui_config)

    # Put the manager into an "initialised with two backends" state by hand.
    manager.backends = {
        StorageBackend.WEAVIATE: closable_storage,
        StorageBackend.OPEN_WEBUI: failing_close_storage,
    }
    manager.capabilities[StorageBackend.WEAVIATE] = StorageCapabilities.VECTOR_SEARCH
    manager.capabilities[StorageBackend.OPEN_WEBUI] = StorageCapabilities.KNOWLEDGE_BASE
    manager._initialized = True

    # Must not propagate the failing backend's RuntimeError.
    await manager.close_all()

    assert closable_storage.closed is True
    assert manager.backends == {}
    assert manager.capabilities == {}
    assert manager.is_initialized is False
|