- Use check_table_exists DB method - Update mocks for keyset pagination - Enforce error on dimension mismatch - Remove deprecated module patches - Verify workspace migration isolation
221 lines
7.8 KiB
Python
221 lines
7.8 KiB
Python
"""
|
|
Tests for safety when model suffix is absent (no model_name provided).
|
|
|
|
This test module verifies that the system correctly handles the case when
|
|
no model_name is provided, preventing accidental deletion of the only table/collection
|
|
on restart.
|
|
|
|
Critical Bug: When model_suffix is empty, table_name == legacy_table_name.
|
|
On second startup, Case 1 logic would delete the only table/collection thinking
|
|
it's "legacy", causing all subsequent operations to fail.
|
|
"""
|
|
|
|
from unittest.mock import MagicMock, AsyncMock, patch
|
|
|
|
from lightrag.kg.qdrant_impl import QdrantVectorDBStorage
|
|
from lightrag.kg.postgres_impl import PGVectorStorage
|
|
|
|
|
|
class TestNoModelSuffixSafety:
|
|
"""Test suite for preventing data loss when model_suffix is absent."""
|
|
|
|
def test_qdrant_no_suffix_second_startup(self):
|
|
"""
|
|
Test Qdrant doesn't delete collection on second startup when no model_name.
|
|
|
|
Scenario:
|
|
1. First startup: Creates collection without suffix
|
|
2. Collection is empty
|
|
3. Second startup: Should NOT delete the collection
|
|
|
|
Bug: Without fix, Case 1 would delete the only collection.
|
|
"""
|
|
from qdrant_client import models
|
|
|
|
client = MagicMock()
|
|
|
|
# Simulate second startup: collection already exists and is empty
|
|
# IMPORTANT: Without suffix, collection_name == legacy collection name
|
|
collection_name = "lightrag_vdb_chunks" # No suffix, same as legacy
|
|
|
|
# Both exist (they're the same collection)
|
|
client.collection_exists.return_value = True
|
|
|
|
# Collection is empty
|
|
client.count.return_value.count = 0
|
|
|
|
# Patch _find_legacy_collection to return the SAME collection name
|
|
# This simulates the scenario where new collection == legacy collection
|
|
with patch(
|
|
"lightrag.kg.qdrant_impl._find_legacy_collection",
|
|
return_value="lightrag_vdb_chunks", # Same as collection_name
|
|
):
|
|
# Call setup_collection
|
|
# This should detect that new == legacy and skip deletion
|
|
QdrantVectorDBStorage.setup_collection(
|
|
client,
|
|
collection_name,
|
|
namespace="chunks",
|
|
workspace="_",
|
|
vectors_config=models.VectorParams(
|
|
size=1536, distance=models.Distance.COSINE
|
|
),
|
|
hnsw_config=models.HnswConfigDiff(
|
|
payload_m=16,
|
|
m=0,
|
|
),
|
|
model_suffix="", # Empty suffix to simulate no model_name provided
|
|
)
|
|
|
|
# CRITICAL: Collection should NOT be deleted
|
|
client.delete_collection.assert_not_called()
|
|
|
|
# Verify we returned early (skipped Case 1 cleanup)
|
|
# The collection_exists was checked, but we didn't proceed to count
|
|
# because we detected same name
|
|
assert client.collection_exists.call_count >= 1
|
|
|
|
async def test_postgres_no_suffix_second_startup(self):
|
|
"""
|
|
Test PostgreSQL doesn't delete table on second startup when no model_name.
|
|
|
|
Scenario:
|
|
1. First startup: Creates table without suffix
|
|
2. Table is empty
|
|
3. Second startup: Should NOT delete the table
|
|
|
|
Bug: Without fix, Case 1 would delete the only table.
|
|
"""
|
|
db = AsyncMock()
|
|
|
|
# Configure mock return values to avoid unawaited coroutine warnings
|
|
db.query.return_value = {"count": 0}
|
|
db._create_vector_index.return_value = None
|
|
|
|
# Simulate second startup: table already exists and is empty
|
|
# IMPORTANT: table_name and legacy_table_name are THE SAME
|
|
table_name = "LIGHTRAG_VDB_CHUNKS" # No suffix
|
|
legacy_table_name = "LIGHTRAG_VDB_CHUNKS" # Same as new
|
|
|
|
# Setup mock responses using check_table_exists on db
|
|
async def check_table_exists_side_effect(name):
|
|
# Both tables exist (they're the same)
|
|
return True
|
|
|
|
db.check_table_exists = AsyncMock(side_effect=check_table_exists_side_effect)
|
|
|
|
# Call setup_table
|
|
# This should detect that new == legacy and skip deletion
|
|
await PGVectorStorage.setup_table(
|
|
db,
|
|
table_name,
|
|
workspace="test_workspace",
|
|
embedding_dim=1536,
|
|
legacy_table_name=legacy_table_name,
|
|
base_table="LIGHTRAG_VDB_CHUNKS",
|
|
)
|
|
|
|
# CRITICAL: Table should NOT be deleted (no DROP TABLE)
|
|
drop_calls = [
|
|
call
|
|
for call in db.execute.call_args_list
|
|
if call[0][0] and "DROP TABLE" in call[0][0]
|
|
]
|
|
assert (
|
|
len(drop_calls) == 0
|
|
), "Should not drop table when new and legacy are the same"
|
|
|
|
# Note: COUNT queries for workspace data are expected behavior in Case 1
|
|
# (for logging/warning purposes when workspace data is empty).
|
|
# The critical safety check is that DROP TABLE is not called.
|
|
|
|
def test_qdrant_with_suffix_case1_still_works(self):
|
|
"""
|
|
Test that Case 1 cleanup still works when there IS a suffix.
|
|
|
|
This ensures our fix doesn't break the normal Case 1 scenario.
|
|
"""
|
|
from qdrant_client import models
|
|
|
|
client = MagicMock()
|
|
|
|
# Different names (normal case)
|
|
collection_name = "lightrag_vdb_chunks_ada_002_1536d" # With suffix
|
|
legacy_collection = "lightrag_vdb_chunks" # Without suffix
|
|
|
|
# Setup: both exist
|
|
def collection_exists_side_effect(name):
|
|
return name in [collection_name, legacy_collection]
|
|
|
|
client.collection_exists.side_effect = collection_exists_side_effect
|
|
|
|
# Legacy is empty
|
|
client.count.return_value.count = 0
|
|
|
|
# Call setup_collection
|
|
QdrantVectorDBStorage.setup_collection(
|
|
client,
|
|
collection_name,
|
|
namespace="chunks",
|
|
workspace="_",
|
|
vectors_config=models.VectorParams(
|
|
size=1536, distance=models.Distance.COSINE
|
|
),
|
|
hnsw_config=models.HnswConfigDiff(
|
|
payload_m=16,
|
|
m=0,
|
|
),
|
|
model_suffix="ada_002_1536d",
|
|
)
|
|
|
|
# SHOULD delete legacy (normal Case 1 behavior)
|
|
client.delete_collection.assert_called_once_with(
|
|
collection_name=legacy_collection
|
|
)
|
|
|
|
async def test_postgres_with_suffix_case1_still_works(self):
|
|
"""
|
|
Test that Case 1 cleanup still works when there IS a suffix.
|
|
|
|
This ensures our fix doesn't break the normal Case 1 scenario.
|
|
"""
|
|
db = AsyncMock()
|
|
|
|
# Different names (normal case)
|
|
table_name = "LIGHTRAG_VDB_CHUNKS_ADA_002_1536D" # With suffix
|
|
legacy_table_name = "LIGHTRAG_VDB_CHUNKS" # Without suffix
|
|
|
|
# Setup mock responses using check_table_exists on db
|
|
async def check_table_exists_side_effect(name):
|
|
# Both tables exist
|
|
return True
|
|
|
|
db.check_table_exists = AsyncMock(side_effect=check_table_exists_side_effect)
|
|
|
|
# Mock empty table
|
|
async def query_side_effect(sql, params, **kwargs):
|
|
if "COUNT(*)" in sql:
|
|
return {"count": 0}
|
|
return {}
|
|
|
|
db.query.side_effect = query_side_effect
|
|
|
|
# Call setup_table
|
|
await PGVectorStorage.setup_table(
|
|
db,
|
|
table_name,
|
|
workspace="test_workspace",
|
|
embedding_dim=1536,
|
|
legacy_table_name=legacy_table_name,
|
|
base_table="LIGHTRAG_VDB_CHUNKS",
|
|
)
|
|
|
|
# SHOULD delete legacy (normal Case 1 behavior)
|
|
drop_calls = [
|
|
call
|
|
for call in db.execute.call_args_list
|
|
if call[0][0] and "DROP TABLE" in call[0][0]
|
|
]
|
|
assert len(drop_calls) == 1, "Should drop legacy table in normal Case 1"
|
|
assert legacy_table_name in drop_calls[0][0][0]
|