Improve CJK detection and safely drop Neo4j indexes
- Expand CJK regex to extensions A-F
- Use DROP INDEX IF EXISTS
- Add cleanup in multi-workspace test
- Safely handle legacy index drops
This commit is contained in:
@@ -101,9 +101,18 @@ class Neo4JStorage(BaseGraphStorage):
|
||||
return f"entity_id_fulltext_idx_{suffix}"
|
||||
|
||||
def _is_chinese_text(self, text: str) -> bool:
|
||||
"""Check if text contains Chinese characters."""
|
||||
chinese_pattern = re.compile(r"[\u4e00-\u9fff]+")
|
||||
return bool(chinese_pattern.search(text))
|
||||
"""Check if text contains Chinese/CJK characters.
|
||||
|
||||
Covers:
|
||||
- CJK Unified Ideographs (U+4E00-U+9FFF)
|
||||
- CJK Extension A (U+3400-U+4DBF)
|
||||
- CJK Compatibility Ideographs (U+F900-U+FAFF)
|
||||
- CJK Extension B-F (U+20000-U+2FA1F) - supplementary planes
|
||||
"""
|
||||
cjk_pattern = re.compile(
|
||||
r"[\u3400-\u4dbf\u4e00-\u9fff\uf900-\ufaff]|[\U00020000-\U0002fa1f]"
|
||||
)
|
||||
return bool(cjk_pattern.search(text))
|
||||
|
||||
async def initialize(self):
|
||||
async with get_data_init_lock():
|
||||
@@ -288,8 +297,8 @@ class Neo4JStorage(BaseGraphStorage):
|
||||
f"[{self.workspace}] Found legacy index '{legacy_index_name}'. Migrating to '{index_name}'."
|
||||
)
|
||||
try:
|
||||
# Drop the legacy index
|
||||
drop_query = f"DROP INDEX {legacy_index_name}"
|
||||
# Drop the legacy index (use IF EXISTS for safety)
|
||||
drop_query = f"DROP INDEX {legacy_index_name} IF EXISTS"
|
||||
result = await session.run(drop_query)
|
||||
await result.consume()
|
||||
logger.info(
|
||||
@@ -329,10 +338,10 @@ class Neo4JStorage(BaseGraphStorage):
|
||||
needs_creation = existing_index is None
|
||||
|
||||
if needs_recreation or needs_creation:
|
||||
# Drop existing index if it needs recreation
|
||||
# Drop existing index if it needs recreation (use IF EXISTS for safety)
|
||||
if needs_recreation:
|
||||
try:
|
||||
drop_query = f"DROP INDEX {index_name}"
|
||||
drop_query = f"DROP INDEX {index_name} IF EXISTS"
|
||||
result = await session.run(drop_query)
|
||||
await result.consume()
|
||||
logger.info(
|
||||
|
||||
@@ -294,6 +294,17 @@ async def test_multiple_workspaces_have_separate_indexes(neo4j_storage):
|
||||
), f"Workspace 2 index '{workspace2_index}' should exist"
|
||||
|
||||
finally:
|
||||
# Clean up: drop the fulltext index created for workspace 2 to prevent accumulation
|
||||
try:
|
||||
async with storage2._driver.session(database=storage2._DATABASE) as session:
|
||||
index_name = storage2._get_fulltext_index_name(
|
||||
storage2._get_workspace_label()
|
||||
)
|
||||
drop_query = f"DROP INDEX {index_name} IF EXISTS"
|
||||
result = await session.run(drop_query)
|
||||
await result.consume()
|
||||
except Exception:
|
||||
pass # Ignore errors during cleanup
|
||||
await storage2.drop()
|
||||
await storage2.finalize()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user