Improve CJK detection and safely drop Neo4j indexes
- Expand CJK regex to cover Extensions A-F
- Use DROP INDEX IF EXISTS
- Add cleanup in multi-workspace test
- Safely handle legacy index drops
This commit is contained in:
@@ -101,9 +101,18 @@ class Neo4JStorage(BaseGraphStorage):
|
|||||||
return f"entity_id_fulltext_idx_{suffix}"
|
return f"entity_id_fulltext_idx_{suffix}"
|
||||||
|
|
||||||
def _is_chinese_text(self, text: str) -> bool:
|
def _is_chinese_text(self, text: str) -> bool:
|
||||||
"""Check if text contains Chinese characters."""
|
"""Check if text contains Chinese/CJK characters.
|
||||||
chinese_pattern = re.compile(r"[\u4e00-\u9fff]+")
|
|
||||||
return bool(chinese_pattern.search(text))
|
Covers:
|
||||||
|
- CJK Unified Ideographs (U+4E00-U+9FFF)
|
||||||
|
- CJK Extension A (U+3400-U+4DBF)
|
||||||
|
- CJK Compatibility Ideographs (U+F900-U+FAFF)
|
||||||
|
- CJK Extension B-F (U+20000-U+2FA1F) - supplementary planes
|
||||||
|
"""
|
||||||
|
cjk_pattern = re.compile(
|
||||||
|
r"[\u3400-\u4dbf\u4e00-\u9fff\uf900-\ufaff]|[\U00020000-\U0002fa1f]"
|
||||||
|
)
|
||||||
|
return bool(cjk_pattern.search(text))
|
||||||
|
|
||||||
async def initialize(self):
|
async def initialize(self):
|
||||||
async with get_data_init_lock():
|
async with get_data_init_lock():
|
||||||
@@ -288,8 +297,8 @@ class Neo4JStorage(BaseGraphStorage):
|
|||||||
f"[{self.workspace}] Found legacy index '{legacy_index_name}'. Migrating to '{index_name}'."
|
f"[{self.workspace}] Found legacy index '{legacy_index_name}'. Migrating to '{index_name}'."
|
||||||
)
|
)
|
||||||
try:
|
try:
|
||||||
# Drop the legacy index
|
# Drop the legacy index (use IF EXISTS for safety)
|
||||||
drop_query = f"DROP INDEX {legacy_index_name}"
|
drop_query = f"DROP INDEX {legacy_index_name} IF EXISTS"
|
||||||
result = await session.run(drop_query)
|
result = await session.run(drop_query)
|
||||||
await result.consume()
|
await result.consume()
|
||||||
logger.info(
|
logger.info(
|
||||||
@@ -329,10 +338,10 @@ class Neo4JStorage(BaseGraphStorage):
|
|||||||
needs_creation = existing_index is None
|
needs_creation = existing_index is None
|
||||||
|
|
||||||
if needs_recreation or needs_creation:
|
if needs_recreation or needs_creation:
|
||||||
# Drop existing index if it needs recreation
|
# Drop existing index if it needs recreation (use IF EXISTS for safety)
|
||||||
if needs_recreation:
|
if needs_recreation:
|
||||||
try:
|
try:
|
||||||
drop_query = f"DROP INDEX {index_name}"
|
drop_query = f"DROP INDEX {index_name} IF EXISTS"
|
||||||
result = await session.run(drop_query)
|
result = await session.run(drop_query)
|
||||||
await result.consume()
|
await result.consume()
|
||||||
logger.info(
|
logger.info(
|
||||||
|
|||||||
@@ -294,6 +294,17 @@ async def test_multiple_workspaces_have_separate_indexes(neo4j_storage):
|
|||||||
), f"Workspace 2 index '{workspace2_index}' should exist"
|
), f"Workspace 2 index '{workspace2_index}' should exist"
|
||||||
|
|
||||||
finally:
|
finally:
|
||||||
|
# Clean up: drop the fulltext index created for workspace 2 to prevent accumulation
|
||||||
|
try:
|
||||||
|
async with storage2._driver.session(database=storage2._DATABASE) as session:
|
||||||
|
index_name = storage2._get_fulltext_index_name(
|
||||||
|
storage2._get_workspace_label()
|
||||||
|
)
|
||||||
|
drop_query = f"DROP INDEX {index_name} IF EXISTS"
|
||||||
|
result = await session.run(drop_query)
|
||||||
|
await result.consume()
|
||||||
|
except Exception:
|
||||||
|
pass # Ignore errors during cleanup
|
||||||
await storage2.drop()
|
await storage2.drop()
|
||||||
await storage2.finalize()
|
await storage2.finalize()
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user