From 561ba4e4b5d5d9b7939930be479f2fad353d1128 Mon Sep 17 00:00:00 2001
From: yangdx
Date: Wed, 3 Dec 2025 12:40:48 +0800
Subject: [PATCH] Fix trailing whitespace and update test mocking for rerank module
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

• Remove trailing whitespace
• Fix TiktokenTokenizer import patch
• Add async context manager mocks
• Update aiohttp.ClientSession patch
• Improve test reliability
---
 lightrag/rerank.py               |  2 +-
 tests/test_overlap_validation.py | 28 ++++++++++++++--------------
 tests/test_rerank_chunking.py    | 12 ++++++++----
 3 files changed, 23 insertions(+), 19 deletions(-)

diff --git a/lightrag/rerank.py b/lightrag/rerank.py
index 81632b71..2e22f19a 100644
--- a/lightrag/rerank.py
+++ b/lightrag/rerank.py
@@ -50,7 +50,7 @@ def chunk_documents_for_rerank(
         f"overlap_tokens ({original_overlap}) must be less than max_tokens ({max_tokens}). "
         f"Clamping to {overlap_tokens} to prevent infinite loop."
     )
-    
+
     try:
         from .utils import TiktokenTokenizer

diff --git a/tests/test_overlap_validation.py b/tests/test_overlap_validation.py
index 7f84a3cf..4e7c9cbd 100644
--- a/tests/test_overlap_validation.py
+++ b/tests/test_overlap_validation.py
@@ -14,12 +14,12 @@ class TestOverlapValidation:
     def test_overlap_greater_than_max_tokens(self):
         """Test that overlap_tokens > max_tokens is clamped and doesn't hang"""
         documents = [" ".join([f"word{i}" for i in range(100)])]
-        
+
         # This should clamp overlap_tokens to 29 (max_tokens - 1)
         chunked_docs, doc_indices = chunk_documents_for_rerank(
             documents, max_tokens=30, overlap_tokens=32
         )
-        
+
         # Should complete without hanging
         assert len(chunked_docs) > 0
         assert all(idx == 0 for idx in doc_indices)
@@ -27,12 +27,12 @@ class TestOverlapValidation:
     def test_overlap_equal_to_max_tokens(self):
         """Test that overlap_tokens == max_tokens is clamped and doesn't hang"""
         documents = [" ".join([f"word{i}" for i in range(100)])]
-        
+
         # This should clamp overlap_tokens to 29 (max_tokens - 1)
         chunked_docs, doc_indices = chunk_documents_for_rerank(
             documents, max_tokens=30, overlap_tokens=30
         )
-        
+
         # Should complete without hanging
         assert len(chunked_docs) > 0
         assert all(idx == 0 for idx in doc_indices)
@@ -40,12 +40,12 @@ class TestOverlapValidation:
     def test_overlap_slightly_less_than_max_tokens(self):
         """Test that overlap_tokens < max_tokens works normally"""
         documents = [" ".join([f"word{i}" for i in range(100)])]
-        
+
         # This should work without clamping
         chunked_docs, doc_indices = chunk_documents_for_rerank(
             documents, max_tokens=30, overlap_tokens=29
         )
-        
+
         # Should complete successfully
         assert len(chunked_docs) > 0
         assert all(idx == 0 for idx in doc_indices)
@@ -53,12 +53,12 @@ class TestOverlapValidation:
     def test_small_max_tokens_with_large_overlap(self):
         """Test edge case with very small max_tokens"""
         documents = [" ".join([f"word{i}" for i in range(50)])]
-        
+
        # max_tokens=5, overlap_tokens=10 should clamp to 4
         chunked_docs, doc_indices = chunk_documents_for_rerank(
             documents, max_tokens=5, overlap_tokens=10
         )
-        
+
         # Should complete without hanging
         assert len(chunked_docs) > 0
         assert all(idx == 0 for idx in doc_indices)
@@ -70,12 +70,12 @@ class TestOverlapValidation:
             "short document",
             " ".join([f"word{i}" for i in range(75)]),
         ]
-        
+
         # overlap_tokens > max_tokens
         chunked_docs, doc_indices = chunk_documents_for_rerank(
             documents, max_tokens=25, overlap_tokens=30
         )
-        
+
         # Should complete successfully and chunk the long documents
         assert len(chunked_docs) >= len(documents)
         # Short document should not be chunked
@@ -87,12 +87,12 @@ class TestOverlapValidation:
             " ".join([f"word{i}" for i in range(100)]),
             "short doc",
         ]
-        
+
         # Normal case: overlap_tokens (10) < max_tokens (50)
         chunked_docs, doc_indices = chunk_documents_for_rerank(
             documents, max_tokens=50, overlap_tokens=10
         )
-        
+
         # Long document should be chunked, short one should not
         assert len(chunked_docs) > 2  # At least 3 chunks (2 from long doc + 1 short)
         assert "short doc" in chunked_docs
@@ -102,12 +102,12 @@ class TestOverlapValidation:
     def test_edge_case_max_tokens_one(self):
         """Test edge case where max_tokens=1"""
         documents = [" ".join([f"word{i}" for i in range(20)])]
-        
+
         # max_tokens=1, overlap_tokens=5 should clamp to 0
         chunked_docs, doc_indices = chunk_documents_for_rerank(
             documents, max_tokens=1, overlap_tokens=5
         )
-        
+
         # Should complete without hanging
         assert len(chunked_docs) > 0
         assert all(idx == 0 for idx in doc_indices)
diff --git a/tests/test_rerank_chunking.py b/tests/test_rerank_chunking.py
index f31331d2..1700988a 100644
--- a/tests/test_rerank_chunking.py
+++ b/tests/test_rerank_chunking.py
@@ -40,7 +40,7 @@ class TestChunkDocumentsForRerank:
         long_doc = "a" * 2000  # 2000 characters
         documents = [long_doc, "short doc"]

-        with patch("lightrag.rerank.TiktokenTokenizer", side_effect=ImportError):
+        with patch("lightrag.utils.TiktokenTokenizer", side_effect=ImportError):
             chunked_docs, doc_indices = chunk_documents_for_rerank(
                 documents,
                 max_tokens=100,  # 100 tokens = ~400 chars
@@ -360,13 +360,17 @@ class TestEndToEndChunking:
         mock_response.request_info = None
         mock_response.history = None
         mock_response.headers = {}
+        # Make mock_response an async context manager (for `async with session.post() as response`)
+        mock_response.__aenter__ = AsyncMock(return_value=mock_response)
+        mock_response.__aexit__ = AsyncMock(return_value=None)

         mock_session = Mock()
-        mock_session.post = AsyncMock(return_value=mock_response)
+        # session.post() returns an async context manager, so return mock_response which is now one
+        mock_session.post = Mock(return_value=mock_response)
         mock_session.__aenter__ = AsyncMock(return_value=mock_session)
-        mock_session.__aexit__ = AsyncMock()
+        mock_session.__aexit__ = AsyncMock(return_value=None)

-        with patch("aiohttp.ClientSession", return_value=mock_session):
+        with patch("lightrag.rerank.aiohttp.ClientSession", return_value=mock_session):
             result = await cohere_rerank(
                 query=query,
                 documents=documents,
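---

Notes on the mocking changes (illustrative sketches, not part of the diff above):

1) The patch target moved from "lightrag.rerank.TiktokenTokenizer" to
"lightrag.utils.TiktokenTokenizer". Judging from the first hunk, rerank.py
imports the tokenizer lazily inside the function (`from .utils import
TiktokenTokenizer`), so the name is resolved as an attribute of
lightrag.utils at call time and never exists in the lightrag.rerank module
namespace; patch() must therefore target the providing module. Note also
that with side_effect=ImportError the import itself still succeeds (the
attribute exists, it is now a mock); the ImportError is raised when the
mock is *called*, i.e. on instantiation, which presumably drives the code
into its character-based fallback. A minimal, self-contained sketch of that
mechanic, using a fabricated in-memory module ("demo_utils") in place of
lightrag.utils:

import sys
import types
from unittest.mock import patch

# Fabricated stand-in for lightrag.utils; "demo_utils" is not a real module.
demo_utils = types.ModuleType("demo_utils")

class FakeTokenizer:
    pass

demo_utils.TiktokenTokenizer = FakeTokenizer
sys.modules["demo_utils"] = demo_utils

def chunk_with_fallback():
    # Mirrors rerank.py's lazy import: the name is resolved on demo_utils
    # at call time, so that module is where patch() must intervene.
    try:
        from demo_utils import TiktokenTokenizer
        TiktokenTokenizer()  # with side_effect=ImportError, this call raises
        return "tokenizer path"
    except ImportError:
        return "fallback path"

assert chunk_with_fallback() == "tokenizer path"
# Patching the *consuming* module would fail: the attribute never exists
# there. Patch the providing module instead:
with patch("demo_utils.TiktokenTokenizer", side_effect=ImportError):
    assert chunk_with_fallback() == "fallback path"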
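2) The async context manager fix, reduced to a standalone pattern. aiohttp
is used as `async with session.post(...) as response:`, so session.post must
be a plain Mock that synchronously returns an object implementing
__aenter__/__aexit__. The old AsyncMock made session.post() return a
coroutine, which does not support `async with`. The sketch below is a
hedged reduction under assumed names (fetch_score and the URL are
illustrative, not lightrag APIs), patching "aiohttp.ClientSession" directly,
which reaches the same module object as the patch's
"lightrag.rerank.aiohttp.ClientSession":

import asyncio
from unittest.mock import AsyncMock, Mock, patch

import aiohttp

# Hypothetical reduction of cohere_rerank's HTTP call shape.
async def fetch_score(url: str) -> dict:
    async with aiohttp.ClientSession() as session:
        async with session.post(url, json={"q": "test"}) as response:
            return await response.json()

def test_fetch_score_with_mocked_session():
    mock_response = Mock()
    mock_response.status = 200
    mock_response.json = AsyncMock(return_value={"results": []})
    # Assigning __aenter__/__aexit__ works because each Mock instance gets
    # its own subclass, and mock installs magic methods on that type.
    mock_response.__aenter__ = AsyncMock(return_value=mock_response)
    mock_response.__aexit__ = AsyncMock(return_value=None)

    mock_session = Mock()
    # Plain Mock, not AsyncMock: session.post() must *synchronously* return
    # an async context manager; the awaiting happens inside __aenter__.
    mock_session.post = Mock(return_value=mock_response)
    mock_session.__aenter__ = AsyncMock(return_value=mock_session)
    mock_session.__aexit__ = AsyncMock(return_value=None)

    with patch("aiohttp.ClientSession", return_value=mock_session):
        result = asyncio.run(fetch_score("https://example.invalid/v1/rerank"))

    assert result == {"results": []}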