From 561ba4e4b5d5d9b7939930be479f2fad353d1128 Mon Sep 17 00:00:00 2001
From: yangdx
Date: Wed, 3 Dec 2025 12:40:48 +0800
Subject: [PATCH] Fix trailing whitespace and update test mocking for rerank module
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

• Remove trailing whitespace
• Fix TiktokenTokenizer import patch
• Add async context manager mocks
• Update aiohttp.ClientSession patch
• Improve test reliability
---
 lightrag/rerank.py               |  2 +-
 tests/test_overlap_validation.py | 28 ++++++++++++++--------------
 tests/test_rerank_chunking.py    | 12 ++++++++----
 3 files changed, 23 insertions(+), 19 deletions(-)

diff --git a/lightrag/rerank.py b/lightrag/rerank.py
index 81632b71..2e22f19a 100644
--- a/lightrag/rerank.py
+++ b/lightrag/rerank.py
@@ -50,7 +50,7 @@ def chunk_documents_for_rerank(
         f"overlap_tokens ({original_overlap}) must be less than max_tokens ({max_tokens}). "
         f"Clamping to {overlap_tokens} to prevent infinite loop."
     )
-    
+
     try:
         from .utils import TiktokenTokenizer

diff --git a/tests/test_overlap_validation.py b/tests/test_overlap_validation.py
index 7f84a3cf..4e7c9cbd 100644
--- a/tests/test_overlap_validation.py
+++ b/tests/test_overlap_validation.py
@@ -14,12 +14,12 @@ class TestOverlapValidation:
     def test_overlap_greater_than_max_tokens(self):
         """Test that overlap_tokens > max_tokens is clamped and doesn't hang"""
         documents = [" ".join([f"word{i}" for i in range(100)])]
-        
+
         # This should clamp overlap_tokens to 29 (max_tokens - 1)
         chunked_docs, doc_indices = chunk_documents_for_rerank(
             documents, max_tokens=30, overlap_tokens=32
         )
-        
+
         # Should complete without hanging
         assert len(chunked_docs) > 0
         assert all(idx == 0 for idx in doc_indices)
@@ -27,12 +27,12 @@ class TestOverlapValidation:
     def test_overlap_equal_to_max_tokens(self):
         """Test that overlap_tokens == max_tokens is clamped and doesn't hang"""
         documents = [" ".join([f"word{i}" for i in range(100)])]
-        
+
         # This should clamp overlap_tokens to 29 (max_tokens - 1)
         chunked_docs, doc_indices = chunk_documents_for_rerank(
             documents, max_tokens=30, overlap_tokens=30
         )
-        
+
         # Should complete without hanging
         assert len(chunked_docs) > 0
         assert all(idx == 0 for idx in doc_indices)
@@ -40,12 +40,12 @@ class TestOverlapValidation:
     def test_overlap_slightly_less_than_max_tokens(self):
         """Test that overlap_tokens < max_tokens works normally"""
         documents = [" ".join([f"word{i}" for i in range(100)])]
-        
+
         # This should work without clamping
         chunked_docs, doc_indices = chunk_documents_for_rerank(
             documents, max_tokens=30, overlap_tokens=29
         )
-        
+
         # Should complete successfully
         assert len(chunked_docs) > 0
         assert all(idx == 0 for idx in doc_indices)
@@ -53,12 +53,12 @@ class TestOverlapValidation:
     def test_small_max_tokens_with_large_overlap(self):
         """Test edge case with very small max_tokens"""
         documents = [" ".join([f"word{i}" for i in range(50)])]
-        
+
        # max_tokens=5, overlap_tokens=10 should clamp to 4
         chunked_docs, doc_indices = chunk_documents_for_rerank(
             documents, max_tokens=5, overlap_tokens=10
         )
-        
+
         # Should complete without hanging
         assert len(chunked_docs) > 0
         assert all(idx == 0 for idx in doc_indices)
@@ -70,12 +70,12 @@ class TestOverlapValidation:
             "short document",
             " ".join([f"word{i}" for i in range(75)]),
         ]
-        
+
         # overlap_tokens > max_tokens
         chunked_docs, doc_indices = chunk_documents_for_rerank(
             documents, max_tokens=25, overlap_tokens=30
         )
-        
+
         # Should complete successfully and chunk the long documents
         assert len(chunked_docs) >= len(documents)
         # Short document should not be chunked
@@ -87,12 +87,12 @@ class TestOverlapValidation:
             " ".join([f"word{i}" for i in range(100)]),
             "short doc",
         ]
-        
+
         # Normal case: overlap_tokens (10) < max_tokens (50)
         chunked_docs, doc_indices = chunk_documents_for_rerank(
             documents, max_tokens=50, overlap_tokens=10
         )
-        
+
         # Long document should be chunked, short one should not
         assert len(chunked_docs) > 2  # At least 3 chunks (2 from long doc + 1 short)
         assert "short doc" in chunked_docs
@@ -102,12 +102,12 @@ class TestOverlapValidation:
     def test_edge_case_max_tokens_one(self):
         """Test edge case where max_tokens=1"""
         documents = [" ".join([f"word{i}" for i in range(20)])]
-        
+
         # max_tokens=1, overlap_tokens=5 should clamp to 0
         chunked_docs, doc_indices = chunk_documents_for_rerank(
             documents, max_tokens=1, overlap_tokens=5
         )
-        
+
         # Should complete without hanging
         assert len(chunked_docs) > 0
         assert all(idx == 0 for idx in doc_indices)
diff --git a/tests/test_rerank_chunking.py b/tests/test_rerank_chunking.py
index f31331d2..1700988a 100644
--- a/tests/test_rerank_chunking.py
+++ b/tests/test_rerank_chunking.py
@@ -40,7 +40,7 @@ class TestChunkDocumentsForRerank:
         long_doc = "a" * 2000  # 2000 characters
         documents = [long_doc, "short doc"]

-        with patch("lightrag.rerank.TiktokenTokenizer", side_effect=ImportError):
+        with patch("lightrag.utils.TiktokenTokenizer", side_effect=ImportError):
             chunked_docs, doc_indices = chunk_documents_for_rerank(
                 documents,
                 max_tokens=100,  # 100 tokens = ~400 chars
@@ -360,13 +360,17 @@ class TestEndToEndChunking:
         mock_response.request_info = None
         mock_response.history = None
         mock_response.headers = {}
+        # Make mock_response an async context manager (for `async with session.post() as response`)
+        mock_response.__aenter__ = AsyncMock(return_value=mock_response)
+        mock_response.__aexit__ = AsyncMock(return_value=None)

         mock_session = Mock()
-        mock_session.post = AsyncMock(return_value=mock_response)
+        # session.post() returns an async context manager, so return mock_response which is now one
+        mock_session.post = Mock(return_value=mock_response)
         mock_session.__aenter__ = AsyncMock(return_value=mock_session)
-        mock_session.__aexit__ = AsyncMock()
+        mock_session.__aexit__ = AsyncMock(return_value=None)

-        with patch("aiohttp.ClientSession", return_value=mock_session):
+        with patch("lightrag.rerank.aiohttp.ClientSession", return_value=mock_session):
             result = await cohere_rerank(
                 query=query,
                 documents=documents,
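---

Notes on the mocking changes (illustrative sketches, not part of the diff above):

1) The patch target moved from "lightrag.rerank.TiktokenTokenizer" to
"lightrag.utils.TiktokenTokenizer". Judging from the first hunk, rerank.py
imports the tokenizer lazily inside the function (`from .utils import
TiktokenTokenizer`), so the name is resolved as an attribute of
lightrag.utils at call time and never exists in the lightrag.rerank module
namespace; patch() must therefore target the providing module. Note also
that with side_effect=ImportError the import itself still succeeds (the
attribute exists, it is now a mock); the ImportError is raised when the
mock is *called*, i.e. on instantiation, which presumably drives the code
into its character-based fallback. A minimal, self-contained sketch of that
mechanic, using a fabricated in-memory module ("demo_utils") in place of
lightrag.utils:

import sys
import types
from unittest.mock import patch

# Fabricated stand-in for lightrag.utils; "demo_utils" is not a real module.
demo_utils = types.ModuleType("demo_utils")

class FakeTokenizer:
    pass

demo_utils.TiktokenTokenizer = FakeTokenizer
sys.modules["demo_utils"] = demo_utils

def chunk_with_fallback():
    # Mirrors rerank.py's lazy import: the name is resolved on demo_utils
    # at call time, so that module is where patch() must intervene.
    try:
        from demo_utils import TiktokenTokenizer
        TiktokenTokenizer()  # with side_effect=ImportError, this call raises
        return "tokenizer path"
    except ImportError:
        return "fallback path"

assert chunk_with_fallback() == "tokenizer path"
# Patching the *consuming* module would fail: the attribute never exists
# there. Patch the providing module instead:
with patch("demo_utils.TiktokenTokenizer", side_effect=ImportError):
    assert chunk_with_fallback() == "fallback path"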
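2) The async context manager fix, reduced to a standalone pattern. aiohttp
is used as `async with session.post(...) as response:`, so session.post must
be a plain Mock that synchronously returns an object implementing
__aenter__/__aexit__. The old AsyncMock made session.post() return a
coroutine, which does not support `async with`. The sketch below is a
hedged reduction under assumed names (fetch_score and the URL are
illustrative, not lightrag APIs), patching "aiohttp.ClientSession" directly,
which reaches the same module object as the patch's
"lightrag.rerank.aiohttp.ClientSession":

import asyncio
from unittest.mock import AsyncMock, Mock, patch

import aiohttp

# Hypothetical reduction of cohere_rerank's HTTP call shape.
async def fetch_score(url: str) -> dict:
    async with aiohttp.ClientSession() as session:
        async with session.post(url, json={"q": "test"}) as response:
            return await response.json()

def test_fetch_score_with_mocked_session():
    mock_response = Mock()
    mock_response.status = 200
    mock_response.json = AsyncMock(return_value={"results": []})
    # Assigning __aenter__/__aexit__ works because each Mock instance gets
    # its own subclass, and mock installs magic methods on that type.
    mock_response.__aenter__ = AsyncMock(return_value=mock_response)
    mock_response.__aexit__ = AsyncMock(return_value=None)

    mock_session = Mock()
    # Plain Mock, not AsyncMock: session.post() must *synchronously* return
    # an async context manager; the awaiting happens inside __aenter__.
    mock_session.post = Mock(return_value=mock_response)
    mock_session.__aenter__ = AsyncMock(return_value=mock_session)
    mock_session.__aexit__ = AsyncMock(return_value=None)

    with patch("aiohttp.ClientSession", return_value=mock_session):
        result = asyncio.run(fetch_score("https://example.invalid/v1/rerank"))

    assert result == {"results": []}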