Add Cohere reranker config, chunking, and tests

This commit is contained in:
netbrah
2025-11-22 16:43:13 -05:00
parent 16eb0d5bee
commit a05bbf105e
5 changed files with 620 additions and 20 deletions

View File

@@ -15,9 +15,12 @@ Configuration Required:
EMBEDDING_BINDING_HOST
EMBEDDING_BINDING_API_KEY
3. Configure your vLLM-deployed rerank model with these env vars:
RERANK_MODEL
RERANK_BINDING_HOST
RERANK_BINDING=cohere
RERANK_MODEL (e.g., answerai-colbert-small-v1 or rerank-v3.5)
RERANK_BINDING_HOST (e.g., https://api.cohere.com/v2/rerank or LiteLLM proxy)
RERANK_BINDING_API_KEY
RERANK_ENABLE_CHUNKING=true (optional, default false; for models with token limits)
RERANK_MAX_TOKENS_PER_DOC=480 (optional, default 4096)
Note: Rerank is controlled per query via the 'enable_rerank' parameter (default: True)
"""
@@ -66,9 +69,11 @@ async def embedding_func(texts: list[str]) -> np.ndarray:
# Rerank callable: cohere_rerank pre-bound to settings read from the
# environment. Every setting except the API key falls back to a default when
# its variable is unset; the API key is passed as None in that case.
rerank_model_func = partial(
    cohere_rerank,
    model=os.getenv("RERANK_MODEL", "rerank-v3.5"),
    api_key=os.getenv("RERANK_BINDING_API_KEY"),
    base_url=os.getenv("RERANK_BINDING_HOST", "https://api.cohere.com/v2/rerank"),
    # Chunking is opt-in: only the value "true" (case-insensitive) enables it.
    enable_chunking=os.getenv("RERANK_ENABLE_CHUNKING", "false").lower() == "true",
    # int() raises ValueError when the variable is set to a non-integer string.
    max_tokens_per_doc=int(os.getenv("RERANK_MAX_TOKENS_PER_DOC", "4096")),
)