Add Cohere reranker config, chunking, and tests
@@ -15,9 +15,12 @@ Configuration Required:
     EMBEDDING_BINDING_HOST
     EMBEDDING_BINDING_API_KEY
 3. Set your vLLM deployed AI rerank model setting with env vars:
-    RERANK_MODEL
-    RERANK_BINDING_HOST
+    RERANK_BINDING=cohere
+    RERANK_MODEL (e.g., answerai-colbert-small-v1 or rerank-v3.5)
+    RERANK_BINDING_HOST (e.g., https://api.cohere.com/v2/rerank or LiteLLM proxy)
     RERANK_BINDING_API_KEY
+    RERANK_ENABLE_CHUNKING=true (optional, for models with token limits)
+    RERANK_MAX_TOKENS_PER_DOC=480 (optional, default 4096)
 
 Note: Rerank is controlled per query via the 'enable_rerank' parameter (default: True)
 """
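The docstring above only documents the variables. As a minimal sketch (the concrete values are placeholders taken from the hints in the docstring, not defaults enforced by the code), they can be exported before running the example:

    # Illustrative values only; substitute your own model, endpoint and key.
    import os

    os.environ["RERANK_BINDING"] = "cohere"
    os.environ["RERANK_MODEL"] = "rerank-v3.5"
    os.environ["RERANK_BINDING_HOST"] = "https://api.cohere.com/v2/rerank"
    os.environ["RERANK_BINDING_API_KEY"] = "<your-cohere-api-key>"

    # Optional: client-side chunking for models with tight token limits,
    # e.g. answerai-colbert-small-v1 served behind a LiteLLM proxy.
    os.environ["RERANK_ENABLE_CHUNKING"] = "true"
    os.environ["RERANK_MAX_TOKENS_PER_DOC"] = "480"

Per the docstring, chunking is only needed for models with token limits; without it the cap defaults to 4096 tokens per document.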
@@ -66,9 +69,11 @@ async def embedding_func(texts: list[str]) -> np.ndarray:
 
 rerank_model_func = partial(
     cohere_rerank,
-    model=os.getenv("RERANK_MODEL"),
+    model=os.getenv("RERANK_MODEL", "rerank-v3.5"),
     api_key=os.getenv("RERANK_BINDING_API_KEY"),
-    base_url=os.getenv("RERANK_BINDING_HOST"),
+    base_url=os.getenv("RERANK_BINDING_HOST", "https://api.cohere.com/v2/rerank"),
+    enable_chunking=os.getenv("RERANK_ENABLE_CHUNKING", "false").lower() == "true",
+    max_tokens_per_doc=int(os.getenv("RERANK_MAX_TOKENS_PER_DOC", "4096")),
 )
 
 
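Consuming the partial is unchanged by this hunk. As a hedged sketch, assuming the surrounding example wires it into LightRAG in the usual way and that the per-query toggle is the enable_rerank parameter mentioned in the docstring (llm_model_func, embedding_func and rerank_model_func are the objects defined earlier in the example file, not part of this snippet):

    import asyncio
    from lightrag import LightRAG, QueryParam

    async def main():
        rag = LightRAG(
            working_dir="./rag_storage",          # assumed path
            llm_model_func=llm_model_func,        # defined earlier in the example
            embedding_func=embedding_func,        # defined earlier in the example
            rerank_model_func=rerank_model_func,  # the Cohere partial built above
        )
        await rag.initialize_storages()
        # Document insertion is elided; see the rest of the example script.
        answer = await rag.aquery(
            "What does the corpus say about reranking?",
            param=QueryParam(mode="hybrid", enable_rerank=True),  # per-query toggle
        )
        print(answer)

    asyncio.run(main())

Giving model and base_url defaults means the example now targets the hosted Cohere endpoint with no extra configuration, while the two new chunking keywords stay opt-in.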