From caed4fb9b6d62bd239e288b7dda783e23512ba2d Mon Sep 17 00:00:00 2001 From: yangdx Date: Sun, 21 Dec 2025 01:32:27 +0800 Subject: [PATCH] Add model_name attribute to embedding wrappers - Add `model_name` to embedding decorators - Update `EmbeddingFunc` class definition - Set default models for LLM providers - Refactor wrapper docstrings in utils - Update README usage examples --- README-zh.md | 6 ++-- README.md | 6 ++-- lightrag/llm/bedrock.py | 4 ++- lightrag/llm/gemini.py | 4 ++- lightrag/llm/hf.py | 4 ++- lightrag/llm/jina.py | 4 ++- lightrag/llm/lollms.py | 4 ++- lightrag/llm/nvidia_openai.py | 4 ++- lightrag/llm/ollama.py | 4 ++- lightrag/llm/openai.py | 10 +++++-- lightrag/llm/zhipu.py | 4 ++- lightrag/utils.py | 55 ++++++++++++----------------------- 12 files changed, 56 insertions(+), 53 deletions(-) diff --git a/README-zh.md b/README-zh.md index 5a331b39..e6d3b3fc 100644 --- a/README-zh.md +++ b/README-zh.md @@ -425,7 +425,7 @@ async def llm_model_func( **kwargs ) -@wrap_embedding_func_with_attrs(embedding_dim=4096, max_token_size=8192) +@wrap_embedding_func_with_attrs(embedding_dim=4096, max_token_size=8192, model_name="solar-embedding-1-large-query") async def embedding_func(texts: list[str]) -> np.ndarray: return await openai_embed.func( texts, @@ -490,7 +490,7 @@ import numpy as np from lightrag.utils import wrap_embedding_func_with_attrs from lightrag.llm.ollama import ollama_model_complete, ollama_embed -@wrap_embedding_func_with_attrs(embedding_dim=768, max_token_size=8192) +@wrap_embedding_func_with_attrs(embedding_dim=768, max_token_size=8192, model_name="nomic-embed-text") async def embedding_func(texts: list[str]) -> np.ndarray: return await ollama_embed.func(texts, embed_model="nomic-embed-text") @@ -542,7 +542,7 @@ import numpy as np from lightrag.utils import wrap_embedding_func_with_attrs from lightrag.llm.ollama import ollama_model_complete, ollama_embed -@wrap_embedding_func_with_attrs(embedding_dim=768, max_token_size=8192) +@wrap_embedding_func_with_attrs(embedding_dim=768, max_token_size=8192, model_name="nomic-embed-text") async def embedding_func(texts: list[str]) -> np.ndarray: return await ollama_embed.func(texts, embed_model="nomic-embed-text") diff --git a/README.md b/README.md index b157c350..64ebc9ef 100644 --- a/README.md +++ b/README.md @@ -421,7 +421,7 @@ async def llm_model_func( **kwargs ) -@wrap_embedding_func_with_attrs(embedding_dim=4096, max_token_size=8192) +@wrap_embedding_func_with_attrs(embedding_dim=4096, max_token_size=8192, model_name="solar-embedding-1-large-query") async def embedding_func(texts: list[str]) -> np.ndarray: return await openai_embed.func( texts, @@ -488,7 +488,7 @@ import numpy as np from lightrag.utils import wrap_embedding_func_with_attrs from lightrag.llm.ollama import ollama_model_complete, ollama_embed -@wrap_embedding_func_with_attrs(embedding_dim=768, max_token_size=8192) +@wrap_embedding_func_with_attrs(embedding_dim=768, max_token_size=8192, model_name="nomic-embed-text") async def embedding_func(texts: list[str]) -> np.ndarray: return await ollama_embed.func(texts, embed_model="nomic-embed-text") @@ -540,7 +540,7 @@ import numpy as np from lightrag.utils import wrap_embedding_func_with_attrs from lightrag.llm.ollama import ollama_model_complete, ollama_embed -@wrap_embedding_func_with_attrs(embedding_dim=768, max_token_size=8192) +@wrap_embedding_func_with_attrs(embedding_dim=768, max_token_size=8192, model_name="nomic-embed-text") async def embedding_func(texts: list[str]) -> np.ndarray: return await 
ollama_embed.func(texts, embed_model="nomic-embed-text") diff --git a/lightrag/llm/bedrock.py b/lightrag/llm/bedrock.py index f6871422..e651e3c8 100644 --- a/lightrag/llm/bedrock.py +++ b/lightrag/llm/bedrock.py @@ -351,7 +351,9 @@ async def bedrock_complete( return result -@wrap_embedding_func_with_attrs(embedding_dim=1024, max_token_size=8192) +@wrap_embedding_func_with_attrs( + embedding_dim=1024, max_token_size=8192, model_name="amazon.titan-embed-text-v2:0" +) @retry( stop=stop_after_attempt(5), wait=wait_exponential(multiplier=1, min=4, max=60), diff --git a/lightrag/llm/gemini.py b/lightrag/llm/gemini.py index 37ce7206..5e438ceb 100644 --- a/lightrag/llm/gemini.py +++ b/lightrag/llm/gemini.py @@ -453,7 +453,9 @@ async def gemini_model_complete( ) -@wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=2048) +@wrap_embedding_func_with_attrs( + embedding_dim=1536, max_token_size=2048, model_name="gemini-embedding-001" +) @retry( stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=60), diff --git a/lightrag/llm/hf.py b/lightrag/llm/hf.py index 447f95c3..eff89650 100644 --- a/lightrag/llm/hf.py +++ b/lightrag/llm/hf.py @@ -142,7 +142,9 @@ async def hf_model_complete( return result -@wrap_embedding_func_with_attrs(embedding_dim=1024, max_token_size=8192) +@wrap_embedding_func_with_attrs( + embedding_dim=1024, max_token_size=8192, model_name="hf_embedding_model" +) async def hf_embed(texts: list[str], tokenizer, embed_model) -> np.ndarray: # Detect the appropriate device if torch.cuda.is_available(): diff --git a/lightrag/llm/jina.py b/lightrag/llm/jina.py index 41251f4a..5c380854 100644 --- a/lightrag/llm/jina.py +++ b/lightrag/llm/jina.py @@ -58,7 +58,9 @@ async def fetch_data(url, headers, data): return data_list -@wrap_embedding_func_with_attrs(embedding_dim=2048, max_token_size=8192) +@wrap_embedding_func_with_attrs( + embedding_dim=2048, max_token_size=8192, model_name="jina-embeddings-v4" +) @retry( stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=60), diff --git a/lightrag/llm/lollms.py b/lightrag/llm/lollms.py index 2f2a1dbf..3eaef1af 100644 --- a/lightrag/llm/lollms.py +++ b/lightrag/llm/lollms.py @@ -138,7 +138,9 @@ async def lollms_model_complete( ) -@wrap_embedding_func_with_attrs(embedding_dim=1024, max_token_size=8192) +@wrap_embedding_func_with_attrs( + embedding_dim=1024, max_token_size=8192, model_name="lollms_embedding_model" +) async def lollms_embed( texts: List[str], embed_model=None, base_url="http://localhost:9600", **kwargs ) -> np.ndarray: diff --git a/lightrag/llm/nvidia_openai.py b/lightrag/llm/nvidia_openai.py index 1ebaf3a6..9025ec13 100644 --- a/lightrag/llm/nvidia_openai.py +++ b/lightrag/llm/nvidia_openai.py @@ -33,7 +33,9 @@ from lightrag.utils import ( import numpy as np -@wrap_embedding_func_with_attrs(embedding_dim=2048, max_token_size=8192) +@wrap_embedding_func_with_attrs( + embedding_dim=2048, max_token_size=8192, model_name="nvidia_embedding_model" +) @retry( stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=60), diff --git a/lightrag/llm/ollama.py b/lightrag/llm/ollama.py index cd633e80..62269296 100644 --- a/lightrag/llm/ollama.py +++ b/lightrag/llm/ollama.py @@ -172,7 +172,9 @@ async def ollama_model_complete( ) -@wrap_embedding_func_with_attrs(embedding_dim=1024, max_token_size=8192) +@wrap_embedding_func_with_attrs( + embedding_dim=1024, max_token_size=8192, model_name="bge-m3:latest" +) async def ollama_embed( texts: list[str], embed_model: str = 
"bge-m3:latest", **kwargs ) -> np.ndarray: diff --git a/lightrag/llm/openai.py b/lightrag/llm/openai.py index 9c3d0261..b49cac71 100644 --- a/lightrag/llm/openai.py +++ b/lightrag/llm/openai.py @@ -677,7 +677,9 @@ async def nvidia_openai_complete( return result -@wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8192) +@wrap_embedding_func_with_attrs( + embedding_dim=1536, max_token_size=8192, model_name="text-embedding-3-small" +) @retry( stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=60), @@ -867,7 +869,11 @@ async def azure_openai_complete( return result -@wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8192) +@wrap_embedding_func_with_attrs( + embedding_dim=1536, + max_token_size=8192, + model_name="my-text-embedding-3-large-deployment", +) async def azure_openai_embed( texts: list[str], model: str | None = None, diff --git a/lightrag/llm/zhipu.py b/lightrag/llm/zhipu.py index d90f3cc1..5caa82bf 100644 --- a/lightrag/llm/zhipu.py +++ b/lightrag/llm/zhipu.py @@ -179,7 +179,9 @@ async def zhipu_complete( ) -@wrap_embedding_func_with_attrs(embedding_dim=1024) +@wrap_embedding_func_with_attrs( + embedding_dim=1024, max_token_size=8192, model_name="embedding-3" +) @retry( stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=60), diff --git a/lightrag/utils.py b/lightrag/utils.py index b0c90885..d795acdb 100644 --- a/lightrag/utils.py +++ b/lightrag/utils.py @@ -425,7 +425,9 @@ class EmbeddingFunc: send_dimensions: bool = ( False # Control whether to send embedding_dim to the function ) - model_name: str | None = None + model_name: str | None = ( + None # Model name for implementating workspace data isolation in vector DB + ) async def __call__(self, *args, **kwargs) -> np.ndarray: # Only inject embedding_dim when send_dimensions is True @@ -1017,42 +1019,36 @@ def wrap_embedding_func_with_attrs(**kwargs): Correct usage patterns: - 1. Direct implementation (decorated): + 1. Direct decoration: ```python - @wrap_embedding_func_with_attrs(embedding_dim=1536) + @wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8192, model_name="my_embedding_model") async def my_embed(texts, embedding_dim=None): # Direct implementation return embeddings ``` - - 2. Wrapper calling decorated function (DO NOT decorate wrapper): + 2. Double decoration: ```python - # my_embed is already decorated above + @wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8192, model_name="my_embedding_model") + @retry(...) + async def openai_embed(texts, ...): + # Base implementation + pass - async def my_wrapper(texts, **kwargs): # ❌ DO NOT decorate this! - # Must call .func to access unwrapped implementation - return await my_embed.func(texts, **kwargs) - ``` - - 3. Wrapper calling decorated function (properly decorated): - ```python - @wrap_embedding_func_with_attrs(embedding_dim=1536) - async def my_wrapper(texts, **kwargs): # ✅ Can decorate if calling .func - # Calling .func avoids double decoration - return await my_embed.func(texts, **kwargs) + @wrap_embedding_func_with_attrs(embedding_dim=1024, max_token_size=4096, model_name="another_embedding_model") + # Note: No @retry here! + async def new_openai_embed(texts, ...): + # CRITICAL: Call .func to access unwrapped function + return await openai_embed.func(texts, ...) # ✅ Correct + # return await openai_embed(texts, ...) # ❌ Wrong - double decoration! 
``` The decorated function becomes an EmbeddingFunc instance with: - embedding_dim: The embedding dimension - max_token_size: Maximum token limit (optional) + - model_name: Model name (optional) - func: The original unwrapped function (access via .func) - __call__: Wrapper that injects embedding_dim parameter - Double decoration causes: - - Double injection of embedding_dim parameter - - Incorrect parameter passing to the underlying implementation - - Runtime errors due to parameter conflicts - Args: embedding_dim: The dimension of embedding vectors max_token_size: Maximum number of tokens (optional) @@ -1060,21 +1056,6 @@ def wrap_embedding_func_with_attrs(**kwargs): Returns: A decorator that wraps the function as an EmbeddingFunc instance - - Example of correct wrapper implementation: - ```python - @wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8192) - @retry(...) - async def openai_embed(texts, ...): - # Base implementation - pass - - @wrap_embedding_func_with_attrs(embedding_dim=1536) # Note: No @retry here! - async def azure_openai_embed(texts, ...): - # CRITICAL: Call .func to access unwrapped function - return await openai_embed.func(texts, ...) # ✅ Correct - # return await openai_embed(texts, ...) # ❌ Wrong - double decoration! - ``` """ def final_decro(func) -> EmbeddingFunc:
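For context (not part of the patch), a minimal sketch of how the new `model_name` attribute is exposed to callers, based on the decorator and `EmbeddingFunc` fields shown in the diff above; the `demo_embed` function, its model name, and the 8-dimensional zero vectors are hypothetical stand-ins:

```python
import asyncio
import numpy as np
from lightrag.utils import wrap_embedding_func_with_attrs

# Hypothetical embedding function used only to illustrate the new attribute.
@wrap_embedding_func_with_attrs(
    embedding_dim=8, max_token_size=512, model_name="demo-embedding-model"
)
async def demo_embed(texts: list[str]) -> np.ndarray:
    # Stand-in implementation: one fixed-size vector per input text.
    return np.zeros((len(texts), 8), dtype=np.float32)

# The decorator turns the function into an EmbeddingFunc instance, so the
# attributes set above are available directly on the object.
print(demo_embed.embedding_dim)   # 8
print(demo_embed.max_token_size)  # 512
print(demo_embed.model_name)      # "demo-embedding-model" (added by this patch)

# Calling the instance still runs the wrapped implementation; the unwrapped
# function remains reachable via .func, as the refactored docstring advises.
vectors = asyncio.run(demo_embed(["hello", "world"]))
print(vectors.shape)  # (2, 8)
```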