Refactor: Rename llm_model_max_token_size to summary_max_tokens

This commit renames the parameter `llm_model_max_token_size` to `summary_max_tokens` for clarity, since it specifically controls the token limit for entity relation summaries.
Author: yangdx
Date: 2025-07-28 00:49:08 +08:00
parent d0d57a45b6
commit 598eecd06d
10 changed files with 17 additions and 14 deletions
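
For code that constructs `LightRAG` directly, the upgrade is a one-line keyword rename. A minimal before/after sketch (constructor arguments beyond the renamed one are omitted):

```python
from lightrag import LightRAG

# Before this commit:
# rag = LightRAG(working_dir="./rag_storage", llm_model_max_token_size=10000)

# After this commit:
rag = LightRAG(working_dir="./rag_storage", summary_max_tokens=10000)
```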

View File

@@ -265,7 +265,7 @@ if __name__ == "__main__":
 | **embedding_func_max_async** | `int` | Maximum number of concurrent asynchronous embedding processes | `16` |
 | **llm_model_func** | `callable` | Function for LLM generation | `gpt_4o_mini_complete` |
 | **llm_model_name** | `str` | LLM model name for generation | `meta-llama/Llama-3.2-1B-Instruct` |
-| **llm_model_max_token_size** | `int` | Maximum number of tokens sent to the LLM when generating entity relation summaries | `32000` (default value can be changed via env var MAX_TOKENS) |
+| **summary_max_tokens** | `int` | Maximum number of tokens sent to the LLM when generating entity relation summaries | `32000` (default value can be changed via env var MAX_TOKENS) |
 | **llm_model_max_async** | `int` | Maximum number of concurrent asynchronous LLM processes | `4` (default value can be changed via env var MAX_ASYNC) |
 | **llm_model_kwargs** | `dict` | Additional parameters for LLM generation | |
 | **vector_db_storage_cls_kwargs** | `dict` | Additional parameters for the vector database, such as setting the threshold for node and relation retrieval | cosine_better_than_threshold: 0.2 (default value can be changed via env var COSINE_THRESHOLD) |

View File

@@ -272,7 +272,7 @@ A full list of LightRAG init parameters:
 | **embedding_func_max_async** | `int` | Maximum number of concurrent asynchronous embedding processes | `16` |
 | **llm_model_func** | `callable` | Function for LLM generation | `gpt_4o_mini_complete` |
 | **llm_model_name** | `str` | LLM model name for generation | `meta-llama/Llama-3.2-1B-Instruct` |
-| **llm_model_max_token_size** | `int` | Maximum number of tokens sent to the LLM to generate entity relation summaries | `32000` (default value can be changed via env var MAX_TOKENS) |
+| **summary_max_tokens** | `int` | Maximum number of tokens sent to the LLM to generate entity relation summaries | `32000` (default value can be changed via env var MAX_TOKENS) |
 | **llm_model_max_async** | `int` | Maximum number of concurrent asynchronous LLM processes | `4` (default value can be changed via env var MAX_ASYNC) |
 | **llm_model_kwargs** | `dict` | Additional parameters for LLM generation | |
 | **vector_db_storage_cls_kwargs** | `dict` | Additional parameters for the vector database, like setting the threshold for node and relation retrieval | cosine_better_than_threshold: 0.2 (default value can be changed via env var COSINE_THRESHOLD) |
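
As the table notes, the summary token budget can also be driven by the `MAX_TOKENS` environment variable; a minimal sketch of the precedence, using the table's documented default of `32000`:

```python
import os

from lightrag import LightRAG

# Explicitly reading MAX_TOKENS reproduces the documented behavior:
# the env var supplies the default, and a keyword argument overrides it.
rag = LightRAG(
    working_dir="./rag_storage",
    summary_max_tokens=int(os.getenv("MAX_TOKENS", "32000")),
)
```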

View File

@@ -87,7 +87,7 @@ async def initialize_rag():
         working_dir=WORKING_DIR,
         llm_model_func=ollama_model_complete,
         llm_model_name=os.getenv("LLM_MODEL", "qwen2.5-coder:7b"),
-        llm_model_max_token_size=8192,
+        summary_max_tokens=8192,
         llm_model_kwargs={
             "host": os.getenv("LLM_BINDING_HOST", "http://localhost:11434"),
             "options": {"num_ctx": 8192},

View File

@@ -211,7 +211,7 @@ async def initialize_rag():
         max_parallel_insert=2,
         llm_model_func=cloudflare_worker.query,
         llm_model_name=os.getenv("LLM_MODEL", LLM_MODEL),
-        llm_model_max_token_size=4080,
+        summary_max_tokens=4080,
         embedding_func=EmbeddingFunc(
             embedding_dim=int(os.getenv("EMBEDDING_DIM", "1024")),
             max_token_size=int(os.getenv("MAX_EMBED_TOKENS", "2048")),

View File

@@ -56,7 +56,7 @@ async def initialize_rag():
     rag = LightRAG(
         working_dir=WORKING_DIR,
         llm_model_func=llm_model_func,
-        llm_model_max_token_size=32768,
+        summary_max_tokens=10000,
         embedding_func=embedding_func,
         chunk_token_size=512,
         chunk_overlap_token_size=256,

View File

@@ -22,7 +22,7 @@ from lightrag.constants import (
     DEFAULT_MIN_RERANK_SCORE,
     DEFAULT_FORCE_LLM_SUMMARY_ON_MERGE,
     DEFAULT_MAX_ASYNC,
-    DEFAULT_MAX_TOKENS,
+    DEFAULT_SUMMARY_MAX_TOKENS,
     DEFAULT_SUMMARY_LANGUAGE,
     DEFAULT_EMBEDDING_FUNC_MAX_ASYNC,
     DEFAULT_EMBEDDING_BATCH_NUM,
@@ -118,13 +118,13 @@ def parse_args() -> argparse.Namespace:
"--max-async",
type=int,
default=get_env_value("MAX_ASYNC", DEFAULT_MAX_ASYNC, int),
help="Maximum async operations (default: from env or 4)",
help=f"Maximum async operations (default: from env or {DEFAULT_MAX_ASYNC})",
)
parser.add_argument(
"--max-tokens",
type=int,
default=get_env_value("MAX_TOKENS", DEFAULT_MAX_TOKENS, int),
help="Maximum token size (default: from env or 32000)",
default=get_env_value("MAX_TOKENS", DEFAULT_SUMMARY_MAX_TOKENS, int),
help=f"Maximum token size (default: from env or {DEFAULT_SUMMARY_MAX_TOKENS})",
)
# Logging configuration
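
`get_env_value` comes from `lightrag.utils` (a later hunk shows the same import). As a rough illustration of the env-fallback pattern these arguments rely on, here is a hypothetical reimplementation; the real helper may differ:

```python
import os
from typing import Callable, TypeVar

T = TypeVar("T")

def get_env_value(name: str, default: T, cast: Callable[[str], T] = str) -> T:
    """Read env var `name`, cast it, and fall back to `default` when unset or invalid."""
    raw = os.environ.get(name)
    if raw is None:
        return default
    try:
        return cast(raw)
    except (TypeError, ValueError):
        return default
```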

View File

@@ -347,7 +347,7 @@ def create_app(args):
         else openai_alike_model_complete,
         llm_model_name=args.llm_model,
         llm_model_max_async=args.max_async,
-        llm_model_max_token_size=args.max_tokens,
+        summary_max_tokens=args.max_tokens,
         chunk_token_size=int(args.chunk_size),
         chunk_overlap_token_size=int(args.chunk_overlap_size),
         llm_model_kwargs={
@@ -386,7 +386,7 @@ def create_app(args):
         },
         llm_model_name=args.llm_model,
         llm_model_max_async=args.max_async,
-        llm_model_max_token_size=args.max_tokens,
+        summary_max_tokens=args.max_tokens,
         embedding_func=embedding_func,
         kv_storage=args.kv_storage,
         graph_storage=args.graph_storage,

View File

@@ -14,7 +14,7 @@ DEFAULT_TIMEOUT = 150
DEFAULT_SUMMARY_LANGUAGE = "English" # Default language for summaries
DEFAULT_FORCE_LLM_SUMMARY_ON_MERGE = 4
DEFAULT_MAX_GLEANING = 1
DEFAULT_MAX_TOKENS = 10000 # Default maximum token size
DEFAULT_SUMMARY_MAX_TOKENS = 10000 # Default maximum token size
# Separator for graph fields
GRAPH_FIELD_SEP = "<SEP>"

View File

@@ -32,6 +32,7 @@ from lightrag.constants import (
     DEFAULT_COSINE_THRESHOLD,
     DEFAULT_RELATED_CHUNK_NUMBER,
     DEFAULT_MIN_RERANK_SCORE,
+    DEFAULT_SUMMARY_MAX_TOKENS,
 )
 from lightrag.utils import get_env_value
@@ -270,7 +271,9 @@ class LightRAG:
     llm_model_name: str = field(default="gpt-4o-mini")
     """Name of the LLM model used for generating responses."""
-    llm_model_max_token_size: int = field(default=int(os.getenv("MAX_TOKENS", 10000)))
+    summary_max_tokens: int = field(
+        default=int(os.getenv("MAX_TOKENS", DEFAULT_SUMMARY_MAX_TOKENS))
+    )
     """Maximum number of tokens allowed per LLM response."""
     llm_model_max_async: int = field(default=int(os.getenv("MAX_ASYNC", 4)))
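
One subtlety of the `field(default=int(os.getenv(...)))` idiom used here: the environment variable is read once, when the module defining the dataclass is imported, not on each instantiation. A self-contained sketch of that behavior (hypothetical example, not project code):

```python
import os
from dataclasses import dataclass, field

os.environ["MAX_TOKENS"] = "12000"

@dataclass
class Config:
    # The default is computed here, at class-definition time.
    summary_max_tokens: int = field(default=int(os.getenv("MAX_TOKENS", "10000")))

os.environ["MAX_TOKENS"] = "99999"  # Too late: the default was already captured.
print(Config().summary_max_tokens)  # -> 12000
```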

View File

@@ -125,7 +125,7 @@ async def _handle_entity_relation_summary(
     use_llm_func = partial(use_llm_func, _priority=8)
     tokenizer: Tokenizer = global_config["tokenizer"]
-    llm_max_tokens = global_config["llm_model_max_token_size"]
+    llm_max_tokens = global_config["summary_max_tokens"]
     language = global_config["addon_params"].get(
         "language", PROMPTS["DEFAULT_LANGUAGE"]