Refactor: Rename llm_model_max_token_size to summary_max_tokens

This commit renames the parameter `llm_model_max_token_size` to `summary_max_tokens` for clarity, since it specifically controls the token limit for entity relation summaries.
Author: yangdx
Date: 2025-07-28 00:49:08 +08:00
parent d0d57a45b6
commit 598eecd06d
10 changed files with 17 additions and 14 deletions
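
For code that constructs `LightRAG` directly, the upgrade is a one-line keyword rename. A minimal before/after sketch (constructor arguments beyond the renamed one are omitted):

```python
from lightrag import LightRAG

# Before this commit:
# rag = LightRAG(working_dir="./rag_storage", llm_model_max_token_size=10000)

# After this commit:
rag = LightRAG(working_dir="./rag_storage", summary_max_tokens=10000)
```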

View File

@@ -265,7 +265,7 @@ if __name__ == "__main__":
 | **embedding_func_max_async** | `int` | Maximum number of concurrent asynchronous embedding processes | `16` |
 | **llm_model_func** | `callable` | Function for LLM generation | `gpt_4o_mini_complete` |
 | **llm_model_name** | `str` | LLM model name for generation | `meta-llama/Llama-3.2-1B-Instruct` |
-| **llm_model_max_token_size** | `int` | Maximum number of tokens sent to the LLM when generating entity relation summaries | `32000` (default value can be changed via env var MAX_TOKENS) |
+| **summary_max_tokens** | `int` | Maximum number of tokens sent to the LLM when generating entity relation summaries | `32000` (default value can be changed via env var MAX_TOKENS) |
 | **llm_model_max_async** | `int` | Maximum number of concurrent asynchronous LLM processes | `4` (default value can be changed via env var MAX_ASYNC) |
 | **llm_model_kwargs** | `dict` | Additional parameters for LLM generation | |
 | **vector_db_storage_cls_kwargs** | `dict` | Additional parameters for the vector database, such as setting the threshold for node and relation retrieval | cosine_better_than_threshold: 0.2 (default value can be changed via env var COSINE_THRESHOLD) |

View File

@@ -272,7 +272,7 @@ A full list of LightRAG init parameters:
 | **embedding_func_max_async** | `int` | Maximum number of concurrent asynchronous embedding processes | `16` |
 | **llm_model_func** | `callable` | Function for LLM generation | `gpt_4o_mini_complete` |
 | **llm_model_name** | `str` | LLM model name for generation | `meta-llama/Llama-3.2-1B-Instruct` |
-| **llm_model_max_token_size** | `int` | Maximum number of tokens sent to the LLM to generate entity relation summaries | `32000` (default value can be changed via env var MAX_TOKENS) |
+| **summary_max_tokens** | `int` | Maximum number of tokens sent to the LLM to generate entity relation summaries | `32000` (default value can be changed via env var MAX_TOKENS) |
 | **llm_model_max_async** | `int` | Maximum number of concurrent asynchronous LLM processes | `4` (default value can be changed via env var MAX_ASYNC) |
 | **llm_model_kwargs** | `dict` | Additional parameters for LLM generation | |
 | **vector_db_storage_cls_kwargs** | `dict` | Additional parameters for the vector database, like setting the threshold for node and relation retrieval | cosine_better_than_threshold: 0.2 (default value can be changed via env var COSINE_THRESHOLD) |
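
As the table notes, the summary token budget can also be driven by the `MAX_TOKENS` environment variable; a minimal sketch of the precedence, using the table's documented default of `32000`:

```python
import os

from lightrag import LightRAG

# Explicitly reading MAX_TOKENS reproduces the documented behavior:
# the env var supplies the default, and a keyword argument overrides it.
rag = LightRAG(
    working_dir="./rag_storage",
    summary_max_tokens=int(os.getenv("MAX_TOKENS", "32000")),
)
```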

View File

@@ -87,7 +87,7 @@ async def initialize_rag():
         working_dir=WORKING_DIR,
         llm_model_func=ollama_model_complete,
         llm_model_name=os.getenv("LLM_MODEL", "qwen2.5-coder:7b"),
-        llm_model_max_token_size=8192,
+        summary_max_tokens=8192,
         llm_model_kwargs={
             "host": os.getenv("LLM_BINDING_HOST", "http://localhost:11434"),
             "options": {"num_ctx": 8192},

View File

@@ -211,7 +211,7 @@ async def initialize_rag():
         max_parallel_insert=2,
         llm_model_func=cloudflare_worker.query,
         llm_model_name=os.getenv("LLM_MODEL", LLM_MODEL),
-        llm_model_max_token_size=4080,
+        summary_max_tokens=4080,
         embedding_func=EmbeddingFunc(
             embedding_dim=int(os.getenv("EMBEDDING_DIM", "1024")),
             max_token_size=int(os.getenv("MAX_EMBED_TOKENS", "2048")),

View File

@@ -56,7 +56,7 @@ async def initialize_rag():
     rag = LightRAG(
         working_dir=WORKING_DIR,
         llm_model_func=llm_model_func,
-        llm_model_max_token_size=32768,
+        summary_max_tokens=10000,
         embedding_func=embedding_func,
         chunk_token_size=512,
         chunk_overlap_token_size=256,

View File

@@ -22,7 +22,7 @@ from lightrag.constants import (
     DEFAULT_MIN_RERANK_SCORE,
     DEFAULT_FORCE_LLM_SUMMARY_ON_MERGE,
     DEFAULT_MAX_ASYNC,
-    DEFAULT_MAX_TOKENS,
+    DEFAULT_SUMMARY_MAX_TOKENS,
     DEFAULT_SUMMARY_LANGUAGE,
     DEFAULT_EMBEDDING_FUNC_MAX_ASYNC,
     DEFAULT_EMBEDDING_BATCH_NUM,
@@ -118,13 +118,13 @@ def parse_args() -> argparse.Namespace:
"--max-async",
type=int,
default=get_env_value("MAX_ASYNC", DEFAULT_MAX_ASYNC, int),
help="Maximum async operations (default: from env or 4)",
help=f"Maximum async operations (default: from env or {DEFAULT_MAX_ASYNC})",
)
parser.add_argument(
"--max-tokens",
type=int,
default=get_env_value("MAX_TOKENS", DEFAULT_MAX_TOKENS, int),
help="Maximum token size (default: from env or 32000)",
default=get_env_value("MAX_TOKENS", DEFAULT_SUMMARY_MAX_TOKENS, int),
help=f"Maximum token size (default: from env or {DEFAULT_SUMMARY_MAX_TOKENS})",
)
# Logging configuration
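
`get_env_value` comes from `lightrag.utils` (a later hunk shows the same import). As a rough illustration of the env-fallback pattern these arguments rely on, here is a hypothetical reimplementation; the real helper may differ:

```python
import os
from typing import Callable, TypeVar

T = TypeVar("T")

def get_env_value(name: str, default: T, cast: Callable[[str], T] = str) -> T:
    """Read env var `name`, cast it, and fall back to `default` when unset or invalid."""
    raw = os.environ.get(name)
    if raw is None:
        return default
    try:
        return cast(raw)
    except (TypeError, ValueError):
        return default
```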

View File

@@ -347,7 +347,7 @@ def create_app(args):
         else openai_alike_model_complete,
         llm_model_name=args.llm_model,
         llm_model_max_async=args.max_async,
-        llm_model_max_token_size=args.max_tokens,
+        summary_max_tokens=args.max_tokens,
         chunk_token_size=int(args.chunk_size),
         chunk_overlap_token_size=int(args.chunk_overlap_size),
         llm_model_kwargs={
@@ -386,7 +386,7 @@ def create_app(args):
         },
         llm_model_name=args.llm_model,
         llm_model_max_async=args.max_async,
-        llm_model_max_token_size=args.max_tokens,
+        summary_max_tokens=args.max_tokens,
         embedding_func=embedding_func,
         kv_storage=args.kv_storage,
         graph_storage=args.graph_storage,

View File

@@ -14,7 +14,7 @@ DEFAULT_TIMEOUT = 150
DEFAULT_SUMMARY_LANGUAGE = "English" # Default language for summaries
DEFAULT_FORCE_LLM_SUMMARY_ON_MERGE = 4
DEFAULT_MAX_GLEANING = 1
DEFAULT_MAX_TOKENS = 10000 # Default maximum token size
DEFAULT_SUMMARY_MAX_TOKENS = 10000 # Default maximum token size
# Separator for graph fields
GRAPH_FIELD_SEP = "<SEP>"

View File

@@ -32,6 +32,7 @@ from lightrag.constants import (
     DEFAULT_COSINE_THRESHOLD,
     DEFAULT_RELATED_CHUNK_NUMBER,
     DEFAULT_MIN_RERANK_SCORE,
+    DEFAULT_SUMMARY_MAX_TOKENS,
 )
 from lightrag.utils import get_env_value
@@ -270,7 +271,9 @@ class LightRAG:
     llm_model_name: str = field(default="gpt-4o-mini")
     """Name of the LLM model used for generating responses."""
-    llm_model_max_token_size: int = field(default=int(os.getenv("MAX_TOKENS", 10000)))
+    summary_max_tokens: int = field(
+        default=int(os.getenv("MAX_TOKENS", DEFAULT_SUMMARY_MAX_TOKENS))
+    )
     """Maximum number of tokens allowed per LLM response."""
     llm_model_max_async: int = field(default=int(os.getenv("MAX_ASYNC", 4)))
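
One subtlety of the `field(default=int(os.getenv(...)))` idiom used here: the environment variable is read once, when the module defining the dataclass is imported, not on each instantiation. A self-contained sketch of that behavior (hypothetical example, not project code):

```python
import os
from dataclasses import dataclass, field

os.environ["MAX_TOKENS"] = "12000"

@dataclass
class Config:
    # The default is computed here, at class-definition time.
    summary_max_tokens: int = field(default=int(os.getenv("MAX_TOKENS", "10000")))

os.environ["MAX_TOKENS"] = "99999"  # Too late: the default was already captured.
print(Config().summary_max_tokens)  # -> 12000
```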

View File

@@ -125,7 +125,7 @@ async def _handle_entity_relation_summary(
     use_llm_func = partial(use_llm_func, _priority=8)
     tokenizer: Tokenizer = global_config["tokenizer"]
-    llm_max_tokens = global_config["llm_model_max_token_size"]
+    llm_max_tokens = global_config["summary_max_tokens"]
     language = global_config["addon_params"].get(
         "language", PROMPTS["DEFAULT_LANGUAGE"]