Update env.example

yangdx
2025-11-11 12:02:37 +08:00
parent c434879c7a
commit ff8f158891


@@ -172,6 +172,8 @@ MAX_PARALLEL_INSERT=2
### LLM Configuration
### LLM_BINDING type: openai, ollama, lollms, azure_openai, aws_bedrock, gemini
### LLM_BINDING_HOST: host only for Ollama, endpoint for other LLM services
### If LightRAG is deployed in Docker:
### use host.docker.internal instead of localhost in LLM_BINDING_HOST (see the sketch below)
###########################################################################
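### Illustrative sketch only (not part of the original file): a Dockerized LightRAG
### reaching an Ollama server on the host machine; 11434 is Ollama's default port
### and the model name is a placeholder.
# LLM_BINDING=ollama
# LLM_MODEL=qwen2.5:14b
# LLM_BINDING_HOST=http://host.docker.internal:11434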
### LLM request timeout setting for all LLMs (0 means no timeout for Ollama)
# LLM_TIMEOUT=180
@@ -181,7 +183,7 @@ LLM_MODEL=gpt-4o
LLM_BINDING_HOST=https://api.openai.com/v1
LLM_BINDING_API_KEY=your_api_key
### Optional for Azure
### Env vars for Azure OpenAI
# AZURE_OPENAI_API_VERSION=2024-08-01-preview
# AZURE_OPENAI_DEPLOYMENT=gpt-4o
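### Illustrative sketch: a complete Azure OpenAI binding (the endpoint is a
### placeholder for your own resource); pair it with the AZURE_OPENAI_* vars above.
# LLM_BINDING=azure_openai
# LLM_BINDING_HOST=https://your-resource-name.openai.azure.com
# LLM_BINDING_API_KEY=your_azure_api_key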
@@ -196,22 +198,16 @@ LLM_BINDING_API_KEY=your_api_key
# LLM_MODEL=gemini-flash-latest
# LLM_BINDING_API_KEY=your_gemini_api_key
# LLM_BINDING_HOST=https://generativelanguage.googleapis.com
GEMINI_LLM_THINKING_CONFIG='{"thinking_budget": 0, "include_thoughts": false}'
### use the following command to see all supported options for Gemini
### lightrag-server --llm-binding gemini --help
### Gemini Specific Parameters
# GEMINI_LLM_MAX_OUTPUT_TOKENS=9000
# GEMINI_LLM_TEMPERATURE=0.7
### OpenAI Compatible API Specific Parameters
### Increased temperature values may mitigate infinite inference loops in certain LLMs, such as Qwen3-30B.
# OPENAI_LLM_TEMPERATURE=0.9
### Set max_tokens to mitigate endless output from some LLMs (keep it below LLM_TIMEOUT * llm_output_tokens/second, e.g. 9000 = 180s * 50 tokens/s)
### Typically, max_tokens does not include prompt content, though some models, such as Gemini models, are exceptions
### For vLLM/SGLang-deployed models, or most OpenAI-compatible API providers
# OPENAI_LLM_MAX_TOKENS=9000
### For OpenAI o1-mini or newer models
OPENAI_LLM_MAX_COMPLETION_TOKENS=9000
### OpenAI's new API uses max_completion_tokens instead of max_tokens
# OPENAI_LLM_MAX_COMPLETION_TOKENS=9000
### Enable Thinking
# GEMINI_LLM_THINKING_CONFIG='{"thinking_budget": -1, "include_thoughts": true}'
### Disable Thinking
# GEMINI_LLM_THINKING_CONFIG='{"thinking_budget": 0, "include_thoughts": false}'
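### A fixed budget is also possible (illustrative value; Gemini treats
### thinking_budget as a token count, -1 as dynamic, 0 as off):
# GEMINI_LLM_THINKING_CONFIG='{"thinking_budget": 1024, "include_thoughts": false}'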
### use the following command to see all supported options for OpenAI, azure_openai or OpenRouter
### lightrag-server --llm-binding openai --help
@@ -222,8 +218,17 @@ OPENAI_LLM_MAX_COMPLETION_TOKENS=9000
### Qwen3 Specific Parameters when deployed via vLLM
# OPENAI_LLM_EXTRA_BODY='{"chat_template_kwargs": {"enable_thinking": false}}'
### OpenAI Compatible API Specific Parameters
### Increased temperature values may mitigate infinite inference loops in certain LLMs, such as Qwen3-30B.
# OPENAI_LLM_TEMPERATURE=0.9
### Set max_tokens to mitigate endless output from some LLMs (keep it below LLM_TIMEOUT * llm_output_tokens/second, e.g. 9000 = 180s * 50 tokens/s)
### Typically, max_tokens does not include prompt content
### For vLLM/SGLang-deployed models, or most OpenAI-compatible API providers
# OPENAI_LLM_MAX_TOKENS=9000
### OpenAI o1-mini and newer models use max_completion_tokens instead of max_tokens
OPENAI_LLM_MAX_COMPLETION_TOKENS=9000
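### Worked example with different (illustrative) numbers: with LLM_TIMEOUT=300 and a
### model emitting ~40 tokens/s, a cap of about 12000 (300s * 40 tokens/s) keeps a
### full generation inside the timeout.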
### use the following command to see all supported options for Ollama LLM
### If LightRAG is deployed in Docker, use host.docker.internal instead of localhost in LLM_BINDING_HOST
### lightrag-server --llm-binding ollama --help
### Ollama Server Specific Parameters
### OLLAMA_LLM_NUM_CTX must be provided, and should be at least MAX_TOTAL_TOKENS + 2000
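### e.g. assuming MAX_TOTAL_TOKENS=30000, the 32768 below satisfies 30000 + 2000;
### raise OLLAMA_LLM_NUM_CTX together with any increase to the token budget.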
@@ -240,6 +245,8 @@ OLLAMA_LLM_NUM_CTX=32768
### Embedding Configuration (Should not be changed after the first file processed)
### EMBEDDING_BINDING: ollama, openai, azure_openai, jina, lollms, aws_bedrock
### EMBEDDING_BINDING_HOST: host only for Ollama, endpoint for other Embedding services
### If LightRAG is deployed in Docker:
### use host.docker.internal instead of localhost in EMBEDDING_BINDING_HOST (see the sketch below)
#######################################################################################
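### Illustrative sketch: a Dockerized LightRAG using an Ollama embedder on the
### host (model name and dimension are placeholders).
# EMBEDDING_BINDING=ollama
# EMBEDDING_MODEL=bge-m3:latest
# EMBEDDING_DIM=1024
# EMBEDDING_BINDING_HOST=http://host.docker.internal:11434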
# EMBEDDING_TIMEOUT=30