Update env.example

yangdx
2025-11-11 12:02:37 +08:00
parent c434879c7a
commit ff8f158891


@@ -172,6 +172,8 @@ MAX_PARALLEL_INSERT=2
### LLM Configuration
### LLM_BINDING type: openai, ollama, lollms, azure_openai, aws_bedrock, gemini
### LLM_BINDING_HOST: host only for Ollama, endpoint for other LLM services
### If LightRAG is deployed in Docker:
### use host.docker.internal instead of localhost in LLM_BINDING_HOST (see the sketch below)
###########################################################################
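### Illustrative sketch only (not part of the original file): a Dockerized LightRAG
### reaching an Ollama server on the host machine; 11434 is Ollama's default port
### and the model name is a placeholder.
# LLM_BINDING=ollama
# LLM_MODEL=qwen2.5:14b
# LLM_BINDING_HOST=http://host.docker.internal:11434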
### LLM request timeout setting for all LLMs (0 means no timeout for Ollama)
# LLM_TIMEOUT=180
@@ -181,7 +183,7 @@ LLM_MODEL=gpt-4o
LLM_BINDING_HOST=https://api.openai.com/v1
LLM_BINDING_API_KEY=your_api_key
### Optional for Azure
### Env vars for Azure OpenAI
# AZURE_OPENAI_API_VERSION=2024-08-01-preview
# AZURE_OPENAI_DEPLOYMENT=gpt-4o
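### Illustrative sketch: a complete Azure OpenAI binding (the endpoint is a
### placeholder for your own resource); pair it with the AZURE_OPENAI_* vars above.
# LLM_BINDING=azure_openai
# LLM_BINDING_HOST=https://your-resource-name.openai.azure.com
# LLM_BINDING_API_KEY=your_azure_api_key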
@@ -196,22 +198,16 @@ LLM_BINDING_API_KEY=your_api_key
# LLM_MODEL=gemini-flash-latest
# LLM_BINDING_API_KEY=your_gemini_api_key
# LLM_BINDING_HOST=https://generativelanguage.googleapis.com
GEMINI_LLM_THINKING_CONFIG='{"thinking_budget": 0, "include_thoughts": false}'
### use the following command to see all supported options for Gemini
### lightrag-server --llm-binding gemini --help
### Gemini Specific Parameters
# GEMINI_LLM_MAX_OUTPUT_TOKENS=9000
# GEMINI_LLM_TEMPERATURE=0.7
### OpenAI Compatible API Specific Parameters
### Increased temperature values may mitigate infinite inference loops in certain LLMs, such as Qwen3-30B.
# OPENAI_LLM_TEMPERATURE=0.9
### Set max_tokens to mitigate endless output from some LLMs (keep it below LLM_TIMEOUT * llm_output_tokens/second, e.g. 9000 = 180s * 50 tokens/s)
### Typically, max_tokens does not include prompt content, though some models, such as Gemini models, are exceptions
### For vLLM/SGLang-deployed models, or most OpenAI-compatible API providers
# OPENAI_LLM_MAX_TOKENS=9000
### For OpenAI o1-mini or newer models
OPENAI_LLM_MAX_COMPLETION_TOKENS=9000
### OpenAI's new API uses max_completion_tokens instead of max_tokens
# OPENAI_LLM_MAX_COMPLETION_TOKENS=9000
### Enable Thinking
# GEMINI_LLM_THINKING_CONFIG='{"thinking_budget": -1, "include_thoughts": true}'
### Disable Thinking
# GEMINI_LLM_THINKING_CONFIG='{"thinking_budget": 0, "include_thoughts": false}'
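### A fixed budget is also possible (illustrative value; Gemini treats
### thinking_budget as a token count, -1 as dynamic, 0 as off):
# GEMINI_LLM_THINKING_CONFIG='{"thinking_budget": 1024, "include_thoughts": false}'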
### use the following command to see all supported options for OpenAI, azure_openai or OpenRouter
### lightrag-server --llm-binding openai --help
@@ -222,8 +218,17 @@ OPENAI_LLM_MAX_COMPLETION_TOKENS=9000
### Qwen3 Specific Parameters when deployed via vLLM
# OPENAI_LLM_EXTRA_BODY='{"chat_template_kwargs": {"enable_thinking": false}}'
### OpenAI Compatible API Specific Parameters
### Increased temperature values may mitigate infinite inference loops in certain LLMs, such as Qwen3-30B.
# OPENAI_LLM_TEMPERATURE=0.9
### Set max_tokens to mitigate endless output from some LLMs (keep it below LLM_TIMEOUT * llm_output_tokens/second, e.g. 9000 = 180s * 50 tokens/s)
### Typically, max_tokens does not include prompt content
### For vLLM/SGLang-deployed models, or most OpenAI-compatible API providers
# OPENAI_LLM_MAX_TOKENS=9000
### OpenAI o1-mini and newer models use max_completion_tokens instead of max_tokens
OPENAI_LLM_MAX_COMPLETION_TOKENS=9000
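### Worked example with different (illustrative) numbers: with LLM_TIMEOUT=300 and a
### model emitting ~40 tokens/s, a cap of about 12000 (300s * 40 tokens/s) keeps a
### full generation inside the timeout.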
### use the following command to see all supported options for Ollama LLM
### If LightRAG is deployed in Docker, use host.docker.internal instead of localhost in LLM_BINDING_HOST
### lightrag-server --llm-binding ollama --help
### Ollama Server Specific Parameters
### OLLAMA_LLM_NUM_CTX must be provided, and should be at least MAX_TOTAL_TOKENS + 2000
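### e.g. assuming MAX_TOTAL_TOKENS=30000, the 32768 below satisfies 30000 + 2000;
### raise OLLAMA_LLM_NUM_CTX together with any increase to the token budget.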
@@ -240,6 +245,8 @@ OLLAMA_LLM_NUM_CTX=32768
### Embedding Configuration (Should not be changed after the first file processed)
### EMBEDDING_BINDING: ollama, openai, azure_openai, jina, lollms, aws_bedrock
### EMBEDDING_BINDING_HOST: host only for Ollama, endpoint for other Embedding services
### If LightRAG is deployed in Docker:
### use host.docker.internal instead of localhost in EMBEDDING_BINDING_HOST (see the sketch below)
#######################################################################################
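### Illustrative sketch: a Dockerized LightRAG using an Ollama embedder on the
### host (model name and dimension are placeholders).
# EMBEDDING_BINDING=ollama
# EMBEDDING_MODEL=bge-m3:latest
# EMBEDDING_DIM=1024
# EMBEDDING_BINDING_HOST=http://host.docker.internal:11434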
# EMBEDDING_TIMEOUT=30