Merge branch 'main' into feature/vector-model-isolation

This commit is contained in:
yangdx
2025-12-12 10:28:59 +08:00
49 changed files with 4137 additions and 1760 deletions

.github/dependabot.yml (vendored, new file, 206 lines)
View File

@@ -0,0 +1,206 @@
# Keep GitHub Actions up to date with GitHub's Dependabot...
# https://docs.github.com/en/code-security/dependabot/working-with-dependabot/keeping-your-actions-up-to-date-with-dependabot
# https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file#package-ecosystem
version: 2
updates:
# ============================================================
# GitHub Actions
# PR Strategy:
# - All updates (major/minor/patch): Grouped into a single PR
# ============================================================
- package-ecosystem: github-actions
directory: /
groups:
github-actions:
patterns:
- "*" # Group all Actions updates into a single larger pull request
schedule:
interval: weekly
day: monday
time: "02:00"
timezone: "Asia/Shanghai"
labels:
- "dependencies"
- "github-actions"
open-pull-requests-limit: 2
# ============================================================
# Python (pip) Dependencies
# PR Strategy:
# - Major updates: Individual PR per package (except numpy which is ignored)
# - Minor updates: Grouped by category (llm-providers, storage, etc.)
# - Patch updates: Grouped by category
# ============================================================
- package-ecosystem: "pip"
directory: "/"
schedule:
interval: "weekly"
day: "wednesday"
time: "02:00"
timezone: "Asia/Shanghai"
cooldown:
default-days: 5
semver-major-days: 30
semver-minor-days: 7
semver-patch-days: 3
groups:
# Core dependencies - LLM providers and embeddings
llm-providers:
patterns:
- "openai"
- "anthropic"
- "google-*"
- "boto3"
- "botocore"
- "ollama"
update-types:
- "minor"
- "patch"
# Storage backends
storage:
patterns:
- "neo4j"
- "pymongo"
- "redis"
- "psycopg*"
- "asyncpg"
- "milvus*"
- "qdrant*"
update-types:
- "minor"
- "patch"
# Data processing and ML
data-processing:
patterns:
- "numpy"
- "scipy"
- "pandas"
- "tiktoken"
- "transformers"
- "torch*"
update-types:
- "minor"
- "patch"
# Web framework and API
web-framework:
patterns:
- "fastapi"
- "uvicorn"
- "gunicorn"
- "starlette"
- "pydantic*"
update-types:
- "minor"
- "patch"
# Development and testing tools
dev-tools:
patterns:
- "pytest*"
- "ruff"
- "pre-commit"
- "black"
- "mypy"
update-types:
- "minor"
- "patch"
# Minor and patch updates for everything else
python-minor-patch:
patterns:
- "*"
update-types:
- "minor"
- "patch"
ignore:
- dependency-name: "numpy"
update-types:
- "version-update:semver-major"
labels:
- "dependencies"
- "python"
open-pull-requests-limit: 5
# ============================================================
# Frontend (bun) Dependencies
# PR Strategy:
# - Major updates: Individual PR per package
# - Minor updates: Grouped by category (react, ui-components, etc.)
# - Patch updates: Grouped by category
# ============================================================
- package-ecosystem: "bun"
directory: "/lightrag_webui"
schedule:
interval: "weekly"
day: "friday"
time: "02:00"
timezone: "Asia/Shanghai"
cooldown:
default-days: 5
semver-major-days: 30
semver-minor-days: 7
semver-patch-days: 3
groups:
# React ecosystem
react:
patterns:
- "react"
- "react-dom"
- "react-router*"
- "@types/react*"
update-types:
- "minor"
- "patch"
# UI components and styling
ui-components:
patterns:
- "@radix-ui/*"
- "tailwind*"
- "@tailwindcss/*"
- "lucide-react"
- "class-variance-authority"
- "clsx"
update-types:
- "minor"
- "patch"
# Graph visualization
graph-viz:
patterns:
- "sigma"
- "@sigma/*"
- "graphology*"
update-types:
- "minor"
- "patch"
# Build tools and dev dependencies
build-tools:
patterns:
- "vite"
- "@vitejs/*"
- "typescript"
- "eslint*"
- "@eslint/*"
- "typescript-eslint"
- "prettier"
- "prettier-*"
- "@types/bun"
update-types:
- "minor"
- "patch"
# Content rendering libraries (math, diagrams, etc.)
content-rendering:
patterns:
- "katex"
- "mermaid"
update-types:
- "minor"
- "patch"
# All other minor and patch updates
frontend-minor-patch:
patterns:
- "*"
update-types:
- "minor"
- "patch"
labels:
- "dependencies"
- "frontend"
open-pull-requests-limit: 5

View File

@@ -0,0 +1,58 @@
name: "Copilot Setup Steps"
# Automatically run the setup steps when they are changed to allow for easy validation, and
# allow manual testing through the repository's "Actions" tab
on:
workflow_dispatch:
push:
paths:
- .github/workflows/copilot-setup-steps.yml
pull_request:
paths:
- .github/workflows/copilot-setup-steps.yml
jobs:
# The job MUST be called `copilot-setup-steps` or it will not be picked up by Copilot.
copilot-setup-steps:
runs-on: ubuntu-latest
# Timeout after 30 minutes (maximum is 59)
timeout-minutes: 30
# You can define any steps you want, and they will run before the agent starts.
# If you do not check out your code, Copilot will do this for you.
steps:
- name: Checkout code
uses: actions/checkout@v6
- name: Set up Python 3.11
uses: actions/setup-python@v6
with:
python-version: '3.11'
- name: Cache pip packages
uses: actions/cache@v4
with:
path: ~/.cache/pip
key: ${{ runner.os }}-pip-copilot-${{ hashFiles('**/pyproject.toml') }}
restore-keys: |
${{ runner.os }}-pip-copilot-
${{ runner.os }}-pip-
- name: Install Python dependencies
run: |
python -m pip install --upgrade pip
pip install -e ".[api]"
pip install pytest pytest-asyncio httpx
- name: Create minimal frontend stub for Copilot agent
run: |
mkdir -p lightrag/api/webui
echo '<!DOCTYPE html><html><head><title>LightRAG - Copilot Agent</title></head><body><h1>Copilot Agent Mode</h1></body></html>' > lightrag/api/webui/index.html
echo "Created minimal frontend stub for Copilot agent environment"
- name: Verify installation
run: |
python --version
pip list | grep lightrag
lightrag-server --help || echo "Note: Server requires .env configuration to run"

View File

@@ -18,7 +18,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v4
uses: actions/checkout@v6
with:
fetch-depth: 0
@@ -66,7 +66,7 @@ jobs:
type=raw,value=lite
- name: Build and push lite Docker image
uses: docker/build-push-action@v5
uses: docker/build-push-action@v6
with:
context: .
file: ./Dockerfile.lite

View File

@@ -18,7 +18,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v4
uses: actions/checkout@v6
with:
fetch-depth: 0 # Fetch all history for tags
@@ -61,7 +61,7 @@ jobs:
type=raw,value=${{ steps.get_tag.outputs.tag }}
- name: Build and push Docker image
uses: docker/build-push-action@v5
uses: docker/build-push-action@v6
with:
context: .
file: ./Dockerfile

View File

@@ -14,7 +14,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v4
uses: actions/checkout@v6
with:
fetch-depth: 0 # Fetch all history for tags
@@ -63,7 +63,7 @@ jobs:
type=raw,value=latest,enable=${{ steps.check_prerelease.outputs.is_prerelease == 'false' }}
- name: Build and push Docker image
uses: docker/build-push-action@v5
uses: docker/build-push-action@v6
with:
context: .
file: ./Dockerfile

View File

@@ -10,14 +10,15 @@ on:
jobs:
lint-and-format:
name: Linting and Formatting
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v2
uses: actions/checkout@v6
- name: Set up Python
uses: actions/setup-python@v2
uses: actions/setup-python@v6
with:
python-version: '3.x'

View File

@@ -13,13 +13,13 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v6
with:
fetch-depth: 0 # Fetch all history for tags
# Build frontend WebUI
- name: Setup Bun
uses: oven-sh/setup-bun@v1
uses: oven-sh/setup-bun@v2
with:
bun-version: latest
@@ -40,7 +40,7 @@ jobs:
echo "Frontend files:"
ls -lh lightrag/api/webui/ | head -10
- uses: actions/setup-python@v5
- uses: actions/setup-python@v6
with:
python-version: "3.x"
@@ -64,7 +64,7 @@ jobs:
python -m build
- name: Upload distributions
uses: actions/upload-artifact@v4
uses: actions/upload-artifact@v5
with:
name: release-dists
path: dist/
@@ -81,7 +81,7 @@ jobs:
steps:
- name: Retrieve release distributions
uses: actions/download-artifact@v4
uses: actions/download-artifact@v6
with:
name: release-dists
path: dist/

View File

@@ -13,7 +13,7 @@ jobs:
stale:
runs-on: ubuntu-latest
steps:
- uses: actions/stale@v9
- uses: actions/stale@v10
with:
days-before-stale: 90 # 90 days
days-before-close: 7 # 7 days after marked as stale

View File

@@ -13,13 +13,13 @@ jobs:
strategy:
matrix:
python-version: ['3.10', '3.11', '3.12']
python-version: ['3.12', '3.13', '3.14']
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v6
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
uses: actions/setup-python@v6
with:
python-version: ${{ matrix.python-version }}
@@ -45,7 +45,7 @@ jobs:
- name: Upload test results
if: always()
uses: actions/upload-artifact@v4
uses: actions/upload-artifact@v5
with:
name: test-results-py${{ matrix.python-version }}
path: |

View File

@@ -12,7 +12,7 @@ LightRAG is an advanced Retrieval-Augmented Generation (RAG) framework designed
- `python -m venv .venv && source .venv/bin/activate`: set up the Python runtime.
- `pip install -e .` / `pip install -e .[api]`: install the package and API extras in editable mode.
- `lightrag-server` or `uvicorn lightrag.api.lightrag_server:app --reload`: start the API locally; ensure `.env` is present.
- `python -m pytest tests` or `python test_graph_storage.py`: run the full suite or a targeted script.
- `python -m pytest tests` (offline markers apply by default) or `python -m pytest tests --run-integration` / `python test_graph_storage.py`: run the full suite, opt into integration coverage, or target an individual script.
- `ruff check .`: lint Python sources before committing.
- `bun install`, `bun run dev`, `bun run build`, `bun test`: manage the web UI workflow (Bun is mandatory).
@@ -24,9 +24,11 @@ LightRAG is an advanced Retrieval-Augmented Generation (RAG) framework designed
- Front-end code should remain in TypeScript with two-space indentation, rely on functional React components with hooks, and follow Tailwind utility style.
## Testing Guidelines
- Add pytest cases beside the affected module or the relevant `test_*.py`; functions should start with `test_`.
- Export required `LIGHTRAG_*` environment variables before running integration or storage tests.
- For UI updates, pair code with Vitest specs and run `bun test`.
- Keep pytest additions close to the code you touch (`tests/` mirrors feature folders and there are root-level `test_*.py` helpers); functions must start with `test_`.
- Follow `tests/pytest.ini`: markers include `offline`, `integration`, `requires_db`, and `requires_api`, and the suite runs with `-m "not integration"` by default—pass `--run-integration` (or set `LIGHTRAG_RUN_INTEGRATION=true`) when external services are available.
- Use the custom CLI toggles from `tests/conftest.py`: `--keep-artifacts`/`LIGHTRAG_KEEP_ARTIFACTS=true`, `--stress-test`/`LIGHTRAG_STRESS_TEST=true`, and `--test-workers N`/`LIGHTRAG_TEST_WORKERS` to dial up workloads or preserve temp files during investigations.
- Export other required `LIGHTRAG_*` environment variables before running integration or storage tests so adapters can reach configured backends.
- For UI updates, pair changes with Vitest specs and run `bun test`.
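To make the marker and toggle guidance above concrete, here is a minimal sketch (marker names follow `tests/pytest.ini` as described; the test bodies are illustrative placeholders):
```python
import pytest

# Offline tests run in the default "-m 'not integration'" selection.
@pytest.mark.offline
def test_prompt_formatting_offline():
    assert "query: {q}".format(q="ping") == "query: ping"

# Integration tests only run when --run-integration is passed (or
# LIGHTRAG_RUN_INTEGRATION=true) and the LIGHTRAG_* variables point at a
# reachable backend.
@pytest.mark.integration
@pytest.mark.requires_db
def test_storage_roundtrip():
    ...
```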
## Commit & Pull Request Guidelines
- Use concise, imperative commit subjects (e.g., `Fix lock key normalization`) and add body context only when necessary.
@@ -37,3 +39,10 @@ LightRAG is an advanced Retrieval-Augmented Generation (RAG) framework designed
- Copy `.env.example` and `config.ini.example`; never commit secrets or real connection strings.
- Configure storage backends through `LIGHTRAG_*` variables and validate them with `docker-compose` services when needed.
- Treat `lightrag.log*` as local artefacts; purge sensitive information before sharing logs or outputs.
## Automation & Agent Workflow
- Use repo-relative `workdir` arguments for every shell command and prefer `rg`/`rg --files` for searches since they are faster under the CLI harness.
- Default edits to ASCII, rely on `apply_patch` for single-file changes, and only add concise comments that aid comprehension of complex logic.
- Honor existing local modifications; never revert or discard user changes (especially via `git reset --hard`) unless explicitly asked.
- Follow the planning tool guidance: skip it for trivial fixes, but provide multi-step plans for non-trivial work and keep the plan updated as steps progress.
- Validate changes by running the relevant `ruff`/`pytest`/`bun test` commands whenever feasible, and describe any unrun checks with follow-up guidance.

View File

@@ -53,28 +53,24 @@
## 🎉 新闻
- [x] [2025.11.05]🎯📢添加**基于RAGAS的**评估框架和**Langfuse**可观测性支持。
- [x] [2025.10.22]🎯📢消除处理**大规模数据集**的瓶颈。
- [x] [2025.09.15]🎯📢显著提升**小型LLM**如Qwen3-30B-A3B的知识图谱提取准确性。
- [x] [2025.08.29]🎯📢现已支持**Reranker**,显著提升混合查询性能。
- [x] [2025.08.04]🎯📢支持**文档删除**并重新生成知识图谱以确保查询性能。
- [x] [2025.06.16]🎯📢我们的团队发布了[RAG-Anything](https://github.com/HKUDS/RAG-Anything),一个用于无缝处理文本、图像、表格和方程式的全功能多模态 RAG 系统。
- [X] [2025.06.05]🎯📢LightRAG现已集成[RAG-Anything](https://github.com/HKUDS/RAG-Anything)支持全面的多模态文档解析与RAG能力PDF、图片、Office文档、表格、公式等。详见下方[多模态处理模块](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#多模态文档处理rag-anything集成)。
- [X] [2025.03.18]🎯📢LightRAG现已支持引文功能。
- [X] [2025.02.05]🎯📢我们团队发布了[VideoRAG](https://github.com/HKUDS/VideoRAG),用于理解超长上下文视频
- [X] [2025.01.13]🎯📢我们团队发布了[MiniRAG](https://github.com/HKUDS/MiniRAG)使用小型模型简化RAG
- [X] [2025.01.06]🎯📢现在您可以[使用PostgreSQL进行存储](#using-postgresql-for-storage)
- [X] [2024.11.25]🎯📢LightRAG现在支持无缝集成[自定义知识图谱](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#insert-custom-kg),使用户能够用自己的领域专业知识增强系统
- [X] [2024.11.19]🎯📢LightRAG的综合指南现已在[LearnOpenCV](https://learnopencv.com/lightrag)上发布。非常感谢博客作者。
- [X] [2024.11.11]🎯📢LightRAG现在支持[通过实体名称删除实体](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#delete)
- [X] [2024.11.09]🎯📢推出[LightRAG Gui](https://lightrag-gui.streamlit.app)允许您插入、查询、可视化和下载LightRAG知识
- [X] [2024.11.04]🎯📢现在您可以[使用Neo4J进行存储](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#using-neo4j-for-storage)。
- [X] [2024.10.29]🎯📢LightRAG现在通过`textract`支持多种文件类型包括PDF、DOC、PPT和CSV。
- [X] [2024.10.20]🎯📢我们为LightRAG添加了一个新功能图形可视化。
- [X] [2024.10.18]🎯📢我们添加了[LightRAG介绍视频](https://youtu.be/oageL-1I0GE)的链接。感谢作者!
- [X] [2024.10.17]🎯📢我们创建了一个[Discord频道](https://discord.gg/yF2MmDJyGJ)!欢迎加入分享和讨论!🎉🎉
- [X] [2024.10.16]🎯📢LightRAG现在支持[Ollama模型](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#quick-start)
- [X] [2024.10.15]🎯📢LightRAG现在支持[Hugging Face模型](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#quick-start)
- [2025.11.05]🎯添加**基于RAGAS的**评估框架和**Langfuse**可观测性支持API可随查询结果返回召回上下文
- [2025.10.22]🎯消除处理**大规模数据集**的性能瓶颈。
- [2025.09.15]🎯显著提升**小型LLM**如Qwen3-30B-A3B的知识图谱提取准确性。
- [2025.08.29]🎯现已支持**Reranker**,显著提升混合查询性能(现已设为默认查询模式)
- [2025.08.04]🎯支持**文档删除**并重新生成知识图谱以确保查询性能。
- [2025.06.16]🎯我们的团队发布了[RAG-Anything](https://github.com/HKUDS/RAG-Anything),一个用于无缝处理文本、图像、表格和方程式的全功能多模态 RAG 系统。
- [2025.06.05]🎯LightRAG现已集成[RAG-Anything](https://github.com/HKUDS/RAG-Anything)支持全面的多模态文档解析与RAG能力PDF、图片、Office文档、表格、公式等。详见下方[多模态处理模块](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#多模态文档处理rag-anything集成)。
- [2025.03.18]🎯LightRAG现已支持参考文献功能。
- [2025.02.12]🎯现在您可以使用MongoDB作为一体化存储解决方案
- [2025.02.05]🎯我们团队发布了[VideoRAG](https://github.com/HKUDS/VideoRAG)用于理解超长上下文视频
- [2025.01.13]🎯我们团队发布了[MiniRAG](https://github.com/HKUDS/MiniRAG)使用小型模型简化RAG
- [2025.01.06]🎯现在您可以使用PostgreSQL作为一体化存储解决方案
- [2024.11.19]🎯LightRAG的综合指南现已在[LearnOpenCV](https://learnopencv.com/lightrag)上发布。非常感谢博客作者。
- [2024.11.09]🎯推出LightRAG Webui允许您插入、查询、可视化LightRAG知识
- [2024.11.04]🎯现在您可以[使用Neo4J进行存储](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#using-neo4j-for-storage)
- [2024.10.18]🎯我们添加了[LightRAG介绍视频](https://youtu.be/oageL-1I0GE)的链接。感谢作者!
- [2024.10.17]🎯我们创建了一个[Discord频道](https://discord.gg/yF2MmDJyGJ)!欢迎加入分享和讨论!🎉🎉
- [2024.10.16]🎯LightRAG现在支持[Ollama模型](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#quick-start)
<details>
<summary style="font-size: 1.4em; font-weight: bold; cursor: pointer; display: list-item;">
@@ -411,6 +407,11 @@ LightRAG 需要利用LLM和Embeding模型来完成文档索引和知识库查询
* LightRAG还支持类OpenAI的聊天/嵌入API
```python
import os
import numpy as np
from lightrag.utils import wrap_embedding_func_with_attrs
from lightrag.llm.openai import openai_complete_if_cache, openai_embed
async def llm_model_func(
prompt, system_prompt=None, history_messages=[], keyword_extraction=False, **kwargs
) -> str:
@@ -424,8 +425,9 @@ async def llm_model_func(
**kwargs
)
@wrap_embedding_func_with_attrs(embedding_dim=4096, max_token_size=8192)
async def embedding_func(texts: list[str]) -> np.ndarray:
return await openai_embed(
return await openai_embed.func(
texts,
model="solar-embedding-1-large-query",
api_key=os.getenv("UPSTAGE_API_KEY"),
@@ -436,16 +438,17 @@ async def initialize_rag():
rag = LightRAG(
working_dir=WORKING_DIR,
llm_model_func=llm_model_func,
embedding_func=EmbeddingFunc(
embedding_dim=4096,
func=embedding_func
)
embedding_func=embedding_func # 直接传入装饰后的函数
)
await rag.initialize_storages()
return rag
```
> **关于嵌入函数封装的重要说明:**
>
> `EmbeddingFunc` 不能嵌套封装。已经被 `@wrap_embedding_func_with_attrs` 装饰过的嵌入函数(如 `openai_embed`、`ollama_embed` 等)不能再次使用 `EmbeddingFunc()` 封装。这就是为什么在创建自定义嵌入函数时,我们调用 `xxx_embed.func`(底层未封装的函数)而不是直接调用 `xxx_embed`。
</details>
<details>
@@ -477,24 +480,26 @@ rag = LightRAG(
<details>
<summary> <b>使用Ollama模型</b> </summary>
如果您想使用Ollama模型您需要拉取计划使用的模型和嵌入模型例如`nomic-embed-text`。
然后您只需要按如下方式设置LightRAG
```python
import numpy as np
from lightrag.utils import wrap_embedding_func_with_attrs
from lightrag.llm.ollama import ollama_model_complete, ollama_embed
@wrap_embedding_func_with_attrs(embedding_dim=768, max_token_size=8192)
async def embedding_func(texts: list[str]) -> np.ndarray:
return await ollama_embed.func(texts, embed_model="nomic-embed-text")
# 使用Ollama模型初始化LightRAG
rag = LightRAG(
working_dir=WORKING_DIR,
llm_model_func=ollama_model_complete, # 使用Ollama模型进行文本生成
llm_model_name='your_model_name', # 您的模型名称
# 使用Ollama嵌入函数
embedding_func=EmbeddingFunc(
embedding_dim=768,
func=lambda texts: ollama_embed(
texts,
embed_model="nomic-embed-text"
)
),
embedding_func=embedding_func, # 直接传入装饰后的函数
)
```
@@ -533,22 +538,27 @@ ollama create -f Modelfile qwen2m
您可以使用`llm_model_kwargs`参数配置ollama
```python
import numpy as np
from lightrag.utils import wrap_embedding_func_with_attrs
from lightrag.llm.ollama import ollama_model_complete, ollama_embed
@wrap_embedding_func_with_attrs(embedding_dim=768, max_token_size=8192)
async def embedding_func(texts: list[str]) -> np.ndarray:
return await ollama_embed.func(texts, embed_model="nomic-embed-text")
rag = LightRAG(
working_dir=WORKING_DIR,
llm_model_func=ollama_model_complete, # 使用Ollama模型进行文本生成
llm_model_name='your_model_name', # 您的模型名称
llm_model_kwargs={"options": {"num_ctx": 32768}},
# 使用Ollama嵌入函数
embedding_func=EmbeddingFunc(
embedding_dim=768,
func=lambda texts: ollama_embed(
texts,
embed_model="nomic-embed-text"
)
),
embedding_func=embedding_func, # 直接传入装饰后的函数
)
```
> **关于嵌入函数封装的重要说明:**
>
> `EmbeddingFunc` 不能嵌套封装。已经被 `@wrap_embedding_func_with_attrs` 装饰过的嵌入函数(如 `openai_embed`、`ollama_embed` 等)不能再次使用 `EmbeddingFunc()` 封装。这就是为什么在创建自定义嵌入函数时,我们调用 `xxx_embed.func`(底层未封装的函数)而不是直接调用 `xxx_embed`。
* **低RAM GPU**
为了在低RAM GPU上运行此实验您应该选择小型模型并调整上下文窗口增加上下文会增加内存消耗。例如在6Gb RAM的改装挖矿GPU上运行这个ollama示例需要将上下文大小设置为26k同时使用`gemma2:2b`。它能够在`book.txt`中找到197个实体和19个关系。
@@ -560,7 +570,7 @@ rag = LightRAG(
LightRAG支持与LlamaIndex集成 (`llm/llama_index_impl.py`):
- 通过LlamaIndex与OpenAI和其他提供商集成
- 详细设置和示例请参见[LlamaIndex文档](lightrag/llm/Readme.md)
- 详细设置和示例请参见[LlamaIndex文档](https://developers.llamaindex.ai/python/framework/)
**使用示例:**
@@ -622,9 +632,10 @@ if __name__ == "__main__":
**详细文档和示例,请参见:**
- [LlamaIndex文档](lightrag/llm/Readme.md)
- [直接OpenAI示例](examples/lightrag_llamaindex_direct_demo.py)
- [LiteLLM代理示例](examples/lightrag_llamaindex_litellm_demo.py)
- [LlamaIndex文档](https://developers.llamaindex.ai/python/framework/)
- [直接OpenAI示例](examples/unofficial-sample/lightrag_llamaindex_direct_demo.py)
- [LiteLLM代理示例](examples/unofficial-sample/lightrag_llamaindex_litellm_demo.py)
- [LiteLLM+OPIK代理示例](examples/unofficial-sample/lightrag_llamaindex_litellm_opik_demo.py)
</details>
@@ -885,7 +896,7 @@ rag = LightRAG(
对于生产级场景您很可能想要利用企业级解决方案。PostgreSQL可以为您提供一站式存储解决方案作为KV存储、向量数据库pgvector和图数据库apache AGE。支持 PostgreSQL 版本为16.6或以上。
* 如果您是初学者并想避免麻烦推荐使用docker请从这个镜像开始请务必阅读概述https://hub.docker.com/r/shangor/postgres-for-rag
* 如果您是初学者并想避免麻烦推荐使用docker请从这个镜像开始默认帐号密码:rag/raghttps://hub.docker.com/r/gzdaniel/postgres-for-rag
* Apache AGE的性能不如Neo4j。追求高性能的图数据库请使用Neo4j。
</details>
@@ -1527,7 +1538,7 @@ LANGFUSE_ENABLE_TRACE=true
## RAGAS评估
**RAGAS**Retrieval Augmented Generation Assessment检索增强生成评估是一个使用LLM对RAG系统进行无参考评估的框架。我们提供了基于RAGAS的评估脚本。详细信息请参阅[基于RAGAS的评估框架](lightrag/evaluation/README.md)。
**RAGAS**Retrieval Augmented Generation Assessment检索增强生成评估是一个使用LLM对RAG系统进行无参考评估的框架。我们提供了基于RAGAS的评估脚本。详细信息请参阅[基于RAGAS的评估框架](lightrag/evaluation/README_EVALUASTION_RAGAS.md)。
## 评估

README.md (117 lines changed)
View File

@@ -51,28 +51,24 @@
---
## 🎉 News
- [x] [2025.11.05]🎯📢Add **RAGAS-based** Evaluation Framework and **Langfuse** observability for LightRAG.
- [x] [2025.10.22]🎯📢Eliminate bottlenecks in processing **large-scale datasets**.
- [x] [2025.09.15]🎯📢Significantly enhances KG extraction accuracy for **small LLMs** like Qwen3-30B-A3B.
- [x] [2025.08.29]🎯📢**Reranker** is supported now , significantly boosting performance for mixed queries.
- [x] [2025.08.04]🎯📢**Document deletion** with KG regeneration to ensure query performance.
- [x] [2025.06.16]🎯📢Our team has released [RAG-Anything](https://github.com/HKUDS/RAG-Anything) an All-in-One Multimodal RAG System for seamless text, image, table, and equation processing.
- [X] [2025.06.05]🎯📢LightRAG now supports comprehensive multimodal data handling through [RAG-Anything](https://github.com/HKUDS/RAG-Anything) integration, enabling seamless document parsing and RAG capabilities across diverse formats including PDFs, images, Office documents, tables, and formulas. Please refer to the new [multimodal section](https://github.com/HKUDS/LightRAG/?tab=readme-ov-file#multimodal-document-processing-rag-anything-integration) for details.
- [X] [2025.03.18]🎯📢LightRAG now supports citation functionality, enabling proper source attribution.
- [X] [2025.02.05]🎯📢Our team has released [VideoRAG](https://github.com/HKUDS/VideoRAG) understanding extremely long-context videos.
- [X] [2025.01.13]🎯📢Our team has released [MiniRAG](https://github.com/HKUDS/MiniRAG) making RAG simpler with small models.
- [X] [2025.01.06]🎯📢You can now [use PostgreSQL for Storage](#using-postgresql-for-storage).
- [X] [2024.11.25]🎯📢LightRAG now supports seamless integration of [custom knowledge graphs](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#insert-custom-kg), empowering users to enhance the system with their own domain expertise.
- [X] [2024.11.19]🎯📢A comprehensive guide to LightRAG is now available on [LearnOpenCV](https://learnopencv.com/lightrag). Many thanks to the blog author.
- [X] [2024.11.11]🎯📢LightRAG now supports [deleting entities by their names](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#delete).
- [X] [2024.11.09]🎯📢Introducing the [LightRAG Gui](https://lightrag-gui.streamlit.app), which allows you to insert, query, visualize, and download LightRAG knowledge.
- [X] [2024.11.04]🎯📢You can now [use Neo4J for Storage](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#using-neo4j-for-storage).
- [X] [2024.10.29]🎯📢LightRAG now supports multiple file types, including PDF, DOC, PPT, and CSV via `textract`.
- [X] [2024.10.20]🎯📢We've added a new feature to LightRAG: Graph Visualization.
- [X] [2024.10.18]🎯📢We've added a link to a [LightRAG Introduction Video](https://youtu.be/oageL-1I0GE). Thanks to the author!
- [X] [2024.10.17]🎯📢We have created a [Discord channel](https://discord.gg/yF2MmDJyGJ)! Welcome to join for sharing and discussions! 🎉🎉
- [X] [2024.10.16]🎯📢LightRAG now supports [Ollama models](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#quick-start)!
- [X] [2024.10.15]🎯📢LightRAG now supports [Hugging Face models](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#quick-start)!
- [2025.11]🎯[New Feature]: Integrated **RAGAS for Evaluation** and **Langfuse for Tracing**. Updated the API to return retrieved contexts alongside query results to support context precision metrics.
- [2025.10]🎯[Scalability Enhancement]: Eliminated processing bottlenecks to support **Large-Scale Datasets Efficiently**.
- [2025.09]🎯[New Feature] Enhances knowledge graph extraction accuracy for **Open-Sourced LLMs** such as Qwen3-30B-A3B.
- [2025.08]🎯[New Feature] **Reranker** is now supported, significantly boosting performance for mixed queries (set as default query mode).
- [2025.08]🎯[New Feature] Added **Document Deletion** with automatic KG regeneration to ensure optimal query performance.
- [2025.06]🎯[New Release] Our team has released [RAG-Anything](https://github.com/HKUDS/RAG-Anything), an **All-in-One Multimodal RAG** system for seamless processing of text, images, tables, and equations.
- [2025.06]🎯[New Feature] LightRAG now supports comprehensive multimodal data handling through [RAG-Anything](https://github.com/HKUDS/RAG-Anything) integration, enabling seamless document parsing and RAG capabilities across diverse formats including PDFs, images, Office documents, tables, and formulas. Please refer to the new [multimodal section](https://github.com/HKUDS/LightRAG/?tab=readme-ov-file#multimodal-document-processing-rag-anything-integration) for details.
- [2025.03]🎯[New Feature] LightRAG now supports citation functionality, enabling proper source attribution and enhanced document traceability.
- [2025.02]🎯[New Feature] You can now use MongoDB as an all-in-one storage solution for unified data management.
- [2025.02]🎯[New Release] Our team has released [VideoRAG](https://github.com/HKUDS/VideoRAG), a RAG system for understanding extremely long-context videos.
- [2025.01]🎯[New Release] Our team has released [MiniRAG](https://github.com/HKUDS/MiniRAG) making RAG simpler with small models.
- [2025.01]🎯You can now use PostgreSQL as an all-in-one storage solution for data management.
- [2024.11]🎯[New Resource] A comprehensive guide to LightRAG is now available on [LearnOpenCV](https://learnopencv.com/lightrag), with in-depth tutorials and best practices. Many thanks to the blog author for this excellent contribution!
- [2024.11]🎯[New Feature] Introducing the LightRAG WebUI — an interface that allows you to insert, query, and visualize LightRAG knowledge through an intuitive web-based dashboard.
- [2024.11]🎯[New Feature] You can now [use Neo4J for Storage](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#using-neo4j-for-storage), enabling graph database support.
- [2024.10]🎯[New Feature] We've added a link to a [LightRAG Introduction Video](https://youtu.be/oageL-1I0GE), a walkthrough of LightRAG's capabilities. Thanks to the author for this excellent contribution!
- [2024.10]🎯[New Channel] We have created a [Discord channel](https://discord.gg/yF2MmDJyGJ)!💬 Welcome to join our community for sharing, discussions, and collaboration! 🎉🎉
- [2024.10]🎯[New Feature] LightRAG now supports [Ollama models](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#quick-start)!
<details>
<summary style="font-size: 1.4em; font-weight: bold; cursor: pointer; display: list-item;">
@@ -218,7 +214,7 @@ For a streaming response implementation example, please see `examples/lightrag_o
**Note 2**: Only `lightrag_openai_demo.py` and `lightrag_openai_compatible_demo.py` are officially supported sample codes. Other sample files are community contributions that haven't undergone full testing and optimization.
## Programing with LightRAG Core
## Programming with LightRAG Core
> ⚠️ **If you would like to integrate LightRAG into your project, we recommend utilizing the REST API provided by the LightRAG Server**. LightRAG Core is typically intended for embedded applications or for researchers who wish to conduct studies and evaluations.
@@ -317,7 +313,7 @@ A full list of LightRAG init parameters:
| **vector_db_storage_cls_kwargs** | `dict` | Additional parameters for vector database, like setting the threshold for nodes and relations retrieval | cosine_better_than_threshold: 0.2 (default value changed by env var COSINE_THRESHOLD) |
| **enable_llm_cache** | `bool` | If `TRUE`, stores LLM results in cache; repeated prompts return cached responses | `TRUE` |
| **enable_llm_cache_for_entity_extract** | `bool` | If `TRUE`, stores LLM results in cache for entity extraction; Good for beginners to debug your application | `TRUE` |
| **addon_params** | `dict` | Additional parameters, e.g., `{"language": "Simplified Chinese", "entity_types": ["organization", "person", "location", "event"]}`: sets example limit, entiy/relation extraction output language | language: English` |
| **addon_params** | `dict` | Additional parameters, e.g., `{"language": "Simplified Chinese", "entity_types": ["organization", "person", "location", "event"]}`: sets example limit, entity/relation extraction output language | `language: English` |
| **embedding_cache_config** | `dict` | Configuration for question-answer caching. Contains three parameters: `enabled`: Boolean value to enable/disable cache lookup functionality. When enabled, the system will check cached responses before generating new answers. `similarity_threshold`: Float value (0-1), similarity threshold. When a new question's similarity with a cached question exceeds this threshold, the cached answer will be returned directly without calling the LLM. `use_llm_check`: Boolean value to enable/disable LLM similarity verification. When enabled, LLM will be used as a secondary check to verify the similarity between questions before returning cached answers. | Default: `{"enabled": False, "similarity_threshold": 0.95, "use_llm_check": False}` |
</details>
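To illustrate the `embedding_cache_config` row in the table above, here is a minimal sketch; the surrounding `LightRAG` arguments are borrowed from the earlier initialization examples, and the values shown are the documented defaults with caching switched on:
```python
rag = LightRAG(
    working_dir=WORKING_DIR,
    llm_model_func=llm_model_func,
    embedding_func=embedding_func,
    embedding_cache_config={
        "enabled": True,               # look up cached answers before calling the LLM
        "similarity_threshold": 0.95,  # reuse a cached answer above this similarity
        "use_llm_check": False,        # skip the secondary LLM similarity verification
    },
)
```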
@@ -368,7 +364,7 @@ class QueryParam:
max_total_tokens: int = int(os.getenv("MAX_TOTAL_TOKENS", "30000"))
"""Maximum total tokens budget for the entire query context (entities + relations + chunks + system prompt)."""
# History mesages is only send to LLM for context, not used for retrieval
# History messages are only sent to LLM for context, not used for retrieval
conversation_history: list[dict[str, str]] = field(default_factory=list)
"""Stores past conversation history to maintain context.
Format: [{"role": "user/assistant", "content": "message"}].
@@ -407,6 +403,11 @@ LightRAG requires the utilization of LLM and Embedding models to accomplish docu
* LightRAG also supports Open AI-like chat/embeddings APIs:
```python
import os
import numpy as np
from lightrag.utils import wrap_embedding_func_with_attrs
from lightrag.llm.openai import openai_complete_if_cache, openai_embed
async def llm_model_func(
prompt, system_prompt=None, history_messages=[], keyword_extraction=False, **kwargs
) -> str:
@@ -420,8 +421,9 @@ async def llm_model_func(
**kwargs
)
@wrap_embedding_func_with_attrs(embedding_dim=4096, max_token_size=8192)
async def embedding_func(texts: list[str]) -> np.ndarray:
return await openai_embed(
return await openai_embed.func(
texts,
model="solar-embedding-1-large-query",
api_key=os.getenv("UPSTAGE_API_KEY"),
@@ -432,16 +434,17 @@ async def initialize_rag():
rag = LightRAG(
working_dir=WORKING_DIR,
llm_model_func=llm_model_func,
embedding_func=EmbeddingFunc(
embedding_dim=4096,
func=embedding_func
)
embedding_func=embedding_func # Pass the decorated function directly
)
await rag.initialize_storages()
return rag
```
> **Important Note on Embedding Function Wrapping:**
>
> `EmbeddingFunc` cannot be nested. Functions that have been decorated with `@wrap_embedding_func_with_attrs` (such as `openai_embed`, `ollama_embed`, etc.) cannot be wrapped again using `EmbeddingFunc()`. This is why we call `xxx_embed.func` (the underlying unwrapped function) instead of `xxx_embed` directly when creating custom embedding functions.
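A compact sketch of the rule stated in this note (imports mirror the example above; `text-embedding-3-small` and its 1536 dimensions are the documented OpenAI defaults):
```python
from lightrag.llm.openai import openai_embed
from lightrag.utils import wrap_embedding_func_with_attrs

# Already-decorated providers are passed through untouched:
#   LightRAG(..., embedding_func=openai_embed)
# Re-wrapping them with EmbeddingFunc(...) would nest wrappers and is not allowed.

# For a custom embedding function, apply the decorator yourself and call the
# undecorated implementation via `.func`:
@wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8192)
async def embedding_func(texts: list[str]):
    return await openai_embed.func(texts, model="text-embedding-3-small")
```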
</details>
<details>
@@ -473,6 +476,7 @@ rag = LightRAG(
<details>
<summary> <b>Using Ollama Models</b> </summary>
**Overview**
If you want to use Ollama models, you need to pull the model you plan to use and an embedding model, for example `nomic-embed-text`.
@@ -480,19 +484,20 @@ If you want to use Ollama models, you need to pull model you plan to use and emb
Then you only need to set LightRAG as follows:
```python
import numpy as np
from lightrag.utils import wrap_embedding_func_with_attrs
from lightrag.llm.ollama import ollama_model_complete, ollama_embed
@wrap_embedding_func_with_attrs(embedding_dim=768, max_token_size=8192)
async def embedding_func(texts: list[str]) -> np.ndarray:
return await ollama_embed.func(texts, embed_model="nomic-embed-text")
# Initialize LightRAG with Ollama model
rag = LightRAG(
working_dir=WORKING_DIR,
llm_model_func=ollama_model_complete, # Use Ollama model for text generation
llm_model_name='your_model_name', # Your model name
# Use Ollama embedding function
embedding_func=EmbeddingFunc(
embedding_dim=768,
func=lambda texts: ollama_embed(
texts,
embed_model="nomic-embed-text"
)
),
embedding_func=embedding_func, # Pass the decorated function directly
)
```
@@ -531,22 +536,27 @@ ollama create -f Modelfile qwen2m
You can use the `llm_model_kwargs` param to configure Ollama:
```python
import numpy as np
from lightrag.utils import wrap_embedding_func_with_attrs
from lightrag.llm.ollama import ollama_model_complete, ollama_embed
@wrap_embedding_func_with_attrs(embedding_dim=768, max_token_size=8192)
async def embedding_func(texts: list[str]) -> np.ndarray:
return await ollama_embed.func(texts, embed_model="nomic-embed-text")
rag = LightRAG(
working_dir=WORKING_DIR,
llm_model_func=ollama_model_complete, # Use Ollama model for text generation
llm_model_name='your_model_name', # Your model name
llm_model_kwargs={"options": {"num_ctx": 32768}},
# Use Ollama embedding function
embedding_func=EmbeddingFunc(
embedding_dim=768,
func=lambda texts: ollama_embed(
texts,
embed_model="nomic-embed-text"
)
),
embedding_func=embedding_func, # Pass the decorated function directly
)
```
> **Important Note on Embedding Function Wrapping:**
>
> `EmbeddingFunc` cannot be nested. Functions that have been decorated with `@wrap_embedding_func_with_attrs` (such as `openai_embed`, `ollama_embed`, etc.) cannot be wrapped again using `EmbeddingFunc()`. This is why we call `xxx_embed.func` (the underlying unwrapped function) instead of `xxx_embed` directly when creating custom embedding functions.
* **Low RAM GPUs**
In order to run this experiment on a low-RAM GPU you should select a small model and tune the context window (increasing the context increases memory consumption). For example, running this Ollama example on a repurposed mining GPU with 6 GB of RAM required setting the context size to 26k while using `gemma2:2b`. It was able to find 197 entities and 19 relations in `book.txt`.
@@ -558,7 +568,7 @@ In order to run this experiment on low RAM GPU you should select small model and
LightRAG supports integration with LlamaIndex (`llm/llama_index_impl.py`):
- Integrates with OpenAI and other providers through LlamaIndex
- See [LlamaIndex Documentation](lightrag/llm/Readme.md) for detailed setup and examples
- See [LlamaIndex Documentation](https://developers.llamaindex.ai/python/framework/) for detailed setup or the [examples](examples/unofficial-sample/)
**Example Usage**
@@ -620,9 +630,10 @@ if __name__ == "__main__":
**For detailed documentation and examples, see:**
- [LlamaIndex Documentation](lightrag/llm/Readme.md)
- [Direct OpenAI Example](examples/lightrag_llamaindex_direct_demo.py)
- [LiteLLM Proxy Example](examples/lightrag_llamaindex_litellm_demo.py)
- [LlamaIndex Documentation](https://developers.llamaindex.ai/python/framework/)
- [Direct OpenAI Example](examples/unofficial-sample/lightrag_llamaindex_direct_demo.py)
- [LiteLLM Proxy Example](examples/unofficial-sample/lightrag_llamaindex_litellm_demo.py)
- [LiteLLM Proxy with Opik Example](examples/unofficial-sample/lightrag_llamaindex_litellm_opik_demo.py)
</details>
@@ -849,7 +860,7 @@ see test_neo4j.py for a working example.
For production level scenarios you will most likely want to leverage an enterprise solution. PostgreSQL can provide a one-stop solution for you as KV store, VectorDB (pgvector) and GraphDB (apache AGE). PostgreSQL version 16.6 or higher is supported.
* PostgreSQL is lightweight; the whole binary distribution including all necessary plugins can be zipped to 40MB. Refer to the [Windows Release](https://github.com/ShanGor/apache-age-windows/releases/tag/PG17%2Fv1.5.0-rc0); installation on Linux/Mac is straightforward.
* If you prefer docker, please start with this image if you are a beginner to avoid hiccups (DO read the overview): https://hub.docker.com/r/shangor/postgres-for-rag
* If you prefer docker, please start with this image if you are a beginner to avoid hiccups (Default user password:rag/rag): https://hub.docker.com/r/gzdaniel/postgres-for-rag
* How to start? Ref to: [examples/lightrag_zhipu_postgres_demo.py](https://github.com/HKUDS/LightRAG/blob/main/examples/lightrag_zhipu_postgres_demo.py)
* For high-performance graph database requirements, Neo4j is recommended as Apache AGE's performance is not as competitive.
@@ -1559,7 +1570,7 @@ Langfuse provides a drop-in replacement for the OpenAI client that automatically
pip install lightrag-hku
pip install lightrag-hku[observability]
# Or install from souce code with debug mode enabled
# Or install from source code with debug mode enabled
pip install -e .
pip install -e ".[observability]"
```
@@ -1595,7 +1606,7 @@ Once installed and configured, Langfuse automatically traces all OpenAI LLM call
## RAGAS-based Evaluation
**RAGAS** (Retrieval Augmented Generation Assessment) is a framework for reference-free evaluation of RAG systems using LLMs. There is an evaluation script based on RAGAS. For detailed information, please refer to [RAGAS-based Evaluation Framework](lightrag/evaluation/README.md).
**RAGAS** (Retrieval Augmented Generation Assessment) is a framework for reference-free evaluation of RAG systems using LLMs. There is an evaluation script based on RAGAS. For detailed information, please refer to [RAGAS-based Evaluation Framework](lightrag/evaluation/README_EVALUASTION_RAGAS.md).
## Evaluation

View File

@@ -102,6 +102,9 @@ RERANK_BINDING=null
# RERANK_MODEL=rerank-v3.5
# RERANK_BINDING_HOST=https://api.cohere.com/v2/rerank
# RERANK_BINDING_API_KEY=your_rerank_api_key_here
### Cohere rerank chunking configuration (useful for models with token limits like ColBERT)
# RERANK_ENABLE_CHUNKING=true
# RERANK_MAX_TOKENS_PER_DOC=480
### Default value for Jina AI
# RERANK_MODEL=jina-reranker-v2-base-multilingual
@@ -183,9 +186,13 @@ LLM_MODEL=gpt-4o
LLM_BINDING_HOST=https://api.openai.com/v1
LLM_BINDING_API_KEY=your_api_key
### Env vars for Azure openai
### Azure OpenAI example
### Use deployment name as model name or set AZURE_OPENAI_DEPLOYMENT instead
# AZURE_OPENAI_API_VERSION=2024-08-01-preview
# AZURE_OPENAI_DEPLOYMENT=gpt-4o
# LLM_BINDING=azure_openai
# LLM_BINDING_HOST=https://xxxx.openai.azure.com/
# LLM_BINDING_API_KEY=your_api_key
# LLM_MODEL=my-gpt-mini-deployment
### Openrouter example
# LLM_MODEL=google/gemini-2.5-flash
@@ -273,11 +280,14 @@ EMBEDDING_TOKEN_LIMIT=8192
EMBEDDING_BINDING_HOST=https://api.openai.com/v1
EMBEDDING_BINDING_API_KEY=your_api_key
### Optional for Azure
# AZURE_EMBEDDING_DEPLOYMENT=text-embedding-3-large
# AZURE_EMBEDDING_API_VERSION=2023-05-15
# AZURE_EMBEDDING_ENDPOINT=your_endpoint
# AZURE_EMBEDDING_API_KEY=your_api_key
### Optional for Azure embedding
### Use deployment name as model name or set AZURE_EMBEDDING_DEPLOYMENT instead
# AZURE_EMBEDDING_API_VERSION=2024-08-01-preview
# EMBEDDING_BINDING=azure_openai
# EMBEDDING_BINDING_HOST=https://xxxx.openai.azure.com/
# EMBEDDING_API_KEY=your_api_key
# EMBEDDING_MODEL=my-text-embedding-3-large-deployment
# EMBEDDING_DIM=3072
### Gemini embedding
# EMBEDDING_BINDING=gemini
@@ -440,6 +450,17 @@ MEMGRAPH_DATABASE=memgraph
### DB specific workspace should not be set, keep for compatible only
### MEMGRAPH_WORKSPACE=forced_workspace_name
###########################################################
### Langfuse Observability Configuration
### Only works with LLMs served through an OpenAI-compatible API
### Install with: pip install lightrag-hku[observability]
### Sign up at: https://cloud.langfuse.com or self-host
###########################################################
# LANGFUSE_SECRET_KEY=""
# LANGFUSE_PUBLIC_KEY=""
# LANGFUSE_HOST="https://cloud.langfuse.com" # or your self-hosted instance URL
# LANGFUSE_ENABLE_TRACE=true
############################
### Evaluation Configuration
############################

View File

@@ -1,195 +0,0 @@
################################################################################
# Autogenerated .env entries list for LightRAG binding options
#
# To generate run:
# $ python -m lightrag.llm.binding_options
################################################################################
# ollama_embedding -- Context window size (number of tokens)
# OLLAMA_EMBEDDING_NUM_CTX=4096
# ollama_embedding -- Maximum number of tokens to predict
# OLLAMA_EMBEDDING_NUM_PREDICT=128
# ollama_embedding -- Number of tokens to keep from the initial prompt
# OLLAMA_EMBEDDING_NUM_KEEP=0
# ollama_embedding -- Random seed for generation (-1 for random)
# OLLAMA_EMBEDDING_SEED=-1
# ollama_embedding -- Controls randomness (0.0-2.0, higher = more creative)
# OLLAMA_EMBEDDING_TEMPERATURE=0.8
# ollama_embedding -- Top-k sampling parameter (0 = disabled)
# OLLAMA_EMBEDDING_TOP_K=40
# ollama_embedding -- Top-p (nucleus) sampling parameter (0.0-1.0)
# OLLAMA_EMBEDDING_TOP_P=0.9
# ollama_embedding -- Tail free sampling parameter (1.0 = disabled)
# OLLAMA_EMBEDDING_TFS_Z=1.0
# ollama_embedding -- Typical probability mass (1.0 = disabled)
# OLLAMA_EMBEDDING_TYPICAL_P=1.0
# ollama_embedding -- Minimum probability threshold (0.0 = disabled)
# OLLAMA_EMBEDDING_MIN_P=0.0
# ollama_embedding -- Number of tokens to consider for repetition penalty
# OLLAMA_EMBEDDING_REPEAT_LAST_N=64
# ollama_embedding -- Penalty for repetition (1.0 = no penalty)
# OLLAMA_EMBEDDING_REPEAT_PENALTY=1.1
# ollama_embedding -- Penalty for token presence (-2.0 to 2.0)
# OLLAMA_EMBEDDING_PRESENCE_PENALTY=0.0
# ollama_embedding -- Penalty for token frequency (-2.0 to 2.0)
# OLLAMA_EMBEDDING_FREQUENCY_PENALTY=0.0
# ollama_embedding -- Mirostat sampling algorithm (0=disabled, 1=Mirostat 1.0, 2=Mirostat 2.0)
# OLLAMA_EMBEDDING_MIROSTAT=0
# ollama_embedding -- Mirostat target entropy
# OLLAMA_EMBEDDING_MIROSTAT_TAU=5.0
# ollama_embedding -- Mirostat learning rate
# OLLAMA_EMBEDDING_MIROSTAT_ETA=0.1
# ollama_embedding -- Enable NUMA optimization
# OLLAMA_EMBEDDING_NUMA=False
# ollama_embedding -- Batch size for processing
# OLLAMA_EMBEDDING_NUM_BATCH=512
# ollama_embedding -- Number of GPUs to use (-1 for auto)
# OLLAMA_EMBEDDING_NUM_GPU=-1
# ollama_embedding -- Main GPU index
# OLLAMA_EMBEDDING_MAIN_GPU=0
# ollama_embedding -- Optimize for low VRAM
# OLLAMA_EMBEDDING_LOW_VRAM=False
# ollama_embedding -- Number of CPU threads (0 for auto)
# OLLAMA_EMBEDDING_NUM_THREAD=0
# ollama_embedding -- Use half-precision for key/value cache
# OLLAMA_EMBEDDING_F16_KV=True
# ollama_embedding -- Return logits for all tokens
# OLLAMA_EMBEDDING_LOGITS_ALL=False
# ollama_embedding -- Only load vocabulary
# OLLAMA_EMBEDDING_VOCAB_ONLY=False
# ollama_embedding -- Use memory mapping for model files
# OLLAMA_EMBEDDING_USE_MMAP=True
# ollama_embedding -- Lock model in memory
# OLLAMA_EMBEDDING_USE_MLOCK=False
# ollama_embedding -- Only use for embeddings
# OLLAMA_EMBEDDING_EMBEDDING_ONLY=False
# ollama_embedding -- Penalize newline tokens
# OLLAMA_EMBEDDING_PENALIZE_NEWLINE=True
# ollama_embedding -- Stop sequences (comma-separated string)
# OLLAMA_EMBEDDING_STOP=
# ollama_llm -- Context window size (number of tokens)
# OLLAMA_LLM_NUM_CTX=4096
# ollama_llm -- Maximum number of tokens to predict
# OLLAMA_LLM_NUM_PREDICT=128
# ollama_llm -- Number of tokens to keep from the initial prompt
# OLLAMA_LLM_NUM_KEEP=0
# ollama_llm -- Random seed for generation (-1 for random)
# OLLAMA_LLM_SEED=-1
# ollama_llm -- Controls randomness (0.0-2.0, higher = more creative)
# OLLAMA_LLM_TEMPERATURE=0.8
# ollama_llm -- Top-k sampling parameter (0 = disabled)
# OLLAMA_LLM_TOP_K=40
# ollama_llm -- Top-p (nucleus) sampling parameter (0.0-1.0)
# OLLAMA_LLM_TOP_P=0.9
# ollama_llm -- Tail free sampling parameter (1.0 = disabled)
# OLLAMA_LLM_TFS_Z=1.0
# ollama_llm -- Typical probability mass (1.0 = disabled)
# OLLAMA_LLM_TYPICAL_P=1.0
# ollama_llm -- Minimum probability threshold (0.0 = disabled)
# OLLAMA_LLM_MIN_P=0.0
# ollama_llm -- Number of tokens to consider for repetition penalty
# OLLAMA_LLM_REPEAT_LAST_N=64
# ollama_llm -- Penalty for repetition (1.0 = no penalty)
# OLLAMA_LLM_REPEAT_PENALTY=1.1
# ollama_llm -- Penalty for token presence (-2.0 to 2.0)
# OLLAMA_LLM_PRESENCE_PENALTY=0.0
# ollama_llm -- Penalty for token frequency (-2.0 to 2.0)
# OLLAMA_LLM_FREQUENCY_PENALTY=0.0
# ollama_llm -- Mirostat sampling algorithm (0=disabled, 1=Mirostat 1.0, 2=Mirostat 2.0)
# OLLAMA_LLM_MIROSTAT=0
# ollama_llm -- Mirostat target entropy
# OLLAMA_LLM_MIROSTAT_TAU=5.0
# ollama_llm -- Mirostat learning rate
# OLLAMA_LLM_MIROSTAT_ETA=0.1
# ollama_llm -- Enable NUMA optimization
# OLLAMA_LLM_NUMA=False
# ollama_llm -- Batch size for processing
# OLLAMA_LLM_NUM_BATCH=512
# ollama_llm -- Number of GPUs to use (-1 for auto)
# OLLAMA_LLM_NUM_GPU=-1
# ollama_llm -- Main GPU index
# OLLAMA_LLM_MAIN_GPU=0
# ollama_llm -- Optimize for low VRAM
# OLLAMA_LLM_LOW_VRAM=False
# ollama_llm -- Number of CPU threads (0 for auto)
# OLLAMA_LLM_NUM_THREAD=0
# ollama_llm -- Use half-precision for key/value cache
# OLLAMA_LLM_F16_KV=True
# ollama_llm -- Return logits for all tokens
# OLLAMA_LLM_LOGITS_ALL=False
# ollama_llm -- Only load vocabulary
# OLLAMA_LLM_VOCAB_ONLY=False
# ollama_llm -- Use memory mapping for model files
# OLLAMA_LLM_USE_MMAP=True
# ollama_llm -- Lock model in memory
# OLLAMA_LLM_USE_MLOCK=False
# ollama_llm -- Only use for embeddings
# OLLAMA_LLM_EMBEDDING_ONLY=False
# ollama_llm -- Penalize newline tokens
# OLLAMA_LLM_PENALIZE_NEWLINE=True
# ollama_llm -- Stop sequences (comma-separated string)
# OLLAMA_LLM_STOP=
#
# End of .env entries for LightRAG binding options
################################################################################

View File

@@ -15,9 +15,12 @@ Configuration Required:
EMBEDDING_BINDING_HOST
EMBEDDING_BINDING_API_KEY
3. Configure your deployed rerank model (e.g., via vLLM) with env vars:
RERANK_MODEL
RERANK_BINDING_HOST
RERANK_BINDING=cohere
RERANK_MODEL (e.g., answerai-colbert-small-v1 or rerank-v3.5)
RERANK_BINDING_HOST (e.g., https://api.cohere.com/v2/rerank or LiteLLM proxy)
RERANK_BINDING_API_KEY
RERANK_ENABLE_CHUNKING=true (optional, for models with token limits)
RERANK_MAX_TOKENS_PER_DOC=480 (optional, default 4096)
Note: Rerank is controlled per query via the 'enable_rerank' parameter (default: True)
"""
@@ -66,9 +69,11 @@ async def embedding_func(texts: list[str]) -> np.ndarray:
rerank_model_func = partial(
cohere_rerank,
model=os.getenv("RERANK_MODEL"),
model=os.getenv("RERANK_MODEL", "rerank-v3.5"),
api_key=os.getenv("RERANK_BINDING_API_KEY"),
base_url=os.getenv("RERANK_BINDING_HOST"),
base_url=os.getenv("RERANK_BINDING_HOST", "https://api.cohere.com/v2/rerank"),
enable_chunking=os.getenv("RERANK_ENABLE_CHUNKING", "false").lower() == "true",
max_tokens_per_doc=int(os.getenv("RERANK_MAX_TOKENS_PER_DOC", "4096")),
)
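As the docstring above notes, reranking is controlled per query; a minimal sketch of that toggle (assuming the standard `QueryParam` import; `mode="mix"` is an example value):
```python
from lightrag import QueryParam

# Reranking is enabled by default for each query...
default_param = QueryParam(mode="mix")

# ...and can be disabled for an individual query when latency matters more.
no_rerank_param = QueryParam(mode="mix", enable_rerank=False)
```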

View File

@@ -1 +1 @@
__api_version__ = "0256"
__api_version__ = "0260"

View File

@@ -365,8 +365,12 @@ def parse_args() -> argparse.Namespace:
# Inject model configuration
args.llm_model = get_env_value("LLM_MODEL", "mistral-nemo:latest")
args.embedding_model = get_env_value("EMBEDDING_MODEL", "bge-m3:latest")
args.embedding_dim = get_env_value("EMBEDDING_DIM", 1024, int)
# EMBEDDING_MODEL defaults to None - each binding will use its own default model
# e.g., OpenAI uses "text-embedding-3-small", Jina uses "jina-embeddings-v4"
args.embedding_model = get_env_value("EMBEDDING_MODEL", None, special_none=True)
# EMBEDDING_DIM defaults to None - each binding will use its own default dimension
# Value is inherited from provider defaults via wrap_embedding_func_with_attrs decorator
args.embedding_dim = get_env_value("EMBEDDING_DIM", None, int, special_none=True)
args.embedding_send_dim = get_env_value("EMBEDDING_SEND_DIM", False, bool)
# Inject chunk configuration

View File

@@ -159,19 +159,22 @@ def check_frontend_build():
"""Check if frontend is built and optionally check if source is up-to-date
Returns:
bool: True if frontend is outdated, False if up-to-date or production environment
tuple: (assets_exist: bool, is_outdated: bool)
- assets_exist: True if WebUI build files exist
- is_outdated: True if source is newer than build (only in dev environment)
"""
webui_dir = Path(__file__).parent / "webui"
index_html = webui_dir / "index.html"
# 1. Check if build files exist (required)
# 1. Check if build files exist
if not index_html.exists():
ASCIIColors.red("\n" + "=" * 80)
ASCIIColors.red("ERROR: Frontend Not Built")
ASCIIColors.red("=" * 80)
ASCIIColors.yellow("\n" + "=" * 80)
ASCIIColors.yellow("WARNING: Frontend Not Built")
ASCIIColors.yellow("=" * 80)
ASCIIColors.yellow("The WebUI frontend has not been built yet.")
ASCIIColors.yellow("The API server will start without the WebUI interface.")
ASCIIColors.yellow(
"Please build the frontend code first using the following commands:\n"
"\nTo enable WebUI, build the frontend using these commands:\n"
)
ASCIIColors.cyan(" cd lightrag_webui")
ASCIIColors.cyan(" bun install --frozen-lockfile")
@@ -181,8 +184,8 @@ def check_frontend_build():
ASCIIColors.cyan(
"Note: Make sure you have Bun installed. Visit https://bun.sh for installation."
)
ASCIIColors.red("=" * 80 + "\n")
sys.exit(1) # Exit immediately
ASCIIColors.yellow("=" * 80 + "\n")
return (False, False) # Assets don't exist, not outdated
# 2. Check if this is a development environment (source directory exists)
try:
@@ -195,7 +198,7 @@ def check_frontend_build():
logger.debug(
"Production environment detected, skipping source freshness check"
)
return False
return (True, False) # Assets exist, not outdated (prod environment)
# Development environment, perform source code timestamp check
logger.debug("Development environment detected, checking source freshness")
@@ -270,20 +273,20 @@ def check_frontend_build():
ASCIIColors.cyan(" cd ..")
ASCIIColors.yellow("\nThe server will continue with the current build.")
ASCIIColors.yellow("=" * 80 + "\n")
return True # Frontend is outdated
return (True, True) # Assets exist, outdated
else:
logger.info("Frontend build is up-to-date")
return False # Frontend is up-to-date
return (True, False) # Assets exist, up-to-date
except Exception as e:
# If check fails, log warning but don't affect startup
logger.warning(f"Failed to check frontend source freshness: {e}")
return False # Assume up-to-date on error
return (True, False) # Assume assets exist and up-to-date on error
def create_app(args):
# Check frontend build first and get outdated status
is_frontend_outdated = check_frontend_build()
# Check frontend build first and get status
webui_assets_exist, is_frontend_outdated = check_frontend_build()
# Create unified API version display with warning symbol if frontend is outdated
api_version_display = (
@@ -651,6 +654,17 @@ def create_app(args):
2. Extracts max_token_size and embedding_dim from provider if it's an EmbeddingFunc
3. Creates an optimized wrapper that calls the underlying function directly (avoiding double-wrapping)
4. Returns a properly configured EmbeddingFunc instance
Configuration Rules:
- When EMBEDDING_MODEL is not set: Uses provider's default model and dimension
(e.g., jina-embeddings-v4 with 2048 dims, text-embedding-3-small with 1536 dims)
- When EMBEDDING_MODEL is set to a custom model: User MUST also set EMBEDDING_DIM
to match the custom model's dimension (e.g., for jina-embeddings-v3, set EMBEDDING_DIM=1024)
Note: The embedding_dim parameter is automatically injected by EmbeddingFunc wrapper
when send_dimensions=True (enabled for Jina and Gemini bindings). This wrapper calls
the underlying provider function directly (.func) to avoid double-wrapping, so we must
explicitly pass embedding_dim to the provider's underlying function.
"""
# Step 1: Import provider function and extract default attributes
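A small illustration of the configuration rule above (environment variable names come from `.env`; the model and dimension values mirror the docstring's examples):
```python
import os

# Case 1: leave EMBEDDING_MODEL unset and the binding falls back to its own
# default model and dimension (e.g. text-embedding-3-small / 1536 for OpenAI,
# jina-embeddings-v4 / 2048 for Jina).
os.environ.pop("EMBEDDING_MODEL", None)
os.environ.pop("EMBEDDING_DIM", None)

# Case 2: overriding the model requires pinning the matching dimension as well.
os.environ["EMBEDDING_MODEL"] = "jina-embeddings-v3"
os.environ["EMBEDDING_DIM"] = "1024"
```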
@@ -710,6 +724,7 @@ def create_app(args):
)
# Step 3: Create optimized embedding function (calls underlying function directly)
# Note: When model is None, each binding will use its own default model
async def optimized_embedding_function(texts, embedding_dim=None):
try:
if binding == "lollms":
@@ -721,9 +736,9 @@ def create_app(args):
if isinstance(lollms_embed, EmbeddingFunc)
else lollms_embed
)
return await actual_func(
texts, embed_model=model, host=host, api_key=api_key
)
# lollms embed_model is not used (server uses configured vectorizer)
# Only pass base_url and api_key
return await actual_func(texts, base_url=host, api_key=api_key)
elif binding == "ollama":
from lightrag.llm.ollama import ollama_embed
@@ -742,13 +757,16 @@ def create_app(args):
ollama_options = OllamaEmbeddingOptions.options_dict(args)
return await actual_func(
texts,
embed_model=model,
host=host,
api_key=api_key,
options=ollama_options,
)
# Pass embed_model only if provided, let function use its default (bge-m3:latest)
kwargs = {
"texts": texts,
"host": host,
"api_key": api_key,
"options": ollama_options,
}
if model:
kwargs["embed_model"] = model
return await actual_func(**kwargs)
elif binding == "azure_openai":
from lightrag.llm.azure_openai import azure_openai_embed
@@ -757,7 +775,11 @@ def create_app(args):
if isinstance(azure_openai_embed, EmbeddingFunc)
else azure_openai_embed
)
return await actual_func(texts, model=model, api_key=api_key)
# Pass model only if provided, let function use its default otherwise
kwargs = {"texts": texts, "api_key": api_key}
if model:
kwargs["model"] = model
return await actual_func(**kwargs)
elif binding == "aws_bedrock":
from lightrag.llm.bedrock import bedrock_embed
@@ -766,7 +788,11 @@ def create_app(args):
if isinstance(bedrock_embed, EmbeddingFunc)
else bedrock_embed
)
return await actual_func(texts, model=model)
# Pass model only if provided, let function use its default otherwise
kwargs = {"texts": texts}
if model:
kwargs["model"] = model
return await actual_func(**kwargs)
elif binding == "jina":
from lightrag.llm.jina import jina_embed
@@ -775,12 +801,16 @@ def create_app(args):
if isinstance(jina_embed, EmbeddingFunc)
else jina_embed
)
return await actual_func(
texts,
embedding_dim=embedding_dim,
base_url=host,
api_key=api_key,
)
# Pass model only if provided, let function use its default (jina-embeddings-v4)
kwargs = {
"texts": texts,
"embedding_dim": embedding_dim,
"base_url": host,
"api_key": api_key,
}
if model:
kwargs["model"] = model
return await actual_func(**kwargs)
elif binding == "gemini":
from lightrag.llm.gemini import gemini_embed
@@ -798,14 +828,19 @@ def create_app(args):
gemini_options = GeminiEmbeddingOptions.options_dict(args)
return await actual_func(
texts,
model=model,
base_url=host,
api_key=api_key,
embedding_dim=embedding_dim,
task_type=gemini_options.get("task_type", "RETRIEVAL_DOCUMENT"),
)
# Pass model only if provided, let function use its default (gemini-embedding-001)
kwargs = {
"texts": texts,
"base_url": host,
"api_key": api_key,
"embedding_dim": embedding_dim,
"task_type": gemini_options.get(
"task_type", "RETRIEVAL_DOCUMENT"
),
}
if model:
kwargs["model"] = model
return await actual_func(**kwargs)
else: # openai and compatible
from lightrag.llm.openai import openai_embed
@@ -814,13 +849,16 @@ def create_app(args):
if isinstance(openai_embed, EmbeddingFunc)
else openai_embed
)
return await actual_func(
texts,
model=model,
base_url=host,
api_key=api_key,
embedding_dim=embedding_dim,
)
# Pass model only if provided, let function use its default (text-embedding-3-small)
kwargs = {
"texts": texts,
"base_url": host,
"api_key": api_key,
"embedding_dim": embedding_dim,
}
if model:
kwargs["model"] = model
return await actual_func(**kwargs)
except ImportError as e:
raise Exception(f"Failed to import {binding} embedding: {e}")
@@ -967,15 +1005,27 @@ def create_app(args):
query: str, documents: list, top_n: int = None, extra_body: dict = None
):
"""Server rerank function with configuration from environment variables"""
return await selected_rerank_func(
query=query,
documents=documents,
top_n=top_n,
api_key=args.rerank_binding_api_key,
model=args.rerank_model,
base_url=args.rerank_binding_host,
extra_body=extra_body,
)
# Prepare kwargs for rerank function
kwargs = {
"query": query,
"documents": documents,
"top_n": top_n,
"api_key": args.rerank_binding_api_key,
"model": args.rerank_model,
"base_url": args.rerank_binding_host,
}
# Add Cohere-specific parameters if using cohere binding
if args.rerank_binding == "cohere":
# Enable chunking if configured (useful for models with token limits like ColBERT)
kwargs["enable_chunking"] = (
os.getenv("RERANK_ENABLE_CHUNKING", "false").lower() == "true"
)
kwargs["max_tokens_per_doc"] = int(
os.getenv("RERANK_MAX_TOKENS_PER_DOC", "4096")
)
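# Illustrative env settings for the cohere binding (example values, not defaults from this diff):
#   RERANK_ENABLE_CHUNKING=true
#   RERANK_MAX_TOKENS_PER_DOC=2048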
return await selected_rerank_func(**kwargs, extra_body=extra_body)
rerank_model_func = server_rerank_func
logger.info(
@@ -1067,8 +1117,11 @@ def create_app(args):
@app.get("/")
async def redirect_to_webui():
"""Redirect root path to /webui"""
return RedirectResponse(url="/webui")
"""Redirect root path based on WebUI availability"""
if webui_assets_exist:
return RedirectResponse(url="/webui")
else:
return RedirectResponse(url="/docs")
@app.get("/auth-status")
async def get_auth_status():
@@ -1135,9 +1188,41 @@ def create_app(args):
"webui_description": webui_description,
}
@app.get("/health", dependencies=[Depends(combined_auth)])
@app.get(
"/health",
dependencies=[Depends(combined_auth)],
summary="Get system health and configuration status",
description="Returns comprehensive system status including WebUI availability, configuration, and operational metrics",
response_description="System health status with configuration details",
responses={
200: {
"description": "Successful response with system status",
"content": {
"application/json": {
"example": {
"status": "healthy",
"webui_available": True,
"working_directory": "/path/to/working/dir",
"input_directory": "/path/to/input/dir",
"configuration": {
"llm_binding": "openai",
"llm_model": "gpt-4",
"embedding_binding": "openai",
"embedding_model": "text-embedding-ada-002",
"workspace": "default",
},
"auth_mode": "enabled",
"pipeline_busy": False,
"core_version": "0.0.1",
"api_version": "0.0.1",
}
}
},
}
},
)
async def get_status(request: Request):
"""Get current system status"""
"""Get current system status including WebUI availability"""
try:
workspace = get_workspace_from_request(request)
default_workspace = get_default_workspace()
@@ -1157,6 +1242,7 @@ def create_app(args):
return {
"status": "healthy",
"webui_available": webui_assets_exist,
"working_directory": str(args.working_dir),
"input_directory": str(args.input_dir),
"configuration": {
@@ -1246,16 +1332,27 @@ def create_app(args):
name="swagger-ui-static",
)
# Webui mount webui/index.html
static_dir = Path(__file__).parent / "webui"
static_dir.mkdir(exist_ok=True)
app.mount(
"/webui",
SmartStaticFiles(
directory=static_dir, html=True, check_dir=True
), # Use SmartStaticFiles
name="webui",
)
# Conditionally mount WebUI only if assets exist
if webui_assets_exist:
static_dir = Path(__file__).parent / "webui"
static_dir.mkdir(exist_ok=True)
app.mount(
"/webui",
SmartStaticFiles(
directory=static_dir, html=True, check_dir=True
), # Use SmartStaticFiles
name="webui",
)
logger.info("WebUI assets mounted at /webui")
else:
logger.info("WebUI assets not available, /webui route not mounted")
# Add redirect for /webui when assets are not available
@app.get("/webui")
@app.get("/webui/")
async def webui_redirect_to_docs():
"""Redirect /webui to /docs when WebUI is not available"""
return RedirectResponse(url="/docs")
return app

View File

@@ -24,7 +24,11 @@ from pydantic import BaseModel, Field, field_validator
from lightrag import LightRAG
from lightrag.base import DeletionResult, DocProcessingStatus, DocStatus
from lightrag.utils import generate_track_id
from lightrag.utils import (
generate_track_id,
compute_mdhash_id,
sanitize_text_for_encoding,
)
from lightrag.api.utils_api import get_combined_auth_dependency
from ..config import global_args
@@ -159,7 +163,7 @@ class ReprocessResponse(BaseModel):
Attributes:
status: Status of the reprocessing operation
message: Message describing the operation result
track_id: Tracking ID for monitoring reprocessing progress
track_id: Always an empty string; reprocessed documents retain their original track_id.
"""
status: Literal["reprocessing_started"] = Field(
@@ -167,7 +171,8 @@ class ReprocessResponse(BaseModel):
)
message: str = Field(description="Human-readable message describing the operation")
track_id: str = Field(
description="Tracking ID for monitoring reprocessing progress"
default="",
description="Always empty string. Reprocessed documents retain their original track_id from initial upload.",
)
class Config:
@@ -175,7 +180,7 @@ class ReprocessResponse(BaseModel):
"example": {
"status": "reprocessing_started",
"message": "Reprocessing of failed documents has been initiated in background",
"track_id": "retry_20250729_170612_def456",
"track_id": "",
}
}
@@ -976,19 +981,82 @@ def _extract_pdf_pypdf(file_bytes: bytes, password: str = None) -> str:
def _extract_docx(file_bytes: bytes) -> str:
"""Extract DOCX content (synchronous).
"""Extract DOCX content including tables in document order (synchronous).
Args:
file_bytes: DOCX file content as bytes
Returns:
str: Extracted text content
str: Extracted text content with tables in their original positions.
Tables are separated from paragraphs with blank lines for clarity.
"""
from docx import Document # type: ignore
from docx.table import Table # type: ignore
from docx.text.paragraph import Paragraph # type: ignore
docx_file = BytesIO(file_bytes)
doc = Document(docx_file)
return "\n".join([paragraph.text for paragraph in doc.paragraphs])
def escape_cell(cell_value: str | None) -> str:
"""Escape characters that would break tab-delimited layout.
Escape order is critical: backslashes first, then tabs/newlines.
This prevents double-escaping issues.
Args:
cell_value: The cell value to escape (can be None or str)
Returns:
str: Escaped cell value safe for tab-delimited format
"""
if cell_value is None:
return ""
text = str(cell_value)
# CRITICAL: Escape backslash first to avoid double-escaping
return (
text.replace("\\", "\\\\") # Must be first: \ -> \\
.replace("\t", "\\t") # Tab -> \t (visible)
.replace("\r\n", "\\n") # Windows newline -> \n
.replace("\r", "\\n") # Mac newline -> \n
.replace("\n", "\\n") # Unix newline -> \n
)
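# Sketch of the expected escaping (illustrative inputs, not taken from the diff):
#   escape_cell(None) == ""
#   escape_cell("a\tb") == "a\\tb"                  # tab becomes a visible \t
#   escape_cell("line1\r\nline2") == "line1\\nline2"
#   escape_cell("back\\slash") == "back\\\\slash"   # backslash escaped first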
content_parts = []
in_table = False # Track if we're currently processing a table
# Iterate through all body elements in document order
for element in doc.element.body:
# Check if element is a paragraph
if element.tag.endswith("p"):
# If coming out of a table, add blank line after table
if in_table:
content_parts.append("") # Blank line after table
in_table = False
paragraph = Paragraph(element, doc)
text = paragraph.text
# Always append to preserve document spacing (including blank paragraphs)
content_parts.append(text)
# Check if element is a table
elif element.tag.endswith("tbl"):
# Add blank line before table (if content exists)
if content_parts and not in_table:
content_parts.append("") # Blank line before table
in_table = True
table = Table(element, doc)
for row in table.rows:
row_text = []
for cell in row.cells:
cell_text = cell.text
# Escape special characters to preserve tab-delimited structure
row_text.append(escape_cell(cell_text))
# Only add row if at least one cell has content
if any(cell for cell in row_text):
content_parts.append("\t".join(row_text))
return "\n".join(content_parts)
def _extract_pptx(file_bytes: bytes) -> str:
@@ -1013,27 +1081,112 @@ def _extract_pptx(file_bytes: bytes) -> str:
def _extract_xlsx(file_bytes: bytes) -> str:
"""Extract XLSX content (synchronous).
"""Extract XLSX content in tab-delimited format with clear sheet separation.
This function processes Excel workbooks and converts them to a structured text format
suitable for LLM prompts and RAG systems. Each sheet is clearly delimited with
separator lines, and special characters are escaped to preserve the tab-delimited structure.
Features:
- Each sheet is wrapped with '====================' separators for visual distinction
- Special characters (tabs, newlines, backslashes) are escaped to prevent structure corruption
- Column alignment is preserved across all rows to maintain tabular structure
- Empty rows are preserved as blank lines to maintain row structure
- Uses sheet.max_column to determine the column count efficiently
Args:
file_bytes: XLSX file content as bytes
Returns:
str: Extracted text content
str: Extracted text content with all sheets in tab-delimited format.
Format: Sheet separators, sheet name, then tab-delimited rows.
Example output:
==================== Sheet: Data ====================
Name\tAge\tCity
Alice\t30\tNew York
Bob\t25\tLondon
==================== Sheet: Summary ====================
Total\t2
====================
"""
from openpyxl import load_workbook # type: ignore
xlsx_file = BytesIO(file_bytes)
wb = load_workbook(xlsx_file)
content = ""
for sheet in wb:
content += f"Sheet: {sheet.title}\n"
def escape_cell(cell_value: str | int | float | None) -> str:
"""Escape characters that would break tab-delimited layout.
Escape order is critical: backslashes first, then tabs/newlines.
This prevents double-escaping issues.
Args:
cell_value: The cell value to escape (can be None, str, int, or float)
Returns:
str: Escaped cell value safe for tab-delimited format
"""
if cell_value is None:
return ""
text = str(cell_value)
# CRITICAL: Escape backslash first to avoid double-escaping
return (
text.replace("\\", "\\\\") # Must be first: \ -> \\
.replace("\t", "\\t") # Tab -> \t (visible)
.replace("\r\n", "\\n") # Windows newline -> \n
.replace("\r", "\\n") # Mac newline -> \n
.replace("\n", "\\n") # Unix newline -> \n
)
def escape_sheet_title(title: str) -> str:
"""Escape sheet title to prevent formatting issues in separators.
Args:
title: Original sheet title
Returns:
str: Sanitized sheet title with tabs/newlines replaced
"""
return str(title).replace("\n", " ").replace("\t", " ").replace("\r", " ")
content_parts: list[str] = []
sheet_separator = "=" * 20
for idx, sheet in enumerate(wb):
if idx > 0:
content_parts.append("") # Blank line between sheets for readability
# Escape sheet title to handle edge cases with special characters
safe_title = escape_sheet_title(sheet.title)
content_parts.append(f"{sheet_separator} Sheet: {safe_title} {sheet_separator}")
# Use sheet.max_column to get the number of columns directly
max_columns = sheet.max_column if sheet.max_column else 0
# Extract rows padded to a consistent column count to preserve alignment
for row in sheet.iter_rows(values_only=True):
content += (
"\t".join(str(cell) if cell is not None else "" for cell in row) + "\n"
)
content += "\n"
return content
row_parts = []
# Pad each row out to max_columns entries
for idx in range(max_columns):
if idx < len(row):
row_parts.append(escape_cell(row[idx]))
else:
row_parts.append("") # Pad short rows
# Check if row is completely empty
if all(part == "" for part in row_parts):
# Preserve empty rows as blank lines (maintains row structure)
content_parts.append("")
else:
# Join all columns to maintain consistent column count
content_parts.append("\t".join(row_parts))
# Final separator for symmetry (makes parsing easier)
content_parts.append(sheet_separator)
return "\n".join(content_parts)
async def pipeline_enqueue_file(
@@ -1949,12 +2102,14 @@ def create_document_routes(
# Check if filename already exists in doc_status storage
existing_doc_data = await rag.doc_status.get_doc_by_file_path(safe_filename)
if existing_doc_data:
# Get document status information for error message
# Get document status and track_id from existing document
status = existing_doc_data.get("status", "unknown")
# Use `or ""` to handle both missing key and None value (e.g., legacy rows without track_id)
existing_track_id = existing_doc_data.get("track_id") or ""
return InsertResponse(
status="duplicated",
message=f"File '{safe_filename}' already exists in document storage (Status: {status}).",
track_id="",
track_id=existing_track_id,
)
file_path = doc_manager.input_dir / safe_filename
@@ -2018,14 +2173,30 @@ def create_document_routes(
request.file_source
)
if existing_doc_data:
# Get document status information for error message
# Get document status and track_id from existing document
status = existing_doc_data.get("status", "unknown")
# Use `or ""` to handle both missing key and None value (e.g., legacy rows without track_id)
existing_track_id = existing_doc_data.get("track_id") or ""
return InsertResponse(
status="duplicated",
message=f"File source '{request.file_source}' already exists in document storage (Status: {status}).",
track_id="",
track_id=existing_track_id,
)
# Check if content already exists by computing content hash (doc_id)
sanitized_text = sanitize_text_for_encoding(request.text)
content_doc_id = compute_mdhash_id(sanitized_text, prefix="doc-")
existing_doc = await rag.doc_status.get_by_id(content_doc_id)
if existing_doc:
# Content already exists, return duplicated with existing track_id
status = existing_doc.get("status", "unknown")
existing_track_id = existing_doc.get("track_id") or ""
return InsertResponse(
status="duplicated",
message=f"Identical content already exists in document storage (doc_id: {content_doc_id}, Status: {status}).",
track_id=existing_track_id,
)
# Generate track_id for text insertion
track_id = generate_track_id("insert")
@@ -2084,14 +2255,31 @@ def create_document_routes(
file_source
)
if existing_doc_data:
# Get document status information for error message
# Get document status and track_id from existing document
status = existing_doc_data.get("status", "unknown")
# Use `or ""` to handle both missing key and None value (e.g., legacy rows without track_id)
existing_track_id = existing_doc_data.get("track_id") or ""
return InsertResponse(
status="duplicated",
message=f"File source '{file_source}' already exists in document storage (Status: {status}).",
track_id="",
track_id=existing_track_id,
)
# Check if any content already exists by computing content hash (doc_id)
for text in request.texts:
sanitized_text = sanitize_text_for_encoding(text)
content_doc_id = compute_mdhash_id(sanitized_text, prefix="doc-")
existing_doc = await rag.doc_status.get_by_id(content_doc_id)
if existing_doc:
# Content already exists, return duplicated with existing track_id
status = existing_doc.get("status", "unknown")
existing_track_id = existing_doc.get("track_id") or ""
return InsertResponse(
status="duplicated",
message=f"Identical content already exists in document storage (doc_id: {content_doc_id}, Status: {status}).",
track_id=existing_track_id,
)
# Generate track_id for texts insertion
track_id = generate_track_id("insert")
@@ -2910,29 +3098,27 @@ def create_document_routes(
This is useful for recovering from server crashes, network errors, LLM service
outages, or other temporary failures that caused document processing to fail.
The processing happens in the background and can be monitored using the
returned track_id or by checking the pipeline status.
The processing happens in the background and can be monitored by checking the
pipeline status. Reprocessed documents retain their original track_id from the
initial upload, which can be used to monitor their progress.
Returns:
ReprocessResponse: Response with status, message, and track_id
ReprocessResponse: Response with status and message.
track_id is always an empty string because reprocessed documents retain
their original track_id from the initial upload.
Raises:
HTTPException: If an error occurs while initiating reprocessing (500).
"""
try:
# Generate track_id with "retry" prefix for retry operation
track_id = generate_track_id("retry")
# Start the reprocessing in the background
# Note: Reprocessed documents retain their original track_id from initial upload
background_tasks.add_task(rag.apipeline_process_enqueue_documents)
logger.info(
f"Reprocessing of failed documents initiated with track_id: {track_id}"
)
logger.info("Reprocessing of failed documents initiated")
return ReprocessResponse(
status="reprocessing_started",
message="Reprocessing of failed documents has been initiated in background",
track_id=track_id,
message="Reprocessing of failed documents has been initiated in background. Documents retain their original track_id.",
)
except Exception as e:

View File

@@ -8,7 +8,6 @@ import re
from enum import Enum
from fastapi.responses import StreamingResponse
import asyncio
from ascii_colors import trace_exception
from lightrag import LightRAG, QueryParam
from lightrag.utils import TiktokenTokenizer
from lightrag.api.utils_api import get_combined_auth_dependency
@@ -309,118 +308,113 @@ class OllamaAPI:
)
async def stream_generator():
try:
first_chunk_time = None
first_chunk_time = None
last_chunk_time = time.time_ns()
total_response = ""
# Ensure response is an async generator
if isinstance(response, str):
# If it's a string, send in two parts
first_chunk_time = start_time
last_chunk_time = time.time_ns()
total_response = ""
total_response = response
# Ensure response is an async generator
if isinstance(response, str):
# If it's a string, send in two parts
first_chunk_time = start_time
last_chunk_time = time.time_ns()
total_response = response
data = {
"model": self.ollama_server_infos.LIGHTRAG_MODEL,
"created_at": self.ollama_server_infos.LIGHTRAG_CREATED_AT,
"response": response,
"done": False,
}
yield f"{json.dumps(data, ensure_ascii=False)}\n"
data = {
completion_tokens = estimate_tokens(total_response)
total_time = last_chunk_time - start_time
prompt_eval_time = first_chunk_time - start_time
eval_time = last_chunk_time - first_chunk_time
data = {
"model": self.ollama_server_infos.LIGHTRAG_MODEL,
"created_at": self.ollama_server_infos.LIGHTRAG_CREATED_AT,
"response": "",
"done": True,
"done_reason": "stop",
"context": [],
"total_duration": total_time,
"load_duration": 0,
"prompt_eval_count": prompt_tokens,
"prompt_eval_duration": prompt_eval_time,
"eval_count": completion_tokens,
"eval_duration": eval_time,
}
yield f"{json.dumps(data, ensure_ascii=False)}\n"
else:
try:
async for chunk in response:
if chunk:
if first_chunk_time is None:
first_chunk_time = time.time_ns()
last_chunk_time = time.time_ns()
total_response += chunk
data = {
"model": self.ollama_server_infos.LIGHTRAG_MODEL,
"created_at": self.ollama_server_infos.LIGHTRAG_CREATED_AT,
"response": chunk,
"done": False,
}
yield f"{json.dumps(data, ensure_ascii=False)}\n"
except (asyncio.CancelledError, Exception) as e:
error_msg = str(e)
if isinstance(e, asyncio.CancelledError):
error_msg = "Stream was cancelled by server"
else:
error_msg = f"Provider error: {error_msg}"
logger.error(f"Stream error: {error_msg}")
# Send error message to client
error_data = {
"model": self.ollama_server_infos.LIGHTRAG_MODEL,
"created_at": self.ollama_server_infos.LIGHTRAG_CREATED_AT,
"response": response,
"response": f"\n\nError: {error_msg}",
"error": f"\n\nError: {error_msg}",
"done": False,
}
yield f"{json.dumps(data, ensure_ascii=False)}\n"
yield f"{json.dumps(error_data, ensure_ascii=False)}\n"
completion_tokens = estimate_tokens(total_response)
total_time = last_chunk_time - start_time
prompt_eval_time = first_chunk_time - start_time
eval_time = last_chunk_time - first_chunk_time
data = {
# Send final message to close the stream
final_data = {
"model": self.ollama_server_infos.LIGHTRAG_MODEL,
"created_at": self.ollama_server_infos.LIGHTRAG_CREATED_AT,
"response": "",
"done": True,
"done_reason": "stop",
"context": [],
"total_duration": total_time,
"load_duration": 0,
"prompt_eval_count": prompt_tokens,
"prompt_eval_duration": prompt_eval_time,
"eval_count": completion_tokens,
"eval_duration": eval_time,
}
yield f"{json.dumps(data, ensure_ascii=False)}\n"
else:
try:
async for chunk in response:
if chunk:
if first_chunk_time is None:
first_chunk_time = time.time_ns()
last_chunk_time = time.time_ns()
total_response += chunk
data = {
"model": self.ollama_server_infos.LIGHTRAG_MODEL,
"created_at": self.ollama_server_infos.LIGHTRAG_CREATED_AT,
"response": chunk,
"done": False,
}
yield f"{json.dumps(data, ensure_ascii=False)}\n"
except (asyncio.CancelledError, Exception) as e:
error_msg = str(e)
if isinstance(e, asyncio.CancelledError):
error_msg = "Stream was cancelled by server"
else:
error_msg = f"Provider error: {error_msg}"
logger.error(f"Stream error: {error_msg}")
# Send error message to client
error_data = {
"model": self.ollama_server_infos.LIGHTRAG_MODEL,
"created_at": self.ollama_server_infos.LIGHTRAG_CREATED_AT,
"response": f"\n\nError: {error_msg}",
"error": f"\n\nError: {error_msg}",
"done": False,
}
yield f"{json.dumps(error_data, ensure_ascii=False)}\n"
# Send final message to close the stream
final_data = {
"model": self.ollama_server_infos.LIGHTRAG_MODEL,
"created_at": self.ollama_server_infos.LIGHTRAG_CREATED_AT,
"response": "",
"done": True,
}
yield f"{json.dumps(final_data, ensure_ascii=False)}\n"
return
if first_chunk_time is None:
first_chunk_time = start_time
completion_tokens = estimate_tokens(total_response)
total_time = last_chunk_time - start_time
prompt_eval_time = first_chunk_time - start_time
eval_time = last_chunk_time - first_chunk_time
data = {
"model": self.ollama_server_infos.LIGHTRAG_MODEL,
"created_at": self.ollama_server_infos.LIGHTRAG_CREATED_AT,
"response": "",
"done": True,
"done_reason": "stop",
"context": [],
"total_duration": total_time,
"load_duration": 0,
"prompt_eval_count": prompt_tokens,
"prompt_eval_duration": prompt_eval_time,
"eval_count": completion_tokens,
"eval_duration": eval_time,
}
yield f"{json.dumps(data, ensure_ascii=False)}\n"
yield f"{json.dumps(final_data, ensure_ascii=False)}\n"
return
if first_chunk_time is None:
first_chunk_time = start_time
completion_tokens = estimate_tokens(total_response)
total_time = last_chunk_time - start_time
prompt_eval_time = first_chunk_time - start_time
eval_time = last_chunk_time - first_chunk_time
except Exception as e:
trace_exception(e)
raise
data = {
"model": self.ollama_server_infos.LIGHTRAG_MODEL,
"created_at": self.ollama_server_infos.LIGHTRAG_CREATED_AT,
"response": "",
"done": True,
"done_reason": "stop",
"context": [],
"total_duration": total_time,
"load_duration": 0,
"prompt_eval_count": prompt_tokens,
"prompt_eval_duration": prompt_eval_time,
"eval_count": completion_tokens,
"eval_duration": eval_time,
}
yield f"{json.dumps(data, ensure_ascii=False)}\n"
return
return StreamingResponse(
stream_generator(),
@@ -462,7 +456,7 @@ class OllamaAPI:
"eval_duration": eval_time,
}
except Exception as e:
trace_exception(e)
logger.error(f"Ollama generate error: {str(e)}", exc_info=True)
raise HTTPException(status_code=500, detail=str(e))
@self.router.post(
@@ -535,36 +529,98 @@ class OllamaAPI:
)
async def stream_generator():
try:
first_chunk_time = None
first_chunk_time = None
last_chunk_time = time.time_ns()
total_response = ""
# Ensure response is an async generator
if isinstance(response, str):
# If it's a string, send in two parts
first_chunk_time = start_time
last_chunk_time = time.time_ns()
total_response = ""
total_response = response
# Ensure response is an async generator
if isinstance(response, str):
# If it's a string, send in two parts
first_chunk_time = start_time
last_chunk_time = time.time_ns()
total_response = response
data = {
"model": self.ollama_server_infos.LIGHTRAG_MODEL,
"created_at": self.ollama_server_infos.LIGHTRAG_CREATED_AT,
"message": {
"role": "assistant",
"content": response,
"images": None,
},
"done": False,
}
yield f"{json.dumps(data, ensure_ascii=False)}\n"
data = {
completion_tokens = estimate_tokens(total_response)
total_time = last_chunk_time - start_time
prompt_eval_time = first_chunk_time - start_time
eval_time = last_chunk_time - first_chunk_time
data = {
"model": self.ollama_server_infos.LIGHTRAG_MODEL,
"created_at": self.ollama_server_infos.LIGHTRAG_CREATED_AT,
"message": {
"role": "assistant",
"content": "",
"images": None,
},
"done_reason": "stop",
"done": True,
"total_duration": total_time,
"load_duration": 0,
"prompt_eval_count": prompt_tokens,
"prompt_eval_duration": prompt_eval_time,
"eval_count": completion_tokens,
"eval_duration": eval_time,
}
yield f"{json.dumps(data, ensure_ascii=False)}\n"
else:
try:
async for chunk in response:
if chunk:
if first_chunk_time is None:
first_chunk_time = time.time_ns()
last_chunk_time = time.time_ns()
total_response += chunk
data = {
"model": self.ollama_server_infos.LIGHTRAG_MODEL,
"created_at": self.ollama_server_infos.LIGHTRAG_CREATED_AT,
"message": {
"role": "assistant",
"content": chunk,
"images": None,
},
"done": False,
}
yield f"{json.dumps(data, ensure_ascii=False)}\n"
except (asyncio.CancelledError, Exception) as e:
error_msg = str(e)
if isinstance(e, asyncio.CancelledError):
error_msg = "Stream was cancelled by server"
else:
error_msg = f"Provider error: {error_msg}"
logger.error(f"Stream error: {error_msg}")
# Send error message to client
error_data = {
"model": self.ollama_server_infos.LIGHTRAG_MODEL,
"created_at": self.ollama_server_infos.LIGHTRAG_CREATED_AT,
"message": {
"role": "assistant",
"content": response,
"content": f"\n\nError: {error_msg}",
"images": None,
},
"error": f"\n\nError: {error_msg}",
"done": False,
}
yield f"{json.dumps(data, ensure_ascii=False)}\n"
yield f"{json.dumps(error_data, ensure_ascii=False)}\n"
completion_tokens = estimate_tokens(total_response)
total_time = last_chunk_time - start_time
prompt_eval_time = first_chunk_time - start_time
eval_time = last_chunk_time - first_chunk_time
data = {
# Send final message to close the stream
final_data = {
"model": self.ollama_server_infos.LIGHTRAG_MODEL,
"created_at": self.ollama_server_infos.LIGHTRAG_CREATED_AT,
"message": {
@@ -572,103 +628,36 @@ class OllamaAPI:
"content": "",
"images": None,
},
"done_reason": "stop",
"done": True,
"total_duration": total_time,
"load_duration": 0,
"prompt_eval_count": prompt_tokens,
"prompt_eval_duration": prompt_eval_time,
"eval_count": completion_tokens,
"eval_duration": eval_time,
}
yield f"{json.dumps(data, ensure_ascii=False)}\n"
else:
try:
async for chunk in response:
if chunk:
if first_chunk_time is None:
first_chunk_time = time.time_ns()
yield f"{json.dumps(final_data, ensure_ascii=False)}\n"
return
last_chunk_time = time.time_ns()
if first_chunk_time is None:
first_chunk_time = start_time
completion_tokens = estimate_tokens(total_response)
total_time = last_chunk_time - start_time
prompt_eval_time = first_chunk_time - start_time
eval_time = last_chunk_time - first_chunk_time
total_response += chunk
data = {
"model": self.ollama_server_infos.LIGHTRAG_MODEL,
"created_at": self.ollama_server_infos.LIGHTRAG_CREATED_AT,
"message": {
"role": "assistant",
"content": chunk,
"images": None,
},
"done": False,
}
yield f"{json.dumps(data, ensure_ascii=False)}\n"
except (asyncio.CancelledError, Exception) as e:
error_msg = str(e)
if isinstance(e, asyncio.CancelledError):
error_msg = "Stream was cancelled by server"
else:
error_msg = f"Provider error: {error_msg}"
logger.error(f"Stream error: {error_msg}")
# Send error message to client
error_data = {
"model": self.ollama_server_infos.LIGHTRAG_MODEL,
"created_at": self.ollama_server_infos.LIGHTRAG_CREATED_AT,
"message": {
"role": "assistant",
"content": f"\n\nError: {error_msg}",
"images": None,
},
"error": f"\n\nError: {error_msg}",
"done": False,
}
yield f"{json.dumps(error_data, ensure_ascii=False)}\n"
# Send final message to close the stream
final_data = {
"model": self.ollama_server_infos.LIGHTRAG_MODEL,
"created_at": self.ollama_server_infos.LIGHTRAG_CREATED_AT,
"message": {
"role": "assistant",
"content": "",
"images": None,
},
"done": True,
}
yield f"{json.dumps(final_data, ensure_ascii=False)}\n"
return
if first_chunk_time is None:
first_chunk_time = start_time
completion_tokens = estimate_tokens(total_response)
total_time = last_chunk_time - start_time
prompt_eval_time = first_chunk_time - start_time
eval_time = last_chunk_time - first_chunk_time
data = {
"model": self.ollama_server_infos.LIGHTRAG_MODEL,
"created_at": self.ollama_server_infos.LIGHTRAG_CREATED_AT,
"message": {
"role": "assistant",
"content": "",
"images": None,
},
"done_reason": "stop",
"done": True,
"total_duration": total_time,
"load_duration": 0,
"prompt_eval_count": prompt_tokens,
"prompt_eval_duration": prompt_eval_time,
"eval_count": completion_tokens,
"eval_duration": eval_time,
}
yield f"{json.dumps(data, ensure_ascii=False)}\n"
except Exception as e:
trace_exception(e)
raise
data = {
"model": self.ollama_server_infos.LIGHTRAG_MODEL,
"created_at": self.ollama_server_infos.LIGHTRAG_CREATED_AT,
"message": {
"role": "assistant",
"content": "",
"images": None,
},
"done_reason": "stop",
"done": True,
"total_duration": total_time,
"load_duration": 0,
"prompt_eval_count": prompt_tokens,
"prompt_eval_duration": prompt_eval_time,
"eval_count": completion_tokens,
"eval_duration": eval_time,
}
yield f"{json.dumps(data, ensure_ascii=False)}\n"
return StreamingResponse(
stream_generator(),
@@ -730,5 +719,5 @@ class OllamaAPI:
"eval_duration": eval_time,
}
except Exception as e:
trace_exception(e)
logger.error(f"Ollama chat error: {str(e)}", exc_info=True)
raise HTTPException(status_code=500, detail=str(e))

View File

@@ -3,16 +3,13 @@ This module contains all query-related routes for the LightRAG API.
"""
import json
import logging
from typing import Any, Dict, List, Literal, Optional
from fastapi import APIRouter, Depends, HTTPException
from lightrag.base import QueryParam
from lightrag.api.utils_api import get_combined_auth_dependency
from lightrag.utils import logger
from pydantic import BaseModel, Field, field_validator
from ascii_colors import trace_exception
router = APIRouter(tags=["query"])
@@ -453,7 +450,7 @@ def create_query_routes(rag, api_key: Optional[str] = None, top_k: int = 60):
else:
return QueryResponse(response=response_content, references=None)
except Exception as e:
trace_exception(e)
logger.error(f"Error processing query: {str(e)}", exc_info=True)
raise HTTPException(status_code=500, detail=str(e))
@router.post(
@@ -713,7 +710,7 @@ def create_query_routes(rag, api_key: Optional[str] = None, top_k: int = 60):
if chunk: # Only send non-empty content
yield f"{json.dumps({'response': chunk})}\n"
except Exception as e:
logging.error(f"Streaming error: {str(e)}")
logger.error(f"Streaming error: {str(e)}")
yield f"{json.dumps({'error': str(e)})}\n"
else:
# Non-streaming mode: send complete response in one message
@@ -739,7 +736,7 @@ def create_query_routes(rag, api_key: Optional[str] = None, top_k: int = 60):
},
)
except Exception as e:
trace_exception(e)
logger.error(f"Error processing streaming query: {str(e)}", exc_info=True)
raise HTTPException(status_code=500, detail=str(e))
@router.post(
@@ -1156,7 +1153,7 @@ def create_query_routes(rag, api_key: Optional[str] = None, top_k: int = 60):
data={},
)
except Exception as e:
trace_exception(e)
logger.error(f"Error processing data query: {str(e)}", exc_info=True)
raise HTTPException(status_code=500, detail=str(e))
return router

View File

@@ -100,7 +100,7 @@ def main():
print("\nHow to fix:")
print(" Option 1 - Set environment variable before starting (recommended):")
print(" export OBJC_DISABLE_INITIALIZE_FORK_SAFETY=YES")
print(" lightrag-server")
print(" lightrag-gunicorn --workers 2")
print("\n Option 2 - Add to your shell profile (~/.zshrc or ~/.bash_profile):")
print(" echo 'export OBJC_DISABLE_INITIALIZE_FORK_SAFETY=YES' >> ~/.zshrc")
print(" source ~/.zshrc")

View File

@@ -106,6 +106,28 @@ class PipelineCancelledException(Exception):
self.message = message
class ChunkTokenLimitExceededError(ValueError):
"""Raised when a chunk exceeds the configured token limit."""
def __init__(
self,
chunk_tokens: int,
chunk_token_limit: int,
chunk_preview: str | None = None,
) -> None:
preview = chunk_preview.strip() if chunk_preview else None
truncated_preview = preview[:80] if preview else None
preview_note = f" Preview: '{truncated_preview}'" if truncated_preview else ""
message = (
f"Chunk token length {chunk_tokens} exceeds chunk_token_size {chunk_token_limit}."
f"{preview_note}"
)
super().__init__(message)
self.chunk_tokens = chunk_tokens
self.chunk_token_limit = chunk_token_limit
self.chunk_preview = truncated_preview
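# Sketch of how a caller might surface the structured fields (check_chunk_size and chunk_text are hypothetical):
#   try:
#       check_chunk_size(chunk_text)  # hypothetical validation that raises ChunkTokenLimitExceededError
#   except ChunkTokenLimitExceededError as exc:
#       print(exc.chunk_tokens, exc.chunk_token_limit, exc.chunk_preview)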
class QdrantMigrationError(Exception):
"""Raised when Qdrant data migration from legacy collections fails."""

View File

@@ -44,6 +44,23 @@ config.read("config.ini", "utf-8")
logging.getLogger("neo4j").setLevel(logging.ERROR)
READ_RETRY_EXCEPTIONS = (
neo4jExceptions.ServiceUnavailable,
neo4jExceptions.TransientError,
neo4jExceptions.SessionExpired,
ConnectionResetError,
OSError,
AttributeError,
)
READ_RETRY = retry(
stop=stop_after_attempt(3),
wait=wait_exponential(multiplier=1, min=4, max=10),
retry=retry_if_exception_type(READ_RETRY_EXCEPTIONS),
reraise=True,
)
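# Net effect of READ_RETRY: up to 3 attempts with exponential backoff (4-10 second waits),
# retried only for the transient driver/network errors listed above, re-raising the last failure.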
@final
@dataclass
class Neo4JStorage(BaseGraphStorage):
@@ -352,6 +369,7 @@ class Neo4JStorage(BaseGraphStorage):
# Neo4J handles persistence automatically
pass
@READ_RETRY
async def has_node(self, node_id: str) -> bool:
"""
Check if a node with the given label exists in the database
@@ -385,6 +403,7 @@ class Neo4JStorage(BaseGraphStorage):
await result.consume() # Ensure results are consumed even on error
raise
@READ_RETRY
async def has_edge(self, source_node_id: str, target_node_id: str) -> bool:
"""
Check if an edge exists between two nodes
@@ -426,6 +445,7 @@ class Neo4JStorage(BaseGraphStorage):
await result.consume() # Ensure results are consumed even on error
raise
@READ_RETRY
async def get_node(self, node_id: str) -> dict[str, str] | None:
"""Get node by its label identifier, return only node properties
@@ -479,6 +499,7 @@ class Neo4JStorage(BaseGraphStorage):
)
raise
@READ_RETRY
async def get_nodes_batch(self, node_ids: list[str]) -> dict[str, dict]:
"""
Retrieve multiple nodes in one query using UNWIND.
@@ -515,6 +536,7 @@ class Neo4JStorage(BaseGraphStorage):
await result.consume() # Make sure to consume the result fully
return nodes
@READ_RETRY
async def node_degree(self, node_id: str) -> int:
"""Get the degree (number of relationships) of a node with the given label.
If multiple nodes have the same label, returns the degree of the first node.
@@ -563,6 +585,7 @@ class Neo4JStorage(BaseGraphStorage):
)
raise
@READ_RETRY
async def node_degrees_batch(self, node_ids: list[str]) -> dict[str, int]:
"""
Retrieve the degree for multiple nodes in a single query using UNWIND.
@@ -621,6 +644,7 @@ class Neo4JStorage(BaseGraphStorage):
degrees = int(src_degree) + int(trg_degree)
return degrees
@READ_RETRY
async def edge_degrees_batch(
self, edge_pairs: list[tuple[str, str]]
) -> dict[tuple[str, str], int]:
@@ -647,6 +671,7 @@ class Neo4JStorage(BaseGraphStorage):
edge_degrees[(src, tgt)] = degrees.get(src, 0) + degrees.get(tgt, 0)
return edge_degrees
@READ_RETRY
async def get_edge(
self, source_node_id: str, target_node_id: str
) -> dict[str, str] | None:
@@ -734,6 +759,7 @@ class Neo4JStorage(BaseGraphStorage):
)
raise
@READ_RETRY
async def get_edges_batch(
self, pairs: list[dict[str, str]]
) -> dict[tuple[str, str], dict]:
@@ -784,6 +810,7 @@ class Neo4JStorage(BaseGraphStorage):
await result.consume()
return edges_dict
@READ_RETRY
async def get_node_edges(self, source_node_id: str) -> list[tuple[str, str]] | None:
"""Retrieves all edges (relationships) for a particular node identified by its label.
@@ -851,6 +878,7 @@ class Neo4JStorage(BaseGraphStorage):
)
raise
@READ_RETRY
async def get_nodes_edges_batch(
self, node_ids: list[str]
) -> dict[str, list[tuple[str, str]]]:

View File

@@ -383,7 +383,7 @@ class PostgreSQLDB:
async def configure_age_extension(connection: asyncpg.Connection) -> None:
"""Create AGE extension if it doesn't exist for graph operations."""
try:
await connection.execute("CREATE EXTENSION IF NOT EXISTS age") # type: ignore
await connection.execute("CREATE EXTENSION IF NOT EXISTS AGE CASCADE") # type: ignore
logger.info("PostgreSQL, AGE extension enabled")
except Exception as e:
logger.warning(f"Could not create AGE extension: {e}")

View File

@@ -1700,3 +1700,17 @@ def get_default_workspace() -> str:
"""
global _default_workspace
return _default_workspace
def get_pipeline_status_lock(
enable_logging: bool = False, workspace: str = None
) -> NamespaceLock:
"""Return unified storage lock for pipeline status data consistency.
This function is for compatibility with legacy code only.
"""
global _default_workspace
actual_workspace = workspace if workspace else _default_workspace
return get_namespace_lock(
"pipeline_status", workspace=actual_workspace, enable_logging=enable_logging
)

View File

@@ -1,177 +1,22 @@
from collections.abc import Iterable
import os
import pipmaster as pm # Pipmaster for dynamic library install
"""
Azure OpenAI compatibility layer.
# install specific modules
if not pm.is_installed("openai"):
pm.install("openai")
This module provides backward compatibility by re-exporting Azure OpenAI functions
from the main openai module where the actual implementation resides.
from openai import (
AsyncAzureOpenAI,
APIConnectionError,
RateLimitError,
APITimeoutError,
)
from openai.types.chat import ChatCompletionMessageParam
All core logic for both OpenAI and Azure OpenAI now lives in lightrag.llm.openai,
with this module serving as a thin compatibility wrapper for existing code that
imports from lightrag.llm.azure_openai.
"""
from tenacity import (
retry,
stop_after_attempt,
wait_exponential,
retry_if_exception_type,
from lightrag.llm.openai import (
azure_openai_complete_if_cache,
azure_openai_complete,
azure_openai_embed,
)
from lightrag.utils import (
wrap_embedding_func_with_attrs,
safe_unicode_decode,
logger,
)
import numpy as np
@retry(
stop=stop_after_attempt(3),
wait=wait_exponential(multiplier=1, min=4, max=10),
retry=retry_if_exception_type(
(RateLimitError, APIConnectionError, APIConnectionError)
),
)
async def azure_openai_complete_if_cache(
model,
prompt,
system_prompt: str | None = None,
history_messages: Iterable[ChatCompletionMessageParam] | None = None,
enable_cot: bool = False,
base_url: str | None = None,
api_key: str | None = None,
api_version: str | None = None,
**kwargs,
):
if enable_cot:
logger.debug(
"enable_cot=True is not supported for the Azure OpenAI API and will be ignored."
)
deployment = os.getenv("AZURE_OPENAI_DEPLOYMENT") or model or os.getenv("LLM_MODEL")
base_url = (
base_url or os.getenv("AZURE_OPENAI_ENDPOINT") or os.getenv("LLM_BINDING_HOST")
)
api_key = (
api_key or os.getenv("AZURE_OPENAI_API_KEY") or os.getenv("LLM_BINDING_API_KEY")
)
api_version = (
api_version
or os.getenv("AZURE_OPENAI_API_VERSION")
or os.getenv("OPENAI_API_VERSION")
)
kwargs.pop("hashing_kv", None)
kwargs.pop("keyword_extraction", None)
timeout = kwargs.pop("timeout", None)
openai_async_client = AsyncAzureOpenAI(
azure_endpoint=base_url,
azure_deployment=deployment,
api_key=api_key,
api_version=api_version,
timeout=timeout,
)
messages = []
if system_prompt:
messages.append({"role": "system", "content": system_prompt})
if history_messages:
messages.extend(history_messages)
if prompt is not None:
messages.append({"role": "user", "content": prompt})
if "response_format" in kwargs:
response = await openai_async_client.beta.chat.completions.parse(
model=model, messages=messages, **kwargs
)
else:
response = await openai_async_client.chat.completions.create(
model=model, messages=messages, **kwargs
)
if hasattr(response, "__aiter__"):
async def inner():
async for chunk in response:
if len(chunk.choices) == 0:
continue
content = chunk.choices[0].delta.content
if content is None:
continue
if r"\u" in content:
content = safe_unicode_decode(content.encode("utf-8"))
yield content
return inner()
else:
content = response.choices[0].message.content
if r"\u" in content:
content = safe_unicode_decode(content.encode("utf-8"))
return content
async def azure_openai_complete(
prompt, system_prompt=None, history_messages=[], keyword_extraction=False, **kwargs
) -> str:
kwargs.pop("keyword_extraction", None)
result = await azure_openai_complete_if_cache(
os.getenv("LLM_MODEL", "gpt-4o-mini"),
prompt,
system_prompt=system_prompt,
history_messages=history_messages,
**kwargs,
)
return result
@wrap_embedding_func_with_attrs(embedding_dim=1536)
@retry(
stop=stop_after_attempt(3),
wait=wait_exponential(multiplier=1, min=4, max=10),
retry=retry_if_exception_type(
(RateLimitError, APIConnectionError, APITimeoutError)
),
)
async def azure_openai_embed(
texts: list[str],
model: str | None = None,
base_url: str | None = None,
api_key: str | None = None,
api_version: str | None = None,
) -> np.ndarray:
deployment = (
os.getenv("AZURE_EMBEDDING_DEPLOYMENT")
or model
or os.getenv("EMBEDDING_MODEL", "text-embedding-3-small")
)
base_url = (
base_url
or os.getenv("AZURE_EMBEDDING_ENDPOINT")
or os.getenv("EMBEDDING_BINDING_HOST")
)
api_key = (
api_key
or os.getenv("AZURE_EMBEDDING_API_KEY")
or os.getenv("EMBEDDING_BINDING_API_KEY")
)
api_version = (
api_version
or os.getenv("AZURE_EMBEDDING_API_VERSION")
or os.getenv("OPENAI_API_VERSION")
)
openai_async_client = AsyncAzureOpenAI(
azure_endpoint=base_url,
azure_deployment=deployment,
api_key=api_key,
api_version=api_version,
)
response = await openai_async_client.embeddings.create(
model=model, input=texts, encoding_format="float"
)
return np.array([dp.embedding for dp in response.data])
__all__ = [
"azure_openai_complete_if_cache",
"azure_openai_complete",
"azure_openai_embed",
]

View File

@@ -69,6 +69,7 @@ async def fetch_data(url, headers, data):
)
async def jina_embed(
texts: list[str],
model: str = "jina-embeddings-v4",
embedding_dim: int = 2048,
late_chunking: bool = False,
base_url: str = None,
@@ -78,6 +79,8 @@ async def jina_embed(
Args:
texts: List of texts to embed.
model: The Jina embedding model to use (default: jina-embeddings-v4).
Supported models: jina-embeddings-v3, jina-embeddings-v4, etc.
embedding_dim: The embedding dimensions (default: 2048 for jina-embeddings-v4).
**IMPORTANT**: This parameter is automatically injected by the EmbeddingFunc wrapper.
Do NOT manually pass this parameter when calling the function directly.
@@ -107,7 +110,7 @@ async def jina_embed(
"Authorization": f"Bearer {os.environ['JINA_API_KEY']}",
}
data = {
"model": "jina-embeddings-v4",
"model": model,
"task": "text-matching",
"dimensions": embedding_dim,
"embedding_type": "base64",

View File

@@ -173,7 +173,9 @@ async def ollama_model_complete(
@wrap_embedding_func_with_attrs(embedding_dim=1024, max_token_size=8192)
async def ollama_embed(texts: list[str], embed_model, **kwargs) -> np.ndarray:
async def ollama_embed(
texts: list[str], embed_model: str = "bge-m3:latest", **kwargs
) -> np.ndarray:
api_key = kwargs.pop("api_key", None)
if not api_key:
api_key = os.getenv("OLLAMA_API_KEY")

View File

@@ -77,46 +77,86 @@ class InvalidResponseError(Exception):
def create_openai_async_client(
api_key: str | None = None,
base_url: str | None = None,
use_azure: bool = False,
azure_deployment: str | None = None,
api_version: str | None = None,
timeout: int | None = None,
client_configs: dict[str, Any] | None = None,
) -> AsyncOpenAI:
"""Create an AsyncOpenAI client with the given configuration.
"""Create an AsyncOpenAI or AsyncAzureOpenAI client with the given configuration.
Args:
api_key: OpenAI API key. If None, uses the OPENAI_API_KEY environment variable.
base_url: Base URL for the OpenAI API. If None, uses the default OpenAI API URL.
use_azure: Whether to create an Azure OpenAI client. Default is False.
azure_deployment: Azure OpenAI deployment name (only used when use_azure=True).
api_version: Azure OpenAI API version (only used when use_azure=True).
timeout: Request timeout in seconds.
client_configs: Additional configuration options for the AsyncOpenAI client.
These will override any default configurations but will be overridden by
explicit parameters (api_key, base_url).
Returns:
An AsyncOpenAI client instance.
An AsyncOpenAI or AsyncAzureOpenAI client instance.
"""
if not api_key:
api_key = os.environ["OPENAI_API_KEY"]
if use_azure:
from openai import AsyncAzureOpenAI
default_headers = {
"User-Agent": f"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_8) LightRAG/{__api_version__}",
"Content-Type": "application/json",
}
if not api_key:
api_key = os.environ.get("AZURE_OPENAI_API_KEY") or os.environ.get(
"LLM_BINDING_API_KEY"
)
if client_configs is None:
client_configs = {}
if client_configs is None:
client_configs = {}
# Create a merged config dict with precedence: explicit params > client_configs > defaults
merged_configs = {
**client_configs,
"default_headers": default_headers,
"api_key": api_key,
}
# Create a merged config dict with precedence: explicit params > client_configs
merged_configs = {
**client_configs,
"api_key": api_key,
}
if base_url is not None:
merged_configs["base_url"] = base_url
# Add explicit parameters (override client_configs)
if base_url is not None:
merged_configs["azure_endpoint"] = base_url
if azure_deployment is not None:
merged_configs["azure_deployment"] = azure_deployment
if api_version is not None:
merged_configs["api_version"] = api_version
if timeout is not None:
merged_configs["timeout"] = timeout
return AsyncAzureOpenAI(**merged_configs)
else:
merged_configs["base_url"] = os.environ.get(
"OPENAI_API_BASE", "https://api.openai.com/v1"
)
if not api_key:
api_key = os.environ["OPENAI_API_KEY"]
return AsyncOpenAI(**merged_configs)
default_headers = {
"User-Agent": f"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_8) LightRAG/{__api_version__}",
"Content-Type": "application/json",
}
if client_configs is None:
client_configs = {}
# Create a merged config dict with precedence: explicit params > client_configs > defaults
merged_configs = {
**client_configs,
"default_headers": default_headers,
"api_key": api_key,
}
if base_url is not None:
merged_configs["base_url"] = base_url
else:
merged_configs["base_url"] = os.environ.get(
"OPENAI_API_BASE", "https://api.openai.com/v1"
)
if timeout is not None:
merged_configs["timeout"] = timeout
return AsyncOpenAI(**merged_configs)
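# Illustrative Azure call based on the signature above (endpoint, deployment, and version are made up):
#   client = create_openai_async_client(
#       base_url="https://my-resource.openai.azure.com/",
#       use_azure=True,
#       azure_deployment="my-gpt-4o-mini",
#       api_version="2024-08-01-preview",
#   )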
@retry(
@@ -141,6 +181,9 @@ async def openai_complete_if_cache(
stream: bool | None = None,
timeout: int | None = None,
keyword_extraction: bool = False,
use_azure: bool = False,
azure_deployment: str | None = None,
api_version: str | None = None,
**kwargs: Any,
) -> str:
"""Complete a prompt using OpenAI's API with caching support and Chain of Thought (COT) integration.
@@ -162,23 +205,33 @@ async def openai_complete_if_cache(
6. For non-streaming: COT content is prepended to regular content with <think> tags.
Args:
model: The OpenAI model to use.
model: The OpenAI model to use. For Azure, this can be the deployment name.
prompt: The prompt to complete.
system_prompt: Optional system prompt to include.
history_messages: Optional list of previous messages in the conversation.
base_url: Optional base URL for the OpenAI API.
api_key: Optional OpenAI API key. If None, uses the OPENAI_API_KEY environment variable.
token_tracker: Optional token usage tracker for monitoring API usage.
enable_cot: Whether to enable Chain of Thought (COT) processing. Default is False.
base_url: Optional base URL for the OpenAI API. For Azure, this should be the
Azure OpenAI endpoint (e.g., https://your-resource.openai.azure.com/).
api_key: Optional API key. For standard OpenAI, uses OPENAI_API_KEY environment
variable if None. For Azure, uses AZURE_OPENAI_API_KEY if None.
token_tracker: Optional token usage tracker for monitoring API usage.
stream: Whether to stream the response. Default is False.
timeout: Request timeout in seconds. Default is None.
keyword_extraction: Whether to enable keyword extraction mode. When True, triggers
special response formatting for keyword extraction. Default is False.
use_azure: Whether to use Azure OpenAI service instead of standard OpenAI.
When True, creates an AsyncAzureOpenAI client. Default is False.
azure_deployment: Azure OpenAI deployment name. Only used when use_azure=True.
If not specified, falls back to AZURE_OPENAI_DEPLOYMENT environment variable.
api_version: Azure OpenAI API version (e.g., "2024-02-15-preview"). Only used
when use_azure=True. If not specified, falls back to AZURE_OPENAI_API_VERSION
environment variable.
**kwargs: Additional keyword arguments to pass to the OpenAI API.
Special kwargs:
- openai_client_configs: Dict of configuration options for the AsyncOpenAI client.
These will be passed to the client constructor but will be overridden by
explicit parameters (api_key, base_url).
explicit parameters (api_key, base_url). Supports proxy configuration,
custom headers, retry policies, etc.
Returns:
The completed text (with integrated COT content if available) or an async iterator
@@ -203,10 +256,18 @@ async def openai_complete_if_cache(
# Extract client configuration options
client_configs = kwargs.pop("openai_client_configs", {})
# Create the OpenAI client
# Handle keyword extraction mode
if keyword_extraction:
kwargs["response_format"] = GPTKeywordExtractionFormat
# Create the OpenAI client (supports both OpenAI and Azure)
openai_async_client = create_openai_async_client(
api_key=api_key,
base_url=base_url,
use_azure=use_azure,
azure_deployment=azure_deployment,
api_version=api_version,
timeout=timeout,
client_configs=client_configs,
)
@@ -234,16 +295,24 @@ async def openai_complete_if_cache(
if timeout is not None:
kwargs["timeout"] = timeout
# Determine the correct model identifier to use
# For Azure OpenAI, we must use the deployment name instead of the model name
api_model = azure_deployment if use_azure and azure_deployment else model
try:
# Don't use async with context manager, use client directly
if "response_format" in kwargs:
response = await openai_async_client.beta.chat.completions.parse(
model=model, messages=messages, **kwargs
response = await openai_async_client.chat.completions.parse(
model=api_model, messages=messages, **kwargs
)
else:
response = await openai_async_client.chat.completions.create(
model=model, messages=messages, **kwargs
model=api_model, messages=messages, **kwargs
)
except APITimeoutError as e:
logger.error(f"OpenAI API Timeout Error: {e}")
await openai_async_client.close() # Ensure client is closed
raise
except APIConnectionError as e:
logger.error(f"OpenAI API Connection Error: {e}")
await openai_async_client.close() # Ensure client is closed
@@ -252,10 +321,6 @@ async def openai_complete_if_cache(
logger.error(f"OpenAI API Rate Limit Error: {e}")
await openai_async_client.close() # Ensure client is closed
raise
except APITimeoutError as e:
logger.error(f"OpenAI API Timeout Error: {e}")
await openai_async_client.close() # Ensure client is closed
raise
except Exception as e:
logger.error(
f"OpenAI API Call Failed,\nModel: {model},\nParams: {kwargs}, Got: {e}"
@@ -287,7 +352,10 @@ async def openai_complete_if_cache(
# Check if choices exists and is not empty
if not hasattr(chunk, "choices") or not chunk.choices:
logger.warning(f"Received chunk without choices: {chunk}")
# Azure OpenAI sends content filter results in first chunk without choices
logger.debug(
f"Received chunk without choices (likely Azure content filter): {chunk}"
)
continue
# Check if delta exists
@@ -449,46 +517,57 @@ async def openai_complete_if_cache(
raise InvalidResponseError("Invalid response from OpenAI API")
message = response.choices[0].message
content = getattr(message, "content", None)
reasoning_content = getattr(message, "reasoning_content", "")
# Handle COT logic for non-streaming responses (only if enabled)
final_content = ""
# Handle parsed responses (structured output via response_format)
# When using beta.chat.completions.parse(), the response is in message.parsed
if hasattr(message, "parsed") and message.parsed is not None:
# Serialize the parsed structured response to JSON
final_content = message.parsed.model_dump_json()
logger.debug("Using parsed structured response from API")
else:
# Handle regular content responses
content = getattr(message, "content", None)
reasoning_content = getattr(message, "reasoning_content", "")
if enable_cot:
# Check if we should include reasoning content
should_include_reasoning = False
if reasoning_content and reasoning_content.strip():
if not content or content.strip() == "":
# Case 1: Only reasoning content, should include COT
should_include_reasoning = True
final_content = (
content or ""
) # Use empty string if content is None
# Handle COT logic for non-streaming responses (only if enabled)
final_content = ""
if enable_cot:
# Check if we should include reasoning content
should_include_reasoning = False
if reasoning_content and reasoning_content.strip():
if not content or content.strip() == "":
# Case 1: Only reasoning content, should include COT
should_include_reasoning = True
final_content = (
content or ""
) # Use empty string if content is None
else:
# Case 3: Both content and reasoning_content present, ignore reasoning
should_include_reasoning = False
final_content = content
else:
# Case 3: Both content and reasoning_content present, ignore reasoning
should_include_reasoning = False
final_content = content
# No reasoning content, use regular content
final_content = content or ""
# Apply COT wrapping if needed
if should_include_reasoning:
if r"\u" in reasoning_content:
reasoning_content = safe_unicode_decode(
reasoning_content.encode("utf-8")
)
final_content = (
f"<think>{reasoning_content}</think>{final_content}"
)
else:
# No reasoning content, use regular content
# COT disabled, only use regular content
final_content = content or ""
# Apply COT wrapping if needed
if should_include_reasoning:
if r"\u" in reasoning_content:
reasoning_content = safe_unicode_decode(
reasoning_content.encode("utf-8")
)
final_content = f"<think>{reasoning_content}</think>{final_content}"
else:
# COT disabled, only use regular content
final_content = content or ""
# Validate final content
if not final_content or final_content.strip() == "":
logger.error("Received empty content from OpenAI API")
await openai_async_client.close() # Ensure client is closed
raise InvalidResponseError("Received empty content from OpenAI API")
# Validate final content
if not final_content or final_content.strip() == "":
logger.error("Received empty content from OpenAI API")
await openai_async_client.close() # Ensure client is closed
raise InvalidResponseError("Received empty content from OpenAI API")
# Apply Unicode decoding to final content if needed
if r"\u" in final_content:
@@ -522,8 +601,6 @@ async def openai_complete(
) -> Union[str, AsyncIterator[str]]:
if history_messages is None:
history_messages = []
if keyword_extraction:
kwargs["response_format"] = "json"
model_name = kwargs["hashing_kv"].global_config["llm_model_name"]
return await openai_complete_if_cache(
model_name,
@@ -545,8 +622,6 @@ async def gpt_4o_complete(
) -> str:
if history_messages is None:
history_messages = []
if keyword_extraction:
kwargs["response_format"] = GPTKeywordExtractionFormat
return await openai_complete_if_cache(
"gpt-4o",
prompt,
@@ -568,8 +643,6 @@ async def gpt_4o_mini_complete(
) -> str:
if history_messages is None:
history_messages = []
if keyword_extraction:
kwargs["response_format"] = GPTKeywordExtractionFormat
return await openai_complete_if_cache(
"gpt-4o-mini",
prompt,
@@ -622,24 +695,40 @@ async def openai_embed(
embedding_dim: int | None = None,
client_configs: dict[str, Any] | None = None,
token_tracker: Any | None = None,
use_azure: bool = False,
azure_deployment: str | None = None,
api_version: str | None = None,
) -> np.ndarray:
"""Generate embeddings for a list of texts using OpenAI's API.
This function supports both standard OpenAI and Azure OpenAI services.
Args:
texts: List of texts to embed.
model: The OpenAI embedding model to use.
base_url: Optional base URL for the OpenAI API.
api_key: Optional OpenAI API key. If None, uses the OPENAI_API_KEY environment variable.
model: The embedding model to use, e.g., "text-embedding-3-small" for standard OpenAI.
For Azure, this can be the deployment name.
base_url: Optional base URL for the API. For standard OpenAI, uses default OpenAI endpoint.
For Azure, this should be the Azure OpenAI endpoint (e.g., https://your-resource.openai.azure.com/).
api_key: Optional API key. For standard OpenAI, uses OPENAI_API_KEY environment variable if None.
For Azure, uses AZURE_EMBEDDING_API_KEY environment variable if None.
embedding_dim: Optional embedding dimension for dynamic dimension reduction.
**IMPORTANT**: This parameter is automatically injected by the EmbeddingFunc wrapper.
Do NOT manually pass this parameter when calling the function directly.
The dimension is controlled by the @wrap_embedding_func_with_attrs decorator.
Manually passing a different value will trigger a warning and be ignored.
When provided (by EmbeddingFunc), it will be passed to the OpenAI API for dimension reduction.
client_configs: Additional configuration options for the AsyncOpenAI client.
client_configs: Additional configuration options for the AsyncOpenAI/AsyncAzureOpenAI client.
These will override any default configurations but will be overridden by
explicit parameters (api_key, base_url).
explicit parameters (api_key, base_url). Supports proxy configuration,
custom headers, retry policies, etc.
token_tracker: Optional token usage tracker for monitoring API usage.
use_azure: Whether to use Azure OpenAI service instead of standard OpenAI.
When True, creates an AsyncAzureOpenAI client. Default is False.
azure_deployment: Azure OpenAI deployment name. Only used when use_azure=True.
If not specified, falls back to AZURE_EMBEDDING_DEPLOYMENT environment variable.
api_version: Azure OpenAI API version (e.g., "2024-02-15-preview"). Only used
when use_azure=True. If not specified, falls back to AZURE_EMBEDDING_API_VERSION
environment variable.
Returns:
A numpy array of embeddings, one per input text.
@@ -649,15 +738,24 @@ async def openai_embed(
RateLimitError: If the OpenAI API rate limit is exceeded.
APITimeoutError: If the OpenAI API request times out.
"""
# Create the OpenAI client
# Create the OpenAI client (supports both OpenAI and Azure)
openai_async_client = create_openai_async_client(
api_key=api_key, base_url=base_url, client_configs=client_configs
api_key=api_key,
base_url=base_url,
use_azure=use_azure,
azure_deployment=azure_deployment,
api_version=api_version,
client_configs=client_configs,
)
async with openai_async_client:
# Determine the correct model identifier to use
# For Azure OpenAI, we must use the deployment name instead of the model name
api_model = azure_deployment if use_azure and azure_deployment else model
# Prepare API call parameters
api_params = {
"model": model,
"model": api_model,
"input": texts,
"encoding_format": "base64",
}
@@ -684,3 +782,172 @@ async def openai_embed(
for dp in response.data
]
)
# Azure OpenAI wrapper functions for backward compatibility
async def azure_openai_complete_if_cache(
model,
prompt,
system_prompt: str | None = None,
history_messages: list[dict[str, Any]] | None = None,
enable_cot: bool = False,
base_url: str | None = None,
api_key: str | None = None,
token_tracker: Any | None = None,
stream: bool | None = None,
timeout: int | None = None,
api_version: str | None = None,
keyword_extraction: bool = False,
**kwargs,
):
"""Azure OpenAI completion wrapper function.
This function provides backward compatibility by wrapping the unified
openai_complete_if_cache implementation with Azure-specific parameter handling.
All parameters from the underlying openai_complete_if_cache are exposed to ensure
full feature parity and API consistency.
"""
# Handle Azure-specific environment variables and parameters
deployment = os.getenv("AZURE_OPENAI_DEPLOYMENT") or model or os.getenv("LLM_MODEL")
base_url = (
base_url or os.getenv("AZURE_OPENAI_ENDPOINT") or os.getenv("LLM_BINDING_HOST")
)
api_key = (
api_key or os.getenv("AZURE_OPENAI_API_KEY") or os.getenv("LLM_BINDING_API_KEY")
)
api_version = (
api_version
or os.getenv("AZURE_OPENAI_API_VERSION")
or os.getenv("OPENAI_API_VERSION")
or "2024-08-01-preview"
)
# Call the unified implementation with Azure-specific parameters
return await openai_complete_if_cache(
model=deployment,
prompt=prompt,
system_prompt=system_prompt,
history_messages=history_messages,
enable_cot=enable_cot,
base_url=base_url,
api_key=api_key,
token_tracker=token_tracker,
stream=stream,
timeout=timeout,
use_azure=True,
azure_deployment=deployment,
api_version=api_version,
keyword_extraction=keyword_extraction,
**kwargs,
)
async def azure_openai_complete(
prompt,
system_prompt=None,
history_messages=None,
keyword_extraction=False,
**kwargs,
) -> str:
"""Azure OpenAI complete wrapper function.
Provides backward compatibility for azure_openai_complete calls.
"""
if history_messages is None:
history_messages = []
result = await azure_openai_complete_if_cache(
os.getenv("LLM_MODEL", "gpt-4o-mini"),
prompt,
system_prompt=system_prompt,
history_messages=history_messages,
keyword_extraction=keyword_extraction,
**kwargs,
)
return result
@wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8192)
async def azure_openai_embed(
texts: list[str],
model: str | None = None,
base_url: str | None = None,
api_key: str | None = None,
token_tracker: Any | None = None,
client_configs: dict[str, Any] | None = None,
api_version: str | None = None,
) -> np.ndarray:
"""Azure OpenAI embedding wrapper function.
This function provides backward compatibility by wrapping the unified
openai_embed implementation with Azure-specific parameter handling.
All parameters from the underlying openai_embed are exposed to ensure
full feature parity and API consistency.
IMPORTANT - Decorator Usage:
1. This function is decorated with @wrap_embedding_func_with_attrs to provide
the EmbeddingFunc interface for users who need to access embedding_dim
and other attributes.
2. This function does NOT use @retry decorator to avoid double-wrapping,
since the underlying openai_embed.func already has retry logic.
3. This function calls openai_embed.func (the unwrapped function) instead of
openai_embed (the EmbeddingFunc instance) to avoid double decoration issues:
✅ Correct: await openai_embed.func(...) # Calls unwrapped function with retry
❌ Wrong: await openai_embed(...) # Would cause double EmbeddingFunc wrapping
Double decoration causes:
- Double injection of embedding_dim parameter
- Incorrect parameter passing to the underlying implementation
- Runtime errors due to parameter conflicts
The call chain with correct implementation:
azure_openai_embed(texts)
→ EmbeddingFunc.__call__(texts) # azure's decorator
→ azure_openai_embed_impl(texts, embedding_dim=1536)
→ openai_embed.func(texts, ...)
→ @retry_wrapper(texts, ...) # openai's retry (only one layer)
→ openai_embed_impl(texts, ...)
→ actual embedding computation
"""
# Handle Azure-specific environment variables and parameters
deployment = (
os.getenv("AZURE_EMBEDDING_DEPLOYMENT")
or model
or os.getenv("EMBEDDING_MODEL", "text-embedding-3-small")
)
base_url = (
base_url
or os.getenv("AZURE_EMBEDDING_ENDPOINT")
or os.getenv("EMBEDDING_BINDING_HOST")
)
api_key = (
api_key
or os.getenv("AZURE_EMBEDDING_API_KEY")
or os.getenv("EMBEDDING_BINDING_API_KEY")
)
api_version = (
api_version
or os.getenv("AZURE_EMBEDDING_API_VERSION")
or os.getenv("AZURE_OPENAI_API_VERSION")
or os.getenv("OPENAI_API_VERSION")
or "2024-08-01-preview"
)
# CRITICAL: Call openai_embed.func (unwrapped) to avoid double decoration
# openai_embed is an EmbeddingFunc instance, .func accesses the underlying function
return await openai_embed.func(
texts=texts,
model=deployment,
base_url=base_url,
api_key=api_key,
token_tracker=token_tracker,
client_configs=client_configs,
use_azure=True,
azure_deployment=deployment,
api_version=api_version,
)
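A minimal usage sketch for the Azure wrappers above, assuming this module is importable as `lightrag.llm.openai` and that real Azure resources back the placeholder endpoint, key, and deployment values set below; nothing here beyond the function names and environment-variable names comes from the diff itself:
```python
# Hedged sketch: the endpoint/key/deployment values are placeholders, and the import
# path lightrag.llm.openai is an assumption about where this module lives.
import asyncio
import os

from lightrag.llm.openai import azure_openai_complete, azure_openai_embed

# Chat completion fallbacks (see azure_openai_complete_if_cache above)
os.environ.setdefault("AZURE_OPENAI_ENDPOINT", "https://your-resource.openai.azure.com/")
os.environ.setdefault("AZURE_OPENAI_API_KEY", "your-azure-key")
os.environ.setdefault("AZURE_OPENAI_DEPLOYMENT", "gpt-4o-mini")

# Embedding fallbacks (see azure_openai_embed above)
os.environ.setdefault("AZURE_EMBEDDING_ENDPOINT", "https://your-resource.openai.azure.com/")
os.environ.setdefault("AZURE_EMBEDDING_API_KEY", "your-azure-key")
os.environ.setdefault("AZURE_EMBEDDING_DEPLOYMENT", "text-embedding-3-small")

async def demo() -> None:
    answer = await azure_openai_complete("Summarize LightRAG in one sentence.")
    print(answer)

    vectors = await azure_openai_embed(["hello world", "LightRAG"])
    # azure_openai_embed is an EmbeddingFunc (embedding_dim=1536), so this is a numpy array
    print(vectors.shape)

asyncio.run(demo())
```
Because the wrappers route through `openai_complete_if_cache` and `openai_embed.func`, only a single retry layer and a single `EmbeddingFunc` wrapper are involved, as the docstrings above describe.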

View File

@@ -8,7 +8,10 @@ import json_repair
from typing import Any, AsyncIterator, overload, Literal
from collections import Counter, defaultdict
from lightrag.exceptions import PipelineCancelledException
from lightrag.exceptions import (
PipelineCancelledException,
ChunkTokenLimitExceededError,
)
from lightrag.utils import (
logger,
compute_mdhash_id,
@@ -109,6 +112,17 @@ def chunking_by_token_size(
if split_by_character_only:
for chunk in raw_chunks:
_tokens = tokenizer.encode(chunk)
if len(_tokens) > chunk_token_size:
logger.warning(
"Chunk split_by_character exceeds token limit: len=%d limit=%d",
len(_tokens),
chunk_token_size,
)
raise ChunkTokenLimitExceededError(
chunk_tokens=len(_tokens),
chunk_token_limit=chunk_token_size,
chunk_preview=chunk[:120],
)
new_chunks.append((len(_tokens), chunk))
else:
for chunk in raw_chunks:
@@ -383,8 +397,8 @@ async def _handle_single_entity_extraction(
# Validate entity name after all cleaning steps
if not entity_name or not entity_name.strip():
logger.warning(
f"Entity extraction error: entity name became empty after cleaning. Original: '{record_attributes[1]}'"
logger.info(
f"Empty entity name found after sanitization. Original: '{record_attributes[1]}'"
)
return None
@@ -460,14 +474,14 @@ async def _handle_single_relationship_extraction(
# Validate entity names after all cleaning steps
if not source:
logger.warning(
f"Relationship extraction error: source entity became empty after cleaning. Original: '{record_attributes[1]}'"
logger.info(
f"Empty source entity found after sanitization. Original: '{record_attributes[1]}'"
)
return None
if not target:
logger.warning(
f"Relationship extraction error: target entity became empty after cleaning. Original: '{record_attributes[2]}'"
logger.info(
f"Empty target entity found after sanitization. Original: '{record_attributes[2]}'"
)
return None
@@ -2818,9 +2832,11 @@ async def extract_entities(
cache_keys_collector = []
# Get initial extraction
# Format system prompt without input_text for each chunk (enables OpenAI prompt caching across chunks)
entity_extraction_system_prompt = PROMPTS[
"entity_extraction_system_prompt"
].format(**{**context_base, "input_text": content})
].format(**context_base)
# Format user prompts with input_text for each chunk
entity_extraction_user_prompt = PROMPTS["entity_extraction_user_prompt"].format(
**{**context_base, "input_text": content}
)
@@ -3250,10 +3266,16 @@ async def extract_keywords_only(
It ONLY extracts keywords (hl_keywords, ll_keywords).
"""
# 1. Handle cache if needed - add cache type for keywords
# 1. Build the examples
examples = "\n".join(PROMPTS["keywords_extraction_examples"])
language = global_config["addon_params"].get("language", DEFAULT_SUMMARY_LANGUAGE)
# 2. Handle cache if needed - add cache type for keywords
args_hash = compute_args_hash(
param.mode,
text,
language,
)
cached_result = await handle_cache(
hashing_kv, args_hash, text, param.mode, cache_type="keywords"
@@ -3270,11 +3292,6 @@ async def extract_keywords_only(
"Invalid cache format for keywords, proceeding with extraction"
)
# 2. Build the examples
examples = "\n".join(PROMPTS["keywords_extraction_examples"])
language = global_config["addon_params"].get("language", DEFAULT_SUMMARY_LANGUAGE)
# 3. Build the keyword-extraction prompt
kw_prompt = PROMPTS["keywords_extraction"].format(
query=text,

View File

@@ -58,18 +58,10 @@ You are a Knowledge Graph Specialist responsible for extracting entities and rel
---Examples---
{examples}
---Real Data to be Processed---
<Input>
Entity_types: [{entity_types}]
Text:
```
{input_text}
```
"""
PROMPTS["entity_extraction_user_prompt"] = """---Task---
Extract entities and relationships from the input text to be processed.
Extract entities and relationships from the input text in the Data to be Processed section below.
---Instructions---
1. **Strict Adherence to Format:** Strictly adhere to all format requirements for entity and relationship lists, including output order, field delimiters, and proper noun handling, as specified in the system prompt.
@@ -77,6 +69,15 @@ Extract entities and relationships from the input text to be processed.
3. **Completion Signal:** Output `{completion_delimiter}` as the final line after all relevant entities and relationships have been extracted and presented.
4. **Output Language:** Ensure the output language is {language}. Proper nouns (e.g., personal names, place names, organization names) must be kept in their original language and not translated.
---Data to be Processed---
<Entity_types>
[{entity_types}]
<Input Text>
```
{input_text}
```
<Output>
"""
@@ -99,7 +100,10 @@ Based on the last extraction task, identify and extract any **missed or incorrec
"""
PROMPTS["entity_extraction_examples"] = [
"""<Input Text>
"""<Entity_types>
["Person","Creature","Organization","Location","Event","Concept","Method","Content","Data","Artifact","NaturalObject"]
<Input Text>
```
while Alex clenched his jaw, the buzz of frustration dull against the backdrop of Taylor's authoritarian certainty. It was this competitive undercurrent that kept him alert, the sense that his and Jordan's shared commitment to discovery was an unspoken rebellion against Cruz's narrowing vision of control and order.
@@ -124,7 +128,10 @@ relation{tuple_delimiter}Taylor{tuple_delimiter}The Device{tuple_delimiter}rever
{completion_delimiter}
""",
"""<Input Text>
"""<Entity_types>
["Person","Creature","Organization","Location","Event","Concept","Method","Content","Data","Artifact","NaturalObject"]
<Input Text>
```
Stock markets faced a sharp downturn today as tech giants saw significant declines, with the global tech index dropping by 3.4% in midday trading. Analysts attribute the selloff to investor concerns over rising interest rates and regulatory uncertainty.
@@ -151,7 +158,10 @@ relation{tuple_delimiter}Federal Reserve Policy Announcement{tuple_delimiter}Mar
{completion_delimiter}
""",
"""<Input Text>
"""<Entity_types>
["Person","Creature","Organization","Location","Event","Concept","Method","Content","Data","Artifact","NaturalObject"]
<Input Text>
```
At the World Athletics Championship in Tokyo, Noah Carter broke the 100m sprint record using cutting-edge carbon-fiber spikes.
```
@@ -374,6 +384,7 @@ Given a user query, your task is to extract two distinct types of keywords:
2. **Source of Truth**: All keywords must be explicitly derived from the user query, and both the high-level and low-level keyword categories are required to contain content.
3. **Concise & Meaningful**: Keywords should be concise words or meaningful phrases. Prioritize multi-word phrases when they represent a single concept. For example, from "latest financial report of Apple Inc.", you should extract "latest financial report" and "Apple Inc." rather than "latest", "financial", "report", and "Apple".
4. **Handle Edge Cases**: For queries that are too simple, vague, or nonsensical (e.g., "hello", "ok", "asdfghjkl"), you must return a JSON object with empty lists for both keyword types.
5. **Language**: All extracted keywords MUST be in {language}. Proper nouns (e.g., personal names, place names, organization names) should be kept in their original language.
---Examples---
{examples}

View File

@@ -2,7 +2,7 @@ from __future__ import annotations
import os
import aiohttp
from typing import Any, List, Dict, Optional
from typing import Any, List, Dict, Optional, Tuple
from tenacity import (
retry,
stop_after_attempt,
@@ -19,6 +19,158 @@ from dotenv import load_dotenv
load_dotenv(dotenv_path=".env", override=False)
def chunk_documents_for_rerank(
documents: List[str],
max_tokens: int = 480,
overlap_tokens: int = 32,
tokenizer_model: str = "gpt-4o-mini",
) -> Tuple[List[str], List[int]]:
"""
Chunk documents that exceed token limit for reranking.
Args:
documents: List of document strings to chunk
max_tokens: Maximum tokens per chunk (default 480, leaving margin under the 512-token limit)
overlap_tokens: Number of tokens to overlap between chunks
tokenizer_model: Model name for tiktoken tokenizer
Returns:
Tuple of (chunked_documents, original_doc_indices)
- chunked_documents: List of document chunks (may be more than input)
- original_doc_indices: Maps each chunk back to its original document index
"""
# Clamp overlap_tokens to ensure the loop always advances
# If overlap_tokens >= max_tokens, the chunking loop would hang
if overlap_tokens >= max_tokens:
original_overlap = overlap_tokens
# Ensure overlap is at least 1 token less than max to guarantee progress
# For very small max_tokens (e.g., 1), set overlap to 0
overlap_tokens = max(0, max_tokens - 1)
logger.warning(
f"overlap_tokens ({original_overlap}) must be less than max_tokens ({max_tokens}). "
f"Clamping to {overlap_tokens} to prevent infinite loop."
)
try:
from .utils import TiktokenTokenizer
tokenizer = TiktokenTokenizer(model_name=tokenizer_model)
except Exception as e:
logger.warning(
f"Failed to initialize tokenizer: {e}. Using character-based approximation."
)
# Fallback: approximate 1 token ≈ 4 characters
max_chars = max_tokens * 4
overlap_chars = overlap_tokens * 4
chunked_docs = []
doc_indices = []
for idx, doc in enumerate(documents):
if len(doc) <= max_chars:
chunked_docs.append(doc)
doc_indices.append(idx)
else:
# Split into overlapping chunks
start = 0
while start < len(doc):
end = min(start + max_chars, len(doc))
chunk = doc[start:end]
chunked_docs.append(chunk)
doc_indices.append(idx)
if end >= len(doc):
break
start = end - overlap_chars
return chunked_docs, doc_indices
# Use tokenizer for accurate chunking
chunked_docs = []
doc_indices = []
for idx, doc in enumerate(documents):
tokens = tokenizer.encode(doc)
if len(tokens) <= max_tokens:
# Document fits in one chunk
chunked_docs.append(doc)
doc_indices.append(idx)
else:
# Split into overlapping chunks
start = 0
while start < len(tokens):
end = min(start + max_tokens, len(tokens))
chunk_tokens = tokens[start:end]
chunk_text = tokenizer.decode(chunk_tokens)
chunked_docs.append(chunk_text)
doc_indices.append(idx)
if end >= len(tokens):
break
start = end - overlap_tokens
return chunked_docs, doc_indices
def aggregate_chunk_scores(
chunk_results: List[Dict[str, Any]],
doc_indices: List[int],
num_original_docs: int,
aggregation: str = "max",
) -> List[Dict[str, Any]]:
"""
Aggregate rerank scores from document chunks back to original documents.
Args:
chunk_results: Rerank results for chunks [{"index": chunk_idx, "relevance_score": score}, ...]
doc_indices: Maps each chunk index to original document index
num_original_docs: Total number of original documents
aggregation: Strategy for aggregating scores ("max", "mean", "first")
Returns:
List of results for original documents [{"index": doc_idx, "relevance_score": score}, ...]
"""
# Group scores by original document index
doc_scores: Dict[int, List[float]] = {i: [] for i in range(num_original_docs)}
for result in chunk_results:
chunk_idx = result["index"]
score = result["relevance_score"]
if 0 <= chunk_idx < len(doc_indices):
original_doc_idx = doc_indices[chunk_idx]
doc_scores[original_doc_idx].append(score)
# Aggregate scores
aggregated_results = []
for doc_idx, scores in doc_scores.items():
if not scores:
continue
if aggregation == "max":
final_score = max(scores)
elif aggregation == "mean":
final_score = sum(scores) / len(scores)
elif aggregation == "first":
final_score = scores[0]
else:
logger.warning(f"Unknown aggregation strategy: {aggregation}, using max")
final_score = max(scores)
aggregated_results.append(
{
"index": doc_idx,
"relevance_score": final_score,
}
)
# Sort by relevance score (descending)
aggregated_results.sort(key=lambda x: x["relevance_score"], reverse=True)
return aggregated_results
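A short sketch of how the two helpers above compose; the relevance scores are synthetic stand-ins for a rerank API response, not real model output:
```python
# Sketch of the chunk -> rerank -> aggregate round trip using the helpers above.
docs = ["short passage", " ".join(f"tok{i}" for i in range(2000))]

chunks, doc_indices = chunk_documents_for_rerank(docs, max_tokens=480, overlap_tokens=32)

# Pretend each chunk got a score back from the rerank endpoint.
fake_results = [
    {"index": i, "relevance_score": 1.0 / (i + 1)} for i in range(len(chunks))
]

doc_level = aggregate_chunk_scores(
    fake_results, doc_indices, num_original_docs=len(docs), aggregation="max"
)
# One entry per original document, sorted by relevance_score in descending order.
print(doc_level)
```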
@retry(
stop=stop_after_attempt(3),
wait=wait_exponential(multiplier=1, min=4, max=60),
@@ -38,6 +190,8 @@ async def generic_rerank_api(
extra_body: Optional[Dict[str, Any]] = None,
response_format: str = "standard", # "standard" (Jina/Cohere) or "aliyun"
request_format: str = "standard", # "standard" (Jina/Cohere) or "aliyun"
enable_chunking: bool = False,
max_tokens_per_doc: int = 480,
) -> List[Dict[str, Any]]:
"""
Generic rerank API call for Jina/Cohere/Aliyun models.
@@ -52,6 +206,9 @@ async def generic_rerank_api(
return_documents: Whether to return document text (Jina only)
extra_body: Additional body parameters
response_format: Response format type ("standard" for Jina/Cohere, "aliyun" for Aliyun)
request_format: Request format type ("standard" for Jina/Cohere, "aliyun" for Aliyun)
enable_chunking: Whether to chunk documents exceeding token limit
max_tokens_per_doc: Maximum tokens per document for chunking
Returns:
List of dictionary of ["index": int, "relevance_score": float]
@@ -63,6 +220,27 @@ async def generic_rerank_api(
if api_key is not None:
headers["Authorization"] = f"Bearer {api_key}"
# Handle document chunking if enabled
original_documents = documents
doc_indices = None
original_top_n = top_n # Save original top_n for post-aggregation limiting
if enable_chunking:
documents, doc_indices = chunk_documents_for_rerank(
documents, max_tokens=max_tokens_per_doc
)
logger.debug(
f"Chunked {len(original_documents)} documents into {len(documents)} chunks"
)
# When chunking is enabled, disable top_n at API level to get all chunk scores
# This ensures proper document-level coverage after aggregation
# We'll apply top_n to aggregated document results instead
if top_n is not None:
logger.debug(
f"Chunking enabled: disabled API-level top_n={top_n} to ensure complete document coverage"
)
top_n = None
# Build request payload based on request format
if request_format == "aliyun":
# Aliyun format: nested input/parameters structure
@@ -86,7 +264,7 @@ async def generic_rerank_api(
if extra_body:
payload["parameters"].update(extra_body)
else:
# Standard format for Jina/Cohere
# Standard format for Jina/Cohere/OpenAI
payload = {
"model": model,
"query": query,
@@ -98,7 +276,7 @@ async def generic_rerank_api(
payload["top_n"] = top_n
# Only Jina API supports return_documents parameter
if return_documents is not None:
if return_documents is not None and response_format in ("standard",):
payload["return_documents"] = return_documents
# Add extra parameters
@@ -147,7 +325,6 @@ async def generic_rerank_api(
f"Expected 'output.results' to be list, got {type(results)}: {results}"
)
results = []
elif response_format == "standard":
# Standard format: {"results": [...]}
results = response_json.get("results", [])
@@ -158,16 +335,35 @@ async def generic_rerank_api(
results = []
else:
raise ValueError(f"Unsupported response format: {response_format}")
if not results:
logger.warning("Rerank API returned empty results")
return []
# Standardize return format
return [
standardized_results = [
{"index": result["index"], "relevance_score": result["relevance_score"]}
for result in results
]
# Aggregate chunk scores back to original documents if chunking was enabled
if enable_chunking and doc_indices:
standardized_results = aggregate_chunk_scores(
standardized_results,
doc_indices,
len(original_documents),
aggregation="max",
)
# Apply original top_n limit at document level (post-aggregation)
# This preserves document-level semantics: top_n limits documents, not chunks
if (
original_top_n is not None
and len(standardized_results) > original_top_n
):
standardized_results = standardized_results[:original_top_n]
return standardized_results
async def cohere_rerank(
query: str,
@@ -177,21 +373,46 @@ async def cohere_rerank(
model: str = "rerank-v3.5",
base_url: str = "https://api.cohere.com/v2/rerank",
extra_body: Optional[Dict[str, Any]] = None,
enable_chunking: bool = False,
max_tokens_per_doc: int = 4096,
) -> List[Dict[str, Any]]:
"""
Rerank documents using Cohere API.
Supports both the standard Cohere API and Cohere-compatible proxies.
Args:
query: The search query
documents: List of strings to rerank
top_n: Number of top results to return
api_key: API key
model: rerank model name
api_key: API key for authentication
model: rerank model name (default: rerank-v3.5)
base_url: API endpoint
extra_body: Additional body parameters for the HTTP request (reserved for extra params)
enable_chunking: Whether to chunk documents exceeding max_tokens_per_doc
max_tokens_per_doc: Maximum tokens per document (default: 4096 for Cohere v3.5)
Returns:
List of dictionary of ["index": int, "relevance_score": float]
Example:
>>> # Standard Cohere API
>>> results = await cohere_rerank(
... query="What is the meaning of life?",
... documents=["Doc1", "Doc2"],
... api_key="your-cohere-key"
... )
>>> # LiteLLM proxy with user authentication
>>> results = await cohere_rerank(
... query="What is vector search?",
... documents=["Doc1", "Doc2"],
... model="answerai-colbert-small-v1",
... base_url="https://llm-proxy.example.com/v2/rerank",
... api_key="your-proxy-key",
... enable_chunking=True,
... max_tokens_per_doc=480
... )
"""
if api_key is None:
api_key = os.getenv("COHERE_API_KEY") or os.getenv("RERANK_BINDING_API_KEY")
@@ -206,6 +427,8 @@ async def cohere_rerank(
return_documents=None, # Cohere doesn't support this parameter
extra_body=extra_body,
response_format="standard",
enable_chunking=enable_chunking,
max_tokens_per_doc=max_tokens_per_doc,
)

View File

@@ -1129,11 +1129,16 @@ class CleanupTool:
pass
async def main():
"""Main entry point"""
async def async_main():
"""Async main entry point"""
tool = CleanupTool()
await tool.run()
def main():
"""Synchronous entry point for CLI command"""
asyncio.run(async_main())
if __name__ == "__main__":
asyncio.run(main())
main()

View File

@@ -1,6 +1,8 @@
from __future__ import annotations
import weakref
import sys
import asyncio
import html
import csv
@@ -40,6 +42,35 @@ from lightrag.constants import (
SOURCE_IDS_LIMIT_METHOD_FIFO,
)
# Precompile regex pattern for JSON sanitization (module-level, compiled once)
_SURROGATE_PATTERN = re.compile(r"[\uD800-\uDFFF\uFFFE\uFFFF]")
class SafeStreamHandler(logging.StreamHandler):
"""StreamHandler that gracefully handles closed streams during shutdown.
This handler prevents "ValueError: I/O operation on closed file" errors
that can occur when pytest or other test frameworks close stdout/stderr
before Python's logging cleanup runs.
"""
def flush(self):
"""Flush the stream, ignoring errors if the stream is closed."""
try:
super().flush()
except (ValueError, OSError):
# Stream is closed or otherwise unavailable, silently ignore
pass
def close(self):
"""Close the handler, ignoring errors if the stream is already closed."""
try:
super().close()
except (ValueError, OSError):
# Stream is closed or otherwise unavailable, silently ignore
pass
# Initialize logger with basic configuration
logger = logging.getLogger("lightrag")
logger.propagate = False # prevent log message send to root logger
@@ -47,7 +78,7 @@ logger.setLevel(logging.INFO)
# Add console handler if no handlers exist
if not logger.handlers:
console_handler = logging.StreamHandler()
console_handler = SafeStreamHandler()
console_handler.setLevel(logging.INFO)
formatter = logging.Formatter("%(levelname)s: %(message)s")
console_handler.setFormatter(formatter)
@@ -56,8 +87,32 @@ if not logger.handlers:
# Set httpx logging level to WARNING
logging.getLogger("httpx").setLevel(logging.WARNING)
# Precompile regex pattern for JSON sanitization (module-level, compiled once)
_SURROGATE_PATTERN = re.compile(r"[\uD800-\uDFFF\uFFFE\uFFFF]")
def _patch_ascii_colors_console_handler() -> None:
"""Prevent ascii_colors from printing flush errors during interpreter exit."""
try:
from ascii_colors import ConsoleHandler
except ImportError:
return
if getattr(ConsoleHandler, "_lightrag_patched", False):
return
original_handle_error = ConsoleHandler.handle_error
def _safe_handle_error(self, message: str) -> None: # type: ignore[override]
exc_type, _, _ = sys.exc_info()
if exc_type in (ValueError, OSError) and "close" in message.lower():
return
original_handle_error(self, message)
ConsoleHandler.handle_error = _safe_handle_error # type: ignore[assignment]
ConsoleHandler._lightrag_patched = True # type: ignore[attr-defined]
_patch_ascii_colors_console_handler()
# Global import for pypinyin with startup-time logging
try:
@@ -286,8 +341,8 @@ def setup_logger(
logger_instance.handlers = [] # Clear existing handlers
logger_instance.propagate = False
# Add console handler
console_handler = logging.StreamHandler()
# Add console handler with safe stream handling
console_handler = SafeStreamHandler()
console_handler.setFormatter(simple_formatter)
console_handler.setLevel(level)
logger_instance.addHandler(console_handler)
@@ -963,7 +1018,76 @@ def priority_limit_async_func_call(
def wrap_embedding_func_with_attrs(**kwargs):
"""Wrap a function with attributes"""
"""Decorator to add embedding dimension and token limit attributes to embedding functions.
This decorator wraps an async embedding function and returns an EmbeddingFunc instance
that automatically handles dimension parameter injection and attribute management.
WARNING: DO NOT apply this decorator to wrapper functions that call other
decorated embedding functions. This will cause double decoration and parameter
injection conflicts.
Correct usage patterns:
1. Direct implementation (decorated):
```python
@wrap_embedding_func_with_attrs(embedding_dim=1536)
async def my_embed(texts, embedding_dim=None):
# Direct implementation
return embeddings
```
2. Wrapper calling decorated function (DO NOT decorate wrapper):
```python
# my_embed is already decorated above
async def my_wrapper(texts, **kwargs): # ❌ DO NOT decorate this!
# Must call .func to access unwrapped implementation
return await my_embed.func(texts, **kwargs)
```
3. Wrapper calling decorated function (properly decorated):
```python
@wrap_embedding_func_with_attrs(embedding_dim=1536)
async def my_wrapper(texts, **kwargs): # ✅ Can decorate if calling .func
# Calling .func avoids double decoration
return await my_embed.func(texts, **kwargs)
```
The decorated function becomes an EmbeddingFunc instance with:
- embedding_dim: The embedding dimension
- max_token_size: Maximum token limit (optional)
- func: The original unwrapped function (access via .func)
- __call__: Wrapper that injects embedding_dim parameter
Double decoration causes:
- Double injection of embedding_dim parameter
- Incorrect parameter passing to the underlying implementation
- Runtime errors due to parameter conflicts
Args:
embedding_dim: The dimension of embedding vectors
max_token_size: Maximum number of tokens (optional)
send_dimensions: Whether to inject embedding_dim as a keyword argument (optional)
Returns:
A decorator that wraps the function as an EmbeddingFunc instance
Example of correct wrapper implementation:
```python
@wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8192)
@retry(...)
async def openai_embed(texts, ...):
# Base implementation
pass
@wrap_embedding_func_with_attrs(embedding_dim=1536) # Note: No @retry here!
async def azure_openai_embed(texts, ...):
# CRITICAL: Call .func to access unwrapped function
return await openai_embed.func(texts, ...) # ✅ Correct
# return await openai_embed(texts, ...) # ❌ Wrong - double decoration!
```
"""
def final_decro(func) -> EmbeddingFunc:
new_func = EmbeddingFunc(**kwargs, func=func)

File diff suppressed because it is too large Load Diff

View File

@@ -16,32 +16,32 @@
"preview-no-bun": "vite preview"
},
"dependencies": {
"@faker-js/faker": "^9.9.0",
"@faker-js/faker": "^10.1.0",
"@radix-ui/react-alert-dialog": "^1.1.15",
"@radix-ui/react-checkbox": "^1.3.3",
"@radix-ui/react-dialog": "^1.1.15",
"@radix-ui/react-popover": "^1.1.15",
"@radix-ui/react-progress": "^1.1.7",
"@radix-ui/react-progress": "^1.1.8",
"@radix-ui/react-scroll-area": "^1.2.10",
"@radix-ui/react-select": "^2.2.6",
"@radix-ui/react-separator": "^1.1.7",
"@radix-ui/react-slot": "^1.2.3",
"@radix-ui/react-separator": "^1.1.8",
"@radix-ui/react-slot": "^1.2.4",
"@radix-ui/react-tabs": "^1.1.13",
"@radix-ui/react-tooltip": "^1.2.8",
"@radix-ui/react-use-controllable-state": "^1.2.2",
"@react-sigma/core": "^5.0.4",
"@react-sigma/graph-search": "^5.0.4",
"@react-sigma/layout-circlepack": "^5.0.4",
"@react-sigma/layout-circular": "^5.0.4",
"@react-sigma/layout-force": "^5.0.4",
"@react-sigma/layout-forceatlas2": "^5.0.4",
"@react-sigma/layout-noverlap": "^5.0.4",
"@react-sigma/layout-random": "^5.0.4",
"@react-sigma/minimap": "^5.0.5",
"@react-sigma/core": "^5.0.6",
"@react-sigma/graph-search": "^5.0.6",
"@react-sigma/layout-circlepack": "^5.0.6",
"@react-sigma/layout-circular": "^5.0.6",
"@react-sigma/layout-force": "^5.0.6",
"@react-sigma/layout-forceatlas2": "^5.0.6",
"@react-sigma/layout-noverlap": "^5.0.6",
"@react-sigma/layout-random": "^5.0.6",
"@react-sigma/minimap": "^5.0.6",
"@sigma/edge-curve": "^3.1.0",
"@sigma/node-border": "^3.0.0",
"@tanstack/react-table": "^8.21.3",
"axios": "^1.12.2",
"axios": "^1.13.2",
"class-variance-authority": "^0.7.1",
"clsx": "^2.1.1",
"cmdk": "^1.1.1",
@@ -51,21 +51,21 @@
"graphology-layout-force": "^0.2.4",
"graphology-layout-forceatlas2": "^0.10.1",
"graphology-layout-noverlap": "^0.4.2",
"i18next": "^24.2.3",
"katex": "^0.16.23",
"lucide-react": "^0.475.0",
"mermaid": "^11.12.0",
"i18next": "^25.6.3",
"katex": "^0.16.25",
"mermaid": "^11.12.1",
"lucide-react": "^0.555.0",
"minisearch": "^7.2.0",
"react": "^19.2.0",
"react-dom": "^19.2.0",
"react": "^19.2.1",
"react-dom": "^19.2.1",
"react-dropzone": "^14.3.8",
"react-error-boundary": "^5.0.0",
"react-i18next": "^15.7.4",
"react-markdown": "^9.1.0",
"react-error-boundary": "^6.0.0",
"react-i18next": "^16.3.5",
"react-markdown": "^10.1.0",
"react-number-format": "^5.4.4",
"react-router-dom": "^7.9.4",
"react-router-dom": "^7.9.6",
"react-select": "^5.10.2",
"react-syntax-highlighter": "^15.6.6",
"react-syntax-highlighter": "^16.1.0",
"rehype-katex": "^7.0.1",
"rehype-raw": "^7.0.0",
"rehype-react": "^8.0.0",
@@ -73,40 +73,40 @@
"remark-math": "^6.0.0",
"seedrandom": "^3.0.5",
"sigma": "^3.0.2",
"sonner": "^1.7.4",
"tailwind-merge": "^3.3.1",
"sonner": "^2.0.7",
"tailwind-merge": "^3.4.0",
"tailwind-scrollbar": "^4.0.2",
"typography": "^0.16.24",
"unist-util-visit": "^5.0.0",
"zustand": "^5.0.8"
"zustand": "^5.0.9"
},
"devDependencies": {
"@eslint/js": "^9.37.0",
"@stylistic/eslint-plugin-js": "^3.1.0",
"@tailwindcss/vite": "^4.1.14",
"@types/bun": "^1.2.23",
"@eslint/js": "^9.39.1",
"@stylistic/eslint-plugin-js": "^4.4.1",
"@types/bun": "^1.3.3",
"@tailwindcss/vite": "^4.1.17",
"@types/katex": "^0.16.7",
"@types/node": "^22.18.9",
"@types/node": "^24.10.1",
"@tailwindcss/typography": "^0.5.15",
"@types/react": "^19.2.2",
"@types/react-dom": "^19.2.1",
"@types/react": "^19.2.7",
"@types/react-dom": "^19.2.3",
"@types/react-i18next": "^8.1.0",
"@types/react-syntax-highlighter": "^15.5.13",
"@types/seedrandom": "^3.0.8",
"@vitejs/plugin-react-swc": "^3.11.0",
"eslint": "^9.37.0",
"@vitejs/plugin-react-swc": "^4.2.2",
"eslint": "^9.39.1",
"eslint-config-prettier": "^10.1.8",
"eslint-plugin-react": "^7.37.5",
"eslint-plugin-react-hooks": "^5.2.0",
"eslint-plugin-react-refresh": "^0.4.23",
"globals": "^15.15.0",
"eslint-plugin-react-hooks": "^7.0.1",
"eslint-plugin-react-refresh": "^0.4.24",
"globals": "^16.5.0",
"graphology-types": "^0.24.8",
"prettier": "^3.6.2",
"prettier-plugin-tailwindcss": "^0.6.14",
"tailwindcss": "^4.1.14",
"prettier": "^3.7.1",
"prettier-plugin-tailwindcss": "^0.7.2",
"typescript-eslint": "^8.48.0",
"tailwindcss": "^4.1.17",
"tailwindcss-animate": "^1.0.7",
"typescript": "~5.7.3",
"typescript-eslint": "^8.46.0",
"vite": "^6.3.6"
"typescript": "~5.9.3",
"vite": "^7.2.6"
}
}

View File

@@ -76,7 +76,8 @@ export const ChatMessage = ({
? message.content
: (displayContent !== undefined ? displayContent : (message.content || ''))
// Load KaTeX dynamically
// Load KaTeX rehype plugin dynamically
// Note: KaTeX extensions (mhchem, copy-tex) are imported statically in main.tsx
useEffect(() => {
const loadKaTeX = async () => {
try {
@@ -84,7 +85,6 @@ export const ChatMessage = ({
setKatexPlugin(() => rehypeKatex);
} catch (error) {
console.error('Failed to load KaTeX plugin:', error);
// Set to null to ensure we don't try to use a failed plugin
setKatexPlugin(null);
}
};

View File

@@ -4,6 +4,9 @@ import './index.css'
import AppRouter from './AppRouter'
import './i18n.ts';
import 'katex/dist/katex.min.css';
// Import KaTeX extensions at app startup to ensure they are registered before any rendering
import 'katex/contrib/mhchem'; // Chemistry formulas: \ce{} and \pu{}
import 'katex/contrib/copy-tex'; // Allow copying rendered formulas as LaTeX source

View File

@@ -1 +1,2 @@
declare module 'katex/contrib/mhchem';
declare module 'katex/contrib/copy-tex';

View File

@@ -10,7 +10,10 @@ export default defineConfig({
resolve: {
alias: {
'@': path.resolve(__dirname, './src')
}
},
// Force all modules to use the same katex instance
// This ensures mhchem extension registered in main.tsx is available to rehype-katex
dedupe: ['katex']
},
// base: import.meta.env.VITE_BASE_URL || '/webui/',
base: webuiPrefix,

251
paging.md
View File

@@ -1,251 +0,0 @@
# Pagination Plan for the Document List Page
## I. Objectives
### Current Problems
- The document page currently loads every document at once, so the UI becomes slow when there are many documents
- Frontend memory usage is excessive, and the user experience suffers
- Status filtering and sorting are performed entirely on the frontend, which is inefficient
### Goals
- Implement backend paginated queries to reduce the amount of data transferred per request
- Add a pagination control component with page navigation and jump-to-page support
- Let users set the number of rows per page (10-200)
- Keep the existing status filtering and sorting behavior unchanged
- Improve performance with large document sets
## II. Overall Architecture
### Design Principles
1. **Unified pagination interface**: the backend exposes a single pagination API that supports status filtering and sorting
2. **Smart refresh strategy**: choose the refresh frequency and scope based on document processing status
3. **Immediate user feedback**: status switches and pagination actions respond instantly
4. **Backward compatibility**: keep existing functionality intact without disrupting current workflows
5. **Performance**: reduce memory usage and optimize network requests
### Technical Approach
- **Backend**: add paginated query interfaces on top of the existing storage layer
- **Frontend**: rework the DocumentManager component and add pagination controls
- **Data flow**: a unified paginated query plus an independent status-count query
## III. Backend Changes
### Step 1: Extend the Storage Layer Interface
**Files changed**: `lightrag/kg/base.py`
**Key ideas**
- Add paginated query methods to the BaseDocStatusStorage abstract class
- Design a unified pagination interface that accepts status filter, sorting, and pagination parameters
- Return a tuple of the document list and the total count
**Interface design** (a Python sketch follows the signature block below)
```
get_docs_paginated(status_filter, page, page_size, sort_field, sort_direction) -> (documents, total_count)
count_by_status(status) -> int
get_all_status_counts() -> Dict[str, int]
```
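A minimal Python sketch of the interface above, keeping the method and parameter names listed in this plan; the concrete type annotations are illustrative assumptions rather than the repository's actual signatures:
```python
# Hedged sketch of the abstract interface described above (lightrag/kg/base.py).
from abc import ABC, abstractmethod
from typing import Any

class BaseDocStatusStorage(ABC):
    @abstractmethod
    async def get_docs_paginated(
        self,
        status_filter: str | None,
        page: int,
        page_size: int,
        sort_field: str = "updated_at",
        sort_direction: str = "desc",
    ) -> tuple[list[dict[str, Any]], int]:
        """Return one page of documents plus the total count for the filter."""

    @abstractmethod
    async def count_by_status(self, status: str) -> int:
        """Return how many documents are in the given status."""

    @abstractmethod
    async def get_all_status_counts(self) -> dict[str, int]:
        """Return a mapping of status name -> document count."""
```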
### Step 2: Implement Each Storage Backend
**Files changed**:
- `lightrag/kg/postgres_impl.py`
- `lightrag/kg/mongo_impl.py`
- `lightrag/kg/redis_impl.py`
- `lightrag/kg/json_doc_status_impl.py`
**PostgreSQL notes**
- Use LIMIT and OFFSET for pagination
- Build dynamic WHERE clauses to support status filtering
- Use a COUNT query to obtain the total count
- Add suitable database indexes to optimize query performance
**MongoDB notes**
- Use skip() and limit() for pagination
- Use aggregation pipelines for status counts
- Optimize query filters and indexes
**Redis and JSON notes**
- Start with a simple implementation: load the full document list into memory, then filter and sort
**Key considerations**
- Keep pagination logic consistent across storage backends
- Handle edge cases (empty results, out-of-range page numbers, etc.)
- Optimize query performance and avoid full table scans
### Step 3: Update the API Routing Layer
**Files changed**: `lightrag/api/routers/document_routes.py`
**New endpoints**
1. `POST /documents/paginated` - paginated document query
2. `GET /documents/status_counts` - fetch status counts
**Data models** (sketched after this step)
- DocumentsRequest: pagination request parameters
- PaginatedDocsResponse: paginated response payload
- PaginationInfo: pagination metadata
**Key logic**
- Parameter validation (page range, page-size limits)
- Query the page of documents and the status counts in parallel
- Error handling and exception responses
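A hedged sketch of the request and response models named above; only the model names and the 10-200 page-size range come from this plan, and every other field choice is illustrative:
```python
# Hedged sketch of the data models for POST /documents/paginated.
from typing import Any
from pydantic import BaseModel, Field

class DocumentsRequest(BaseModel):
    status_filter: str | None = None
    page: int = Field(1, ge=1)
    page_size: int = Field(50, ge=10, le=200)
    sort_field: str = "updated_at"
    sort_direction: str = "desc"

class PaginationInfo(BaseModel):
    page: int
    page_size: int
    total_count: int
    total_pages: int

class PaginatedDocsResponse(BaseModel):
    documents: list[dict[str, Any]]
    pagination: PaginationInfo
    status_counts: dict[str, int]
```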
### Step 4: Database Optimization
**Indexing strategy**
- Create a composite index on workspace + status + updated_at
- Create a composite index on workspace + status + created_at
- Create an index on workspace + updated_at
- Create an index on workspace + created_at
**Performance considerations**
- Avoid the performance pitfalls of deep pagination
- Consider adding a cache layer to optimize status-count queries
- Monitor query performance and adjust the indexing strategy when necessary
## IV. Frontend Changes
### Step 1: Extend the API Client
**Files changed**: `lightrag_webui/src/api/lightrag.ts`
**New functions**
- `getDocumentsPaginated()` - paginated document query
- `getDocumentStatusCounts()` - fetch status counts
**Type definitions**
- Define TypeScript types for the pagination request and response
- Ensure type safety and editor autocompletion
### Step 2: Build the Pagination Control Component
**New file**: `lightrag_webui/src/components/ui/PaginationControls.tsx`
**Component features**
- Compact and full display modes
- Page-number input and jump-to-page
- Page-size selector (10-200)
- Total-count display
- Disabled-state handling
**Design notes**
- Responsive design that adapts to different screen sizes
- Debouncing to avoid excessive requests
- Error handling with state rollback
- Placement: above the status buttons, centered on the same row as the scan button
### Step 3: Improve the Status Filter Buttons
**Files changed**: the existing status-filter components
**Improvements**
- Add loading indicators
- Smart hints when the data is insufficient
- Periodic data refresh; when switching status, refresh immediately if the current status data is more than 5 seconds older than the last refresh
- Prevent duplicate clicks and concurrent requests
### Step 4: Rework the DocumentManager Component
**Files changed**: `lightrag_webui/src/features/DocumentManager.tsx`
**Core changes**
**State management refactor**
- Replace the docs state with currentPageDocs, holding only the current page of data
- Add a pagination state for pagination metadata
- Add a statusCounts state that manages status counts independently
- Add loading-state flags (isStatusChanging, isRefreshing)
**Data fetching strategy**
- Smart refresh: full refresh while processing is active, lightweight refresh once stable
- Refresh immediately when the status filter changes
- Update data immediately on pagination actions
- Coordinate periodic refresh with manual operations
**Layout adjustments**
- Place the pagination control in the middle of the top action bar
- Keep the status filter buttons above the table
- Ensure the responsive layout still fits
**Event handling**
- On status switch, reset to the first page if the current page no longer has enough data
- Recalculate the page number intelligently when the page size changes
- Roll back state on errors
## V. User Experience
### Immediate Feedback
- Show a loading animation while switching status
- Provide visual feedback during pagination actions
- Hint the user when the data is insufficient
### Error Handling
- Retry automatically on network errors
- Roll back state when an operation fails
- Friendly error messages
### Performance Measures
- Debounce frequent operations
- Use the smart refresh strategy to cut unnecessary requests
- Clean up timers and in-flight requests when components unmount
## VI. Compatibility
### Backward Compatibility
- Keep the original /documents endpoint as a fallback
- Existing features (sorting, filtering, selection) remain unchanged
- Progressive rollout, gated by a configuration switch
### Data Consistency
- Keep paginated data and status counts in sync
- Handle consistency issues caused by concurrent updates
- Refresh periodically to keep data current
## VII. Testing Strategy
### Functional Tests
- Cover a range of pagination scenarios
- Test combinations of status filters
- Verify sorting behavior
- Test boundary conditions
### Performance Tests
- Large data-volume scenarios
- Concurrent-access stress tests
- Memory-usage monitoring
- Response-time measurements
### Compatibility Tests
- Test across the different storage backends
- Cross-browser compatibility
- Responsive behavior on mobile devices
## VIII. Key Implementation Details
### Backend Pagination Query Design
- **Unified interface**: every storage backend implements the same pagination interface signature
- **Parameter validation**: strictly validate page number, page size, and sort parameters
- **Performance**: use native database pagination rather than paginating in the application layer
- **Error handling**: a unified error response format and exception handling mechanism
### Frontend State Management
- **Data separation**: manage current-page data and status counts separately
- **Smart refresh**: pick the refresh strategy based on document processing status
- **State sync**: keep UI state consistent with backend data
- **Error recovery**: automatically roll back to the previous state when an operation fails
### Pagination Control Component Design
- **Compact layout**: fits the limited space of the top action bar
- **Responsive design**: adapts its layout to different screen sizes
- **Interaction**: debouncing, loading states, and disabled-state management
- **Accessibility**: keyboard navigation and screen-reader support
### Database Index Optimization
- **Composite indexes**: combined indexes on workspace + status + sort_field
- **Covering indexes**: use covering indexes where possible to reduce extra lookups
- **Index monitoring**: regularly monitor index usage and query performance
- **Progressive tuning**: adjust the indexing strategy based on real-world usage

View File

@@ -23,7 +23,6 @@ classifiers = [
dependencies = [
"aiohttp",
"configparser",
"future",
"google-api-core>=2.0.0,<3.0.0",
"google-genai>=1.0.0,<2.0.0",
"json_repair",
@@ -47,18 +46,18 @@ pytest = [
"pytest>=8.4.2",
"pytest-asyncio>=1.2.0",
"pre-commit",
"ruff",
]
api = [
# Core dependencies
"aiohttp",
"configparser",
"future",
"json_repair",
"nano-vectordb",
"networkx",
"numpy>=1.24.0,<2.0.0",
"openai>=1.0.0,<3.0.0",
"openai>=2.0.0,<3.0.0",
"pandas>=2.0.0,<2.4.0",
"pipmaster",
"pydantic",
@@ -77,9 +76,9 @@ api = [
"distro",
"fastapi",
"httpcore",
"httpx",
"httpx>=0.28.1",
"jiter",
"passlib[bcrypt]",
"bcrypt>=4.0.0",
"psutil",
"PyJWT>=2.8.0,<3.0.0",
"python-jose[cryptography]",
@@ -115,7 +114,7 @@ offline-storage = [
offline-llm = [
# LLM provider dependencies
"openai>=1.0.0,<3.0.0",
"openai>=2.0.0,<3.0.0",
"anthropic>=0.18.0,<1.0.0",
"ollama>=0.1.0,<1.0.0",
"zhipuai>=2.0.0,<3.0.0",
@@ -131,15 +130,18 @@ offline = [
"lightrag-hku[api,offline-storage,offline-llm]",
]
evaluation = [
# Test framework dependencies (for evaluation)
test = [
"lightrag-hku[api]",
"pytest>=8.4.2",
"pytest-asyncio>=1.2.0",
"pre-commit",
# RAG evaluation dependencies (RAGAS framework)
"ruff",
]
evaluation = [
"lightrag-hku[api]",
"ragas>=0.3.7",
"datasets>=4.3.0",
"httpx>=0.28.1",
]
observability = [
@@ -151,6 +153,7 @@ observability = [
lightrag-server = "lightrag.api.lightrag_server:main"
lightrag-gunicorn = "lightrag.api.run_with_gunicorn:main"
lightrag-download-cache = "lightrag.tools.download_cache:main"
lightrag-clean-llmqc = "lightrag.tools.clean_llm_query_cache:main"
[project.urls]
Homepage = "https://github.com/HKUDS/LightRAG"

View File

@@ -14,6 +14,6 @@ google-api-core>=2.0.0,<3.0.0
google-genai>=1.0.0,<2.0.0
llama-index>=0.9.0,<1.0.0
ollama>=0.1.0,<1.0.0
openai>=1.0.0,<3.0.0
openai>=2.0.0,<3.0.0
voyageai>=0.2.0,<1.0.0
zhipuai>=2.0.0,<3.0.0

View File

@@ -19,7 +19,7 @@ google-genai>=1.0.0,<2.0.0
llama-index>=0.9.0,<1.0.0
neo4j>=5.0.0,<7.0.0
ollama>=0.1.0,<1.0.0
openai>=1.0.0,<3.0.0
openai>=2.0.0,<3.0.0
openpyxl>=3.0.0,<4.0.0
pycryptodome>=3.0.0,<4.0.0
pymilvus>=2.6.2,<3.0.0

1066
tests/test_chunking.py Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,113 @@
"""
Test for overlap_tokens validation to prevent infinite loop.
This test validates the fix for the bug where overlap_tokens >= max_tokens
causes an infinite loop in the chunking function.
"""
from lightrag.rerank import chunk_documents_for_rerank
class TestOverlapValidation:
"""Test suite for overlap_tokens validation"""
def test_overlap_greater_than_max_tokens(self):
"""Test that overlap_tokens > max_tokens is clamped and doesn't hang"""
documents = [" ".join([f"word{i}" for i in range(100)])]
# This should clamp overlap_tokens to 29 (max_tokens - 1)
chunked_docs, doc_indices = chunk_documents_for_rerank(
documents, max_tokens=30, overlap_tokens=32
)
# Should complete without hanging
assert len(chunked_docs) > 0
assert all(idx == 0 for idx in doc_indices)
def test_overlap_equal_to_max_tokens(self):
"""Test that overlap_tokens == max_tokens is clamped and doesn't hang"""
documents = [" ".join([f"word{i}" for i in range(100)])]
# This should clamp overlap_tokens to 29 (max_tokens - 1)
chunked_docs, doc_indices = chunk_documents_for_rerank(
documents, max_tokens=30, overlap_tokens=30
)
# Should complete without hanging
assert len(chunked_docs) > 0
assert all(idx == 0 for idx in doc_indices)
def test_overlap_slightly_less_than_max_tokens(self):
"""Test that overlap_tokens < max_tokens works normally"""
documents = [" ".join([f"word{i}" for i in range(100)])]
# This should work without clamping
chunked_docs, doc_indices = chunk_documents_for_rerank(
documents, max_tokens=30, overlap_tokens=29
)
# Should complete successfully
assert len(chunked_docs) > 0
assert all(idx == 0 for idx in doc_indices)
def test_small_max_tokens_with_large_overlap(self):
"""Test edge case with very small max_tokens"""
documents = [" ".join([f"word{i}" for i in range(50)])]
# max_tokens=5, overlap_tokens=10 should clamp to 4
chunked_docs, doc_indices = chunk_documents_for_rerank(
documents, max_tokens=5, overlap_tokens=10
)
# Should complete without hanging
assert len(chunked_docs) > 0
assert all(idx == 0 for idx in doc_indices)
def test_multiple_documents_with_invalid_overlap(self):
"""Test multiple documents with overlap_tokens >= max_tokens"""
documents = [
" ".join([f"word{i}" for i in range(50)]),
"short document",
" ".join([f"word{i}" for i in range(75)]),
]
# overlap_tokens > max_tokens
chunked_docs, doc_indices = chunk_documents_for_rerank(
documents, max_tokens=25, overlap_tokens=30
)
# Should complete successfully and chunk the long documents
assert len(chunked_docs) >= len(documents)
# Short document should not be chunked
assert "short document" in chunked_docs
def test_normal_operation_unaffected(self):
"""Test that normal cases continue to work correctly"""
documents = [
" ".join([f"word{i}" for i in range(100)]),
"short doc",
]
# Normal case: overlap_tokens (10) < max_tokens (50)
chunked_docs, doc_indices = chunk_documents_for_rerank(
documents, max_tokens=50, overlap_tokens=10
)
# Long document should be chunked, short one should not
assert len(chunked_docs) > 2 # At least 3 chunks (2 from long doc + 1 short)
assert "short doc" in chunked_docs
# Verify doc_indices maps correctly
assert doc_indices[-1] == 1 # Last chunk is from second document
def test_edge_case_max_tokens_one(self):
"""Test edge case where max_tokens=1"""
documents = [" ".join([f"word{i}" for i in range(20)])]
# max_tokens=1, overlap_tokens=5 should clamp to 0
chunked_docs, doc_indices = chunk_documents_for_rerank(
documents, max_tokens=1, overlap_tokens=5
)
# Should complete without hanging
assert len(chunked_docs) > 0
assert all(idx == 0 for idx in doc_indices)

View File

@@ -0,0 +1,564 @@
"""
Unit tests for rerank document chunking functionality.
Tests the chunk_documents_for_rerank and aggregate_chunk_scores functions
in lightrag/rerank.py to ensure proper document splitting and score aggregation.
"""
import pytest
from unittest.mock import Mock, patch, AsyncMock
from lightrag.rerank import (
chunk_documents_for_rerank,
aggregate_chunk_scores,
cohere_rerank,
)
class TestChunkDocumentsForRerank:
"""Test suite for chunk_documents_for_rerank function"""
def test_no_chunking_needed_for_short_docs(self):
"""Documents shorter than max_tokens should not be chunked"""
documents = [
"Short doc 1",
"Short doc 2",
"Short doc 3",
]
chunked_docs, doc_indices = chunk_documents_for_rerank(
documents, max_tokens=100, overlap_tokens=10
)
# No chunking should occur
assert len(chunked_docs) == 3
assert chunked_docs == documents
assert doc_indices == [0, 1, 2]
def test_chunking_with_character_fallback(self):
"""Test chunking falls back to character-based when tokenizer unavailable"""
# Create a very long document that exceeds character limit
long_doc = "a" * 2000 # 2000 characters
documents = [long_doc, "short doc"]
with patch("lightrag.utils.TiktokenTokenizer", side_effect=ImportError):
chunked_docs, doc_indices = chunk_documents_for_rerank(
documents,
max_tokens=100, # 100 tokens = ~400 chars
overlap_tokens=10, # 10 tokens = ~40 chars
)
# First doc should be split into chunks, second doc stays whole
assert len(chunked_docs) > 2 # At least one chunk from first doc + second doc
assert chunked_docs[-1] == "short doc" # Last chunk is the short doc
# Verify doc_indices maps chunks to correct original document
assert doc_indices[-1] == 1 # Last chunk maps to document 1
def test_chunking_with_tiktoken_tokenizer(self):
"""Test chunking with actual tokenizer"""
# Create document with known token count
# Approximate: "word " = ~1 token, so 200 words ~ 200 tokens
long_doc = " ".join([f"word{i}" for i in range(200)])
documents = [long_doc, "short"]
chunked_docs, doc_indices = chunk_documents_for_rerank(
documents, max_tokens=50, overlap_tokens=10
)
# Long doc should be split, short doc should remain
assert len(chunked_docs) > 2
assert doc_indices[-1] == 1 # Last chunk is from second document
# Verify overlapping chunks contain overlapping content
if len(chunked_docs) > 2:
# Check that consecutive chunks from same doc have some overlap
for i in range(len(doc_indices) - 1):
if doc_indices[i] == doc_indices[i + 1] == 0:
# Both chunks from first doc, should have overlap
chunk1_words = chunked_docs[i].split()
chunk2_words = chunked_docs[i + 1].split()
# At least one word should be common due to overlap
assert any(word in chunk2_words for word in chunk1_words[-5:])
def test_empty_documents(self):
"""Test handling of empty document list"""
documents = []
chunked_docs, doc_indices = chunk_documents_for_rerank(documents)
assert chunked_docs == []
assert doc_indices == []
def test_single_document_chunking(self):
"""Test chunking of a single long document"""
# Create document with ~100 tokens
long_doc = " ".join([f"token{i}" for i in range(100)])
documents = [long_doc]
chunked_docs, doc_indices = chunk_documents_for_rerank(
documents, max_tokens=30, overlap_tokens=5
)
# Should create multiple chunks
assert len(chunked_docs) > 1
# All chunks should map to document 0
assert all(idx == 0 for idx in doc_indices)
class TestAggregateChunkScores:
"""Test suite for aggregate_chunk_scores function"""
def test_no_chunking_simple_aggregation(self):
"""Test aggregation when no chunking occurred (1:1 mapping)"""
chunk_results = [
{"index": 0, "relevance_score": 0.9},
{"index": 1, "relevance_score": 0.7},
{"index": 2, "relevance_score": 0.5},
]
doc_indices = [0, 1, 2] # 1:1 mapping
num_original_docs = 3
aggregated = aggregate_chunk_scores(
chunk_results, doc_indices, num_original_docs, aggregation="max"
)
# Results should be sorted by score
assert len(aggregated) == 3
assert aggregated[0]["index"] == 0
assert aggregated[0]["relevance_score"] == 0.9
assert aggregated[1]["index"] == 1
assert aggregated[1]["relevance_score"] == 0.7
assert aggregated[2]["index"] == 2
assert aggregated[2]["relevance_score"] == 0.5
def test_max_aggregation_with_chunks(self):
"""Test max aggregation strategy with multiple chunks per document"""
# 5 chunks: first 3 from doc 0, last 2 from doc 1
chunk_results = [
{"index": 0, "relevance_score": 0.5},
{"index": 1, "relevance_score": 0.8},
{"index": 2, "relevance_score": 0.6},
{"index": 3, "relevance_score": 0.7},
{"index": 4, "relevance_score": 0.4},
]
doc_indices = [0, 0, 0, 1, 1]
num_original_docs = 2
aggregated = aggregate_chunk_scores(
chunk_results, doc_indices, num_original_docs, aggregation="max"
)
# Should take max score for each document
assert len(aggregated) == 2
assert aggregated[0]["index"] == 0
assert aggregated[0]["relevance_score"] == 0.8 # max of 0.5, 0.8, 0.6
assert aggregated[1]["index"] == 1
assert aggregated[1]["relevance_score"] == 0.7 # max of 0.7, 0.4
def test_mean_aggregation_with_chunks(self):
"""Test mean aggregation strategy"""
chunk_results = [
{"index": 0, "relevance_score": 0.6},
{"index": 1, "relevance_score": 0.8},
{"index": 2, "relevance_score": 0.4},
]
doc_indices = [0, 0, 1] # First two chunks from doc 0, last from doc 1
num_original_docs = 2
aggregated = aggregate_chunk_scores(
chunk_results, doc_indices, num_original_docs, aggregation="mean"
)
assert len(aggregated) == 2
assert aggregated[0]["index"] == 0
assert aggregated[0]["relevance_score"] == pytest.approx(0.7) # (0.6 + 0.8) / 2
assert aggregated[1]["index"] == 1
assert aggregated[1]["relevance_score"] == 0.4
def test_first_aggregation_with_chunks(self):
"""Test first aggregation strategy"""
chunk_results = [
{"index": 0, "relevance_score": 0.6},
{"index": 1, "relevance_score": 0.8},
{"index": 2, "relevance_score": 0.4},
]
doc_indices = [0, 0, 1]
num_original_docs = 2
aggregated = aggregate_chunk_scores(
chunk_results, doc_indices, num_original_docs, aggregation="first"
)
assert len(aggregated) == 2
# First should use first score seen for each doc
assert aggregated[0]["index"] == 0
assert aggregated[0]["relevance_score"] == 0.6 # First score for doc 0
assert aggregated[1]["index"] == 1
assert aggregated[1]["relevance_score"] == 0.4
def test_empty_chunk_results(self):
"""Test handling of empty results"""
aggregated = aggregate_chunk_scores([], [], 3, aggregation="max")
assert aggregated == []
def test_documents_with_no_scores(self):
"""Test when some documents have no chunks/scores"""
chunk_results = [
{"index": 0, "relevance_score": 0.9},
{"index": 1, "relevance_score": 0.7},
]
doc_indices = [0, 0] # Both chunks from document 0
num_original_docs = 3 # But we have 3 documents total
aggregated = aggregate_chunk_scores(
chunk_results, doc_indices, num_original_docs, aggregation="max"
)
# Only doc 0 should appear in results
assert len(aggregated) == 1
assert aggregated[0]["index"] == 0
def test_unknown_aggregation_strategy(self):
"""Test that unknown strategy falls back to max"""
chunk_results = [
{"index": 0, "relevance_score": 0.6},
{"index": 1, "relevance_score": 0.8},
]
doc_indices = [0, 0]
num_original_docs = 1
# Use invalid strategy
aggregated = aggregate_chunk_scores(
chunk_results, doc_indices, num_original_docs, aggregation="invalid"
)
# Should fall back to max
assert aggregated[0]["relevance_score"] == 0.8
@pytest.mark.offline
class TestTopNWithChunking:
"""Tests for top_n behavior when chunking is enabled (Bug fix verification)"""
@pytest.mark.asyncio
async def test_top_n_limits_documents_not_chunks(self):
"""
Test that top_n correctly limits documents (not chunks) when chunking is enabled.
Bug scenario: 10 docs expand to 50 chunks. With old behavior, top_n=5 would
return scores for only 5 chunks (possibly all from 1-2 docs). After aggregation,
fewer than 5 documents would be returned.
Fixed behavior: top_n=5 should return exactly 5 documents after aggregation.
"""
# Setup: 5 documents, each producing multiple chunks when chunked
# Using small max_tokens to force chunking
long_docs = [" ".join([f"doc{i}_word{j}" for j in range(50)]) for i in range(5)]
query = "test query"
# First, determine how many chunks will be created by actual chunking
_, doc_indices = chunk_documents_for_rerank(
long_docs, max_tokens=50, overlap_tokens=10
)
num_chunks = len(doc_indices)
# Mock API returns scores for ALL chunks (simulating disabled API-level top_n)
# Give different scores to ensure doc 0 gets highest, doc 1 second, etc.
# Assign scores based on original document index (lower doc index = higher score)
mock_chunk_scores = []
for i in range(num_chunks):
original_doc = doc_indices[i]
# Higher score for lower doc index, with small variation per chunk
base_score = 0.9 - (original_doc * 0.1)
mock_chunk_scores.append({"index": i, "relevance_score": base_score})
mock_response = Mock()
mock_response.status = 200
mock_response.json = AsyncMock(return_value={"results": mock_chunk_scores})
mock_response.request_info = None
mock_response.history = None
mock_response.headers = {}
mock_response.__aenter__ = AsyncMock(return_value=mock_response)
mock_response.__aexit__ = AsyncMock(return_value=None)
mock_session = Mock()
mock_session.post = Mock(return_value=mock_response)
mock_session.__aenter__ = AsyncMock(return_value=mock_session)
mock_session.__aexit__ = AsyncMock(return_value=None)
with patch("lightrag.rerank.aiohttp.ClientSession", return_value=mock_session):
result = await cohere_rerank(
query=query,
documents=long_docs,
api_key="test-key",
base_url="http://test.com/rerank",
enable_chunking=True,
max_tokens_per_doc=50, # Match chunking above
top_n=3, # Request top 3 documents
)
# Verify: should get exactly 3 documents (not unlimited chunks)
assert len(result) == 3
# All results should have valid document indices (0-4)
assert all(0 <= r["index"] < 5 for r in result)
# Results should be sorted by score (descending)
assert all(
result[i]["relevance_score"] >= result[i + 1]["relevance_score"]
for i in range(len(result) - 1)
)
# The top 3 docs should be 0, 1, 2 (highest scores)
result_indices = [r["index"] for r in result]
assert set(result_indices) == {0, 1, 2}
@pytest.mark.asyncio
async def test_api_receives_no_top_n_when_chunking_enabled(self):
"""
Test that the API request does NOT include top_n when chunking is enabled.
This ensures all chunk scores are retrieved for proper aggregation.
"""
documents = [" ".join([f"word{i}" for i in range(100)]), "short doc"]
query = "test query"
captured_payload = {}
mock_response = Mock()
mock_response.status = 200
mock_response.json = AsyncMock(
return_value={
"results": [
{"index": 0, "relevance_score": 0.9},
{"index": 1, "relevance_score": 0.8},
{"index": 2, "relevance_score": 0.7},
]
}
)
mock_response.request_info = None
mock_response.history = None
mock_response.headers = {}
mock_response.__aenter__ = AsyncMock(return_value=mock_response)
mock_response.__aexit__ = AsyncMock(return_value=None)
def capture_post(*args, **kwargs):
captured_payload.update(kwargs.get("json", {}))
return mock_response
mock_session = Mock()
mock_session.post = Mock(side_effect=capture_post)
mock_session.__aenter__ = AsyncMock(return_value=mock_session)
mock_session.__aexit__ = AsyncMock(return_value=None)
with patch("lightrag.rerank.aiohttp.ClientSession", return_value=mock_session):
await cohere_rerank(
query=query,
documents=documents,
api_key="test-key",
base_url="http://test.com/rerank",
enable_chunking=True,
max_tokens_per_doc=30,
top_n=1, # User wants top 1 document
)
# Verify: API payload should NOT have top_n (disabled for chunking)
assert "top_n" not in captured_payload
@pytest.mark.asyncio
async def test_top_n_not_modified_when_chunking_disabled(self):
"""
Test that top_n is passed through to API when chunking is disabled.
"""
documents = ["doc1", "doc2"]
query = "test query"
captured_payload = {}
mock_response = Mock()
mock_response.status = 200
mock_response.json = AsyncMock(
return_value={
"results": [
{"index": 0, "relevance_score": 0.9},
]
}
)
mock_response.request_info = None
mock_response.history = None
mock_response.headers = {}
mock_response.__aenter__ = AsyncMock(return_value=mock_response)
mock_response.__aexit__ = AsyncMock(return_value=None)
def capture_post(*args, **kwargs):
captured_payload.update(kwargs.get("json", {}))
return mock_response
mock_session = Mock()
mock_session.post = Mock(side_effect=capture_post)
mock_session.__aenter__ = AsyncMock(return_value=mock_session)
mock_session.__aexit__ = AsyncMock(return_value=None)
with patch("lightrag.rerank.aiohttp.ClientSession", return_value=mock_session):
await cohere_rerank(
query=query,
documents=documents,
api_key="test-key",
base_url="http://test.com/rerank",
enable_chunking=False, # Chunking disabled
top_n=1,
)
# Verify: API payload should have top_n when chunking is disabled
assert captured_payload.get("top_n") == 1
@pytest.mark.offline
class TestCohereRerankChunking:
"""Integration tests for cohere_rerank with chunking enabled"""
@pytest.mark.asyncio
async def test_cohere_rerank_with_chunking_disabled(self):
"""Test that chunking can be disabled"""
documents = ["doc1", "doc2"]
query = "test query"
# Mock the generic_rerank_api
with patch(
"lightrag.rerank.generic_rerank_api", new_callable=AsyncMock
) as mock_api:
mock_api.return_value = [
{"index": 0, "relevance_score": 0.9},
{"index": 1, "relevance_score": 0.7},
]
result = await cohere_rerank(
query=query,
documents=documents,
api_key="test-key",
enable_chunking=False,
max_tokens_per_doc=100,
)
# Verify generic_rerank_api was called with correct parameters
mock_api.assert_called_once()
call_kwargs = mock_api.call_args[1]
assert call_kwargs["enable_chunking"] is False
assert call_kwargs["max_tokens_per_doc"] == 100
# Result should mirror mocked scores
assert len(result) == 2
assert result[0]["index"] == 0
assert result[0]["relevance_score"] == 0.9
assert result[1]["index"] == 1
assert result[1]["relevance_score"] == 0.7
@pytest.mark.asyncio
async def test_cohere_rerank_with_chunking_enabled(self):
"""Test that chunking parameters are passed through"""
documents = ["doc1", "doc2"]
query = "test query"
with patch(
"lightrag.rerank.generic_rerank_api", new_callable=AsyncMock
) as mock_api:
mock_api.return_value = [
{"index": 0, "relevance_score": 0.9},
{"index": 1, "relevance_score": 0.7},
]
result = await cohere_rerank(
query=query,
documents=documents,
api_key="test-key",
enable_chunking=True,
max_tokens_per_doc=480,
)
# Verify parameters were passed
call_kwargs = mock_api.call_args[1]
assert call_kwargs["enable_chunking"] is True
assert call_kwargs["max_tokens_per_doc"] == 480
# Result should mirror mocked scores
assert len(result) == 2
assert result[0]["index"] == 0
assert result[0]["relevance_score"] == 0.9
assert result[1]["index"] == 1
assert result[1]["relevance_score"] == 0.7
@pytest.mark.asyncio
async def test_cohere_rerank_default_parameters(self):
"""Test default parameter values for cohere_rerank"""
documents = ["doc1"]
query = "test"
with patch(
"lightrag.rerank.generic_rerank_api", new_callable=AsyncMock
) as mock_api:
mock_api.return_value = [{"index": 0, "relevance_score": 0.9}]
result = await cohere_rerank(
query=query, documents=documents, api_key="test-key"
)
# Verify default values
call_kwargs = mock_api.call_args[1]
assert call_kwargs["enable_chunking"] is False
assert call_kwargs["max_tokens_per_doc"] == 4096
assert call_kwargs["model"] == "rerank-v3.5"
# Result should mirror mocked scores
assert len(result) == 1
assert result[0]["index"] == 0
assert result[0]["relevance_score"] == 0.9
@pytest.mark.offline
class TestEndToEndChunking:
"""End-to-end tests for chunking workflow"""
@pytest.mark.asyncio
async def test_end_to_end_chunking_workflow(self):
"""Test complete chunking workflow from documents to aggregated results"""
# Create documents where first one needs chunking
long_doc = " ".join([f"word{i}" for i in range(100)])
documents = [long_doc, "short doc"]
query = "test query"
# Mock the HTTP call inside generic_rerank_api
mock_response = Mock()
mock_response.status = 200
mock_response.json = AsyncMock(
return_value={
"results": [
{"index": 0, "relevance_score": 0.5}, # chunk 0 from doc 0
{"index": 1, "relevance_score": 0.8}, # chunk 1 from doc 0
{"index": 2, "relevance_score": 0.6}, # chunk 2 from doc 0
{"index": 3, "relevance_score": 0.7}, # doc 1 (short)
]
}
)
mock_response.request_info = None
mock_response.history = None
mock_response.headers = {}
# Make mock_response an async context manager (for `async with session.post() as response`)
mock_response.__aenter__ = AsyncMock(return_value=mock_response)
mock_response.__aexit__ = AsyncMock(return_value=None)
mock_session = Mock()
# session.post() must return an async context manager, so return mock_response (configured as one above)
mock_session.post = Mock(return_value=mock_response)
mock_session.__aenter__ = AsyncMock(return_value=mock_session)
mock_session.__aexit__ = AsyncMock(return_value=None)
with patch("lightrag.rerank.aiohttp.ClientSession", return_value=mock_session):
result = await cohere_rerank(
query=query,
documents=documents,
api_key="test-key",
base_url="http://test.com/rerank",
enable_chunking=True,
max_tokens_per_doc=30, # Force chunking of long doc
)
# Should get at most one result per original document (len <= 2 here):
# the long doc's chunks are aggregated back into a single entry
assert len(result) <= len(documents)
# Results should be sorted by score
assert all(
result[i]["relevance_score"] >= result[i + 1]["relevance_score"]
for i in range(len(result) - 1)
)

72
uv.lock generated
View File

@@ -1334,15 +1334,6 @@ http = [
{ name = "aiohttp" },
]
[[package]]
name = "future"
version = "1.0.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/a7/b2/4140c69c6a66432916b26158687e821ba631a4c9273c474343badf84d3ba/future-1.0.0.tar.gz", hash = "sha256:bd2968309307861edae1458a4f8a4f3598c03be43b97521076aebf5d94c07b05", size = 1228490, upload-time = "2024-02-21T11:52:38.461Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/da/71/ae30dadffc90b9006d77af76b393cb9dfbfc9629f339fc1574a1c52e6806/future-1.0.0-py3-none-any.whl", hash = "sha256:929292d34f5872e70396626ef385ec22355a1fae8ad29e1a734c3e43f9fbc216", size = 491326, upload-time = "2024-02-21T11:52:35.956Z" },
]
[[package]]
name = "gitdb"
version = "4.0.12"
@@ -2542,7 +2533,6 @@ source = { editable = "." }
dependencies = [
{ name = "aiohttp" },
{ name = "configparser" },
{ name = "future" },
{ name = "google-api-core" },
{ name = "google-genai" },
{ name = "json-repair" },
@@ -2567,10 +2557,10 @@ api = [
{ name = "aiohttp" },
{ name = "ascii-colors" },
{ name = "asyncpg" },
{ name = "bcrypt" },
{ name = "configparser" },
{ name = "distro" },
{ name = "fastapi" },
{ name = "future" },
{ name = "google-api-core" },
{ name = "google-genai" },
{ name = "gunicorn" },
@@ -2585,7 +2575,6 @@ api = [
{ name = "openai" },
{ name = "openpyxl" },
{ name = "pandas" },
{ name = "passlib", extra = ["bcrypt"] },
{ name = "pipmaster" },
{ name = "psutil" },
{ name = "pycryptodome" },
@@ -2615,6 +2604,7 @@ evaluation = [
{ name = "pytest" },
{ name = "pytest-asyncio" },
{ name = "ragas" },
{ name = "ruff" },
]
observability = [
{ name = "langfuse" },
@@ -2626,10 +2616,10 @@ offline = [
{ name = "anthropic" },
{ name = "ascii-colors" },
{ name = "asyncpg" },
{ name = "bcrypt" },
{ name = "configparser" },
{ name = "distro" },
{ name = "fastapi" },
{ name = "future" },
{ name = "google-api-core" },
{ name = "google-genai" },
{ name = "gunicorn" },
@@ -2647,7 +2637,6 @@ offline = [
{ name = "openai" },
{ name = "openpyxl" },
{ name = "pandas" },
{ name = "passlib", extra = ["bcrypt"] },
{ name = "pipmaster" },
{ name = "psutil" },
{ name = "pycryptodome" },
@@ -2700,6 +2689,7 @@ pytest = [
{ name = "pre-commit" },
{ name = "pytest" },
{ name = "pytest-asyncio" },
{ name = "ruff" },
]
[package.metadata]
@@ -2712,14 +2702,13 @@ requires-dist = [
{ name = "ascii-colors", marker = "extra == 'api'" },
{ name = "asyncpg", marker = "extra == 'api'" },
{ name = "asyncpg", marker = "extra == 'offline-storage'", specifier = ">=0.29.0,<1.0.0" },
{ name = "bcrypt", marker = "extra == 'api'", specifier = ">=4.0.0" },
{ name = "configparser" },
{ name = "configparser", marker = "extra == 'api'" },
{ name = "datasets", marker = "extra == 'evaluation'", specifier = ">=4.3.0" },
{ name = "distro", marker = "extra == 'api'" },
{ name = "docling", marker = "sys_platform != 'darwin' and extra == 'docling'", specifier = ">=2.0.0,<3.0.0" },
{ name = "fastapi", marker = "extra == 'api'" },
{ name = "future" },
{ name = "future", marker = "extra == 'api'" },
{ name = "google-api-core", specifier = ">=2.0.0,<3.0.0" },
{ name = "google-api-core", marker = "extra == 'api'", specifier = ">=2.0.0,<3.0.0" },
{ name = "google-api-core", marker = "extra == 'offline-llm'", specifier = ">=2.0.0,<3.0.0" },
@@ -2735,7 +2724,6 @@ requires-dist = [
{ name = "json-repair", marker = "extra == 'api'" },
{ name = "langfuse", marker = "extra == 'observability'", specifier = ">=3.8.1" },
{ name = "lightrag-hku", extras = ["api", "offline-llm", "offline-storage"], marker = "extra == 'offline'" },
{ name = "lightrag-hku", extras = ["pytest"], marker = "extra == 'evaluation'" },
{ name = "llama-index", marker = "extra == 'offline-llm'", specifier = ">=0.9.0,<1.0.0" },
{ name = "nano-vectordb" },
{ name = "nano-vectordb", marker = "extra == 'api'" },
@@ -2745,14 +2733,14 @@ requires-dist = [
{ name = "numpy", specifier = ">=1.24.0,<2.0.0" },
{ name = "numpy", marker = "extra == 'api'", specifier = ">=1.24.0,<2.0.0" },
{ name = "ollama", marker = "extra == 'offline-llm'", specifier = ">=0.1.0,<1.0.0" },
{ name = "openai", marker = "extra == 'api'", specifier = ">=1.0.0,<3.0.0" },
{ name = "openai", marker = "extra == 'offline-llm'", specifier = ">=1.0.0,<3.0.0" },
{ name = "openai", marker = "extra == 'api'", specifier = ">=2.0.0,<3.0.0" },
{ name = "openai", marker = "extra == 'offline-llm'", specifier = ">=2.0.0,<3.0.0" },
{ name = "openpyxl", marker = "extra == 'api'", specifier = ">=3.0.0,<4.0.0" },
{ name = "pandas", specifier = ">=2.0.0,<2.4.0" },
{ name = "pandas", marker = "extra == 'api'", specifier = ">=2.0.0,<2.4.0" },
{ name = "passlib", extras = ["bcrypt"], marker = "extra == 'api'" },
{ name = "pipmaster" },
{ name = "pipmaster", marker = "extra == 'api'" },
{ name = "pre-commit", marker = "extra == 'evaluation'" },
{ name = "pre-commit", marker = "extra == 'pytest'" },
{ name = "psutil", marker = "extra == 'api'" },
{ name = "pycryptodome", marker = "extra == 'api'", specifier = ">=3.0.0,<4.0.0" },
@@ -2764,7 +2752,9 @@ requires-dist = [
{ name = "pypdf", marker = "extra == 'api'", specifier = ">=6.1.0" },
{ name = "pypinyin" },
{ name = "pypinyin", marker = "extra == 'api'" },
{ name = "pytest", marker = "extra == 'evaluation'", specifier = ">=8.4.2" },
{ name = "pytest", marker = "extra == 'pytest'", specifier = ">=8.4.2" },
{ name = "pytest-asyncio", marker = "extra == 'evaluation'", specifier = ">=1.2.0" },
{ name = "pytest-asyncio", marker = "extra == 'pytest'", specifier = ">=1.2.0" },
{ name = "python-docx", marker = "extra == 'api'", specifier = ">=0.8.11,<2.0.0" },
{ name = "python-dotenv" },
@@ -2776,6 +2766,8 @@ requires-dist = [
{ name = "qdrant-client", marker = "extra == 'offline-storage'", specifier = ">=1.11.0,<2.0.0" },
{ name = "ragas", marker = "extra == 'evaluation'", specifier = ">=0.3.7" },
{ name = "redis", marker = "extra == 'offline-storage'", specifier = ">=5.0.0,<8.0.0" },
{ name = "ruff", marker = "extra == 'evaluation'" },
{ name = "ruff", marker = "extra == 'pytest'" },
{ name = "setuptools" },
{ name = "setuptools", marker = "extra == 'api'" },
{ name = "tenacity" },
@@ -4104,20 +4096,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/ab/5f/b38085618b950b79d2d9164a711c52b10aefc0ae6833b96f626b7021b2ed/pandas-2.2.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:ad5b65698ab28ed8d7f18790a0dc58005c7629f227be9ecc1072aa74c0c1d43a", size = 13098436, upload-time = "2024-09-20T13:09:48.112Z" },
]
[[package]]
name = "passlib"
version = "1.7.4"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/b6/06/9da9ee59a67fae7761aab3ccc84fa4f3f33f125b370f1ccdb915bf967c11/passlib-1.7.4.tar.gz", hash = "sha256:defd50f72b65c5402ab2c573830a6978e5f202ad0d984793c8dde2c4152ebe04", size = 689844, upload-time = "2020-10-08T19:00:52.121Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/3b/a4/ab6b7589382ca3df236e03faa71deac88cae040af60c071a78d254a62172/passlib-1.7.4-py2.py3-none-any.whl", hash = "sha256:aa6bca462b8d8bda89c70b382f0c298a20b5560af6cbfa2dce410c0a2fb669f1", size = 525554, upload-time = "2020-10-08T19:00:49.856Z" },
]
[package.optional-dependencies]
bcrypt = [
{ name = "bcrypt" },
]
[[package]]
name = "pillow"
version = "11.3.0"
@@ -5635,6 +5613,32 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/3f/50/0a9e7e7afe7339bd5e36911f0ceb15fed51945836ed803ae5afd661057fd/rtree-1.4.1-py3-none-win_arm64.whl", hash = "sha256:3d46f55729b28138e897ffef32f7ce93ac335cb67f9120125ad3742a220800f0", size = 355253, upload-time = "2025-08-13T19:32:00.296Z" },
]
[[package]]
name = "ruff"
version = "0.14.6"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/52/f0/62b5a1a723fe183650109407fa56abb433b00aa1c0b9ba555f9c4efec2c6/ruff-0.14.6.tar.gz", hash = "sha256:6f0c742ca6a7783a736b867a263b9a7a80a45ce9bee391eeda296895f1b4e1cc", size = 5669501, upload-time = "2025-11-21T14:26:17.903Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/67/d2/7dd544116d107fffb24a0064d41a5d2ed1c9d6372d142f9ba108c8e39207/ruff-0.14.6-py3-none-linux_armv6l.whl", hash = "sha256:d724ac2f1c240dbd01a2ae98db5d1d9a5e1d9e96eba999d1c48e30062df578a3", size = 13326119, upload-time = "2025-11-21T14:25:24.2Z" },
{ url = "https://files.pythonhosted.org/packages/36/6a/ad66d0a3315d6327ed6b01f759d83df3c4d5f86c30462121024361137b6a/ruff-0.14.6-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:9f7539ea257aa4d07b7ce87aed580e485c40143f2473ff2f2b75aee003186004", size = 13526007, upload-time = "2025-11-21T14:25:26.906Z" },
{ url = "https://files.pythonhosted.org/packages/a3/9d/dae6db96df28e0a15dea8e986ee393af70fc97fd57669808728080529c37/ruff-0.14.6-py3-none-macosx_11_0_arm64.whl", hash = "sha256:7f6007e55b90a2a7e93083ba48a9f23c3158c433591c33ee2e99a49b889c6332", size = 12676572, upload-time = "2025-11-21T14:25:29.826Z" },
{ url = "https://files.pythonhosted.org/packages/76/a4/f319e87759949062cfee1b26245048e92e2acce900ad3a909285f9db1859/ruff-0.14.6-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a8e7b9d73d8728b68f632aa8e824ef041d068d231d8dbc7808532d3629a6bef", size = 13140745, upload-time = "2025-11-21T14:25:32.788Z" },
{ url = "https://files.pythonhosted.org/packages/95/d3/248c1efc71a0a8ed4e8e10b4b2266845d7dfc7a0ab64354afe049eaa1310/ruff-0.14.6-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d50d45d4553a3ebcbd33e7c5e0fe6ca4aafd9a9122492de357205c2c48f00775", size = 13076486, upload-time = "2025-11-21T14:25:35.601Z" },
{ url = "https://files.pythonhosted.org/packages/a5/19/b68d4563fe50eba4b8c92aa842149bb56dd24d198389c0ed12e7faff4f7d/ruff-0.14.6-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:118548dd121f8a21bfa8ab2c5b80e5b4aed67ead4b7567790962554f38e598ce", size = 13727563, upload-time = "2025-11-21T14:25:38.514Z" },
{ url = "https://files.pythonhosted.org/packages/47/ac/943169436832d4b0e867235abbdb57ce3a82367b47e0280fa7b4eabb7593/ruff-0.14.6-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:57256efafbfefcb8748df9d1d766062f62b20150691021f8ab79e2d919f7c11f", size = 15199755, upload-time = "2025-11-21T14:25:41.516Z" },
{ url = "https://files.pythonhosted.org/packages/c9/b9/288bb2399860a36d4bb0541cb66cce3c0f4156aaff009dc8499be0c24bf2/ruff-0.14.6-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ff18134841e5c68f8e5df1999a64429a02d5549036b394fafbe410f886e1989d", size = 14850608, upload-time = "2025-11-21T14:25:44.428Z" },
{ url = "https://files.pythonhosted.org/packages/ee/b1/a0d549dd4364e240f37e7d2907e97ee80587480d98c7799d2d8dc7a2f605/ruff-0.14.6-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:29c4b7ec1e66a105d5c27bd57fa93203637d66a26d10ca9809dc7fc18ec58440", size = 14118754, upload-time = "2025-11-21T14:25:47.214Z" },
{ url = "https://files.pythonhosted.org/packages/13/ac/9b9fe63716af8bdfddfacd0882bc1586f29985d3b988b3c62ddce2e202c3/ruff-0.14.6-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:167843a6f78680746d7e226f255d920aeed5e4ad9c03258094a2d49d3028b105", size = 13949214, upload-time = "2025-11-21T14:25:50.002Z" },
{ url = "https://files.pythonhosted.org/packages/12/27/4dad6c6a77fede9560b7df6802b1b697e97e49ceabe1f12baf3ea20862e9/ruff-0.14.6-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:16a33af621c9c523b1ae006b1b99b159bf5ac7e4b1f20b85b2572455018e0821", size = 14106112, upload-time = "2025-11-21T14:25:52.841Z" },
{ url = "https://files.pythonhosted.org/packages/6a/db/23e322d7177873eaedea59a7932ca5084ec5b7e20cb30f341ab594130a71/ruff-0.14.6-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:1432ab6e1ae2dc565a7eea707d3b03a0c234ef401482a6f1621bc1f427c2ff55", size = 13035010, upload-time = "2025-11-21T14:25:55.536Z" },
{ url = "https://files.pythonhosted.org/packages/a8/9c/20e21d4d69dbb35e6a1df7691e02f363423658a20a2afacf2a2c011800dc/ruff-0.14.6-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:4c55cfbbe7abb61eb914bfd20683d14cdfb38a6d56c6c66efa55ec6570ee4e71", size = 13054082, upload-time = "2025-11-21T14:25:58.625Z" },
{ url = "https://files.pythonhosted.org/packages/66/25/906ee6a0464c3125c8d673c589771a974965c2be1a1e28b5c3b96cb6ef88/ruff-0.14.6-py3-none-musllinux_1_2_i686.whl", hash = "sha256:efea3c0f21901a685fff4befda6d61a1bf4cb43de16da87e8226a281d614350b", size = 13303354, upload-time = "2025-11-21T14:26:01.816Z" },
{ url = "https://files.pythonhosted.org/packages/4c/58/60577569e198d56922b7ead07b465f559002b7b11d53f40937e95067ca1c/ruff-0.14.6-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:344d97172576d75dc6afc0e9243376dbe1668559c72de1864439c4fc95f78185", size = 14054487, upload-time = "2025-11-21T14:26:05.058Z" },
{ url = "https://files.pythonhosted.org/packages/67/0b/8e4e0639e4cc12547f41cb771b0b44ec8225b6b6a93393176d75fe6f7d40/ruff-0.14.6-py3-none-win32.whl", hash = "sha256:00169c0c8b85396516fdd9ce3446c7ca20c2a8f90a77aa945ba6b8f2bfe99e85", size = 13013361, upload-time = "2025-11-21T14:26:08.152Z" },
{ url = "https://files.pythonhosted.org/packages/fb/02/82240553b77fd1341f80ebb3eaae43ba011c7a91b4224a9f317d8e6591af/ruff-0.14.6-py3-none-win_amd64.whl", hash = "sha256:390e6480c5e3659f8a4c8d6a0373027820419ac14fa0d2713bd8e6c3e125b8b9", size = 14432087, upload-time = "2025-11-21T14:26:10.891Z" },
{ url = "https://files.pythonhosted.org/packages/a5/1f/93f9b0fad9470e4c829a5bb678da4012f0c710d09331b860ee555216f4ea/ruff-0.14.6-py3-none-win_arm64.whl", hash = "sha256:d43c81fbeae52cfa8728d8766bbf46ee4298c888072105815b392da70ca836b2", size = 13520930, upload-time = "2025-11-21T14:26:13.951Z" },
]
[[package]]
name = "s3transfer"
version = "0.14.0"