Remove local file ingestion tools

2026-01-15 03:34:06 -05:00
parent b8a30dcf7f
commit 55a38a4e95
2 changed files with 1 addition and 68 deletions


@@ -5,7 +5,7 @@ An MCP server exposing the LightRAG Server API as tools, resources, and prompts
 ## Features
 - Retrieval tools: `query_data`, `query`, `query_stream`, `query_stream_chunks`
-- Ingestion tools: `ingest_text`, `ingest_texts`, `ingest_file`, `ingest_files`, `upload_document`
+- Ingestion tools: `ingest_text`, `ingest_texts`, `upload_document`
 - Freshness tools: `scan_documents`, `scan_and_wait`, `pipeline_status`, `wait_for_idle`, `track_status`
 - Memory tool: `ingest_memory` for lessons, preferences, decisions, structures, functions, relationships
 - Graph tools: entity/relation CRUD, entity existence check, label search, graph export
@@ -52,4 +52,3 @@ lightrag-mcp-smoke --query "What is this project?" --format pretty
 - `query_stream` collects the streaming response and returns it as a single string.
 - `query_stream_chunks` returns chunked output and reports progress to clients that support progress events.
 - `refresh_and_query` is a convenience macro for evidence-first workflows.
-- `ingest_file(s)` chunk local files and store them with `file_source` references.


@@ -3,7 +3,6 @@ from __future__ import annotations
 import asyncio
 import json
 import time
-from pathlib import Path
 from typing import Any, Iterable

 from mcp.server.fastmcp import Context, FastMCP
@@ -89,32 +88,6 @@ async def _wait_for_idle(timeout_s: float, interval_s: float) -> dict[str, Any]:
         await asyncio.sleep(interval_s)


-def _chunk_text(text: str, max_chars: int, overlap: int) -> list[str]:
-    if max_chars <= 0:
-        raise ValueError("max_chars must be > 0")
-    if overlap < 0 or overlap >= max_chars:
-        raise ValueError("overlap must be >= 0 and < max_chars")
-    chunks: list[str] = []
-    start = 0
-    length = len(text)
-    while start < length:
-        end = min(start + max_chars, length)
-        cut = text.rfind("\n", start, end)
-        if cut == -1 or cut <= start:
-            cut = end
-        chunk = text[start:cut].strip()
-        if chunk:
-            chunks.append(chunk)
-        if cut >= length:
-            break
-        next_start = max(cut - overlap, 0)
-        if next_start <= start:
-            next_start = cut
-        start = next_start
-    return chunks
-
-
 def _format_list(items: Iterable[str]) -> str:
     return ", ".join(item.strip() for item in items if item.strip())
@@ -359,45 +332,6 @@ async def ingest_texts(
     return await client.request_json("POST", "/documents/texts", json=payload)


-@mcp.tool()
-async def ingest_file(
-    path: str,
-    max_chars: int = 4000,
-    overlap: int = 200,
-    encoding: str = "utf-8",
-) -> dict[str, Any]:
-    """Read a local file, chunk it, and ingest as texts with file_sources set per chunk."""
-    file_path = Path(path)
-    if not file_path.exists():
-        raise FileNotFoundError(f"File not found: {file_path}")
-    text = file_path.read_text(encoding=encoding, errors="replace")
-    chunks = _chunk_text(text, max_chars=max_chars, overlap=overlap)
-    if not chunks:
-        raise ValueError(f"No content to ingest from {file_path}")
-    file_sources = [f"{file_path}#chunk:{idx + 1}/{len(chunks)}" for idx in range(len(chunks))]
-    payload: dict[str, Any] = {"texts": chunks, "file_sources": file_sources}
-    return await client.request_json("POST", "/documents/texts", json=payload)
-
-
-@mcp.tool()
-async def ingest_files(
-    paths: list[str],
-    max_chars: int = 4000,
-    overlap: int = 200,
-    encoding: str = "utf-8",
-) -> dict[str, Any]:
-    """Ingest multiple local files by chunking each file into texts."""
-    results: dict[str, Any] = {}
-    for path in paths:
-        results[path] = await ingest_file(
-            path=path,
-            max_chars=max_chars,
-            overlap=overlap,
-            encoding=encoding,
-        )
-    return {"results": results}
-
-
 @mcp.tool()
 async def upload_document(path: str) -> dict[str, Any]:
     """Upload a local file to the LightRAG input directory."""