Remove local file ingestion tools

This commit is contained in:
2026-01-15 03:34:06 -05:00
parent b8a30dcf7f
commit 55a38a4e95
2 changed files with 1 additions and 68 deletions

View File

@@ -5,7 +5,7 @@ An MCP server exposing the LightRAG Server API as tools, resources, and prompts
## Features
- Retrieval tools: `query_data`, `query`, `query_stream`, `query_stream_chunks`
- Ingestion tools: `ingest_text`, `ingest_texts`, `ingest_file`, `ingest_files`, `upload_document`
- Ingestion tools: `ingest_text`, `ingest_texts`, `upload_document`
- Freshness tools: `scan_documents`, `scan_and_wait`, `pipeline_status`, `wait_for_idle`, `track_status`
- Memory tool: `ingest_memory` for lessons, preferences, decisions, structures, functions, relationships
- Graph tools: entity/relation CRUD, entity existence check, label search, graph export
@@ -52,4 +52,3 @@ lightrag-mcp-smoke --query "What is this project?" --format pretty
- `query_stream` collects the streaming response and returns it as a single string.
- `query_stream_chunks` returns chunked output and reports progress to clients that support progress events.
- `refresh_and_query` is a convenience macro for evidence-first workflows.
- `ingest_file(s)` chunk local files and store them with `file_source` references.

View File

@@ -3,7 +3,6 @@ from __future__ import annotations
import asyncio
import json
import time
from pathlib import Path
from typing import Any, Iterable
from mcp.server.fastmcp import Context, FastMCP
@@ -89,32 +88,6 @@ async def _wait_for_idle(timeout_s: float, interval_s: float) -> dict[str, Any]:
await asyncio.sleep(interval_s)
def _chunk_text(text: str, max_chars: int, overlap: int) -> list[str]:
if max_chars <= 0:
raise ValueError("max_chars must be > 0")
if overlap < 0 or overlap >= max_chars:
raise ValueError("overlap must be >= 0 and < max_chars")
chunks: list[str] = []
start = 0
length = len(text)
while start < length:
end = min(start + max_chars, length)
cut = text.rfind("\n", start, end)
if cut == -1 or cut <= start:
cut = end
chunk = text[start:cut].strip()
if chunk:
chunks.append(chunk)
if cut >= length:
break
next_start = max(cut - overlap, 0)
if next_start <= start:
next_start = cut
start = next_start
return chunks
def _format_list(items: Iterable[str]) -> str:
return ", ".join(item.strip() for item in items if item.strip())
@@ -359,45 +332,6 @@ async def ingest_texts(
return await client.request_json("POST", "/documents/texts", json=payload)
@mcp.tool()
async def ingest_file(
    path: str,
    max_chars: int = 4000,
    overlap: int = 200,
    encoding: str = "utf-8",
) -> dict[str, Any]:
    """Read a local file, chunk it, and ingest as texts with file_sources set per chunk."""
    source = Path(path)
    if not source.exists():
        raise FileNotFoundError(f"File not found: {source}")
    # Decode with errors="replace" so undecodable bytes never abort ingestion.
    contents = source.read_text(encoding=encoding, errors="replace")
    pieces = _chunk_text(contents, max_chars=max_chars, overlap=overlap)
    if not pieces:
        raise ValueError(f"No content to ingest from {source}")
    # Label each chunk as "<path>#chunk:<i>/<n>" so provenance survives splitting.
    total = len(pieces)
    sources = [f"{source}#chunk:{index + 1}/{total}" for index in range(total)]
    body: dict[str, Any] = {"texts": pieces, "file_sources": sources}
    return await client.request_json("POST", "/documents/texts", json=body)
@mcp.tool()
async def ingest_files(
    paths: list[str],
    max_chars: int = 4000,
    overlap: int = 200,
    encoding: str = "utf-8",
) -> dict[str, Any]:
    """Ingest multiple local files by chunking each file into texts."""
    outcomes: dict[str, Any] = {}
    # Ingest sequentially; each path maps to its own per-file result.
    for file_path in paths:
        outcomes[file_path] = await ingest_file(
            path=file_path,
            max_chars=max_chars,
            overlap=overlap,
            encoding=encoding,
        )
    return {"results": outcomes}
@mcp.tool()
async def upload_document(path: str) -> dict[str, Any]:
"""Upload a local file to the LightRAG input directory."""