Remove local file ingestion tools
@@ -5,7 +5,7 @@ An MCP server exposing the LightRAG Server API as tools, resources, and prompts
 ## Features
 
 - Retrieval tools: `query_data`, `query`, `query_stream`, `query_stream_chunks`
-- Ingestion tools: `ingest_text`, `ingest_texts`, `ingest_file`, `ingest_files`, `upload_document`
+- Ingestion tools: `ingest_text`, `ingest_texts`, `upload_document`
 - Freshness tools: `scan_documents`, `scan_and_wait`, `pipeline_status`, `wait_for_idle`, `track_status`
 - Memory tool: `ingest_memory` for lessons, preferences, decisions, structures, functions, relationships
 - Graph tools: entity/relation CRUD, entity existence check, label search, graph export
@@ -52,4 +52,3 @@ lightrag-mcp-smoke --query "What is this project?" --format pretty
 - `query_stream` collects the streaming response and returns it as a single string.
 - `query_stream_chunks` returns chunked output and reports progress to clients that support progress events.
 - `refresh_and_query` is a convenience macro for evidence-first workflows.
-- `ingest_file(s)` chunk local files and store them with `file_source` references.
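The `ingest_texts` tool remains for pre-chunked content. Assuming the backing `/documents/texts` endpoint still accepts `file_sources` alongside `texts` (the removed tools posted exactly that shape, per the code diff below), callers can keep per-chunk provenance themselves. The field names here come from the removed `ingest_file`; `notes.md` is a made-up example file:

# Hypothetical request body for POST /documents/texts; the
# "path#chunk:i/n" source convention is taken from the removed ingest_file.
payload = {
    "texts": ["first chunk of notes.md ...", "second chunk ..."],
    "file_sources": ["notes.md#chunk:1/2", "notes.md#chunk:2/2"],
}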
@@ -3,7 +3,6 @@ from __future__ import annotations
 import asyncio
 import json
 import time
-from pathlib import Path
 from typing import Any, Iterable
 
 from mcp.server.fastmcp import Context, FastMCP
@@ -89,32 +88,6 @@ async def _wait_for_idle(timeout_s: float, interval_s: float) -> dict[str, Any]:
         await asyncio.sleep(interval_s)
 
 
-def _chunk_text(text: str, max_chars: int, overlap: int) -> list[str]:
-    if max_chars <= 0:
-        raise ValueError("max_chars must be > 0")
-    if overlap < 0 or overlap >= max_chars:
-        raise ValueError("overlap must be >= 0 and < max_chars")
-
-    chunks: list[str] = []
-    start = 0
-    length = len(text)
-    while start < length:
-        end = min(start + max_chars, length)
-        cut = text.rfind("\n", start, end)
-        if cut == -1 or cut <= start:
-            cut = end
-        chunk = text[start:cut].strip()
-        if chunk:
-            chunks.append(chunk)
-        if cut >= length:
-            break
-        next_start = max(cut - overlap, 0)
-        if next_start <= start:
-            next_start = cut
-        start = next_start
-    return chunks
-
-
 def _format_list(items: Iterable[str]) -> str:
     return ", ".join(item.strip() for item in items if item.strip())
 
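For anyone lifting the deleted `_chunk_text` helper out of this hunk: it cuts at the last newline inside the `max_chars` window when possible, strips each chunk, and steps back `overlap` characters before the next window. A small illustration of that behavior (assumes the removed function has been pasted into scope; the input text is invented):

# text: 40 newline-terminated "alpha" tokens, 240 characters total
text = "alpha\n" * 40
chunks = _chunk_text(text, max_chars=100, overlap=10)
assert all(len(c) <= 100 for c in chunks)  # a chunk never exceeds the window
assert chunks[0].endswith("alpha")         # cut landed on a newline, then stripped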
@@ -359,45 +332,6 @@ async def ingest_texts(
     return await client.request_json("POST", "/documents/texts", json=payload)
 
 
-@mcp.tool()
-async def ingest_file(
-    path: str,
-    max_chars: int = 4000,
-    overlap: int = 200,
-    encoding: str = "utf-8",
-) -> dict[str, Any]:
-    """Read a local file, chunk it, and ingest as texts with file_sources set per chunk."""
-    file_path = Path(path)
-    if not file_path.exists():
-        raise FileNotFoundError(f"File not found: {file_path}")
-    text = file_path.read_text(encoding=encoding, errors="replace")
-    chunks = _chunk_text(text, max_chars=max_chars, overlap=overlap)
-    if not chunks:
-        raise ValueError(f"No content to ingest from {file_path}")
-    file_sources = [f"{file_path}#chunk:{idx + 1}/{len(chunks)}" for idx in range(len(chunks))]
-    payload: dict[str, Any] = {"texts": chunks, "file_sources": file_sources}
-    return await client.request_json("POST", "/documents/texts", json=payload)
-
-
-@mcp.tool()
-async def ingest_files(
-    paths: list[str],
-    max_chars: int = 4000,
-    overlap: int = 200,
-    encoding: str = "utf-8",
-) -> dict[str, Any]:
-    """Ingest multiple local files by chunking each file into texts."""
-    results: dict[str, Any] = {}
-    for path in paths:
-        results[path] = await ingest_file(
-            path=path,
-            max_chars=max_chars,
-            overlap=overlap,
-            encoding=encoding,
-        )
-    return {"results": results}
-
-
 @mcp.tool()
 async def upload_document(path: str) -> dict[str, Any]:
     """Upload a local file to the LightRAG input directory."""
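With `ingest_file` and `ingest_files` removed from the server, a client that still wants chunked ingestion with per-chunk sources can do the work locally and POST to the surviving `/documents/texts` endpoint. A minimal sketch under those assumptions, using `httpx` as a stand-in for the server's own `client` wrapper and a naive splitter in place of the removed `_chunk_text`:

from pathlib import Path

import httpx  # stand-in HTTP client, not part of this codebase


def split_fixed(text: str, max_chars: int) -> list[str]:
    # Naive fixed-width splitter; copy the removed _chunk_text instead
    # if you want newline-aware cuts and overlap.
    return [text[i:i + max_chars] for i in range(0, len(text), max_chars)]


async def ingest_file_clientside(base_url: str, path: str, max_chars: int = 4000) -> dict:
    file_path = Path(path)
    text = file_path.read_text(encoding="utf-8", errors="replace")
    chunks = split_fixed(text, max_chars)
    if not chunks:
        raise ValueError(f"No content to ingest from {file_path}")
    # Same "path#chunk:i/n" convention the removed tool used.
    sources = [f"{file_path}#chunk:{i + 1}/{len(chunks)}" for i in range(len(chunks))]
    async with httpx.AsyncClient(base_url=base_url) as http:
        resp = await http.post("/documents/texts",
                               json={"texts": chunks, "file_sources": sources})
        resp.raise_for_status()
        return resp.json()

# e.g. asyncio.run(ingest_file_clientside("http://localhost:9621", "notes.md"))
# (hypothetical URL and path)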