diff --git a/README.md b/README.md
index b515490..cf048b6 100644
--- a/README.md
+++ b/README.md
@@ -5,7 +5,7 @@ An MCP server exposing the LightRAG Server API as tools, resources, and prompts
 ## Features
 
 - Retrieval tools: `query_data`, `query`, `query_stream`, `query_stream_chunks`
-- Ingestion tools: `ingest_text`, `ingest_texts`, `ingest_file`, `ingest_files`, `upload_document`
+- Ingestion tools: `ingest_text`, `ingest_texts`, `upload_document`
 - Freshness tools: `scan_documents`, `scan_and_wait`, `pipeline_status`, `wait_for_idle`, `track_status`
 - Memory tool: `ingest_memory` for lessons, preferences, decisions, structures, functions, relationships
 - Graph tools: entity/relation CRUD, entity existence check, label search, graph export
@@ -52,4 +52,3 @@ lightrag-mcp-smoke --query "What is this project?" --format pretty
 - `query_stream` collects the streaming response and returns it as a single string.
 - `query_stream_chunks` returns chunked output and reports progress to clients that support progress events.
 - `refresh_and_query` is a convenience macro for evidence-first workflows.
-- `ingest_file(s)` chunk local files and store them with `file_source` references.
diff --git a/src/lightrag_mcp/server.py b/src/lightrag_mcp/server.py
index 46d712d..dcc2421 100644
--- a/src/lightrag_mcp/server.py
+++ b/src/lightrag_mcp/server.py
@@ -3,7 +3,6 @@ from __future__ import annotations
 import asyncio
 import json
 import time
-from pathlib import Path
 from typing import Any, Iterable
 
 from mcp.server.fastmcp import Context, FastMCP
@@ -89,32 +88,6 @@ async def _wait_for_idle(timeout_s: float, interval_s: float) -> dict[str, Any]:
         await asyncio.sleep(interval_s)
 
 
-def _chunk_text(text: str, max_chars: int, overlap: int) -> list[str]:
-    if max_chars <= 0:
-        raise ValueError("max_chars must be > 0")
-    if overlap < 0 or overlap >= max_chars:
-        raise ValueError("overlap must be >= 0 and < max_chars")
-
-    chunks: list[str] = []
-    start = 0
-    length = len(text)
-    while start < length:
-        end = min(start + max_chars, length)
-        cut = text.rfind("\n", start, end)
-        if cut == -1 or cut <= start:
-            cut = end
-        chunk = text[start:cut].strip()
-        if chunk:
-            chunks.append(chunk)
-        if cut >= length:
-            break
-        next_start = max(cut - overlap, 0)
-        if next_start <= start:
-            next_start = cut
-        start = next_start
-    return chunks
-
-
 def _format_list(items: Iterable[str]) -> str:
     return ", ".join(item.strip() for item in items if item.strip())
 
@@ -359,45 +332,6 @@ async def ingest_texts(
     return await client.request_json("POST", "/documents/texts", json=payload)
 
 
-@mcp.tool()
-async def ingest_file(
-    path: str,
-    max_chars: int = 4000,
-    overlap: int = 200,
-    encoding: str = "utf-8",
-) -> dict[str, Any]:
-    """Read a local file, chunk it, and ingest as texts with file_sources set per chunk."""
-    file_path = Path(path)
-    if not file_path.exists():
-        raise FileNotFoundError(f"File not found: {file_path}")
-    text = file_path.read_text(encoding=encoding, errors="replace")
-    chunks = _chunk_text(text, max_chars=max_chars, overlap=overlap)
-    if not chunks:
-        raise ValueError(f"No content to ingest from {file_path}")
-    file_sources = [f"{file_path}#chunk:{idx + 1}/{len(chunks)}" for idx in range(len(chunks))]
-    payload: dict[str, Any] = {"texts": chunks, "file_sources": file_sources}
-    return await client.request_json("POST", "/documents/texts", json=payload)
-
-
-@mcp.tool()
-async def ingest_files(
-    paths: list[str],
-    max_chars: int = 4000,
-    overlap: int = 200,
-    encoding: str = "utf-8",
-) -> dict[str, Any]:
-    """Ingest multiple local files by chunking each file into texts."""
-    results: dict[str, Any] = {}
-    for path in paths:
-        results[path] = await ingest_file(
-            path=path,
-            max_chars=max_chars,
-            overlap=overlap,
-            encoding=encoding,
-        )
-    return {"results": results}
-
-
 @mcp.tool()
 async def upload_document(path: str) -> dict[str, Any]:
     """Upload a local file to the LightRAG input directory."""