From 0726460e13685767a585dcbd8552ee01fd0910b1 Mon Sep 17 00:00:00 2001 From: Vishva R <44122284+vishvaRam@users.noreply.github.com> Date: Thu, 25 Dec 2025 15:44:51 +0530 Subject: [PATCH 1/4] Add Gemini demo for LightRAG This PR adds a Gemini-based direct demo for LightRAG, mirroring the existing llamaindex direct demo. Features: - Uses gemini-2.0-flash for LLM inference - Uses text-embedding-004 for embeddings - Demonstrates naive, local, global, and hybrid query modes - Includes required storage initialization - Keeps a minimal, dependency-free setup This serves as a reference example for users who want to use LightRAG with Google Gemini without external frameworks. --- .../unofficial-sample/lightrag_gemini_demo.py | 89 +++++++++++++++++++ 1 file changed, 89 insertions(+) create mode 100644 examples/unofficial-sample/lightrag_gemini_demo.py diff --git a/examples/unofficial-sample/lightrag_gemini_demo.py b/examples/unofficial-sample/lightrag_gemini_demo.py new file mode 100644 index 00000000..d805d0da --- /dev/null +++ b/examples/unofficial-sample/lightrag_gemini_demo.py @@ -0,0 +1,89 @@ +import os +import asyncio +import nest_asyncio +import numpy as np + +from lightrag import LightRAG, QueryParam +from lightrag.llm.gemini import gemini_model_complete, gemini_embed +from lightrag.utils import wrap_embedding_func_with_attrs + +nest_asyncio.apply() + +WORKING_DIR = "./rag_storage" +GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "your-api-key-here") + +if not os.path.exists(WORKING_DIR): + os.mkdir(WORKING_DIR) + + +# -------------------------------------------------- +# LLM function +# -------------------------------------------------- +async def llm_model_func(prompt, system_prompt=None, history_messages=[], **kwargs): + return await gemini_model_complete( + prompt, + system_prompt=system_prompt, + history_messages=history_messages, + api_key=GEMINI_API_KEY, + model_name="gemini-2.0-flash", + **kwargs + ) + +# -------------------------------------------------- +# Embedding function +# -------------------------------------------------- +@wrap_embedding_func_with_attrs( + embedding_dim=768, + max_token_size=2048, + model_name="models/text-embedding-004" +) +async def embedding_func(texts: list[str]) -> np.ndarray: + return await gemini_embed.func( + texts, + api_key=GEMINI_API_KEY, + model="models/text-embedding-004" + ) + +# -------------------------------------------------- +# Initialize RAG +# -------------------------------------------------- +async def initialize_rag(): + rag = LightRAG( + working_dir=WORKING_DIR, + llm_model_func=llm_model_func, + embedding_func=embedding_func, + llm_model_name="gemini-2.0-flash", + ) + + # 🔑 REQUIRED + await rag.initialize_storages() + return rag + + +# -------------------------------------------------- +# Main +# -------------------------------------------------- +def main(): + rag = asyncio.run(initialize_rag()) + + # Insert text + with open("./book.txt", "r", encoding="utf-8") as f: + rag.insert(f.read()) + + query = "What are the top themes?" + + print("\nNaive Search:") + print(rag.query(query, param=QueryParam(mode="naive"))) + + print("\nLocal Search:") + print(rag.query(query, param=QueryParam(mode="local"))) + + print("\nGlobal Search:") + print(rag.query(query, param=QueryParam(mode="global"))) + + print("\nHybrid Search:") + print(rag.query(query, param=QueryParam(mode="hybrid"))) + + +if __name__ == "__main__": + main() From d2ca1af81b7d6f39ade89c87b97ac37bffb899a1 Mon Sep 17 00:00:00 2001 From: yangdx Date: Fri, 26 Dec 2025 14:44:12 +0800 Subject: [PATCH 2/4] Move Gemini demo and update embedding docs - Move Gemini demo to examples root - Enable send_dimensions for Gemini - Clarify embedding wrapper docstrings --- .../{unofficial-sample => }/lightrag_gemini_demo.py | 11 ++++++----- lightrag/utils.py | 10 +++++----- 2 files changed, 11 insertions(+), 10 deletions(-) rename examples/{unofficial-sample => }/lightrag_gemini_demo.py (93%) diff --git a/examples/unofficial-sample/lightrag_gemini_demo.py b/examples/lightrag_gemini_demo.py similarity index 93% rename from examples/unofficial-sample/lightrag_gemini_demo.py rename to examples/lightrag_gemini_demo.py index d805d0da..8d441461 100644 --- a/examples/unofficial-sample/lightrag_gemini_demo.py +++ b/examples/lightrag_gemini_demo.py @@ -26,24 +26,25 @@ async def llm_model_func(prompt, system_prompt=None, history_messages=[], **kwar history_messages=history_messages, api_key=GEMINI_API_KEY, model_name="gemini-2.0-flash", - **kwargs + **kwargs, ) + # -------------------------------------------------- # Embedding function # -------------------------------------------------- @wrap_embedding_func_with_attrs( embedding_dim=768, + send_dimensions=True, max_token_size=2048, - model_name="models/text-embedding-004" + model_name="models/text-embedding-004", ) async def embedding_func(texts: list[str]) -> np.ndarray: return await gemini_embed.func( - texts, - api_key=GEMINI_API_KEY, - model="models/text-embedding-004" + texts, api_key=GEMINI_API_KEY, model="models/text-embedding-004" ) + # -------------------------------------------------- # Initialize RAG # -------------------------------------------------- diff --git a/lightrag/utils.py b/lightrag/utils.py index cd3f26d1..2ed8c183 100644 --- a/lightrag/utils.py +++ b/lightrag/utils.py @@ -1081,16 +1081,16 @@ def wrap_embedding_func_with_attrs(**kwargs): ```python @wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8192, model_name="my_embedding_model") @retry(...) - async def openai_embed(texts, ...): + async def my_embed(texts, ...): # Base implementation pass @wrap_embedding_func_with_attrs(embedding_dim=1024, max_token_size=4096, model_name="another_embedding_model") # Note: No @retry here! - async def new_openai_embed(texts, ...): + async def my_new_embed(texts, ...): # CRITICAL: Call .func to access unwrapped function - return await openai_embed.func(texts, ...) # ✅ Correct - # return await openai_embed(texts, ...) # ❌ Wrong - double decoration! + return await my_embed.func(texts, ...) # ✅ Correct + # return await my_embed(texts, ...) # ❌ Wrong - double decoration! ``` The decorated function becomes an EmbeddingFunc instance with: @@ -1103,7 +1103,7 @@ def wrap_embedding_func_with_attrs(**kwargs): Args: embedding_dim: The dimension of embedding vectors max_token_size: Maximum number of tokens (optional) - send_dimensions: Whether to inject embedding_dim as a keyword argument (optional) + send_dimensions: Whether to pass embedding_dim as a keyword argument (for models with configurable embedding dimensions). Returns: A decorator that wraps the function as an EmbeddingFunc instance From 8a6212d0437b91c64fa87f3aa7dcc7bbbafbad45 Mon Sep 17 00:00:00 2001 From: yangdx Date: Fri, 26 Dec 2025 15:00:58 +0800 Subject: [PATCH 3/4] Improve Gemini demo documentation and validation - Add script docstring and usage info - Validate GEMINI_API_KEY presence - Check input file existence - Use constant for book file path --- examples/lightrag_gemini_demo.py | 35 ++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/examples/lightrag_gemini_demo.py b/examples/lightrag_gemini_demo.py index 8d441461..376a7bc3 100644 --- a/examples/lightrag_gemini_demo.py +++ b/examples/lightrag_gemini_demo.py @@ -1,3 +1,19 @@ +""" +LightRAG Demo with Google Gemini Models + +This example demonstrates how to use LightRAG with Google's Gemini 2.0 Flash model +for text generation and the text-embedding-004 model for embeddings. + +Prerequisites: + 1. Set GEMINI_API_KEY environment variable: + export GEMINI_API_KEY='your-actual-api-key' + + 2. Prepare a text file named 'book.txt' in the current directory + (or modify BOOK_FILE constant to point to your text file) + +Usage: + python examples/lightrag_gemini_demo.py +""" import os import asyncio import nest_asyncio @@ -10,7 +26,15 @@ from lightrag.utils import wrap_embedding_func_with_attrs nest_asyncio.apply() WORKING_DIR = "./rag_storage" -GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "your-api-key-here") +BOOK_FILE = "./book.txt" + +# Validate API key +GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY") +if not GEMINI_API_KEY: + raise ValueError( + "GEMINI_API_KEY environment variable is not set. " + "Please set it with: export GEMINI_API_KEY='your-api-key'" + ) if not os.path.exists(WORKING_DIR): os.mkdir(WORKING_DIR) @@ -65,10 +89,17 @@ async def initialize_rag(): # Main # -------------------------------------------------- def main(): + # Validate book file exists + if not os.path.exists(BOOK_FILE): + raise FileNotFoundError( + f"'{BOOK_FILE}' not found. " + "Please provide a text file to index in the current directory." + ) + rag = asyncio.run(initialize_rag()) # Insert text - with open("./book.txt", "r", encoding="utf-8") as f: + with open(BOOK_FILE, "r", encoding="utf-8") as f: rag.insert(f.read()) query = "What are the top themes?" From 4c032a9d4f411a3df1d686f36e67381103d9934f Mon Sep 17 00:00:00 2001 From: yangdx Date: Fri, 26 Dec 2025 15:01:25 +0800 Subject: [PATCH 4/4] Fix linting --- examples/lightrag_gemini_demo.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/examples/lightrag_gemini_demo.py b/examples/lightrag_gemini_demo.py index 376a7bc3..c6586922 100644 --- a/examples/lightrag_gemini_demo.py +++ b/examples/lightrag_gemini_demo.py @@ -7,13 +7,14 @@ for text generation and the text-embedding-004 model for embeddings. Prerequisites: 1. Set GEMINI_API_KEY environment variable: export GEMINI_API_KEY='your-actual-api-key' - + 2. Prepare a text file named 'book.txt' in the current directory (or modify BOOK_FILE constant to point to your text file) Usage: python examples/lightrag_gemini_demo.py """ + import os import asyncio import nest_asyncio @@ -95,7 +96,7 @@ def main(): f"'{BOOK_FILE}' not found. " "Please provide a text file to index in the current directory." ) - + rag = asyncio.run(initialize_rag()) # Insert text