From 14a015d4adfc5df2cd8f53c744f4a48810a1ba2f Mon Sep 17 00:00:00 2001
From: yangdx
Date: Wed, 29 Oct 2025 19:11:40 +0800
Subject: [PATCH] Restore query generation example and fix README path reference
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

• Fix path from example/ to examples/
• Add generate_query.py implementation
---
 README.md                  |  2 +-
 examples/generate_query.py | 55 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 56 insertions(+), 1 deletion(-)
 create mode 100644 examples/generate_query.py

diff --git a/README.md b/README.md
index 9ba35c4b..991f6b58 100644
--- a/README.md
+++ b/README.md
@@ -1538,7 +1538,7 @@ The dataset used in LightRAG can be downloaded from [TommyChien/UltraDomain](htt
 
 ### Generate Query
 
-LightRAG uses the following prompt to generate high-level queries, with the corresponding code in `example/generate_query.py`.
+LightRAG uses the following prompt to generate high-level queries, with the corresponding code in `examples/generate_query.py`.
 
 <details>
 <summary> Prompt </summary>

diff --git a/examples/generate_query.py b/examples/generate_query.py
new file mode 100644
index 00000000..705b23d3
--- /dev/null
+++ b/examples/generate_query.py
@@ -0,0 +1,55 @@
+from openai import OpenAI
+
+# os.environ["OPENAI_API_KEY"] = ""
+
+
+def openai_complete_if_cache(
+    model="gpt-4o-mini", prompt=None, system_prompt=None, history_messages=[], **kwargs
+) -> str:
+    openai_client = OpenAI()
+
+    messages = []
+    if system_prompt:
+        messages.append({"role": "system", "content": system_prompt})
+    messages.extend(history_messages)
+    messages.append({"role": "user", "content": prompt})
+
+    response = openai_client.chat.completions.create(
+        model=model, messages=messages, **kwargs
+    )
+    return response.choices[0].message.content
+
+
+if __name__ == "__main__":
+    description = ""
+    prompt = f"""
+    Given the following description of a dataset:
+
+    {description}
+
+    Please identify 5 potential users who would engage with this dataset. For each user, list 5 tasks they would perform with this dataset. Then, for each (user, task) combination, generate 5 questions that require a high-level understanding of the entire dataset.
+
+    Output the results in the following structure:
+    - User 1: [user description]
+        - Task 1: [task description]
+            - Question 1:
+            - Question 2:
+            - Question 3:
+            - Question 4:
+            - Question 5:
+        - Task 2: [task description]
+        ...
+        - Task 5: [task description]
+    - User 2: [user description]
+        ...
+    - User 5: [user description]
+        ...
+    """
+
+    result = openai_complete_if_cache(model="gpt-4o-mini", prompt=prompt)
+
+    file_path = "./queries.txt"
+    with open(file_path, "w") as file:
+        file.write(result)
+
+    print(f"Queries written to {file_path}")
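-- 
Usage sketch (reviewer note, not part of the patch): one way to drive the new
helper with a non-empty dataset description. The import path, the example
description, and running from the repository root with OPENAI_API_KEY already
set are assumptions here, not something this change establishes.

    # Hypothetical driver: run from the repo root so `examples` is importable.
    import os
    from examples.generate_query import openai_complete_if_cache

    assert os.environ.get("OPENAI_API_KEY"), "export OPENAI_API_KEY first"

    description = "News articles on global agriculture policy."  # placeholder
    prompt = (
        "Given the following description of a dataset:\n\n"
        f"{description}\n\n"
        "Identify 5 potential users and, for each, 5 high-level questions."
    )
    print(openai_complete_if_cache(model="gpt-4o-mini", prompt=prompt))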