Files
biz-bud/tests/unit_tests/graphs/test_rag_graph.py
Travis Vasceannie 8ad47a7640 Modernize research graph metadata for LangGraph v1 (#60)
* Modernize research graph metadata for LangGraph v1

* Update src/biz_bud/core/langgraph/graph_builder.py

Co-authored-by: qodo-merge-pro[bot] <151058649+qodo-merge-pro[bot]@users.noreply.github.com>

---------

Co-authored-by: qodo-merge-pro[bot] <151058649+qodo-merge-pro[bot]@users.noreply.github.com>
2025-09-19 03:01:18 -04:00

154 lines
6.0 KiB
Python

"""Lightweight tests covering the LangGraph v1 RAG workflow configuration."""
from __future__ import annotations
import sys
from pathlib import Path
from types import ModuleType
import pytest
from langgraph.graph.state import CachePolicy, RetryPolicy
# ---------------------------------------------------------------------------
# Optional dependency shims
# ---------------------------------------------------------------------------
# The RAG graph pulls in a wide range of optional dependencies through the
# package ``biz_bud.graphs``. We only need to import the graph module itself
# to introspect configuration, so provide lightweight stubs that satisfy the
# imports without requiring the full production stack.
# Repository root: three directory levels above the folder holding this file.
project_root = Path(__file__).resolve().parents[3]

# Stand-in for the top-level package: a bare module whose ``__path__`` points
# at the source tree, so submodules stay importable from disk.
if "biz_bud" not in sys.modules:  # pragma: no cover - package bootstrap
    biz_bud_pkg = ModuleType("biz_bud")
    biz_bud_pkg.__path__ = [str(project_root.joinpath("src", "biz_bud"))]
    sys.modules[biz_bud_pkg.__name__] = biz_bud_pkg

# Same trick one level down, registered instead of the real ``graphs`` package.
if "biz_bud.graphs" not in sys.modules:  # pragma: no cover - avoid heavy __init__
    graphs_pkg = ModuleType("biz_bud.graphs")
    graphs_pkg.__path__ = [str(project_root.joinpath("src", "biz_bud", "graphs"))]
    sys.modules[graphs_pkg.__name__] = graphs_pkg

# The ``rag`` package shim additionally carries a package-style ``__spec__``
# whose submodule search locations mirror ``__path__``.
if "biz_bud.graphs.rag" not in sys.modules:  # pragma: no cover - package shim
    from importlib.machinery import ModuleSpec

    rag_path = str(project_root.joinpath("src", "biz_bud", "graphs", "rag"))
    rag_pkg = ModuleType("biz_bud.graphs.rag")
    rag_pkg.__spec__ = ModuleSpec("biz_bud.graphs.rag", loader=None, is_package=True)
    rag_pkg.__spec__.submodule_search_locations = [rag_path]
    rag_pkg.__path__ = [rag_path]
    sys.modules[rag_pkg.__name__] = rag_pkg
def _passthrough(state, *_args, **_kwargs):  # pragma: no cover - helper stub
    """Return ``state`` unchanged, ignoring any extra arguments.

    Used as the placeholder callable for every stubbed node in this module.
    """
    return state
# Each stub module below exposes the listed node names, all bound to the
# ``_passthrough`` placeholder, and is registered under its dotted name.
if "biz_bud.graphs.rag.nodes" not in sys.modules:  # pragma: no cover - node stubs
    rag_nodes = ModuleType("biz_bud.graphs.rag.nodes")
    vars(rag_nodes).update(
        dict.fromkeys(
            (
                "analyze_content_for_rag_node",
                "check_existing_content_node",
                "check_r2r_duplicate_node",
                "decide_processing_node",
                "determine_processing_params_node",
                "upload_to_r2r_node",
            ),
            _passthrough,
        )
    )
    sys.modules["biz_bud.graphs.rag.nodes"] = rag_nodes

if "biz_bud.graphs.rag.nodes.integrations" not in sys.modules:  # pragma: no cover
    integrations_module = ModuleType("biz_bud.graphs.rag.nodes.integrations")
    vars(integrations_module).update(repomix_process_node=_passthrough)
    sys.modules["biz_bud.graphs.rag.nodes.integrations"] = integrations_module

if "biz_bud.graphs.rag.nodes.scraping" not in sys.modules:  # pragma: no cover
    scraping_module = ModuleType("biz_bud.graphs.rag.nodes.scraping")
    vars(scraping_module).update(
        dict.fromkeys(
            (
                "batch_process_urls_node",
                "discover_urls_node",
                "route_url_node",
                "scrape_status_summary_node",
            ),
            _passthrough,
        )
    )
    sys.modules["biz_bud.graphs.rag.nodes.scraping"] = scraping_module

if "biz_bud.nodes" not in sys.modules:  # pragma: no cover - minimal node facade
    nodes_module = ModuleType("biz_bud.nodes")
    vars(nodes_module).update(
        dict.fromkeys(
            ("finalize_status_node", "preserve_url_fields_node"),
            _passthrough,
        )
    )
    sys.modules["biz_bud.nodes"] = nodes_module
# When ``docling`` has not been imported, register a two-module stand-in:
# ``docling`` and ``docling.document_converter`` with a no-op converter class.
if "docling" not in sys.modules:  # pragma: no cover - optional dependency shim

    class _StubDocumentConverter:  # pragma: no cover - minimal convert stub
        """Placeholder converter whose ``convert`` coroutine returns ``None``."""

        async def convert(self, *_args, **_kwargs):
            return None

    converter_module = ModuleType("docling.document_converter")
    converter_module.DocumentConverter = _StubDocumentConverter

    docling_module = ModuleType("docling")
    docling_module.document_converter = converter_module

    sys.modules.update(
        {
            "docling": docling_module,
            "docling.document_converter": converter_module,
        }
    )
# Load ``graph.py`` straight from its file path under its real dotted name,
# unless a module with that name is already registered.
graph_module_name = "biz_bud.graphs.rag.graph"
graph_module_path = project_root / "src" / "biz_bud" / "graphs" / "rag" / "graph.py"
if graph_module_name not in sys.modules:  # pragma: no cover - manual module load
    import importlib.util

    spec = importlib.util.spec_from_file_location(graph_module_name, graph_module_path)
    # When no spec/loader can be built, nothing is registered here and the
    # regular ``import`` statement that follows is left to resolve the module.
    if spec and spec.loader:
        module = importlib.util.module_from_spec(spec)
        # Register before executing, as the standard import machinery does.
        sys.modules[graph_module_name] = module
        try:
            spec.loader.exec_module(module)
        except BaseException:
            # Do not leave a half-initialised module behind: later imports in
            # the same session would otherwise pick up the broken object
            # instead of retrying or reporting the real failure.
            sys.modules.pop(graph_module_name, None)
            raise
from biz_bud.graphs.rag.graph import ( # noqa: E402 - imported after shims
URLToRAGGraphContext,
URLToRAGGraphInput,
URLToRAGGraphOutput,
create_url_to_r2r_graph,
)
@pytest.fixture(scope="module")
def compiled_rag_graph():
    """Build the URL-to-R2R graph a single time per test module.

    The tests in this module only inspect the returned object's attributes.
    """
    graph = create_url_to_r2r_graph()
    return graph
def test_rag_graph_declares_schemas(compiled_rag_graph):
    """Check the context, input, and (when present) output schemas of the graph."""
    assert compiled_rag_graph.context_schema is URLToRAGGraphContext
    assert compiled_rag_graph.InputType is URLToRAGGraphInput

    # The output schema is only checked by name, and only when one is exposed.
    output_schema = compiled_rag_graph.output_schema
    if output_schema is None:
        return
    assert output_schema.__name__ == "url_to_r2r_graph_output"
def test_rag_graph_metadata_and_entry_point(compiled_rag_graph):
    """Check the graph's name and that it starts at the ``route_url`` node."""
    assert compiled_rag_graph.name == "url_to_r2r_graph"

    entry_edge = ("__start__", "route_url")
    declared_edges = compiled_rag_graph.builder.edges
    assert entry_edge in declared_edges
def test_rag_nodes_use_langgraph_policies(compiled_rag_graph):
    """Check metadata, cache, retry, and defer settings on selected nodes."""
    node_specs = compiled_rag_graph.builder.nodes

    # Routing node: categorised metadata plus a cache policy.
    routing = node_specs["route_url"]
    assert isinstance(routing.metadata, dict)
    assert routing.metadata["category"] == "routing"
    assert isinstance(routing.cache_policy, CachePolicy)

    # Scrape and upload nodes share the same expectations: a retry policy
    # and deferred execution.
    for node_name in ("scrape_url", "r2r_upload"):
        spec = node_specs[node_name]
        assert isinstance(spec.retry_policy, RetryPolicy)
        assert spec.defer is True