# File: rag-manager/ingest_pipeline/ingestors/base.py
# Last modified: 2025-09-15 12:35:42 -04:00
#
# 51 lines
# 1.2 KiB
# Python

"""Base ingestor interface."""
from abc import ABC, abstractmethod
from collections.abc import AsyncGenerator
from ..core.models import Document, IngestionJob
class BaseIngestor(ABC):
    """Abstract base class for all ingestors.

    Every method below is abstract, so a subclass must override all three
    before it can be instantiated.
    """

    @abstractmethod
    async def ingest(self, job: IngestionJob) -> AsyncGenerator[Document, None]:
        """Ingest data from a source.

        This is an async generator function: calling it returns an async
        generator directly (no ``await``), to be consumed with ``async for``.

        Args:
            job: The ingestion job configuration.

        Yields:
            Documents from the source.
        """
        # The bare ``return`` ends the base implementation immediately, so it
        # never produces a document. The unreachable ``yield`` below is what
        # makes Python compile this method as an async generator function
        # rather than a plain coroutine, matching the declared return type.
        return  # type: ignore  # pragma: no cover
        yield  # pragma: no cover

    @abstractmethod
    async def validate_source(self, source_url: str) -> bool:
        """Validate if the source is accessible.

        Args:
            source_url: URL or path to the source.

        Returns:
            True if source is valid and accessible.
        """
        pass  # pragma: no cover

    @abstractmethod
    async def estimate_size(self, source_url: str) -> int:
        """Estimate the number of documents in the source.

        Args:
            source_url: URL or path to the source.

        Returns:
            Estimated number of documents.
        """
        pass  # pragma: no cover