# Code-viewer metadata (not part of the module): 51 lines, 1.2 KiB, Python
"""Base ingestor interface."""
|
|
|
|
from abc import ABC, abstractmethod
|
|
from collections.abc import AsyncGenerator
|
|
|
|
from ..core.models import Document, IngestionJob
|
|
|
|
|
|
class BaseIngestor(ABC):
    """Abstract base class for all ingestors.

    Every method below is declared with ``@abstractmethod``, so a subclass
    must override all three before it can be instantiated.
    """

    @abstractmethod
    async def ingest(self, job: IngestionJob) -> AsyncGenerator[Document, None]:
        """
        Ingest data from a source.

        This is an async generator: iterate over it with ``async for``.
        The base implementation finishes immediately without yielding.

        Args:
            job: The ingestion job configuration

        Yields:
            Documents from the source
        """
        return  # type: ignore  # pragma: no cover
        # Deliberately unreachable: the mere presence of ``yield`` makes this
        # function an async generator, matching the declared return type.
        yield  # pragma: no cover

    @abstractmethod
    async def validate_source(self, source_url: str) -> bool:
        """
        Validate if the source is accessible.

        The base implementation is an empty placeholder (it returns ``None``);
        subclasses supply the real check.

        Args:
            source_url: URL or path to the source

        Returns:
            True if source is valid and accessible
        """
        pass  # pragma: no cover

    @abstractmethod
    async def estimate_size(self, source_url: str) -> int:
        """
        Estimate the number of documents in the source.

        The base implementation is an empty placeholder (it returns ``None``);
        subclasses supply the real estimate.

        Args:
            source_url: URL or path to the source

        Returns:
            Estimated number of documents
        """
        pass  # pragma: no cover
|