# File: rag-manager/ingest_pipeline/config/settings.py
# Timestamp: 2025-09-15 12:35:42 -04:00
# Size: 104 lines, 2.9 KiB
# Language: Python

"""Application settings and configuration."""
from functools import lru_cache
from typing import Literal
from pydantic import Field, HttpUrl
from pydantic_settings import BaseSettings, SettingsConfigDict
class Settings(BaseSettings):
    """Application settings for the ingest pipeline.

    Built on pydantic-settings ``BaseSettings``: values are read from the
    process environment and from a UTF-8 ``.env`` file, names are matched
    case-insensitively, and variables that do not correspond to a field are
    ignored. Every field declares a default, so ``Settings()`` can be
    constructed without any configuration present.
    """

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        case_sensitive=False,
        extra="ignore",  # Ignore extra environment variables
    )

    # API keys (None means "not configured")
    firecrawl_api_key: str | None = None
    openwebui_api_key: str | None = None
    weaviate_api_key: str | None = None

    # Service endpoints
    llm_endpoint: HttpUrl = HttpUrl("http://llm.lab")
    weaviate_endpoint: HttpUrl = HttpUrl("http://weaviate.yo")
    openwebui_endpoint: HttpUrl = HttpUrl("http://chat.lab")  # This will be the API URL
    firecrawl_endpoint: HttpUrl = HttpUrl("http://crawl.lab:30002")

    # Embedding model configuration
    embedding_model: str = "ollama/bge-m3:latest"
    embedding_dimension: int = 1024

    # Ingestion limits
    default_batch_size: int = Field(default=50, gt=0, le=500)
    max_file_size: int = 1_000_000  # presumably bytes -- TODO confirm unit
    max_crawl_depth: int = Field(default=5, ge=1, le=20)
    max_crawl_pages: int = Field(default=100, ge=1, le=1000)

    # Storage defaults
    default_storage_backend: Literal["weaviate", "open_webui"] = "weaviate"
    default_collection_prefix: str = "docs"

    # Prefect settings
    prefect_api_url: HttpUrl | None = None
    prefect_api_key: str | None = None
    prefect_work_pool: str = "default"

    # Scheduling defaults
    # Upper bound 10080 == 7 * 24 * 60, i.e. one week if the unit is minutes.
    default_schedule_interval: int = Field(default=60, ge=1, le=10080)  # Max 1 week

    # Performance settings
    max_concurrent_tasks: int = Field(default=5, ge=1, le=20)
    request_timeout: int = Field(default=60, ge=10, le=300)  # presumably seconds -- TODO confirm

    # Logging
    log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR"] = "INFO"

    def get_storage_endpoint(self, backend: str) -> HttpUrl:
        """
        Get endpoint for storage backend.

        Args:
            backend: Storage backend name, ``"weaviate"`` or ``"open_webui"``
                (the same spellings accepted by ``default_storage_backend``).

        Returns:
            Endpoint URL configured for that backend.

        Raises:
            ValueError: If ``backend`` is not one of the known names.
        """
        if backend == "weaviate":
            return self.weaviate_endpoint
        if backend == "open_webui":
            return self.openwebui_endpoint
        raise ValueError(f"Unknown backend: {backend}")

    def get_api_key(self, service: str) -> str | None:
        """
        Get API key for service.

        Args:
            service: Service name. Recognised names are ``"firecrawl"``,
                ``"openwebui"`` (or ``"open_webui"``), ``"weaviate"`` and
                ``"prefect"``.

        Returns:
            The configured API key, or None when the key is unset or the
            service name is not recognised (no exception is raised).
        """
        service_map: dict[str, str | None] = {
            "firecrawl": self.firecrawl_api_key,
            "openwebui": self.openwebui_api_key,
            # Storage backends are spelled "open_webui" everywhere else in
            # this class; accept that spelling too so a backend name can be
            # passed straight through instead of silently yielding None.
            "open_webui": self.openwebui_api_key,
            "weaviate": self.weaviate_api_key,
            # prefect_api_key is a declared setting and is exposed here like
            # the other service keys.
            "prefect": self.prefect_api_key,
        }
        return service_map.get(service)
@lru_cache
def get_settings() -> Settings:
    """
    Return the shared settings object.

    The function takes no arguments and is wrapped in ``lru_cache``, so
    ``Settings()`` is constructed on the first call and that same instance
    is handed back on every later call.

    Returns:
        The cached Settings instance
    """
    settings = Settings()
    return settings