"""Application settings and configuration."""
|
|
|
|
from functools import lru_cache
|
|
from typing import Annotated, Literal
|
|
|
|
from pydantic import Field, HttpUrl, model_validator
|
|
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
|
|
|
|
class Settings(BaseSettings):
|
|
"""Application settings."""
|
|
|
|
model_config = SettingsConfigDict(
|
|
env_file=".env",
|
|
env_file_encoding="utf-8",
|
|
case_sensitive=False,
|
|
extra="ignore", # Ignore extra environment variables
|
|
)
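
    # Illustrative .env entries (placeholder values; host/port are assumptions,
    # not taken from this module). Environment names are matched
    # case-insensitively, and R2R_API_URL populates r2r_endpoint via its alias.
    #
    #   FIRECRAWL_API_KEY=...
    #   WEAVIATE_API_KEY=...
    #   R2R_API_URL=http://r2r.lab:7272
    #   DEFAULT_STORAGE_BACKEND=weaviate
    #   LOG_LEVEL=DEBUG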

    # API Keys
    firecrawl_api_key: str | None = None
    openwebui_api_key: str | None = None
    weaviate_api_key: str | None = None
    r2r_api_key: str | None = None

    # Endpoints
    llm_endpoint: HttpUrl = HttpUrl("http://llm.lab")
    weaviate_endpoint: HttpUrl = HttpUrl("http://weaviate.yo")
    openwebui_endpoint: HttpUrl = HttpUrl("http://chat.lab")  # This will be the API URL
    firecrawl_endpoint: HttpUrl = HttpUrl("http://crawl.lab:30002")
    r2r_endpoint: HttpUrl | None = Field(default=None, alias="r2r_api_url")

    # Model Configuration
    embedding_model: str = "ollama/bge-m3:latest"
    embedding_dimension: int = 1024

    # Ingestion Settings
    default_batch_size: Annotated[int, Field(gt=0, le=500)] = 50
    max_file_size: int = 1_000_000
    max_crawl_depth: Annotated[int, Field(ge=1, le=20)] = 5
    max_crawl_pages: Annotated[int, Field(ge=1, le=1000)] = 100

    # Storage Settings
    default_storage_backend: Literal["weaviate", "open_webui", "r2r"] = "weaviate"
    default_collection_prefix: str = "docs"

    # Prefect Settings
    prefect_api_url: HttpUrl | None = None
    prefect_api_key: str | None = None
    prefect_work_pool: str = "default"

    # Scheduling Defaults
    default_schedule_interval: Annotated[int, Field(ge=1, le=10080)] = 60  # Max 1 week

    # Performance Settings
    max_concurrent_tasks: Annotated[int, Field(ge=1, le=20)] = 5
    request_timeout: Annotated[int, Field(ge=10, le=300)] = 60

    # Logging
    log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR"] = "INFO"

    def get_storage_endpoint(self, backend: str) -> HttpUrl:
        """
        Get endpoint for storage backend.

        Args:
            backend: Storage backend name

        Returns:
            Endpoint URL

        Raises:
            ValueError: If backend is unknown or R2R endpoint not configured
        """
        endpoints = {
            "weaviate": self.weaviate_endpoint,
            "open_webui": self.openwebui_endpoint,
        }

        if backend in endpoints:
            return endpoints[backend]
        elif backend == "r2r":
            if not self.r2r_endpoint:
                raise ValueError(
                    "R2R_API_URL must be set in environment variables. "
                    "This should have been caught during settings validation."
                )
            return self.r2r_endpoint
        else:
            raise ValueError(f"Unknown backend: {backend}. Supported: weaviate, open_webui, r2r")

    def get_api_key(self, service: str) -> str | None:
        """
        Get API key for service.

        Args:
            service: Service name

        Returns:
            API key or None
        """
        service_map = {
            "firecrawl": self.firecrawl_api_key,
            "openwebui": self.openwebui_api_key,
            "weaviate": self.weaviate_api_key,
            "r2r": self.r2r_api_key,
        }
        return service_map.get(service)

    @model_validator(mode="after")
    def validate_backend_configuration(self) -> "Settings":
        """Validate that required configuration is present for the default backend."""
        backend = self.default_storage_backend

        # Validate R2R backend configuration
        if backend == "r2r" and not self.r2r_endpoint:
            raise ValueError(
                "R2R_API_URL must be set in environment variables when using R2R as default backend"
            )

        # Validate API key requirements (optional warning for missing keys)
        required_keys = {
            "weaviate": ("WEAVIATE_API_KEY", self.weaviate_api_key),
            "open_webui": ("OPENWEBUI_API_KEY", self.openwebui_api_key),
            "r2r": ("R2R_API_KEY", self.r2r_api_key),
        }

        if backend in required_keys:
            key_name, key_value = required_keys[backend]
            if not key_value:
                import warnings
                warnings.warn(
                    f"{key_name} not set - authentication may fail for {backend} backend",
                    UserWarning,
                    stacklevel=2
                )

        return self
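

# Note: get_settings() is cached via lru_cache, so the Settings object is built
# once per process; environment changes after the first call are not picked up.
# get_settings.cache_clear() can be used (e.g. in tests) to force a reload.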
@lru_cache
def get_settings() -> Settings:
    """
    Get cached settings instance.

    Returns:
        Settings instance
    """
    return Settings()
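

# Minimal usage sketch (illustrative only, not part of the module's API);
# assumes any required values are supplied via the environment or a local .env.
if __name__ == "__main__":
    settings = get_settings()
    backend = settings.default_storage_backend
    print(f"Default backend: {backend}")
    print(f"Endpoint: {settings.get_storage_endpoint(backend)}")
    print(f"Log level: {settings.log_level}")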