104 lines
2.9 KiB
Python
104 lines
2.9 KiB
Python
"""Application settings and configuration."""
|
|
|
|
from functools import lru_cache
|
|
from typing import Literal
|
|
|
|
from pydantic import Field, HttpUrl
|
|
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
|
|
|
|
class Settings(BaseSettings):
    """Application settings.

    Values come from the defaults declared below and can be overridden via
    environment variables or a UTF-8 ``.env`` file (see ``model_config``).
    Variable names are matched case-insensitively and variables that do not
    correspond to a declared field are ignored.
    """

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        case_sensitive=False,
        extra="ignore",  # Ignore extra environment variables
    )

    # API Keys (None means no key is configured for that service)
    firecrawl_api_key: str | None = None
    openwebui_api_key: str | None = None
    weaviate_api_key: str | None = None

    # Endpoints
    llm_endpoint: HttpUrl = HttpUrl("http://llm.lab")
    weaviate_endpoint: HttpUrl = HttpUrl("http://weaviate.yo")
    openwebui_endpoint: HttpUrl = HttpUrl("http://chat.lab")  # This will be the API URL
    firecrawl_endpoint: HttpUrl = HttpUrl("http://crawl.lab:30002")

    # Model Configuration
    embedding_model: str = "ollama/bge-m3:latest"
    embedding_dimension: int = 1024

    # Ingestion Settings
    default_batch_size: int = Field(default=50, gt=0, le=500)
    max_file_size: int = 1_000_000  # NOTE(review): presumably bytes - confirm with callers
    max_crawl_depth: int = Field(default=5, ge=1, le=20)
    max_crawl_pages: int = Field(default=100, ge=1, le=1000)

    # Storage Settings
    default_storage_backend: Literal["weaviate", "open_webui"] = "weaviate"
    default_collection_prefix: str = "docs"

    # Prefect Settings
    prefect_api_url: HttpUrl | None = None
    prefect_api_key: str | None = None
    prefect_work_pool: str = "default"

    # Scheduling Defaults
    # Upper bound 10080 is one week if the unit is minutes - TODO confirm unit.
    default_schedule_interval: int = Field(default=60, ge=1, le=10080)  # Max 1 week

    # Performance Settings
    max_concurrent_tasks: int = Field(default=5, ge=1, le=20)
    request_timeout: int = Field(default=60, ge=10, le=300)

    # Logging
    log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR"] = "INFO"

    def get_storage_endpoint(self, backend: str) -> HttpUrl:
        """
        Get endpoint for storage backend.

        Args:
            backend: Storage backend name, either "weaviate" or "open_webui"

        Returns:
            Endpoint URL configured for that backend

        Raises:
            ValueError: If the backend name is not recognised
        """
        if backend == "weaviate":
            return self.weaviate_endpoint
        if backend == "open_webui":
            return self.openwebui_endpoint
        raise ValueError(f"Unknown backend: {backend}")

    def get_api_key(self, service: str) -> str | None:
        """
        Get API key for service.

        Args:
            service: Service name: "firecrawl", "weaviate", or "openwebui"
                (the backend spelling "open_webui" is accepted as well)

        Returns:
            API key, or None when the service is unknown or has no key set
        """
        api_keys = {
            "firecrawl": self.firecrawl_api_key,
            "openwebui": self.openwebui_api_key,
            # Alias matching the backend name used by default_storage_backend
            # and get_storage_endpoint, so one identifier works for both
            # lookups instead of silently yielding None here.
            "open_webui": self.openwebui_api_key,
            "weaviate": self.weaviate_api_key,
        }
        return api_keys.get(service)
|
|
|
|
|
|
@lru_cache
def get_settings() -> Settings:
    """
    Return the shared settings object.

    The result is memoized by ``lru_cache``: the first call constructs
    ``Settings()`` and later calls hand back that same instance.

    Returns:
        The cached Settings instance
    """
    settings = Settings()
    return settings
|