Files
disbord/config/settings.py
Travis Vasceannie 3acb779569 chore: remove .env.example and add new files for project structure
- Deleted .env.example file as it is no longer needed.
- Added .gitignore to manage ignored files and directories.
- Introduced CLAUDE.md for AI provider integration documentation.
- Created dev.sh for development setup and scripts.
- Updated Dockerfile and Dockerfile.production for improved build processes.
- Added multiple test files and directories for comprehensive testing.
- Introduced new utility and service files for enhanced functionality.
- Organized codebase with new directories and files for better maintainability.
2025-08-27 23:00:19 -04:00

528 lines
19 KiB
Python

"""
Configuration Settings for Discord Voice Chat Quote Bot
Manages all environment variables, AI provider configurations,
and system settings with validation and defaults.
"""
from pathlib import Path
from typing import Any, Literal, Self
from pydantic import Field, field_validator, model_validator
from pydantic_settings import BaseSettings, SettingsConfigDict
# Names of the LLM back ends that may be selected for any AI task.
# Shared by the provider fields and by ``Settings.get_provider_config`` so the
# set of accepted values is declared exactly once.
AIProviderName = Literal[
    "openai", "anthropic", "groq", "openrouter", "ollama", "lmstudio"
]


class Settings(BaseSettings):
    """Application settings with environment variable support.

    Values are read from the process environment and from a ``.env`` file
    (UTF-8, case-insensitive names); unknown keys are allowed. Field-level
    validators enforce numeric ranges, and ``validate_configuration``
    performs the cross-field checks once every field has been populated.
    """

    model_config = SettingsConfigDict(
        env_file=".env", env_file_encoding="utf-8", case_sensitive=False, extra="allow"
    )

    # Discord Configuration
    discord_token: str = Field(..., description="Discord bot token")
    guild_id: int | None = Field(None, description="Test server ID for development")
    summary_channel_id: int | None = Field(
        None, description="Channel for daily summaries"
    )
    bot_owner_ids: list[int] = Field(
        default_factory=list, description="Discord user IDs of bot owners"
    )

    # Database Configuration
    # NOTE(review): the alias presumably makes pydantic-settings read this
    # value from POSTGRES_URL rather than DATABASE_URL - confirm deployments
    # export that name.
    database_url: str = Field(
        default="postgresql://quotes_user:password@localhost:5432/quotes_db",
        description="PostgreSQL connection URL",
        alias="POSTGRES_URL",
    )

    # Cache and Queue Services
    redis_url: str = Field(
        default="redis://localhost:6379", description="Redis connection URL"
    )
    qdrant_url: str = Field(
        default="http://localhost:6333", description="Qdrant vector database URL"
    )
    qdrant_api_key: str | None = Field(None, description="Qdrant API key")

    # AI Provider API Keys
    openai_api_key: str | None = Field(None, description="OpenAI API key")
    anthropic_api_key: str | None = Field(None, description="Anthropic API key")
    groq_api_key: str | None = Field(None, description="Groq API key")
    openrouter_api_key: str | None = Field(None, description="OpenRouter API key")

    # TTS Provider Keys
    elevenlabs_api_key: str | None = Field(None, description="ElevenLabs API key")
    azure_speech_key: str | None = Field(None, description="Azure Speech Services key")
    azure_speech_region: str | None = Field(None, description="Azure region")

    # Optional AI Services
    hume_ai_api_key: str | None = Field(None, description="Hume AI API key")
    hugging_face_token: str | None = Field(None, description="Hugging Face token")

    # Local AI Services
    ollama_base_url: str = Field(
        default="http://localhost:11434", description="Ollama server base URL"
    )
    lmstudio_base_url: str = Field(
        default="http://localhost:1234", description="LMStudio server base URL"
    )

    # Audio Recording Configuration
    recording_clip_duration: int = Field(
        default=120, description="Duration of audio clips in seconds"
    )
    max_concurrent_recordings: int = Field(
        default=5, description="Maximum concurrent voice channel recordings"
    )
    audio_retention_hours: int = Field(
        default=24, description="Hours to retain audio files"
    )
    temp_audio_path: str = Field(
        default="./temp", description="Path for temporary audio files"
    )
    max_audio_buffer_size: int = Field(
        default=10_485_760,  # 10MB
        description="Maximum audio buffer size in bytes",
    )

    # Quote Scoring Thresholds
    quote_threshold_realtime: float = Field(
        default=8.5, description="Score threshold for real-time responses"
    )
    quote_threshold_rotation: float = Field(
        default=6.0, description="Score threshold for 6-hour rotation"
    )
    quote_threshold_daily: float = Field(
        default=3.0, description="Score threshold for daily summaries"
    )

    # Scoring Algorithm Weights
    scoring_weight_funny: float = Field(
        default=0.3, description="Weight for funny score"
    )
    scoring_weight_dark: float = Field(
        default=0.15, description="Weight for dark score"
    )
    scoring_weight_silly: float = Field(
        default=0.2, description="Weight for silly score"
    )
    scoring_weight_suspicious: float = Field(
        default=0.1, description="Weight for suspicious score"
    )
    scoring_weight_asinine: float = Field(
        default=0.25, description="Weight for asinine score"
    )

    # AI Provider Configuration
    default_ai_provider: AIProviderName = Field(
        default="openai", description="Default AI provider for general tasks"
    )
    transcription_provider: AIProviderName = Field(
        default="openai", description="AI provider for transcription"
    )
    analysis_provider: AIProviderName = Field(
        default="openai", description="AI provider for quote analysis"
    )
    commentary_provider: AIProviderName = Field(
        default="anthropic", description="AI provider for commentary generation"
    )
    fallback_provider: AIProviderName = Field(
        default="groq", description="Fallback AI provider"
    )
    default_tts_provider: Literal["elevenlabs", "azure", "openai"] = Field(
        default="elevenlabs", description="Default TTS provider"
    )

    # Speaker Recognition
    speaker_recognition_provider: Literal["azure", "local", "disabled"] = Field(
        default="azure", description="Speaker recognition provider"
    )
    speaker_confidence_threshold: float = Field(
        default=0.8, description="Minimum confidence for speaker recognition"
    )
    enrollment_min_samples: int = Field(
        default=3, description="Minimum samples required for speaker enrollment"
    )

    # Performance & Limits
    max_memory_usage_mb: int = Field(
        default=4096, description="Maximum memory usage in MB"
    )
    concurrent_transcriptions: int = Field(
        default=3, description="Maximum concurrent transcription operations"
    )
    api_rate_limit_rpm: int = Field(
        default=100, description="API rate limit requests per minute"
    )
    processing_timeout_seconds: int = Field(
        default=30, description="Timeout for processing operations"
    )

    # Response Scheduling
    rotation_interval_hours: int = Field(
        default=6, description="Interval for rotation responses in hours"
    )
    daily_summary_hour: int = Field(
        default=9, description="Hour for daily summary (24-hour format)"
    )
    max_rotation_quotes: int = Field(
        default=5, description="Maximum quotes in rotation response"
    )
    max_daily_quotes: int = Field(
        default=20, description="Maximum quotes in daily summary"
    )

    # Health Monitoring
    log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = Field(
        default="INFO", description="Logging level"
    )
    prometheus_port: int = Field(default=8080, description="Prometheus metrics port")
    health_check_interval: int = Field(
        default=30, description="Health check interval in seconds"
    )
    metrics_retention_days: int = Field(
        default=30, description="Days to retain metrics data"
    )
    enable_performance_monitoring: bool = Field(
        default=True, description="Enable performance monitoring"
    )

    # Security & Privacy
    enable_data_encryption: bool = Field(
        default=True, description="Enable data encryption"
    )
    gdpr_compliance_mode: bool = Field(
        default=True, description="Enable GDPR compliance features"
    )
    auto_delete_audio_hours: int = Field(
        default=24, description="Hours after which audio files are auto-deleted"
    )
    consent_timeout_minutes: int = Field(
        default=5, description="Timeout for consent dialogs in minutes"
    )

    # Development & Debugging
    debug_mode: bool = Field(default=False, description="Enable debug mode")
    development_mode: bool = Field(default=False, description="Enable development mode")
    enable_audio_logging: bool = Field(
        default=False, description="Enable audio processing logging"
    )
    verbose_logging: bool = Field(default=False, description="Enable verbose logging")
    test_mode: bool = Field(default=False, description="Enable test mode")

    # Extension Configuration
    enable_ai_voice_chat: bool = Field(
        default=False, description="Enable AI voice chat extension"
    )
    enable_research_agents: bool = Field(
        default=True, description="Enable research agents extension"
    )
    enable_personality_engine: bool = Field(
        default=True, description="Enable personality engine extension"
    )
    enable_custom_responses: bool = Field(
        default=True, description="Enable custom responses extension"
    )

    # Backup & Recovery
    auto_backup_enabled: bool = Field(
        default=True, description="Enable automatic backups"
    )
    backup_interval_hours: int = Field(
        default=24, description="Backup interval in hours"
    )
    backup_retention_days: int = Field(
        default=30, description="Days to retain backup files"
    )
    backup_storage_path: str = Field(
        default="./backups", description="Path for backup storage"
    )

    @field_validator(
        "quote_threshold_realtime", "quote_threshold_rotation", "quote_threshold_daily"
    )
    @classmethod
    def validate_thresholds(cls, v: float) -> float:
        """Validate score thresholds are between 0 and 10.

        Args:
            v: Threshold value supplied for one of the quote threshold fields.

        Returns:
            The unchanged value when it lies in the inclusive range 0..10.

        Raises:
            ValueError: If the value is outside 0..10.
        """
        if not 0 <= v <= 10:
            raise ValueError("Score thresholds must be between 0 and 10")
        return v

    @field_validator(
        "scoring_weight_funny",
        "scoring_weight_dark",
        "scoring_weight_silly",
        "scoring_weight_suspicious",
        "scoring_weight_asinine",
    )
    @classmethod
    def validate_weights(cls, v: float) -> float:
        """Validate scoring weights are between 0 and 1.

        Args:
            v: Weight supplied for one of the scoring weight fields.

        Returns:
            The unchanged value when it lies in the inclusive range 0..1.

        Raises:
            ValueError: If the value is outside 0..1.
        """
        if not 0 <= v <= 1:
            raise ValueError("Scoring weights must be between 0 and 1")
        return v

    @field_validator("speaker_confidence_threshold")
    @classmethod
    def validate_confidence_threshold(cls, v: float) -> float:
        """Validate confidence threshold is between 0 and 1.

        Args:
            v: Speaker recognition confidence threshold.

        Returns:
            The unchanged value when it lies in the inclusive range 0..1.

        Raises:
            ValueError: If the value is outside 0..1.
        """
        if not 0 <= v <= 1:
            raise ValueError("Confidence threshold must be between 0 and 1")
        return v

    @field_validator("daily_summary_hour")
    @classmethod
    def validate_summary_hour(cls, v: int) -> int:
        """Validate daily summary hour is a valid hour of the day.

        Args:
            v: Hour in 24-hour format.

        Returns:
            The unchanged value when it lies in the inclusive range 0..23.

        Raises:
            ValueError: If the value is outside 0..23.
        """
        if not 0 <= v <= 23:
            raise ValueError("Daily summary hour must be between 0 and 23")
        return v

    @field_validator("prometheus_port")
    @classmethod
    def validate_port(cls, v: int) -> int:
        """Validate the port number is in the valid range.

        Args:
            v: Port number for the Prometheus metrics endpoint.

        Returns:
            The unchanged value when it lies in the inclusive range 1..65535.

        Raises:
            ValueError: If the value is outside 1..65535.
        """
        if not 1 <= v <= 65535:
            raise ValueError("Port must be between 1 and 65535")
        return v

    @field_validator("processing_timeout_seconds", "health_check_interval")
    @classmethod
    def validate_positive_integers(cls, v: int) -> int:
        """Validate that the integer value is strictly positive.

        Args:
            v: Value supplied for a timeout or interval field.

        Returns:
            The unchanged value when it is greater than zero.

        Raises:
            ValueError: If the value is zero or negative.
        """
        if v <= 0:
            raise ValueError("Value must be positive")
        return v

    @field_validator("max_memory_usage_mb")
    @classmethod
    def validate_memory_usage_mb(cls, v: int) -> int:
        """Validate memory usage in MB is within the accepted bounds.

        Args:
            v: Memory limit in megabytes.

        Returns:
            The unchanged value when it lies in the inclusive range 1..32768.

        Raises:
            ValueError: If the value is below 1 or above 32768.
        """
        if v < 1:
            raise ValueError("Memory size must be at least 1 MB")
        if v > 32_768:  # 32GB limit
            raise ValueError("Memory size cannot exceed 32768 MB")
        return v

    @field_validator("max_audio_buffer_size")
    @classmethod
    def validate_audio_buffer_size(cls, v: int) -> int:
        """Validate audio buffer size in bytes is within the accepted bounds.

        Args:
            v: Buffer size in bytes.

        Returns:
            The unchanged value when it lies between 1024 and 1073741824
            bytes inclusive.

        Raises:
            ValueError: If the value is below 1024 or above 1073741824.
        """
        if v < 1024:  # 1KB minimum
            raise ValueError("Audio buffer size must be at least 1024 bytes")
        if v > 1_073_741_824:  # 1GB maximum
            raise ValueError("Audio buffer size cannot exceed 1GB")
        return v

    @property
    def scoring_weights(self) -> dict[str, float]:
        """Get scoring weights as a dictionary keyed by category name."""
        return {
            "funny": self.scoring_weight_funny,
            "dark": self.scoring_weight_dark,
            "silly": self.scoring_weight_silly,
            "suspicious": self.scoring_weight_suspicious,
            "asinine": self.scoring_weight_asinine,
        }

    @property
    def thresholds(self) -> dict[str, float]:
        """Get response thresholds as a dictionary keyed by response tier."""
        return {
            "realtime": self.quote_threshold_realtime,
            "rotation": self.quote_threshold_rotation,
            "daily": self.quote_threshold_daily,
        }

    @property
    def ai_providers(self) -> dict[str, str]:
        """Get the provider selected for each task as a dictionary."""
        return {
            "default": self.default_ai_provider,
            "transcription": self.transcription_provider,
            "analysis": self.analysis_provider,
            "commentary": self.commentary_provider,
            "fallback": self.fallback_provider,
            "tts": self.default_tts_provider,
        }

    def get_provider_config(
        self,
        provider: AIProviderName,
    ) -> dict[str, str | None]:
        """Get configuration for a specific AI provider.

        Args:
            provider: The name of the AI provider to get config for.

        Returns:
            Dictionary containing api_key and base_url for the provider.
            Hosted providers carry an API key (possibly ``None`` when unset);
            the local providers carry only a base URL.

        Raises:
            KeyError: If the provider is not supported.
        """
        provider_configs: dict[str, dict[str, str | None]] = {
            "openai": {"api_key": self.openai_api_key, "base_url": None},
            "anthropic": {"api_key": self.anthropic_api_key, "base_url": None},
            "groq": {"api_key": self.groq_api_key, "base_url": None},
            "openrouter": {
                "api_key": self.openrouter_api_key,
                "base_url": "https://openrouter.ai/api/v1",
            },
            "ollama": {"api_key": None, "base_url": self.ollama_base_url},
            "lmstudio": {"api_key": None, "base_url": self.lmstudio_base_url},
        }
        try:
            return provider_configs[provider]
        except KeyError:
            # Re-raise with a descriptive message; the bare lookup failure
            # adds nothing, so its context is suppressed.
            raise KeyError(f"Unsupported provider: {provider}") from None

    def validate_required_keys(self) -> list[str]:
        """Validate that required API keys are present.

        Returns:
            List of missing required configuration keys; empty when nothing
            is missing.
        """
        missing_keys: list[str] = []
        if not self.discord_token:
            missing_keys.append("DISCORD_TOKEN")

        # Check if at least one AI provider is configured
        ai_keys: list[str | None] = [
            self.openai_api_key,
            self.anthropic_api_key,
            self.groq_api_key,
            self.openrouter_api_key,
        ]
        # Local services count as available whenever a base URL is set. Both
        # URLs default to non-empty strings, so this is true unless both are
        # explicitly blanked.
        has_local_services = bool(self.ollama_base_url or self.lmstudio_base_url)
        if not any(ai_keys) and not has_local_services:
            missing_keys.append("At least one AI provider API key or local service")
        return missing_keys

    def create_directories(self) -> None:
        """Create necessary directories for the application.

        Creates all required directories if they don't exist, including
        temporary audio storage, backup storage, logs, data, and config.
        The last three are relative paths and are therefore resolved against
        the current working directory.
        """
        directories: list[str] = [
            self.temp_audio_path,
            self.backup_storage_path,
            "logs",
            "data",
            "config",
        ]
        for directory in directories:
            Path(directory).mkdir(parents=True, exist_ok=True)

    def model_post_init(self, __context: Any) -> None:
        """Post-initialization setup after model validation.

        Creates required directories for the application.

        Args:
            __context: Pydantic context (unused but required by interface).
        """
        # NOTE(review): pydantic appears to invoke this hook before
        # mode="after" model validators, so directories may be created even
        # when validate_configuration later rejects the settings - confirm.
        self.create_directories()

    @model_validator(mode="after")
    def validate_configuration(self) -> Self:
        """Validate the complete configuration after all fields are set.

        Checks, in order: required keys are present, the scoring weights sum
        to between 0.8 and 1.2, and the thresholds are ordered
        daily <= rotation <= realtime.

        Returns:
            The validated settings instance.

        Raises:
            ValueError: If required configuration is missing, the weights do
                not sum to approximately 1.0, or the thresholds are out of
                order.
        """
        missing_keys = self.validate_required_keys()
        if missing_keys:
            raise ValueError(f"Missing required configuration: {missing_keys}")

        # Validate scoring weights sum to a reasonable total
        total_weight = sum(self.scoring_weights.values())
        if not 0.8 <= total_weight <= 1.2:
            raise ValueError(
                f"Scoring weights should sum to approximately 1.0, got {total_weight}"
            )

        # Validate threshold ordering
        if not (
            self.quote_threshold_daily
            <= self.quote_threshold_rotation
            <= self.quote_threshold_realtime
        ):
            raise ValueError(
                "Thresholds must be ordered: daily <= rotation <= realtime "
                f"(got {self.quote_threshold_daily} <= {self.quote_threshold_rotation} <= {self.quote_threshold_realtime})"
            )
        return self
def get_settings() -> Settings:
    """Build a fresh ``Settings`` instance from the environment.

    A new instance is constructed on every call; nothing is cached here.

    Returns:
        Initialized settings instance from environment variables.

    Raises:
        RuntimeError: If constructing ``Settings`` fails for any reason.
            Every exception raised during construction, including
            validation errors, is wrapped, with the original exception
            chained as the cause.
    """
    try:
        # Settings() automatically loads from environment variables via pydantic-settings
        loaded = Settings()  # pyright: ignore[reportCallIssue]
    except Exception as exc:
        raise RuntimeError(f"Failed to initialize settings: {exc}") from exc
    else:
        return loaded
# Module-level cache for the settings instance. It starts empty and is filled
# on the first call to settings(), so importing this module does not construct
# Settings.
_settings: Settings | None = None


def settings() -> Settings:
    """Return the cached global settings instance, creating it on first use.

    Returns:
        The global settings instance.

    Raises:
        RuntimeError: If settings initialization fails.
    """
    global _settings
    cached = _settings
    if cached is not None:
        return cached
    cached = get_settings()
    _settings = cached
    return cached
# Backward-compatible accessor: older call sites use this name, which simply
# delegates to settings().
def get_settings_instance() -> Settings:
    """Return the settings instance under its backward-compatible name.

    Returns:
        The settings instance returned by ``settings()``.
    """
    instance = settings()
    return instance