- Deleted .env.example file as it is no longer needed. - Added .gitignore to manage ignored files and directories. - Introduced CLAUDE.md for AI provider integration documentation. - Created dev.sh for development setup and scripts. - Updated Dockerfile and Dockerfile.production for improved build processes. - Added multiple test files and directories for comprehensive testing. - Introduced new utility and service files for enhanced functionality. - Organized codebase with new directories and files for better maintainability.
528 lines
19 KiB
Python
528 lines
19 KiB
Python
"""
|
|
Configuration Settings for Discord Voice Chat Quote Bot
|
|
|
|
Manages all environment variables, AI provider configurations,
|
|
and system settings with validation and defaults.
|
|
"""
|
|
|
|
from pathlib import Path
|
|
from typing import Any, Literal, Self
|
|
|
|
from pydantic import Field, field_validator, model_validator
|
|
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
|
|
|
|
class Settings(BaseSettings):
    """Application settings with environment variable support.

    All values are loaded from the process environment and an optional
    ``.env`` file (see ``model_config``). Environment names are matched
    case-insensitively, and unrecognized environment keys are retained
    rather than rejected because of ``extra="allow"``.
    """

    model_config = SettingsConfigDict(
        env_file=".env", env_file_encoding="utf-8", case_sensitive=False, extra="allow"
    )

    # Discord Configuration
    # discord_token is the only strictly required field (no default).
    discord_token: str = Field(..., description="Discord bot token")
    guild_id: int | None = Field(None, description="Test server ID for development")
    summary_channel_id: int | None = Field(
        None, description="Channel for daily summaries"
    )
    bot_owner_ids: list[int] = Field(
        default_factory=list, description="Discord user IDs of bot owners"
    )

    # Database Configuration
    # NOTE: populated from the POSTGRES_URL environment variable (alias),
    # not DATABASE_URL.
    database_url: str = Field(
        default="postgresql://quotes_user:password@localhost:5432/quotes_db",
        description="PostgreSQL connection URL",
        alias="POSTGRES_URL",
    )

    # Cache and Queue Services
    redis_url: str = Field(
        default="redis://localhost:6379", description="Redis connection URL"
    )
    qdrant_url: str = Field(
        default="http://localhost:6333", description="Qdrant vector database URL"
    )
    qdrant_api_key: str | None = Field(None, description="Qdrant API key")

    # AI Provider API Keys (all optional; at least one provider must be
    # usable -- enforced by validate_required_keys()).
    openai_api_key: str | None = Field(None, description="OpenAI API key")
    anthropic_api_key: str | None = Field(None, description="Anthropic API key")
    groq_api_key: str | None = Field(None, description="Groq API key")
    openrouter_api_key: str | None = Field(None, description="OpenRouter API key")

    # TTS Provider Keys
    elevenlabs_api_key: str | None = Field(None, description="ElevenLabs API key")
    azure_speech_key: str | None = Field(None, description="Azure Speech Services key")
    azure_speech_region: str | None = Field(None, description="Azure region")

    # Optional AI Services
    hume_ai_api_key: str | None = Field(None, description="Hume AI API key")
    hugging_face_token: str | None = Field(None, description="Hugging Face token")

    # Local AI Services (URL-based; no API key required)
    ollama_base_url: str = Field(
        default="http://localhost:11434", description="Ollama server base URL"
    )
    lmstudio_base_url: str = Field(
        default="http://localhost:1234", description="LMStudio server base URL"
    )

    # Audio Recording Configuration
    recording_clip_duration: int = Field(
        default=120, description="Duration of audio clips in seconds"
    )
    max_concurrent_recordings: int = Field(
        default=5, description="Maximum concurrent voice channel recordings"
    )
    audio_retention_hours: int = Field(
        default=24, description="Hours to retain audio files"
    )
    temp_audio_path: str = Field(
        default="./temp", description="Path for temporary audio files"
    )
    max_audio_buffer_size: int = Field(
        default=10485760, description="Maximum audio buffer size in bytes"  # 10MB
    )

    # Quote Scoring Thresholds (0-10 scale; ordering daily <= rotation <=
    # realtime is enforced in validate_configuration()).
    quote_threshold_realtime: float = Field(
        default=8.5, description="Score threshold for real-time responses"
    )
    quote_threshold_rotation: float = Field(
        default=6.0, description="Score threshold for 6-hour rotation"
    )
    quote_threshold_daily: float = Field(
        default=3.0, description="Score threshold for daily summaries"
    )

    # Scoring Algorithm Weights (each 0-1; combined total must stay within
    # 0.8-1.2, checked in validate_configuration()).
    scoring_weight_funny: float = Field(
        default=0.3, description="Weight for funny score"
    )
    scoring_weight_dark: float = Field(
        default=0.15, description="Weight for dark score"
    )
    scoring_weight_silly: float = Field(
        default=0.2, description="Weight for silly score"
    )
    scoring_weight_suspicious: float = Field(
        default=0.1, description="Weight for suspicious score"
    )
    scoring_weight_asinine: float = Field(
        default=0.25, description="Weight for asinine score"
    )

    # AI Provider Configuration (per-task provider selection)
    default_ai_provider: Literal[
        "openai", "anthropic", "groq", "openrouter", "ollama", "lmstudio"
    ] = Field(default="openai", description="Default AI provider for general tasks")
    transcription_provider: Literal[
        "openai", "anthropic", "groq", "openrouter", "ollama", "lmstudio"
    ] = Field(default="openai", description="AI provider for transcription")
    analysis_provider: Literal[
        "openai", "anthropic", "groq", "openrouter", "ollama", "lmstudio"
    ] = Field(default="openai", description="AI provider for quote analysis")
    commentary_provider: Literal[
        "openai", "anthropic", "groq", "openrouter", "ollama", "lmstudio"
    ] = Field(default="anthropic", description="AI provider for commentary generation")
    fallback_provider: Literal[
        "openai", "anthropic", "groq", "openrouter", "ollama", "lmstudio"
    ] = Field(default="groq", description="Fallback AI provider")
    default_tts_provider: Literal["elevenlabs", "azure", "openai"] = Field(
        default="elevenlabs", description="Default TTS provider"
    )

    # Speaker Recognition
    speaker_recognition_provider: Literal["azure", "local", "disabled"] = Field(
        default="azure", description="Speaker recognition provider"
    )
    speaker_confidence_threshold: float = Field(
        default=0.8, description="Minimum confidence for speaker recognition"
    )
    enrollment_min_samples: int = Field(
        default=3, description="Minimum samples required for speaker enrollment"
    )

    # Performance & Limits
    max_memory_usage_mb: int = Field(
        default=4096, description="Maximum memory usage in MB"
    )
    concurrent_transcriptions: int = Field(
        default=3, description="Maximum concurrent transcription operations"
    )
    api_rate_limit_rpm: int = Field(
        default=100, description="API rate limit requests per minute"
    )
    processing_timeout_seconds: int = Field(
        default=30, description="Timeout for processing operations"
    )

    # Response Scheduling
    rotation_interval_hours: int = Field(
        default=6, description="Interval for rotation responses in hours"
    )
    daily_summary_hour: int = Field(
        default=9, description="Hour for daily summary (24-hour format)"
    )
    max_rotation_quotes: int = Field(
        default=5, description="Maximum quotes in rotation response"
    )
    max_daily_quotes: int = Field(
        default=20, description="Maximum quotes in daily summary"
    )

    # Health Monitoring
    log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = Field(
        default="INFO", description="Logging level"
    )
    prometheus_port: int = Field(default=8080, description="Prometheus metrics port")
    health_check_interval: int = Field(
        default=30, description="Health check interval in seconds"
    )
    metrics_retention_days: int = Field(
        default=30, description="Days to retain metrics data"
    )
    enable_performance_monitoring: bool = Field(
        default=True, description="Enable performance monitoring"
    )

    # Security & Privacy
    enable_data_encryption: bool = Field(
        default=True, description="Enable data encryption"
    )
    gdpr_compliance_mode: bool = Field(
        default=True, description="Enable GDPR compliance features"
    )
    auto_delete_audio_hours: int = Field(
        default=24, description="Hours after which audio files are auto-deleted"
    )
    consent_timeout_minutes: int = Field(
        default=5, description="Timeout for consent dialogs in minutes"
    )

    # Development & Debugging
    debug_mode: bool = Field(default=False, description="Enable debug mode")
    development_mode: bool = Field(default=False, description="Enable development mode")
    enable_audio_logging: bool = Field(
        default=False, description="Enable audio processing logging"
    )
    verbose_logging: bool = Field(default=False, description="Enable verbose logging")
    test_mode: bool = Field(default=False, description="Enable test mode")

    # Extension Configuration (feature toggles)
    enable_ai_voice_chat: bool = Field(
        default=False, description="Enable AI voice chat extension"
    )
    enable_research_agents: bool = Field(
        default=True, description="Enable research agents extension"
    )
    enable_personality_engine: bool = Field(
        default=True, description="Enable personality engine extension"
    )
    enable_custom_responses: bool = Field(
        default=True, description="Enable custom responses extension"
    )

    # Backup & Recovery
    auto_backup_enabled: bool = Field(
        default=True, description="Enable automatic backups"
    )
    backup_interval_hours: int = Field(
        default=24, description="Backup interval in hours"
    )
    backup_retention_days: int = Field(
        default=30, description="Days to retain backup files"
    )
    backup_storage_path: str = Field(
        default="./backups", description="Path for backup storage"
    )

    @field_validator(
        "quote_threshold_realtime", "quote_threshold_rotation", "quote_threshold_daily"
    )
    @classmethod
    def validate_thresholds(cls, v: float) -> float:
        """Validate score thresholds are between 0 and 10."""
        if not 0 <= v <= 10:
            raise ValueError("Score thresholds must be between 0 and 10")
        return v

    @field_validator(
        "scoring_weight_funny",
        "scoring_weight_dark",
        "scoring_weight_silly",
        "scoring_weight_suspicious",
        "scoring_weight_asinine",
    )
    @classmethod
    def validate_weights(cls, v: float) -> float:
        """Validate scoring weights are between 0 and 1."""
        if not 0 <= v <= 1:
            raise ValueError("Scoring weights must be between 0 and 1")
        return v

    @field_validator("speaker_confidence_threshold")
    @classmethod
    def validate_confidence_threshold(cls, v: float) -> float:
        """Validate confidence threshold is between 0 and 1."""
        if not 0 <= v <= 1:
            raise ValueError("Confidence threshold must be between 0 and 1")
        return v

    @field_validator("daily_summary_hour")
    @classmethod
    def validate_summary_hour(cls, v: int) -> int:
        """Validate daily summary hour is valid."""
        if not 0 <= v <= 23:
            raise ValueError("Daily summary hour must be between 0 and 23")
        return v

    @field_validator("prometheus_port")
    @classmethod
    def validate_port(cls, v: int) -> int:
        """Validate port numbers are in valid range."""
        if not 1 <= v <= 65535:
            raise ValueError("Port must be between 1 and 65535")
        return v

    @field_validator("processing_timeout_seconds", "health_check_interval")
    @classmethod
    def validate_positive_integers(cls, v: int) -> int:
        """Validate that integer values are positive."""
        if v <= 0:
            raise ValueError("Value must be positive")
        return v

    @field_validator("max_memory_usage_mb")
    @classmethod
    def validate_memory_usage_mb(cls, v: int) -> int:
        """Validate memory usage in MB is reasonable."""
        if v < 1:
            raise ValueError("Memory size must be at least 1 MB")
        if v > 32768:  # 32GB limit
            raise ValueError("Memory size cannot exceed 32768 MB")
        return v

    @field_validator("max_audio_buffer_size")
    @classmethod
    def validate_audio_buffer_size(cls, v: int) -> int:
        """Validate audio buffer size in bytes is reasonable."""
        if v < 1024:  # 1KB minimum
            raise ValueError("Audio buffer size must be at least 1024 bytes")
        if v > 1073741824:  # 1GB maximum
            raise ValueError("Audio buffer size cannot exceed 1GB")
        return v

    @property
    def scoring_weights(self) -> dict[str, float]:
        """Get scoring weights as a dictionary."""
        return {
            "funny": self.scoring_weight_funny,
            "dark": self.scoring_weight_dark,
            "silly": self.scoring_weight_silly,
            "suspicious": self.scoring_weight_suspicious,
            "asinine": self.scoring_weight_asinine,
        }

    @property
    def thresholds(self) -> dict[str, float]:
        """Get response thresholds as a dictionary."""
        return {
            "realtime": self.quote_threshold_realtime,
            "rotation": self.quote_threshold_rotation,
            "daily": self.quote_threshold_daily,
        }

    @property
    def ai_providers(self) -> dict[str, str]:
        """Get AI provider configuration as a dictionary."""
        return {
            "default": self.default_ai_provider,
            "transcription": self.transcription_provider,
            "analysis": self.analysis_provider,
            "commentary": self.commentary_provider,
            "fallback": self.fallback_provider,
            "tts": self.default_tts_provider,
        }

    def get_provider_config(
        self,
        provider: Literal[
            "openai", "anthropic", "groq", "openrouter", "ollama", "lmstudio"
        ],
    ) -> dict[str, str | None]:
        """Get configuration for a specific AI provider.

        Hosted providers (openai/anthropic/groq) carry an api_key and no
        base_url; local providers (ollama/lmstudio) carry a base_url and no
        api_key; openrouter carries both.

        Args:
            provider: The name of the AI provider to get config for.

        Returns:
            Dictionary containing api_key and base_url for the provider.

        Raises:
            KeyError: If the provider is not supported.
        """
        provider_configs: dict[str, dict[str, str | None]] = {
            "openai": {"api_key": self.openai_api_key, "base_url": None},
            "anthropic": {"api_key": self.anthropic_api_key, "base_url": None},
            "groq": {"api_key": self.groq_api_key, "base_url": None},
            "openrouter": {
                "api_key": self.openrouter_api_key,
                "base_url": "https://openrouter.ai/api/v1",
            },
            "ollama": {"api_key": None, "base_url": self.ollama_base_url},
            "lmstudio": {"api_key": None, "base_url": self.lmstudio_base_url},
        }

        if provider not in provider_configs:
            raise KeyError(f"Unsupported provider: {provider}")

        return provider_configs[provider]

    def validate_required_keys(self) -> list[str]:
        """Validate that required API keys are present.

        Returns:
            List of missing required configuration keys.
        """
        missing_keys: list[str] = []

        if not self.discord_token:
            missing_keys.append("DISCORD_TOKEN")

        # Check if at least one AI provider is configured
        ai_keys: list[str | None] = [
            self.openai_api_key,
            self.anthropic_api_key,
            self.groq_api_key,
            self.openrouter_api_key,
        ]

        # Local services are always considered available (URLs are provided)
        # NOTE(review): both URL fields have non-empty defaults, so this is
        # always True unless a URL is explicitly set to "" -- the AI-provider
        # check below can therefore never fire in practice. Confirm intent.
        has_local_services = bool(self.ollama_base_url or self.lmstudio_base_url)

        if not any(ai_keys) and not has_local_services:
            missing_keys.append("At least one AI provider API key or local service")

        return missing_keys

    def create_directories(self) -> None:
        """Create necessary directories for the application.

        Creates all required directories if they don't exist, including
        temporary audio storage, backup storage, logs, data, and config.
        Relative paths resolve against the current working directory.
        """
        directories: list[str] = [
            self.temp_audio_path,
            self.backup_storage_path,
            "logs",
            "data",
            "config",
        ]

        for directory in directories:
            Path(directory).mkdir(parents=True, exist_ok=True)

    def model_post_init(self, __context: Any) -> None:
        """Post-initialization setup after model validation.

        Creates required directories for the application. This means that
        merely constructing a Settings instance touches the filesystem.

        Args:
            __context: Pydantic context (unused but required by interface).
        """
        self.create_directories()

    @model_validator(mode="after")
    def validate_configuration(self) -> Self:
        """Validate the complete configuration after all fields are set.

        Returns:
            The validated settings instance.

        Raises:
            ValueError: If required configuration is missing, scoring
                weights do not sum to roughly 1.0, or thresholds are
                mis-ordered.
        """
        missing_keys = self.validate_required_keys()
        if missing_keys:
            raise ValueError(f"Missing required configuration: {missing_keys}")

        # Validate scoring weights sum to a reasonable total
        total_weight = sum(
            [
                self.scoring_weight_funny,
                self.scoring_weight_dark,
                self.scoring_weight_silly,
                self.scoring_weight_suspicious,
                self.scoring_weight_asinine,
            ]
        )

        # Tolerance band rather than exact equality to allow for float
        # rounding and minor intentional over/under-weighting.
        if not 0.8 <= total_weight <= 1.2:
            raise ValueError(
                f"Scoring weights should sum to approximately 1.0, got {total_weight}"
            )

        # Validate threshold ordering
        if not (
            self.quote_threshold_daily
            <= self.quote_threshold_rotation
            <= self.quote_threshold_realtime
        ):
            raise ValueError(
                "Thresholds must be ordered: daily <= rotation <= realtime "
                f"(got {self.quote_threshold_daily} <= {self.quote_threshold_rotation} <= {self.quote_threshold_realtime})"
            )

        return self
|
|
|
|
|
|
def get_settings() -> Settings:
    """Construct a fresh settings instance from the environment.

    Returns:
        Initialized settings instance from environment variables.

    Raises:
        ValueError: If required configuration is missing.
        RuntimeError: If settings cannot be initialized due to environment issues.
    """
    # pydantic-settings pulls every field from the environment / .env file
    # during construction, so no arguments are needed here.
    try:
        instance = Settings()  # pyright: ignore[reportCallIssue]
    except Exception as e:
        # Surface any construction failure as a single well-known error type.
        raise RuntimeError(f"Failed to initialize settings: {e}") from e
    return instance
|
|
|
|
|
|
# Global settings instance - initialize lazily to avoid import issues
# (constructing Settings at import time would fail in environments where
# DISCORD_TOKEN etc. are not yet set).
_settings: Settings | None = None
|
|
|
|
|
|
def settings() -> Settings:
    """Get the cached global settings instance.

    Returns:
        The global settings instance, creating it on first access.

    Raises:
        RuntimeError: If settings initialization fails.
    """
    global _settings
    cached = _settings
    if cached is None:
        # First access: build and memoize the module-level singleton.
        cached = get_settings()
        _settings = cached
    return cached
|
|
|
|
|
|
# For backward compatibility, also provide a direct instance
|
|
# This will be initialized when first accessed
|
|
def get_settings_instance() -> Settings:
    """Get settings instance with backward compatibility.

    Thin alias kept for older call sites; simply delegates to the
    cached-singleton accessor.

    Returns:
        The settings instance.
    """
    return settings()
|