Files
disbord/tests/integration/test_service_monitoring_integration.py
Travis Vasceannie 3acb779569 chore: remove .env.example and add new files for project structure
- Deleted .env.example file as it is no longer needed.
- Added .gitignore to manage ignored files and directories.
- Introduced CLAUDE.md for AI provider integration documentation.
- Created dev.sh for development setup and scripts.
- Updated Dockerfile and Dockerfile.production for improved build processes.
- Added multiple test files and directories for comprehensive testing.
- Introduced new utility and service files for enhanced functionality.
- Organized codebase with new directories and files for better maintainability.
2025-08-27 23:00:19 -04:00

506 lines
18 KiB
Python

"""
Service integration tests for Monitoring and Health Check Services.
Tests the integration between health monitoring, metrics collection,
and their dependencies with external monitoring systems.
"""
from datetime import datetime, timedelta
from unittest.mock import AsyncMock, MagicMock
import pytest
from core.ai_manager import AIProviderManager
from core.database import DatabaseManager
from services.monitoring.health_endpoints import HealthEndpoints
from services.monitoring.health_monitor import HealthMonitor
@pytest.mark.integration
class TestMonitoringServiceIntegration:
    """Integration tests for the monitoring service pipeline.

    Each test wires a real ``HealthMonitor`` and ``HealthEndpoints`` to
    mocked database, AI-manager, Redis and settings objects (built by the
    ``_create_mock_*`` helpers at the bottom of the class) and then drives
    the monitor through its public coroutine API.
    """

    @pytest.fixture
    def mock_dependencies(self):
        """Create all mock dependencies for monitoring services.

        Returns:
            dict with the keys ``db_manager``, ``ai_manager``,
            ``redis_client`` and ``settings``.
        """
        # Nothing is awaited while building the mocks, so this is a plain
        # (synchronous) fixture rather than a coroutine fixture.
        return {
            "db_manager": self._create_mock_db_manager(),
            "ai_manager": self._create_mock_ai_manager(),
            "redis_client": self._create_mock_redis_client(),
            "settings": self._create_mock_settings(),
        }

    @pytest.fixture
    async def monitoring_services(self, mock_dependencies):
        """Create integrated monitoring service instances.

        Yields:
            dict with the initialized ``health_monitor`` and the
            ``health_endpoints`` built on top of it.
        """
        deps = mock_dependencies

        # Create health monitor
        health_monitor = HealthMonitor(
            deps["db_manager"],
            deps["ai_manager"],
            deps["redis_client"],
            deps["settings"],
        )

        # Create health endpoints
        health_endpoints = HealthEndpoints(health_monitor, deps["settings"])

        await health_monitor.initialize()

        yield {"health_monitor": health_monitor, "health_endpoints": health_endpoints}

        # Teardown: stop the monitor so its background task does not outlive
        # the test. The cleanup test closes the monitor itself, so only close
        # here when the task is still running.
        # NOTE(review): relies on the private ``_monitoring_task`` attribute
        # that the cleanup test also inspects -- confirm close() has no other
        # resources that must be released when no task is pending.
        task = getattr(health_monitor, "_monitoring_task", None)
        if task is not None and not task.done():
            await health_monitor.close()

    @pytest.fixture
    def sample_service_states(self):
        """Create sample service health states for testing.

        Returns:
            dict with the groups ``healthy_services``, ``degraded_services``
            and ``unhealthy_services``; each maps a service name to a health
            payload dict.
        """
        # NOTE(review): datetime.utcnow() is deprecated since Python 3.12.
        # It is kept because HealthMonitor's timestamp handling is not
        # visible here; confirm it accepts timezone-aware values before
        # switching to datetime.now(timezone.utc).
        return {
            "healthy_services": {
                "database": {
                    "status": "healthy",
                    "response_time": 0.05,
                    "connections": 8,
                    "last_check": datetime.utcnow(),
                    "uptime": timedelta(days=5, hours=3).total_seconds(),
                },
                "ai_manager": {
                    "status": "healthy",
                    "response_time": 0.12,
                    "providers": ["openai", "anthropic"],
                    "last_check": datetime.utcnow(),
                    "requests_processed": 1250,
                },
                "transcription": {
                    "status": "healthy",
                    "response_time": 0.32,
                    "queue_size": 2,
                    "last_check": datetime.utcnow(),
                    "total_transcriptions": 450,
                },
            },
            "degraded_services": {
                "quote_analyzer": {
                    "status": "degraded",
                    "response_time": 1.85,
                    "error_rate": 0.12,
                    "last_check": datetime.utcnow(),
                    "recent_errors": ["Timeout error", "Rate limit exceeded"],
                }
            },
            "unhealthy_services": {
                "laughter_detector": {
                    "status": "unhealthy",
                    "response_time": None,
                    "last_error": "Service unreachable",
                    "last_check": datetime.utcnow(),
                    "downtime_duration": timedelta(minutes=15).total_seconds(),
                }
            },
        }

    @pytest.mark.asyncio
    async def test_comprehensive_health_monitoring_integration(
        self, monitoring_services, mock_dependencies, sample_service_states
    ):
        """Test comprehensive health monitoring across all services."""
        health_monitor = monitoring_services["health_monitor"]

        # Mock individual service health checks
        services = sample_service_states["healthy_services"]

        # Mock database health
        mock_dependencies["db_manager"].check_health.return_value = services["database"]

        # Mock AI manager health
        mock_dependencies["ai_manager"].check_health.return_value = services[
            "ai_manager"
        ]

        # Perform comprehensive health check
        overall_health = await health_monitor.check_all_services()

        assert overall_health is not None
        assert overall_health["overall_status"] in ["healthy", "degraded", "unhealthy"]
        assert "services" in overall_health
        assert "timestamp" in overall_health
        assert "uptime" in overall_health

        # Verify individual services checked
        assert "database" in overall_health["services"]
        assert "ai_manager" in overall_health["services"]

    @pytest.mark.asyncio
    async def test_degraded_service_detection_integration(
        self, monitoring_services, mock_dependencies, sample_service_states
    ):
        """Test detection and handling of degraded services."""
        health_monitor = monitoring_services["health_monitor"]

        # Mock degraded service state
        degraded_service = sample_service_states["degraded_services"]["quote_analyzer"]

        # Mock AI manager returning degraded status
        mock_dependencies["ai_manager"].check_health.return_value = degraded_service

        # Check AI service health
        ai_health = await health_monitor.check_service_health("ai_manager")

        assert ai_health["status"] == "degraded"
        assert ai_health["response_time"] > 1.0  # Slow response
        assert ai_health["error_rate"] > 0.1  # High error rate

        # Should trigger alert
        alerts = await health_monitor.get_active_alerts()
        degraded_alerts = [a for a in alerts if a["severity"] == "warning"]
        assert len(degraded_alerts) > 0

    @pytest.mark.asyncio
    async def test_unhealthy_service_detection_integration(
        self, monitoring_services, mock_dependencies, sample_service_states
    ):
        """Test detection and handling of unhealthy services."""
        health_monitor = monitoring_services["health_monitor"]

        # Mock database returning connection error
        mock_dependencies["db_manager"].check_health.side_effect = Exception(
            "Connection refused"
        )

        # Check database health
        db_health = await health_monitor.check_service_health("database")

        assert db_health["status"] == "unhealthy"
        assert "error" in db_health
        assert db_health["response_time"] is None

        # Should trigger critical alert
        alerts = await health_monitor.get_active_alerts()
        critical_alerts = [a for a in alerts if a["severity"] == "critical"]
        assert len(critical_alerts) > 0

    @pytest.mark.asyncio
    async def test_metrics_collection_integration(
        self, monitoring_services, mock_dependencies
    ):
        """Test metrics collection across all services."""
        health_monitor = monitoring_services["health_monitor"]

        # Mock Redis for metrics storage
        mock_redis = mock_dependencies["redis_client"]
        mock_redis.get.return_value = None  # No existing metrics
        mock_redis.set.return_value = True
        mock_redis.incr.return_value = 1

        # Collect metrics from various services
        await health_monitor.collect_metrics()

        # Verify metrics were stored. No minimum incr() call count is
        # required, so only set() is asserted.
        assert mock_redis.set.call_count > 0

        # Get aggregated metrics
        metrics = await health_monitor.get_metrics_summary()

        assert metrics is not None
        assert "system" in metrics
        assert "services" in metrics
        assert "timestamp" in metrics

    @pytest.mark.asyncio
    async def test_health_endpoints_integration(
        self, monitoring_services, mock_dependencies
    ):
        """Test health check endpoints integration."""
        health_endpoints = monitoring_services["health_endpoints"]

        # Mock healthy state
        mock_dependencies["db_manager"].check_health.return_value = {
            "status": "healthy",
            "connections": 5,
        }
        mock_dependencies["ai_manager"].check_health.return_value = {
            "status": "healthy",
            "providers": ["openai"],
        }

        # Test basic health endpoint
        health_response = await health_endpoints.basic_health_check()

        assert health_response["status"] == "healthy"
        assert "timestamp" in health_response
        assert health_response["uptime"] > 0

        # Test detailed health endpoint
        detailed_response = await health_endpoints.detailed_health_check()

        assert detailed_response["overall_status"] in [
            "healthy",
            "degraded",
            "unhealthy",
        ]
        assert "services" in detailed_response
        assert "metrics" in detailed_response

    @pytest.mark.asyncio
    async def test_performance_monitoring_integration(
        self, monitoring_services, mock_dependencies
    ):
        """Test performance monitoring and alerting."""
        health_monitor = monitoring_services["health_monitor"]

        # Simulate performance metrics; CPU and memory exceed the mocked
        # settings thresholds (80 and 85).
        performance_data = {
            "cpu_usage": 85.5,  # High CPU
            "memory_usage": 92.1,  # High memory
            "disk_usage": 45.3,
            "response_times": {
                "database": 0.05,
                "ai_manager": 2.5,  # Slow AI responses
                "transcription": 0.8,
            },
        }

        # Update performance metrics
        await health_monitor.update_performance_metrics(performance_data)

        # Should detect performance issues
        performance_alerts = await health_monitor.get_performance_alerts()
        assert len(performance_alerts) > 0

        # Should have CPU and memory alerts
        cpu_alerts = [a for a in performance_alerts if "cpu" in a["metric"].lower()]
        memory_alerts = [
            a for a in performance_alerts if "memory" in a["metric"].lower()
        ]
        assert len(cpu_alerts) > 0
        assert len(memory_alerts) > 0

    @pytest.mark.asyncio
    async def test_service_dependency_monitoring_integration(
        self, monitoring_services, mock_dependencies
    ):
        """Test monitoring of service dependencies and cascading failures."""
        health_monitor = monitoring_services["health_monitor"]

        # Mock database failure affecting other services
        mock_dependencies["db_manager"].check_health.side_effect = Exception("DB down")

        # Check dependent services
        dependency_health = await health_monitor.check_service_dependencies()

        assert dependency_health is not None

        # Should detect cascading impact
        db_dependent_services = dependency_health.get("database_dependent", [])
        affected_services = [s for s in db_dependent_services if s["affected"]]
        assert len(affected_services) > 0

    @pytest.mark.asyncio
    async def test_alert_escalation_integration(
        self, monitoring_services, mock_dependencies
    ):
        """Test alert escalation and notification systems."""
        health_monitor = monitoring_services["health_monitor"]

        # Create critical health issue
        critical_issue = {
            "service": "database",
            "status": "unhealthy",
            "error": "Connection timeout",
            "severity": "critical",
            "timestamp": datetime.utcnow(),
        }

        # Process critical alert
        await health_monitor.process_alert(critical_issue)

        # Should escalate critical alerts
        escalated_alerts = await health_monitor.get_escalated_alerts()
        assert len(escalated_alerts) > 0
        assert escalated_alerts[0]["severity"] == "critical"
        assert escalated_alerts[0]["escalated"] is True

    @pytest.mark.asyncio
    async def test_historical_health_tracking_integration(
        self, monitoring_services, mock_dependencies
    ):
        """Test historical health data tracking and analysis."""
        health_monitor = monitoring_services["health_monitor"]

        # Mock historical data storage
        mock_dependencies["db_manager"].execute_query.return_value = True

        # Record health snapshots over time: one per hour going back five
        # hours, with the two oldest marked "degraded".
        for i in range(5):
            health_snapshot = {
                "timestamp": datetime.utcnow() - timedelta(hours=i),
                "overall_status": "healthy" if i < 3 else "degraded",
                "services": {
                    "database": {
                        "status": "healthy",
                        "response_time": 0.05 + (i * 0.01),
                    },
                    "ai_manager": {
                        "status": "healthy",
                        "response_time": 0.1 + (i * 0.02),
                    },
                },
            }
            await health_monitor.record_health_snapshot(health_snapshot)

        # Verify data was stored
        assert mock_dependencies["db_manager"].execute_query.call_count >= 5

        # Get health trends
        trends = await health_monitor.get_health_trends(hours_back=24)

        assert trends is not None
        assert "status_changes" in trends
        assert "performance_trends" in trends

    @pytest.mark.asyncio
    async def test_monitoring_service_recovery_integration(
        self, monitoring_services, mock_dependencies
    ):
        """Test service recovery detection and notifications."""
        health_monitor = monitoring_services["health_monitor"]

        # Simulate service recovery scenario
        # First: Service is down
        mock_dependencies["ai_manager"].check_health.side_effect = Exception(
            "Service down"
        )

        unhealthy_check = await health_monitor.check_service_health("ai_manager")
        assert unhealthy_check["status"] == "unhealthy"

        # Then: Service recovers. side_effect must be cleared first because
        # it takes precedence over return_value on a mock.
        mock_dependencies["ai_manager"].check_health.side_effect = None
        mock_dependencies["ai_manager"].check_health.return_value = {
            "status": "healthy",
            "response_time": 0.08,
        }

        recovery_check = await health_monitor.check_service_health("ai_manager")
        assert recovery_check["status"] == "healthy"

        # Should detect recovery
        recovery_events = await health_monitor.get_recovery_events()
        ai_recovery = [e for e in recovery_events if e["service"] == "ai_manager"]
        assert len(ai_recovery) > 0
        assert ai_recovery[0]["event_type"] == "recovery"

    @pytest.mark.asyncio
    async def test_monitoring_configuration_integration(
        self, monitoring_services, mock_dependencies
    ):
        """Test dynamic monitoring configuration and thresholds."""
        health_monitor = monitoring_services["health_monitor"]

        # Update monitoring configuration
        new_config = {
            "check_interval_seconds": 30,
            "response_time_threshold": 1.0,
            "error_rate_threshold": 0.05,
            "cpu_threshold": 80,
            "memory_threshold": 85,
        }

        await health_monitor.update_configuration(new_config)

        # Verify configuration was applied
        current_config = await health_monitor.get_configuration()
        assert current_config["check_interval_seconds"] == 30
        assert current_config["response_time_threshold"] == 1.0
        assert current_config["error_rate_threshold"] == 0.05

    @pytest.mark.asyncio
    async def test_monitoring_service_cleanup_integration(self, monitoring_services):
        """Test proper cleanup of monitoring services."""
        health_monitor = monitoring_services["health_monitor"]

        # Close monitoring services
        await health_monitor.close()

        # Should clean up background tasks
        assert health_monitor._monitoring_task.cancelled()

        # Should not be able to check health after cleanup
        # NOTE(review): Exception is very broad; narrow it to the specific
        # error HealthMonitor raises once closed (not visible in this file).
        with pytest.raises(Exception):
            await health_monitor.check_all_services()

    def _create_mock_db_manager(self) -> AsyncMock:
        """Create mock database manager for monitoring services.

        Returns:
            ``AsyncMock`` constrained to the ``DatabaseManager`` interface,
            reporting a healthy state by default.
        """
        db_manager = AsyncMock(spec=DatabaseManager)

        # Default healthy state
        db_manager.check_health.return_value = {
            "status": "healthy",
            "connections": 8,
            "response_time": 0.05,
        }

        # Mock database operations
        db_manager.execute_query.return_value = True
        db_manager.fetch_all.return_value = []

        return db_manager

    def _create_mock_ai_manager(self) -> AsyncMock:
        """Create mock AI manager for monitoring services.

        Returns:
            ``AsyncMock`` constrained to the ``AIProviderManager`` interface,
            reporting a healthy state by default.
        """
        ai_manager = AsyncMock(spec=AIProviderManager)

        # Default healthy state
        ai_manager.check_health.return_value = {
            "status": "healthy",
            "providers": ["openai", "anthropic"],
            "response_time": 0.12,
        }

        return ai_manager

    def _create_mock_redis_client(self) -> AsyncMock:
        """Create mock Redis client for metrics storage.

        Returns:
            Unconstrained ``AsyncMock`` with canned results for ``get``,
            ``set``, ``incr``, ``hgetall`` and ``hset``.
        """
        redis_client = AsyncMock()

        # Mock Redis operations
        redis_client.get.return_value = None
        redis_client.set.return_value = True
        redis_client.incr.return_value = 1
        redis_client.hgetall.return_value = {}
        redis_client.hset.return_value = True

        return redis_client

    def _create_mock_settings(self) -> MagicMock:
        """Create mock settings for monitoring services.

        Returns:
            ``MagicMock`` carrying health-check, performance-threshold and
            alerting attributes as plain values.
        """
        settings = MagicMock()

        # Health check settings
        settings.health_check_interval = 30
        settings.health_check_timeout = 5
        settings.max_response_time = 1.0
        settings.max_error_rate = 0.1

        # Performance thresholds
        settings.cpu_threshold = 80
        settings.memory_threshold = 85
        settings.disk_threshold = 90

        # Alert settings
        settings.alert_cooldown_minutes = 15
        settings.escalation_threshold = 3

        return settings