- Deleted .env.example file as it is no longer needed. - Added .gitignore to manage ignored files and directories. - Introduced CLAUDE.md for AI provider integration documentation. - Created dev.sh for development setup and scripts. - Updated Dockerfile and Dockerfile.production for improved build processes. - Added multiple test files and directories for comprehensive testing. - Introduced new utility and service files for enhanced functionality. - Organized codebase with new directories and files for better maintainability.
506 lines
18 KiB
Python
506 lines
18 KiB
Python
"""
|
|
Service integration tests for Monitoring and Health Check Services.
|
|
|
|
Tests the integration between health monitoring, metrics collection,
|
|
and their dependencies with external monitoring systems.
|
|
"""
|
|
|
|
from datetime import datetime, timedelta
|
|
from unittest.mock import AsyncMock, MagicMock
|
|
|
|
import pytest
|
|
|
|
from core.ai_manager import AIProviderManager
|
|
from core.database import DatabaseManager
|
|
from services.monitoring.health_endpoints import HealthEndpoints
|
|
from services.monitoring.health_monitor import HealthMonitor
|
|
|
|
|
|
@pytest.mark.integration
|
|
class TestMonitoringServiceIntegration:
|
|
"""Integration tests for monitoring service pipeline."""
|
|
|
|
@pytest.fixture
|
|
async def mock_dependencies(self):
|
|
"""Create all mock dependencies for monitoring services."""
|
|
return {
|
|
"db_manager": self._create_mock_db_manager(),
|
|
"ai_manager": self._create_mock_ai_manager(),
|
|
"redis_client": self._create_mock_redis_client(),
|
|
"settings": self._create_mock_settings(),
|
|
}
|
|
|
|
@pytest.fixture
|
|
async def monitoring_services(self, mock_dependencies):
|
|
"""Create integrated monitoring service instances."""
|
|
deps = mock_dependencies
|
|
|
|
# Create health monitor
|
|
health_monitor = HealthMonitor(
|
|
deps["db_manager"],
|
|
deps["ai_manager"],
|
|
deps["redis_client"],
|
|
deps["settings"],
|
|
)
|
|
|
|
# Create health endpoints
|
|
health_endpoints = HealthEndpoints(health_monitor, deps["settings"])
|
|
|
|
await health_monitor.initialize()
|
|
|
|
return {"health_monitor": health_monitor, "health_endpoints": health_endpoints}
|
|
|
|
@pytest.fixture
|
|
def sample_service_states(self):
|
|
"""Create sample service health states for testing."""
|
|
return {
|
|
"healthy_services": {
|
|
"database": {
|
|
"status": "healthy",
|
|
"response_time": 0.05,
|
|
"connections": 8,
|
|
"last_check": datetime.utcnow(),
|
|
"uptime": timedelta(days=5, hours=3).total_seconds(),
|
|
},
|
|
"ai_manager": {
|
|
"status": "healthy",
|
|
"response_time": 0.12,
|
|
"providers": ["openai", "anthropic"],
|
|
"last_check": datetime.utcnow(),
|
|
"requests_processed": 1250,
|
|
},
|
|
"transcription": {
|
|
"status": "healthy",
|
|
"response_time": 0.32,
|
|
"queue_size": 2,
|
|
"last_check": datetime.utcnow(),
|
|
"total_transcriptions": 450,
|
|
},
|
|
},
|
|
"degraded_services": {
|
|
"quote_analyzer": {
|
|
"status": "degraded",
|
|
"response_time": 1.85,
|
|
"error_rate": 0.12,
|
|
"last_check": datetime.utcnow(),
|
|
"recent_errors": ["Timeout error", "Rate limit exceeded"],
|
|
}
|
|
},
|
|
"unhealthy_services": {
|
|
"laughter_detector": {
|
|
"status": "unhealthy",
|
|
"response_time": None,
|
|
"last_error": "Service unreachable",
|
|
"last_check": datetime.utcnow(),
|
|
"downtime_duration": timedelta(minutes=15).total_seconds(),
|
|
}
|
|
},
|
|
}
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_comprehensive_health_monitoring_integration(
|
|
self, monitoring_services, mock_dependencies, sample_service_states
|
|
):
|
|
"""Test comprehensive health monitoring across all services."""
|
|
health_monitor = monitoring_services["health_monitor"]
|
|
|
|
# Mock individual service health checks
|
|
services = sample_service_states["healthy_services"]
|
|
|
|
# Mock database health
|
|
mock_dependencies["db_manager"].check_health.return_value = services["database"]
|
|
|
|
# Mock AI manager health
|
|
mock_dependencies["ai_manager"].check_health.return_value = services[
|
|
"ai_manager"
|
|
]
|
|
|
|
# Perform comprehensive health check
|
|
overall_health = await health_monitor.check_all_services()
|
|
|
|
assert overall_health is not None
|
|
assert overall_health["overall_status"] in ["healthy", "degraded", "unhealthy"]
|
|
assert "services" in overall_health
|
|
assert "timestamp" in overall_health
|
|
assert "uptime" in overall_health
|
|
|
|
# Verify individual services checked
|
|
assert "database" in overall_health["services"]
|
|
assert "ai_manager" in overall_health["services"]
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_degraded_service_detection_integration(
|
|
self, monitoring_services, mock_dependencies, sample_service_states
|
|
):
|
|
"""Test detection and handling of degraded services."""
|
|
health_monitor = monitoring_services["health_monitor"]
|
|
|
|
# Mock degraded service state
|
|
degraded_service = sample_service_states["degraded_services"]["quote_analyzer"]
|
|
|
|
# Mock AI manager returning degraded status
|
|
mock_dependencies["ai_manager"].check_health.return_value = degraded_service
|
|
|
|
# Check AI service health
|
|
ai_health = await health_monitor.check_service_health("ai_manager")
|
|
|
|
assert ai_health["status"] == "degraded"
|
|
assert ai_health["response_time"] > 1.0 # Slow response
|
|
assert ai_health["error_rate"] > 0.1 # High error rate
|
|
|
|
# Should trigger alert
|
|
alerts = await health_monitor.get_active_alerts()
|
|
degraded_alerts = [a for a in alerts if a["severity"] == "warning"]
|
|
assert len(degraded_alerts) > 0
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_unhealthy_service_detection_integration(
|
|
self, monitoring_services, mock_dependencies, sample_service_states
|
|
):
|
|
"""Test detection and handling of unhealthy services."""
|
|
health_monitor = monitoring_services["health_monitor"]
|
|
|
|
# Mock unhealthy service state
|
|
sample_service_states["unhealthy_services"]["laughter_detector"]
|
|
|
|
# Mock database returning connection error
|
|
mock_dependencies["db_manager"].check_health.side_effect = Exception(
|
|
"Connection refused"
|
|
)
|
|
|
|
# Check database health
|
|
db_health = await health_monitor.check_service_health("database")
|
|
|
|
assert db_health["status"] == "unhealthy"
|
|
assert "error" in db_health
|
|
assert db_health["response_time"] is None
|
|
|
|
# Should trigger critical alert
|
|
alerts = await health_monitor.get_active_alerts()
|
|
critical_alerts = [a for a in alerts if a["severity"] == "critical"]
|
|
assert len(critical_alerts) > 0
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_metrics_collection_integration(
|
|
self, monitoring_services, mock_dependencies
|
|
):
|
|
"""Test metrics collection across all services."""
|
|
health_monitor = monitoring_services["health_monitor"]
|
|
|
|
# Mock Redis for metrics storage
|
|
mock_redis = mock_dependencies["redis_client"]
|
|
mock_redis.get.return_value = None # No existing metrics
|
|
mock_redis.set.return_value = True
|
|
mock_redis.incr.return_value = 1
|
|
|
|
# Collect metrics from various services
|
|
await health_monitor.collect_metrics()
|
|
|
|
# Verify metrics were stored
|
|
assert mock_redis.set.call_count > 0
|
|
assert mock_redis.incr.call_count >= 0
|
|
|
|
# Get aggregated metrics
|
|
metrics = await health_monitor.get_metrics_summary()
|
|
|
|
assert metrics is not None
|
|
assert "system" in metrics
|
|
assert "services" in metrics
|
|
assert "timestamp" in metrics
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_health_endpoints_integration(
|
|
self, monitoring_services, mock_dependencies
|
|
):
|
|
"""Test health check endpoints integration."""
|
|
health_endpoints = monitoring_services["health_endpoints"]
|
|
monitoring_services["health_monitor"]
|
|
|
|
# Mock healthy state
|
|
mock_dependencies["db_manager"].check_health.return_value = {
|
|
"status": "healthy",
|
|
"connections": 5,
|
|
}
|
|
mock_dependencies["ai_manager"].check_health.return_value = {
|
|
"status": "healthy",
|
|
"providers": ["openai"],
|
|
}
|
|
|
|
# Test basic health endpoint
|
|
health_response = await health_endpoints.basic_health_check()
|
|
|
|
assert health_response["status"] == "healthy"
|
|
assert "timestamp" in health_response
|
|
assert health_response["uptime"] > 0
|
|
|
|
# Test detailed health endpoint
|
|
detailed_response = await health_endpoints.detailed_health_check()
|
|
|
|
assert detailed_response["overall_status"] in [
|
|
"healthy",
|
|
"degraded",
|
|
"unhealthy",
|
|
]
|
|
assert "services" in detailed_response
|
|
assert "metrics" in detailed_response
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_performance_monitoring_integration(
|
|
self, monitoring_services, mock_dependencies
|
|
):
|
|
"""Test performance monitoring and alerting."""
|
|
health_monitor = monitoring_services["health_monitor"]
|
|
|
|
# Simulate performance metrics
|
|
performance_data = {
|
|
"cpu_usage": 85.5, # High CPU
|
|
"memory_usage": 92.1, # High memory
|
|
"disk_usage": 45.3,
|
|
"response_times": {
|
|
"database": 0.05,
|
|
"ai_manager": 2.5, # Slow AI responses
|
|
"transcription": 0.8,
|
|
},
|
|
}
|
|
|
|
# Update performance metrics
|
|
await health_monitor.update_performance_metrics(performance_data)
|
|
|
|
# Should detect performance issues
|
|
performance_alerts = await health_monitor.get_performance_alerts()
|
|
|
|
assert len(performance_alerts) > 0
|
|
|
|
# Should have CPU and memory alerts
|
|
cpu_alerts = [a for a in performance_alerts if "cpu" in a["metric"].lower()]
|
|
memory_alerts = [
|
|
a for a in performance_alerts if "memory" in a["metric"].lower()
|
|
]
|
|
|
|
assert len(cpu_alerts) > 0
|
|
assert len(memory_alerts) > 0
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_service_dependency_monitoring_integration(
|
|
self, monitoring_services, mock_dependencies
|
|
):
|
|
"""Test monitoring of service dependencies and cascading failures."""
|
|
health_monitor = monitoring_services["health_monitor"]
|
|
|
|
# Mock database failure affecting other services
|
|
mock_dependencies["db_manager"].check_health.side_effect = Exception("DB down")
|
|
|
|
# Check dependent services
|
|
dependency_health = await health_monitor.check_service_dependencies()
|
|
|
|
assert dependency_health is not None
|
|
|
|
# Should detect cascading impact
|
|
db_dependent_services = dependency_health.get("database_dependent", [])
|
|
affected_services = [s for s in db_dependent_services if s["affected"]]
|
|
|
|
assert len(affected_services) > 0
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_alert_escalation_integration(
|
|
self, monitoring_services, mock_dependencies
|
|
):
|
|
"""Test alert escalation and notification systems."""
|
|
health_monitor = monitoring_services["health_monitor"]
|
|
|
|
# Create critical health issue
|
|
critical_issue = {
|
|
"service": "database",
|
|
"status": "unhealthy",
|
|
"error": "Connection timeout",
|
|
"severity": "critical",
|
|
"timestamp": datetime.utcnow(),
|
|
}
|
|
|
|
# Process critical alert
|
|
await health_monitor.process_alert(critical_issue)
|
|
|
|
# Should escalate critical alerts
|
|
escalated_alerts = await health_monitor.get_escalated_alerts()
|
|
|
|
assert len(escalated_alerts) > 0
|
|
assert escalated_alerts[0]["severity"] == "critical"
|
|
assert escalated_alerts[0]["escalated"] is True
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_historical_health_tracking_integration(
|
|
self, monitoring_services, mock_dependencies
|
|
):
|
|
"""Test historical health data tracking and analysis."""
|
|
health_monitor = monitoring_services["health_monitor"]
|
|
|
|
# Mock historical data storage
|
|
mock_dependencies["db_manager"].execute_query.return_value = True
|
|
|
|
# Record health snapshots over time
|
|
for i in range(5):
|
|
health_snapshot = {
|
|
"timestamp": datetime.utcnow() - timedelta(hours=i),
|
|
"overall_status": "healthy" if i < 3 else "degraded",
|
|
"services": {
|
|
"database": {
|
|
"status": "healthy",
|
|
"response_time": 0.05 + (i * 0.01),
|
|
},
|
|
"ai_manager": {
|
|
"status": "healthy",
|
|
"response_time": 0.1 + (i * 0.02),
|
|
},
|
|
},
|
|
}
|
|
|
|
await health_monitor.record_health_snapshot(health_snapshot)
|
|
|
|
# Verify data was stored
|
|
assert mock_dependencies["db_manager"].execute_query.call_count >= 5
|
|
|
|
# Get health trends
|
|
trends = await health_monitor.get_health_trends(hours_back=24)
|
|
|
|
assert trends is not None
|
|
assert "status_changes" in trends
|
|
assert "performance_trends" in trends
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_monitoring_service_recovery_integration(
|
|
self, monitoring_services, mock_dependencies
|
|
):
|
|
"""Test service recovery detection and notifications."""
|
|
health_monitor = monitoring_services["health_monitor"]
|
|
|
|
# Simulate service recovery scenario
|
|
# First: Service is down
|
|
mock_dependencies["ai_manager"].check_health.side_effect = Exception(
|
|
"Service down"
|
|
)
|
|
|
|
unhealthy_check = await health_monitor.check_service_health("ai_manager")
|
|
assert unhealthy_check["status"] == "unhealthy"
|
|
|
|
# Then: Service recovers
|
|
mock_dependencies["ai_manager"].check_health.side_effect = None
|
|
mock_dependencies["ai_manager"].check_health.return_value = {
|
|
"status": "healthy",
|
|
"response_time": 0.08,
|
|
}
|
|
|
|
recovery_check = await health_monitor.check_service_health("ai_manager")
|
|
assert recovery_check["status"] == "healthy"
|
|
|
|
# Should detect recovery
|
|
recovery_events = await health_monitor.get_recovery_events()
|
|
ai_recovery = [e for e in recovery_events if e["service"] == "ai_manager"]
|
|
|
|
assert len(ai_recovery) > 0
|
|
assert ai_recovery[0]["event_type"] == "recovery"
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_monitoring_configuration_integration(
|
|
self, monitoring_services, mock_dependencies
|
|
):
|
|
"""Test dynamic monitoring configuration and thresholds."""
|
|
health_monitor = monitoring_services["health_monitor"]
|
|
|
|
# Update monitoring configuration
|
|
new_config = {
|
|
"check_interval_seconds": 30,
|
|
"response_time_threshold": 1.0,
|
|
"error_rate_threshold": 0.05,
|
|
"cpu_threshold": 80,
|
|
"memory_threshold": 85,
|
|
}
|
|
|
|
await health_monitor.update_configuration(new_config)
|
|
|
|
# Verify configuration was applied
|
|
current_config = await health_monitor.get_configuration()
|
|
|
|
assert current_config["check_interval_seconds"] == 30
|
|
assert current_config["response_time_threshold"] == 1.0
|
|
assert current_config["error_rate_threshold"] == 0.05
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_monitoring_service_cleanup_integration(self, monitoring_services):
|
|
"""Test proper cleanup of monitoring services."""
|
|
health_monitor = monitoring_services["health_monitor"]
|
|
monitoring_services["health_endpoints"]
|
|
|
|
# Close monitoring services
|
|
await health_monitor.close()
|
|
|
|
# Should clean up background tasks
|
|
assert health_monitor._monitoring_task.cancelled()
|
|
|
|
# Should not be able to check health after cleanup
|
|
with pytest.raises(Exception):
|
|
await health_monitor.check_all_services()
|
|
|
|
def _create_mock_db_manager(self) -> AsyncMock:
|
|
"""Create mock database manager for monitoring services."""
|
|
db_manager = AsyncMock(spec=DatabaseManager)
|
|
|
|
# Default healthy state
|
|
db_manager.check_health.return_value = {
|
|
"status": "healthy",
|
|
"connections": 8,
|
|
"response_time": 0.05,
|
|
}
|
|
|
|
# Mock database operations
|
|
db_manager.execute_query.return_value = True
|
|
db_manager.fetch_all.return_value = []
|
|
|
|
return db_manager
|
|
|
|
def _create_mock_ai_manager(self) -> AsyncMock:
|
|
"""Create mock AI manager for monitoring services."""
|
|
ai_manager = AsyncMock(spec=AIProviderManager)
|
|
|
|
# Default healthy state
|
|
ai_manager.check_health.return_value = {
|
|
"status": "healthy",
|
|
"providers": ["openai", "anthropic"],
|
|
"response_time": 0.12,
|
|
}
|
|
|
|
return ai_manager
|
|
|
|
def _create_mock_redis_client(self) -> AsyncMock:
|
|
"""Create mock Redis client for metrics storage."""
|
|
redis_client = AsyncMock()
|
|
|
|
# Mock Redis operations
|
|
redis_client.get.return_value = None
|
|
redis_client.set.return_value = True
|
|
redis_client.incr.return_value = 1
|
|
redis_client.hgetall.return_value = {}
|
|
redis_client.hset.return_value = True
|
|
|
|
return redis_client
|
|
|
|
def _create_mock_settings(self) -> MagicMock:
|
|
"""Create mock settings for monitoring services."""
|
|
settings = MagicMock()
|
|
|
|
# Health check settings
|
|
settings.health_check_interval = 30
|
|
settings.health_check_timeout = 5
|
|
settings.max_response_time = 1.0
|
|
settings.max_error_rate = 0.1
|
|
|
|
# Performance thresholds
|
|
settings.cpu_threshold = 80
|
|
settings.memory_threshold = 85
|
|
settings.disk_threshold = 90
|
|
|
|
# Alert settings
|
|
settings.alert_cooldown_minutes = 15
|
|
settings.escalation_threshold = 3
|
|
|
|
return settings
|