disbord/tests/integration/test_service_monitoring_integration.py

"""
Service integration tests for Monitoring and Health Check Services.

Tests the integration between health monitoring, metrics collection,
and their dependencies with external monitoring systems.
"""

from datetime import datetime, timedelta
from unittest.mock import AsyncMock, MagicMock

import pytest

from core.ai_manager import AIProviderManager
from core.database import DatabaseManager
from services.monitoring.health_endpoints import HealthEndpoints
from services.monitoring.health_monitor import HealthMonitor


@pytest.mark.integration
class TestMonitoringServiceIntegration:
    """Integration tests for monitoring service pipeline."""

    @pytest.fixture
    async def mock_dependencies(self):
        """Create all mock dependencies for monitoring services."""
        return {
            "db_manager": self._create_mock_db_manager(),
            "ai_manager": self._create_mock_ai_manager(),
            "redis_client": self._create_mock_redis_client(),
            "settings": self._create_mock_settings(),
        }

    @pytest.fixture
    async def monitoring_services(self, mock_dependencies):
        """Create integrated monitoring service instances."""
        deps = mock_dependencies

        # Create health monitor
        health_monitor = HealthMonitor(
            deps["db_manager"],
            deps["ai_manager"],
            deps["redis_client"],
            deps["settings"],
        )

        # Create health endpoints
        health_endpoints = HealthEndpoints(health_monitor, deps["settings"])

        await health_monitor.initialize()

        return {"health_monitor": health_monitor, "health_endpoints": health_endpoints}

    @pytest.fixture
    def sample_service_states(self):
        """Create sample service health states for testing."""
        return {
            "healthy_services": {
                "database": {
                    "status": "healthy",
                    "response_time": 0.05,
                    "connections": 8,
                    "last_check": datetime.utcnow(),
                    "uptime": timedelta(days=5, hours=3).total_seconds(),
                },
                "ai_manager": {
                    "status": "healthy",
                    "response_time": 0.12,
                    "providers": ["openai", "anthropic"],
                    "last_check": datetime.utcnow(),
                    "requests_processed": 1250,
                },
                "transcription": {
                    "status": "healthy",
                    "response_time": 0.32,
                    "queue_size": 2,
                    "last_check": datetime.utcnow(),
                    "total_transcriptions": 450,
                },
            },
            "degraded_services": {
                "quote_analyzer": {
                    "status": "degraded",
                    "response_time": 1.85,
                    "error_rate": 0.12,
                    "last_check": datetime.utcnow(),
                    "recent_errors": ["Timeout error", "Rate limit exceeded"],
                }
            },
            "unhealthy_services": {
                "laughter_detector": {
                    "status": "unhealthy",
                    "response_time": None,
                    "last_error": "Service unreachable",
                    "last_check": datetime.utcnow(),
                    "downtime_duration": timedelta(minutes=15).total_seconds(),
                }
            },
        }

    @pytest.mark.asyncio
    async def test_comprehensive_health_monitoring_integration(
        self, monitoring_services, mock_dependencies, sample_service_states
    ):
        """Test comprehensive health monitoring across all services."""
        health_monitor = monitoring_services["health_monitor"]

        # Mock individual service health checks
        services = sample_service_states["healthy_services"]

        # Mock database health
        mock_dependencies["db_manager"].check_health.return_value = services["database"]

        # Mock AI manager health
        mock_dependencies["ai_manager"].check_health.return_value = services[
            "ai_manager"
        ]

        # Perform comprehensive health check
        overall_health = await health_monitor.check_all_services()

        assert overall_health is not None
        assert overall_health["overall_status"] in ["healthy", "degraded", "unhealthy"]
        assert "services" in overall_health
        assert "timestamp" in overall_health
        assert "uptime" in overall_health

        # Verify individual services checked
        assert "database" in overall_health["services"]
        assert "ai_manager" in overall_health["services"]

    @pytest.mark.asyncio
    async def test_degraded_service_detection_integration(
        self, monitoring_services, mock_dependencies, sample_service_states
    ):
        """Test detection and handling of degraded services."""
        health_monitor = monitoring_services["health_monitor"]

        # Mock degraded service state
        degraded_service = sample_service_states["degraded_services"]["quote_analyzer"]

        # Mock AI manager returning degraded status
        mock_dependencies["ai_manager"].check_health.return_value = degraded_service

        # Check AI service health
        ai_health = await health_monitor.check_service_health("ai_manager")

        assert ai_health["status"] == "degraded"
        assert ai_health["response_time"] > 1.0  # Slow response
        assert ai_health["error_rate"] > 0.1  # High error rate

        # Should trigger alert
        alerts = await health_monitor.get_active_alerts()
        degraded_alerts = [a for a in alerts if a["severity"] == "warning"]
        assert len(degraded_alerts) > 0

    @pytest.mark.asyncio
    async def test_unhealthy_service_detection_integration(
        self, monitoring_services, mock_dependencies, sample_service_states
    ):
        """Test detection and handling of unhealthy services."""
        health_monitor = monitoring_services["health_monitor"]

        # Mock unhealthy service state
        sample_service_states["unhealthy_services"]["laughter_detector"]

        # Mock database returning connection error
        mock_dependencies["db_manager"].check_health.side_effect = Exception(
            "Connection refused"
        )

        # Check database health
        db_health = await health_monitor.check_service_health("database")

        assert db_health["status"] == "unhealthy"
        assert "error" in db_health
        assert db_health["response_time"] is None

        # Should trigger critical alert
        alerts = await health_monitor.get_active_alerts()
        critical_alerts = [a for a in alerts if a["severity"] == "critical"]
        assert len(critical_alerts) > 0

    @pytest.mark.asyncio
    async def test_metrics_collection_integration(
        self, monitoring_services, mock_dependencies
    ):
        """Test metrics collection across all services."""
        health_monitor = monitoring_services["health_monitor"]

        # Mock Redis for metrics storage
        mock_redis = mock_dependencies["redis_client"]
        mock_redis.get.return_value = None  # No existing metrics
        mock_redis.set.return_value = True
        mock_redis.incr.return_value = 1

        # Collect metrics from various services
        await health_monitor.collect_metrics()

        # Verify metrics were stored
        assert mock_redis.set.call_count > 0
        assert mock_redis.incr.call_count >= 0

        # Get aggregated metrics
        metrics = await health_monitor.get_metrics_summary()

        assert metrics is not None
        assert "system" in metrics
        assert "services" in metrics
        assert "timestamp" in metrics

    @pytest.mark.asyncio
    async def test_health_endpoints_integration(
        self, monitoring_services, mock_dependencies
    ):
        """Test health check endpoints integration."""
        health_endpoints = monitoring_services["health_endpoints"]
        monitoring_services["health_monitor"]

        # Mock healthy state
        mock_dependencies["db_manager"].check_health.return_value = {
            "status": "healthy",
            "connections": 5,
        }
        mock_dependencies["ai_manager"].check_health.return_value = {
            "status": "healthy",
            "providers": ["openai"],
        }

        # Test basic health endpoint
        health_response = await health_endpoints.basic_health_check()

        assert health_response["status"] == "healthy"
        assert "timestamp" in health_response
        assert health_response["uptime"] > 0

        # Test detailed health endpoint
        detailed_response = await health_endpoints.detailed_health_check()

        assert detailed_response["overall_status"] in [
            "healthy",
            "degraded",
            "unhealthy",
        ]
        assert "services" in detailed_response
        assert "metrics" in detailed_response

    @pytest.mark.asyncio
    async def test_performance_monitoring_integration(
        self, monitoring_services, mock_dependencies
    ):
        """Test performance monitoring and alerting."""
        health_monitor = monitoring_services["health_monitor"]

        # Simulate performance metrics
        performance_data = {
            "cpu_usage": 85.5,  # High CPU
            "memory_usage": 92.1,  # High memory
            "disk_usage": 45.3,
            "response_times": {
                "database": 0.05,
                "ai_manager": 2.5,  # Slow AI responses
                "transcription": 0.8,
            },
        }

        # Update performance metrics
        await health_monitor.update_performance_metrics(performance_data)

        # Should detect performance issues
        performance_alerts = await health_monitor.get_performance_alerts()

        assert len(performance_alerts) > 0

        # Should have CPU and memory alerts
        cpu_alerts = [a for a in performance_alerts if "cpu" in a["metric"].lower()]
        memory_alerts = [
            a for a in performance_alerts if "memory" in a["metric"].lower()
        ]

        assert len(cpu_alerts) > 0
        assert len(memory_alerts) > 0

    @pytest.mark.asyncio
    async def test_service_dependency_monitoring_integration(
        self, monitoring_services, mock_dependencies
    ):
        """Test monitoring of service dependencies and cascading failures."""
        health_monitor = monitoring_services["health_monitor"]

        # Mock database failure affecting other services
        mock_dependencies["db_manager"].check_health.side_effect = Exception("DB down")

        # Check dependent services
        dependency_health = await health_monitor.check_service_dependencies()

        assert dependency_health is not None

        # Should detect cascading impact
        db_dependent_services = dependency_health.get("database_dependent", [])
        affected_services = [s for s in db_dependent_services if s["affected"]]

        assert len(affected_services) > 0

    @pytest.mark.asyncio
    async def test_alert_escalation_integration(
        self, monitoring_services, mock_dependencies
    ):
        """Test alert escalation and notification systems."""
        health_monitor = monitoring_services["health_monitor"]

        # Create critical health issue
        critical_issue = {
            "service": "database",
            "status": "unhealthy",
            "error": "Connection timeout",
            "severity": "critical",
            "timestamp": datetime.utcnow(),
        }

        # Process critical alert
        await health_monitor.process_alert(critical_issue)

        # Should escalate critical alerts
        escalated_alerts = await health_monitor.get_escalated_alerts()

        assert len(escalated_alerts) > 0
        assert escalated_alerts[0]["severity"] == "critical"
        assert escalated_alerts[0]["escalated"] is True

    @pytest.mark.asyncio
    async def test_historical_health_tracking_integration(
        self, monitoring_services, mock_dependencies
    ):
        """Test historical health data tracking and analysis."""
        health_monitor = monitoring_services["health_monitor"]

        # Mock historical data storage
        mock_dependencies["db_manager"].execute_query.return_value = True

        # Record health snapshots over time
        for i in range(5):
            health_snapshot = {
                "timestamp": datetime.utcnow() - timedelta(hours=i),
                "overall_status": "healthy" if i < 3 else "degraded",
                "services": {
                    "database": {
                        "status": "healthy",
                        "response_time": 0.05 + (i * 0.01),
                    },
                    "ai_manager": {
                        "status": "healthy",
                        "response_time": 0.1 + (i * 0.02),
                    },
                },
            }

            await health_monitor.record_health_snapshot(health_snapshot)

        # Verify data was stored
        assert mock_dependencies["db_manager"].execute_query.call_count >= 5

        # Get health trends
        trends = await health_monitor.get_health_trends(hours_back=24)

        assert trends is not None
        assert "status_changes" in trends
        assert "performance_trends" in trends

    @pytest.mark.asyncio
    async def test_monitoring_service_recovery_integration(
        self, monitoring_services, mock_dependencies
    ):
        """Test service recovery detection and notifications."""
        health_monitor = monitoring_services["health_monitor"]

        # Simulate service recovery scenario
        # First: Service is down
        mock_dependencies["ai_manager"].check_health.side_effect = Exception(
            "Service down"
        )

        unhealthy_check = await health_monitor.check_service_health("ai_manager")
        assert unhealthy_check["status"] == "unhealthy"

        # Then: Service recovers
        mock_dependencies["ai_manager"].check_health.side_effect = None
        mock_dependencies["ai_manager"].check_health.return_value = {
            "status": "healthy",
            "response_time": 0.08,
        }

        recovery_check = await health_monitor.check_service_health("ai_manager")
        assert recovery_check["status"] == "healthy"

        # Should detect recovery
        recovery_events = await health_monitor.get_recovery_events()
        ai_recovery = [e for e in recovery_events if e["service"] == "ai_manager"]

        assert len(ai_recovery) > 0
        assert ai_recovery[0]["event_type"] == "recovery"

    @pytest.mark.asyncio
    async def test_monitoring_configuration_integration(
        self, monitoring_services, mock_dependencies
    ):
        """Test dynamic monitoring configuration and thresholds."""
        health_monitor = monitoring_services["health_monitor"]

        # Update monitoring configuration
        new_config = {
            "check_interval_seconds": 30,
            "response_time_threshold": 1.0,
            "error_rate_threshold": 0.05,
            "cpu_threshold": 80,
            "memory_threshold": 85,
        }

        await health_monitor.update_configuration(new_config)

        # Verify configuration was applied
        current_config = await health_monitor.get_configuration()

        assert current_config["check_interval_seconds"] == 30
        assert current_config["response_time_threshold"] == 1.0
        assert current_config["error_rate_threshold"] == 0.05

    @pytest.mark.asyncio
    async def test_monitoring_service_cleanup_integration(self, monitoring_services):
        """Test proper cleanup of monitoring services."""
        health_monitor = monitoring_services["health_monitor"]
        monitoring_services["health_endpoints"]

        # Close monitoring services
        await health_monitor.close()

        # Should clean up background tasks
        assert health_monitor._monitoring_task.cancelled()

        # Should not be able to check health after cleanup
        with pytest.raises(Exception):
            await health_monitor.check_all_services()

    def _create_mock_db_manager(self) -> AsyncMock:
        """Create mock database manager for monitoring services."""
        db_manager = AsyncMock(spec=DatabaseManager)

        # Default healthy state
        db_manager.check_health.return_value = {
            "status": "healthy",
            "connections": 8,
            "response_time": 0.05,
        }

        # Mock database operations
        db_manager.execute_query.return_value = True
        db_manager.fetch_all.return_value = []

        return db_manager

    def _create_mock_ai_manager(self) -> AsyncMock:
        """Create mock AI manager for monitoring services."""
        ai_manager = AsyncMock(spec=AIProviderManager)

        # Default healthy state
        ai_manager.check_health.return_value = {
            "status": "healthy",
            "providers": ["openai", "anthropic"],
            "response_time": 0.12,
        }

        return ai_manager

    def _create_mock_redis_client(self) -> AsyncMock:
        """Create mock Redis client for metrics storage."""
        redis_client = AsyncMock()

        # Mock Redis operations
        redis_client.get.return_value = None
        redis_client.set.return_value = True
        redis_client.incr.return_value = 1
        redis_client.hgetall.return_value = {}
        redis_client.hset.return_value = True

        return redis_client

    def _create_mock_settings(self) -> MagicMock:
        """Create mock settings for monitoring services."""
        settings = MagicMock()

        # Health check settings
        settings.health_check_interval = 30
        settings.health_check_timeout = 5
        settings.max_response_time = 1.0
        settings.max_error_rate = 0.1

        # Performance thresholds
        settings.cpu_threshold = 80
        settings.memory_threshold = 85
        settings.disk_threshold = 90

        # Alert settings
        settings.alert_cooldown_minutes = 15
        settings.escalation_threshold = 3

        return settings