# noteflow/tests/infrastructure/test_diarization.py

"""Tests for speaker diarization infrastructure.

Tests the SpeakerTurn DTO and speaker assignment utilities.
"""

from __future__ import annotations

import pytest

from noteflow.infrastructure.diarization import SpeakerTurn, assign_speaker, assign_speakers_batch


class TestSpeakerTurn:
    """Checks for SpeakerTurn construction, validation and range helpers."""

    def test_create_valid_turn(self) -> None:
        """A turn built without explicit confidence keeps its fields and reports 1.0."""
        created = SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=5.0)
        observed = (created.speaker, created.start, created.end, created.confidence)
        assert observed == ("SPEAKER_00", 0.0, 5.0, 1.0)

    def test_create_turn_with_confidence(self) -> None:
        """An explicitly supplied confidence is stored unchanged."""
        custom = SpeakerTurn(speaker="SPEAKER_01", start=10.0, end=15.0, confidence=0.85)
        assert custom.confidence == 0.85

    def test_invalid_end_before_start_raises(self) -> None:
        """Constructing a turn whose end precedes its start raises ValueError."""
        reversed_range = {"speaker": "SPEAKER_00", "start": 10.0, "end": 5.0}
        with pytest.raises(ValueError, match=r"end.*<.*start"):
            SpeakerTurn(**reversed_range)

    def test_invalid_confidence_negative_raises(self) -> None:
        """A confidence below 0.0 is rejected with ValueError."""
        below_range = {"speaker": "SPEAKER_00", "start": 0.0, "end": 5.0, "confidence": -0.1}
        with pytest.raises(ValueError, match=r"Confidence must be 0\.0-1\.0"):
            SpeakerTurn(**below_range)

    def test_invalid_confidence_above_one_raises(self) -> None:
        """A confidence above 1.0 is rejected with ValueError."""
        above_range = {"speaker": "SPEAKER_00", "start": 0.0, "end": 5.0, "confidence": 1.5}
        with pytest.raises(ValueError, match=r"Confidence must be 0\.0-1\.0"):
            SpeakerTurn(**above_range)

    def test_duration_property(self) -> None:
        """duration equals end minus start."""
        timed = SpeakerTurn(speaker="SPEAKER_00", start=2.5, end=7.5)
        length = timed.duration
        assert length == 5.0

    def test_overlaps_returns_true_for_overlap(self) -> None:
        """overlaps() is truthy for every range that shares time with the turn."""
        window = SpeakerTurn(speaker="SPEAKER_00", start=5.0, end=10.0)
        intersecting = (
            (3.0, 7.0),  # straddles the turn's start
            (7.0, 12.0),  # straddles the turn's end
            (5.0, 10.0),  # identical to the turn
            (0.0, 15.0),  # fully encloses the turn
        )
        for lo, hi in intersecting:
            assert window.overlaps(lo, hi)

    def test_overlaps_returns_false_for_no_overlap(self) -> None:
        """overlaps() is falsy for disjoint ranges, including ones that only touch an edge."""
        window = SpeakerTurn(speaker="SPEAKER_00", start=5.0, end=10.0)
        disjoint = (
            (0.0, 5.0),  # ends exactly where the turn starts
            (10.0, 15.0),  # starts exactly where the turn ends
            (0.0, 3.0),  # entirely before the turn
            (12.0, 20.0),  # entirely after the turn
        )
        for lo, hi in disjoint:
            assert not window.overlaps(lo, hi)

    def test_overlap_duration_full_overlap(self) -> None:
        """A range enclosing the whole turn overlaps for the turn's full length."""
        window = SpeakerTurn(speaker="SPEAKER_00", start=5.0, end=10.0)
        shared = window.overlap_duration(0.0, 15.0)
        assert shared == 5.0

    def test_overlap_duration_partial_overlap_left(self) -> None:
        """A range crossing the turn's start overlaps only from start to the range end."""
        window = SpeakerTurn(speaker="SPEAKER_00", start=5.0, end=10.0)
        shared = window.overlap_duration(3.0, 7.0)
        assert shared == 2.0

    def test_overlap_duration_partial_overlap_right(self) -> None:
        """A range crossing the turn's end overlaps only from the range start to end."""
        window = SpeakerTurn(speaker="SPEAKER_00", start=5.0, end=10.0)
        shared = window.overlap_duration(8.0, 15.0)
        assert shared == 2.0

    def test_overlap_duration_contained(self) -> None:
        """A range lying inside the turn overlaps for the range's own length."""
        window = SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=20.0)
        shared = window.overlap_duration(5.0, 10.0)
        assert shared == 5.0

    def test_overlap_duration_no_overlap(self) -> None:
        """Ranges entirely before or after the turn overlap for 0.0 seconds."""
        window = SpeakerTurn(speaker="SPEAKER_00", start=5.0, end=10.0)
        for lo, hi in ((0.0, 3.0), (12.0, 20.0)):
            assert window.overlap_duration(lo, hi) == 0.0


class TestAssignSpeaker:
    """Tests for the assign_speaker function.

    Each test calls ``assign_speaker(start, end, turns)`` and unpacks the
    result as ``(speaker, confidence)``.

    Non-zero expected confidences are compared with ``pytest.approx``: in
    these cases the expected value equals the overlapping fraction of the
    segment, and exact float equality on such a ratio is brittle against
    harmless changes in how it is accumulated. The 0.0 "no speaker"
    expectations keep an exact comparison to preserve their strictness.
    """

    def test_empty_turns_returns_none(self) -> None:
        """Empty turns list returns None with 0 confidence."""
        speaker, confidence = assign_speaker(0.0, 5.0, [])
        assert speaker is None
        assert confidence == 0.0

    def test_zero_duration_segment_returns_none(self) -> None:
        """Zero duration segment returns None even when it lies inside a turn."""
        turns = [SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=10.0)]
        speaker, confidence = assign_speaker(5.0, 5.0, turns)
        assert speaker is None
        assert confidence == 0.0

    def test_single_turn_full_overlap(self) -> None:
        """Single turn covering the whole segment returns full confidence."""
        turns = [SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=10.0)]
        speaker, confidence = assign_speaker(2.0, 8.0, turns)
        assert speaker == "SPEAKER_00"
        assert confidence == pytest.approx(1.0)

    def test_single_turn_partial_overlap(self) -> None:
        """Single turn with partial overlap returns proportional confidence."""
        turns = [SpeakerTurn(speaker="SPEAKER_00", start=5.0, end=10.0)]
        speaker, confidence = assign_speaker(0.0, 10.0, turns)
        assert speaker == "SPEAKER_00"
        # 5 s of the 10 s segment is covered by the turn.
        assert confidence == pytest.approx(0.5)

    def test_multiple_turns_chooses_dominant_speaker(self) -> None:
        """Multiple turns chooses speaker with most overlap."""
        turns = [
            SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=3.0),
            SpeakerTurn(speaker="SPEAKER_01", start=3.0, end=10.0),
        ]
        speaker, confidence = assign_speaker(0.0, 10.0, turns)
        assert speaker == "SPEAKER_01"
        # SPEAKER_01 covers 7 s of the 10 s segment; 0.7 is not exactly
        # representable in binary floating point, hence the tolerance.
        assert confidence == pytest.approx(0.7)

    def test_no_overlap_returns_none(self) -> None:
        """A segment falling in the gap between turns returns None."""
        turns = [
            SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=5.0),
            SpeakerTurn(speaker="SPEAKER_01", start=10.0, end=15.0),
        ]
        speaker, confidence = assign_speaker(6.0, 9.0, turns)
        assert speaker is None
        assert confidence == 0.0

    def test_equal_overlap_chooses_first_encountered(self) -> None:
        """Equal overlap chooses first speaker encountered."""
        turns = [
            SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=5.0),
            SpeakerTurn(speaker="SPEAKER_01", start=5.0, end=10.0),
        ]
        speaker, confidence = assign_speaker(3.0, 7.0, turns)
        # SPEAKER_00: overlap 2.0, SPEAKER_01: overlap 2.0
        # First one wins since > not >=
        assert speaker == "SPEAKER_00"
        # 2 s of the 4 s segment belongs to the winning speaker.
        assert confidence == pytest.approx(0.5)


class TestAssignSpeakersBatch:
    """Tests for the assign_speakers_batch function.

    Each test passes a list of ``(start, end)`` segments and a list of
    SpeakerTurn objects, and expects one ``(speaker, confidence)`` entry
    per segment, in segment order.
    """

    def test_empty_segments(self) -> None:
        """Empty segments list returns empty results."""
        turns = [SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=10.0)]
        results = assign_speakers_batch([], turns)
        assert results == []

    def test_empty_turns(self) -> None:
        """Empty turns returns all None speakers."""
        segments = [(0.0, 5.0), (5.0, 10.0)]
        results = assign_speakers_batch(segments, [])
        assert len(results) == 2
        assert all(speaker is None for speaker, _ in results)
        assert all(conf == 0.0 for _, conf in results)

    def test_batch_assignment(self) -> None:
        """Batch assignment processes all segments."""
        turns = [
            SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=5.0),
            SpeakerTurn(speaker="SPEAKER_01", start=5.0, end=10.0),
            SpeakerTurn(speaker="SPEAKER_00", start=10.0, end=15.0),
        ]
        # Each segment coincides exactly with one turn.
        segments = [(0.0, 5.0), (5.0, 10.0), (10.0, 15.0)]
        results = assign_speakers_batch(segments, turns)
        assert len(results) == 3
        assert results[0] == ("SPEAKER_00", 1.0)
        assert results[1] == ("SPEAKER_01", 1.0)
        assert results[2] == ("SPEAKER_00", 1.0)

    def test_batch_with_gaps(self) -> None:
        """Batch assignment handles gaps between turns."""
        turns = [
            SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=3.0),
            SpeakerTurn(speaker="SPEAKER_01", start=7.0, end=10.0),
        ]
        # The middle segment falls entirely in the silence between the turns.
        segments = [(0.0, 3.0), (3.0, 7.0), (7.0, 10.0)]
        results = assign_speakers_batch(segments, turns)
        # Check the count first so extra trailing entries cannot slip through
        # the index-based assertions below.
        assert len(results) == 3
        assert results[0] == ("SPEAKER_00", 1.0)
        assert results[1] == (None, 0.0)
        assert results[2] == ("SPEAKER_01", 1.0)