Files
noteflow/tests/infrastructure/test_diarization.py
Travis Vasceannie b333ea5b23 Add initial Docker and development environment setup
- Created .dockerignore to exclude unnecessary files from Docker builds.
- Added .repomixignore for managing ignored patterns in Repomix.
- Introduced Dockerfile.dev for development environment setup with Python 3.12.
- Configured docker-compose.yaml to define services, including a PostgreSQL database.
- Established a devcontainer.json for Visual Studio Code integration.
- Implemented postCreate.sh for automatic dependency installation in the dev container.
- Added constants.py to centralize configuration constants for the project.
- Updated pyproject.toml to include new development dependencies.
- Created initial documentation files for project overview and style conventions.
- Added tests for new functionalities to ensure reliability and correctness.
2025-12-19 05:02:16 +00:00

197 lines
8.2 KiB
Python

"""Tests for speaker diarization infrastructure.
Tests the SpeakerTurn DTO and speaker assignment utilities.
"""
from __future__ import annotations
import pytest
from noteflow.infrastructure.diarization import SpeakerTurn, assign_speaker, assign_speakers_batch
class TestSpeakerTurn:
    """Exercise SpeakerTurn construction, validation, and overlap helpers."""

    def test_create_valid_turn(self) -> None:
        """Fields are stored as given; confidence is expected to read 1.0 when omitted."""
        created = SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=5.0)
        observed = (created.speaker, created.start, created.end, created.confidence)
        assert observed == ("SPEAKER_00", 0.0, 5.0, 1.0)

    def test_create_turn_with_confidence(self) -> None:
        """An explicitly supplied confidence is kept on the turn."""
        created = SpeakerTurn(
            speaker="SPEAKER_01",
            start=10.0,
            end=15.0,
            confidence=0.85,
        )
        assert created.confidence == 0.85

    def test_invalid_end_before_start_raises(self) -> None:
        """Constructing a turn whose end precedes its start must raise ValueError."""
        expected_message = r"end.*<.*start"
        with pytest.raises(ValueError, match=expected_message):
            SpeakerTurn(speaker="SPEAKER_00", start=10.0, end=5.0)

    def test_invalid_confidence_negative_raises(self) -> None:
        """A confidence below 0.0 must raise ValueError."""
        expected_message = r"Confidence must be 0\.0-1\.0"
        with pytest.raises(ValueError, match=expected_message):
            SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=5.0, confidence=-0.1)

    def test_invalid_confidence_above_one_raises(self) -> None:
        """A confidence above 1.0 must raise ValueError."""
        expected_message = r"Confidence must be 0\.0-1\.0"
        with pytest.raises(ValueError, match=expected_message):
            SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=5.0, confidence=1.5)

    def test_duration_property(self) -> None:
        """duration is expected to equal end minus start (7.5 - 2.5)."""
        assert SpeakerTurn(speaker="SPEAKER_00", start=2.5, end=7.5).duration == 5.0

    def test_overlaps_returns_true_for_overlap(self) -> None:
        """overlaps() is truthy for ranges that share time with the turn."""
        subject = SpeakerTurn(speaker="SPEAKER_00", start=5.0, end=10.0)
        # In order: straddles the start, straddles the end, identical range, superset.
        intersecting = [(3.0, 7.0), (7.0, 12.0), (5.0, 10.0), (0.0, 15.0)]
        for range_start, range_end in intersecting:
            assert subject.overlaps(range_start, range_end)

    def test_overlaps_returns_false_for_no_overlap(self) -> None:
        """overlaps() is falsy for ranges that only touch or miss the turn."""
        subject = SpeakerTurn(speaker="SPEAKER_00", start=5.0, end=10.0)
        # The first two ranges merely touch a boundary; the last two are fully disjoint.
        disjoint = [(0.0, 5.0), (10.0, 15.0), (0.0, 3.0), (12.0, 20.0)]
        for range_start, range_end in disjoint:
            assert not subject.overlaps(range_start, range_end)

    def test_overlap_duration_full_overlap(self) -> None:
        """A range that covers the whole turn yields the turn's own length."""
        subject = SpeakerTurn(speaker="SPEAKER_00", start=5.0, end=10.0)
        covered = subject.overlap_duration(0.0, 15.0)
        assert covered == 5.0

    def test_overlap_duration_partial_overlap_left(self) -> None:
        """A range that begins before the turn and ends inside it overlaps by 2.0."""
        subject = SpeakerTurn(speaker="SPEAKER_00", start=5.0, end=10.0)
        covered = subject.overlap_duration(3.0, 7.0)
        assert covered == 2.0

    def test_overlap_duration_partial_overlap_right(self) -> None:
        """A range that begins inside the turn and ends after it overlaps by 2.0."""
        subject = SpeakerTurn(speaker="SPEAKER_00", start=5.0, end=10.0)
        covered = subject.overlap_duration(8.0, 15.0)
        assert covered == 2.0

    def test_overlap_duration_contained(self) -> None:
        """A range lying entirely inside the turn yields the range's own length."""
        subject = SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=20.0)
        covered = subject.overlap_duration(5.0, 10.0)
        assert covered == 5.0

    def test_overlap_duration_no_overlap(self) -> None:
        """Ranges that miss the turn entirely yield 0.0."""
        subject = SpeakerTurn(speaker="SPEAKER_00", start=5.0, end=10.0)
        for range_start, range_end in [(0.0, 3.0), (12.0, 20.0)]:
            assert subject.overlap_duration(range_start, range_end) == 0.0
class TestAssignSpeaker:
    """Tests for the assign_speaker function.

    Each test calls ``assign_speaker(start, end, turns)`` and unpacks the
    result as a ``(speaker, confidence)`` pair. Confidences expected for a
    matched speaker are compared with ``pytest.approx`` so the tests do not
    depend on bit-exact floating-point arithmetic in the implementation. The
    ``0.0`` expected alongside a ``None`` speaker is still compared exactly.
    """

    def test_empty_turns_returns_none(self) -> None:
        """Empty turns list returns None with 0 confidence."""
        speaker, confidence = assign_speaker(0.0, 5.0, [])
        assert speaker is None
        assert confidence == 0.0

    def test_zero_duration_segment_returns_none(self) -> None:
        """Zero duration segment returns None with 0 confidence."""
        turns = [SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=10.0)]
        speaker, confidence = assign_speaker(5.0, 5.0, turns)
        assert speaker is None
        assert confidence == 0.0

    def test_single_turn_full_overlap(self) -> None:
        """Single turn covering the whole segment returns full confidence."""
        turns = [SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=10.0)]
        speaker, confidence = assign_speaker(2.0, 8.0, turns)
        assert speaker == "SPEAKER_00"
        assert confidence == pytest.approx(1.0)

    def test_single_turn_partial_overlap(self) -> None:
        """Single turn with partial overlap returns proportional confidence."""
        turns = [SpeakerTurn(speaker="SPEAKER_00", start=5.0, end=10.0)]
        speaker, confidence = assign_speaker(0.0, 10.0, turns)
        assert speaker == "SPEAKER_00"
        # The turn covers 5.0 of the 10.0-long segment.
        assert confidence == pytest.approx(0.5)

    def test_multiple_turns_chooses_dominant_speaker(self) -> None:
        """Multiple turns chooses speaker with most overlap."""
        turns = [
            SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=3.0),
            SpeakerTurn(speaker="SPEAKER_01", start=3.0, end=10.0),
        ]
        speaker, confidence = assign_speaker(0.0, 10.0, turns)
        assert speaker == "SPEAKER_01"
        # SPEAKER_01 covers 7.0 of 10.0. 0.7 has no exact binary
        # representation, so an exact == would be fragile here.
        assert confidence == pytest.approx(0.7)

    def test_no_overlap_returns_none(self) -> None:
        """No overlapping turns returns None with 0 confidence."""
        turns = [
            SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=5.0),
            SpeakerTurn(speaker="SPEAKER_01", start=10.0, end=15.0),
        ]
        speaker, confidence = assign_speaker(6.0, 9.0, turns)
        assert speaker is None
        assert confidence == 0.0

    def test_equal_overlap_chooses_first_encountered(self) -> None:
        """Equal overlap chooses first speaker encountered."""
        turns = [
            SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=5.0),
            SpeakerTurn(speaker="SPEAKER_01", start=5.0, end=10.0),
        ]
        speaker, confidence = assign_speaker(3.0, 7.0, turns)
        # SPEAKER_00: overlap 2.0, SPEAKER_01: overlap 2.0
        # First one wins since > not >=
        assert speaker == "SPEAKER_00"
        assert confidence == pytest.approx(0.5)
class TestAssignSpeakersBatch:
    """Tests for the assign_speakers_batch function.

    Each test passes a list of ``(start, end)`` segments and a list of
    SpeakerTurn objects, then checks the returned sequence of
    ``(speaker, confidence)`` pairs position by position.
    """

    def test_empty_segments(self) -> None:
        """Empty segments list returns empty results."""
        turns = [SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=10.0)]
        results = assign_speakers_batch([], turns)
        assert results == []

    def test_empty_turns(self) -> None:
        """Empty turns returns a None speaker and 0.0 confidence for every segment."""
        segments = [(0.0, 5.0), (5.0, 10.0)]
        results = assign_speakers_batch(segments, [])
        assert len(results) == 2
        assert all(speaker is None for speaker, _ in results)
        assert all(conf == 0.0 for _, conf in results)

    def test_batch_assignment(self) -> None:
        """Batch assignment processes all segments."""
        turns = [
            SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=5.0),
            SpeakerTurn(speaker="SPEAKER_01", start=5.0, end=10.0),
            SpeakerTurn(speaker="SPEAKER_00", start=10.0, end=15.0),
        ]
        # Each segment coincides exactly with one turn.
        segments = [(0.0, 5.0), (5.0, 10.0), (10.0, 15.0)]
        results = assign_speakers_batch(segments, turns)
        assert len(results) == 3
        assert results[0] == ("SPEAKER_00", 1.0)
        assert results[1] == ("SPEAKER_01", 1.0)
        assert results[2] == ("SPEAKER_00", 1.0)

    def test_batch_with_gaps(self) -> None:
        """Batch assignment handles gaps between turns."""
        turns = [
            SpeakerTurn(speaker="SPEAKER_00", start=0.0, end=3.0),
            SpeakerTurn(speaker="SPEAKER_01", start=7.0, end=10.0),
        ]
        # The middle segment (3.0-7.0) falls in the gap between the two turns.
        segments = [(0.0, 3.0), (3.0, 7.0), (7.0, 10.0)]
        results = assign_speakers_batch(segments, turns)
        # Guard against surplus results, which index-based checks alone would miss.
        assert len(results) == 3
        assert results[0] == ("SPEAKER_00", 1.0)
        assert results[1] == (None, 0.0)
        assert results[2] == ("SPEAKER_01", 1.0)