noteflow/tests/domain/test_segment.py

"""Tests for Segment and WordTiming entities."""

from __future__ import annotations

import pytest

from noteflow.domain.entities.segment import Segment, WordTiming

# Shared test constants
TEN_HOURS_SECONDS = 10 * 60 * 60.0  # 10 h * 60 min * 60 s, kept as a float


class TestWordTiming:
    """Construction and validation checks for the WordTiming entity."""

    @pytest.mark.parametrize(
        ("attr", "expected"),
        [
            ("word", "hello"),
            ("start_time", 0.0),
            ("end_time", 0.5),
            ("probability", 0.95),
        ],
    )
    def test_word_timing_attributes(self, attr: str, expected: object) -> None:
        """Each constructor argument is readable back from the matching attribute."""
        timing = WordTiming(
            word="hello",
            start_time=0.0,
            end_time=0.5,
            probability=0.95,
        )
        actual = getattr(timing, attr)
        assert actual == expected, f"WordTiming.{attr} expected {expected!r}, got {actual!r}"

    def test_word_timing_invalid_times_raises(self) -> None:
        """An end_time earlier than start_time is rejected with ValueError."""
        later, earlier = 1.0, 0.5
        with pytest.raises(ValueError, match=r"end_time.*must be >= start_time"):
            WordTiming(word="hello", start_time=later, end_time=earlier, probability=0.9)

    @pytest.mark.parametrize("prob", [-0.1, 1.1, 2.0])
    def test_word_timing_invalid_probability_raises(self, prob: float) -> None:
        """Probabilities below 0 or above 1 are rejected with ValueError."""
        expected_message = "probability must be between 0 and 1"
        with pytest.raises(ValueError, match=expected_message):
            WordTiming(word="hello", start_time=0.0, end_time=0.5, probability=prob)

    @pytest.mark.parametrize("prob", [0.0, 0.5, 1.0])
    def test_word_timing_valid_probability_bounds(self, prob: float) -> None:
        """Probabilities of 0.0, 0.5 and 1.0 are accepted and stored unchanged."""
        word = WordTiming(
            word="test",
            start_time=0.0,
            end_time=0.5,
            probability=prob,
        )
        assert word.probability == prob, (
            f"probability expected {prob}, got {word.probability}"
        )


class TestSegment:
    """Construction, validation and derived-property checks for the Segment entity."""

    @pytest.mark.parametrize(
        ("attr", "expected"),
        [
            ("segment_id", 0),
            ("text", "Hello world"),
            ("start_time", 0.0),
            ("end_time", 2.5),
            ("language", "en"),
        ],
    )
    def test_segment_attributes(self, attr: str, expected: object) -> None:
        """Each constructor argument is readable back from the matching attribute."""
        segment = Segment(
            segment_id=0,
            text="Hello world",
            start_time=0.0,
            end_time=2.5,
            language="en",
        )
        actual = getattr(segment, attr)
        assert actual == expected, f"Segment.{attr} expected {expected!r}, got {actual!r}"

    def test_segment_invalid_times_raises(self) -> None:
        """An end_time earlier than start_time is rejected with ValueError."""
        later, earlier = 5.0, 1.0
        with pytest.raises(ValueError, match=r"end_time.*must be >= start_time"):
            Segment(segment_id=0, text="test", start_time=later, end_time=earlier)

    def test_segment_invalid_id_raises(self) -> None:
        """A negative segment_id is rejected with ValueError."""
        expected_message = "segment_id must be non-negative"
        with pytest.raises(ValueError, match=expected_message):
            Segment(segment_id=-1, text="test", start_time=0.0, end_time=1.0)

    def test_segment_duration(self) -> None:
        """duration is 2.5 for a segment running from 1.5 to 4.0."""
        segment = Segment(
            segment_id=0,
            text="test",
            start_time=1.5,
            end_time=4.0,
        )
        assert segment.duration == 2.5, f"duration expected 2.5, got {segment.duration}"

    def test_segment_word_count_from_text(self) -> None:
        """word_count is 3 for three-word text when no words list is passed."""
        segment = Segment(
            segment_id=0,
            text="Hello beautiful world",
            start_time=0.0,
            end_time=1.0,
        )
        assert segment.word_count == 3, f"word_count expected 3, got {segment.word_count}"

    def test_segment_word_count_from_words(self) -> None:
        """word_count is 2 for a segment carrying two WordTiming entries."""
        hello = WordTiming(word="Hello", start_time=0.0, end_time=0.3, probability=0.9)
        world = WordTiming(word="world", start_time=0.3, end_time=0.5, probability=0.95)
        segment = Segment(
            segment_id=0,
            text="Hello world",
            start_time=0.0,
            end_time=0.5,
            words=[hello, world],
        )
        assert segment.word_count == 2, f"word_count expected 2, got {segment.word_count}"

    @pytest.mark.parametrize(
        ("embedding", "expected"),
        [
            pytest.param(None, False, id="none_embedding"),
            pytest.param([], False, id="empty_list"),
            pytest.param([0.1, 0.2, 0.3], True, id="with_values"),
        ],
    )
    def test_segment_has_embedding(self, embedding: list[float] | None, expected: bool) -> None:
        """has_embedding() is False for None or an empty list and True for a populated list."""
        segment = Segment(
            segment_id=0,
            text="test",
            start_time=0.0,
            end_time=1.0,
            embedding=embedding,
        )
        result = segment.has_embedding()
        # Identity comparison: the method must return a real bool, not a truthy value.
        assert result is expected, f"has_embedding() expected {expected}, got {result}"

    # --- Edge case tests ---

    @pytest.mark.parametrize(
        ("text", "expected_count"),
        [
            pytest.param("", 0, id="empty_text"),
            pytest.param("   \t\n  ", 0, id="whitespace_only"),
            pytest.param("Hello    world", 2, id="multiple_spaces"),
            pytest.param("Hello, world! How are you?", 5, id="with_punctuation"),
            pytest.param("你好世界 🚀 café", 3, id="unicode_text"),
        ],
    )
    def test_segment_word_count_edge_cases(self, text: str, expected_count: int) -> None:
        """word_count matches the expected count for empty, padded, punctuated and unicode text."""
        segment = Segment(
            segment_id=0,
            text=text,
            start_time=0.0,
            end_time=1.0,
        )
        assert segment.word_count == expected_count, (
            f"word_count for {text!r} expected {expected_count}, got {segment.word_count}"
        )

    def test_segment_word_count_empty_words_list(self) -> None:
        """word_count is 2 for two-word text when an empty words list is passed."""
        no_words: list[WordTiming] = []
        segment = Segment(
            segment_id=0,
            text="Hello world",
            start_time=0.0,
            end_time=0.5,
            words=no_words,
        )
        assert segment.word_count == 2, (
            f"word_count expected 2 from text fallback, got {segment.word_count}"
        )

    def test_segment_unicode_text_contains_emoji(self) -> None:
        """The emoji passed in the text is still present in segment.text."""
        segment = Segment(
            segment_id=0,
            text="你好世界 🚀 café",
            start_time=0.0,
            end_time=1.0,
        )
        assert "🚀" in segment.text, f"expected emoji in text, got {segment.text!r}"

    def test_segment_zero_duration(self) -> None:
        """A segment whose start and end coincide is accepted and reports duration 0.0."""
        segment = Segment(
            segment_id=0,
            text="instant",
            start_time=5.0,
            end_time=5.0,
        )
        assert segment.duration == 0.0, f"duration expected 0.0, got {segment.duration}"

    def test_segment_very_long_duration(self) -> None:
        """A segment spanning 0.0 to TEN_HOURS_SECONDS reports that full span as duration."""
        segment = Segment(
            segment_id=0,
            text="marathon",
            start_time=0.0,
            end_time=TEN_HOURS_SECONDS,
        )
        assert segment.duration == TEN_HOURS_SECONDS, (
            f"duration expected {TEN_HOURS_SECONDS}, got {segment.duration}"
        )


class TestWordTimingEdgeCases:
    """Edge case tests for WordTiming entity."""

    @pytest.mark.parametrize(
        "probability",
        [
            pytest.param(0.0, id="lower_boundary"),
            pytest.param(1.0, id="upper_boundary"),
        ],
    )
    def test_word_timing_boundary_probability(self, probability: float) -> None:
        """Test WordTiming accepts and stores probability at exactly 0.0 and 1.0."""
        word = WordTiming(word="test", start_time=0.0, end_time=0.5, probability=probability)
        assert word.probability == probability, (
            f"probability expected {probability}, got {word.probability}"
        )

    def test_word_timing_equal_times(self) -> None:
        """Test word timing with equal start and end times.

        Both stored values are compared against the constructor input; comparing
        the two attributes only with each other would also pass if both had been
        stored incorrectly.
        """
        instant = 1.5
        word = WordTiming(word="instant", start_time=instant, end_time=instant, probability=0.9)
        assert (word.start_time, word.end_time) == (instant, instant), (
            f"start_time and end_time should both be {instant}, "
            f"got {word.start_time} and {word.end_time}"
        )

    def test_word_timing_empty_word(self) -> None:
        """Test word timing with empty word string."""
        word = WordTiming(word="", start_time=0.0, end_time=0.1, probability=0.5)
        assert word.word == "", f"word expected empty string, got {word.word!r}"

    def test_word_timing_unicode_word(self) -> None:
        """Test word timing with unicode characters."""
        word = WordTiming(word="日本語", start_time=0.0, end_time=0.5, probability=0.95)
        assert word.word == "日本語", f"word expected '日本語', got {word.word!r}"