Files
noteflow/tests/domain/test_segment.py
2026-01-23 07:45:20 +00:00

202 lines
8.5 KiB
Python

"""Tests for Segment and WordTiming entities."""
from __future__ import annotations
import pytest
from noteflow.domain.entities.segment import Segment, WordTiming
# Shared test constants.
# Ten hours expressed in seconds (10 h * 60 min * 60 s == 36000.0), kept as a float.
TEN_HOURS_SECONDS = 10 * 60 * 60.0
class TestWordTiming:
    """Construction and validation checks for the WordTiming entity."""

    @pytest.mark.parametrize(
        "attr,expected",
        [
            ("word", "hello"),
            ("start_time", 0.0),
            ("end_time", 0.5),
            ("probability", 0.95),
        ],
    )
    def test_word_timing_attributes(self, attr: str, expected: object) -> None:
        """Every constructor argument can be read back from the same-named attribute."""
        timing = WordTiming(word="hello", start_time=0.0, end_time=0.5, probability=0.95)
        observed = getattr(timing, attr)
        assert observed == expected, f"WordTiming.{attr} expected {expected!r}, got {observed!r}"

    def test_word_timing_invalid_times_raises(self) -> None:
        """An end_time earlier than start_time is rejected with ValueError."""
        expect_error = pytest.raises(ValueError, match=r"end_time.*must be >= start_time")
        with expect_error:
            WordTiming(word="hello", start_time=1.0, end_time=0.5, probability=0.9)

    @pytest.mark.parametrize("prob", [-0.1, 1.1, 2.0])
    def test_word_timing_invalid_probability_raises(self, prob: float) -> None:
        """A probability below 0 or above 1 is rejected with ValueError."""
        expect_error = pytest.raises(ValueError, match="probability must be between 0 and 1")
        with expect_error:
            WordTiming(word="hello", start_time=0.0, end_time=0.5, probability=prob)

    @pytest.mark.parametrize("prob", [0.0, 0.5, 1.0])
    def test_word_timing_valid_probability_bounds(self, prob: float) -> None:
        """Probabilities of 0.0, 0.5 and 1.0 are accepted and stored unchanged."""
        timing = WordTiming(word="test", start_time=0.0, end_time=0.5, probability=prob)
        stored = timing.probability
        assert stored == prob, f"probability expected {prob}, got {stored}"
class TestSegment:
    """Tests for Segment entity.

    Covers attribute storage, constructor validation (time ordering and
    non-negative ids), the ``duration`` and ``word_count`` properties, and
    ``has_embedding()``.
    """

    @pytest.mark.parametrize(
        "attr,expected",
        [
            ("segment_id", 0),
            ("text", "Hello world"),
            ("start_time", 0.0),
            ("end_time", 2.5),
            ("language", "en"),
        ],
    )
    def test_segment_attributes(self, attr: str, expected: object) -> None:
        """Test Segment stores attribute values correctly."""
        segment = Segment(
            segment_id=0, text="Hello world", start_time=0.0, end_time=2.5, language="en"
        )
        actual = getattr(segment, attr)
        assert actual == expected, f"Segment.{attr} expected {expected!r}, got {actual!r}"

    def test_segment_invalid_times_raises(self) -> None:
        """Test Segment raises on end_time < start_time."""
        with pytest.raises(ValueError, match=r"end_time.*must be >= start_time"):
            Segment(segment_id=0, text="test", start_time=5.0, end_time=1.0)

    def test_segment_invalid_id_raises(self) -> None:
        """Test Segment raises on negative segment_id."""
        with pytest.raises(ValueError, match="segment_id must be non-negative"):
            Segment(segment_id=-1, text="test", start_time=0.0, end_time=1.0)

    def test_segment_duration(self) -> None:
        """Test duration property calculation."""
        segment = Segment(segment_id=0, text="test", start_time=1.5, end_time=4.0)
        # Compare floats with a tolerance so the test does not depend on how
        # the property derives its value internally.
        assert segment.duration == pytest.approx(2.5), (
            f"duration expected 2.5, got {segment.duration}"
        )

    def test_segment_word_count_from_text(self) -> None:
        """Test word_count from text when no words list."""
        segment = Segment(segment_id=0, text="Hello beautiful world", start_time=0.0, end_time=1.0)
        assert segment.word_count == 3, f"word_count expected 3, got {segment.word_count}"

    def test_segment_word_count_from_words(self) -> None:
        """Test word_count from words list when provided."""
        words = [
            WordTiming(word="Hello", start_time=0.0, end_time=0.3, probability=0.9),
            WordTiming(word="world", start_time=0.3, end_time=0.5, probability=0.95),
        ]
        segment = Segment(
            segment_id=0,
            text="Hello world",
            start_time=0.0,
            end_time=0.5,
            words=words,
        )
        assert segment.word_count == 2, f"word_count expected 2, got {segment.word_count}"

    @pytest.mark.parametrize(
        "embedding,expected",
        [
            pytest.param(None, False, id="none_embedding"),
            pytest.param([], False, id="empty_list"),
            pytest.param([0.1, 0.2, 0.3], True, id="with_values"),
        ],
    )
    def test_segment_has_embedding(self, embedding: list[float] | None, expected: bool) -> None:
        """Test has_embedding returns correct value based on embedding state."""
        segment = Segment(
            segment_id=0,
            text="test",
            start_time=0.0,
            end_time=1.0,
            embedding=embedding,
        )
        result = segment.has_embedding()
        # Identity check: the method must return a real bool, not merely a truthy value.
        assert result is expected, f"has_embedding() expected {expected}, got {result}"

    # --- Edge case tests ---

    @pytest.mark.parametrize(
        "text,expected_count",
        [
            pytest.param("", 0, id="empty_text"),
            pytest.param(" \t\n ", 0, id="whitespace_only"),
            # A run of several spaces between the words, so the case matches
            # its id: repeated separators must not be counted as extra words.
            pytest.param("Hello    world", 2, id="multiple_spaces"),
            pytest.param("Hello, world! How are you?", 5, id="with_punctuation"),
            pytest.param("你好世界 🚀 café", 3, id="unicode_text"),
        ],
    )
    def test_segment_word_count_edge_cases(self, text: str, expected_count: int) -> None:
        """Test word_count correctly handles various text patterns."""
        segment = Segment(segment_id=0, text=text, start_time=0.0, end_time=1.0)
        assert segment.word_count == expected_count, (
            f"word_count for {text!r} expected {expected_count}, got {segment.word_count}"
        )

    def test_segment_word_count_empty_words_list(self) -> None:
        """Test word_count from text when words list is empty."""
        segment = Segment(
            segment_id=0,
            text="Hello world",
            start_time=0.0,
            end_time=0.5,
            words=[],
        )
        assert segment.word_count == 2, (
            f"word_count expected 2 from text fallback, got {segment.word_count}"
        )

    def test_segment_unicode_text_contains_emoji(self) -> None:
        """Test segment preserves unicode emoji in text."""
        segment = Segment(segment_id=0, text="你好世界 🚀 café", start_time=0.0, end_time=1.0)
        assert "🚀" in segment.text, f"expected emoji in text, got {segment.text!r}"

    def test_segment_zero_duration(self) -> None:
        """Test segment with zero duration is valid."""
        segment = Segment(segment_id=0, text="instant", start_time=5.0, end_time=5.0)
        assert segment.duration == pytest.approx(0.0), (
            f"duration expected 0.0, got {segment.duration}"
        )

    def test_segment_very_long_duration(self) -> None:
        """Test segment with very long duration."""
        segment = Segment(segment_id=0, text="marathon", start_time=0.0, end_time=TEN_HOURS_SECONDS)
        assert segment.duration == pytest.approx(TEN_HOURS_SECONDS), (
            f"duration expected {TEN_HOURS_SECONDS}, got {segment.duration}"
        )
class TestWordTimingEdgeCases:
    """Boundary and unusual-input checks for the WordTiming entity."""

    @pytest.mark.parametrize(
        "probability",
        [
            pytest.param(0.0, id="lower_boundary"),
            pytest.param(1.0, id="upper_boundary"),
        ],
    )
    def test_word_timing_boundary_probability(self, probability: float) -> None:
        """Probabilities of exactly 0.0 and 1.0 are accepted and stored unchanged."""
        timing = WordTiming(word="test", start_time=0.0, end_time=0.5, probability=probability)
        stored = timing.probability
        assert stored == probability, (
            f"probability expected {probability}, got {stored}"
        )

    def test_word_timing_equal_times(self) -> None:
        """A word whose start and end times coincide is accepted."""
        timing = WordTiming(word="instant", start_time=1.5, end_time=1.5, probability=0.9)
        begin, finish = timing.start_time, timing.end_time
        assert begin == finish, (
            f"start_time and end_time should be equal, got {begin} and {finish}"
        )

    def test_word_timing_empty_word(self) -> None:
        """An empty word string is accepted and stored as-is."""
        timing = WordTiming(word="", start_time=0.0, end_time=0.1, probability=0.5)
        text = timing.word
        assert text == "", f"word expected empty string, got {text!r}"

    def test_word_timing_unicode_word(self) -> None:
        """Non-ASCII word text is stored without alteration."""
        timing = WordTiming(word="日本語", start_time=0.0, end_time=0.5, probability=0.95)
        text = timing.word
        assert text == "日本語", f"word expected '日本語', got {text!r}"