202 lines
8.5 KiB
Python
202 lines
8.5 KiB
Python
"""Tests for Segment and WordTiming entities."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import pytest
|
|
|
|
from noteflow.domain.entities.segment import Segment, WordTiming
|
|
|
|
# Test constants
# Ten hours expressed in seconds (10 h * 60 min * 60 s); kept as a float so it
# compares directly against float durations.
TEN_HOURS_SECONDS = 10 * 60 * 60.0
|
|
|
|
|
|
class TestWordTiming:
    """Construction and validation checks for the WordTiming entity."""

    @pytest.mark.parametrize(
        "attr,expected",
        [
            ("word", "hello"),
            ("start_time", 0.0),
            ("end_time", 0.5),
            ("probability", 0.95),
        ],
    )
    def test_word_timing_attributes(self, attr: str, expected: object) -> None:
        """Verify each constructor argument is readable via the same-named attribute."""
        fields = {"word": "hello", "start_time": 0.0, "end_time": 0.5, "probability": 0.95}
        word = WordTiming(**fields)
        actual = getattr(word, attr)
        assert actual == expected, f"WordTiming.{attr} expected {expected!r}, got {actual!r}"

    def test_word_timing_invalid_times_raises(self) -> None:
        """Verify an end_time earlier than start_time is rejected with ValueError."""
        # The start (1.0) is deliberately later than the end (0.5).
        reversed_span = {"start_time": 1.0, "end_time": 0.5}
        with pytest.raises(ValueError, match=r"end_time.*must be >= start_time"):
            WordTiming(word="hello", probability=0.9, **reversed_span)

    @pytest.mark.parametrize("prob", [-0.1, 1.1, 2.0])
    def test_word_timing_invalid_probability_raises(self, prob: float) -> None:
        """Verify probabilities below 0 or above 1 are rejected with ValueError."""
        with pytest.raises(ValueError, match="probability must be between 0 and 1"):
            WordTiming(word="hello", start_time=0.0, end_time=0.5, probability=prob)

    @pytest.mark.parametrize("prob", [0.0, 0.5, 1.0])
    def test_word_timing_valid_probability_bounds(self, prob: float) -> None:
        """Verify 0.0, 0.5 and 1.0 are accepted and stored unchanged."""
        valid_fields = {"word": "test", "start_time": 0.0, "end_time": 0.5}
        word = WordTiming(probability=prob, **valid_fields)
        assert word.probability == prob, f"probability expected {prob}, got {word.probability}"
|
|
|
|
|
|
class TestSegment:
    """Tests for Segment entity.

    Covers attribute storage, constructor validation, the ``duration`` and
    ``word_count`` properties, ``has_embedding()``, and a set of edge cases
    (empty/whitespace text, unicode, zero and very long durations).
    """

    @pytest.mark.parametrize(
        "attr,expected",
        [
            ("segment_id", 0),
            ("text", "Hello world"),
            ("start_time", 0.0),
            ("end_time", 2.5),
            ("language", "en"),
        ],
    )
    def test_segment_attributes(self, attr: str, expected: object) -> None:
        """Test Segment stores attribute values correctly."""
        segment = Segment(
            segment_id=0, text="Hello world", start_time=0.0, end_time=2.5, language="en"
        )
        actual = getattr(segment, attr)
        assert actual == expected, f"Segment.{attr} expected {expected!r}, got {actual!r}"

    def test_segment_invalid_times_raises(self) -> None:
        """Test Segment raises on end_time < start_time."""
        with pytest.raises(ValueError, match=r"end_time.*must be >= start_time"):
            Segment(segment_id=0, text="test", start_time=5.0, end_time=1.0)

    def test_segment_invalid_id_raises(self) -> None:
        """Test Segment raises on negative segment_id."""
        with pytest.raises(ValueError, match="segment_id must be non-negative"):
            Segment(segment_id=-1, text="test", start_time=0.0, end_time=1.0)

    def test_segment_duration(self) -> None:
        """Test duration property calculation."""
        # 4.0 - 1.5 is exactly representable in binary floating point, so an
        # exact equality comparison is safe here.
        segment = Segment(segment_id=0, text="test", start_time=1.5, end_time=4.0)
        assert segment.duration == 2.5, f"duration expected 2.5, got {segment.duration}"

    def test_segment_word_count_from_text(self) -> None:
        """Test word_count from text when no words list."""
        segment = Segment(segment_id=0, text="Hello beautiful world", start_time=0.0, end_time=1.0)
        assert segment.word_count == 3, f"word_count expected 3, got {segment.word_count}"

    def test_segment_word_count_from_words(self) -> None:
        """Test word_count from words list when provided."""
        words = [
            WordTiming(word="Hello", start_time=0.0, end_time=0.3, probability=0.9),
            WordTiming(word="world", start_time=0.3, end_time=0.5, probability=0.95),
        ]
        segment = Segment(
            segment_id=0,
            text="Hello world",
            start_time=0.0,
            end_time=0.5,
            words=words,
        )
        assert segment.word_count == 2, f"word_count expected 2, got {segment.word_count}"

    @pytest.mark.parametrize(
        "embedding,expected",
        [
            pytest.param(None, False, id="none_embedding"),
            pytest.param([], False, id="empty_list"),
            pytest.param([0.1, 0.2, 0.3], True, id="with_values"),
        ],
    )
    def test_segment_has_embedding(self, embedding: list[float] | None, expected: bool) -> None:
        """Test has_embedding returns correct value based on embedding state."""
        segment = Segment(
            segment_id=0,
            text="test",
            start_time=0.0,
            end_time=1.0,
            embedding=embedding,
        )
        result = segment.has_embedding()
        # Identity check: the test expects a real bool, not merely a truthy value.
        assert result is expected, f"has_embedding() expected {expected}, got {result}"

    # --- Edge case tests ---

    @pytest.mark.parametrize(
        "text,expected_count",
        [
            pytest.param("", 0, id="empty_text"),
            pytest.param(" \t\n ", 0, id="whitespace_only"),
            # Consecutive spaces between the words, so this case really
            # exercises whitespace-run handling as its id states.
            pytest.param("Hello    world", 2, id="multiple_spaces"),
            pytest.param("Hello, world! How are you?", 5, id="with_punctuation"),
            pytest.param("你好世界 🚀 café", 3, id="unicode_text"),
        ],
    )
    def test_segment_word_count_edge_cases(self, text: str, expected_count: int) -> None:
        """Test word_count correctly handles various text patterns."""
        segment = Segment(segment_id=0, text=text, start_time=0.0, end_time=1.0)
        assert segment.word_count == expected_count, (
            f"word_count for {text!r} expected {expected_count}, got {segment.word_count}"
        )

    def test_segment_word_count_empty_words_list(self) -> None:
        """Test word_count from text when words list is empty."""
        segment = Segment(
            segment_id=0,
            text="Hello world",
            start_time=0.0,
            end_time=0.5,
            words=[],
        )
        assert segment.word_count == 2, (
            f"word_count expected 2 from text fallback, got {segment.word_count}"
        )

    def test_segment_unicode_text_contains_emoji(self) -> None:
        """Test segment preserves unicode emoji in text."""
        segment = Segment(segment_id=0, text="你好世界 🚀 café", start_time=0.0, end_time=1.0)
        assert "🚀" in segment.text, f"expected emoji in text, got {segment.text!r}"

    def test_segment_zero_duration(self) -> None:
        """Test segment with zero duration is valid."""
        segment = Segment(segment_id=0, text="instant", start_time=5.0, end_time=5.0)
        assert segment.duration == 0.0, f"duration expected 0.0, got {segment.duration}"

    def test_segment_very_long_duration(self) -> None:
        """Test segment with very long duration."""
        segment = Segment(segment_id=0, text="marathon", start_time=0.0, end_time=TEN_HOURS_SECONDS)
        assert segment.duration == TEN_HOURS_SECONDS, (
            f"duration expected {TEN_HOURS_SECONDS}, got {segment.duration}"
        )
|
|
|
|
|
|
class TestWordTimingEdgeCases:
    """Boundary and unusual-input checks for the WordTiming entity."""

    @pytest.mark.parametrize(
        "probability",
        [
            pytest.param(0.0, id="lower_boundary"),
            pytest.param(1.0, id="upper_boundary"),
        ],
    )
    def test_word_timing_boundary_probability(self, probability: float) -> None:
        """Verify the extreme probabilities 0.0 and 1.0 are accepted and stored."""
        word = WordTiming(word="test", start_time=0.0, end_time=0.5, probability=probability)
        failure = f"probability expected {probability}, got {word.probability}"
        assert word.probability == probability, failure

    def test_word_timing_equal_times(self) -> None:
        """Verify a word whose start and end coincide can be constructed."""
        moment = 1.5
        word = WordTiming(word="instant", start_time=moment, end_time=moment, probability=0.9)
        failure = (
            f"start_time and end_time should be equal, got {word.start_time} and {word.end_time}"
        )
        assert word.start_time == word.end_time, failure

    def test_word_timing_empty_word(self) -> None:
        """Verify an empty word string is accepted and stored as-is."""
        blank = ""
        word = WordTiming(word=blank, start_time=0.0, end_time=0.1, probability=0.5)
        assert word.word == "", f"word expected empty string, got {word.word!r}"

    def test_word_timing_unicode_word(self) -> None:
        """Verify a non-ASCII word is stored without alteration."""
        timing_fields = {"start_time": 0.0, "end_time": 0.5, "probability": 0.95}
        word = WordTiming(word="日本語", **timing_fields)
        assert word.word == "日本語", f"word expected '日本語', got {word.word!r}"
|