- Moved all hookify configuration files from `.claude/` to the `.claude/hooks/` subdirectory for better organization.
- Added four new blocking hooks to prevent common error-handling anti-patterns:
  - `block-broad-exception-handler`: Prevents catching generic `Exception` with only logging.
  - `block-datetime-now-fallback`: Blocks returning `datetime.now()` as a fallback on parse failures, to prevent data corruption.
  - `block-default…` (entry truncated in the source)
266 lines
11 KiB
Python
266 lines
11 KiB
Python
"""Tests for NamedEntity domain entity."""
|
|
|
|
from __future__ import annotations
|
|
|
|
from uuid import uuid4
|
|
|
|
import pytest
|
|
|
|
from noteflow.domain.entities.named_entity import EntityCategory, NamedEntity
|
|
from noteflow.domain.value_objects import MeetingId
|
|
|
|
|
|
class TestEntityCategory:
    """Tests for parsing strings into EntityCategory via ``from_string``."""

    @pytest.mark.parametrize(
        ("value", "expected"),
        [
            ("person", EntityCategory.PERSON),
            ("company", EntityCategory.COMPANY),
            ("product", EntityCategory.PRODUCT),
            ("technical", EntityCategory.TECHNICAL),
            ("acronym", EntityCategory.ACRONYM),
            ("location", EntityCategory.LOCATION),
            ("date", EntityCategory.DATE),
            ("other", EntityCategory.OTHER),
        ],
    )
    def test_from_string_valid_values(
        self, value: str, expected: EntityCategory
    ) -> None:
        """Each lowercase category name maps to its matching enum member."""
        result = EntityCategory.from_string(value)
        assert result == expected, (
            f"from_string('{value}') should return {expected}, got {result}"
        )

    @pytest.mark.parametrize("value", ["PERSON", "Person", "COMPANY"])
    def test_from_string_case_insensitive(self, value: str) -> None:
        """Mixed-case input resolves to the same member as its lowercase form."""
        # Pin the exact member: a bare ``result in EntityCategory`` check would
        # still pass if from_string returned the wrong category for the input.
        expected = EntityCategory.from_string(value.lower())
        result = EntityCategory.from_string(value)
        assert result == expected, (
            f"from_string('{value}') should return {expected}, got {result}"
        )

    def test_from_string_invalid_raises(self) -> None:
        """An unknown category string raises ValueError."""
        with pytest.raises(ValueError, match="Invalid entity category"):
            EntityCategory.from_string("invalid_category")
|
|
|
|
|
|
class TestNamedEntityValidation:
    """Checks on the values NamedEntity accepts or rejects at construction."""

    @pytest.mark.parametrize("confidence", [-0.1, 1.1, 2.0, -1.0])
    def test_invalid_confidence_raises(self, confidence: float) -> None:
        """A confidence below 0 or above 1 is rejected with ValueError."""
        with pytest.raises(ValueError, match="confidence must be between 0 and 1"):
            NamedEntity(confidence=confidence, category=EntityCategory.PERSON, text="John")

    @pytest.mark.parametrize("confidence", [0.0, 0.5, 1.0, 0.95])
    def test_valid_confidence_boundaries(self, confidence: float) -> None:
        """In-range confidences, including both endpoints, are stored unchanged."""
        entity = NamedEntity(confidence=confidence, category=EntityCategory.PERSON, text="John")
        assert entity.confidence == confidence, (
            f"expected confidence {confidence}, got {entity.confidence}"
        )

    def test_auto_computes_normalized_text(self) -> None:
        """Omitting normalized_text yields the lowercased form of text."""
        entity = NamedEntity(confidence=0.9, category=EntityCategory.PERSON, text="John SMITH")
        assert entity.normalized_text == "john smith", (
            f"expected normalized_text 'john smith', got '{entity.normalized_text}'"
        )

    def test_preserves_explicit_normalized_text(self) -> None:
        """A caller-supplied normalized_text is kept as given."""
        entity = NamedEntity(
            confidence=0.9,
            category=EntityCategory.PERSON,
            normalized_text="custom_normalization",
            text="John Smith",
        )
        assert entity.normalized_text == "custom_normalization", (
            f"expected explicit normalized_text 'custom_normalization', got '{entity.normalized_text}'"
        )
|
|
|
|
|
|
class TestNamedEntityCreate:
    """Exercises the NamedEntity.create() factory."""

    def test_create_with_valid_input(self) -> None:
        """Valid arguments produce an entity carrying the expected field values."""
        meeting_id = MeetingId(uuid4())
        entity = NamedEntity.create(
            meeting_id=meeting_id,
            confidence=0.85,
            segment_ids=[0, 2, 1],
            category=EntityCategory.COMPANY,
            text="Acme Corporation",
        )

        assert entity.text == "Acme Corporation", "Text should be preserved"
        assert entity.normalized_text == "acme corporation", "Normalized text should be lowercase"
        assert entity.category == EntityCategory.COMPANY, "Category should be preserved"
        assert entity.segment_ids == [0, 1, 2], "Segment IDs should be sorted"
        assert entity.confidence == 0.85, "Confidence should be preserved"
        assert entity.meeting_id == meeting_id, "Meeting ID should be preserved"

    def test_create_strips_whitespace(self) -> None:
        """Surrounding whitespace is removed from text before it is stored."""
        entity = NamedEntity.create(
            confidence=0.9,
            segment_ids=[0],
            category=EntityCategory.PERSON,
            text=" John Smith ",
        )
        assert entity.text == "John Smith", (
            f"expected stripped text 'John Smith', got '{entity.text}'"
        )
        assert entity.normalized_text == "john smith", (
            f"expected normalized_text 'john smith', got '{entity.normalized_text}'"
        )

    def test_create_deduplicates_segment_ids(self) -> None:
        """Repeated segment IDs collapse to a sorted list of unique values."""
        entity = NamedEntity.create(
            confidence=0.8,
            segment_ids=[3, 1, 1, 3, 2],
            category=EntityCategory.OTHER,
            text="Test",
        )
        assert entity.segment_ids == [1, 2, 3], (
            f"expected deduplicated/sorted segment_ids [1, 2, 3], got {entity.segment_ids}"
        )

    @pytest.mark.parametrize(
        "invalid_text",
        [
            pytest.param("", id="empty-text"),
            pytest.param(" ", id="whitespace-only"),
            pytest.param("\t\n", id="tabs-and-newlines"),
        ],
    )
    def test_create_invalid_text_raises(self, invalid_text: str) -> None:
        """Empty or whitespace-only text is rejected with ValueError."""
        with pytest.raises(ValueError, match="Entity text cannot be empty"):
            NamedEntity.create(
                confidence=0.9,
                segment_ids=[0],
                category=EntityCategory.PERSON,
                text=invalid_text,
            )

    def test_create_invalid_confidence_raises(self) -> None:
        """An out-of-range confidence passed to create() raises ValueError."""
        with pytest.raises(ValueError, match="confidence must be between 0 and 1"):
            NamedEntity.create(
                confidence=1.5,
                segment_ids=[0],
                category=EntityCategory.PERSON,
                text="John",
            )
|
|
|
|
|
|
class TestNamedEntityOccurrenceCount:
    """Covers the occurrence_count property for several segment_ids sizes."""

    def test_occurrence_count_with_segments(self) -> None:
        """Three distinct segment IDs give an occurrence count of 3."""
        entity = NamedEntity(
            confidence=0.8,
            segment_ids=[0, 1, 2],
            category=EntityCategory.OTHER,
            text="Test",
        )
        assert entity.occurrence_count == 3, (
            f"expected occurrence_count 3 for 3 segments, got {entity.occurrence_count}"
        )

    def test_occurrence_count_empty_segments(self) -> None:
        """An empty segment_ids list gives an occurrence count of 0."""
        entity = NamedEntity(
            confidence=0.8,
            segment_ids=[],
            category=EntityCategory.OTHER,
            text="Test",
        )
        assert entity.occurrence_count == 0, (
            f"expected occurrence_count 0 for empty segments, got {entity.occurrence_count}"
        )

    def test_occurrence_count_single_segment(self) -> None:
        """A single segment ID gives an occurrence count of 1."""
        entity = NamedEntity(
            confidence=0.8,
            segment_ids=[5],
            category=EntityCategory.OTHER,
            text="Test",
        )
        assert entity.occurrence_count == 1, (
            f"expected occurrence_count 1 for single segment, got {entity.occurrence_count}"
        )
|
|
|
|
|
|
class TestNamedEntityMergeSegments:
    """Covers how merge_segments updates an entity's segment_ids."""

    def test_merge_segments_adds_new(self) -> None:
        """IDs not yet present are appended to the existing ones."""
        entity = NamedEntity(
            confidence=0.9,
            segment_ids=[0, 1],
            category=EntityCategory.PERSON,
            text="John",
        )
        entity.merge_segments([3, 4])
        assert entity.segment_ids == [0, 1, 3, 4], (
            f"expected merged segment_ids [0, 1, 3, 4], got {entity.segment_ids}"
        )

    def test_merge_segments_deduplicates(self) -> None:
        """IDs already present are not duplicated by the merge."""
        entity = NamedEntity(
            confidence=0.9,
            segment_ids=[0, 1, 2],
            category=EntityCategory.PERSON,
            text="John",
        )
        entity.merge_segments([1, 2, 3])
        assert entity.segment_ids == [0, 1, 2, 3], (
            f"expected deduplicated segment_ids [0, 1, 2, 3], got {entity.segment_ids}"
        )

    def test_merge_segments_sorts(self) -> None:
        """Merging smaller IDs into larger ones leaves the list in ascending order."""
        entity = NamedEntity(
            confidence=0.9,
            segment_ids=[5, 10],
            category=EntityCategory.PERSON,
            text="John",
        )
        entity.merge_segments([1, 3])
        assert entity.segment_ids == [1, 3, 5, 10], (
            f"expected sorted segment_ids [1, 3, 5, 10], got {entity.segment_ids}"
        )

    def test_merge_empty_segments(self) -> None:
        """Merging an empty list leaves segment_ids as they were."""
        entity = NamedEntity(
            confidence=0.9,
            segment_ids=[0, 1],
            category=EntityCategory.PERSON,
            text="John",
        )
        entity.merge_segments([])
        assert entity.segment_ids == [0, 1], (
            f"expected unchanged segment_ids [0, 1] after merging empty list, got {entity.segment_ids}"
        )
|
|
|
|
|
|
class TestNamedEntityDefaults:
    """Checks the field values of a NamedEntity built from required arguments only."""

    @pytest.mark.parametrize(
        ("attr", "expected"),
        [
            pytest.param("meeting_id", None, id="meeting_id-none"),
            pytest.param("segment_ids", [], id="segment_ids-empty"),
            pytest.param("is_pinned", False, id="is_pinned-false"),
            pytest.param("db_id", None, id="db_id-none"),
        ],
    )
    def test_named_entity_default_values(self, attr: str, expected: object) -> None:
        """Each optional attribute holds its expected default when not supplied."""
        entity = NamedEntity(confidence=0.5, category=EntityCategory.OTHER, text="Test")
        assert getattr(entity, attr) == expected, f"{attr} should default to {expected}"

    def test_id_is_auto_generated(self) -> None:
        """An entity built without an explicit id still has a non-None id."""
        entity = NamedEntity(confidence=0.5, category=EntityCategory.OTHER, text="Test")
        assert entity.id is not None, "id should be auto-generated"
|