Files
noteflow/tests/domain/test_named_entity.py
Travis Vasceannie 1ce24cdf7b feat: reorganize Claude hooks and add RAG documentation structure with error handling policies
- Moved all hookify configuration files from `.claude/` to `.claude/hooks/` subdirectory for better organization
- Added four new blocking hooks to prevent common error handling anti-patterns:
  - `block-broad-exception-handler`: Prevents catching generic `Exception` with only logging
  - `block-datetime-now-fallback`: Blocks returning `datetime.now()` as fallback on parse failures to prevent data corruption
  - `block-default
2026-01-15 15:58:06 +00:00

266 lines
11 KiB
Python

"""Tests for NamedEntity domain entity."""
from __future__ import annotations
from uuid import uuid4
import pytest
from noteflow.domain.entities.named_entity import EntityCategory, NamedEntity
from noteflow.domain.value_objects import MeetingId
class TestEntityCategory:
    """Tests for ``EntityCategory.from_string`` string-to-member conversion."""

    @pytest.mark.parametrize(
        ("value", "expected"),
        [
            ("person", EntityCategory.PERSON),
            ("company", EntityCategory.COMPANY),
            ("product", EntityCategory.PRODUCT),
            ("technical", EntityCategory.TECHNICAL),
            ("acronym", EntityCategory.ACRONYM),
            ("location", EntityCategory.LOCATION),
            ("date", EntityCategory.DATE),
            ("other", EntityCategory.OTHER),
        ],
    )
    def test_from_string_valid_values(
        self, value: str, expected: EntityCategory
    ) -> None:
        """Convert lowercase string to EntityCategory."""
        result = EntityCategory.from_string(value)
        assert result == expected, (
            f"from_string('{value}') should return {expected}, got {result}"
        )

    @pytest.mark.parametrize("value", ["PERSON", "Person", "COMPANY"])
    def test_from_string_case_insensitive(self, value: str) -> None:
        """Mixed-case input resolves to the member matching its name.

        Asserting only enum membership would also pass if ``from_string``
        returned the *wrong* member, so the expected member is looked up by
        its upper-cased name and compared exactly.
        """
        # Every parametrized value upper-cases to a member name
        # (PERSON / COMPANY), so name-based lookup yields the exact target.
        expected = EntityCategory[value.upper()]
        result = EntityCategory.from_string(value)
        assert result == expected, (
            f"from_string('{value}') should return {expected}, got {result}"
        )

    def test_from_string_invalid_raises(self) -> None:
        """Invalid category string raises ValueError."""
        with pytest.raises(ValueError, match="Invalid entity category"):
            EntityCategory.from_string("invalid_category")
class TestNamedEntityValidation:
    """Checks on confidence validation and text normalization at construction."""

    @pytest.mark.parametrize("confidence", [-0.1, 1.1, 2.0, -1.0])
    def test_invalid_confidence_raises(self, confidence: float) -> None:
        """A confidence value outside [0, 1] is rejected with ValueError."""
        rejection = pytest.raises(
            ValueError, match="confidence must be between 0 and 1"
        )
        with rejection:
            NamedEntity(
                confidence=confidence,
                category=EntityCategory.PERSON,
                text="John",
            )

    @pytest.mark.parametrize("confidence", [0.0, 0.5, 1.0, 0.95])
    def test_valid_confidence_boundaries(self, confidence: float) -> None:
        """In-range confidence values, including both ends, are stored as given."""
        entity = NamedEntity(
            confidence=confidence,
            category=EntityCategory.PERSON,
            text="John",
        )
        stored = entity.confidence
        assert stored == confidence, (
            f"expected confidence {confidence}, got {entity.confidence}"
        )

    def test_auto_computes_normalized_text(self) -> None:
        """Omitting normalized_text yields the lowercased form of text."""
        entity = NamedEntity(
            confidence=0.9,
            category=EntityCategory.PERSON,
            text="John SMITH",
        )
        wanted = "john smith"
        assert entity.normalized_text == wanted, (
            f"expected normalized_text 'john smith', got '{entity.normalized_text}'"
        )

    def test_preserves_explicit_normalized_text(self) -> None:
        """A caller-supplied normalized_text is kept untouched."""
        wanted = "custom_normalization"
        entity = NamedEntity(
            confidence=0.9,
            category=EntityCategory.PERSON,
            normalized_text="custom_normalization",
            text="John Smith",
        )
        assert entity.normalized_text == wanted, (
            f"expected explicit normalized_text 'custom_normalization', got '{entity.normalized_text}'"
        )
class TestNamedEntityCreate:
    """Exercises the NamedEntity.create() factory."""

    def test_create_with_valid_input(self) -> None:
        """Valid arguments produce an entity with every field populated."""
        meeting_id = MeetingId(uuid4())
        creation_args = {
            "text": "Acme Corporation",
            "category": EntityCategory.COMPANY,
            "segment_ids": [0, 2, 1],
            "confidence": 0.85,
            "meeting_id": meeting_id,
        }
        entity = NamedEntity.create(**creation_args)

        assert entity.text == "Acme Corporation", "Text should be preserved"
        assert (
            entity.normalized_text == "acme corporation"
        ), "Normalized text should be lowercase"
        assert (
            entity.category == EntityCategory.COMPANY
        ), "Category should be preserved"
        assert entity.segment_ids == [0, 1, 2], "Segment IDs should be sorted"
        assert entity.confidence == 0.85, "Confidence should be preserved"
        assert entity.meeting_id == meeting_id, "Meeting ID should be preserved"

    def test_create_strips_whitespace(self) -> None:
        """Surrounding whitespace in text is removed by the factory."""
        padded = " John Smith "
        entity = NamedEntity.create(
            text=padded,
            category=EntityCategory.PERSON,
            segment_ids=[0],
            confidence=0.9,
        )
        assert entity.text == "John Smith", (
            f"expected stripped text 'John Smith', got '{entity.text}'"
        )
        assert entity.normalized_text == "john smith", (
            f"expected normalized_text 'john smith', got '{entity.normalized_text}'"
        )

    def test_create_deduplicates_segment_ids(self) -> None:
        """Repeated segment IDs collapse to a sorted list of unique values."""
        with_repeats = [3, 1, 1, 3, 2]
        entity = NamedEntity.create(
            text="Test",
            category=EntityCategory.OTHER,
            segment_ids=with_repeats,
            confidence=0.8,
        )
        assert entity.segment_ids == [1, 2, 3], (
            f"expected deduplicated/sorted segment_ids [1, 2, 3], got {entity.segment_ids}"
        )

    @pytest.mark.parametrize(
        "invalid_text",
        [
            pytest.param("", id="empty-text"),
            pytest.param(" ", id="whitespace-only"),
            pytest.param("\t\n", id="tabs-and-newlines"),
        ],
    )
    def test_create_invalid_text_raises(self, invalid_text: str) -> None:
        """Blank or whitespace-only text is rejected with ValueError."""
        rejection = pytest.raises(ValueError, match="Entity text cannot be empty")
        with rejection:
            NamedEntity.create(
                text=invalid_text,
                category=EntityCategory.PERSON,
                segment_ids=[0],
                confidence=0.9,
            )

    def test_create_invalid_confidence_raises(self) -> None:
        """An out-of-range confidence passed to the factory raises ValueError."""
        rejection = pytest.raises(
            ValueError, match="confidence must be between 0 and 1"
        )
        with rejection:
            NamedEntity.create(
                text="John",
                category=EntityCategory.PERSON,
                segment_ids=[0],
                confidence=1.5,
            )
class TestNamedEntityOccurrenceCount:
    """Covers the occurrence_count property for several segment lists."""

    @staticmethod
    def _entity_with(segment_ids: list[int]) -> NamedEntity:
        """Build the shared fixture entity, varying only its segment IDs."""
        return NamedEntity(
            text="Test",
            category=EntityCategory.OTHER,
            segment_ids=segment_ids,
            confidence=0.8,
        )

    def test_occurrence_count_with_segments(self) -> None:
        """Three segment IDs give an occurrence count of three."""
        entity = self._entity_with([0, 1, 2])
        assert entity.occurrence_count == 3, (
            f"expected occurrence_count 3 for 3 segments, got {entity.occurrence_count}"
        )

    def test_occurrence_count_empty_segments(self) -> None:
        """An empty segment list gives an occurrence count of zero."""
        entity = self._entity_with([])
        assert entity.occurrence_count == 0, (
            f"expected occurrence_count 0 for empty segments, got {entity.occurrence_count}"
        )

    def test_occurrence_count_single_segment(self) -> None:
        """A lone segment ID gives an occurrence count of one."""
        entity = self._entity_with([5])
        assert entity.occurrence_count == 1, (
            f"expected occurrence_count 1 for single segment, got {entity.occurrence_count}"
        )
class TestNamedEntityMergeSegments:
    """Covers merge_segments: adding, deduplicating, ordering, and no-op merges."""

    @staticmethod
    def _john(segment_ids: list[int]) -> NamedEntity:
        """Build the shared PERSON fixture with the given starting segment IDs."""
        return NamedEntity(
            text="John",
            category=EntityCategory.PERSON,
            segment_ids=segment_ids,
            confidence=0.9,
        )

    def test_merge_segments_adds_new(self) -> None:
        """IDs not yet present are added to the entity."""
        entity = self._john([0, 1])
        entity.merge_segments([3, 4])
        assert entity.segment_ids == [0, 1, 3, 4], (
            f"expected merged segment_ids [0, 1, 3, 4], got {entity.segment_ids}"
        )

    def test_merge_segments_deduplicates(self) -> None:
        """IDs already present are not repeated after the merge."""
        entity = self._john([0, 1, 2])
        entity.merge_segments([1, 2, 3])
        assert entity.segment_ids == [0, 1, 2, 3], (
            f"expected deduplicated segment_ids [0, 1, 2, 3], got {entity.segment_ids}"
        )

    def test_merge_segments_sorts(self) -> None:
        """Merging smaller IDs still leaves the list in ascending order."""
        entity = self._john([5, 10])
        entity.merge_segments([1, 3])
        assert entity.segment_ids == [1, 3, 5, 10], (
            f"expected sorted segment_ids [1, 3, 5, 10], got {entity.segment_ids}"
        )

    def test_merge_empty_segments(self) -> None:
        """Merging an empty list leaves the existing IDs as they were."""
        entity = self._john([0, 1])
        entity.merge_segments([])
        assert entity.segment_ids == [0, 1], (
            f"expected unchanged segment_ids [0, 1] after merging empty list, got {entity.segment_ids}"
        )
class TestNamedEntityDefaults:
    """Checks the field values of a NamedEntity built from required args only."""

    @pytest.mark.parametrize(
        ("attr", "expected"),
        [
            pytest.param("meeting_id", None, id="meeting_id-none"),
            pytest.param("segment_ids", [], id="segment_ids-empty"),
            pytest.param("is_pinned", False, id="is_pinned-false"),
            pytest.param("db_id", None, id="db_id-none"),
        ],
    )
    def test_named_entity_default_values(self, attr: str, expected: object) -> None:
        """Each optional attribute equals its expected default when omitted."""
        minimal_args = {
            "text": "Test",
            "category": EntityCategory.OTHER,
            "confidence": 0.5,
        }
        entity = NamedEntity(**minimal_args)
        assert getattr(entity, attr) == expected, (
            f"{attr} should default to {expected}"
        )

    def test_id_is_auto_generated(self) -> None:
        """An entity built without an explicit id still has a non-None id."""
        entity = NamedEntity(
            confidence=0.5,
            category=EntityCategory.OTHER,
            text="Test",
        )
        generated = entity.id
        assert generated is not None, "id should be auto-generated"