# noteflow/tests/domain/test_named_entity.py

"""Tests for NamedEntity domain entity."""

from __future__ import annotations

from uuid import uuid4

import pytest

from noteflow.domain.entities.named_entity import EntityCategory, NamedEntity
from noteflow.domain.value_objects import MeetingId


class TestEntityCategory:
    """Tests for EntityCategory.from_string() parsing."""

    @pytest.mark.parametrize(
        ("value", "expected"),
        [
            ("person", EntityCategory.PERSON),
            ("company", EntityCategory.COMPANY),
            ("product", EntityCategory.PRODUCT),
            ("technical", EntityCategory.TECHNICAL),
            ("acronym", EntityCategory.ACRONYM),
            ("location", EntityCategory.LOCATION),
            ("date", EntityCategory.DATE),
            ("other", EntityCategory.OTHER),
        ],
    )
    def test_from_string_valid_values(
        self, value: str, expected: EntityCategory
    ) -> None:
        """Each lowercase category name maps to its matching enum member."""
        result = EntityCategory.from_string(value)
        assert result == expected, f"from_string('{value}') should return {expected}, got {result}"

    @pytest.mark.parametrize(
        ("value", "expected"),
        [
            ("PERSON", EntityCategory.PERSON),
            ("Person", EntityCategory.PERSON),
            ("COMPANY", EntityCategory.COMPANY),
        ],
    )
    def test_from_string_case_insensitive(
        self, value: str, expected: EntityCategory
    ) -> None:
        """Upper- and mixed-case names map to the same member as lowercase.

        Asserting the exact member (rather than mere membership in the enum)
        ensures a parser that returns the wrong category for a differently
        cased name is caught.
        """
        result = EntityCategory.from_string(value)
        assert result == expected, f"from_string('{value}') should return {expected}, got {result}"

    def test_from_string_invalid_raises(self) -> None:
        """An unknown category string raises ValueError."""
        with pytest.raises(ValueError, match="Invalid entity category"):
            EntityCategory.from_string("invalid_category")


class TestNamedEntityValidation:
    """Checks on values accepted or rejected when constructing a NamedEntity directly."""

    @pytest.mark.parametrize("confidence", [-0.1, 1.1, 2.0, -1.0])
    def test_invalid_confidence_raises(self, confidence: float) -> None:
        """A confidence below 0 or above 1 is rejected with ValueError."""
        with pytest.raises(ValueError, match="confidence must be between 0 and 1"):
            NamedEntity(confidence=confidence, category=EntityCategory.PERSON, text="John")

    @pytest.mark.parametrize("confidence", [0.0, 0.5, 1.0, 0.95])
    def test_valid_confidence_boundaries(self, confidence: float) -> None:
        """Confidences inside the range, including both end points, are stored as given."""
        entity = NamedEntity(confidence=confidence, category=EntityCategory.PERSON, text="John")
        assert entity.confidence == confidence, (
            f"expected confidence {confidence}, got {entity.confidence}"
        )

    def test_auto_computes_normalized_text(self) -> None:
        """Omitting normalized_text yields the lowercased form of text."""
        entity = NamedEntity(confidence=0.9, category=EntityCategory.PERSON, text="John SMITH")
        assert entity.normalized_text == "john smith", (
            f"expected normalized_text 'john smith', got '{entity.normalized_text}'"
        )

    def test_preserves_explicit_normalized_text(self) -> None:
        """A caller-supplied normalized_text is kept untouched."""
        entity = NamedEntity(
            confidence=0.9,
            category=EntityCategory.PERSON,
            normalized_text="custom_normalization",
            text="John Smith",
        )
        assert entity.normalized_text == "custom_normalization", (
            f"expected explicit normalized_text 'custom_normalization', got '{entity.normalized_text}'"
        )


class TestNamedEntityCreate:
    """Behavior of the NamedEntity.create() factory method."""

    def test_create_with_valid_input(self) -> None:
        """A well-formed create() call returns an entity carrying the supplied data."""
        meeting_id = MeetingId(uuid4())
        created = NamedEntity.create(
            meeting_id=meeting_id,
            confidence=0.85,
            segment_ids=[0, 2, 1],
            category=EntityCategory.COMPANY,
            text="Acme Corporation",
        )

        # Identity and text fields.
        assert created.meeting_id == meeting_id, "Meeting ID should be preserved"
        assert created.text == "Acme Corporation", "Text should be preserved"
        assert created.normalized_text == "acme corporation", "Normalized text should be lowercase"
        # Classification and provenance fields.
        assert created.category == EntityCategory.COMPANY, "Category should be preserved"
        assert created.confidence == 0.85, "Confidence should be preserved"
        assert created.segment_ids == [0, 1, 2], "Segment IDs should be sorted"

    def test_create_strips_whitespace(self) -> None:
        """Surrounding whitespace is removed from text before it is stored."""
        entity = NamedEntity.create(
            confidence=0.9,
            segment_ids=[0],
            category=EntityCategory.PERSON,
            text="  John Smith  ",
        )
        assert entity.text == "John Smith", (
            f"expected stripped text 'John Smith', got '{entity.text}'"
        )
        assert entity.normalized_text == "john smith", (
            f"expected normalized_text 'john smith', got '{entity.normalized_text}'"
        )

    def test_create_deduplicates_segment_ids(self) -> None:
        """Repeated segment IDs collapse to one and the result is ordered."""
        entity = NamedEntity.create(
            confidence=0.8,
            segment_ids=[3, 1, 1, 3, 2],
            category=EntityCategory.OTHER,
            text="Test",
        )
        assert entity.segment_ids == [1, 2, 3], (
            f"expected deduplicated/sorted segment_ids [1, 2, 3], got {entity.segment_ids}"
        )

    @pytest.mark.parametrize(
        "invalid_text",
        [
            pytest.param("", id="empty-text"),
            pytest.param("   ", id="whitespace-only"),
            pytest.param("\t\n", id="tabs-and-newlines"),
        ],
    )
    def test_create_invalid_text_raises(self, invalid_text: str) -> None:
        """Text that is empty or consists only of whitespace raises ValueError."""
        with pytest.raises(ValueError, match="Entity text cannot be empty"):
            NamedEntity.create(
                confidence=0.9,
                segment_ids=[0],
                category=EntityCategory.PERSON,
                text=invalid_text,
            )

    def test_create_invalid_confidence_raises(self) -> None:
        """An out-of-range confidence passed to create() raises ValueError."""
        with pytest.raises(ValueError, match="confidence must be between 0 and 1"):
            NamedEntity.create(
                confidence=1.5,
                segment_ids=[0],
                category=EntityCategory.PERSON,
                text="John",
            )


class TestNamedEntityOccurrenceCount:
    """Tests for the occurrence_count property."""

    @staticmethod
    def _entity_with_segments(segment_ids: list[int]) -> NamedEntity:
        """Build the OTHER-category 'Test' entity used by every case in this class."""
        return NamedEntity(
            text="Test",
            category=EntityCategory.OTHER,
            segment_ids=segment_ids,
            confidence=0.8,
        )

    def test_occurrence_count_with_segments(self) -> None:
        """Three distinct segment IDs give an occurrence count of 3."""
        entity = self._entity_with_segments([0, 1, 2])
        assert entity.occurrence_count == 3, (
            f"expected occurrence_count 3 for 3 segments, got {entity.occurrence_count}"
        )

    def test_occurrence_count_empty_segments(self) -> None:
        """An entity with no segment IDs has an occurrence count of 0."""
        entity = self._entity_with_segments([])
        assert entity.occurrence_count == 0, (
            f"expected occurrence_count 0 for empty segments, got {entity.occurrence_count}"
        )

    def test_occurrence_count_single_segment(self) -> None:
        """A lone segment ID gives an occurrence count of 1."""
        entity = self._entity_with_segments([5])
        assert entity.occurrence_count == 1, (
            f"expected occurrence_count 1 for single segment, got {entity.occurrence_count}"
        )


class TestNamedEntityMergeSegments:
    """Tests for the merge_segments method."""

    @staticmethod
    def _person_with_segments(segment_ids: list[int]) -> NamedEntity:
        """Build the PERSON-category 'John' entity used by every case in this class."""
        return NamedEntity(
            text="John",
            category=EntityCategory.PERSON,
            segment_ids=segment_ids,
            confidence=0.9,
        )

    def test_merge_segments_adds_new(self) -> None:
        """IDs not yet present are added to the entity's segment list."""
        entity = self._person_with_segments([0, 1])
        entity.merge_segments([3, 4])
        assert entity.segment_ids == [0, 1, 3, 4], (
            f"expected merged segment_ids [0, 1, 3, 4], got {entity.segment_ids}"
        )

    def test_merge_segments_deduplicates(self) -> None:
        """IDs already present are not duplicated by a merge."""
        entity = self._person_with_segments([0, 1, 2])
        entity.merge_segments([1, 2, 3])
        assert entity.segment_ids == [0, 1, 2, 3], (
            f"expected deduplicated segment_ids [0, 1, 2, 3], got {entity.segment_ids}"
        )

    def test_merge_segments_sorts(self) -> None:
        """Merging smaller IDs leaves the combined list in ascending order."""
        entity = self._person_with_segments([5, 10])
        entity.merge_segments([1, 3])
        assert entity.segment_ids == [1, 3, 5, 10], (
            f"expected sorted segment_ids [1, 3, 5, 10], got {entity.segment_ids}"
        )

    def test_merge_empty_segments(self) -> None:
        """Merging an empty list leaves the existing segment IDs unchanged."""
        entity = self._person_with_segments([0, 1])
        entity.merge_segments([])
        assert entity.segment_ids == [0, 1], (
            f"expected unchanged segment_ids [0, 1] after merging empty list, got {entity.segment_ids}"
        )


class TestNamedEntityDefaults:
    """Tests for the values NamedEntity fields take when not supplied."""

    @pytest.mark.parametrize(
        ("attr", "expected"),
        [
            pytest.param("meeting_id", None, id="meeting_id-none"),
            pytest.param("segment_ids", [], id="segment_ids-empty"),
            pytest.param("is_pinned", False, id="is_pinned-false"),
            pytest.param("db_id", None, id="db_id-none"),
        ],
    )
    def test_named_entity_default_values(self, attr: str, expected: object) -> None:
        """Each optional attribute takes its expected default when omitted."""
        entity = NamedEntity(text="Test", category=EntityCategory.OTHER, confidence=0.5)
        actual = getattr(entity, attr)
        assert actual == expected, f"{attr} should default to {expected}"

    def test_id_is_auto_generated(self) -> None:
        """An id is generated automatically, and separately for each instance.

        Checking only ``is not None`` would also pass if every entity shared
        one default id, so two fresh entities are compared as well.
        """
        entity = NamedEntity(text="Test", category=EntityCategory.OTHER, confidence=0.5)
        other = NamedEntity(text="Test", category=EntityCategory.OTHER, confidence=0.5)
        assert entity.id is not None, "id should be auto-generated"
        assert entity.id != other.id, "each entity should receive its own auto-generated id"