noteflow/tests/benchmarks/test_hot_paths.py
"""Benchmark tests for NoteFlow hot paths.
These benchmarks measure the performance of frequently-called code paths
to establish baselines and detect regressions.
Run with: pytest tests/benchmarks/ --benchmark-enable
Compare: pytest tests/benchmarks/ --benchmark-compare
Save baseline: pytest tests/benchmarks/ --benchmark-save=baseline
Note: These tests are marked as slow and excluded from CI unit test runs.
Run explicitly with: pytest tests/benchmarks/ -m slow
"""
from __future__ import annotations
from datetime import UTC, datetime
from typing import cast
from uuid import UUID
import numpy as np
import pytest
from numpy.typing import NDArray
from pytest_benchmark.fixture import BenchmarkFixture
from noteflow.application.services.voice_profile.service import (
EMBEDDING_DIM,
cosine_similarity,
merge_embeddings,
)
from noteflow.config.constants import DEFAULT_SAMPLE_RATE
from noteflow.domain.entities.segment import Segment, WordTiming
from noteflow.domain.value_objects import AudioSource, MeetingId, SpeakerRole
from noteflow.grpc.mixins.converters import (
SegmentBuildParams,
create_segment_from_asr,
log_entry_to_proto,
metrics_to_proto,
segment_to_proto_update,
)
from noteflow.grpc.proto import noteflow_pb2
from noteflow.infrastructure.asr.dto import AsrResult, WordTiming as AsrWordTiming
from noteflow.infrastructure.asr.segmenter import AudioSegment, Segmenter, SegmenterConfig
from noteflow.infrastructure.asr.streaming_vad import EnergyVad, StreamingVad
from noteflow.infrastructure.audio.levels import RmsLevelProvider, compute_rms
from noteflow.infrastructure.audio.partial_buffer import PartialAudioBuffer
from noteflow.infrastructure.logging.log_buffer import LogEntry
from noteflow.infrastructure.metrics.collector import PerformanceMetrics
# Mark all tests in this module as slow (excluded from CI unit tests)
pytestmark = pytest.mark.slow
def _run_benchmark(benchmark: BenchmarkFixture, func: object, *args: object) -> object:
"""Run benchmark and return result as object.
This helper captures the unknown return type from pytest-benchmark and
explicitly returns it as object, allowing downstream casts to work properly.
The cast is required because BenchmarkFixture.__call__ is untyped in
pytest-benchmark (no type stubs available).
"""
# cast required: pytest-benchmark lacks type stubs
return cast(object, benchmark(func, *args))
def typed_benchmark[T](
benchmark: BenchmarkFixture, expected_type: type[T], func: object, *args: object
) -> T:
"""Run benchmark and return typed result for simple types (float, bool, int).
BenchmarkFixture.__call__ is untyped but returns the result of func(*args).
This wrapper provides explicit typing via the expected_type parameter.
Args:
benchmark: The pytest-benchmark fixture
expected_type: The type that func returns (used for type inference)
func: The function to benchmark
*args: Arguments to pass to func
"""
return cast(T, _run_benchmark(benchmark, func, *args))
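# Example usage (mirrors the RMS tests below): typed_benchmark(benchmark, float, compute_rms, chunk)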
def benchmark_array(
benchmark: BenchmarkFixture, func: object, *args: object
) -> NDArray[np.float32]:
"""Run benchmark for functions returning float32 arrays.
BenchmarkFixture.__call__ is untyped. This wrapper returns a properly typed NDArray.
Args:
benchmark: The pytest-benchmark fixture
func: The function to benchmark (must return NDArray[np.float32])
*args: Arguments to pass to func
"""
return cast(NDArray[np.float32], _run_benchmark(benchmark, func, *args))
def benchmark_list(benchmark: BenchmarkFixture, func: object, *args: object) -> list[AudioSegment]:
"""Run benchmark for functions returning list of AudioSegment.
BenchmarkFixture.__call__ is untyped. This wrapper returns a properly typed list.
Args:
benchmark: The pytest-benchmark fixture
func: The function to benchmark (must return list[AudioSegment])
*args: Arguments to pass to func
"""
return cast(list[AudioSegment], _run_benchmark(benchmark, func, *args))
def benchmark_array_list(
benchmark: BenchmarkFixture, func: object, *args: object
) -> list[NDArray[np.float32]]:
"""Run benchmark for functions returning list of float32 arrays.
BenchmarkFixture.__call__ is untyped. This wrapper returns a properly typed list.
Args:
benchmark: The pytest-benchmark fixture
func: The function to benchmark (must return list[NDArray[np.float32]])
*args: Arguments to pass to func
"""
return cast(list[NDArray[np.float32]], _run_benchmark(benchmark, func, *args))
def benchmark_float_list(benchmark: BenchmarkFixture, func: object, *args: object) -> list[float]:
"""Run benchmark for functions returning list of floats."""
return cast(list[float], _run_benchmark(benchmark, func, *args))
# Standard audio chunk size (100ms at 16kHz)
CHUNK_SIZE = 1600
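# 0.1 s * 16_000 samples/s = 1_600 samples per chunk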
SAMPLE_RATE = DEFAULT_SAMPLE_RATE
# Typical partial buffer holds ~2s of audio (20 chunks x 100ms)
TYPICAL_PARTIAL_CHUNKS = 20
# dB floor for silence detection
DB_FLOOR = -60
MEETING_UUID = UUID("00000000-0000-0000-0000-000000000001")
MEETING_ID = MeetingId(MEETING_UUID)
ASR_SEGMENT_ID = 7
SEGMENT_START_OFFSET = 1.25
VOICE_EMBEDDING_NOISE = 0.01
VOICE_EMBEDDING_EXISTING_COUNT = 3
@pytest.fixture
def audio_chunk() -> NDArray[np.float32]:
"""Generate a realistic audio chunk (100ms at 16kHz)."""
return np.random.randn(CHUNK_SIZE).astype(np.float32) * 0.1
@pytest.fixture
def speech_chunk() -> NDArray[np.float32]:
"""Generate a speech-like audio chunk with higher energy."""
return np.random.randn(CHUNK_SIZE).astype(np.float32) * 0.5
@pytest.fixture
def silence_chunk() -> NDArray[np.float32]:
"""Generate a silence chunk with very low energy."""
return np.random.randn(CHUNK_SIZE).astype(np.float32) * 0.001
@pytest.fixture
def segmenter() -> Segmenter:
"""Create a segmenter with default config."""
return Segmenter(config=SegmenterConfig(sample_rate=SAMPLE_RATE))
@pytest.fixture
def energy_vad() -> EnergyVad:
"""Create an energy VAD instance."""
return EnergyVad()
@pytest.fixture
def streaming_vad() -> StreamingVad:
"""Create a streaming VAD instance."""
return StreamingVad()
@pytest.fixture
def rms_provider() -> RmsLevelProvider:
"""Create an RMS level provider."""
return RmsLevelProvider()
@pytest.fixture
def segment_with_words() -> Segment:
"""Create a segment with word timings for converter benchmarks."""
words = [
WordTiming(word="hello", start_time=0.0, end_time=0.25, probability=0.95),
WordTiming(word="world", start_time=0.25, end_time=0.5, probability=0.92),
WordTiming(word="from", start_time=0.5, end_time=0.7, probability=0.9),
WordTiming(word="noteflow", start_time=0.7, end_time=1.0, probability=0.93),
]
return Segment(
segment_id=42,
text="hello world from noteflow",
start_time=0.0,
end_time=1.0,
words=words,
)
@pytest.fixture
def asr_result() -> AsrResult:
"""Create an ASR result for segment build benchmarks."""
words = (
AsrWordTiming(word="hello", start=0.0, end=0.25, probability=0.95),
AsrWordTiming(word="world", start=0.25, end=0.5, probability=0.92),
AsrWordTiming(word="from", start=0.5, end=0.7, probability=0.9),
AsrWordTiming(word="noteflow", start=0.7, end=1.0, probability=0.93),
)
return AsrResult(
text="hello world from noteflow",
start=0.0,
end=1.0,
words=words,
language="en",
language_probability=0.98,
avg_logprob=-0.2,
no_speech_prob=0.01,
)
@pytest.fixture
def segment_build_params() -> SegmentBuildParams:
"""Create segment build parameters for ASR conversion benchmarks."""
return SegmentBuildParams(
meeting_id=MEETING_ID,
segment_id=ASR_SEGMENT_ID,
segment_start_time=SEGMENT_START_OFFSET,
audio_source=AudioSource.MIC,
)
@pytest.fixture
def voice_embedding_pair() -> tuple[list[float], list[float]]:
"""Create two similar embeddings for voice profile benchmarks."""
rng = np.random.default_rng(42)
base = rng.standard_normal(EMBEDDING_DIM).astype(np.float32)
noise = rng.standard_normal(EMBEDDING_DIM).astype(np.float32) * VOICE_EMBEDDING_NOISE
return base.tolist(), (base + noise).tolist()
@pytest.fixture
def voice_embedding_merge_inputs(
voice_embedding_pair: tuple[list[float], list[float]],
) -> tuple[list[float], list[float], int]:
"""Create inputs for merge_embeddings benchmark."""
existing, new = voice_embedding_pair
return existing, new, VOICE_EMBEDDING_EXISTING_COUNT
@pytest.fixture
def performance_metrics() -> PerformanceMetrics:
"""Create sample metrics for converter benchmarks."""
return PerformanceMetrics(
timestamp=1_700_000_000.0,
cpu_percent=23.5,
memory_percent=61.2,
memory_mb=8192.0,
disk_percent=44.0,
network_bytes_sent=120_000,
network_bytes_recv=98_000,
process_memory_mb=512.0,
active_connections=12,
)
@pytest.fixture
def log_entry() -> LogEntry:
"""Create a sample log entry for converter benchmarks."""
return LogEntry(
timestamp=datetime.now(tz=UTC),
level="info",
source="bench",
message="Segment persisted",
details={"meeting_id": "test"},
trace_id="trace",
span_id="span",
event_type="segment.added",
operation_id="op",
entity_id="entity",
)
class TestComputeRmsBenchmark:
"""Benchmark tests for RMS computation (called 36,000x/hour)."""
def test_compute_rms_typical_chunk(
self, benchmark: BenchmarkFixture, audio_chunk: NDArray[np.float32]
) -> None:
"""Benchmark RMS computation on typical 100ms chunk."""
result = typed_benchmark(benchmark, float, compute_rms, audio_chunk)
assert 0 <= result <= 1, "RMS should be in valid range"
def test_compute_rms_silence(
self, benchmark: BenchmarkFixture, silence_chunk: NDArray[np.float32]
) -> None:
"""Benchmark RMS computation on silence."""
result = typed_benchmark(benchmark, float, compute_rms, silence_chunk)
assert result < 0.01, "Silence should have very low RMS"
def test_compute_rms_speech(
self, benchmark: BenchmarkFixture, speech_chunk: NDArray[np.float32]
) -> None:
"""Benchmark RMS computation on speech-like audio."""
result = typed_benchmark(benchmark, float, compute_rms, speech_chunk)
assert result > 0.1, "Speech should have higher RMS"
class TestVadBenchmark:
"""Benchmark tests for VAD processing (called 36,000x/hour)."""
def test_energy_vad_process(
self, benchmark: BenchmarkFixture, energy_vad: EnergyVad, audio_chunk: NDArray[np.float32]
) -> None:
"""Benchmark single EnergyVad.process() call."""
result = typed_benchmark(benchmark, bool, energy_vad.process, audio_chunk)
assert isinstance(result, bool), "VAD should return boolean"
def test_streaming_vad_process_chunk(
self,
benchmark: BenchmarkFixture,
streaming_vad: StreamingVad,
audio_chunk: NDArray[np.float32],
) -> None:
"""Benchmark StreamingVad.process_chunk() call."""
result = typed_benchmark(benchmark, bool, streaming_vad.process_chunk, audio_chunk)
assert isinstance(result, bool), "VAD should return boolean"
def test_energy_vad_speech_detection(
self, benchmark: BenchmarkFixture, energy_vad: EnergyVad, speech_chunk: NDArray[np.float32]
) -> None:
"""Benchmark VAD on speech-like audio."""
result = typed_benchmark(benchmark, bool, energy_vad.process, speech_chunk)
assert result is True, "Should detect speech"
def test_energy_vad_silence_detection(
self, benchmark: BenchmarkFixture, energy_vad: EnergyVad, silence_chunk: NDArray[np.float32]
) -> None:
"""Benchmark VAD on silence."""
result = typed_benchmark(benchmark, bool, energy_vad.process, silence_chunk)
assert result is False, "Should detect silence"
class TestSegmenterBenchmark:
"""Benchmark tests for Segmenter state machine (called 36,000x/hour)."""
def test_segmenter_idle_silence(
self, benchmark: BenchmarkFixture, segmenter: Segmenter, silence_chunk: NDArray[np.float32]
) -> None:
"""Benchmark segmenter processing silence in IDLE state."""
def process_idle() -> list[AudioSegment]:
return list(segmenter.process_audio(silence_chunk, is_speech=False))
result = benchmark_list(benchmark, process_idle)
assert result == [], "No segments should be emitted in idle"
def test_segmenter_speech_accumulation(
self, benchmark: BenchmarkFixture, segmenter: Segmenter, speech_chunk: NDArray[np.float32]
) -> None:
"""Benchmark segmenter accumulating speech."""
# First transition to SPEECH state
list(segmenter.process_audio(speech_chunk, is_speech=True))
def process_speech() -> list[AudioSegment]:
return list(segmenter.process_audio(speech_chunk, is_speech=True))
result = benchmark_list(benchmark, process_speech)
# Should not emit unless max duration reached
assert len(result) <= 1, "Should emit at most one segment"
def test_segmenter_transition_idle_to_speech(
self, benchmark: BenchmarkFixture, speech_chunk: NDArray[np.float32]
) -> None:
"""Benchmark state transition from IDLE to SPEECH."""
def transition() -> list[AudioSegment]:
seg = Segmenter(config=SegmenterConfig(sample_rate=SAMPLE_RATE))
return list(seg.process_audio(speech_chunk, is_speech=True))
result = benchmark_list(benchmark, transition)
assert result == [], "Transition should not emit segment"
class TestRmsLevelProviderBenchmark:
"""Benchmark tests for RmsLevelProvider methods."""
def test_get_rms(
self,
benchmark: BenchmarkFixture,
rms_provider: RmsLevelProvider,
audio_chunk: NDArray[np.float32],
) -> None:
"""Benchmark get_rms() method."""
result = typed_benchmark(benchmark, float, rms_provider.get_rms, audio_chunk)
assert 0 <= result <= 1, "RMS should be normalized"
def test_get_db(
self,
benchmark: BenchmarkFixture,
rms_provider: RmsLevelProvider,
audio_chunk: NDArray[np.float32],
) -> None:
"""Benchmark get_db() method."""
result = typed_benchmark(benchmark, float, rms_provider.get_db, audio_chunk)
assert DB_FLOOR <= result <= 0, "dB should be in valid range"
def test_rms_to_db_conversion(
self, benchmark: BenchmarkFixture, rms_provider: RmsLevelProvider
) -> None:
"""Benchmark rms_to_db() conversion."""
result = typed_benchmark(benchmark, float, rms_provider.rms_to_db, 0.5)
assert result < 0, "Half amplitude should be negative dB"
class TestNumpyOperationsBenchmark:
"""Benchmark tests for NumPy operations used in hot paths."""
def test_array_copy(
self, benchmark: BenchmarkFixture, audio_chunk: NDArray[np.float32]
) -> None:
"""Benchmark array copy (used in partial buffer accumulation)."""
result = benchmark_array(benchmark, audio_chunk.copy)
assert result.shape == audio_chunk.shape, "Copy should preserve shape"
def test_array_concatenate_small(
self, benchmark: BenchmarkFixture, audio_chunk: NDArray[np.float32]
) -> None:
"""Benchmark concatenation of 5 chunks (~500ms audio)."""
chunks = [audio_chunk.copy() for _ in range(5)]
def concat() -> NDArray[np.float32]:
return np.concatenate(chunks)
result = benchmark_array(benchmark, concat)
assert len(result) == CHUNK_SIZE * 5, "Should concatenate all chunks"
def test_array_concatenate_large(
self, benchmark: BenchmarkFixture, audio_chunk: NDArray[np.float32]
) -> None:
"""Benchmark concatenation of 20 chunks (~2s audio, typical partial)."""
chunks = [audio_chunk.copy() for _ in range(20)]
def concat() -> NDArray[np.float32]:
return np.concatenate(chunks)
result = benchmark_array(benchmark, concat)
assert len(result) == CHUNK_SIZE * TYPICAL_PARTIAL_CHUNKS, "Should concatenate all chunks"
def test_array_square(
self, benchmark: BenchmarkFixture, audio_chunk: NDArray[np.float32]
) -> None:
"""Benchmark np.square (used in RMS calculation)."""
result = benchmark_array(benchmark, np.square, audio_chunk)
assert result.dtype == np.float32, "Should preserve dtype"
def test_array_mean(
self, benchmark: BenchmarkFixture, audio_chunk: NDArray[np.float32]
) -> None:
"""Benchmark np.mean (used in RMS calculation)."""
result = typed_benchmark(benchmark, float, np.mean, audio_chunk)
assert isinstance(result, (float, np.floating)), "Mean should be scalar"
class TestBufferOperationsBenchmark:
"""Benchmark tests for list operations used in buffers."""
def test_list_append(
self, benchmark: BenchmarkFixture, audio_chunk: NDArray[np.float32]
) -> None:
"""Benchmark list append (partial buffer accumulation)."""
buffer: list[NDArray[np.float32]] = []
def append() -> None:
buffer.append(audio_chunk.copy())
benchmark(append)
assert buffer, "Buffer should have items"
def test_list_clear(
self, benchmark: BenchmarkFixture, audio_chunk: NDArray[np.float32]
) -> None:
"""Benchmark list clear (partial buffer clearing)."""
# Pre-fill buffer
buffer = [audio_chunk.copy() for _ in range(20)]
def clear_and_refill() -> None:
buffer.clear()
for _ in range(TYPICAL_PARTIAL_CHUNKS):
buffer.append(audio_chunk.copy())
benchmark(clear_and_refill)
def test_sum_lengths_naive(
self, benchmark: BenchmarkFixture, audio_chunk: NDArray[np.float32]
) -> None:
"""Benchmark naive sum of chunk lengths (OLD segmenter pattern)."""
chunks = [audio_chunk.copy() for _ in range(20)]
def sum_naive() -> int:
return sum(len(chunk) for chunk in chunks)
result = typed_benchmark(benchmark, int, sum_naive)
assert result == CHUNK_SIZE * TYPICAL_PARTIAL_CHUNKS, "Should sum all lengths"
def test_cached_length(self, benchmark: BenchmarkFixture) -> None:
"""Benchmark cached length access (NEW segmenter pattern)."""
cached_length = CHUNK_SIZE * TYPICAL_PARTIAL_CHUNKS
def get_cached() -> int:
return cached_length
result = typed_benchmark(benchmark, int, get_cached)
assert result == CHUNK_SIZE * TYPICAL_PARTIAL_CHUNKS, "Should return cached value"
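# The two tests above contrast the old approach (re-summing len() across buffered chunks on every
# call) with the new one, which is assumed to maintain a running sample count updated on append/clear.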
class TestPartialBufferComparisonBenchmark:
"""Benchmark comparing old list-based vs new pre-allocated buffer."""
def test_old_list_append_and_concat(
self, benchmark: BenchmarkFixture, audio_chunk: NDArray[np.float32]
) -> None:
"""Benchmark OLD pattern: list append + np.concatenate (20 chunks = 2s)."""
def old_pattern() -> NDArray[np.float32]:
buffer: list[NDArray[np.float32]] = []
for _ in range(TYPICAL_PARTIAL_CHUNKS):
buffer.append(audio_chunk.copy())
return np.concatenate(buffer)
result = benchmark_array(benchmark, old_pattern)
assert len(result) == CHUNK_SIZE * TYPICAL_PARTIAL_CHUNKS, "Should have all samples"
def test_new_preallocated_buffer(
self, benchmark: BenchmarkFixture, audio_chunk: NDArray[np.float32]
) -> None:
"""Benchmark NEW pattern: pre-allocated buffer (20 chunks = 2s)."""
def new_pattern() -> NDArray[np.float32]:
buffer = PartialAudioBuffer(sample_rate=SAMPLE_RATE)
for _ in range(TYPICAL_PARTIAL_CHUNKS):
buffer.append(audio_chunk)
return buffer.get_audio()
result = benchmark_array(benchmark, new_pattern)
assert len(result) == CHUNK_SIZE * TYPICAL_PARTIAL_CHUNKS, "Should have all samples"
def test_preallocated_append_only(
self, benchmark: BenchmarkFixture, audio_chunk: NDArray[np.float32]
) -> None:
"""Benchmark pre-allocated buffer append only (no get_audio)."""
buffer = PartialAudioBuffer(sample_rate=SAMPLE_RATE)
def append_only() -> None:
buffer.append(audio_chunk)
benchmark(append_only)
assert buffer.samples_buffered > 0, "Should have appended"
def test_preallocated_get_audio_only(
self, benchmark: BenchmarkFixture, audio_chunk: NDArray[np.float32]
) -> None:
"""Benchmark pre-allocated buffer get_audio only (pre-filled)."""
buffer = PartialAudioBuffer(sample_rate=SAMPLE_RATE)
for _ in range(TYPICAL_PARTIAL_CHUNKS):
buffer.append(audio_chunk)
result = benchmark_array(benchmark, buffer.get_audio)
assert len(result) == CHUNK_SIZE * TYPICAL_PARTIAL_CHUNKS, "Should have all samples"
def test_realistic_old_pattern_10_cycles(
self, benchmark: BenchmarkFixture, audio_chunk: NDArray[np.float32]
) -> None:
"""Benchmark OLD pattern: 10 cycles of accumulate/concat/clear."""
def old_pattern_cycles() -> list[NDArray[np.float32]]:
results: list[NDArray[np.float32]] = []
for _ in range(10):
buffer: list[NDArray[np.float32]] = []
for _ in range(TYPICAL_PARTIAL_CHUNKS):
buffer.append(audio_chunk.copy())
results.append(np.concatenate(buffer))
buffer.clear() # Note: doesn't help much, new list created next cycle
return results
result = benchmark_array_list(benchmark, old_pattern_cycles)
assert len(result) == 10, "Should have 10 results"
def test_realistic_new_pattern_10_cycles(
self, benchmark: BenchmarkFixture, audio_chunk: NDArray[np.float32]
) -> None:
"""Benchmark NEW pattern: 10 cycles with buffer reuse."""
# Buffer created once (simulates per-meeting initialization)
buffer = PartialAudioBuffer(sample_rate=SAMPLE_RATE)
def new_pattern_cycles() -> list[NDArray[np.float32]]:
results: list[NDArray[np.float32]] = []
for _ in range(10):
for _ in range(TYPICAL_PARTIAL_CHUNKS):
buffer.append(audio_chunk)
results.append(buffer.get_audio())
buffer.clear() # O(1) pointer reset, buffer reused
return results
result = benchmark_array_list(benchmark, new_pattern_cycles)
assert len(result) == 10, "Should have 10 results"
class TestAsrSegmentBuildBenchmarks:
"""Benchmark ASR-to-segment conversion path."""
def test_create_segment_from_asr(
self,
benchmark: BenchmarkFixture,
asr_result: AsrResult,
segment_build_params: SegmentBuildParams,
) -> None:
"""Benchmark create_segment_from_asr conversion."""
result = typed_benchmark(
benchmark,
Segment,
create_segment_from_asr,
segment_build_params,
asr_result,
)
assert result.segment_id == ASR_SEGMENT_ID, "Segment ID should match build params"
assert result.start_time == SEGMENT_START_OFFSET, "Start time should include offset"
assert result.audio_source == AudioSource.MIC, "Audio source should be preserved"
assert result.speaker_role == SpeakerRole.USER, "Speaker role should map from MIC"
class TestGrpcConverterBenchmarks:
"""Benchmark gRPC converter hot paths."""
def test_segment_to_proto_update(
self,
benchmark: BenchmarkFixture,
segment_with_words: Segment,
) -> None:
"""Benchmark segment_to_proto_update conversion."""
result = typed_benchmark(
benchmark,
noteflow_pb2.TranscriptUpdate,
segment_to_proto_update,
"meeting_id",
segment_with_words,
)
assert result.segment.segment_id == segment_with_words.segment_id, "segment_id should match"
def test_metrics_to_proto(
self,
benchmark: BenchmarkFixture,
performance_metrics: PerformanceMetrics,
) -> None:
"""Benchmark metrics_to_proto conversion."""
result = typed_benchmark(
benchmark,
noteflow_pb2.PerformanceMetricsPoint,
metrics_to_proto,
performance_metrics,
)
assert result.cpu_percent >= 0, "CPU percent should be non-negative"
def test_log_entry_to_proto(
self,
benchmark: BenchmarkFixture,
log_entry: LogEntry,
) -> None:
"""Benchmark log_entry_to_proto conversion."""
result = typed_benchmark(
benchmark,
noteflow_pb2.LogEntryProto,
log_entry_to_proto,
log_entry,
)
assert result.message, "Log message should be populated"
class TestVoiceProfileBenchmarks:
"""Benchmark voice profile similarity and merge operations."""
def test_cosine_similarity(
self,
benchmark: BenchmarkFixture,
voice_embedding_pair: tuple[list[float], list[float]],
) -> None:
"""Benchmark cosine similarity for voice profile embeddings."""
existing, new = voice_embedding_pair
result = typed_benchmark(benchmark, float, cosine_similarity, existing, new)
assert 0.0 <= result <= 1.0, "Similarity should be normalized"
assert result > 0.8, "Similar embeddings should yield high similarity"
def test_merge_embeddings(
self,
benchmark: BenchmarkFixture,
voice_embedding_merge_inputs: tuple[list[float], list[float], int],
) -> None:
"""Benchmark merge_embeddings for voice profile updates."""
existing, new, count = voice_embedding_merge_inputs
result = benchmark_float_list(benchmark, merge_embeddings, existing, new, count)
assert len(result) == EMBEDDING_DIM, "Merged embedding should preserve dimension"
norm = float(np.linalg.norm(np.array(result, dtype=np.float32)))
assert 0.99 <= norm <= 1.01, "Merged embedding should remain normalized"