noteflow/tests/benchmarks/test_hot_paths.py
"""Benchmark tests for NoteFlow hot paths.
These benchmarks measure the performance of frequently-called code paths
to establish baselines and detect regressions.
Run with: pytest tests/benchmarks/ --benchmark-enable
Compare: pytest tests/benchmarks/ --benchmark-compare
Save baseline: pytest tests/benchmarks/ --benchmark-save=baseline
Note: These tests are marked as slow and excluded from CI unit test runs.
Run explicitly with: pytest tests/benchmarks/ -m slow
"""
from __future__ import annotations
from datetime import UTC, datetime
from typing import cast
from uuid import UUID
import numpy as np
import pytest
from numpy.typing import NDArray
from pytest_benchmark.fixture import BenchmarkFixture
from noteflow.application.services.voice_profile.service import (
EMBEDDING_DIM,
cosine_similarity,
merge_embeddings,
)
from noteflow.config.constants import DEFAULT_SAMPLE_RATE
from noteflow.domain.entities.segment import Segment, WordTiming
from noteflow.domain.value_objects import AudioSource, MeetingId, SpeakerRole
from noteflow.grpc.mixins.converters import (
SegmentBuildParams,
create_segment_from_asr,
log_entry_to_proto,
metrics_to_proto,
segment_to_proto_update,
)
from noteflow.grpc.proto import noteflow_pb2
from noteflow.infrastructure.asr.dto import AsrResult, WordTiming as AsrWordTiming
from noteflow.infrastructure.asr.segmenter import AudioSegment, Segmenter, SegmenterConfig
from noteflow.infrastructure.asr.streaming_vad import EnergyVad, StreamingVad
from noteflow.infrastructure.audio.levels import RmsLevelProvider, compute_rms
from noteflow.infrastructure.audio.partial_buffer import PartialAudioBuffer
from noteflow.infrastructure.logging.log_buffer import LogEntry
from noteflow.infrastructure.metrics.collector import PerformanceMetrics
# Mark all tests in this module as slow (excluded from CI unit tests)
pytestmark = pytest.mark.slow
def _run_benchmark(benchmark: BenchmarkFixture, func: object, *args: object) -> object:
"""Run benchmark and return result as object.
This helper captures the unknown return type from pytest-benchmark and
explicitly returns it as object, allowing downstream casts to work properly.
The cast is required because BenchmarkFixture.__call__ is untyped in
pytest-benchmark (no type stubs available).
"""
# cast required: pytest-benchmark lacks type stubs
return cast(object, benchmark(func, *args))
def typed_benchmark[T](
benchmark: BenchmarkFixture, expected_type: type[T], func: object, *args: object
) -> T:
"""Run benchmark and return typed result for simple types (float, bool, int).
BenchmarkFixture.__call__ is untyped but returns the result of func(*args).
This wrapper provides explicit typing via the expected_type parameter.
Args:
benchmark: The pytest-benchmark fixture
expected_type: The type that func returns (used for type inference)
func: The function to benchmark
*args: Arguments to pass to func
"""
return cast(T, _run_benchmark(benchmark, func, *args))
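# Example usage (mirrors the RMS tests below): typed_benchmark(benchmark, float, compute_rms, chunk)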
def benchmark_array(
benchmark: BenchmarkFixture, func: object, *args: object
) -> NDArray[np.float32]:
"""Run benchmark for functions returning float32 arrays.
BenchmarkFixture.__call__ is untyped. This wrapper returns a properly typed NDArray.
Args:
benchmark: The pytest-benchmark fixture
func: The function to benchmark (must return NDArray[np.float32])
*args: Arguments to pass to func
"""
return cast(NDArray[np.float32], _run_benchmark(benchmark, func, *args))
def benchmark_list(benchmark: BenchmarkFixture, func: object, *args: object) -> list[AudioSegment]:
"""Run benchmark for functions returning list of AudioSegment.
BenchmarkFixture.__call__ is untyped. This wrapper returns a properly typed list.
Args:
benchmark: The pytest-benchmark fixture
func: The function to benchmark (must return list[AudioSegment])
*args: Arguments to pass to func
"""
return cast(list[AudioSegment], _run_benchmark(benchmark, func, *args))
def benchmark_array_list(
benchmark: BenchmarkFixture, func: object, *args: object
) -> list[NDArray[np.float32]]:
"""Run benchmark for functions returning list of float32 arrays.
BenchmarkFixture.__call__ is untyped. This wrapper returns a properly typed list.
Args:
benchmark: The pytest-benchmark fixture
func: The function to benchmark (must return list[NDArray[np.float32]])
*args: Arguments to pass to func
"""
return cast(list[NDArray[np.float32]], _run_benchmark(benchmark, func, *args))
def benchmark_float_list(benchmark: BenchmarkFixture, func: object, *args: object) -> list[float]:
"""Run benchmark for functions returning list of floats."""
return cast(list[float], _run_benchmark(benchmark, func, *args))
# Standard audio chunk size (100ms at 16kHz)
CHUNK_SIZE = 1600
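# 0.1 s * 16_000 samples/s = 1_600 samples per chunk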
SAMPLE_RATE = DEFAULT_SAMPLE_RATE
# Typical partial buffer holds ~2s of audio (20 chunks x 100ms)
TYPICAL_PARTIAL_CHUNKS = 20
# dB floor for silence detection
DB_FLOOR = -60
MEETING_UUID = UUID("00000000-0000-0000-0000-000000000001")
MEETING_ID = MeetingId(MEETING_UUID)
ASR_SEGMENT_ID = 7
SEGMENT_START_OFFSET = 1.25
VOICE_EMBEDDING_NOISE = 0.01
VOICE_EMBEDDING_EXISTING_COUNT = 3
@pytest.fixture
def audio_chunk() -> NDArray[np.float32]:
"""Generate a realistic audio chunk (100ms at 16kHz)."""
return np.random.randn(CHUNK_SIZE).astype(np.float32) * 0.1
@pytest.fixture
def speech_chunk() -> NDArray[np.float32]:
"""Generate a speech-like audio chunk with higher energy."""
return np.random.randn(CHUNK_SIZE).astype(np.float32) * 0.5
@pytest.fixture
def silence_chunk() -> NDArray[np.float32]:
"""Generate a silence chunk with very low energy."""
return np.random.randn(CHUNK_SIZE).astype(np.float32) * 0.001
@pytest.fixture
def segmenter() -> Segmenter:
"""Create a segmenter with default config."""
return Segmenter(config=SegmenterConfig(sample_rate=SAMPLE_RATE))
@pytest.fixture
def energy_vad() -> EnergyVad:
"""Create an energy VAD instance."""
return EnergyVad()
@pytest.fixture
def streaming_vad() -> StreamingVad:
"""Create a streaming VAD instance."""
return StreamingVad()
@pytest.fixture
def rms_provider() -> RmsLevelProvider:
"""Create an RMS level provider."""
return RmsLevelProvider()
@pytest.fixture
def segment_with_words() -> Segment:
"""Create a segment with word timings for converter benchmarks."""
words = [
WordTiming(word="hello", start_time=0.0, end_time=0.25, probability=0.95),
WordTiming(word="world", start_time=0.25, end_time=0.5, probability=0.92),
WordTiming(word="from", start_time=0.5, end_time=0.7, probability=0.9),
WordTiming(word="noteflow", start_time=0.7, end_time=1.0, probability=0.93),
]
return Segment(
segment_id=42,
text="hello world from noteflow",
start_time=0.0,
end_time=1.0,
words=words,
)
@pytest.fixture
def asr_result() -> AsrResult:
"""Create an ASR result for segment build benchmarks."""
words = (
AsrWordTiming(word="hello", start=0.0, end=0.25, probability=0.95),
AsrWordTiming(word="world", start=0.25, end=0.5, probability=0.92),
AsrWordTiming(word="from", start=0.5, end=0.7, probability=0.9),
AsrWordTiming(word="noteflow", start=0.7, end=1.0, probability=0.93),
)
return AsrResult(
text="hello world from noteflow",
start=0.0,
end=1.0,
words=words,
language="en",
language_probability=0.98,
avg_logprob=-0.2,
no_speech_prob=0.01,
)
@pytest.fixture
def segment_build_params() -> SegmentBuildParams:
"""Create segment build parameters for ASR conversion benchmarks."""
return SegmentBuildParams(
meeting_id=MEETING_ID,
segment_id=ASR_SEGMENT_ID,
segment_start_time=SEGMENT_START_OFFSET,
audio_source=AudioSource.MIC,
)
@pytest.fixture
def voice_embedding_pair() -> tuple[list[float], list[float]]:
"""Create two similar embeddings for voice profile benchmarks."""
rng = np.random.default_rng(42)
base = rng.standard_normal(EMBEDDING_DIM).astype(np.float32)
noise = rng.standard_normal(EMBEDDING_DIM).astype(np.float32) * VOICE_EMBEDDING_NOISE
return base.tolist(), (base + noise).tolist()
@pytest.fixture
def voice_embedding_merge_inputs(
voice_embedding_pair: tuple[list[float], list[float]],
) -> tuple[list[float], list[float], int]:
"""Create inputs for merge_embeddings benchmark."""
existing, new = voice_embedding_pair
return existing, new, VOICE_EMBEDDING_EXISTING_COUNT
@pytest.fixture
def performance_metrics() -> PerformanceMetrics:
"""Create sample metrics for converter benchmarks."""
return PerformanceMetrics(
timestamp=1_700_000_000.0,
cpu_percent=23.5,
memory_percent=61.2,
memory_mb=8192.0,
disk_percent=44.0,
network_bytes_sent=120_000,
network_bytes_recv=98_000,
process_memory_mb=512.0,
active_connections=12,
)
@pytest.fixture
def log_entry() -> LogEntry:
"""Create a sample log entry for converter benchmarks."""
return LogEntry(
timestamp=datetime.now(tz=UTC),
level="info",
source="bench",
message="Segment persisted",
details={"meeting_id": "test"},
trace_id="trace",
span_id="span",
event_type="segment.added",
operation_id="op",
entity_id="entity",
)
class TestComputeRmsBenchmark:
"""Benchmark tests for RMS computation (called 36,000x/hour)."""
def test_compute_rms_typical_chunk(
self, benchmark: BenchmarkFixture, audio_chunk: NDArray[np.float32]
) -> None:
"""Benchmark RMS computation on typical 100ms chunk."""
result = typed_benchmark(benchmark, float, compute_rms, audio_chunk)
assert 0 <= result <= 1, "RMS should be in valid range"
def test_compute_rms_silence(
self, benchmark: BenchmarkFixture, silence_chunk: NDArray[np.float32]
) -> None:
"""Benchmark RMS computation on silence."""
result = typed_benchmark(benchmark, float, compute_rms, silence_chunk)
assert result < 0.01, "Silence should have very low RMS"
def test_compute_rms_speech(
self, benchmark: BenchmarkFixture, speech_chunk: NDArray[np.float32]
) -> None:
"""Benchmark RMS computation on speech-like audio."""
result = typed_benchmark(benchmark, float, compute_rms, speech_chunk)
assert result > 0.1, "Speech should have higher RMS"
class TestVadBenchmark:
"""Benchmark tests for VAD processing (called 36,000x/hour)."""
def test_energy_vad_process(
self, benchmark: BenchmarkFixture, energy_vad: EnergyVad, audio_chunk: NDArray[np.float32]
) -> None:
"""Benchmark single EnergyVad.process() call."""
result = typed_benchmark(benchmark, bool, energy_vad.process, audio_chunk)
assert isinstance(result, bool), "VAD should return boolean"
def test_streaming_vad_process_chunk(
self,
benchmark: BenchmarkFixture,
streaming_vad: StreamingVad,
audio_chunk: NDArray[np.float32],
) -> None:
"""Benchmark StreamingVad.process_chunk() call."""
result = typed_benchmark(benchmark, bool, streaming_vad.process_chunk, audio_chunk)
assert isinstance(result, bool), "VAD should return boolean"
def test_energy_vad_speech_detection(
self, benchmark: BenchmarkFixture, energy_vad: EnergyVad, speech_chunk: NDArray[np.float32]
) -> None:
"""Benchmark VAD on speech-like audio."""
result = typed_benchmark(benchmark, bool, energy_vad.process, speech_chunk)
assert result is True, "Should detect speech"
def test_energy_vad_silence_detection(
self, benchmark: BenchmarkFixture, energy_vad: EnergyVad, silence_chunk: NDArray[np.float32]
) -> None:
"""Benchmark VAD on silence."""
result = typed_benchmark(benchmark, bool, energy_vad.process, silence_chunk)
assert result is False, "Should detect silence"
class TestSegmenterBenchmark:
"""Benchmark tests for Segmenter state machine (called 36,000x/hour)."""
def test_segmenter_idle_silence(
self, benchmark: BenchmarkFixture, segmenter: Segmenter, silence_chunk: NDArray[np.float32]
) -> None:
"""Benchmark segmenter processing silence in IDLE state."""
def process_idle() -> list[AudioSegment]:
return list(segmenter.process_audio(silence_chunk, is_speech=False))
result = benchmark_list(benchmark, process_idle)
assert result == [], "No segments should be emitted in idle"
def test_segmenter_speech_accumulation(
self, benchmark: BenchmarkFixture, segmenter: Segmenter, speech_chunk: NDArray[np.float32]
) -> None:
"""Benchmark segmenter accumulating speech."""
# First transition to SPEECH state
list(segmenter.process_audio(speech_chunk, is_speech=True))
def process_speech() -> list[AudioSegment]:
return list(segmenter.process_audio(speech_chunk, is_speech=True))
result = benchmark_list(benchmark, process_speech)
# Should not emit unless max duration reached
assert len(result) <= 1, "Should emit at most one segment"
def test_segmenter_transition_idle_to_speech(
self, benchmark: BenchmarkFixture, speech_chunk: NDArray[np.float32]
) -> None:
"""Benchmark state transition from IDLE to SPEECH."""
def transition() -> list[AudioSegment]:
seg = Segmenter(config=SegmenterConfig(sample_rate=SAMPLE_RATE))
return list(seg.process_audio(speech_chunk, is_speech=True))
result = benchmark_list(benchmark, transition)
assert result == [], "Transition should not emit segment"
class TestRmsLevelProviderBenchmark:
"""Benchmark tests for RmsLevelProvider methods."""
def test_get_rms(
self,
benchmark: BenchmarkFixture,
rms_provider: RmsLevelProvider,
audio_chunk: NDArray[np.float32],
) -> None:
"""Benchmark get_rms() method."""
result = typed_benchmark(benchmark, float, rms_provider.get_rms, audio_chunk)
assert 0 <= result <= 1, "RMS should be normalized"
def test_get_db(
self,
benchmark: BenchmarkFixture,
rms_provider: RmsLevelProvider,
audio_chunk: NDArray[np.float32],
) -> None:
"""Benchmark get_db() method."""
result = typed_benchmark(benchmark, float, rms_provider.get_db, audio_chunk)
assert DB_FLOOR <= result <= 0, "dB should be in valid range"
def test_rms_to_db_conversion(
self, benchmark: BenchmarkFixture, rms_provider: RmsLevelProvider
) -> None:
"""Benchmark rms_to_db() conversion."""
result = typed_benchmark(benchmark, float, rms_provider.rms_to_db, 0.5)
assert result < 0, "Half amplitude should be negative dB"
class TestNumpyOperationsBenchmark:
"""Benchmark tests for NumPy operations used in hot paths."""
def test_array_copy(
self, benchmark: BenchmarkFixture, audio_chunk: NDArray[np.float32]
) -> None:
"""Benchmark array copy (used in partial buffer accumulation)."""
result = benchmark_array(benchmark, audio_chunk.copy)
assert result.shape == audio_chunk.shape, "Copy should preserve shape"
def test_array_concatenate_small(
self, benchmark: BenchmarkFixture, audio_chunk: NDArray[np.float32]
) -> None:
"""Benchmark concatenation of 5 chunks (~500ms audio)."""
chunks = [audio_chunk.copy() for _ in range(5)]
def concat() -> NDArray[np.float32]:
return np.concatenate(chunks)
result = benchmark_array(benchmark, concat)
assert len(result) == CHUNK_SIZE * 5, "Should concatenate all chunks"
def test_array_concatenate_large(
self, benchmark: BenchmarkFixture, audio_chunk: NDArray[np.float32]
) -> None:
"""Benchmark concatenation of 20 chunks (~2s audio, typical partial)."""
chunks = [audio_chunk.copy() for _ in range(20)]
def concat() -> NDArray[np.float32]:
return np.concatenate(chunks)
result = benchmark_array(benchmark, concat)
assert len(result) == CHUNK_SIZE * TYPICAL_PARTIAL_CHUNKS, "Should concatenate all chunks"
def test_array_square(
self, benchmark: BenchmarkFixture, audio_chunk: NDArray[np.float32]
) -> None:
"""Benchmark np.square (used in RMS calculation)."""
result = benchmark_array(benchmark, np.square, audio_chunk)
assert result.dtype == np.float32, "Should preserve dtype"
def test_array_mean(
self, benchmark: BenchmarkFixture, audio_chunk: NDArray[np.float32]
) -> None:
"""Benchmark np.mean (used in RMS calculation)."""
result = typed_benchmark(benchmark, float, np.mean, audio_chunk)
assert isinstance(result, (float, np.floating)), "Mean should be scalar"
class TestBufferOperationsBenchmark:
"""Benchmark tests for list operations used in buffers."""
def test_list_append(
self, benchmark: BenchmarkFixture, audio_chunk: NDArray[np.float32]
) -> None:
"""Benchmark list append (partial buffer accumulation)."""
buffer: list[NDArray[np.float32]] = []
def append() -> None:
buffer.append(audio_chunk.copy())
benchmark(append)
assert buffer, "Buffer should have items"
def test_list_clear(
self, benchmark: BenchmarkFixture, audio_chunk: NDArray[np.float32]
) -> None:
"""Benchmark list clear (partial buffer clearing)."""
# Pre-fill buffer
buffer = [audio_chunk.copy() for _ in range(20)]
def clear_and_refill() -> None:
buffer.clear()
for _ in range(TYPICAL_PARTIAL_CHUNKS):
buffer.append(audio_chunk.copy())
benchmark(clear_and_refill)
def test_sum_lengths_naive(
self, benchmark: BenchmarkFixture, audio_chunk: NDArray[np.float32]
) -> None:
"""Benchmark naive sum of chunk lengths (OLD segmenter pattern)."""
chunks = [audio_chunk.copy() for _ in range(20)]
def sum_naive() -> int:
return sum(len(chunk) for chunk in chunks)
result = typed_benchmark(benchmark, int, sum_naive)
assert result == CHUNK_SIZE * TYPICAL_PARTIAL_CHUNKS, "Should sum all lengths"
def test_cached_length(self, benchmark: BenchmarkFixture) -> None:
"""Benchmark cached length access (NEW segmenter pattern)."""
cached_length = CHUNK_SIZE * TYPICAL_PARTIAL_CHUNKS
def get_cached() -> int:
return cached_length
result = typed_benchmark(benchmark, int, get_cached)
assert result == CHUNK_SIZE * TYPICAL_PARTIAL_CHUNKS, "Should return cached value"
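# The two tests above contrast the old approach (re-summing len() across buffered chunks on every
# call) with the new one, which is assumed to maintain a running sample count updated on append/clear.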
class TestPartialBufferComparisonBenchmark:
"""Benchmark comparing old list-based vs new pre-allocated buffer."""
def test_old_list_append_and_concat(
self, benchmark: BenchmarkFixture, audio_chunk: NDArray[np.float32]
) -> None:
"""Benchmark OLD pattern: list append + np.concatenate (20 chunks = 2s)."""
def old_pattern() -> NDArray[np.float32]:
buffer: list[NDArray[np.float32]] = []
for _ in range(TYPICAL_PARTIAL_CHUNKS):
buffer.append(audio_chunk.copy())
return np.concatenate(buffer)
result = benchmark_array(benchmark, old_pattern)
assert len(result) == CHUNK_SIZE * TYPICAL_PARTIAL_CHUNKS, "Should have all samples"
def test_new_preallocated_buffer(
self, benchmark: BenchmarkFixture, audio_chunk: NDArray[np.float32]
) -> None:
"""Benchmark NEW pattern: pre-allocated buffer (20 chunks = 2s)."""
def new_pattern() -> NDArray[np.float32]:
buffer = PartialAudioBuffer(sample_rate=SAMPLE_RATE)
for _ in range(TYPICAL_PARTIAL_CHUNKS):
buffer.append(audio_chunk)
return buffer.get_audio()
result = benchmark_array(benchmark, new_pattern)
assert len(result) == CHUNK_SIZE * TYPICAL_PARTIAL_CHUNKS, "Should have all samples"
def test_preallocated_append_only(
self, benchmark: BenchmarkFixture, audio_chunk: NDArray[np.float32]
) -> None:
"""Benchmark pre-allocated buffer append only (no get_audio)."""
buffer = PartialAudioBuffer(sample_rate=SAMPLE_RATE)
def append_only() -> None:
buffer.append(audio_chunk)
benchmark(append_only)
assert buffer.samples_buffered > 0, "Should have appended"
def test_preallocated_get_audio_only(
self, benchmark: BenchmarkFixture, audio_chunk: NDArray[np.float32]
) -> None:
"""Benchmark pre-allocated buffer get_audio only (pre-filled)."""
buffer = PartialAudioBuffer(sample_rate=SAMPLE_RATE)
for _ in range(TYPICAL_PARTIAL_CHUNKS):
buffer.append(audio_chunk)
result = benchmark_array(benchmark, buffer.get_audio)
assert len(result) == CHUNK_SIZE * TYPICAL_PARTIAL_CHUNKS, "Should have all samples"
def test_realistic_old_pattern_10_cycles(
self, benchmark: BenchmarkFixture, audio_chunk: NDArray[np.float32]
) -> None:
"""Benchmark OLD pattern: 10 cycles of accumulate/concat/clear."""
def old_pattern_cycles() -> list[NDArray[np.float32]]:
results: list[NDArray[np.float32]] = []
for _ in range(10):
buffer: list[NDArray[np.float32]] = []
for _ in range(TYPICAL_PARTIAL_CHUNKS):
buffer.append(audio_chunk.copy())
results.append(np.concatenate(buffer))
buffer.clear() # Note: doesn't help much, new list created next cycle
return results
result = benchmark_array_list(benchmark, old_pattern_cycles)
assert len(result) == 10, "Should have 10 results"
def test_realistic_new_pattern_10_cycles(
self, benchmark: BenchmarkFixture, audio_chunk: NDArray[np.float32]
) -> None:
"""Benchmark NEW pattern: 10 cycles with buffer reuse."""
# Buffer created once (simulates per-meeting initialization)
buffer = PartialAudioBuffer(sample_rate=SAMPLE_RATE)
def new_pattern_cycles() -> list[NDArray[np.float32]]:
results: list[NDArray[np.float32]] = []
for _ in range(10):
for _ in range(TYPICAL_PARTIAL_CHUNKS):
buffer.append(audio_chunk)
results.append(buffer.get_audio())
buffer.clear() # O(1) pointer reset, buffer reused
return results
result = benchmark_array_list(benchmark, new_pattern_cycles)
assert len(result) == 10, "Should have 10 results"
class TestAsrSegmentBuildBenchmarks:
"""Benchmark ASR-to-segment conversion path."""
def test_create_segment_from_asr(
self,
benchmark: BenchmarkFixture,
asr_result: AsrResult,
segment_build_params: SegmentBuildParams,
) -> None:
"""Benchmark create_segment_from_asr conversion."""
result = typed_benchmark(
benchmark,
Segment,
create_segment_from_asr,
segment_build_params,
asr_result,
)
assert result.segment_id == ASR_SEGMENT_ID, "Segment ID should match build params"
assert result.start_time == SEGMENT_START_OFFSET, "Start time should include offset"
assert result.audio_source == AudioSource.MIC, "Audio source should be preserved"
assert result.speaker_role == SpeakerRole.USER, "Speaker role should map from MIC"
class TestGrpcConverterBenchmarks:
"""Benchmark gRPC converter hot paths."""
def test_segment_to_proto_update(
self,
benchmark: BenchmarkFixture,
segment_with_words: Segment,
) -> None:
"""Benchmark segment_to_proto_update conversion."""
result = typed_benchmark(
benchmark,
noteflow_pb2.TranscriptUpdate,
segment_to_proto_update,
"meeting_id",
segment_with_words,
)
assert result.segment.segment_id == segment_with_words.segment_id, "segment_id should match"
def test_metrics_to_proto(
self,
benchmark: BenchmarkFixture,
performance_metrics: PerformanceMetrics,
) -> None:
"""Benchmark metrics_to_proto conversion."""
result = typed_benchmark(
benchmark,
noteflow_pb2.PerformanceMetricsPoint,
metrics_to_proto,
performance_metrics,
)
assert result.cpu_percent >= 0, "CPU percent should be non-negative"
def test_log_entry_to_proto(
self,
benchmark: BenchmarkFixture,
log_entry: LogEntry,
) -> None:
"""Benchmark log_entry_to_proto conversion."""
result = typed_benchmark(
benchmark,
noteflow_pb2.LogEntryProto,
log_entry_to_proto,
log_entry,
)
assert result.message, "Log message should be populated"
class TestVoiceProfileBenchmarks:
"""Benchmark voice profile similarity and merge operations."""
def test_cosine_similarity(
self,
benchmark: BenchmarkFixture,
voice_embedding_pair: tuple[list[float], list[float]],
) -> None:
"""Benchmark cosine similarity for voice profile embeddings."""
existing, new = voice_embedding_pair
result = typed_benchmark(benchmark, float, cosine_similarity, existing, new)
assert 0.0 <= result <= 1.0, "Similarity should be normalized"
assert result > 0.8, "Similar embeddings should yield high similarity"
def test_merge_embeddings(
self,
benchmark: BenchmarkFixture,
voice_embedding_merge_inputs: tuple[list[float], list[float], int],
) -> None:
"""Benchmark merge_embeddings for voice profile updates."""
existing, new, count = voice_embedding_merge_inputs
result = benchmark_float_list(benchmark, merge_embeddings, existing, new, count)
assert len(result) == EMBEDDING_DIM, "Merged embedding should preserve dimension"
norm = float(np.linalg.norm(np.array(result, dtype=np.float32)))
assert 0.99 <= norm <= 1.01, "Merged embedding should remain normalized"