"""Benchmark tests for NoteFlow hot paths.
|
|
|
|
These benchmarks measure the performance of frequently-called code paths
|
|
to establish baselines and detect regressions.
|
|
|
|
Run with: pytest tests/benchmarks/ --benchmark-enable
|
|
Compare: pytest tests/benchmarks/ --benchmark-compare
|
|
Save baseline: pytest tests/benchmarks/ --benchmark-save=baseline
|
|
|
|
Note: These tests are marked as slow and excluded from CI unit test runs.
|
|
Run explicitly with: pytest tests/benchmarks/ -m slow
|
|
"""
|
|
|
|
from __future__ import annotations

from datetime import UTC, datetime
from typing import cast
from uuid import UUID

import numpy as np
import pytest
from numpy.typing import NDArray
from pytest_benchmark.fixture import BenchmarkFixture

from noteflow.application.services.voice_profile.service import (
    EMBEDDING_DIM,
    cosine_similarity,
    merge_embeddings,
)
from noteflow.config.constants import DEFAULT_SAMPLE_RATE
from noteflow.domain.entities.segment import Segment, WordTiming
from noteflow.domain.value_objects import AudioSource, MeetingId, SpeakerRole
from noteflow.grpc.mixins.converters import (
    SegmentBuildParams,
    create_segment_from_asr,
    log_entry_to_proto,
    metrics_to_proto,
    segment_to_proto_update,
)
from noteflow.grpc.proto import noteflow_pb2
from noteflow.infrastructure.asr.dto import AsrResult, WordTiming as AsrWordTiming
from noteflow.infrastructure.asr.segmenter import AudioSegment, Segmenter, SegmenterConfig
from noteflow.infrastructure.asr.streaming_vad import EnergyVad, StreamingVad
from noteflow.infrastructure.audio.levels import RmsLevelProvider, compute_rms
from noteflow.infrastructure.audio.partial_buffer import PartialAudioBuffer
from noteflow.infrastructure.logging.log_buffer import LogEntry
from noteflow.infrastructure.metrics.collector import PerformanceMetrics

# Mark all tests in this module as slow (excluded from CI unit tests)
pytestmark = pytest.mark.slow


def _run_benchmark(benchmark: BenchmarkFixture, func: object, *args: object) -> object:
    """Run benchmark and return result as object.

    This helper captures the unknown return type from pytest-benchmark and
    explicitly returns it as object, allowing downstream casts to work properly.

    The cast is required because BenchmarkFixture.__call__ is untyped in
    pytest-benchmark (no type stubs available).
    """
    # cast required: pytest-benchmark lacks type stubs
    return cast(object, benchmark(func, *args))


def typed_benchmark[T](
    benchmark: BenchmarkFixture, expected_type: type[T], func: object, *args: object
) -> T:
    """Run benchmark and return a typed result.

    BenchmarkFixture.__call__ is untyped but returns the result of func(*args).
    This wrapper provides explicit typing via the expected_type parameter; it is
    used below with scalars (float, bool, int) as well as domain and proto types.

    Args:
        benchmark: The pytest-benchmark fixture
        expected_type: The type that func returns (used for type inference)
        func: The function to benchmark
        *args: Arguments to pass to func
    """
    return cast(T, _run_benchmark(benchmark, func, *args))


def benchmark_array(
    benchmark: BenchmarkFixture, func: object, *args: object
) -> NDArray[np.float32]:
    """Run benchmark for functions returning float32 arrays.

    BenchmarkFixture.__call__ is untyped. This wrapper returns a properly typed NDArray.

    Args:
        benchmark: The pytest-benchmark fixture
        func: The function to benchmark (must return NDArray[np.float32])
        *args: Arguments to pass to func
    """
    return cast(NDArray[np.float32], _run_benchmark(benchmark, func, *args))


def benchmark_list(benchmark: BenchmarkFixture, func: object, *args: object) -> list[AudioSegment]:
    """Run benchmark for functions returning list of AudioSegment.

    BenchmarkFixture.__call__ is untyped. This wrapper returns a properly typed list.

    Args:
        benchmark: The pytest-benchmark fixture
        func: The function to benchmark (must return list[AudioSegment])
        *args: Arguments to pass to func
    """
    return cast(list[AudioSegment], _run_benchmark(benchmark, func, *args))


def benchmark_array_list(
    benchmark: BenchmarkFixture, func: object, *args: object
) -> list[NDArray[np.float32]]:
    """Run benchmark for functions returning list of float32 arrays.

    BenchmarkFixture.__call__ is untyped. This wrapper returns a properly typed list.

    Args:
        benchmark: The pytest-benchmark fixture
        func: The function to benchmark (must return list[NDArray[np.float32]])
        *args: Arguments to pass to func
    """
    return cast(list[NDArray[np.float32]], _run_benchmark(benchmark, func, *args))


def benchmark_float_list(benchmark: BenchmarkFixture, func: object, *args: object) -> list[float]:
    """Run benchmark for functions returning list of floats."""
    return cast(list[float], _run_benchmark(benchmark, func, *args))


# Standard audio chunk size: 100ms at 16kHz (16_000 samples/s * 0.1 s = 1_600)
CHUNK_SIZE = 1600
SAMPLE_RATE = DEFAULT_SAMPLE_RATE
# Typical partial buffer holds ~2s of audio (20 chunks x 100ms)
TYPICAL_PARTIAL_CHUNKS = 20
# dB floor for silence detection
DB_FLOOR = -60
MEETING_UUID = UUID("00000000-0000-0000-0000-000000000001")
MEETING_ID = MeetingId(MEETING_UUID)
ASR_SEGMENT_ID = 7
SEGMENT_START_OFFSET = 1.25
VOICE_EMBEDDING_NOISE = 0.01
VOICE_EMBEDDING_EXISTING_COUNT = 3


@pytest.fixture
def audio_chunk() -> NDArray[np.float32]:
    """Generate a realistic audio chunk (100ms at 16kHz)."""
    return np.random.randn(CHUNK_SIZE).astype(np.float32) * 0.1


@pytest.fixture
def speech_chunk() -> NDArray[np.float32]:
    """Generate a speech-like audio chunk with higher energy."""
    return np.random.randn(CHUNK_SIZE).astype(np.float32) * 0.5


@pytest.fixture
def silence_chunk() -> NDArray[np.float32]:
    """Generate a silence chunk with very low energy."""
    return np.random.randn(CHUNK_SIZE).astype(np.float32) * 0.001


@pytest.fixture
def segmenter() -> Segmenter:
    """Create a segmenter with default config."""
    return Segmenter(config=SegmenterConfig(sample_rate=SAMPLE_RATE))


@pytest.fixture
def energy_vad() -> EnergyVad:
    """Create an energy VAD instance."""
    return EnergyVad()


@pytest.fixture
def streaming_vad() -> StreamingVad:
    """Create a streaming VAD instance."""
    return StreamingVad()


@pytest.fixture
def rms_provider() -> RmsLevelProvider:
    """Create an RMS level provider."""
    return RmsLevelProvider()


@pytest.fixture
def segment_with_words() -> Segment:
    """Create a segment with word timings for converter benchmarks."""
    words = [
        WordTiming(word="hello", start_time=0.0, end_time=0.25, probability=0.95),
        WordTiming(word="world", start_time=0.25, end_time=0.5, probability=0.92),
        WordTiming(word="from", start_time=0.5, end_time=0.7, probability=0.9),
        WordTiming(word="noteflow", start_time=0.7, end_time=1.0, probability=0.93),
    ]
    return Segment(
        segment_id=42,
        text="hello world from noteflow",
        start_time=0.0,
        end_time=1.0,
        words=words,
    )


@pytest.fixture
def asr_result() -> AsrResult:
    """Create an ASR result for segment build benchmarks."""
    words = (
        AsrWordTiming(word="hello", start=0.0, end=0.25, probability=0.95),
        AsrWordTiming(word="world", start=0.25, end=0.5, probability=0.92),
        AsrWordTiming(word="from", start=0.5, end=0.7, probability=0.9),
        AsrWordTiming(word="noteflow", start=0.7, end=1.0, probability=0.93),
    )
    return AsrResult(
        text="hello world from noteflow",
        start=0.0,
        end=1.0,
        words=words,
        language="en",
        language_probability=0.98,
        avg_logprob=-0.2,
        no_speech_prob=0.01,
    )


@pytest.fixture
def segment_build_params() -> SegmentBuildParams:
    """Create segment build parameters for ASR conversion benchmarks."""
    return SegmentBuildParams(
        meeting_id=MEETING_ID,
        segment_id=ASR_SEGMENT_ID,
        segment_start_time=SEGMENT_START_OFFSET,
        audio_source=AudioSource.MIC,
    )


@pytest.fixture
def voice_embedding_pair() -> tuple[list[float], list[float]]:
    """Create two similar embeddings for voice profile benchmarks."""
    rng = np.random.default_rng(42)
    base = rng.standard_normal(EMBEDDING_DIM).astype(np.float32)
    noise = rng.standard_normal(EMBEDDING_DIM).astype(np.float32) * VOICE_EMBEDDING_NOISE
    return base.tolist(), (base + noise).tolist()


@pytest.fixture
def voice_embedding_merge_inputs(
    voice_embedding_pair: tuple[list[float], list[float]],
) -> tuple[list[float], list[float], int]:
    """Create inputs for merge_embeddings benchmark."""
    existing, new = voice_embedding_pair
    return existing, new, VOICE_EMBEDDING_EXISTING_COUNT


@pytest.fixture
def performance_metrics() -> PerformanceMetrics:
    """Create sample metrics for converter benchmarks."""
    return PerformanceMetrics(
        timestamp=1_700_000_000.0,
        cpu_percent=23.5,
        memory_percent=61.2,
        memory_mb=8192.0,
        disk_percent=44.0,
        network_bytes_sent=120_000,
        network_bytes_recv=98_000,
        process_memory_mb=512.0,
        active_connections=12,
    )


@pytest.fixture
def log_entry() -> LogEntry:
    """Create a sample log entry for converter benchmarks."""
    return LogEntry(
        timestamp=datetime.now(tz=UTC),
        level="info",
        source="bench",
        message="Segment persisted",
        details={"meeting_id": "test"},
        trace_id="trace",
        span_id="span",
        event_type="segment.added",
        operation_id="op",
        entity_id="entity",
    )


class TestComputeRmsBenchmark:
    """Benchmark tests for RMS computation (called 36,000x/hour)."""

    def test_compute_rms_typical_chunk(
        self, benchmark: BenchmarkFixture, audio_chunk: NDArray[np.float32]
    ) -> None:
        """Benchmark RMS computation on typical 100ms chunk."""
        result = typed_benchmark(benchmark, float, compute_rms, audio_chunk)
        assert 0 <= result <= 1, "RMS should be in valid range"

    def test_compute_rms_silence(
        self, benchmark: BenchmarkFixture, silence_chunk: NDArray[np.float32]
    ) -> None:
        """Benchmark RMS computation on silence."""
        result = typed_benchmark(benchmark, float, compute_rms, silence_chunk)
        assert result < 0.01, "Silence should have very low RMS"

    def test_compute_rms_speech(
        self, benchmark: BenchmarkFixture, speech_chunk: NDArray[np.float32]
    ) -> None:
        """Benchmark RMS computation on speech-like audio."""
        result = typed_benchmark(benchmark, float, compute_rms, speech_chunk)
        assert result > 0.1, "Speech should have higher RMS"


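# Reference sketch of the math these RMS benchmarks exercise. This is an
# assumption based on the np.square/np.mean hot-path tests below, not the
# production implementation (which lives in noteflow.infrastructure.audio.levels
# and may add clipping or empty-input handling).
def _rms_reference(samples: NDArray[np.float32]) -> float:
    """Illustrative RMS: square each sample, average, take the square root."""
    return float(np.sqrt(np.mean(np.square(samples))))

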
class TestVadBenchmark:
    """Benchmark tests for VAD processing (called 36,000x/hour)."""

    def test_energy_vad_process(
        self, benchmark: BenchmarkFixture, energy_vad: EnergyVad, audio_chunk: NDArray[np.float32]
    ) -> None:
        """Benchmark single EnergyVad.process() call."""
        result = typed_benchmark(benchmark, bool, energy_vad.process, audio_chunk)
        assert isinstance(result, bool), "VAD should return boolean"

    def test_streaming_vad_process_chunk(
        self,
        benchmark: BenchmarkFixture,
        streaming_vad: StreamingVad,
        audio_chunk: NDArray[np.float32],
    ) -> None:
        """Benchmark StreamingVad.process_chunk() call."""
        result = typed_benchmark(benchmark, bool, streaming_vad.process_chunk, audio_chunk)
        assert isinstance(result, bool), "VAD should return boolean"

    def test_energy_vad_speech_detection(
        self, benchmark: BenchmarkFixture, energy_vad: EnergyVad, speech_chunk: NDArray[np.float32]
    ) -> None:
        """Benchmark VAD on speech-like audio."""
        result = typed_benchmark(benchmark, bool, energy_vad.process, speech_chunk)
        assert result is True, "Should detect speech"

    def test_energy_vad_silence_detection(
        self, benchmark: BenchmarkFixture, energy_vad: EnergyVad, silence_chunk: NDArray[np.float32]
    ) -> None:
        """Benchmark VAD on silence."""
        result = typed_benchmark(benchmark, bool, energy_vad.process, silence_chunk)
        assert result is False, "Should detect silence"


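# Minimal sketch of an energy-gate VAD decision, to document what the EnergyVad
# benchmarks above measure. The 0.05 threshold is hypothetical; the real
# EnergyVad may use an adaptive threshold or hysteresis.
def _energy_vad_reference(chunk: NDArray[np.float32], threshold: float = 0.05) -> bool:
    """Illustrative VAD: treat a chunk as speech when its RMS exceeds a threshold."""
    return _rms_reference(chunk) > threshold

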
class TestSegmenterBenchmark:
    """Benchmark tests for Segmenter state machine (called 36,000x/hour)."""

    def test_segmenter_idle_silence(
        self, benchmark: BenchmarkFixture, segmenter: Segmenter, silence_chunk: NDArray[np.float32]
    ) -> None:
        """Benchmark segmenter processing silence in IDLE state."""

        def process_idle() -> list[AudioSegment]:
            return list(segmenter.process_audio(silence_chunk, is_speech=False))

        result = benchmark_list(benchmark, process_idle)
        assert result == [], "No segments should be emitted in idle"

    def test_segmenter_speech_accumulation(
        self, benchmark: BenchmarkFixture, segmenter: Segmenter, speech_chunk: NDArray[np.float32]
    ) -> None:
        """Benchmark segmenter accumulating speech."""
        # First transition to SPEECH state
        list(segmenter.process_audio(speech_chunk, is_speech=True))

        def process_speech() -> list[AudioSegment]:
            return list(segmenter.process_audio(speech_chunk, is_speech=True))

        result = benchmark_list(benchmark, process_speech)
        # Should not emit unless max duration reached
        assert len(result) <= 1, "Should emit at most one segment"

    def test_segmenter_transition_idle_to_speech(
        self, benchmark: BenchmarkFixture, speech_chunk: NDArray[np.float32]
    ) -> None:
        """Benchmark state transition from IDLE to SPEECH."""

        def transition() -> list[AudioSegment]:
            seg = Segmenter(config=SegmenterConfig(sample_rate=SAMPLE_RATE))
            return list(seg.process_audio(speech_chunk, is_speech=True))

        result = benchmark_list(benchmark, transition)
        assert result == [], "Transition should not emit segment"


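# Segmenter state machine as inferred from the tests above (a sketch, not the
# production transitions, which live in noteflow.infrastructure.asr.segmenter):
#
#   IDLE   --is_speech=True-->  SPEECH  (start accumulating; nothing emitted)
#   SPEECH --is_speech=True-->  SPEECH  (keep accumulating; emit early only if
#                                        a max-duration cap is reached)
#   SPEECH --is_speech=False--> IDLE    (presumably emits the accumulated
#                                        AudioSegment after enough trailing
#                                        silence)

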
class TestRmsLevelProviderBenchmark:
    """Benchmark tests for RmsLevelProvider methods."""

    def test_get_rms(
        self,
        benchmark: BenchmarkFixture,
        rms_provider: RmsLevelProvider,
        audio_chunk: NDArray[np.float32],
    ) -> None:
        """Benchmark get_rms() method."""
        result = typed_benchmark(benchmark, float, rms_provider.get_rms, audio_chunk)
        assert 0 <= result <= 1, "RMS should be normalized"

    def test_get_db(
        self,
        benchmark: BenchmarkFixture,
        rms_provider: RmsLevelProvider,
        audio_chunk: NDArray[np.float32],
    ) -> None:
        """Benchmark get_db() method."""
        result = typed_benchmark(benchmark, float, rms_provider.get_db, audio_chunk)
        assert DB_FLOOR <= result <= 0, "dB should be in valid range"

    def test_rms_to_db_conversion(
        self, benchmark: BenchmarkFixture, rms_provider: RmsLevelProvider
    ) -> None:
        """Benchmark rms_to_db() conversion."""
        result = typed_benchmark(benchmark, float, rms_provider.rms_to_db, 0.5)
        assert result < 0, "Half amplitude should be negative dB"


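# Reference sketch of the RMS-to-dB mapping assumed by the assertions above:
# the standard 20*log10(rms), floored at DB_FLOOR for silence. Illustrative
# only; the production conversion lives in RmsLevelProvider and may differ.
# For example, 20 * log10(0.5) is roughly -6.02 dB, satisfying result < 0.
def _rms_to_db_reference(rms: float) -> float:
    """Illustrative dB conversion with a silence floor."""
    if rms <= 0.0:
        return float(DB_FLOOR)
    return max(float(DB_FLOOR), 20.0 * float(np.log10(rms)))

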
class TestNumpyOperationsBenchmark:
    """Benchmark tests for NumPy operations used in hot paths."""

    def test_array_copy(
        self, benchmark: BenchmarkFixture, audio_chunk: NDArray[np.float32]
    ) -> None:
        """Benchmark array copy (used in partial buffer accumulation)."""
        result = benchmark_array(benchmark, audio_chunk.copy)
        assert result.shape == audio_chunk.shape, "Copy should preserve shape"

    def test_array_concatenate_small(
        self, benchmark: BenchmarkFixture, audio_chunk: NDArray[np.float32]
    ) -> None:
        """Benchmark concatenation of 5 chunks (~500ms audio)."""
        chunks = [audio_chunk.copy() for _ in range(5)]

        def concat() -> NDArray[np.float32]:
            return np.concatenate(chunks)

        result = benchmark_array(benchmark, concat)
        assert len(result) == CHUNK_SIZE * 5, "Should concatenate all chunks"

    def test_array_concatenate_large(
        self, benchmark: BenchmarkFixture, audio_chunk: NDArray[np.float32]
    ) -> None:
        """Benchmark concatenation of 20 chunks (~2s audio, typical partial)."""
        chunks = [audio_chunk.copy() for _ in range(TYPICAL_PARTIAL_CHUNKS)]

        def concat() -> NDArray[np.float32]:
            return np.concatenate(chunks)

        result = benchmark_array(benchmark, concat)
        assert len(result) == CHUNK_SIZE * TYPICAL_PARTIAL_CHUNKS, "Should concatenate all chunks"

    def test_array_square(
        self, benchmark: BenchmarkFixture, audio_chunk: NDArray[np.float32]
    ) -> None:
        """Benchmark np.square (used in RMS calculation)."""
        result = benchmark_array(benchmark, np.square, audio_chunk)
        assert result.dtype == np.float32, "Should preserve dtype"

    def test_array_mean(
        self, benchmark: BenchmarkFixture, audio_chunk: NDArray[np.float32]
    ) -> None:
        """Benchmark np.mean (used in RMS calculation)."""
        result = typed_benchmark(benchmark, float, np.mean, audio_chunk)
        assert isinstance(result, (float, np.floating)), "Mean should be scalar"


class TestBufferOperationsBenchmark:
    """Benchmark tests for list operations used in buffers."""

    def test_list_append(
        self, benchmark: BenchmarkFixture, audio_chunk: NDArray[np.float32]
    ) -> None:
        """Benchmark list append (partial buffer accumulation)."""
        buffer: list[NDArray[np.float32]] = []

        def append() -> None:
            buffer.append(audio_chunk.copy())

        benchmark(append)
        assert buffer, "Buffer should have items"

    def test_list_clear(
        self, benchmark: BenchmarkFixture, audio_chunk: NDArray[np.float32]
    ) -> None:
        """Benchmark list clear (partial buffer clearing)."""
        # Pre-fill buffer
        buffer = [audio_chunk.copy() for _ in range(TYPICAL_PARTIAL_CHUNKS)]

        def clear_and_refill() -> None:
            buffer.clear()
            for _ in range(TYPICAL_PARTIAL_CHUNKS):
                buffer.append(audio_chunk.copy())

        benchmark(clear_and_refill)

    def test_sum_lengths_naive(
        self, benchmark: BenchmarkFixture, audio_chunk: NDArray[np.float32]
    ) -> None:
        """Benchmark naive sum of chunk lengths (OLD segmenter pattern)."""
        chunks = [audio_chunk.copy() for _ in range(TYPICAL_PARTIAL_CHUNKS)]

        def sum_naive() -> int:
            return sum(len(chunk) for chunk in chunks)

        result = typed_benchmark(benchmark, int, sum_naive)
        assert result == CHUNK_SIZE * TYPICAL_PARTIAL_CHUNKS, "Should sum all lengths"

    def test_cached_length(self, benchmark: BenchmarkFixture) -> None:
        """Benchmark cached length access (NEW segmenter pattern)."""
        cached_length = CHUNK_SIZE * TYPICAL_PARTIAL_CHUNKS

        def get_cached() -> int:
            return cached_length

        result = typed_benchmark(benchmark, int, get_cached)
        assert result == CHUNK_SIZE * TYPICAL_PARTIAL_CHUNKS, "Should return cached value"


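# Minimal sketch of the pre-allocated buffer pattern compared in the class
# below. This is an assumption for documentation purposes; the real
# PartialAudioBuffer presumably adds capacity and overflow handling. Appends
# write into a fixed array through a write index, so the hot path does no
# per-chunk allocation, and clear() is an O(1) index reset.
class _PreallocatedBufferSketch:
    """Illustrative fixed-capacity float32 audio buffer."""

    def __init__(self, sample_rate: int, max_seconds: int = 30) -> None:
        self._buf: NDArray[np.float32] = np.zeros(sample_rate * max_seconds, dtype=np.float32)
        self.samples_buffered = 0

    def append(self, chunk: NDArray[np.float32]) -> None:
        end = self.samples_buffered + len(chunk)
        self._buf[self.samples_buffered : end] = chunk  # in-place write, no concat
        self.samples_buffered = end

    def get_audio(self) -> NDArray[np.float32]:
        return self._buf[: self.samples_buffered].copy()

    def clear(self) -> None:
        self.samples_buffered = 0  # O(1) pointer reset; storage is reused

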
class TestPartialBufferComparisonBenchmark:
    """Benchmark comparing old list-based vs new pre-allocated buffer."""

    def test_old_list_append_and_concat(
        self, benchmark: BenchmarkFixture, audio_chunk: NDArray[np.float32]
    ) -> None:
        """Benchmark OLD pattern: list append + np.concatenate (20 chunks = 2s)."""

        def old_pattern() -> NDArray[np.float32]:
            buffer: list[NDArray[np.float32]] = []
            for _ in range(TYPICAL_PARTIAL_CHUNKS):
                buffer.append(audio_chunk.copy())
            return np.concatenate(buffer)

        result = benchmark_array(benchmark, old_pattern)
        assert len(result) == CHUNK_SIZE * TYPICAL_PARTIAL_CHUNKS, "Should have all samples"

    def test_new_preallocated_buffer(
        self, benchmark: BenchmarkFixture, audio_chunk: NDArray[np.float32]
    ) -> None:
        """Benchmark NEW pattern: pre-allocated buffer (20 chunks = 2s)."""

        def new_pattern() -> NDArray[np.float32]:
            buffer = PartialAudioBuffer(sample_rate=SAMPLE_RATE)
            for _ in range(TYPICAL_PARTIAL_CHUNKS):
                buffer.append(audio_chunk)
            return buffer.get_audio()

        result = benchmark_array(benchmark, new_pattern)
        assert len(result) == CHUNK_SIZE * TYPICAL_PARTIAL_CHUNKS, "Should have all samples"

    def test_preallocated_append_only(
        self, benchmark: BenchmarkFixture, audio_chunk: NDArray[np.float32]
    ) -> None:
        """Benchmark pre-allocated buffer append only (no get_audio)."""
        buffer = PartialAudioBuffer(sample_rate=SAMPLE_RATE)

        def append_only() -> None:
            buffer.append(audio_chunk)

        benchmark(append_only)
        assert buffer.samples_buffered > 0, "Should have appended"

    def test_preallocated_get_audio_only(
        self, benchmark: BenchmarkFixture, audio_chunk: NDArray[np.float32]
    ) -> None:
        """Benchmark pre-allocated buffer get_audio only (pre-filled)."""
        buffer = PartialAudioBuffer(sample_rate=SAMPLE_RATE)
        for _ in range(TYPICAL_PARTIAL_CHUNKS):
            buffer.append(audio_chunk)

        result = benchmark_array(benchmark, buffer.get_audio)
        assert len(result) == CHUNK_SIZE * TYPICAL_PARTIAL_CHUNKS, "Should have all samples"

    def test_realistic_old_pattern_10_cycles(
        self, benchmark: BenchmarkFixture, audio_chunk: NDArray[np.float32]
    ) -> None:
        """Benchmark OLD pattern: 10 cycles of accumulate/concat/clear."""

        def old_pattern_cycles() -> list[NDArray[np.float32]]:
            results: list[NDArray[np.float32]] = []
            for _ in range(10):
                buffer: list[NDArray[np.float32]] = []
                for _ in range(TYPICAL_PARTIAL_CHUNKS):
                    buffer.append(audio_chunk.copy())
                results.append(np.concatenate(buffer))
                buffer.clear()  # Note: doesn't help much, new list created next cycle
            return results

        result = benchmark_array_list(benchmark, old_pattern_cycles)
        assert len(result) == 10, "Should have 10 results"

    def test_realistic_new_pattern_10_cycles(
        self, benchmark: BenchmarkFixture, audio_chunk: NDArray[np.float32]
    ) -> None:
        """Benchmark NEW pattern: 10 cycles with buffer reuse."""
        # Buffer created once (simulates per-meeting initialization)
        buffer = PartialAudioBuffer(sample_rate=SAMPLE_RATE)

        def new_pattern_cycles() -> list[NDArray[np.float32]]:
            results: list[NDArray[np.float32]] = []
            for _ in range(10):
                for _ in range(TYPICAL_PARTIAL_CHUNKS):
                    buffer.append(audio_chunk)
                results.append(buffer.get_audio())
                buffer.clear()  # O(1) pointer reset, buffer reused
            return results

        result = benchmark_array_list(benchmark, new_pattern_cycles)
        assert len(result) == 10, "Should have 10 results"


class TestAsrSegmentBuildBenchmarks:
    """Benchmark ASR-to-segment conversion path."""

    def test_create_segment_from_asr(
        self,
        benchmark: BenchmarkFixture,
        asr_result: AsrResult,
        segment_build_params: SegmentBuildParams,
    ) -> None:
        """Benchmark create_segment_from_asr conversion."""
        result = typed_benchmark(
            benchmark,
            Segment,
            create_segment_from_asr,
            segment_build_params,
            asr_result,
        )
        assert result.segment_id == ASR_SEGMENT_ID, "Segment ID should match build params"
        assert result.start_time == SEGMENT_START_OFFSET, "Start time should include offset"
        assert result.audio_source == AudioSource.MIC, "Audio source should be preserved"
        assert result.speaker_role == SpeakerRole.USER, "Speaker role should map from MIC"


class TestGrpcConverterBenchmarks:
    """Benchmark gRPC converter hot paths."""

    def test_segment_to_proto_update(
        self,
        benchmark: BenchmarkFixture,
        segment_with_words: Segment,
    ) -> None:
        """Benchmark segment_to_proto_update conversion."""
        result = typed_benchmark(
            benchmark,
            noteflow_pb2.TranscriptUpdate,
            segment_to_proto_update,
            "meeting_id",
            segment_with_words,
        )
        assert result.segment.segment_id == segment_with_words.segment_id, "segment_id should match"

    def test_metrics_to_proto(
        self,
        benchmark: BenchmarkFixture,
        performance_metrics: PerformanceMetrics,
    ) -> None:
        """Benchmark metrics_to_proto conversion."""
        result = typed_benchmark(
            benchmark,
            noteflow_pb2.PerformanceMetricsPoint,
            metrics_to_proto,
            performance_metrics,
        )
        assert result.cpu_percent >= 0, "CPU percent should be non-negative"

    def test_log_entry_to_proto(
        self,
        benchmark: BenchmarkFixture,
        log_entry: LogEntry,
    ) -> None:
        """Benchmark log_entry_to_proto conversion."""
        result = typed_benchmark(
            benchmark,
            noteflow_pb2.LogEntryProto,
            log_entry_to_proto,
            log_entry,
        )
        assert result.message, "Log message should be populated"


class TestVoiceProfileBenchmarks:
    """Benchmark voice profile similarity and merge operations."""

    def test_cosine_similarity(
        self,
        benchmark: BenchmarkFixture,
        voice_embedding_pair: tuple[list[float], list[float]],
    ) -> None:
        """Benchmark cosine similarity for voice profile embeddings."""
        existing, new = voice_embedding_pair
        result = typed_benchmark(benchmark, float, cosine_similarity, existing, new)
        assert 0.0 <= result <= 1.0, "Similarity should be normalized"
        assert result > 0.8, "Similar embeddings should yield high similarity"

    def test_merge_embeddings(
        self,
        benchmark: BenchmarkFixture,
        voice_embedding_merge_inputs: tuple[list[float], list[float], int],
    ) -> None:
        """Benchmark merge_embeddings for voice profile updates."""
        existing, new, count = voice_embedding_merge_inputs
        result = benchmark_float_list(benchmark, merge_embeddings, existing, new, count)
        assert len(result) == EMBEDDING_DIM, "Merged embedding should preserve dimension"
        norm = float(np.linalg.norm(np.array(result, dtype=np.float32)))
        assert 0.99 <= norm <= 1.01, "Merged embedding should remain normalized"
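

# Reference sketch of the merge semantics implied by the assertions above: a
# count-weighted running mean followed by L2 renormalization, so the result
# keeps EMBEDDING_DIM components and unit norm. This is an assumption; the
# production merge_embeddings lives in
# noteflow.application.services.voice_profile.service.
def _merge_embeddings_reference(
    existing: list[float], new: list[float], existing_count: int
) -> list[float]:
    """Illustrative merge: weighted average of embeddings, renormalized."""
    merged = (
        np.asarray(existing, dtype=np.float32) * existing_count
        + np.asarray(new, dtype=np.float32)
    ) / (existing_count + 1)
    norm = float(np.linalg.norm(merged))
    return (merged / norm).tolist() if norm > 0 else merged.tolist()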