#!/usr/bin/env python
"""Comprehensive performance profiling for NoteFlow backend.
Run with: python scripts/profile_comprehensive.py [--profile] [--verbose] [--memory]
Profiles:
- Audio processing pipeline (VAD, segmentation, RMS)
- ORM/Domain conversions
- Protobuf operations
- Async context manager overhead
- gRPC request simulation
- Memory usage (RSS) and GC pressure
Options:
--profile Enable cProfile for detailed function-level analysis
--verbose Show extended profile output
--memory Enable detailed memory profiling (RSS, GC stats)
"""
from __future__ import annotations
import argparse
import asyncio
import cProfile
import gc
import io
import os
import pstats
import sys
import tempfile
import time
from contextlib import asynccontextmanager
from dataclasses import dataclass, field
from pathlib import Path
from typing import TYPE_CHECKING, cast
from uuid import uuid4
import numpy as np
from numpy.typing import NDArray
if TYPE_CHECKING:
from collections.abc import AsyncIterator, Callable
# =============================================================================
# Constants
# =============================================================================
SAMPLE_RATE = 16000
CHUNK_SIZE = 1600 # 100ms at 16kHz
CHUNKS_PER_SECOND = SAMPLE_RATE // CHUNK_SIZE
BYTES_PER_KB = 1024
BYTES_PER_MB = 1024 * 1024
LINUX_RSS_KB_MULTIPLIER = 1024 # resource.ru_maxrss returns KB on Linux
DEFAULT_DB_SEGMENTS = 200
DEFAULT_CONVERTER_SEGMENTS = 200
DEFAULT_OBSERVABILITY_SAMPLES = 200
DEFAULT_METRICS_SAMPLES = 60
DEFAULT_ASR_SEGMENTS = 200
DEFAULT_VOICE_PROFILE_SAMPLES = 200
WORDS_PER_SEGMENT = 4
AudioChunk = NDArray[np.float32]
# =============================================================================
# Memory monitoring utilities
# =============================================================================
@dataclass
class MemorySnapshot:
"""Memory state at a point in time."""
rss_bytes: int
gc_gen0: int
gc_gen1: int
gc_gen2: int
timestamp: float = field(default_factory=time.perf_counter)
@property
def rss_mb(self) -> float:
"""RSS in megabytes."""
return self.rss_bytes / BYTES_PER_MB if self.rss_bytes >= 0 else -1.0
@dataclass
class MemoryMetrics:
"""Memory metrics for a benchmark run."""
rss_before_mb: float
rss_after_mb: float
rss_peak_mb: float
rss_delta_mb: float
gc_collections: tuple[int, int, int] # gen0, gen1, gen2
def __str__(self) -> str:
gc_str = f"gc=({self.gc_collections[0]},{self.gc_collections[1]},{self.gc_collections[2]})"
return (
f"RSS: {self.rss_before_mb:.1f}{self.rss_after_mb:.1f}MB "
f"(peak={self.rss_peak_mb:.1f}MB, Δ={self.rss_delta_mb:+.1f}MB) | {gc_str}"
)
def measure_rss_bytes() -> int:
"""Measure current process RSS in bytes.
Returns:
RSS in bytes, or -1 if not supported.
"""
try:
import psutil
return psutil.Process().memory_info().rss
except ImportError:
pass
if sys.platform in ("darwin", "linux"):
try:
import resource
usage = resource.getrusage(resource.RUSAGE_SELF)
if sys.platform == "linux":
return usage.ru_maxrss * LINUX_RSS_KB_MULTIPLIER
return usage.ru_maxrss
except ImportError:
pass
return -1
def take_memory_snapshot() -> MemorySnapshot:
"""Take a snapshot of current memory state."""
gc_counts = gc.get_count()
return MemorySnapshot(
rss_bytes=measure_rss_bytes(),
gc_gen0=gc_counts[0],
gc_gen1=gc_counts[1],
        gc_gen2=gc_counts[2],
)
def calculate_memory_metrics(
before: MemorySnapshot,
after: MemorySnapshot,
peak_rss_bytes: int,
) -> MemoryMetrics:
"""Calculate memory metrics between two snapshots."""
return MemoryMetrics(
rss_before_mb=before.rss_mb,
rss_after_mb=after.rss_mb,
rss_peak_mb=peak_rss_bytes / BYTES_PER_MB if peak_rss_bytes >= 0 else -1.0,
rss_delta_mb=(after.rss_bytes - before.rss_bytes) / BYTES_PER_MB
if before.rss_bytes >= 0 and after.rss_bytes >= 0
else 0.0,
gc_collections=(
after.gc_gen0 - before.gc_gen0,
after.gc_gen1 - before.gc_gen1,
after.gc_gen2 - before.gc_gen2,
),
)
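# Illustrative sketch (not part of the benchmark suite): the three helpers above
# compose into a simple before/after measurement; `run_workload` is a hypothetical
# placeholder and psutil is assumed to be installed for accurate RSS readings.
#   before = take_memory_snapshot()
#   run_workload()
#   after = take_memory_snapshot()
#   print(calculate_memory_metrics(before, after, peak_rss_bytes=measure_rss_bytes()))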
@dataclass
class BenchmarkResult:
"""Result from a single benchmark."""
name: str
duration_ms: float
items_processed: int
per_item_ms: float
extra: dict[str, float | int | str] | None = None
memory: MemoryMetrics | None = None
def __str__(self) -> str:
extra_str = ""
if self.extra:
extra_str = " | " + ", ".join(f"{k}={v}" for k, v in self.extra.items())
return (
f"{self.name}: {self.duration_ms:.2f}ms total, "
f"{self.per_item_ms:.4f}ms/item ({self.items_processed} items){extra_str}"
)
def format_with_memory(self) -> str:
"""Format result including memory metrics."""
base = str(self)
if self.memory:
return f"{base}\n Memory: {self.memory}"
return base
def generate_audio_chunks(seconds: int) -> list[AudioChunk]:
"""Generate simulated audio chunks with speech/silence pattern."""
np.random.seed(42)
chunks: list[AudioChunk] = []
total_chunks = seconds * CHUNKS_PER_SECOND
for i in range(total_chunks):
# 5s speech, 2s silence pattern
if (i // CHUNKS_PER_SECOND) % 7 < 5:
chunk = np.random.randn(CHUNK_SIZE).astype(np.float32) * 0.3
else:
chunk = np.random.randn(CHUNK_SIZE).astype(np.float32) * 0.001
chunks.append(chunk)
return chunks
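# With SAMPLE_RATE=16000 and CHUNK_SIZE=1600, each chunk covers 100ms, so a 60s
# run yields 600 chunks cycling through 5s of "speech" and 2s of near-silence.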
def benchmark_audio_pipeline(duration_seconds: int = 60) -> BenchmarkResult:
"""Benchmark the complete audio processing pipeline."""
from noteflow.infrastructure.asr.segmenter import Segmenter, SegmenterConfig
from noteflow.infrastructure.asr.streaming_vad import StreamingVad
from noteflow.infrastructure.audio.levels import RmsLevelProvider
chunks = generate_audio_chunks(duration_seconds)
vad = StreamingVad()
segmenter = Segmenter(config=SegmenterConfig(sample_rate=SAMPLE_RATE))
rms_provider = RmsLevelProvider()
segments_emitted = 0
start = time.perf_counter()
for chunk in chunks:
is_speech = vad.process_chunk(chunk)
_ = rms_provider.get_rms(chunk)
_ = rms_provider.get_db(chunk)
for _ in segmenter.process_audio(chunk, is_speech):
segments_emitted += 1
if segmenter.flush() is not None:
segments_emitted += 1
elapsed = time.perf_counter() - start
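    # real_time_factor = processing time / simulated audio duration; values below
    # 1.0 mean the pipeline keeps up with live capture.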
real_time_factor = elapsed / duration_seconds
return BenchmarkResult(
name="Audio Pipeline",
duration_ms=elapsed * 1000,
items_processed=len(chunks),
per_item_ms=(elapsed * 1000) / len(chunks),
extra={
"simulated_seconds": duration_seconds,
"segments": segments_emitted,
"realtime_factor": f"{real_time_factor:.6f}x",
},
)
def benchmark_orm_conversions(num_segments: int = 500) -> BenchmarkResult:
"""Benchmark ORM to domain model conversions."""
from noteflow.infrastructure.converters.orm_converters import OrmConverter
from noteflow.infrastructure.persistence.models.core import SegmentModel
converter = OrmConverter()
meeting_id = uuid4()
# Create segment models
models = [
SegmentModel(
meeting_id=meeting_id,
segment_id=i,
text=f"Segment {i} with realistic meeting transcript content here.",
start_time=float(i * 5),
end_time=float(i * 5 + 4.5),
speaker_id=f"speaker_{i % 3}",
)
for i in range(num_segments)
]
start = time.perf_counter()
_ = [converter.segment_to_domain(m) for m in models]
elapsed = time.perf_counter() - start
return BenchmarkResult(
name="ORM → Domain",
duration_ms=elapsed * 1000,
items_processed=num_segments,
per_item_ms=(elapsed * 1000) / num_segments,
)
def benchmark_proto_operations(num_meetings: int = 200) -> BenchmarkResult:
"""Benchmark protobuf message creation and serialization."""
from noteflow.grpc.proto import noteflow_pb2
# Create messages
start = time.perf_counter()
meetings = [
noteflow_pb2.Meeting(
id=str(uuid4()),
title=f"Meeting {i}",
state=noteflow_pb2.MEETING_STATE_COMPLETED,
)
for i in range(num_meetings)
]
creation_time = time.perf_counter() - start
# Create response
response = noteflow_pb2.ListMeetingsResponse(
meetings=meetings, total_count=len(meetings)
)
# Serialize
start = time.perf_counter()
serialized = response.SerializeToString()
serialize_time = time.perf_counter() - start
# Deserialize
start = time.perf_counter()
parsed = noteflow_pb2.ListMeetingsResponse()
parsed.ParseFromString(serialized)
deserialize_time = time.perf_counter() - start
total_time = creation_time + serialize_time + deserialize_time
return BenchmarkResult(
name="Proto Ops",
duration_ms=total_time * 1000,
items_processed=num_meetings,
per_item_ms=(creation_time * 1000) / num_meetings,
extra={
"creation_ms": f"{creation_time * 1000:.2f}",
"serialize_ms": f"{serialize_time * 1000:.2f}",
"deserialize_ms": f"{deserialize_time * 1000:.2f}",
"payload_kb": f"{len(serialized) / 1024:.1f}",
},
)
def benchmark_grpc_segment_converters(
    num_segments: int = DEFAULT_CONVERTER_SEGMENTS,
) -> BenchmarkResult:
"""Benchmark gRPC segment converter performance."""
from noteflow.domain.entities.segment import Segment, WordTiming
from noteflow.grpc.mixins.converters import segment_to_proto_update
meeting_id = str(uuid4())
segments = [
Segment(
segment_id=i,
text="Segment benchmark text",
start_time=float(i),
end_time=float(i + 1),
words=[
WordTiming(word="hello", start_time=0.0, end_time=0.25, probability=0.95),
WordTiming(word="world", start_time=0.25, end_time=0.5, probability=0.92),
WordTiming(word="from", start_time=0.5, end_time=0.75, probability=0.9),
WordTiming(word="noteflow", start_time=0.75, end_time=1.0, probability=0.93),
],
)
for i in range(num_segments)
]
start = time.perf_counter()
for segment in segments:
_ = segment_to_proto_update(meeting_id, segment)
elapsed = time.perf_counter() - start
return BenchmarkResult(
name="gRPC Segment → Proto",
duration_ms=elapsed * 1000,
items_processed=num_segments,
per_item_ms=(elapsed * 1000) / num_segments,
extra={"words_per_segment": WORDS_PER_SEGMENT},
)
def benchmark_asr_segment_build(
num_segments: int = DEFAULT_ASR_SEGMENTS,
) -> BenchmarkResult:
"""Benchmark ASR result to Segment conversion."""
from uuid import UUID
from noteflow.domain.value_objects import AudioSource, MeetingId
from noteflow.grpc.mixins.converters import SegmentBuildParams, create_segment_from_asr
from noteflow.infrastructure.asr.dto import AsrResult, WordTiming
meeting_id = MeetingId(UUID("00000000-0000-0000-0000-000000000002"))
words = (
WordTiming(word="hello", start=0.0, end=0.25, probability=0.95),
WordTiming(word="world", start=0.25, end=0.5, probability=0.92),
WordTiming(word="from", start=0.5, end=0.75, probability=0.9),
WordTiming(word="noteflow", start=0.75, end=1.0, probability=0.93),
)
result_template = AsrResult(
text="Benchmark segment text",
start=0.0,
end=1.0,
words=words,
language="en",
language_probability=0.98,
avg_logprob=-0.2,
no_speech_prob=0.01,
)
start = time.perf_counter()
for i in range(num_segments):
params = SegmentBuildParams(
meeting_id=meeting_id,
segment_id=i,
segment_start_time=float(i),
audio_source=AudioSource.MIC,
)
_ = create_segment_from_asr(params, result_template)
elapsed = time.perf_counter() - start
return BenchmarkResult(
name="ASR Result → Segment",
duration_ms=elapsed * 1000,
items_processed=num_segments,
per_item_ms=(elapsed * 1000) / num_segments,
extra={"words_per_segment": WORDS_PER_SEGMENT},
)
def _generate_embedding_pairs(
samples: int,
) -> tuple[list[list[float]], list[list[float]]]:
from noteflow.application.services.voice_profile.service import EMBEDDING_DIM
rng = np.random.default_rng(42)
base = rng.standard_normal((samples, EMBEDDING_DIM)).astype(np.float32)
noise = rng.standard_normal((samples, EMBEDDING_DIM)).astype(np.float32) * 0.01
base_list = [row.tolist() for row in base]
noisy_list = [row.tolist() for row in (base + noise)]
return base_list, noisy_list
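# The similarity benchmark below assumes cosine_similarity implements the standard
#   cos(a, b) = (a · b) / (‖a‖ · ‖b‖)
# over EMBEDDING_DIM-length vectors; the 1% noise added above should keep scores
# close to 1.0.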
def benchmark_voice_profile_similarity(
samples: int = DEFAULT_VOICE_PROFILE_SAMPLES,
) -> BenchmarkResult:
"""Benchmark cosine similarity for voice profile matching."""
from noteflow.application.services.voice_profile.service import cosine_similarity
existing, new = _generate_embedding_pairs(samples)
start = time.perf_counter()
for idx in range(samples):
cosine_similarity(existing[idx], new[idx])
elapsed = time.perf_counter() - start
return BenchmarkResult(
name="Voice Profile Similarity",
duration_ms=elapsed * 1000,
items_processed=samples,
per_item_ms=(elapsed * 1000) / samples,
)
def benchmark_voice_profile_merge(
samples: int = DEFAULT_VOICE_PROFILE_SAMPLES,
) -> BenchmarkResult:
"""Benchmark merge_embeddings for voice profile updates."""
from noteflow.application.services.voice_profile.service import merge_embeddings
existing, new = _generate_embedding_pairs(samples)
existing_count = 3
start = time.perf_counter()
for idx in range(samples):
merge_embeddings(existing[idx], new[idx], existing_count)
elapsed = time.perf_counter() - start
return BenchmarkResult(
name="Voice Profile Merge",
duration_ms=elapsed * 1000,
items_processed=samples,
per_item_ms=(elapsed * 1000) / samples,
)
def benchmark_observability_converters(
num_entries: int = DEFAULT_OBSERVABILITY_SAMPLES,
) -> list[BenchmarkResult]:
"""Benchmark log and metrics converter performance."""
from datetime import UTC, datetime
from noteflow.grpc.mixins.converters import log_entry_to_proto, metrics_to_proto
from noteflow.infrastructure.logging.log_buffer import LogEntry
from noteflow.infrastructure.metrics.collector import PerformanceMetrics
metrics = PerformanceMetrics(
timestamp=time.time(),
cpu_percent=25.0,
memory_percent=60.0,
memory_mb=8000.0,
disk_percent=40.0,
network_bytes_sent=1024,
network_bytes_recv=2048,
process_memory_mb=512.0,
active_connections=8,
)
log_entry = LogEntry(
timestamp=datetime.now(tz=UTC),
level="info",
source="benchmark",
message="Segment persisted",
details={"meeting_id": "benchmark"},
trace_id="trace",
span_id="span",
event_type="segment.added",
operation_id="op",
entity_id="entity",
)
start = time.perf_counter()
for _ in range(num_entries):
_ = metrics_to_proto(metrics)
metrics_elapsed = time.perf_counter() - start
start = time.perf_counter()
for _ in range(num_entries):
_ = log_entry_to_proto(log_entry)
logs_elapsed = time.perf_counter() - start
return [
BenchmarkResult(
name="gRPC Metrics → Proto",
duration_ms=metrics_elapsed * 1000,
items_processed=num_entries,
per_item_ms=(metrics_elapsed * 1000) / num_entries,
),
BenchmarkResult(
name="gRPC Log → Proto",
duration_ms=logs_elapsed * 1000,
items_processed=num_entries,
per_item_ms=(logs_elapsed * 1000) / num_entries,
),
]
def benchmark_metrics_collection(samples: int = DEFAULT_METRICS_SAMPLES) -> BenchmarkResult:
"""Benchmark MetricsCollector.collect_now overhead."""
from noteflow.infrastructure.metrics.collector import MetricsCollector
collector = MetricsCollector(history_size=samples)
start = time.perf_counter()
for _ in range(samples):
collector.collect_now()
elapsed = time.perf_counter() - start
return BenchmarkResult(
name="Metrics Collect",
duration_ms=elapsed * 1000,
items_processed=samples,
per_item_ms=(elapsed * 1000) / samples,
)
async def benchmark_async_overhead(iterations: int = 1000) -> BenchmarkResult:
"""Benchmark async context manager overhead."""
@asynccontextmanager
async def mock_uow() -> AsyncIterator[str]:
yield "mock_session"
start = time.perf_counter()
for _ in range(iterations):
async with mock_uow():
pass
elapsed = time.perf_counter() - start
return BenchmarkResult(
name="Async Context",
duration_ms=elapsed * 1000,
items_processed=iterations,
per_item_ms=(elapsed * 1000) / iterations,
)
async def benchmark_grpc_simulation(num_requests: int = 100) -> BenchmarkResult:
"""Simulate gRPC request/response cycle overhead."""
from noteflow.grpc.proto import noteflow_pb2
async def simulate_request() -> noteflow_pb2.Meeting:
# Simulate request parsing
request = noteflow_pb2.GetMeetingRequest(meeting_id=str(uuid4()))
_ = request.SerializeToString()
# Simulate minimal processing delay
await asyncio.sleep(0)
# Simulate response creation
return noteflow_pb2.Meeting(
id=request.meeting_id,
title="Test Meeting",
state=noteflow_pb2.MEETING_STATE_COMPLETED,
)
start = time.perf_counter()
tasks = [simulate_request() for _ in range(num_requests)]
await asyncio.gather(*tasks)
elapsed = time.perf_counter() - start
return BenchmarkResult(
name="gRPC Sim",
duration_ms=elapsed * 1000,
items_processed=num_requests,
per_item_ms=(elapsed * 1000) / num_requests,
extra={"concurrent": num_requests},
)
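# Note: the simulation exercises protobuf construction/serialization and event-loop
# scheduling only; no network transport or real servicer logic is involved.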
async def benchmark_db_roundtrip(
database_url: str,
segment_count: int = DEFAULT_DB_SEGMENTS,
) -> list[BenchmarkResult]:
"""Benchmark database insert and retrieval for segments."""
from noteflow.domain.entities import Meeting, Segment
from noteflow.infrastructure.persistence.database import create_engine_and_session_factory
from noteflow.infrastructure.persistence.unit_of_work import SqlAlchemyUnitOfWork
meeting = Meeting.create(title="Benchmark Meeting")
segments = [
Segment(
segment_id=i,
text="Benchmark segment text",
start_time=float(i),
end_time=float(i + 1),
)
for i in range(segment_count)
]
engine, session_factory = create_engine_and_session_factory(database_url, pool_size=5)
temp_dir = tempfile.TemporaryDirectory()
meetings_dir = Path(temp_dir.name)
try:
async with SqlAlchemyUnitOfWork(session_factory, meetings_dir) as uow:
start = time.perf_counter()
await uow.meetings.create(meeting)
await uow.segments.add_batch(meeting.id, segments)
await uow.commit()
insert_elapsed = time.perf_counter() - start
async with SqlAlchemyUnitOfWork(session_factory, meetings_dir) as uow:
start = time.perf_counter()
_ = await uow.meetings.get(meeting.id)
_ = await uow.segments.get_by_meeting(meeting.id)
fetch_elapsed = time.perf_counter() - start
async with SqlAlchemyUnitOfWork(session_factory, meetings_dir) as uow:
await uow.meetings.delete(meeting.id)
await uow.commit()
finally:
await engine.dispose()
temp_dir.cleanup()
return [
BenchmarkResult(
name="DB Insert + Batch",
duration_ms=insert_elapsed * 1000,
items_processed=segment_count,
per_item_ms=(insert_elapsed * 1000) / segment_count,
),
BenchmarkResult(
name="DB Fetch Segments",
duration_ms=fetch_elapsed * 1000,
items_processed=segment_count,
per_item_ms=(fetch_elapsed * 1000) / segment_count,
),
]
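# The DB benchmark expects an async SQLAlchemy URL; a typical value (illustrative
# assumption, adjust to your deployment) looks like:
#   postgresql+asyncpg://user:pass@localhost:5432/noteflow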
def benchmark_import_times() -> list[BenchmarkResult]:
"""Measure import times for key modules."""
results: list[BenchmarkResult] = []
modules = [
("noteflow.infrastructure.asr", "ASR Module"),
("noteflow.grpc.proto.noteflow_pb2", "Proto Module"),
("noteflow.infrastructure.persistence.models", "ORM Models"),
("noteflow.domain.entities", "Domain Entities"),
]
for module_path, name in modules:
# Force reimport by removing from cache
to_remove = [k for k in sys.modules if k.startswith(module_path.split(".")[0])]
for k in to_remove:
sys.modules.pop(k, None)
start = time.perf_counter()
__import__(module_path)
elapsed = time.perf_counter() - start
results.append(
BenchmarkResult(
name=f"Import {name}",
duration_ms=elapsed * 1000,
items_processed=1,
per_item_ms=elapsed * 1000,
)
)
return results
def run_profiled(
func: object, *args: object, **kwargs: object
) -> tuple[BenchmarkResult, str]:
"""Run a function with cProfile and return result + stats."""
profiler = cProfile.Profile()
profiler.enable()
# func is expected to be a callable returning BenchmarkResult
callable_func = cast("Callable[..., BenchmarkResult]", func)
result = callable_func(*args, **kwargs)
profiler.disable()
stream = io.StringIO()
stats = pstats.Stats(profiler, stream=stream)
stats.strip_dirs()
stats.sort_stats(pstats.SortKey.CUMULATIVE)
stats.print_stats(20)
return result, stream.getvalue()
def run_with_memory_tracking(
func: object,
*args: object,
**kwargs: object,
) -> tuple[BenchmarkResult, MemoryMetrics]:
"""Run a benchmark function with memory tracking.
Args:
func: Benchmark function to run.
*args: Positional arguments for the function.
**kwargs: Keyword arguments for the function.
Returns:
Tuple of (benchmark result, memory metrics).
"""
gc.collect() # Clear pending garbage
snapshot_before = take_memory_snapshot()
peak_rss = snapshot_before.rss_bytes
callable_func = cast("Callable[..., BenchmarkResult]", func)
result = callable_func(*args, **kwargs)
    # Sample RSS once after the run as a rough approximation of the peak.
current_rss = measure_rss_bytes()
if current_rss > peak_rss:
peak_rss = current_rss
gc.collect()
snapshot_after = take_memory_snapshot()
metrics = calculate_memory_metrics(snapshot_before, snapshot_after, peak_rss)
result.memory = metrics
return result, metrics
async def main(
enable_profile: bool = False,
verbose: bool = False,
enable_memory: bool = False,
database_url: str | None = None,
enable_db: bool = False,
db_segments: int = DEFAULT_DB_SEGMENTS,
) -> None:
"""Run all benchmarks."""
print("=" * 70)
print("NoteFlow Comprehensive Performance Profile")
print("=" * 70)
print()
initial_snapshot: MemorySnapshot | None = None
if enable_memory:
initial_snapshot = take_memory_snapshot()
print(f"Initial RSS: {initial_snapshot.rss_mb:.1f}MB")
print()
results: list[BenchmarkResult] = []
    # Import-time benchmarks are skipped: benchmark_import_times() evicts
    # noteflow modules from sys.modules, which would disturb the remaining
    # benchmarks.
    # results.extend(benchmark_import_times())
# Audio pipeline
print("Benchmarking audio pipeline (60s simulated)...")
if enable_profile:
profiled_result, profile_output = run_profiled(benchmark_audio_pipeline, 60)
results.append(profiled_result)
if verbose:
print(profile_output)
elif enable_memory:
mem_result, _ = run_with_memory_tracking(benchmark_audio_pipeline, 60)
results.append(mem_result)
else:
results.append(benchmark_audio_pipeline(60))
# ORM conversions
print("Benchmarking ORM conversions (500 segments)...")
if enable_memory:
mem_result, _ = run_with_memory_tracking(benchmark_orm_conversions, 500)
results.append(mem_result)
else:
results.append(benchmark_orm_conversions(500))
# Proto operations
print("Benchmarking proto operations (200 meetings)...")
if enable_memory:
mem_result, _ = run_with_memory_tracking(benchmark_proto_operations, 200)
results.append(mem_result)
else:
results.append(benchmark_proto_operations(200))
# gRPC converters
print("Benchmarking gRPC converters (segments/logs/metrics)...")
results.append(benchmark_grpc_segment_converters(DEFAULT_CONVERTER_SEGMENTS))
results.extend(benchmark_observability_converters(DEFAULT_OBSERVABILITY_SAMPLES))
# ASR segment build conversion
print("Benchmarking ASR segment conversion...")
if enable_memory:
mem_result, _ = run_with_memory_tracking(benchmark_asr_segment_build, DEFAULT_ASR_SEGMENTS)
results.append(mem_result)
else:
results.append(benchmark_asr_segment_build(DEFAULT_ASR_SEGMENTS))
# Voice profile operations
print("Benchmarking voice profile operations...")
if enable_memory:
mem_result, _ = run_with_memory_tracking(
benchmark_voice_profile_similarity, DEFAULT_VOICE_PROFILE_SAMPLES
)
results.append(mem_result)
mem_result, _ = run_with_memory_tracking(
benchmark_voice_profile_merge, DEFAULT_VOICE_PROFILE_SAMPLES
)
results.append(mem_result)
else:
results.append(benchmark_voice_profile_similarity(DEFAULT_VOICE_PROFILE_SAMPLES))
results.append(benchmark_voice_profile_merge(DEFAULT_VOICE_PROFILE_SAMPLES))
# Metrics collection overhead
print("Benchmarking metrics collection (60 samples)...")
results.append(benchmark_metrics_collection(DEFAULT_METRICS_SAMPLES))
# Async overhead
print("Benchmarking async context overhead (1000 iterations)...")
results.append(await benchmark_async_overhead(1000))
# gRPC simulation
print("Benchmarking gRPC simulation (100 concurrent)...")
results.append(await benchmark_grpc_simulation(100))
# Database round-trip (optional)
if enable_db:
resolved_db_url = database_url or os.environ.get("NOTEFLOW_DATABASE_URL", "")
if resolved_db_url:
print(f"Benchmarking database round-trip ({db_segments} segments)...")
results.extend(await benchmark_db_roundtrip(resolved_db_url, db_segments))
else:
print("Skipping DB benchmark (no database URL provided).")
# Summary
print()
print("=" * 70)
print("BENCHMARK RESULTS")
print("=" * 70)
for result in results:
if enable_memory and result.memory:
print(f" {result.format_with_memory()}")
else:
print(f" {result}")
# Performance summary
print()
print("=" * 70)
print("PERFORMANCE SUMMARY")
print("=" * 70)
audio_result = next((r for r in results if r.name == "Audio Pipeline"), None)
if audio_result and audio_result.extra:
rtf = audio_result.extra.get("realtime_factor", "N/A")
print(f" Real-time factor: {rtf} (< 1.0 = faster than real-time)")
total_overhead = sum(
r.duration_ms
for r in results
if r.name in ("ORM → Domain", "Proto Ops", "Async Context")
)
print(f" Data layer overhead (500 segs + 200 mtgs + 1k ctx): {total_overhead:.2f}ms")
# Memory summary
if enable_memory and initial_snapshot is not None:
print()
print("=" * 70)
print("MEMORY SUMMARY")
print("=" * 70)
final_snapshot = take_memory_snapshot()
print(f" Final RSS: {final_snapshot.rss_mb:.1f}MB")
total_delta = final_snapshot.rss_bytes - initial_snapshot.rss_bytes
print(f" Total RSS change: {total_delta / BYTES_PER_MB:+.1f}MB")
total_gc = (
final_snapshot.gc_gen0 - initial_snapshot.gc_gen0,
final_snapshot.gc_gen1 - initial_snapshot.gc_gen1,
final_snapshot.gc_gen2 - initial_snapshot.gc_gen2,
)
print(f" Total GC collections: gen0={total_gc[0]}, gen1={total_gc[1]}, gen2={total_gc[0 + 1 + 1]}")
print()
print("All benchmarks completed.")
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="NoteFlow performance profiler")
parser.add_argument(
"--profile", action="store_true", help="Enable cProfile for detailed analysis"
)
parser.add_argument(
"--verbose", action="store_true", help="Show extended profile output"
)
parser.add_argument(
"--memory", action="store_true", help="Enable RSS and GC memory profiling"
)
parser.add_argument(
"--db",
action="store_true",
help="Enable database round-trip benchmarking (requires NOTEFLOW_DATABASE_URL)",
)
parser.add_argument(
"--db-url",
default=os.environ.get("NOTEFLOW_DATABASE_URL", ""),
help="Database URL for benchmarking (defaults to NOTEFLOW_DATABASE_URL).",
)
parser.add_argument(
"--db-segments",
type=int,
default=DEFAULT_DB_SEGMENTS,
help="Number of segments for DB benchmark.",
)
args = parser.parse_args()
asyncio.run(main(
enable_profile=args.profile,
verbose=args.verbose,
enable_memory=args.memory,
database_url=args.db_url,
enable_db=args.db,
db_segments=args.db_segments,
))
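# Example invocations (sketch; assumes the repository root as the working directory):
#   python scripts/profile_comprehensive.py --memory
#   python scripts/profile_comprehensive.py --profile --verbose
#   python scripts/profile_comprehensive.py --db --db-url "$NOTEFLOW_DATABASE_URL" --db-segments 500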