#!/usr/bin/env python
"""Comprehensive performance profiling for NoteFlow backend.
Run with: python scripts/profile_comprehensive.py [--profile] [--verbose] [--memory]
Profiles:
- Audio processing pipeline (VAD, segmentation, RMS)
- ORM/Domain conversions
- Protobuf operations
- Async context manager overhead
- gRPC request simulation
- Memory usage (RSS) and GC pressure
Options:
--profile Enable cProfile for detailed function-level analysis
--verbose Show extended profile output
--memory Enable detailed memory profiling (RSS, GC stats)
"""
from __future__ import annotations
import argparse
import asyncio
import cProfile
import gc
import io
import os
import pstats
import sys
import tempfile
import time
from contextlib import asynccontextmanager
from dataclasses import dataclass, field
from pathlib import Path
from typing import TYPE_CHECKING, cast
from uuid import uuid4
import numpy as np
from numpy.typing import NDArray
if TYPE_CHECKING:
from collections.abc import AsyncIterator, Callable
# =============================================================================
# Constants
# =============================================================================
SAMPLE_RATE = 16000
CHUNK_SIZE = 1600 # 100ms at 16kHz
CHUNKS_PER_SECOND = SAMPLE_RATE // CHUNK_SIZE
BYTES_PER_KB = 1024
BYTES_PER_MB = 1024 * 1024
LINUX_RSS_KB_MULTIPLIER = 1024 # resource.ru_maxrss returns KB on Linux
DEFAULT_DB_SEGMENTS = 200
DEFAULT_CONVERTER_SEGMENTS = 200
DEFAULT_OBSERVABILITY_SAMPLES = 200
DEFAULT_METRICS_SAMPLES = 60
DEFAULT_ASR_SEGMENTS = 200
DEFAULT_VOICE_PROFILE_SAMPLES = 200
WORDS_PER_SEGMENT = 4
AudioChunk = NDArray[np.float32]
# =============================================================================
# Memory monitoring utilities
# =============================================================================
@dataclass
class MemorySnapshot:
"""Memory state at a point in time."""
rss_bytes: int
gc_gen0: int
gc_gen1: int
gc_gen2: int
timestamp: float = field(default_factory=time.perf_counter)
@property
def rss_mb(self) -> float:
"""RSS in megabytes."""
return self.rss_bytes / BYTES_PER_MB if self.rss_bytes >= 0 else -1.0
@dataclass
class MemoryMetrics:
"""Memory metrics for a benchmark run."""
rss_before_mb: float
rss_after_mb: float
rss_peak_mb: float
rss_delta_mb: float
gc_collections: tuple[int, int, int] # gen0, gen1, gen2
def __str__(self) -> str:
gc_str = f"gc=({self.gc_collections[0]},{self.gc_collections[1]},{self.gc_collections[2]})"
return (
f"RSS: {self.rss_before_mb:.1f}{self.rss_after_mb:.1f}MB "
f"(peak={self.rss_peak_mb:.1f}MB, Δ={self.rss_delta_mb:+.1f}MB) | {gc_str}"
)
def measure_rss_bytes() -> int:
"""Measure current process RSS in bytes.
Returns:
RSS in bytes, or -1 if not supported.
"""
try:
import psutil
return psutil.Process().memory_info().rss
except ImportError:
pass
if sys.platform in ("darwin", "linux"):
try:
import resource
usage = resource.getrusage(resource.RUSAGE_SELF)
if sys.platform == "linux":
return usage.ru_maxrss * LINUX_RSS_KB_MULTIPLIER
return usage.ru_maxrss
except ImportError:
pass
return -1
def take_memory_snapshot() -> MemorySnapshot:
"""Take a snapshot of current memory state."""
gc_counts = gc.get_count()
return MemorySnapshot(
rss_bytes=measure_rss_bytes(),
gc_gen0=gc_counts[0],
gc_gen1=gc_counts[1],
        gc_gen2=gc_counts[2],
)
def calculate_memory_metrics(
before: MemorySnapshot,
after: MemorySnapshot,
peak_rss_bytes: int,
) -> MemoryMetrics:
"""Calculate memory metrics between two snapshots."""
return MemoryMetrics(
rss_before_mb=before.rss_mb,
rss_after_mb=after.rss_mb,
rss_peak_mb=peak_rss_bytes / BYTES_PER_MB if peak_rss_bytes >= 0 else -1.0,
rss_delta_mb=(after.rss_bytes - before.rss_bytes) / BYTES_PER_MB
if before.rss_bytes >= 0 and after.rss_bytes >= 0
else 0.0,
gc_collections=(
after.gc_gen0 - before.gc_gen0,
after.gc_gen1 - before.gc_gen1,
after.gc_gen2 - before.gc_gen2,
),
)
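# Illustrative sketch (not part of the benchmark suite): the three helpers above
# compose into a simple before/after measurement; `run_workload` is a hypothetical
# placeholder and psutil is assumed to be installed for accurate RSS readings.
#   before = take_memory_snapshot()
#   run_workload()
#   after = take_memory_snapshot()
#   print(calculate_memory_metrics(before, after, peak_rss_bytes=measure_rss_bytes()))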
@dataclass
class BenchmarkResult:
"""Result from a single benchmark."""
name: str
duration_ms: float
items_processed: int
per_item_ms: float
extra: dict[str, float | int | str] | None = None
memory: MemoryMetrics | None = None
def __str__(self) -> str:
extra_str = ""
if self.extra:
extra_str = " | " + ", ".join(f"{k}={v}" for k, v in self.extra.items())
return (
f"{self.name}: {self.duration_ms:.2f}ms total, "
f"{self.per_item_ms:.4f}ms/item ({self.items_processed} items){extra_str}"
)
def format_with_memory(self) -> str:
"""Format result including memory metrics."""
base = str(self)
if self.memory:
return f"{base}\n Memory: {self.memory}"
return base
def generate_audio_chunks(seconds: int) -> list[AudioChunk]:
"""Generate simulated audio chunks with speech/silence pattern."""
np.random.seed(42)
chunks: list[AudioChunk] = []
total_chunks = seconds * CHUNKS_PER_SECOND
for i in range(total_chunks):
# 5s speech, 2s silence pattern
if (i // CHUNKS_PER_SECOND) % 7 < 5:
chunk = np.random.randn(CHUNK_SIZE).astype(np.float32) * 0.3
else:
chunk = np.random.randn(CHUNK_SIZE).astype(np.float32) * 0.001
chunks.append(chunk)
return chunks
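# With SAMPLE_RATE=16000 and CHUNK_SIZE=1600, each chunk covers 100ms, so a 60s
# run yields 600 chunks cycling through 5s of "speech" and 2s of near-silence.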
def benchmark_audio_pipeline(duration_seconds: int = 60) -> BenchmarkResult:
"""Benchmark the complete audio processing pipeline."""
from noteflow.infrastructure.asr.segmenter import Segmenter, SegmenterConfig
from noteflow.infrastructure.asr.streaming_vad import StreamingVad
from noteflow.infrastructure.audio.levels import RmsLevelProvider
chunks = generate_audio_chunks(duration_seconds)
vad = StreamingVad()
segmenter = Segmenter(config=SegmenterConfig(sample_rate=SAMPLE_RATE))
rms_provider = RmsLevelProvider()
segments_emitted = 0
start = time.perf_counter()
for chunk in chunks:
is_speech = vad.process_chunk(chunk)
_ = rms_provider.get_rms(chunk)
_ = rms_provider.get_db(chunk)
for _ in segmenter.process_audio(chunk, is_speech):
segments_emitted += 1
if segmenter.flush() is not None:
segments_emitted += 1
elapsed = time.perf_counter() - start
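    # real_time_factor = processing time / simulated audio duration; values below
    # 1.0 mean the pipeline keeps up with live capture.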
real_time_factor = elapsed / duration_seconds
return BenchmarkResult(
name="Audio Pipeline",
duration_ms=elapsed * 1000,
items_processed=len(chunks),
per_item_ms=(elapsed * 1000) / len(chunks),
extra={
"simulated_seconds": duration_seconds,
"segments": segments_emitted,
"realtime_factor": f"{real_time_factor:.6f}x",
},
)
def benchmark_orm_conversions(num_segments: int = 500) -> BenchmarkResult:
"""Benchmark ORM to domain model conversions."""
from noteflow.infrastructure.converters.orm_converters import OrmConverter
from noteflow.infrastructure.persistence.models.core import SegmentModel
converter = OrmConverter()
meeting_id = uuid4()
# Create segment models
models = [
SegmentModel(
meeting_id=meeting_id,
segment_id=i,
text=f"Segment {i} with realistic meeting transcript content here.",
start_time=float(i * 5),
end_time=float(i * 5 + 4.5),
speaker_id=f"speaker_{i % 3}",
)
for i in range(num_segments)
]
start = time.perf_counter()
_ = [converter.segment_to_domain(m) for m in models]
elapsed = time.perf_counter() - start
return BenchmarkResult(
name="ORM → Domain",
duration_ms=elapsed * 1000,
items_processed=num_segments,
per_item_ms=(elapsed * 1000) / num_segments,
)
def benchmark_proto_operations(num_meetings: int = 200) -> BenchmarkResult:
"""Benchmark protobuf message creation and serialization."""
from noteflow.grpc.proto import noteflow_pb2
# Create messages
start = time.perf_counter()
meetings = [
noteflow_pb2.Meeting(
id=str(uuid4()),
title=f"Meeting {i}",
state=noteflow_pb2.MEETING_STATE_COMPLETED,
)
for i in range(num_meetings)
]
creation_time = time.perf_counter() - start
# Create response
response = noteflow_pb2.ListMeetingsResponse(
meetings=meetings, total_count=len(meetings)
)
# Serialize
start = time.perf_counter()
serialized = response.SerializeToString()
serialize_time = time.perf_counter() - start
# Deserialize
start = time.perf_counter()
parsed = noteflow_pb2.ListMeetingsResponse()
parsed.ParseFromString(serialized)
deserialize_time = time.perf_counter() - start
total_time = creation_time + serialize_time + deserialize_time
return BenchmarkResult(
name="Proto Ops",
duration_ms=total_time * 1000,
items_processed=num_meetings,
per_item_ms=(creation_time * 1000) / num_meetings,
extra={
"creation_ms": f"{creation_time * 1000:.2f}",
"serialize_ms": f"{serialize_time * 1000:.2f}",
"deserialize_ms": f"{deserialize_time * 1000:.2f}",
"payload_kb": f"{len(serialized) / 1024:.1f}",
},
)
def benchmark_grpc_segment_converters(
    num_segments: int = DEFAULT_CONVERTER_SEGMENTS,
) -> BenchmarkResult:
"""Benchmark gRPC segment converter performance."""
from noteflow.domain.entities.segment import Segment, WordTiming
from noteflow.grpc.mixins.converters import segment_to_proto_update
meeting_id = str(uuid4())
segments = [
Segment(
segment_id=i,
text="Segment benchmark text",
start_time=float(i),
end_time=float(i + 1),
words=[
WordTiming(word="hello", start_time=0.0, end_time=0.25, probability=0.95),
WordTiming(word="world", start_time=0.25, end_time=0.5, probability=0.92),
WordTiming(word="from", start_time=0.5, end_time=0.75, probability=0.9),
WordTiming(word="noteflow", start_time=0.75, end_time=1.0, probability=0.93),
],
)
for i in range(num_segments)
]
start = time.perf_counter()
for segment in segments:
_ = segment_to_proto_update(meeting_id, segment)
elapsed = time.perf_counter() - start
return BenchmarkResult(
name="gRPC Segment → Proto",
duration_ms=elapsed * 1000,
items_processed=num_segments,
per_item_ms=(elapsed * 1000) / num_segments,
extra={"words_per_segment": WORDS_PER_SEGMENT},
)
def benchmark_asr_segment_build(
num_segments: int = DEFAULT_ASR_SEGMENTS,
) -> BenchmarkResult:
"""Benchmark ASR result to Segment conversion."""
from uuid import UUID
from noteflow.domain.value_objects import AudioSource, MeetingId
from noteflow.grpc.mixins.converters import SegmentBuildParams, create_segment_from_asr
from noteflow.infrastructure.asr.dto import AsrResult, WordTiming
meeting_id = MeetingId(UUID("00000000-0000-0000-0000-000000000002"))
words = (
WordTiming(word="hello", start=0.0, end=0.25, probability=0.95),
WordTiming(word="world", start=0.25, end=0.5, probability=0.92),
WordTiming(word="from", start=0.5, end=0.75, probability=0.9),
WordTiming(word="noteflow", start=0.75, end=1.0, probability=0.93),
)
result_template = AsrResult(
text="Benchmark segment text",
start=0.0,
end=1.0,
words=words,
language="en",
language_probability=0.98,
avg_logprob=-0.2,
no_speech_prob=0.01,
)
start = time.perf_counter()
for i in range(num_segments):
params = SegmentBuildParams(
meeting_id=meeting_id,
segment_id=i,
segment_start_time=float(i),
audio_source=AudioSource.MIC,
)
_ = create_segment_from_asr(params, result_template)
elapsed = time.perf_counter() - start
return BenchmarkResult(
name="ASR Result → Segment",
duration_ms=elapsed * 1000,
items_processed=num_segments,
per_item_ms=(elapsed * 1000) / num_segments,
extra={"words_per_segment": WORDS_PER_SEGMENT},
)
def _generate_embedding_pairs(
samples: int,
) -> tuple[list[list[float]], list[list[float]]]:
from noteflow.application.services.voice_profile.service import EMBEDDING_DIM
rng = np.random.default_rng(42)
base = rng.standard_normal((samples, EMBEDDING_DIM)).astype(np.float32)
noise = rng.standard_normal((samples, EMBEDDING_DIM)).astype(np.float32) * 0.01
base_list = [row.tolist() for row in base]
noisy_list = [row.tolist() for row in (base + noise)]
return base_list, noisy_list
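# The similarity benchmark below assumes cosine_similarity implements the standard
#   cos(a, b) = (a · b) / (‖a‖ · ‖b‖)
# over EMBEDDING_DIM-length vectors; the 1% noise added above should keep scores
# close to 1.0.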
def benchmark_voice_profile_similarity(
samples: int = DEFAULT_VOICE_PROFILE_SAMPLES,
) -> BenchmarkResult:
"""Benchmark cosine similarity for voice profile matching."""
from noteflow.application.services.voice_profile.service import cosine_similarity
existing, new = _generate_embedding_pairs(samples)
start = time.perf_counter()
for idx in range(samples):
cosine_similarity(existing[idx], new[idx])
elapsed = time.perf_counter() - start
return BenchmarkResult(
name="Voice Profile Similarity",
duration_ms=elapsed * 1000,
items_processed=samples,
per_item_ms=(elapsed * 1000) / samples,
)
def benchmark_voice_profile_merge(
samples: int = DEFAULT_VOICE_PROFILE_SAMPLES,
) -> BenchmarkResult:
"""Benchmark merge_embeddings for voice profile updates."""
from noteflow.application.services.voice_profile.service import merge_embeddings
existing, new = _generate_embedding_pairs(samples)
existing_count = 3
start = time.perf_counter()
for idx in range(samples):
merge_embeddings(existing[idx], new[idx], existing_count)
elapsed = time.perf_counter() - start
return BenchmarkResult(
name="Voice Profile Merge",
duration_ms=elapsed * 1000,
items_processed=samples,
per_item_ms=(elapsed * 1000) / samples,
)
def benchmark_observability_converters(
num_entries: int = DEFAULT_OBSERVABILITY_SAMPLES,
) -> list[BenchmarkResult]:
"""Benchmark log and metrics converter performance."""
from datetime import UTC, datetime
from noteflow.grpc.mixins.converters import log_entry_to_proto, metrics_to_proto
from noteflow.infrastructure.logging.log_buffer import LogEntry
from noteflow.infrastructure.metrics.collector import PerformanceMetrics
metrics = PerformanceMetrics(
timestamp=time.time(),
cpu_percent=25.0,
memory_percent=60.0,
memory_mb=8000.0,
disk_percent=40.0,
network_bytes_sent=1024,
network_bytes_recv=2048,
process_memory_mb=512.0,
active_connections=8,
)
log_entry = LogEntry(
timestamp=datetime.now(tz=UTC),
level="info",
source="benchmark",
message="Segment persisted",
details={"meeting_id": "benchmark"},
trace_id="trace",
span_id="span",
event_type="segment.added",
operation_id="op",
entity_id="entity",
)
start = time.perf_counter()
for _ in range(num_entries):
_ = metrics_to_proto(metrics)
metrics_elapsed = time.perf_counter() - start
start = time.perf_counter()
for _ in range(num_entries):
_ = log_entry_to_proto(log_entry)
logs_elapsed = time.perf_counter() - start
return [
BenchmarkResult(
name="gRPC Metrics → Proto",
duration_ms=metrics_elapsed * 1000,
items_processed=num_entries,
per_item_ms=(metrics_elapsed * 1000) / num_entries,
),
BenchmarkResult(
name="gRPC Log → Proto",
duration_ms=logs_elapsed * 1000,
items_processed=num_entries,
per_item_ms=(logs_elapsed * 1000) / num_entries,
),
]
def benchmark_metrics_collection(samples: int = DEFAULT_METRICS_SAMPLES) -> BenchmarkResult:
"""Benchmark MetricsCollector.collect_now overhead."""
from noteflow.infrastructure.metrics.collector import MetricsCollector
collector = MetricsCollector(history_size=samples)
start = time.perf_counter()
for _ in range(samples):
collector.collect_now()
elapsed = time.perf_counter() - start
return BenchmarkResult(
name="Metrics Collect",
duration_ms=elapsed * 1000,
items_processed=samples,
per_item_ms=(elapsed * 1000) / samples,
)
async def benchmark_async_overhead(iterations: int = 1000) -> BenchmarkResult:
"""Benchmark async context manager overhead."""
@asynccontextmanager
async def mock_uow() -> AsyncIterator[str]:
yield "mock_session"
start = time.perf_counter()
for _ in range(iterations):
async with mock_uow():
pass
elapsed = time.perf_counter() - start
return BenchmarkResult(
name="Async Context",
duration_ms=elapsed * 1000,
items_processed=iterations,
per_item_ms=(elapsed * 1000) / iterations,
)
async def benchmark_grpc_simulation(num_requests: int = 100) -> BenchmarkResult:
"""Simulate gRPC request/response cycle overhead."""
from noteflow.grpc.proto import noteflow_pb2
async def simulate_request() -> noteflow_pb2.Meeting:
# Simulate request parsing
request = noteflow_pb2.GetMeetingRequest(meeting_id=str(uuid4()))
_ = request.SerializeToString()
# Simulate minimal processing delay
await asyncio.sleep(0)
# Simulate response creation
return noteflow_pb2.Meeting(
id=request.meeting_id,
title="Test Meeting",
state=noteflow_pb2.MEETING_STATE_COMPLETED,
)
start = time.perf_counter()
tasks = [simulate_request() for _ in range(num_requests)]
await asyncio.gather(*tasks)
elapsed = time.perf_counter() - start
return BenchmarkResult(
name="gRPC Sim",
duration_ms=elapsed * 1000,
items_processed=num_requests,
per_item_ms=(elapsed * 1000) / num_requests,
extra={"concurrent": num_requests},
)
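# Note: the simulation exercises protobuf construction/serialization and event-loop
# scheduling only; no network transport or real servicer logic is involved.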
async def benchmark_db_roundtrip(
database_url: str,
segment_count: int = DEFAULT_DB_SEGMENTS,
) -> list[BenchmarkResult]:
"""Benchmark database insert and retrieval for segments."""
from noteflow.domain.entities import Meeting, Segment
from noteflow.infrastructure.persistence.database import create_engine_and_session_factory
from noteflow.infrastructure.persistence.unit_of_work import SqlAlchemyUnitOfWork
meeting = Meeting.create(title="Benchmark Meeting")
segments = [
Segment(
segment_id=i,
text="Benchmark segment text",
start_time=float(i),
end_time=float(i + 1),
)
for i in range(segment_count)
]
engine, session_factory = create_engine_and_session_factory(database_url, pool_size=5)
temp_dir = tempfile.TemporaryDirectory()
meetings_dir = Path(temp_dir.name)
try:
async with SqlAlchemyUnitOfWork(session_factory, meetings_dir) as uow:
start = time.perf_counter()
await uow.meetings.create(meeting)
await uow.segments.add_batch(meeting.id, segments)
await uow.commit()
insert_elapsed = time.perf_counter() - start
async with SqlAlchemyUnitOfWork(session_factory, meetings_dir) as uow:
start = time.perf_counter()
_ = await uow.meetings.get(meeting.id)
_ = await uow.segments.get_by_meeting(meeting.id)
fetch_elapsed = time.perf_counter() - start
async with SqlAlchemyUnitOfWork(session_factory, meetings_dir) as uow:
await uow.meetings.delete(meeting.id)
await uow.commit()
finally:
await engine.dispose()
temp_dir.cleanup()
return [
BenchmarkResult(
name="DB Insert + Batch",
duration_ms=insert_elapsed * 1000,
items_processed=segment_count,
per_item_ms=(insert_elapsed * 1000) / segment_count,
),
BenchmarkResult(
name="DB Fetch Segments",
duration_ms=fetch_elapsed * 1000,
items_processed=segment_count,
per_item_ms=(fetch_elapsed * 1000) / segment_count,
),
]
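# The DB benchmark expects an async SQLAlchemy URL; a typical value (illustrative
# assumption, adjust to your deployment) looks like:
#   postgresql+asyncpg://user:pass@localhost:5432/noteflow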
def benchmark_import_times() -> list[BenchmarkResult]:
"""Measure import times for key modules."""
results: list[BenchmarkResult] = []
modules = [
("noteflow.infrastructure.asr", "ASR Module"),
("noteflow.grpc.proto.noteflow_pb2", "Proto Module"),
("noteflow.infrastructure.persistence.models", "ORM Models"),
("noteflow.domain.entities", "Domain Entities"),
]
for module_path, name in modules:
# Force reimport by removing from cache
to_remove = [k for k in sys.modules if k.startswith(module_path.split(".")[0])]
for k in to_remove:
sys.modules.pop(k, None)
start = time.perf_counter()
__import__(module_path)
elapsed = time.perf_counter() - start
results.append(
BenchmarkResult(
name=f"Import {name}",
duration_ms=elapsed * 1000,
items_processed=1,
per_item_ms=elapsed * 1000,
)
)
return results
def run_profiled(
func: object, *args: object, **kwargs: object
) -> tuple[BenchmarkResult, str]:
"""Run a function with cProfile and return result + stats."""
profiler = cProfile.Profile()
profiler.enable()
# func is expected to be a callable returning BenchmarkResult
callable_func = cast("Callable[..., BenchmarkResult]", func)
result = callable_func(*args, **kwargs)
profiler.disable()
stream = io.StringIO()
stats = pstats.Stats(profiler, stream=stream)
stats.strip_dirs()
stats.sort_stats(pstats.SortKey.CUMULATIVE)
stats.print_stats(20)
return result, stream.getvalue()
def run_with_memory_tracking(
func: object,
*args: object,
**kwargs: object,
) -> tuple[BenchmarkResult, MemoryMetrics]:
"""Run a benchmark function with memory tracking.
Args:
func: Benchmark function to run.
*args: Positional arguments for the function.
**kwargs: Keyword arguments for the function.
Returns:
Tuple of (benchmark result, memory metrics).
"""
gc.collect() # Clear pending garbage
snapshot_before = take_memory_snapshot()
peak_rss = snapshot_before.rss_bytes
callable_func = cast("Callable[..., BenchmarkResult]", func)
result = callable_func(*args, **kwargs)
    # Sample RSS once after the run as a rough approximation of the peak.
current_rss = measure_rss_bytes()
if current_rss > peak_rss:
peak_rss = current_rss
gc.collect()
snapshot_after = take_memory_snapshot()
metrics = calculate_memory_metrics(snapshot_before, snapshot_after, peak_rss)
result.memory = metrics
return result, metrics
async def main(
enable_profile: bool = False,
verbose: bool = False,
enable_memory: bool = False,
database_url: str | None = None,
enable_db: bool = False,
db_segments: int = DEFAULT_DB_SEGMENTS,
) -> None:
"""Run all benchmarks."""
print("=" * 70)
print("NoteFlow Comprehensive Performance Profile")
print("=" * 70)
print()
initial_snapshot: MemorySnapshot | None = None
if enable_memory:
initial_snapshot = take_memory_snapshot()
print(f"Initial RSS: {initial_snapshot.rss_mb:.1f}MB")
print()
results: list[BenchmarkResult] = []
    # Import-time benchmarks are skipped: benchmark_import_times() evicts
    # noteflow modules from sys.modules, which would disturb the remaining
    # benchmarks.
    # results.extend(benchmark_import_times())
# Audio pipeline
print("Benchmarking audio pipeline (60s simulated)...")
if enable_profile:
profiled_result, profile_output = run_profiled(benchmark_audio_pipeline, 60)
results.append(profiled_result)
if verbose:
print(profile_output)
elif enable_memory:
mem_result, _ = run_with_memory_tracking(benchmark_audio_pipeline, 60)
results.append(mem_result)
else:
results.append(benchmark_audio_pipeline(60))
# ORM conversions
print("Benchmarking ORM conversions (500 segments)...")
if enable_memory:
mem_result, _ = run_with_memory_tracking(benchmark_orm_conversions, 500)
results.append(mem_result)
else:
results.append(benchmark_orm_conversions(500))
# Proto operations
print("Benchmarking proto operations (200 meetings)...")
if enable_memory:
mem_result, _ = run_with_memory_tracking(benchmark_proto_operations, 200)
results.append(mem_result)
else:
results.append(benchmark_proto_operations(200))
# gRPC converters
print("Benchmarking gRPC converters (segments/logs/metrics)...")
results.append(benchmark_grpc_segment_converters(DEFAULT_CONVERTER_SEGMENTS))
results.extend(benchmark_observability_converters(DEFAULT_OBSERVABILITY_SAMPLES))
# ASR segment build conversion
print("Benchmarking ASR segment conversion...")
if enable_memory:
mem_result, _ = run_with_memory_tracking(benchmark_asr_segment_build, DEFAULT_ASR_SEGMENTS)
results.append(mem_result)
else:
results.append(benchmark_asr_segment_build(DEFAULT_ASR_SEGMENTS))
# Voice profile operations
print("Benchmarking voice profile operations...")
if enable_memory:
mem_result, _ = run_with_memory_tracking(
benchmark_voice_profile_similarity, DEFAULT_VOICE_PROFILE_SAMPLES
)
results.append(mem_result)
mem_result, _ = run_with_memory_tracking(
benchmark_voice_profile_merge, DEFAULT_VOICE_PROFILE_SAMPLES
)
results.append(mem_result)
else:
results.append(benchmark_voice_profile_similarity(DEFAULT_VOICE_PROFILE_SAMPLES))
results.append(benchmark_voice_profile_merge(DEFAULT_VOICE_PROFILE_SAMPLES))
# Metrics collection overhead
print("Benchmarking metrics collection (60 samples)...")
results.append(benchmark_metrics_collection(DEFAULT_METRICS_SAMPLES))
# Async overhead
print("Benchmarking async context overhead (1000 iterations)...")
results.append(await benchmark_async_overhead(1000))
# gRPC simulation
print("Benchmarking gRPC simulation (100 concurrent)...")
results.append(await benchmark_grpc_simulation(100))
# Database round-trip (optional)
if enable_db:
resolved_db_url = database_url or os.environ.get("NOTEFLOW_DATABASE_URL", "")
if resolved_db_url:
print(f"Benchmarking database round-trip ({db_segments} segments)...")
results.extend(await benchmark_db_roundtrip(resolved_db_url, db_segments))
else:
print("Skipping DB benchmark (no database URL provided).")
# Summary
print()
print("=" * 70)
print("BENCHMARK RESULTS")
print("=" * 70)
for result in results:
if enable_memory and result.memory:
print(f" {result.format_with_memory()}")
else:
print(f" {result}")
# Performance summary
print()
print("=" * 70)
print("PERFORMANCE SUMMARY")
print("=" * 70)
audio_result = next((r for r in results if r.name == "Audio Pipeline"), None)
if audio_result and audio_result.extra:
rtf = audio_result.extra.get("realtime_factor", "N/A")
print(f" Real-time factor: {rtf} (< 1.0 = faster than real-time)")
total_overhead = sum(
r.duration_ms
for r in results
if r.name in ("ORM → Domain", "Proto Ops", "Async Context")
)
print(f" Data layer overhead (500 segs + 200 mtgs + 1k ctx): {total_overhead:.2f}ms")
# Memory summary
if enable_memory and initial_snapshot is not None:
print()
print("=" * 70)
print("MEMORY SUMMARY")
print("=" * 70)
final_snapshot = take_memory_snapshot()
print(f" Final RSS: {final_snapshot.rss_mb:.1f}MB")
total_delta = final_snapshot.rss_bytes - initial_snapshot.rss_bytes
print(f" Total RSS change: {total_delta / BYTES_PER_MB:+.1f}MB")
total_gc = (
final_snapshot.gc_gen0 - initial_snapshot.gc_gen0,
final_snapshot.gc_gen1 - initial_snapshot.gc_gen1,
final_snapshot.gc_gen2 - initial_snapshot.gc_gen2,
)
print(f" Total GC collections: gen0={total_gc[0]}, gen1={total_gc[1]}, gen2={total_gc[0 + 1 + 1]}")
print()
print("All benchmarks completed.")
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="NoteFlow performance profiler")
parser.add_argument(
"--profile", action="store_true", help="Enable cProfile for detailed analysis"
)
parser.add_argument(
"--verbose", action="store_true", help="Show extended profile output"
)
parser.add_argument(
"--memory", action="store_true", help="Enable RSS and GC memory profiling"
)
parser.add_argument(
"--db",
action="store_true",
help="Enable database round-trip benchmarking (requires NOTEFLOW_DATABASE_URL)",
)
parser.add_argument(
"--db-url",
default=os.environ.get("NOTEFLOW_DATABASE_URL", ""),
help="Database URL for benchmarking (defaults to NOTEFLOW_DATABASE_URL).",
)
parser.add_argument(
"--db-segments",
type=int,
default=DEFAULT_DB_SEGMENTS,
help="Number of segments for DB benchmark.",
)
args = parser.parse_args()
asyncio.run(main(
enable_profile=args.profile,
verbose=args.verbose,
enable_memory=args.memory,
database_url=args.db_url,
enable_db=args.db,
db_segments=args.db_segments,
))
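# Example invocations (sketch; assumes the repository root as the working directory):
#   python scripts/profile_comprehensive.py --memory
#   python scripts/profile_comprehensive.py --profile --verbose
#   python scripts/profile_comprehensive.py --db --db-url "$NOTEFLOW_DATABASE_URL" --db-segments 500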