Files
noteflow/spikes/spike_02_audio_capture/protocols.py
Travis Vasceannie af1285b181 Add initial project structure and files
- Introduced .python-version for Python version management.
- Added AGENTS.md for documentation on agent usage and best practices.
- Created alembic.ini for database migration configurations.
- Implemented main.py as the entry point for the application.
- Established pyproject.toml for project dependencies and configurations.
- Initialized README.md for project overview.
- Generated uv.lock for dependency locking.
- Documented milestones and specifications in docs/milestones.md and docs/spec.md.
- Created logs/status_line.json for logging status information.
- Added initial spike implementations for UI tray hotkeys, audio capture, ASR latency, and encryption validation.
- Set up NoteFlow core structure in src/noteflow with necessary modules and services.
- Developed test suite in tests directory for application, domain, infrastructure, and integration testing.
- Included initial migration scripts in infrastructure/persistence/migrations for database setup.
- Established security protocols in infrastructure/security for key management and encryption.
- Implemented audio infrastructure for capturing and processing audio data.
- Created converters for ASR and ORM in infrastructure/converters.
- Added export functionality for different formats in infrastructure/export.
- Ensured all new files are included in the repository for future development.
2025-12-17 18:28:59 +00:00

169 lines
4.3 KiB
Python

"""Audio capture protocols and data types for Spike 2.
These protocols define the contracts for audio capture components that will be
promoted to src/noteflow/audio/ after validation.
"""
from __future__ import annotations

import math
from collections.abc import Callable
from dataclasses import dataclass
from typing import Protocol

import numpy as np
from numpy.typing import NDArray
@dataclass(frozen=True)
class AudioDeviceInfo:
    """Information about an audio input device.

    Instances are frozen: fields cannot be reassigned after construction,
    and instances are hashable and compare by value.

    Attributes:
        device_id: Integer identifier of the device.
        name: Name of the device.
        channels: Channel count associated with the device.
        sample_rate: Sample rate associated with the device.
        is_default: Whether this device is the default one.
    """

    device_id: int
    name: str
    channels: int
    sample_rate: int
    is_default: bool
@dataclass
class TimestampedAudio:
    """Audio frames with capture timestamp.

    Attributes:
        frames: Captured audio samples as a float32 array.
        timestamp: Monotonic time when the frames were captured.
        duration: Duration of the chunk in seconds.

    Raises:
        ValueError: If ``duration`` or ``timestamp`` is negative or is not
            a finite number (NaN or infinity).
    """

    frames: NDArray[np.float32]
    timestamp: float  # Monotonic time when captured
    duration: float  # Duration in seconds

    def __post_init__(self) -> None:
        """Validate the timing fields of the audio chunk."""
        # NaN compares False against every number, so a bare ``< 0`` check
        # would silently accept it; reject non-finite values explicitly.
        if not math.isfinite(self.duration):
            raise ValueError("Duration must be finite")
        if self.duration < 0:
            raise ValueError("Duration must be non-negative")
        if not math.isfinite(self.timestamp):
            raise ValueError("Timestamp must be finite")
        if self.timestamp < 0:
            raise ValueError("Timestamp must be non-negative")
# Type alias for the audio frame callback: a callable that takes the captured
# frames (float32 array) and a float timestamp, and returns nothing.
AudioFrameCallback = Callable[[NDArray[np.float32], float], None]
class AudioCapture(Protocol):
    """Protocol for audio input capture.

    Implementations should handle device enumeration, stream management,
    and device change detection.
    """

    def list_devices(self) -> list[AudioDeviceInfo]:
        """List available audio input devices.

        Returns:
            List of AudioDeviceInfo for all available input devices.
        """
        ...

    def start(
        self,
        device_id: int | None,
        on_frames: AudioFrameCallback,
        sample_rate: int = 16000,
        channels: int = 1,
        chunk_duration_ms: int = 100,
    ) -> None:
        """Start capturing audio from the specified device.

        Args:
            device_id: Device ID to capture from, or None for default device.
            on_frames: Callback receiving (frames, timestamp) for each chunk.
            sample_rate: Sample rate in Hz (default 16kHz for ASR).
            channels: Number of channels (default 1 for mono).
            chunk_duration_ms: Duration of each audio chunk in milliseconds.

        Raises:
            RuntimeError: If already capturing.
            ValueError: If device_id is invalid.
        """
        ...

    def stop(self) -> None:
        """Stop audio capture.

        Safe to call even if not capturing.
        """
        ...

    def is_capturing(self) -> bool:
        """Check if currently capturing audio.

        Returns:
            True if capture is active.
        """
        ...
class AudioLevelProvider(Protocol):
    """Protocol for computing audio levels (VU meter data)."""

    def get_rms(self, frames: NDArray[np.float32]) -> float:
        """Calculate RMS level from audio frames.

        Args:
            frames: Audio samples as float32 array (normalized -1.0 to 1.0).

        Returns:
            RMS level normalized to 0.0-1.0 range.
        """
        ...

    def get_db(self, frames: NDArray[np.float32]) -> float:
        """Calculate dB level from audio frames.

        Args:
            frames: Audio samples as float32 array (normalized -1.0 to 1.0).

        Returns:
            Level in dB (typically -60 to 0 range).
        """
        ...
class RingBuffer(Protocol):
    """Protocol for timestamped audio ring buffer.

    Ring buffers store recent audio with timestamps for ASR processing
    and playback sync.
    """

    def push(self, audio: TimestampedAudio) -> None:
        """Add audio to the buffer.

        Old audio is discarded if buffer exceeds max_duration.

        Args:
            audio: Timestamped audio chunk to add.
        """
        ...

    def get_window(self, duration_seconds: float) -> list[TimestampedAudio]:
        """Get the last N seconds of audio.

        Args:
            duration_seconds: How many seconds of audio to retrieve.

        Returns:
            List of TimestampedAudio chunks, ordered oldest to newest.
        """
        ...

    def clear(self) -> None:
        """Clear all audio from the buffer."""
        ...

    @property
    def duration(self) -> float:
        """Total duration of buffered audio in seconds."""
        ...

    @property
    def max_duration(self) -> float:
        """Maximum buffer duration in seconds."""
        ...