Files
noteflow/spikes/spike_02_audio_capture/capture_impl.py
Travis Vasceannie af1285b181 Add initial project structure and files
- Introduced .python-version for Python version management.
- Added AGENTS.md for documentation on agent usage and best practices.
- Created alembic.ini for database migration configurations.
- Implemented main.py as the entry point for the application.
- Established pyproject.toml for project dependencies and configurations.
- Initialized README.md for project overview.
- Generated uv.lock for dependency locking.
- Documented milestones and specifications in docs/milestones.md and docs/spec.md.
- Created logs/status_line.json for logging status information.
- Added initial spike implementations for UI tray hotkeys, audio capture, ASR latency, and encryption validation.
- Set up NoteFlow core structure in src/noteflow with necessary modules and services.
- Developed test suite in tests directory for application, domain, infrastructure, and integration testing.
- Included initial migration scripts in infrastructure/persistence/migrations for database setup.
- Established security protocols in infrastructure/security for key management and encryption.
- Implemented audio infrastructure for capturing and processing audio data.
- Created converters for ASR and ORM in infrastructure/converters.
- Added export functionality for different formats in infrastructure/export.
- Ensured all new files are included in the repository for future development.
2025-12-17 18:28:59 +00:00

186 lines
5.8 KiB
Python

"""Audio capture implementation using sounddevice.
Provides cross-platform audio input capture with device handling.
"""
from __future__ import annotations
import logging
import time
from typing import TYPE_CHECKING
import numpy as np
import sounddevice as sd
from .protocols import AudioDeviceInfo, AudioFrameCallback
if TYPE_CHECKING:
from numpy.typing import NDArray
logger = logging.getLogger(__name__)
class SoundDeviceCapture:
    """sounddevice-based implementation of AudioCapture.

    Enumerates input devices and manages the lifecycle of a single
    ``sd.InputStream``. At most one stream is open per instance; captured
    chunks are delivered to the callback passed to :meth:`start` as a 1-D
    float32 array together with a ``time.monotonic()`` timestamp.
    """

    def __init__(self) -> None:
        """Initialize the capture instance in the idle (not capturing) state."""
        self._stream: sd.InputStream | None = None
        self._callback: AudioFrameCallback | None = None
        self._device_id: int | None = None
        self._sample_rate: int = 16000
        self._channels: int = 1

    def list_devices(self) -> list[AudioDeviceInfo]:
        """List available audio input devices.

        Returns:
            List of AudioDeviceInfo for every device reporting at least one
            input channel. ``device_id`` is the device's index in
            ``sd.query_devices()``.
        """
        device_list = sd.query_devices()

        # Index of the default input device; fall back to a value that can
        # never equal an enumerate() index when it cannot be read.
        try:
            default_input = sd.default.device[0]
        except (TypeError, IndexError):
            default_input = -1

        return [
            AudioDeviceInfo(
                device_id=idx,
                name=dev["name"],
                channels=dev["max_input_channels"],
                sample_rate=int(dev["default_samplerate"]),
                is_default=(idx == default_input),
            )
            for idx, dev in enumerate(device_list)
            if dev["max_input_channels"] > 0
        ]

    def get_default_device(self) -> AudioDeviceInfo | None:
        """Get the default input device.

        Returns:
            The device flagged as default; otherwise the first input device;
            None if no input devices are available.
        """
        devices = self.list_devices()
        for dev in devices:
            if dev.is_default:
                return dev
        return devices[0] if devices else None

    def start(
        self,
        device_id: int | None,
        on_frames: AudioFrameCallback,
        sample_rate: int = 16000,
        channels: int = 1,
        chunk_duration_ms: int = 100,
    ) -> None:
        """Start capturing audio from the specified device.

        Args:
            device_id: Device ID to capture from, or None for default device.
            on_frames: Callback receiving (frames, timestamp) for each chunk.
                It is invoked from the stream callback, not from the caller
                of this method.
            sample_rate: Sample rate in Hz (default 16kHz for ASR).
            channels: Number of channels (default 1 for mono).
            chunk_duration_ms: Duration of each audio chunk in milliseconds.

        Raises:
            RuntimeError: If already capturing, or if PortAudio fails to open
                or start the stream.
            ValueError: If device_id is invalid (propagated from sounddevice;
                presumably raised for unknown devices -- verify against the
                installed sounddevice version).
        """
        if self._stream is not None:
            raise RuntimeError("Already capturing audio")

        self._callback = on_frames
        self._device_id = device_id
        self._sample_rate = sample_rate
        self._channels = channels

        # Calculate block size (frames per callback) from chunk duration.
        blocksize = int(sample_rate * chunk_duration_ms / 1000)

        def _stream_callback(
            indata: NDArray[np.float32],
            frames: int,
            time_info: object,  # cffi CData from sounddevice, unused
            status: sd.CallbackFlags,
        ) -> None:
            """Internal sounddevice callback."""
            if status:
                logger.warning("Audio stream status: %s", status)
            # Snapshot the callback so a concurrent stop() cannot null it
            # between the check and the call.
            callback = self._callback
            if callback is None:
                return
            # flatten() always returns a fresh 1-D copy, so the data stays
            # valid after this callback returns; astype(copy=False) is then
            # a no-op for float32 input instead of yet another copy.
            audio_data = indata.flatten().astype(np.float32, copy=False)
            timestamp = time.monotonic()
            callback(audio_data, timestamp)

        stream: sd.InputStream | None = None
        started = False
        try:
            stream = sd.InputStream(
                device=device_id,
                channels=channels,
                samplerate=sample_rate,
                blocksize=blocksize,
                dtype=np.float32,
                callback=_stream_callback,
            )
            stream.start()
            started = True
        except sd.PortAudioError as e:
            raise RuntimeError(f"Failed to start audio capture: {e}") from e
        finally:
            if not started:
                # Roll back on any failure: drop the callback and release a
                # stream that was opened but could not be started, so the
                # PortAudio handle is not leaked.
                self._callback = None
                if stream is not None:
                    try:
                        stream.close()
                    except sd.PortAudioError as close_err:
                        logger.warning(
                            "Error closing audio stream: %s", close_err
                        )

        self._stream = stream
        logger.info(
            "Started audio capture: device=%s, rate=%d, channels=%d, blocksize=%d",
            device_id,
            sample_rate,
            channels,
            blocksize,
        )

    def stop(self) -> None:
        """Stop audio capture.

        Safe to call even if not capturing. PortAudio errors raised while
        stopping or closing are logged, not propagated; the instance always
        returns to the idle state.
        """
        stream = self._stream
        if stream is None:
            return

        try:
            try:
                stream.stop()
            finally:
                # Always release the stream, even if stop() failed.
                stream.close()
        except sd.PortAudioError as e:
            logger.warning("Error stopping audio stream: %s", e)
        finally:
            self._stream = None
            self._callback = None
        logger.info("Stopped audio capture")

    def is_capturing(self) -> bool:
        """Check if currently capturing audio.

        Returns:
            True if a stream exists and reports itself active.
        """
        stream = self._stream
        return stream is not None and stream.active

    @property
    def current_device_id(self) -> int | None:
        """Get the device ID passed to the most recent start() call."""
        return self._device_id

    @property
    def sample_rate(self) -> int:
        """Get the sample rate passed to the most recent start() call."""
        return self._sample_rate

    @property
    def channels(self) -> int:
        """Get the channel count passed to the most recent start() call."""
        return self._channels