Files
noteflow/spikes/spike_02_audio_capture/capture_impl.py
Travis Vasceannie b333ea5b23 Add initial Docker and development environment setup
- Created .dockerignore to exclude unnecessary files from Docker builds.
- Added .repomixignore for managing ignored patterns in Repomix.
- Introduced Dockerfile.dev for development environment setup with Python 3.12.
- Configured docker-compose.yaml to define services, including a PostgreSQL database.
- Established a devcontainer.json for Visual Studio Code integration.
- Implemented postCreate.sh for automatic dependency installation in the dev container.
- Added constants.py to centralize configuration constants for the project.
- Updated pyproject.toml to include new development dependencies.
- Created initial documentation files for project overview and style conventions.
- Added tests for new functionalities to ensure reliability and correctness.
2025-12-19 05:02:16 +00:00

186 lines
5.8 KiB
Python

"""Audio capture implementation using sounddevice.
Provides cross-platform audio input capture with device handling.
"""
from __future__ import annotations

import logging
import time
from typing import TYPE_CHECKING

import numpy as np
import sounddevice as sd

from .protocols import AudioDeviceInfo, AudioFrameCallback

if TYPE_CHECKING:
    from numpy.typing import NDArray

logger = logging.getLogger(__name__)


class SoundDeviceCapture:
    """sounddevice-based implementation of AudioCapture.

    Enumerates input devices and manages the lifecycle of a single
    ``sounddevice.InputStream``. Captured audio is handed to a user-supplied
    callback as flat ``float32`` arrays together with a monotonic timestamp.

    At most one stream is open per instance; call :meth:`stop` before
    starting a new capture.
    """

    def __init__(self) -> None:
        """Initialize the capture instance with no active stream."""
        self._stream: sd.InputStream | None = None
        self._callback: AudioFrameCallback | None = None
        # Settings of the most recent *successful* start() call.
        self._device_id: int | None = None
        self._sample_rate: int = 16000
        self._channels: int = 1

    def list_devices(self) -> list[AudioDeviceInfo]:
        """List available audio input devices.

        Returns:
            List of AudioDeviceInfo for every device that reports at least
            one input channel. ``device_id`` is the device's index in
            ``sd.query_devices()``.
        """
        device_list = sd.query_devices()

        # Index 0 of the default device pair is the input device.
        try:
            default_input = sd.default.device[0]
        except (TypeError, IndexError):
            default_input = -1

        return [
            AudioDeviceInfo(
                device_id=idx,
                name=dev["name"],
                channels=int(dev["max_input_channels"]),
                sample_rate=int(dev["default_samplerate"]),
                is_default=(idx == default_input),
            )
            for idx, dev in enumerate(device_list)
            if int(dev.get("max_input_channels", 0)) > 0
        ]

    def get_default_device(self) -> AudioDeviceInfo | None:
        """Get the default input device.

        Returns:
            The device flagged as default; otherwise the first input device;
            or None if no input devices are available.
        """
        devices = self.list_devices()
        default = next((dev for dev in devices if dev.is_default), None)
        if default is not None:
            return default
        return devices[0] if devices else None

    def start(
        self,
        device_id: int | None,
        on_frames: AudioFrameCallback,
        sample_rate: int = 16000,
        channels: int = 1,
        chunk_duration_ms: int = 100,
    ) -> None:
        """Start capturing audio from the specified device.

        The instance's reported settings (``current_device_id``,
        ``sample_rate``, ``channels``) are updated only once the stream has
        started successfully, so a failed start leaves the instance exactly
        as it was.

        Args:
            device_id: Device ID to capture from, or None for default device.
            on_frames: Callback receiving (frames, timestamp) for each chunk.
                ``frames`` is a 1-D float32 array; ``timestamp`` comes from
                ``time.monotonic()``.
            sample_rate: Sample rate in Hz (default 16kHz for ASR).
            channels: Number of channels (default 1 for mono).
            chunk_duration_ms: Duration of each audio chunk in milliseconds.

        Raises:
            RuntimeError: If already capturing, or if PortAudio fails to
                open or start the stream.
            ValueError: If device_id is invalid.
        """
        if self._stream is not None:
            raise RuntimeError("Already capturing audio")

        # Calculate block size (frames per callback) from chunk duration.
        blocksize = int(sample_rate * chunk_duration_ms / 1000)

        def _stream_callback(
            indata: NDArray[np.float32],
            frames: int,
            time_info: object,  # cffi CData from sounddevice, unused
            status: sd.CallbackFlags,
        ) -> None:
            """Internal sounddevice callback."""
            if status:
                logger.warning("Audio stream status: %s", status)
            # Read the attribute once: stop() may clear it concurrently, and
            # a check-then-call on the attribute could end up calling None.
            callback = self._callback
            if callback is None:
                return
            # astype() always copies, so this yields an independent 1-D
            # float32 array that stays valid after the callback returns.
            audio_data = indata.reshape(-1).astype(np.float32)
            callback(audio_data, time.monotonic())

        # The callback must be registered before the stream starts, because
        # frames can arrive as soon as start() is called on the stream.
        self._callback = on_frames
        stream: sd.InputStream | None = None
        started = False
        try:
            stream = sd.InputStream(
                device=device_id,
                channels=channels,
                samplerate=sample_rate,
                blocksize=blocksize,
                dtype=np.float32,
                callback=_stream_callback,
            )
            stream.start()
            started = True
        except sd.PortAudioError as e:
            raise RuntimeError(f"Failed to start audio capture: {e}") from e
        finally:
            if not started:
                # Roll back on *any* failure (not only PortAudioError) and
                # release a stream that was opened but never started.
                self._callback = None
                if stream is not None:
                    try:
                        stream.close()
                    except sd.PortAudioError as close_err:
                        logger.warning(
                            "Error closing audio stream after failed start: %s",
                            close_err,
                        )

        self._stream = stream
        self._device_id = device_id
        self._sample_rate = sample_rate
        self._channels = channels
        logger.info(
            "Started audio capture: device=%s, rate=%d, channels=%d, blocksize=%d",
            device_id,
            sample_rate,
            channels,
            blocksize,
        )

    def stop(self) -> None:
        """Stop audio capture.

        Safe to call even if not capturing. The stream is always closed and
        the instance always returns to the idle state, even when stopping
        the stream fails.
        """
        stream = self._stream
        if stream is None:
            return

        try:
            try:
                stream.stop()
            finally:
                # Release the stream even if stop() raised.
                stream.close()
        except sd.PortAudioError as e:
            logger.warning("Error stopping audio stream: %s", e)
        finally:
            self._stream = None
            self._callback = None
        logger.info("Stopped audio capture")

    def is_capturing(self) -> bool:
        """Check if currently capturing audio.

        Returns:
            True if a stream exists and reports itself as active.
        """
        # Snapshot the attribute so a concurrent stop() cannot null it
        # between the None check and the ``active`` lookup.
        stream = self._stream
        return stream is not None and bool(stream.active)

    @property
    def current_device_id(self) -> int | None:
        """Get the device ID passed to the last successful start()."""
        return self._device_id

    @property
    def sample_rate(self) -> int:
        """Get the sample rate of the last successful start() (default 16000)."""
        return self._sample_rate

    @property
    def channels(self) -> int:
        """Get the channel count of the last successful start() (default 1)."""
        return self._channels