- Introduced .python-version for Python version management. - Added AGENTS.md for documentation on agent usage and best practices. - Created alembic.ini for database migration configurations. - Implemented main.py as the entry point for the application. - Established pyproject.toml for project dependencies and configurations. - Initialized README.md for project overview. - Generated uv.lock for dependency locking. - Documented milestones and specifications in docs/milestones.md and docs/spec.md. - Created logs/status_line.json for logging status information. - Added initial spike implementations for UI tray hotkeys, audio capture, ASR latency, and encryption validation. - Set up NoteFlow core structure in src/noteflow with necessary modules and services. - Developed test suite in tests directory for application, domain, infrastructure, and integration testing. - Included initial migration scripts in infrastructure/persistence/migrations for database setup. - Established security protocols in infrastructure/security for key management and encryption. - Implemented audio infrastructure for capturing and processing audio data. - Created converters for ASR and ORM in infrastructure/converters. - Added export functionality for different formats in infrastructure/export. - Ensured all new files are included in the repository for future development.
186 lines
5.8 KiB
Python
186 lines
5.8 KiB
Python
"""Audio capture implementation using sounddevice.
|
|
|
|
Provides cross-platform audio input capture with device handling.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
import time
|
|
from typing import TYPE_CHECKING
|
|
|
|
import numpy as np
|
|
import sounddevice as sd
|
|
|
|
from .protocols import AudioDeviceInfo, AudioFrameCallback
|
|
|
|
if TYPE_CHECKING:
|
|
from numpy.typing import NDArray
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class SoundDeviceCapture:
    """sounddevice-based implementation of AudioCapture.

    Handles input-device enumeration and the lifecycle of a single
    ``sd.InputStream``. Uses PortAudio (through ``sounddevice``) under the
    hood for cross-platform audio capture.
    """

    def __init__(self) -> None:
        """Initialize the capture instance with no active stream."""
        self._stream: sd.InputStream | None = None
        self._callback: AudioFrameCallback | None = None
        self._device_id: int | None = None
        self._sample_rate: int = 16000
        self._channels: int = 1

    def list_devices(self) -> list[AudioDeviceInfo]:
        """List available audio input devices.

        Returns:
            List of AudioDeviceInfo for every device reported by
            ``sd.query_devices()`` that has at least one input channel.
        """
        device_list = sd.query_devices()

        # sd.default.device is indexed as (input, output); fall back to -1
        # (matches no enumerated index) when it cannot be indexed.
        try:
            default_input = sd.default.device[0]
        except (TypeError, IndexError):
            default_input = -1

        return [
            AudioDeviceInfo(
                device_id=idx,
                name=dev["name"],
                channels=dev["max_input_channels"],
                sample_rate=int(dev["default_samplerate"]),
                is_default=(idx == default_input),
            )
            for idx, dev in enumerate(device_list)
            if dev["max_input_channels"] > 0
        ]

    def get_default_device(self) -> AudioDeviceInfo | None:
        """Get the default input device.

        Returns:
            The device flagged as default; otherwise the first input device;
            or None if no input devices are available.
        """
        devices = self.list_devices()
        fallback = devices[0] if devices else None
        return next((dev for dev in devices if dev.is_default), fallback)

    def start(
        self,
        device_id: int | None,
        on_frames: AudioFrameCallback,
        sample_rate: int = 16000,
        channels: int = 1,
        chunk_duration_ms: int = 100,
    ) -> None:
        """Start capturing audio from the specified device.

        Args:
            device_id: Device ID to capture from, or None for default device.
            on_frames: Callback receiving (frames, timestamp) for each chunk.
                ``frames`` is a flattened 1-D float32 array and ``timestamp``
                is ``time.monotonic()`` sampled inside the stream callback.
            sample_rate: Sample rate in Hz (default 16kHz for ASR).
            channels: Number of channels (default 1 for mono).
            chunk_duration_ms: Duration of each audio chunk in milliseconds.

        Raises:
            RuntimeError: If already capturing, or if PortAudio fails to open
                or start the stream.
            ValueError: Not raised here directly; propagated unchanged if
                sounddevice raises it (presumably for an invalid device_id —
                confirm against the sounddevice documentation).
        """
        if self._stream is not None:
            raise RuntimeError("Already capturing audio")

        self._callback = on_frames
        self._device_id = device_id
        self._sample_rate = sample_rate
        self._channels = channels

        # Calculate block size (frames per callback) from chunk duration.
        blocksize = int(sample_rate * chunk_duration_ms / 1000)

        def _stream_callback(
            indata: NDArray[np.float32],
            frames: int,
            time_info: object,  # cffi CData from sounddevice, unused
            status: sd.CallbackFlags,
        ) -> None:
            """Internal sounddevice callback."""
            if status:
                logger.warning("Audio stream status: %s", status)

            # Read the attribute once so the check and the call use the same
            # object even if stop() clears it concurrently.
            callback = self._callback
            if callback is None:
                return

            # flatten() always returns a fresh 1-D copy, so the data is
            # detached from sounddevice's buffer; astype(copy=False) then
            # avoids a second copy when the dtype is already float32.
            audio_data = indata.flatten().astype(np.float32, copy=False)
            callback(audio_data, time.monotonic())

        try:
            self._stream = sd.InputStream(
                device=device_id,
                channels=channels,
                samplerate=sample_rate,
                blocksize=blocksize,
                dtype=np.float32,
                callback=_stream_callback,
            )
            self._stream.start()
        except sd.PortAudioError as e:
            self._abort_start()
            raise RuntimeError(f"Failed to start audio capture: {e}") from e
        except Exception:
            # Any other failure must not leave a half-initialised instance
            # behind; clean up and let the original exception propagate.
            self._abort_start()
            raise

        logger.info(
            "Started audio capture: device=%s, rate=%d, channels=%d, blocksize=%d",
            device_id,
            sample_rate,
            channels,
            blocksize,
        )

    def _abort_start(self) -> None:
        """Release a partially started stream and clear the capture state."""
        stream, self._stream = self._stream, None
        self._callback = None
        if stream is not None:
            # The stream was opened but could not be started: close it so
            # the underlying handle is not leaked.
            try:
                stream.close()
            except sd.PortAudioError as e:
                logger.warning("Error closing audio stream: %s", e)

    def stop(self) -> None:
        """Stop audio capture.

        Safe to call even if not capturing. PortAudio errors raised while
        stopping or closing are logged, not propagated.
        """
        stream = self._stream
        if stream is None:
            return

        try:
            try:
                stream.stop()
            finally:
                # Always attempt to close, even when stop() failed.
                stream.close()
        except sd.PortAudioError as e:
            logger.warning("Error stopping audio stream: %s", e)
        finally:
            self._stream = None
            self._callback = None
        logger.info("Stopped audio capture")

    def is_capturing(self) -> bool:
        """Check if currently capturing audio.

        Returns:
            True if a stream exists and reports itself as active.
        """
        return self._stream is not None and self._stream.active

    @property
    def current_device_id(self) -> int | None:
        """Get the device ID passed to the most recent ``start()`` call."""
        return self._device_id

    @property
    def sample_rate(self) -> int:
        """Get the sample rate passed to the most recent ``start()`` call."""
        return self._sample_rate

    @property
    def channels(self) -> int:
        """Get the channel count passed to the most recent ``start()`` call."""
        return self._channels
|