- Introduced .python-version for Python version management. - Added AGENTS.md for documentation on agent usage and best practices. - Created alembic.ini for database migration configurations. - Implemented main.py as the entry point for the application. - Established pyproject.toml for project dependencies and configurations. - Initialized README.md for project overview. - Generated uv.lock for dependency locking. - Documented milestones and specifications in docs/milestones.md and docs/spec.md. - Created logs/status_line.json for logging status information. - Added initial spike implementations for UI tray hotkeys, audio capture, ASR latency, and encryption validation. - Set up NoteFlow core structure in src/noteflow with necessary modules and services. - Developed test suite in tests directory for application, domain, infrastructure, and integration testing. - Included initial migration scripts in infrastructure/persistence/migrations for database setup. - Established security protocols in infrastructure/security for key management and encryption. - Implemented audio infrastructure for capturing and processing audio data. - Created converters for ASR and ORM in infrastructure/converters. - Added export functionality for different formats in infrastructure/export. - Ensured all new files are included in the repository for future development.
71 lines
1.7 KiB
Python
71 lines
1.7 KiB
Python
"""ASR protocols for Spike 3.

These protocols define the contracts for ASR components that will be
promoted to src/noteflow/asr/ after validation.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from collections.abc import Iterator
|
|
from typing import TYPE_CHECKING, Protocol
|
|
|
|
if TYPE_CHECKING:
|
|
import numpy as np
|
|
from numpy.typing import NDArray
|
|
|
|
from .dto import AsrResult
|
|
|
|
|
|
class AsrEngine(Protocol):
    """Protocol for ASR transcription engine.

    This is a structural (duck-typed) interface: any class that provides
    the methods and properties below satisfies it, without inheriting
    from it.

    Implementations should handle model loading, caching, and inference.
    """

    def load_model(self, model_size: str = "base") -> None:
        """Load the ASR model.

        Downloads the model if not cached.

        Args:
            model_size: Model size ("tiny", "base", "small", "medium", "large").

        Raises:
            ValueError: If model_size is invalid.
            RuntimeError: If model loading fails.
        """
        ...

    def transcribe(
        self,
        audio: NDArray[np.float32],
        language: str | None = None,
    ) -> Iterator[AsrResult]:
        """Transcribe audio and yield results.

        Args:
            audio: Audio samples as float32 array (16kHz mono, normalized).
            language: Optional language code (e.g., "en"). Auto-detected if None.

        Yields:
            AsrResult segments.

        Raises:
            RuntimeError: If model not loaded.
        """
        ...

    @property
    def is_loaded(self) -> bool:
        """Return True if model is loaded."""
        ...

    @property
    def model_size(self) -> str | None:
        """Return the loaded model size, or None if not loaded."""
        ...

    def unload(self) -> None:
        """Unload the model to free memory."""
        ...