Add speaker diarization support and related configurations

- Introduced speaker diarization functionality using pyannote.audio and diart for both streaming and offline processing.
- Updated `pyproject.toml` to include new dependencies for diarization.
- Enhanced `TranscriptComponent` to handle speaker identification and color coding for speakers.
- Added new settings in `Settings` class for enabling and configuring diarization features.
- Updated `Segment` and gRPC models to include speaker ID and confidence metrics.
- Implemented a `DiarizationEngine` for managing diarization processes and speaker assignments.
- Created necessary database migrations to accommodate new speaker fields in segments.
- Added tests for audio reading and diarization functionalities to ensure reliability and correctness.
- Updated documentation to reflect new features and usage instructions for diarization.
This commit is contained in:
2025-12-18 18:51:09 +00:00
parent d2f4180716
commit b9297d9160
25 changed files with 2888 additions and 169 deletions

View File

@@ -52,6 +52,11 @@ summarization = [
"openai>=2.13.0",
"anthropic>=0.75.0",
]
# Dependency group for the speaker diarization feature: pyannote.audio and
# diart provide the diarization itself, with torch listed alongside them.
# NOTE(review): presumably an extra under [project.optional-dependencies],
# as a sibling of `summarization` — the table header is outside this view; confirm.
diarization = [
"pyannote.audio>=3.3",
"diart>=0.9.2",
"torch>=2.0",
]
[build-system]
requires = ["hatchling"]
@@ -112,6 +117,9 @@ markers = [
"slow: marks tests as slow (model loading)",
"integration: marks tests requiring external services",
]
# Warning filters, one per entry, in the form action:message:category:module.
# The entry below ignores the DeprecationWarning about the
# @wait_container_is_ready decorator attributed to the
# testcontainers.core.waiting_utils module.
filterwarnings = [
"ignore:The @wait_container_is_ready decorator is deprecated.*:DeprecationWarning:testcontainers.core.waiting_utils",
]
[dependency-groups]
dev = [