Add speaker diarization support and related configurations

- Introduced speaker diarization functionality using pyannote.audio and diart for both streaming and offline processing.
- Updated `pyproject.toml` to include new dependencies for diarization.
- Enhanced `TranscriptComponent` to handle speaker identification and color coding for speakers.
- Added new settings in `Settings` class for enabling and configuring diarization features.
- Updated `Segment` and gRPC models to include speaker ID and confidence metrics.
- Implemented a `DiarizationEngine` for managing diarization processes and speaker assignments.
- Created necessary database migrations to accommodate new speaker fields in segments.
- Added tests for audio reading and diarization functionality to ensure reliability and correctness.
- Updated documentation to reflect new features and usage instructions for diarization.
2025-12-18 18:51:09 +00:00
parent d2f4180716
commit b9297d9160
25 changed files with 2888 additions and 169 deletions
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -52,6 +52,11 @@ summarization = [
    "openai>=2.13.0",
    "anthropic>=0.75.0",
 ]
+diarization = [
+    "pyannote.audio>=3.3",
+    "diart>=0.9.2",
+    "torch>=2.0",
+]

 [build-system]
 requires = ["hatchling"]
@@ -112,6 +117,9 @@ markers = [
    "slow: marks tests as slow (model loading)",
    "integration: marks tests requiring external services",
 ]
+filterwarnings = [
+    "ignore:The @wait_container_is_ready decorator is deprecated.*:DeprecationWarning:testcontainers.core.waiting_utils",
+]

 [dependency-groups]
 dev = [