- Created .dockerignore to exclude unnecessary files from Docker builds. - Added .repomixignore for managing ignored patterns in Repomix. - Introduced Dockerfile.dev for development environment setup with Python 3.12. - Configured docker-compose.yaml to define services, including a PostgreSQL database. - Established a devcontainer.json for Visual Studio Code integration. - Implemented postCreate.sh for automatic dependency installation in the dev container. - Added constants.py to centralize configuration constants for the project. - Updated pyproject.toml to include new development dependencies. - Created initial documentation files for project overview and style conventions. - Added tests for new functionalities to ensure reliability and correctness.
334 lines
13 KiB
Python
334 lines
13 KiB
Python
"""Stress tests for NoteFlowServicer concurrent stream handling.
|
|
|
|
Detects race conditions when multiple clients stream simultaneously.
|
|
Verifies _cleanup_streaming_state prevents memory leaks.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
from uuid import uuid4
|
|
|
|
import pytest
|
|
|
|
from noteflow.grpc.service import NoteFlowServicer
|
|
|
|
|
|
class TestStreamingStateInitialization:
    """Check that ``_init_streaming_state`` registers per-meeting state."""

    @pytest.mark.stress
    def test_init_streaming_state_creates_all_state(
        self, memory_servicer: NoteFlowServicer
    ) -> None:
        """A newly initialized meeting ID appears in each per-meeting container."""
        mid = str(uuid4())
        memory_servicer._init_streaming_state(mid, next_segment_id=0)

        # Containers expected to hold an entry for the meeting right after init.
        populated_attrs = (
            "_partial_buffers",
            "_vad_instances",
            "_segmenters",
            "_was_speaking",
            "_segment_counters",
            "_last_partial_time",
            "_last_partial_text",
            "_diarization_turns",
            "_diarization_stream_time",
        )
        for attr in populated_attrs:
            # The attribute name is the assertion message so a failure names the container.
            assert mid in getattr(memory_servicer, attr), attr

        memory_servicer._cleanup_streaming_state(mid)

    @pytest.mark.stress
    def test_init_with_different_segment_ids(self, memory_servicer: NoteFlowServicer) -> None:
        """The segment counter of each meeting starts at the ``next_segment_id`` given."""
        # Maps each fresh meeting ID to the starting segment ID it is initialized with.
        starting_ids = {str(uuid4()): 0, str(uuid4()): 42}

        for mid, start in starting_ids.items():
            memory_servicer._init_streaming_state(mid, next_segment_id=start)

        for mid, start in starting_ids.items():
            assert memory_servicer._segment_counters[mid] == start

        for mid in starting_ids:
            memory_servicer._cleanup_streaming_state(mid)
|
|
|
|
|
|
class TestCleanupStreamingState:
    """Exercise ``_cleanup_streaming_state`` for individual meetings."""

    @pytest.mark.stress
    def test_cleanup_removes_all_state_dictionaries(
        self, memory_servicer: NoteFlowServicer
    ) -> None:
        """After cleanup the meeting ID is absent from every tracked container."""
        mid = str(uuid4())
        memory_servicer._init_streaming_state(mid, next_segment_id=0)
        memory_servicer._active_streams.add(mid)

        # Sanity check: there is state to remove before cleanup runs.
        for attr in ("_partial_buffers", "_vad_instances", "_segmenters"):
            assert mid in getattr(memory_servicer, attr), attr

        memory_servicer._cleanup_streaming_state(mid)
        # The active-stream marker is dropped by the test itself, not by cleanup.
        memory_servicer._active_streams.discard(mid)

        cleared_attrs = (
            "_partial_buffers",
            "_vad_instances",
            "_segmenters",
            "_was_speaking",
            "_segment_counters",
            "_stream_formats",
            "_last_partial_time",
            "_last_partial_text",
            "_diarization_turns",
            "_diarization_stream_time",
            "_diarization_streaming_failed",
            "_active_streams",
        )
        for attr in cleared_attrs:
            assert mid not in getattr(memory_servicer, attr), attr

    @pytest.mark.stress
    def test_cleanup_idempotent(self, memory_servicer: NoteFlowServicer) -> None:
        """Calling cleanup repeatedly for the same meeting does not raise."""
        mid = str(uuid4())
        memory_servicer._init_streaming_state(mid, next_segment_id=0)

        # Only the first call has state to remove; the later ones must be no-ops.
        for _ in range(3):
            memory_servicer._cleanup_streaming_state(mid)

    @pytest.mark.stress
    def test_cleanup_nonexistent_meeting_no_error(self, memory_servicer: NoteFlowServicer) -> None:
        """Cleanup for a meeting ID that was never initialized does not raise."""
        memory_servicer._cleanup_streaming_state(str(uuid4()))
|
|
|
|
|
|
class TestConcurrentStreamInitialization:
    """Test concurrent stream initialization and cleanup across asyncio tasks."""

    @pytest.mark.stress
    @pytest.mark.asyncio
    async def test_concurrent_init_different_meetings(
        self, memory_servicer: NoteFlowServicer
    ) -> None:
        """Multiple concurrent init calls for different meetings succeed.

        Each meeting is initialized from its own task with a distinct segment
        ID. Afterwards every meeting must be present and must still hold the
        segment ID it was given, so state is not mixed up between meetings.
        """
        meeting_ids = [str(uuid4()) for _ in range(20)]

        async def init_meeting(meeting_id: str, segment_id: int) -> None:
            # Suspend first so all tasks are scheduled before any touches state.
            await asyncio.sleep(0.001)
            memory_servicer._init_streaming_state(meeting_id, segment_id)

        tasks = [asyncio.create_task(init_meeting(mid, idx)) for idx, mid in enumerate(meeting_ids)]
        await asyncio.gather(*tasks)

        expected = len(meeting_ids)
        assert len(memory_servicer._vad_instances) == expected
        assert len(memory_servicer._segmenters) == expected
        assert len(memory_servicer._partial_buffers) == expected

        # Count checks alone cannot detect a counter stored under the wrong
        # meeting, so verify each meeting kept its own starting segment ID.
        for idx, mid in enumerate(meeting_ids):
            assert memory_servicer._segment_counters[mid] == idx

        for mid in meeting_ids:
            memory_servicer._cleanup_streaming_state(mid)

    @pytest.mark.stress
    @pytest.mark.asyncio
    async def test_concurrent_cleanup_different_meetings(
        self, memory_servicer: NoteFlowServicer
    ) -> None:
        """Multiple concurrent cleanup calls for different meetings succeed."""
        meeting_ids = [str(uuid4()) for _ in range(20)]

        # State is created synchronously; only the cleanup runs from tasks.
        for idx, mid in enumerate(meeting_ids):
            memory_servicer._init_streaming_state(mid, idx)

        async def cleanup_meeting(meeting_id: str) -> None:
            # Suspend first so all tasks are scheduled before any touches state.
            await asyncio.sleep(0.001)
            memory_servicer._cleanup_streaming_state(meeting_id)

        tasks = [asyncio.create_task(cleanup_meeting(mid)) for mid in meeting_ids]
        await asyncio.gather(*tasks)

        assert len(memory_servicer._vad_instances) == 0
        assert len(memory_servicer._segmenters) == 0
        assert len(memory_servicer._partial_buffers) == 0
|
|
|
|
|
|
class TestNoMemoryLeaksUnderLoad:
    """Test no memory leaks after many stream cycles."""

    @pytest.mark.stress
    def test_stream_cycles_cleanup_completely(self, memory_servicer: NoteFlowServicer) -> None:
        """Many init/cleanup cycles leave no leaked state."""
        for _ in range(100):
            meeting_id = str(uuid4())
            memory_servicer._init_streaming_state(meeting_id, next_segment_id=0)
            memory_servicer._active_streams.add(meeting_id)
            memory_servicer._cleanup_streaming_state(meeting_id)
            memory_servicer._active_streams.discard(meeting_id)

        # Every per-meeting container must be empty once all cycles are done.
        assert len(memory_servicer._active_streams) == 0
        assert len(memory_servicer._partial_buffers) == 0
        assert len(memory_servicer._vad_instances) == 0
        assert len(memory_servicer._segmenters) == 0
        assert len(memory_servicer._was_speaking) == 0
        assert len(memory_servicer._segment_counters) == 0
        assert len(memory_servicer._last_partial_time) == 0
        assert len(memory_servicer._last_partial_text) == 0
        assert len(memory_servicer._diarization_turns) == 0
        assert len(memory_servicer._diarization_stream_time) == 0
        assert len(memory_servicer._diarization_streaming_failed) == 0

    @pytest.mark.stress
    @pytest.mark.slow
    def test_many_concurrent_meetings_no_leak(self, memory_servicer: NoteFlowServicer) -> None:
        """Many meetings initialized then cleaned up leave no state."""
        meeting_ids = [str(uuid4()) for _ in range(500)]

        for idx, mid in enumerate(meeting_ids):
            memory_servicer._init_streaming_state(mid, idx)
            memory_servicer._active_streams.add(mid)

        # Derive the expected size from the ID list so the assertions cannot
        # drift from the number of meetings actually created.
        expected = len(meeting_ids)
        assert len(memory_servicer._vad_instances) == expected
        assert len(memory_servicer._segmenters) == expected

        for mid in meeting_ids:
            memory_servicer._cleanup_streaming_state(mid)
            memory_servicer._active_streams.discard(mid)

        assert len(memory_servicer._active_streams) == 0
        assert len(memory_servicer._vad_instances) == 0
        assert len(memory_servicer._segmenters) == 0

    @pytest.mark.stress
    @pytest.mark.asyncio
    async def test_interleaved_init_cleanup(self, memory_servicer: NoteFlowServicer) -> None:
        """Interleaved init and cleanup doesn't leak or corrupt.

        Each round initializes a batch of meetings and cleans it up in two
        halves. Between the halves, the meetings not yet cleaned up must still
        be present; after the round, nothing may remain.
        """
        for _ in range(50):
            meeting_ids = [str(uuid4()) for _ in range(10)]
            first_half, second_half = meeting_ids[:5], meeting_ids[5:]

            for idx, mid in enumerate(meeting_ids):
                memory_servicer._init_streaming_state(mid, idx)

            for mid in first_half:
                memory_servicer._cleanup_streaming_state(mid)

            # Cleaning up some meetings must not disturb the remaining ones.
            for mid in second_half:
                assert mid in memory_servicer._vad_instances
                assert mid in memory_servicer._segmenters

            for mid in second_half:
                memory_servicer._cleanup_streaming_state(mid)

            # Check after every round so a leak is reported where it happens.
            assert len(memory_servicer._vad_instances) == 0
            assert len(memory_servicer._segmenters) == 0
|
|
|
|
|
|
class TestActiveStreamsTracking:
    """Exercise add/discard behavior of the servicer's ``_active_streams``."""

    @pytest.mark.stress
    def test_active_streams_tracks_active_meetings(self, memory_servicer: NoteFlowServicer) -> None:
        """Added meeting IDs are tracked until they are discarded again."""
        active = memory_servicer._active_streams
        ids = [str(uuid4()) for _ in range(5)]

        for meeting_id in ids:
            active.add(meeting_id)

        assert len(active) == 5
        for meeting_id in ids:
            assert meeting_id in active

        for meeting_id in ids:
            active.discard(meeting_id)

        assert len(active) == 0

    @pytest.mark.stress
    def test_discard_nonexistent_no_error(self, memory_servicer: NoteFlowServicer) -> None:
        """Discarding an ID that was never added does not raise."""
        memory_servicer._active_streams.discard(str(uuid4()))
|
|
|
|
|
|
class TestDiarizationStateCleanup:
    """Check that cleanup also removes diarization-related state."""

    @pytest.mark.stress
    def test_diarization_failed_set_cleaned(self, memory_servicer: NoteFlowServicer) -> None:
        """A meeting flagged in ``_diarization_streaming_failed`` is unflagged by cleanup."""
        mid = str(uuid4())
        failed = memory_servicer._diarization_streaming_failed

        memory_servicer._init_streaming_state(mid, 0)
        failed.add(mid)
        assert mid in failed

        memory_servicer._cleanup_streaming_state(mid)

        # Re-read the attribute in case cleanup rebinds the container.
        assert mid not in memory_servicer._diarization_streaming_failed

    @pytest.mark.stress
    def test_diarization_turns_cleaned(self, memory_servicer: NoteFlowServicer) -> None:
        """The ``_diarization_turns`` entry starts empty and is removed by cleanup."""
        mid = str(uuid4())
        memory_servicer._init_streaming_state(mid, 0)

        turns = memory_servicer._diarization_turns
        assert mid in turns
        assert turns[mid] == []

        memory_servicer._cleanup_streaming_state(mid)

        # Re-read the attribute in case cleanup rebinds the container.
        assert mid not in memory_servicer._diarization_turns
|
|
|
|
|
|
class TestServicerInstantiation:
    """Check the state of directly constructed ``NoteFlowServicer`` objects."""

    @pytest.mark.stress
    def test_servicer_starts_with_empty_state(self) -> None:
        """A servicer built with no arguments starts with empty state containers."""
        fresh = NoteFlowServicer()

        empty_attrs = (
            "_active_streams",
            "_partial_buffers",
            "_vad_instances",
            "_segmenters",
            "_was_speaking",
            "_segment_counters",
            "_audio_writers",
        )
        for attr in empty_attrs:
            # The attribute name is the assertion message so a failure names the container.
            assert len(getattr(fresh, attr)) == 0, attr

    @pytest.mark.stress
    def test_multiple_servicers_independent(self) -> None:
        """Initializing a meeting on one servicer does not register it on another."""
        owner = NoteFlowServicer()
        bystander = NoteFlowServicer()
        mid = str(uuid4())

        owner._init_streaming_state(mid, 0)

        assert mid in owner._vad_instances
        assert mid not in bystander._vad_instances

        owner._cleanup_streaming_state(mid)
|
|
|
|
|
|
class TestMemoryStoreAccess:
    """Exercise the store returned by the servicer's ``_get_memory_store``."""

    @pytest.mark.stress
    def test_get_memory_store_returns_store(self, memory_servicer: NoteFlowServicer) -> None:
        """``_get_memory_store`` returns a non-``None`` object for ``memory_servicer``."""
        assert memory_servicer._get_memory_store() is not None

    @pytest.mark.stress
    def test_memory_store_create_meeting(self, memory_servicer: NoteFlowServicer) -> None:
        """A meeting created through the store can be fetched back by its ID."""
        title = "Test Meeting"
        meeting_store = memory_servicer._get_memory_store()

        created = meeting_store.create(title=title)
        assert created is not None
        assert created.title == title

        # The store is queried with the string form of the meeting ID.
        fetched = meeting_store.get(str(created.id))
        assert fetched is not None
        assert fetched.title == title
|