Files
noteflow/tests/stress/test_concurrency_stress.py
Travis Vasceannie b333ea5b23 Add initial Docker and development environment setup
- Created .dockerignore to exclude unnecessary files from Docker builds.
- Added .repomixignore for managing ignored patterns in Repomix.
- Introduced Dockerfile.dev for development environment setup with Python 3.12.
- Configured docker-compose.yaml to define services, including a PostgreSQL database.
- Established a devcontainer.json for Visual Studio Code integration.
- Implemented postCreate.sh for automatic dependency installation in the dev container.
- Added constants.py to centralize configuration constants for the project.
- Updated pyproject.toml to include new development dependencies.
- Created initial documentation files for project overview and style conventions.
- Added tests for new functionalities to ensure reliability and correctness.
2025-12-19 05:02:16 +00:00

334 lines
13 KiB
Python

"""Stress tests for NoteFlowServicer concurrent stream handling.
Detects race conditions when multiple clients stream simultaneously.
Verifies _cleanup_streaming_state prevents memory leaks.
"""
from __future__ import annotations
import asyncio
from uuid import uuid4
import pytest
from noteflow.grpc.service import NoteFlowServicer
class TestStreamingStateInitialization:
    """Test streaming state initialization correctness.

    Every test releases the state it created in a ``finally`` clause, so a
    failing assertion cannot leave per-meeting entries behind on the servicer.
    """

    @pytest.mark.stress
    def test_init_streaming_state_creates_all_state(
        self, memory_servicer: NoteFlowServicer
    ) -> None:
        """Initialize streaming state creates entries in all state dictionaries.

        Args:
            memory_servicer: Servicer under test, injected by pytest as a fixture.
        """
        meeting_id = str(uuid4())
        try:
            memory_servicer._init_streaming_state(meeting_id, next_segment_id=0)

            assert meeting_id in memory_servicer._partial_buffers
            assert meeting_id in memory_servicer._vad_instances
            assert meeting_id in memory_servicer._segmenters
            assert meeting_id in memory_servicer._was_speaking
            assert meeting_id in memory_servicer._segment_counters
            assert meeting_id in memory_servicer._last_partial_time
            assert meeting_id in memory_servicer._last_partial_text
            assert meeting_id in memory_servicer._diarization_turns
            assert meeting_id in memory_servicer._diarization_stream_time
        finally:
            # Runs even when an assertion above fails; previously a failure
            # skipped the cleanup and left this meeting's state on the servicer.
            memory_servicer._cleanup_streaming_state(meeting_id)

    @pytest.mark.stress
    def test_init_with_different_segment_ids(self, memory_servicer: NoteFlowServicer) -> None:
        """Initialize with different segment IDs sets counter correctly.

        Args:
            memory_servicer: Servicer under test, injected by pytest as a fixture.
        """
        meeting_id1 = str(uuid4())
        meeting_id2 = str(uuid4())
        try:
            memory_servicer._init_streaming_state(meeting_id1, next_segment_id=0)
            memory_servicer._init_streaming_state(meeting_id2, next_segment_id=42)

            # Each meeting keeps its own counter, seeded from next_segment_id.
            assert memory_servicer._segment_counters[meeting_id1] == 0
            assert memory_servicer._segment_counters[meeting_id2] == 42
        finally:
            # Always release both meetings, regardless of assertion outcome.
            memory_servicer._cleanup_streaming_state(meeting_id1)
            memory_servicer._cleanup_streaming_state(meeting_id2)
class TestCleanupStreamingState:
    """Checks that ``_cleanup_streaming_state`` drops every per-meeting resource."""

    @pytest.mark.stress
    def test_cleanup_removes_all_state_dictionaries(
        self, memory_servicer: NoteFlowServicer
    ) -> None:
        """After cleanup, the meeting ID is absent from every state container."""
        servicer = memory_servicer
        mid = str(uuid4())
        servicer._init_streaming_state(mid, next_segment_id=0)
        servicer._active_streams.add(mid)

        # Sanity check: init really populated state before it is torn down.
        populated = ("_partial_buffers", "_vad_instances", "_segmenters")
        for attr in populated:
            assert mid in getattr(servicer, attr)

        servicer._cleanup_streaming_state(mid)
        servicer._active_streams.discard(mid)

        # Checked in the same order as the individual assertions they replace.
        cleared = (
            "_partial_buffers",
            "_vad_instances",
            "_segmenters",
            "_was_speaking",
            "_segment_counters",
            "_stream_formats",
            "_last_partial_time",
            "_last_partial_text",
            "_diarization_turns",
            "_diarization_stream_time",
            "_diarization_streaming_failed",
            "_active_streams",
        )
        for attr in cleared:
            assert mid not in getattr(servicer, attr)

    @pytest.mark.stress
    def test_cleanup_idempotent(self, memory_servicer: NoteFlowServicer) -> None:
        """Calling cleanup repeatedly for the same meeting must not raise."""
        mid = str(uuid4())
        memory_servicer._init_streaming_state(mid, next_segment_id=0)

        # First call removes the state; the next two hit an already-clean ID.
        for _attempt in range(3):
            memory_servicer._cleanup_streaming_state(mid)

    @pytest.mark.stress
    def test_cleanup_nonexistent_meeting_no_error(self, memory_servicer: NoteFlowServicer) -> None:
        """Cleanup for a meeting ID that was never initialized must not raise."""
        unknown_id = str(uuid4())
        memory_servicer._cleanup_streaming_state(unknown_id)
class TestConcurrentStreamInitialization:
    """Test concurrent stream initialization."""

    @pytest.mark.stress
    @pytest.mark.asyncio
    async def test_concurrent_init_different_meetings(
        self, memory_servicer: NoteFlowServicer
    ) -> None:
        """Multiple concurrent init calls for different meetings succeed.

        Args:
            memory_servicer: Servicer under test, injected by pytest as a fixture.
        """
        meeting_ids = [str(uuid4()) for _ in range(20)]

        async def init_meeting(meeting_id: str, segment_id: int) -> None:
            # Suspend first so every task has started before any init call runs.
            await asyncio.sleep(0.001)
            memory_servicer._init_streaming_state(meeting_id, segment_id)

        try:
            tasks = [
                asyncio.create_task(init_meeting(mid, idx))
                for idx, mid in enumerate(meeting_ids)
            ]
            await asyncio.gather(*tasks)

            assert len(memory_servicer._vad_instances) == len(meeting_ids)
            assert len(memory_servicer._segmenters) == len(meeting_ids)
            assert len(memory_servicer._partial_buffers) == len(meeting_ids)
        finally:
            # Runs even when a task or an assertion fails; previously a failure
            # skipped the cleanup and left up to 20 meetings on the servicer.
            for mid in meeting_ids:
                memory_servicer._cleanup_streaming_state(mid)

    @pytest.mark.stress
    @pytest.mark.asyncio
    async def test_concurrent_cleanup_different_meetings(
        self, memory_servicer: NoteFlowServicer
    ) -> None:
        """Multiple concurrent cleanup calls for different meetings succeed.

        Args:
            memory_servicer: Servicer under test, injected by pytest as a fixture.
        """
        meeting_ids = [str(uuid4()) for _ in range(20)]
        for idx, mid in enumerate(meeting_ids):
            memory_servicer._init_streaming_state(mid, idx)

        async def cleanup_meeting(meeting_id: str) -> None:
            # Suspend first so every task has started before any cleanup runs.
            await asyncio.sleep(0.001)
            memory_servicer._cleanup_streaming_state(meeting_id)

        tasks = [asyncio.create_task(cleanup_meeting(mid)) for mid in meeting_ids]
        await asyncio.gather(*tasks)

        # Cleanup is the behaviour under test, so no extra teardown is needed.
        assert len(memory_servicer._vad_instances) == 0
        assert len(memory_servicer._segmenters) == 0
        assert len(memory_servicer._partial_buffers) == 0
class TestNoMemoryLeaksUnderLoad:
    """Repeated stream lifecycles must leave the servicer's state empty."""

    @pytest.mark.stress
    def test_stream_cycles_cleanup_completely(self, memory_servicer: NoteFlowServicer) -> None:
        """Run 100 init/cleanup cycles and expect every state container empty."""
        servicer = memory_servicer
        for _cycle in range(100):
            mid = str(uuid4())
            servicer._init_streaming_state(mid, next_segment_id=0)
            servicer._active_streams.add(mid)
            servicer._cleanup_streaming_state(mid)
            servicer._active_streams.discard(mid)

        # Checked in the same order as the individual assertions they replace.
        must_be_empty = (
            "_active_streams",
            "_partial_buffers",
            "_vad_instances",
            "_segmenters",
            "_was_speaking",
            "_segment_counters",
            "_last_partial_time",
            "_last_partial_text",
            "_diarization_turns",
            "_diarization_stream_time",
            "_diarization_streaming_failed",
        )
        for attr in must_be_empty:
            assert len(getattr(servicer, attr)) == 0

    @pytest.mark.stress
    @pytest.mark.slow
    def test_many_concurrent_meetings_no_leak(self, memory_servicer: NoteFlowServicer) -> None:
        """Hold 500 meetings open at once, then clean them all up."""
        servicer = memory_servicer
        meeting_count = 500
        meeting_ids = [str(uuid4()) for _ in range(meeting_count)]

        for segment_id, mid in enumerate(meeting_ids):
            servicer._init_streaming_state(mid, segment_id)
            servicer._active_streams.add(mid)

        # All meetings are live simultaneously at this point.
        assert len(servicer._vad_instances) == meeting_count
        assert len(servicer._segmenters) == meeting_count

        for mid in meeting_ids:
            servicer._cleanup_streaming_state(mid)
            servicer._active_streams.discard(mid)

        for attr in ("_active_streams", "_vad_instances", "_segmenters"):
            assert len(getattr(servicer, attr)) == 0

    @pytest.mark.stress
    @pytest.mark.asyncio
    async def test_interleaved_init_cleanup(self, memory_servicer: NoteFlowServicer) -> None:
        """Init ten meetings per round and clean them up in two halves, 50 times."""
        servicer = memory_servicer
        for _round in range(50):
            batch = [str(uuid4()) for _ in range(10)]
            for segment_id, mid in enumerate(batch):
                servicer._init_streaming_state(mid, segment_id)

            # The first five meetings are released before the remaining five.
            early, late = batch[:5], batch[5:]
            for group in (early, late):
                for mid in group:
                    servicer._cleanup_streaming_state(mid)

        assert len(servicer._vad_instances) == 0
        assert len(servicer._segmenters) == 0
class TestActiveStreamsTracking:
    """Exercises add/discard behaviour of the servicer's ``_active_streams`` set."""

    @pytest.mark.stress
    def test_active_streams_tracks_active_meetings(self, memory_servicer: NoteFlowServicer) -> None:
        """Added meeting IDs are tracked, and discarding them empties the set."""
        servicer = memory_servicer
        tracked = [str(uuid4()) for _ in range(5)]

        for mid in tracked:
            servicer._active_streams.add(mid)

        assert len(servicer._active_streams) == 5
        assert all(mid in servicer._active_streams for mid in tracked)

        for mid in tracked:
            servicer._active_streams.discard(mid)

        assert len(servicer._active_streams) == 0

    @pytest.mark.stress
    def test_discard_nonexistent_no_error(self, memory_servicer: NoteFlowServicer) -> None:
        """Discarding a meeting ID that was never added must not raise."""
        unknown_id = str(uuid4())
        memory_servicer._active_streams.discard(unknown_id)
class TestDiarizationStateCleanup:
    """Cleanup must also drop the diarization-related per-meeting state."""

    @pytest.mark.stress
    def test_diarization_failed_set_cleaned(self, memory_servicer: NoteFlowServicer) -> None:
        """A meeting flagged in ``_diarization_streaming_failed`` is removed by cleanup."""
        servicer = memory_servicer
        mid = str(uuid4())
        servicer._init_streaming_state(mid, 0)

        # Flag the meeting by hand, then confirm the flag was recorded.
        servicer._diarization_streaming_failed.add(mid)
        assert mid in servicer._diarization_streaming_failed

        servicer._cleanup_streaming_state(mid)

        assert mid not in servicer._diarization_streaming_failed

    @pytest.mark.stress
    def test_diarization_turns_cleaned(self, memory_servicer: NoteFlowServicer) -> None:
        """Init creates an empty ``_diarization_turns`` entry; cleanup removes it."""
        servicer = memory_servicer
        mid = str(uuid4())
        servicer._init_streaming_state(mid, 0)

        # A fresh meeting starts with an empty list of turns.
        assert mid in servicer._diarization_turns
        assert servicer._diarization_turns[mid] == []

        servicer._cleanup_streaming_state(mid)

        assert mid not in servicer._diarization_turns
class TestServicerInstantiation:
    """Checks on freshly constructed ``NoteFlowServicer`` instances."""

    @pytest.mark.stress
    def test_servicer_starts_with_empty_state(self) -> None:
        """A servicer built with no arguments starts with empty state containers."""
        fresh = NoteFlowServicer()

        # Checked in the same order as the individual assertions they replace.
        initially_empty = (
            "_active_streams",
            "_partial_buffers",
            "_vad_instances",
            "_segmenters",
            "_was_speaking",
            "_segment_counters",
            "_audio_writers",
        )
        for attr in initially_empty:
            assert len(getattr(fresh, attr)) == 0

    @pytest.mark.stress
    def test_multiple_servicers_independent(self) -> None:
        """State initialized on one servicer does not appear on another."""
        first = NoteFlowServicer()
        second = NoteFlowServicer()
        mid = str(uuid4())

        # Only the first servicer is told about this meeting.
        first._init_streaming_state(mid, 0)

        assert mid in first._vad_instances
        assert mid not in second._vad_instances

        first._cleanup_streaming_state(mid)
class TestMemoryStoreAccess:
    """Checks access to the servicer's in-memory meeting store."""

    @pytest.mark.stress
    def test_get_memory_store_returns_store(self, memory_servicer: NoteFlowServicer) -> None:
        """``_get_memory_store`` returns a non-None store for the fixture servicer."""
        assert memory_servicer._get_memory_store() is not None

    @pytest.mark.stress
    def test_memory_store_create_meeting(self, memory_servicer: NoteFlowServicer) -> None:
        """A meeting created through the store can be fetched back by its ID."""
        store = memory_servicer._get_memory_store()

        created = store.create(title="Test Meeting")
        assert created is not None
        assert created.title == "Test Meeting"

        # Lookup uses the string form of the meeting's ID.
        fetched = store.get(str(created.id))
        assert fetched is not None
        assert fetched.title == "Test Meeting"