diff --git a/.claude/hookify.block-duplicate-fixtures.local.md b/.claude/hookify.block-duplicate-fixtures.local.md index 438658b..3d75c7f 100644 --- a/.claude/hookify.block-duplicate-fixtures.local.md +++ b/.claude/hookify.block-duplicate-fixtures.local.md @@ -4,12 +4,17 @@ enabled: true event: file action: block conditions: + # Match test files EXCEPT root conftest.py (where global fixtures should be defined) + # Uses negative lookahead to exclude tests/conftest.py but include: + # - tests/unit/conftest.py (sub-conftest can't override root) + # - tests/unit/test_foo.py (test files can't override root) - field: file_path operator: regex_match - pattern: tests?/.*\.py$ + pattern: tests?/(?!conftest\.py$).+\.py$ + # Only block if redefining a specific root conftest.py fixture - field: new_text operator: regex_match - pattern: "@pytest\.fixture[^@]*\ndef\s+(mock_uow|crypto|meetings_dir|webhook_config|webhook_config_all_events|sample_datetime|calendar_settings|meeting_id|sample_meeting|recording_meeting|mock_grpc_context|mock_asr_engine|mock_optional_extras)\s*\(" + pattern: "@pytest\\.fixture[^@]*\\ndef\\s+(mock_uow|crypto|meetings_dir|webhook_config|webhook_config_all_events|sample_datetime|calendar_settings|meeting_id|sample_meeting|recording_meeting|mock_grpc_context|mock_asr_engine|mock_optional_extras)\\s*\\(" --- 🚫 **Test Quality Violation: Duplicate Fixture Definition** @@ -17,7 +22,7 @@ conditions: Your edit redefines a **fixture that already exists in `tests/conftest.py`**. **Detected fixture duplication attempt:** -The following fixtures are globally available from `tests/conftest.py`: +The following fixtures are globally available from root `tests/conftest.py`: - `mock_uow` - Mock UnitOfWork with all repositories - `crypto` - AesGcmCryptoBox with in-memory keystore - `meetings_dir` - Temporary meetings directory @@ -40,6 +45,11 @@ The following fixtures are globally available from `tests/conftest.py`: **What to do instead:** 1. **Use the existing fixture** from `tests/conftest.py` 2. **If you need variations**, create a new fixture with a different name -3. **If the global fixture is insufficient**, update `tests/conftest.py` +3. **If the global fixture is insufficient**, update root `tests/conftest.py` + +**Allowed locations for new fixtures:** +- `tests/conftest.py` - Add global fixtures here (unrestricted) +- `tests/<subdir>/conftest.py` - Add scoped fixtures with unique names +- Any test file - Add test-specific fixtures with unique names **Project reference:** See `tests/conftest.py` for all available fixtures. diff --git a/.claude/hookify.block-tests-quality-bash.local.md b/.claude/hookify.block-tests-quality-bash.local.md new file mode 100644 index 0000000..55190bc --- /dev/null +++ b/.claude/hookify.block-tests-quality-bash.local.md @@ -0,0 +1,39 @@ +--- +name: block-tests-quality-bash +enabled: true +event: bash +action: block +pattern: tests/quality/ +--- + +# BLOCKED: Protected Directory (Bash) + +Bash commands that modify files in `tests/quality/` are **not allowed**. + +## Why This Is Blocked + +- The `tests/quality/` directory contains protected test infrastructure +- Commands like `rm`, `mv`, `cp`, `sed`, `echo >`, etc.
targeting this directory are prohibited +- The user has explicitly requested that agents may **view** but **never modify** these files + +## Allowed Operations + +- `cat tests/quality/*` - viewing file contents +- `ls tests/quality/` - listing files +- `pytest tests/quality/` - running the tests + +## Blocked Operations + +- Any command that would create, modify, or delete files in this directory +- Redirecting output to files in this directory +- Moving or copying files into this directory + +## If You Believe a Change Is Needed + +Do NOT attempt to modify these files. Instead: + +1. Explain to the user what change you believe is necessary +2. Provide your justification +3. Wait for explicit user approval and manual intervention + +**This rule cannot be bypassed.** diff --git a/.claude/hookify.block-tests-quality.local.md b/.claude/hookify.block-tests-quality.local.md new file mode 100644 index 0000000..6e8ed82 --- /dev/null +++ b/.claude/hookify.block-tests-quality.local.md @@ -0,0 +1,36 @@ +--- +name: block-tests-quality +enabled: true +event: file +action: block +conditions: + - field: file_path + operator: regex_match + pattern: tests/quality/ +--- + +# BLOCKED: Protected Directory + +The `tests/quality/` directory is **protected** and cannot be modified. + +## Why This Is Blocked + +- This directory contains the test smell detection baselines and quality gate infrastructure +- Changes to these files could compromise the project's quality enforcement +- The user has explicitly requested that agents may **view** but **never modify** these files + +## What You Can Do + +- **Read files**: Use the Read tool to view contents +- **Reference patterns**: Learn from the test smell detection logic +- **Report issues**: If you find a problem, report it to the user rather than fixing it + +## If You Believe a Change Is Needed + +Do NOT attempt to modify these files. Instead: + +1. Explain to the user what change you believe is necessary +2. Provide your justification +3. Wait for explicit user approval and manual intervention + +**This rule cannot be bypassed.** Do not attempt workarounds. 
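For reference, a minimal sketch of how the duplicate-fixture hook's two `regex_match` conditions behave, assuming the hookify engine evaluates them with Python `re.search` semantics (an assumption; the engine itself is not part of this diff):

```python
import re

# Path condition from hookify.block-duplicate-fixtures.local.md: the negative
# lookahead exempts only the root tests/conftest.py, where global fixtures live.
PATH_PATTERN = r"tests?/(?!conftest\.py$).+\.py$"

for path, blocked in [
    ("tests/conftest.py", False),      # root conftest: edits allowed
    ("tests/unit/conftest.py", True),  # sub-conftest: cannot shadow root fixtures
    ("tests/unit/test_foo.py", True),  # test files: cannot shadow root fixtures
]:
    assert bool(re.search(PATH_PATTERN, path)) is blocked, path

# Fixture-name condition (alternation shortened to three of the guarded names).
FIXTURE_PATTERN = r"@pytest\.fixture[^@]*\ndef\s+(mock_uow|crypto|sample_meeting)\s*\("

new_text = "@pytest.fixture\ndef mock_uow(mocker):\n    ...\n"
assert re.search(FIXTURE_PATTERN, new_text)  # redefinition detected -> edit blocked
```

Both conditions presumably must match for the hook to fire, which is why a new fixture with a different name passes even inside a matched test file.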
diff --git a/scripts/profile_hot_paths.py b/scripts/profile_hot_paths.py index 7f35606..8507b1e 100644 --- a/scripts/profile_hot_paths.py +++ b/scripts/profile_hot_paths.py @@ -13,12 +13,13 @@ import pstats import numpy as np +from noteflow.config.constants import DEFAULT_SAMPLE_RATE from noteflow.infrastructure.asr.segmenter import Segmenter, SegmenterConfig from noteflow.infrastructure.asr.streaming_vad import StreamingVad from noteflow.infrastructure.audio.levels import RmsLevelProvider # Simulation parameters -SAMPLE_RATE = 16000 +SAMPLE_RATE = DEFAULT_SAMPLE_RATE CHUNK_SIZE = 1600 # 100ms at 16kHz SIMULATION_SECONDS = 60 # Simulate 1 minute of audio CHUNKS_PER_SECOND = SAMPLE_RATE // CHUNK_SIZE diff --git a/src/noteflow/application/services/recovery_service.py b/src/noteflow/application/services/recovery_service.py index a2097e1..ff4f45a 100644 --- a/src/noteflow/application/services/recovery_service.py +++ b/src/noteflow/application/services/recovery_service.py @@ -15,6 +15,7 @@ import sqlalchemy.exc from noteflow.domain.value_objects import MeetingState from noteflow.infrastructure.logging import get_logger +from noteflow.infrastructure.persistence.constants import MAX_MEETINGS_LIMIT if TYPE_CHECKING: from noteflow.domain.entities import Meeting @@ -152,7 +153,7 @@ class RecoveryService: # Find all meetings in active states meetings, total = await self._uow.meetings.list_all( states=self.ACTIVE_STATES, - limit=1000, # Handle up to 1000 crashed meetings + limit=MAX_MEETINGS_LIMIT, ) if total == 0: diff --git a/src/noteflow/config/constants.py b/src/noteflow/config/constants.py deleted file mode 100644 index 038a870..0000000 --- a/src/noteflow/config/constants.py +++ /dev/null @@ -1,249 +0,0 @@ -"""Centralized constants for NoteFlow. - -This module provides shared constants used across the codebase to avoid -magic numbers and ensure consistency. 
-""" - -from typing import Final, Literal - -# Time conversion constants -SECONDS_PER_HOUR: Final[float] = 3600.0 -"""Seconds in one hour, for time unit conversions.""" - -DEFAULT_OAUTH_TOKEN_EXPIRY_SECONDS: Final[int] = int(SECONDS_PER_HOUR) -"""Default OAuth token expiry time in seconds (1 hour).""" - -# Audio constants -DEFAULT_SAMPLE_RATE: Final[int] = 16000 -"""Default audio sample rate in Hz (16 kHz).""" - -POSITION_UPDATE_INTERVAL: Final[float] = 0.1 -"""Playback position update interval in seconds (100ms).""" - -# gRPC constants -DEFAULT_GRPC_PORT: Final[int] = 50051 -"""Default gRPC server port (canonical definition).""" - -MAX_GRPC_MESSAGE_SIZE: Final[int] = 100 * 1024 * 1024 -"""Maximum gRPC message size in bytes (100 MB).""" - -# Audio encryption buffering constants -AUDIO_BUFFER_SIZE_BYTES: Final[int] = 320_000 -"""Target audio buffer size before encryption (320 KB = ~10 seconds at 16kHz PCM16).""" - -PERIODIC_FLUSH_INTERVAL_SECONDS: Final[float] = 2.0 -"""Interval for periodic audio buffer flush to disk (crash resilience).""" - -# Meeting defaults -DEFAULT_MEETING_TITLE: Final[str] = "Untitled" -"""Default title for meetings without an explicit title.""" - -# Diarization constants -ERR_HF_TOKEN_REQUIRED: Final[str] = "HuggingFace token required for pyannote models" -"""Error message when HuggingFace token is missing for pyannote.""" - -ERR_SERVER_RESTARTED: Final[str] = "Server restarted" -"""Error message for jobs interrupted by server restart.""" - -# Calendar/OAuth error messages -ERR_TOKEN_EXPIRED: Final[str] = "Access token expired or invalid" -"""Error for expired or invalid OAuth tokens.""" - -ERR_API_PREFIX: Final[str] = "API error: " -"""Prefix for API error messages.""" - -ERR_TOKEN_REFRESH_PREFIX: Final[str] = "Token refresh failed: " -"""Prefix for token refresh error messages.""" - -# OAuth token field names -OAUTH_FIELD_ACCESS_TOKEN: Final[str] = "access_token" -"""OAuth access token field name.""" - -OAUTH_FIELD_REFRESH_TOKEN: Final[str] = "refresh_token" -"""OAuth refresh token field name.""" - -OAUTH_FIELD_TOKEN_TYPE: Final[str] = "token_type" -"""OAuth token type field name.""" - -OAUTH_FIELD_SCOPE: Final[str] = "scope" -"""OAuth scope field name.""" - -OAUTH_FIELD_EXPIRES_IN: Final[str] = "expires_in" -"""OAuth expires_in field name.""" - -HTTP_AUTHORIZATION: Final[str] = "Authorization" -"""HTTP Authorization header name.""" - -HTTP_BEARER_PREFIX: Final[str] = "Bearer " -"""Standard HTTP authorization header prefix.""" - -HTTP_STATUS_OK: Final[int] = 200 -"""HTTP 200 OK status code.""" - -HTTP_STATUS_UNAUTHORIZED: Final[int] = 401 -"""HTTP 401 Unauthorized status code.""" - -# Application directory -APP_DIR_NAME: Final[str] = ".noteflow" -"""Application data directory name within user home.""" - -# spaCy NER model names -SPACY_MODEL_SM: Final[str] = "en_core_web_sm" -"""Small English spaCy model for NER.""" - -SPACY_MODEL_MD: Final[str] = "en_core_web_md" -"""Medium English spaCy model for NER.""" - -SPACY_MODEL_LG: Final[str] = "en_core_web_lg" -"""Large English spaCy model for NER.""" - -SPACY_MODEL_TRF: Final[str] = "en_core_web_trf" -"""Transformer-based English spaCy model for NER.""" - -# Export format constants -EXPORT_FORMAT_HTML: Final[str] = "HTML" -"""HTML export format display name.""" - -EXPORT_EXT_HTML: Final[str] = ".html" -"""HTML file extension.""" - -EXPORT_EXT_PDF: Final[str] = ".pdf" -"""PDF file extension.""" - -# Error detail keys -ERROR_DETAIL_PROJECT_ID: Final[str] = "project_id" -"""Error detail key for project ID.""" - -# Meeting 
title prefix -MEETING_TITLE_PREFIX: Final[str] = "Meeting " -"""Prefix for auto-generated meeting titles.""" - -# Error message prefixes -ERROR_MSG_MEETING_PREFIX: Final[str] = "Meeting " -"""Prefix for meeting-related error messages.""" - -# Error messages -ERROR_PROJECT_ID_REQUIRED: Final[str] = "project_id is required" -"""Error message when project_id is missing.""" - -ERROR_INVALID_PROJECT_ID_PREFIX: Final[str] = "Invalid project_id: " -"""Prefix for invalid project_id error messages.""" - -ERROR_WORKSPACE_ID_REQUIRED: Final[str] = "workspace_id is required" -"""Error message when workspace_id is missing.""" - -ERROR_INVALID_WORKSPACE_ID_PREFIX: Final[str] = "Invalid workspace_id: " -"""Prefix for invalid workspace_id error messages.""" - -ERROR_USER_ID_REQUIRED: Final[str] = "user_id is required" -"""Error message when user_id is missing.""" - -ERROR_INVALID_UUID_PREFIX: Final[str] = "Invalid UUID: " -"""Prefix for invalid UUID error messages.""" - -ERROR_INVALID_WORKSPACE_ID_FORMAT: Final[str] = "Invalid workspace_id format" -"""Error message for invalid workspace_id format.""" - -# Database feature names -FEATURE_NAME_PROJECTS: Final[str] = "Projects" -"""Feature name for projects in error messages.""" - -# Error message prefixes for entity not found -ERROR_MSG_WORKSPACE_PREFIX: Final[str] = "Workspace " -"""Prefix for workspace-related error messages.""" - -ERROR_MSG_PROJECT_PREFIX: Final[str] = "Project " -"""Prefix for project-related error messages.""" - -# Provider names -PROVIDER_NAME_OPENAI: Final[str] = "openai" -"""OpenAI provider name.""" - -# Status messages -STATUS_DISABLED: Final[str] = "Disabled" -"""Status message for disabled features.""" - -# Settings field names -SETTING_CLOUD_CONSENT_GRANTED: Final[str] = "cloud_consent_granted" -"""Settings field name for cloud consent granted.""" - -# Validation error messages -ERROR_MSG_END_TIME_PREFIX: Final[str] = "end_time (" -"""Prefix for end_time validation error messages.""" - -ERROR_MSG_START_TIME_COMPARISON: Final[str] = ") must be >= start_time (" -"""Middle part of time validation error messages.""" - -# Trigger action values -TRIGGER_ACTION_IGNORE: Final[Literal["ignore"]] = "ignore" -"""Trigger action value for ignore.""" - -# Rule schema field names -RULE_FIELD_DEFAULT_FORMAT: Final[str] = "default_format" -"""Rule schema field name for default format.""" - -RULE_FIELD_INCLUDE_AUDIO: Final[str] = "include_audio" -"""Rule schema field name for include audio.""" - -RULE_FIELD_INCLUDE_TIMESTAMPS: Final[str] = "include_timestamps" -"""Rule schema field name for include timestamps.""" - -RULE_FIELD_TEMPLATE_ID: Final[str] = "template_id" -"""Rule schema field name for template ID.""" - -RULE_FIELD_AUTO_START_ENABLED: Final[str] = "auto_start_enabled" -"""Rule schema field name for auto start enabled.""" - -RULE_FIELD_DESCRIPTION: Final[str] = "description" -"""Rule schema field name for description.""" - -RULE_FIELD_EXPORT_RULES: Final[str] = "export_rules" -"""Rule schema field name for export rules.""" - -RULE_FIELD_TRIGGER_RULES: Final[str] = "trigger_rules" -"""Rule schema field name for trigger rules.""" - -RULE_FIELD_CALENDAR_MATCH_PATTERNS: Final[str] = "calendar_match_patterns" -"""Rule schema field name for calendar match patterns.""" - -RULE_FIELD_APP_MATCH_PATTERNS: Final[str] = "app_match_patterns" -"""Rule schema field name for app match patterns.""" - -# Validation error message suffixes -ERROR_SUFFIX_MUST_BE_BOOLEAN: Final[str] = " must be a boolean" -"""Suffix for boolean validation error messages.""" 
- -# JSON schema type names -SCHEMA_TYPE_STRING: Final[str] = "string" -"""JSON schema type name for string.""" - -SCHEMA_TYPE_BOOLEAN: Final[str] = "boolean" -"""JSON schema type name for boolean.""" - -SCHEMA_TYPE_ARRAY_ITEMS: Final[str] = "items" -"""JSON schema type name for array items.""" - -# Log event names - centralized to avoid repeated strings -LOG_EVENT_DATABASE_REQUIRED_FOR_ANNOTATIONS: Final[str] = "database_required_for_annotations" -"""Log event when annotations require database persistence.""" - -LOG_EVENT_ANNOTATION_NOT_FOUND: Final[str] = "annotation_not_found" -"""Log event when annotation lookup fails.""" - -LOG_EVENT_INVALID_ANNOTATION_ID: Final[str] = "invalid_annotation_id" -"""Log event when annotation ID is invalid.""" - -LOG_EVENT_SERVICE_NOT_ENABLED: Final[str] = "service_not_enabled" -"""Log event when a service feature is not enabled.""" - -LOG_EVENT_WEBHOOK_REGISTRATION_FAILED: Final[str] = "webhook_registration_failed" -"""Log event when webhook registration fails.""" - -LOG_EVENT_WEBHOOK_UPDATE_FAILED: Final[str] = "webhook_update_failed" -"""Log event when webhook update fails.""" - -LOG_EVENT_WEBHOOK_DELETE_FAILED: Final[str] = "webhook_delete_failed" -"""Log event when webhook deletion fails.""" - -LOG_EVENT_INVALID_WEBHOOK_ID: Final[str] = "invalid_webhook_id" -"""Log event when webhook ID is invalid.""" diff --git a/src/noteflow/config/constants/__init__.py b/src/noteflow/config/constants/__init__.py new file mode 100644 index 0000000..0094cc3 --- /dev/null +++ b/src/noteflow/config/constants/__init__.py @@ -0,0 +1,188 @@ +"""Centralized constants for NoteFlow. + +This package provides shared constants used across the codebase to avoid +magic numbers and ensure consistency. + +Constants are organized into domain-specific modules: +- core: Time, audio, gRPC, application paths +- http: HTTP status codes, headers, OAuth fields +- errors: Error messages, log event names +- domain: Meeting defaults, export formats, NER, rules, triggers + +All constants are re-exported here for backwards compatibility. 
+""" + +# Core constants +from noteflow.config.constants.core import ( + APP_DIR_NAME, + AUDIO_BUFFER_SIZE_BYTES, + DEFAULT_GRPC_PORT, + DEFAULT_SAMPLE_RATE, + MAX_GRPC_MESSAGE_SIZE, + PERIODIC_FLUSH_INTERVAL_SECONDS, + POSITION_UPDATE_INTERVAL, + SECONDS_PER_HOUR, +) + +# Domain constants +from noteflow.config.constants.domain import ( + DEFAULT_MEETING_TITLE, + EXPORT_EXT_HTML, + EXPORT_EXT_PDF, + EXPORT_FORMAT_HTML, + FEATURE_NAME_PROJECTS, + MEETING_TITLE_PREFIX, + PROVIDER_NAME_OPENAI, + RULE_FIELD_APP_MATCH_PATTERNS, + RULE_FIELD_AUTO_START_ENABLED, + RULE_FIELD_CALENDAR_MATCH_PATTERNS, + RULE_FIELD_DEFAULT_FORMAT, + RULE_FIELD_DESCRIPTION, + RULE_FIELD_EXPORT_RULES, + RULE_FIELD_INCLUDE_AUDIO, + RULE_FIELD_INCLUDE_TIMESTAMPS, + RULE_FIELD_TEMPLATE_ID, + RULE_FIELD_TRIGGER_RULES, + SCHEMA_TYPE_ARRAY_ITEMS, + SCHEMA_TYPE_BOOLEAN, + SCHEMA_TYPE_STRING, + SETTING_CLOUD_CONSENT_GRANTED, + SPACY_MODEL_LG, + SPACY_MODEL_MD, + SPACY_MODEL_SM, + SPACY_MODEL_TRF, + STATUS_DISABLED, + TRIGGER_ACTION_IGNORE, +) + +# Error constants +from noteflow.config.constants.errors import ( + ERR_API_PREFIX, + ERR_HF_TOKEN_REQUIRED, + ERR_SERVER_RESTARTED, + ERR_TOKEN_EXPIRED, + ERR_TOKEN_REFRESH_PREFIX, + ERROR_DETAIL_PROJECT_ID, + ERROR_INVALID_PROJECT_ID_PREFIX, + ERROR_INVALID_UUID_PREFIX, + ERROR_INVALID_WORKSPACE_ID_FORMAT, + ERROR_INVALID_WORKSPACE_ID_PREFIX, + ERROR_MSG_END_TIME_PREFIX, + ERROR_MSG_MEETING_PREFIX, + ERROR_MSG_PROJECT_PREFIX, + ERROR_MSG_START_TIME_COMPARISON, + ERROR_MSG_WORKSPACE_PREFIX, + ERROR_PROJECT_ID_REQUIRED, + ERROR_SUFFIX_MUST_BE_BOOLEAN, + ERROR_USER_ID_REQUIRED, + ERROR_WORKSPACE_ID_REQUIRED, + LOG_EVENT_ANNOTATION_NOT_FOUND, + LOG_EVENT_DATABASE_REQUIRED_FOR_ANNOTATIONS, + LOG_EVENT_INVALID_ANNOTATION_ID, + LOG_EVENT_INVALID_WEBHOOK_ID, + LOG_EVENT_SERVICE_NOT_ENABLED, + LOG_EVENT_WEBHOOK_DELETE_FAILED, + LOG_EVENT_WEBHOOK_REGISTRATION_FAILED, + LOG_EVENT_WEBHOOK_UPDATE_FAILED, +) + +# HTTP constants +from noteflow.config.constants.http import ( + DEFAULT_OAUTH_TOKEN_EXPIRY_SECONDS, + HTTP_AUTHORIZATION, + HTTP_BEARER_PREFIX, + HTTP_STATUS_BAD_REQUEST, + HTTP_STATUS_INTERNAL_SERVER_ERROR, + HTTP_STATUS_NO_CONTENT, + HTTP_STATUS_NOT_FOUND, + HTTP_STATUS_OK, + HTTP_STATUS_UNAUTHORIZED, + OAUTH_FIELD_ACCESS_TOKEN, + OAUTH_FIELD_EXPIRES_IN, + OAUTH_FIELD_REFRESH_TOKEN, + OAUTH_FIELD_SCOPE, + OAUTH_FIELD_TOKEN_TYPE, +) + +__all__ = [ + # Core + "APP_DIR_NAME", + "AUDIO_BUFFER_SIZE_BYTES", + "DEFAULT_GRPC_PORT", + # Domain + "DEFAULT_MEETING_TITLE", + # HTTP + "DEFAULT_OAUTH_TOKEN_EXPIRY_SECONDS", + "DEFAULT_SAMPLE_RATE", + "ERROR_DETAIL_PROJECT_ID", + "ERROR_INVALID_PROJECT_ID_PREFIX", + "ERROR_INVALID_UUID_PREFIX", + "ERROR_INVALID_WORKSPACE_ID_FORMAT", + "ERROR_INVALID_WORKSPACE_ID_PREFIX", + "ERROR_MSG_END_TIME_PREFIX", + "ERROR_MSG_MEETING_PREFIX", + "ERROR_MSG_PROJECT_PREFIX", + "ERROR_MSG_START_TIME_COMPARISON", + "ERROR_MSG_WORKSPACE_PREFIX", + "ERROR_PROJECT_ID_REQUIRED", + "ERROR_SUFFIX_MUST_BE_BOOLEAN", + "ERROR_USER_ID_REQUIRED", + "ERROR_WORKSPACE_ID_REQUIRED", + # Errors + "ERR_API_PREFIX", + "ERR_HF_TOKEN_REQUIRED", + "ERR_SERVER_RESTARTED", + "ERR_TOKEN_EXPIRED", + "ERR_TOKEN_REFRESH_PREFIX", + "EXPORT_EXT_HTML", + "EXPORT_EXT_PDF", + "EXPORT_FORMAT_HTML", + "FEATURE_NAME_PROJECTS", + "HTTP_AUTHORIZATION", + "HTTP_BEARER_PREFIX", + "HTTP_STATUS_BAD_REQUEST", + "HTTP_STATUS_INTERNAL_SERVER_ERROR", + "HTTP_STATUS_NOT_FOUND", + "HTTP_STATUS_NO_CONTENT", + "HTTP_STATUS_OK", + "HTTP_STATUS_UNAUTHORIZED", + "LOG_EVENT_ANNOTATION_NOT_FOUND", + 
"LOG_EVENT_DATABASE_REQUIRED_FOR_ANNOTATIONS", + "LOG_EVENT_INVALID_ANNOTATION_ID", + "LOG_EVENT_INVALID_WEBHOOK_ID", + "LOG_EVENT_SERVICE_NOT_ENABLED", + "LOG_EVENT_WEBHOOK_DELETE_FAILED", + "LOG_EVENT_WEBHOOK_REGISTRATION_FAILED", + "LOG_EVENT_WEBHOOK_UPDATE_FAILED", + "MAX_GRPC_MESSAGE_SIZE", + "MEETING_TITLE_PREFIX", + "OAUTH_FIELD_ACCESS_TOKEN", + "OAUTH_FIELD_EXPIRES_IN", + "OAUTH_FIELD_REFRESH_TOKEN", + "OAUTH_FIELD_SCOPE", + "OAUTH_FIELD_TOKEN_TYPE", + "PERIODIC_FLUSH_INTERVAL_SECONDS", + "POSITION_UPDATE_INTERVAL", + "PROVIDER_NAME_OPENAI", + "RULE_FIELD_APP_MATCH_PATTERNS", + "RULE_FIELD_AUTO_START_ENABLED", + "RULE_FIELD_CALENDAR_MATCH_PATTERNS", + "RULE_FIELD_DEFAULT_FORMAT", + "RULE_FIELD_DESCRIPTION", + "RULE_FIELD_EXPORT_RULES", + "RULE_FIELD_INCLUDE_AUDIO", + "RULE_FIELD_INCLUDE_TIMESTAMPS", + "RULE_FIELD_TEMPLATE_ID", + "RULE_FIELD_TRIGGER_RULES", + "SCHEMA_TYPE_ARRAY_ITEMS", + "SCHEMA_TYPE_BOOLEAN", + "SCHEMA_TYPE_STRING", + "SECONDS_PER_HOUR", + "SETTING_CLOUD_CONSENT_GRANTED", + "SPACY_MODEL_LG", + "SPACY_MODEL_MD", + "SPACY_MODEL_SM", + "SPACY_MODEL_TRF", + "STATUS_DISABLED", + "TRIGGER_ACTION_IGNORE", +] diff --git a/src/noteflow/config/constants/core.py b/src/noteflow/config/constants/core.py new file mode 100644 index 0000000..00f02ca --- /dev/null +++ b/src/noteflow/config/constants/core.py @@ -0,0 +1,46 @@ +"""Core application constants. + +Time conversions, audio settings, gRPC configuration, and application paths. +""" + +from typing import Final + +# ============================================================================= +# Time Conversion +# ============================================================================= + +SECONDS_PER_HOUR: Final[float] = 3600.0 +"""Seconds in one hour, for time unit conversions.""" + +# ============================================================================= +# Audio Settings +# ============================================================================= + +DEFAULT_SAMPLE_RATE: Final[int] = 16000 +"""Default audio sample rate in Hz (16 kHz).""" + +POSITION_UPDATE_INTERVAL: Final[float] = 0.1 +"""Playback position update interval in seconds (100ms).""" + +AUDIO_BUFFER_SIZE_BYTES: Final[int] = 320_000 +"""Target audio buffer size before encryption (320 KB = ~10 seconds at 16kHz PCM16).""" + +PERIODIC_FLUSH_INTERVAL_SECONDS: Final[float] = 2.0 +"""Interval for periodic audio buffer flush to disk (crash resilience).""" + +# ============================================================================= +# gRPC Settings +# ============================================================================= + +DEFAULT_GRPC_PORT: Final[int] = 50051 +"""Default gRPC server port (canonical definition).""" + +MAX_GRPC_MESSAGE_SIZE: Final[int] = 100 * 1024 * 1024 +"""Maximum gRPC message size in bytes (100 MB).""" + +# ============================================================================= +# Application Paths +# ============================================================================= + +APP_DIR_NAME: Final[str] = ".noteflow" +"""Application data directory name within user home.""" diff --git a/src/noteflow/config/constants/domain.py b/src/noteflow/config/constants/domain.py new file mode 100644 index 0000000..bdc1180 --- /dev/null +++ b/src/noteflow/config/constants/domain.py @@ -0,0 +1,123 @@ +"""Domain-specific constants. + +Meeting defaults, export formats, NER models, rules schema, triggers, and providers. 
+""" + +from typing import Final, Literal + +# ============================================================================= +# Meeting Defaults +# ============================================================================= + +DEFAULT_MEETING_TITLE: Final[str] = "Untitled" +"""Default title for meetings without an explicit title.""" + +MEETING_TITLE_PREFIX: Final[str] = "Meeting " +"""Prefix for auto-generated meeting titles.""" + +# ============================================================================= +# Export Formats +# ============================================================================= + +EXPORT_FORMAT_HTML: Final[str] = "HTML" +"""HTML export format display name.""" + +EXPORT_EXT_HTML: Final[str] = ".html" +"""HTML file extension.""" + +EXPORT_EXT_PDF: Final[str] = ".pdf" +"""PDF file extension.""" + +# ============================================================================= +# NER / spaCy Models +# ============================================================================= + +SPACY_MODEL_SM: Final[str] = "en_core_web_sm" +"""Small English spaCy model for NER.""" + +SPACY_MODEL_MD: Final[str] = "en_core_web_md" +"""Medium English spaCy model for NER.""" + +SPACY_MODEL_LG: Final[str] = "en_core_web_lg" +"""Large English spaCy model for NER.""" + +SPACY_MODEL_TRF: Final[str] = "en_core_web_trf" +"""Transformer-based English spaCy model for NER.""" + +# ============================================================================= +# Provider Names +# ============================================================================= + +PROVIDER_NAME_OPENAI: Final[str] = "openai" +"""OpenAI provider name.""" + +# ============================================================================= +# Feature Names & Status +# ============================================================================= + +FEATURE_NAME_PROJECTS: Final[str] = "Projects" +"""Feature name for projects in error messages.""" + +STATUS_DISABLED: Final[str] = "Disabled" +"""Status message for disabled features.""" + +# ============================================================================= +# Settings Field Names +# ============================================================================= + +SETTING_CLOUD_CONSENT_GRANTED: Final[str] = "cloud_consent_granted" +"""Settings field name for cloud consent granted.""" + +# ============================================================================= +# Trigger Actions +# ============================================================================= + +TRIGGER_ACTION_IGNORE: Final[Literal["ignore"]] = "ignore" +"""Trigger action value for ignore.""" + +# ============================================================================= +# Rule Schema Field Names +# ============================================================================= + +RULE_FIELD_DEFAULT_FORMAT: Final[str] = "default_format" +"""Rule schema field name for default format.""" + +RULE_FIELD_INCLUDE_AUDIO: Final[str] = "include_audio" +"""Rule schema field name for include audio.""" + +RULE_FIELD_INCLUDE_TIMESTAMPS: Final[str] = "include_timestamps" +"""Rule schema field name for include timestamps.""" + +RULE_FIELD_TEMPLATE_ID: Final[str] = "template_id" +"""Rule schema field name for template ID.""" + +RULE_FIELD_AUTO_START_ENABLED: Final[str] = "auto_start_enabled" +"""Rule schema field name for auto start enabled.""" + +RULE_FIELD_DESCRIPTION: Final[str] = "description" +"""Rule schema field name for description.""" + +RULE_FIELD_EXPORT_RULES: Final[str] = "export_rules" 
+"""Rule schema field name for export rules.""" + +RULE_FIELD_TRIGGER_RULES: Final[str] = "trigger_rules" +"""Rule schema field name for trigger rules.""" + +RULE_FIELD_CALENDAR_MATCH_PATTERNS: Final[str] = "calendar_match_patterns" +"""Rule schema field name for calendar match patterns.""" + +RULE_FIELD_APP_MATCH_PATTERNS: Final[str] = "app_match_patterns" +"""Rule schema field name for app match patterns.""" + +# ============================================================================= +# JSON Schema Type Names +# ============================================================================= + +SCHEMA_TYPE_STRING: Final[str] = "string" +"""JSON schema type name for string.""" + +SCHEMA_TYPE_BOOLEAN: Final[str] = "boolean" +"""JSON schema type name for boolean.""" + +SCHEMA_TYPE_ARRAY_ITEMS: Final[str] = "items" +"""JSON schema type name for array items.""" diff --git a/src/noteflow/config/constants/errors.py b/src/noteflow/config/constants/errors.py new file mode 100644 index 0000000..5fdf5a1 --- /dev/null +++ b/src/noteflow/config/constants/errors.py @@ -0,0 +1,107 @@ +"""Error message and log event constants. + +Centralized error messages, validation errors, and structured log event names. +""" + +from typing import Final + +# ============================================================================= +# Service Error Messages +# ============================================================================= + +ERR_HF_TOKEN_REQUIRED: Final[str] = "HuggingFace token required for pyannote models" +"""Error message when HuggingFace token is missing for pyannote.""" + +ERR_SERVER_RESTARTED: Final[str] = "Server restarted" +"""Error message for jobs interrupted by server restart.""" + +ERR_TOKEN_EXPIRED: Final[str] = "Access token expired or invalid" +"""Error for expired or invalid OAuth tokens.""" + +ERR_API_PREFIX: Final[str] = "API error: " +"""Prefix for API error messages.""" + +ERR_TOKEN_REFRESH_PREFIX: Final[str] = "Token refresh failed: " +"""Prefix for token refresh error messages.""" + +# ============================================================================= +# Entity Error Messages +# ============================================================================= + +ERROR_PROJECT_ID_REQUIRED: Final[str] = "project_id is required" +"""Error message when project_id is missing.""" + +ERROR_INVALID_PROJECT_ID_PREFIX: Final[str] = "Invalid project_id: " +"""Prefix for invalid project_id error messages.""" + +ERROR_WORKSPACE_ID_REQUIRED: Final[str] = "workspace_id is required" +"""Error message when workspace_id is missing.""" + +ERROR_INVALID_WORKSPACE_ID_PREFIX: Final[str] = "Invalid workspace_id: " +"""Prefix for invalid workspace_id error messages.""" + +ERROR_USER_ID_REQUIRED: Final[str] = "user_id is required" +"""Error message when user_id is missing.""" + +ERROR_INVALID_UUID_PREFIX: Final[str] = "Invalid UUID: " +"""Prefix for invalid UUID error messages.""" + +ERROR_INVALID_WORKSPACE_ID_FORMAT: Final[str] = "Invalid workspace_id format" +"""Error message for invalid workspace_id format.""" + +# ============================================================================= +# Entity Message Prefixes +# ============================================================================= + +ERROR_MSG_MEETING_PREFIX: Final[str] = "Meeting " +"""Prefix for meeting-related error messages.""" + +ERROR_MSG_WORKSPACE_PREFIX: Final[str] = "Workspace " +"""Prefix for workspace-related error messages.""" + +ERROR_MSG_PROJECT_PREFIX: Final[str] = "Project " +"""Prefix for project-related 
error messages.""" + +ERROR_DETAIL_PROJECT_ID: Final[str] = "project_id" +"""Error detail key for project ID.""" + +# ============================================================================= +# Validation Error Messages +# ============================================================================= + +ERROR_MSG_END_TIME_PREFIX: Final[str] = "end_time (" +"""Prefix for end_time validation error messages.""" + +ERROR_MSG_START_TIME_COMPARISON: Final[str] = ") must be >= start_time (" +"""Middle part of time validation error messages.""" + +ERROR_SUFFIX_MUST_BE_BOOLEAN: Final[str] = " must be a boolean" +"""Suffix for boolean validation error messages.""" + +# ============================================================================= +# Log Event Names +# ============================================================================= + +LOG_EVENT_DATABASE_REQUIRED_FOR_ANNOTATIONS: Final[str] = "database_required_for_annotations" +"""Log event when annotations require database persistence.""" + +LOG_EVENT_ANNOTATION_NOT_FOUND: Final[str] = "annotation_not_found" +"""Log event when annotation lookup fails.""" + +LOG_EVENT_INVALID_ANNOTATION_ID: Final[str] = "invalid_annotation_id" +"""Log event when annotation ID is invalid.""" + +LOG_EVENT_SERVICE_NOT_ENABLED: Final[str] = "service_not_enabled" +"""Log event when a service feature is not enabled.""" + +LOG_EVENT_WEBHOOK_REGISTRATION_FAILED: Final[str] = "webhook_registration_failed" +"""Log event when webhook registration fails.""" + +LOG_EVENT_WEBHOOK_UPDATE_FAILED: Final[str] = "webhook_update_failed" +"""Log event when webhook update fails.""" + +LOG_EVENT_WEBHOOK_DELETE_FAILED: Final[str] = "webhook_delete_failed" +"""Log event when webhook deletion fails.""" + +LOG_EVENT_INVALID_WEBHOOK_ID: Final[str] = "invalid_webhook_id" +"""Log event when webhook ID is invalid.""" diff --git a/src/noteflow/config/constants/http.py b/src/noteflow/config/constants/http.py new file mode 100644 index 0000000..7be584a --- /dev/null +++ b/src/noteflow/config/constants/http.py @@ -0,0 +1,66 @@ +"""HTTP and OAuth constants. + +HTTP status codes, headers, and OAuth field names. 
+""" + +from typing import Final + +from .core import SECONDS_PER_HOUR + +# ============================================================================= +# OAuth Token Settings +# ============================================================================= + +DEFAULT_OAUTH_TOKEN_EXPIRY_SECONDS: Final[int] = int(SECONDS_PER_HOUR) +"""Default OAuth token expiry time in seconds (1 hour).""" + +# ============================================================================= +# OAuth Field Names +# ============================================================================= + +OAUTH_FIELD_ACCESS_TOKEN: Final[str] = "access_token" +"""OAuth access token field name.""" + +OAUTH_FIELD_REFRESH_TOKEN: Final[str] = "refresh_token" +"""OAuth refresh token field name.""" + +OAUTH_FIELD_TOKEN_TYPE: Final[str] = "token_type" +"""OAuth token type field name.""" + +OAUTH_FIELD_SCOPE: Final[str] = "scope" +"""OAuth scope field name.""" + +OAUTH_FIELD_EXPIRES_IN: Final[str] = "expires_in" +"""OAuth expires_in field name.""" + +# ============================================================================= +# HTTP Headers +# ============================================================================= + +HTTP_AUTHORIZATION: Final[str] = "Authorization" +"""HTTP Authorization header name.""" + +HTTP_BEARER_PREFIX: Final[str] = "Bearer " +"""Standard HTTP authorization header prefix.""" + +# ============================================================================= +# HTTP Status Codes +# ============================================================================= + +HTTP_STATUS_OK: Final[int] = 200 +"""HTTP 200 OK status code.""" + +HTTP_STATUS_NO_CONTENT: Final[int] = 204 +"""HTTP 204 No Content status code.""" + +HTTP_STATUS_BAD_REQUEST: Final[int] = 400 +"""HTTP 400 Bad Request status code.""" + +HTTP_STATUS_UNAUTHORIZED: Final[int] = 401 +"""HTTP 401 Unauthorized status code.""" + +HTTP_STATUS_NOT_FOUND: Final[int] = 404 +"""HTTP 404 Not Found status code.""" + +HTTP_STATUS_INTERNAL_SERVER_ERROR: Final[int] = 500 +"""HTTP 500 Internal Server Error status code.""" diff --git a/src/noteflow/grpc/_mixins/observability.py b/src/noteflow/grpc/_mixins/observability.py index 96fc2b7..9714923 100644 --- a/src/noteflow/grpc/_mixins/observability.py +++ b/src/noteflow/grpc/_mixins/observability.py @@ -8,6 +8,7 @@ import grpc.aio from noteflow.infrastructure.logging import get_log_buffer from noteflow.infrastructure.metrics import PerformanceMetrics, get_metrics_collector +from noteflow.infrastructure.persistence.constants import DEFAULT_LOG_LIMIT, MAX_LOG_LIMIT from ..proto import noteflow_pb2 @@ -35,7 +36,7 @@ class ObservabilityMixin: buffer = get_log_buffer() # Apply defaults and limits - limit = min(request.limit or 100, 1000) + limit = min(request.limit or DEFAULT_LOG_LIMIT, MAX_LOG_LIMIT) level = request.level if request.level else None source = request.source if request.source else None diff --git a/src/noteflow/grpc/_mixins/sync.py b/src/noteflow/grpc/_mixins/sync.py index f27e997..fe7dd80 100644 --- a/src/noteflow/grpc/_mixins/sync.py +++ b/src/noteflow/grpc/_mixins/sync.py @@ -10,6 +10,7 @@ import grpc.aio from noteflow.domain.entities import SyncRun from noteflow.infrastructure.logging import get_logger +from noteflow.infrastructure.persistence.constants import DEFAULT_LIST_LIMIT from ..proto import noteflow_pb2 from .errors import ( @@ -157,7 +158,7 @@ class SyncMixin: events = await calendar_service.list_calendar_events( provider=provider, hours_ahead=168, # 1 week - limit=100, + 
limit=DEFAULT_LIST_LIMIT, ) return len(events) diff --git a/src/noteflow/grpc/_mixins/webhooks.py b/src/noteflow/grpc/_mixins/webhooks.py index a56f35f..8e7da1c 100644 --- a/src/noteflow/grpc/_mixins/webhooks.py +++ b/src/noteflow/grpc/_mixins/webhooks.py @@ -22,6 +22,10 @@ from noteflow.domain.webhooks.events import ( WebhookEventType, ) from noteflow.infrastructure.logging import get_logger +from noteflow.infrastructure.persistence.constants import ( + DEFAULT_WEBHOOK_DELIVERY_HISTORY_LIMIT, + MAX_WEBHOOK_DELIVERIES_LIMIT, +) from ..proto import noteflow_pb2 from .errors import abort_database_required, abort_invalid_argument, abort_not_found @@ -286,7 +290,7 @@ class WebhooksMixin: ) await abort_invalid_argument(context, _ERR_INVALID_WEBHOOK_ID) - limit = min(request.limit or 50, 500) + limit = min(request.limit or DEFAULT_WEBHOOK_DELIVERY_HISTORY_LIMIT, MAX_WEBHOOK_DELIVERIES_LIMIT) async with self._create_repository_provider() as uow: if not uow.supports_webhooks: diff --git a/src/noteflow/infrastructure/calendar/oauth_manager.py b/src/noteflow/infrastructure/calendar/oauth_manager.py index 52d784f..76a77ac 100644 --- a/src/noteflow/infrastructure/calendar/oauth_manager.py +++ b/src/noteflow/infrastructure/calendar/oauth_manager.py @@ -18,6 +18,8 @@ import httpx from noteflow.config.constants import ( DEFAULT_OAUTH_TOKEN_EXPIRY_SECONDS, ERR_TOKEN_REFRESH_PREFIX, + HTTP_STATUS_NO_CONTENT, + HTTP_STATUS_OK, OAUTH_FIELD_ACCESS_TOKEN, OAUTH_FIELD_REFRESH_TOKEN, OAUTH_FIELD_SCOPE, @@ -211,7 +213,7 @@ class OAuthManager(OAuthPort): async with httpx.AsyncClient() as client: response = await client.post(token_url, data=data) - if response.status_code != 200: + if response.status_code != HTTP_STATUS_OK: error_detail = response.text logger.error( "Token refresh failed for provider=%s: %s", @@ -255,7 +257,7 @@ class OAuthManager(OAuthPort): params={"post_logout_redirect_uri": self._settings.redirect_uri}, ) - if response.status_code in (200, 204): + if response.status_code in (HTTP_STATUS_OK, HTTP_STATUS_NO_CONTENT): logger.info("Revoked tokens for provider=%s", provider.value) return True @@ -367,7 +369,7 @@ class OAuthManager(OAuthPort): async with httpx.AsyncClient() as client: response = await client.post(token_url, data=data) - if response.status_code != 200: + if response.status_code != HTTP_STATUS_OK: error_detail = response.text logger.error( "Token exchange failed for provider=%s: %s", diff --git a/src/noteflow/infrastructure/persistence/constants.py b/src/noteflow/infrastructure/persistence/constants.py index 7584499..073e6ef 100644 --- a/src/noteflow/infrastructure/persistence/constants.py +++ b/src/noteflow/infrastructure/persistence/constants.py @@ -31,6 +31,12 @@ MIN_LIST_LIMIT: Final[int] = 1 MAX_WEBHOOK_DELIVERIES_LIMIT: Final[int] = 500 """Maximum number of webhook deliveries that can be returned.""" +DEFAULT_LOG_LIMIT: Final[int] = 100 +"""Default number of log entries to return.""" + +MAX_LOG_LIMIT: Final[int] = 1000 +"""Maximum number of log entries that can be returned.""" + # ============================================================================= # Database Field Sizes # ============================================================================= diff --git a/tests/benchmarks/test_hot_paths.py b/tests/benchmarks/test_hot_paths.py index 60802af..77e40f7 100644 --- a/tests/benchmarks/test_hot_paths.py +++ b/tests/benchmarks/test_hot_paths.py @@ -14,13 +14,14 @@ import numpy as np import pytest from numpy.typing import NDArray +from noteflow.config.constants import 
DEFAULT_SAMPLE_RATE from noteflow.infrastructure.asr.segmenter import Segmenter, SegmenterConfig from noteflow.infrastructure.asr.streaming_vad import EnergyVad, StreamingVad from noteflow.infrastructure.audio.levels import RmsLevelProvider, compute_rms # Standard audio chunk size (100ms at 16kHz) CHUNK_SIZE = 1600 -SAMPLE_RATE = 16000 +SAMPLE_RATE = DEFAULT_SAMPLE_RATE # Typical partial buffer holds ~2s of audio (20 chunks x 100ms) TYPICAL_PARTIAL_CHUNKS = 20 # dB floor for silence detection diff --git a/tests/conftest.py b/tests/conftest.py index 8557e61..22baf49 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -19,7 +19,12 @@ from uuid import uuid4 import pytest +from noteflow.config.constants import DEFAULT_SAMPLE_RATE from noteflow.config.settings import CalendarIntegrationSettings + +# Re-export for test convenience - tests can import from conftest +SAMPLE_RATE = DEFAULT_SAMPLE_RATE +"""Standard test sample rate (16 kHz). Import this instead of using magic number 16000.""" from noteflow.domain.entities import Meeting from noteflow.domain.value_objects import MeetingId from noteflow.domain.webhooks import WebhookConfig, WebhookEventType @@ -263,6 +268,12 @@ def recording_meeting() -> Meeting: return meeting +@pytest.fixture +def sample_rate() -> int: + """Default audio sample rate (16 kHz) for testing.""" + return DEFAULT_SAMPLE_RATE + + # ============================================================================ # gRPC context mock # ============================================================================ diff --git a/tests/integration/test_streaming_real_pipeline.py b/tests/integration/test_streaming_real_pipeline.py index a943924..44f470e 100644 --- a/tests/integration/test_streaming_real_pipeline.py +++ b/tests/integration/test_streaming_real_pipeline.py @@ -11,6 +11,7 @@ import grpc import numpy as np import pytest +from noteflow.config.constants import DEFAULT_SAMPLE_RATE from noteflow.domain.entities import Meeting from noteflow.grpc.proto import noteflow_pb2 from noteflow.grpc.service import NoteFlowServicer @@ -20,7 +21,7 @@ from noteflow.infrastructure.persistence.unit_of_work import SqlAlchemyUnitOfWor if TYPE_CHECKING: from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker -SAMPLE_RATE = 16000 +SAMPLE_RATE = DEFAULT_SAMPLE_RATE CHUNK_SAMPLES = 1600 # 0.1s at 16kHz SPEECH_CHUNKS = 4 SILENCE_CHUNKS = 10 diff --git a/tests/quality/_test_smell_collectors.py b/tests/quality/_test_smell_collectors.py new file mode 100644 index 0000000..1932085 --- /dev/null +++ b/tests/quality/_test_smell_collectors.py @@ -0,0 +1,587 @@ +"""Test smell violation collectors for baseline generation. + +These collectors are used by generate_baseline.py to capture current +test smell violations for baseline enforcement. 
+""" + +from __future__ import annotations + +import ast +import re +from collections import defaultdict +from pathlib import Path + +from tests.quality._baseline import Violation +from tests.quality._helpers import ( + find_test_files, + parse_file_safe, + read_file_safe, + relative_path, +) + + +def _get_test_methods(tree: ast.AST) -> list[ast.FunctionDef | ast.AsyncFunctionDef]: + """Extract test methods from AST.""" + tests: list[ast.FunctionDef | ast.AsyncFunctionDef] = [] + for node in ast.walk(tree): + if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)): + if node.name.startswith("test_"): + tests.append(node) + return tests + + +def _get_fixtures(tree: ast.AST) -> list[ast.FunctionDef]: + """Extract pytest fixtures from AST.""" + fixtures: list[ast.FunctionDef] = [] + for node in ast.walk(tree): + if isinstance(node, ast.FunctionDef): + for decorator in node.decorator_list: + if isinstance(decorator, ast.Attribute): + if decorator.attr == "fixture": + fixtures.append(node) + break + elif isinstance(decorator, ast.Call): + if isinstance(decorator.func, ast.Attribute): + if decorator.func.attr == "fixture": + fixtures.append(node) + break + elif isinstance(decorator, ast.Name) and decorator.id == "fixture": + fixtures.append(node) + break + return fixtures + + +def _get_fixture_scope(node: ast.FunctionDef) -> str | None: + """Extract fixture scope from decorator.""" + for decorator in node.decorator_list: + if isinstance(decorator, ast.Call): + for keyword in decorator.keywords: + if keyword.arg == "scope": + if isinstance(keyword.value, ast.Constant): + return str(keyword.value.value) + return None + + +def collect_assertion_roulette() -> list[Violation]: + """Collect assertion roulette violations.""" + violations: list[Violation] = [] + + for py_file in find_test_files(): + tree, error = parse_file_safe(py_file) + if error or tree is None: + continue + + rel_path = relative_path(py_file) + + for test_method in _get_test_methods(tree): + assertions_without_msg = 0 + for node in ast.walk(test_method): + if isinstance(node, ast.Assert): + if node.msg is None: + assertions_without_msg += 1 + + if assertions_without_msg > 3: + violations.append( + Violation( + rule="assertion_roulette", + relative_path=rel_path, + identifier=test_method.name, + detail=f"assertions={assertions_without_msg}", + ) + ) + + return violations + + +def collect_conditional_test_logic() -> list[Violation]: + """Collect conditional test logic violations.""" + violations: list[Violation] = [] + + def _contains_assertion(node: ast.AST) -> bool: + return any(isinstance(child, ast.Assert) for child in ast.walk(node)) + + for py_file in find_test_files(): + if "stress" in py_file.parts: + continue + + tree, error = parse_file_safe(py_file) + if error or tree is None: + continue + + rel_path = relative_path(py_file) + + for test_method in _get_test_methods(tree): + conditionals: list[str] = [] + + for node in ast.walk(test_method): + if isinstance(node, ast.If) and _contains_assertion(node): + conditionals.append(f"if@{node.lineno}") + elif isinstance(node, ast.For) and _contains_assertion(node): + conditionals.append(f"for@{node.lineno}") + elif isinstance(node, ast.While) and _contains_assertion(node): + conditionals.append(f"while@{node.lineno}") + + if conditionals: + violations.append( + Violation( + rule="conditional_test_logic", + relative_path=rel_path, + identifier=test_method.name, + detail=",".join(conditionals[:3]), + ) + ) + + return violations + + +def collect_sleepy_tests() -> list[Violation]: + 
"""Collect sleepy test violations.""" + allowed_sleepy_paths = { + "tests/stress/", + "tests/integration/test_signal_handling.py", + "tests/integration/test_database_resilience.py", + "tests/grpc/test_stream_lifecycle.py", + } + violations: list[Violation] = [] + + for py_file in find_test_files(): + rel_path = relative_path(py_file) + if any(allowed in rel_path for allowed in allowed_sleepy_paths): + continue + + tree, error = parse_file_safe(py_file) + if error or tree is None: + continue + + for test_method in _get_test_methods(tree): + for node in ast.walk(test_method): + if isinstance(node, ast.Call): + if isinstance(node.func, ast.Attribute): + if node.func.attr == "sleep": + if isinstance(node.func.value, ast.Name): + if node.func.value.id in {"time", "asyncio"}: + violations.append( + Violation( + rule="sleepy_test", + relative_path=rel_path, + identifier=test_method.name, + detail=f"line={node.lineno}", + ) + ) + + return violations + + +def collect_unknown_tests() -> list[Violation]: + """Collect unknown test (no assertion) violations.""" + violations: list[Violation] = [] + + for py_file in find_test_files(): + tree, error = parse_file_safe(py_file) + if error or tree is None: + continue + + rel_path = relative_path(py_file) + + for test_method in _get_test_methods(tree): + has_assertion = False + has_raises = False + + for node in ast.walk(test_method): + if isinstance(node, ast.Assert): + has_assertion = True + break + if isinstance(node, ast.With): + for item in node.items: + if isinstance(item.context_expr, ast.Call): + call = item.context_expr + if isinstance(call.func, ast.Attribute): + if call.func.attr in {"raises", "warns"}: + has_raises = True + + if not has_assertion and not has_raises: + has_call = any(isinstance(n, ast.Call) for n in ast.walk(test_method)) + if not has_call: + violations.append( + Violation( + rule="unknown_test", + relative_path=rel_path, + identifier=test_method.name, + ) + ) + + return violations + + +def collect_redundant_prints() -> list[Violation]: + """Collect redundant print violations in tests.""" + violations: list[Violation] = [] + + for py_file in find_test_files(): + tree, error = parse_file_safe(py_file) + if error or tree is None: + continue + + rel_path = relative_path(py_file) + + for test_method in _get_test_methods(tree): + for node in ast.walk(test_method): + if isinstance(node, ast.Call) and isinstance(node.func, ast.Name): + if node.func.id == "print": + violations.append( + Violation( + rule="redundant_print", + relative_path=rel_path, + identifier=test_method.name, + detail=f"line={node.lineno}", + ) + ) + + return violations + + +def collect_exception_handling() -> list[Violation]: + """Collect exception handling in tests violations.""" + violations: list[Violation] = [] + + for py_file in find_test_files(): + tree, error = parse_file_safe(py_file) + if error or tree is None: + continue + + rel_path = relative_path(py_file) + + for test_method in _get_test_methods(tree): + for node in ast.walk(test_method): + if isinstance(node, ast.Try): + for handler in node.handlers: + if handler.type is None: + violations.append( + Violation( + rule="exception_handling", + relative_path=rel_path, + identifier=test_method.name, + detail="bare_except", + ) + ) + elif isinstance(handler.type, ast.Name): + if handler.type.id in {"Exception", "BaseException"}: + violations.append( + Violation( + rule="exception_handling", + relative_path=rel_path, + identifier=test_method.name, + detail=f"catches_{handler.type.id}", + ) + ) + + return 
violations + + +def collect_magic_number_tests() -> list[Violation]: + """Collect magic number in test assertions violations.""" + allowed_numbers = {0, 1, 2, -1, 100, 1000} + violations: list[Violation] = [] + + for py_file in find_test_files(): + tree, error = parse_file_safe(py_file) + if error or tree is None: + continue + + rel_path = relative_path(py_file) + + for test_method in _get_test_methods(tree): + for node in ast.walk(test_method): + if isinstance(node, ast.Assert): + for child in ast.walk(node): + if isinstance(child, ast.Constant): + if isinstance(child.value, (int, float)): + if child.value not in allowed_numbers: + if abs(child.value) > 10: + violations.append( + Violation( + rule="magic_number_test", + relative_path=rel_path, + identifier=test_method.name, + detail=f"value={child.value}", + ) + ) + + return violations + + +def collect_sensitive_equality() -> list[Violation]: + """Collect sensitive equality (str/repr comparison) violations.""" + excluded_test_patterns = {"string", "proto", "conversion", "serializ", "preserves_message"} + excluded_file_patterns = {"_mixin"} + violations: list[Violation] = [] + + for py_file in find_test_files(): + if any(p in py_file.stem.lower() for p in excluded_file_patterns): + continue + + tree, error = parse_file_safe(py_file) + if error or tree is None: + continue + + rel_path = relative_path(py_file) + + for test_method in _get_test_methods(tree): + if any(p in test_method.name.lower() for p in excluded_test_patterns): + continue + for node in ast.walk(test_method): + if isinstance(node, ast.Assert): + if isinstance(node.test, ast.Compare): + operands = [node.test.left, *node.test.comparators] + for operand in operands: + if isinstance(operand, ast.Call): + if isinstance(operand.func, ast.Name): + if operand.func.id in {"str", "repr"}: + violations.append( + Violation( + rule="sensitive_equality", + relative_path=rel_path, + identifier=test_method.name, + detail=operand.func.id, + ) + ) + + return violations + + +def collect_eager_tests() -> list[Violation]: + """Collect eager test (too many method calls) violations.""" + max_method_calls = 10 + violations: list[Violation] = [] + + for py_file in find_test_files(): + tree, error = parse_file_safe(py_file) + if error or tree is None: + continue + + rel_path = relative_path(py_file) + + for test_method in _get_test_methods(tree): + method_calls: set[str] = set() + + for node in ast.walk(test_method): + if isinstance(node, ast.Call): + if isinstance(node.func, ast.Attribute): + if not node.func.attr.startswith("assert"): + method_calls.add(node.func.attr) + + if len(method_calls) > max_method_calls: + violations.append( + Violation( + rule="eager_test", + relative_path=rel_path, + identifier=test_method.name, + detail=f"methods={len(method_calls)}", + ) + ) + + return violations + + +def collect_duplicate_test_names() -> list[Violation]: + """Collect duplicate test name violations.""" + test_names: dict[str, list[tuple[Path, int]]] = defaultdict(list) + + for py_file in find_test_files(): + tree, error = parse_file_safe(py_file) + if error or tree is None: + continue + + for test_method in _get_test_methods(tree): + test_names[test_method.name].append((py_file, test_method.lineno)) + + duplicates = {name: locs for name, locs in test_names.items() if len(locs) > 1} + + violations: list[Violation] = [] + for name, locs in duplicates.items(): + first_path = relative_path(locs[0][0]) + violations.append( + Violation( + rule="duplicate_test_name", + relative_path=first_path, + 
identifier=name, + detail=f"count={len(locs)}", + ) + ) + + return violations + + +def collect_long_tests() -> list[Violation]: + """Collect long test method violations.""" + max_lines = 50 + violations: list[Violation] = [] + + for py_file in find_test_files(): + tree, error = parse_file_safe(py_file) + if error or tree is None: + continue + + rel_path = relative_path(py_file) + + for test_method in _get_test_methods(tree): + if test_method.end_lineno: + lines = test_method.end_lineno - test_method.lineno + 1 + if lines > max_lines: + violations.append( + Violation( + rule="long_test", + relative_path=rel_path, + identifier=test_method.name, + detail=f"lines={lines}", + ) + ) + + return violations + + +def collect_fixture_missing_type() -> list[Violation]: + """Collect fixtures missing type hints violations.""" + violations: list[Violation] = [] + + for py_file in find_test_files(): + tree, error = parse_file_safe(py_file) + if error or tree is None: + continue + + rel_path = relative_path(py_file) + + for fixture in _get_fixtures(tree): + if fixture.returns is None: + if not fixture.name.startswith("_"): + violations.append( + Violation( + rule="fixture_missing_type", + relative_path=rel_path, + identifier=fixture.name, + ) + ) + + return violations + + +def collect_unused_fixtures() -> list[Violation]: + """Collect unused fixture parameter violations.""" + violations: list[Violation] = [] + + for py_file in find_test_files(): + tree, error = parse_file_safe(py_file) + if error or tree is None: + continue + + rel_path = relative_path(py_file) + + for test_method in _get_test_methods(tree): + params = [ + arg.arg for arg in test_method.args.args + if arg.arg not in ("self", "cls") + ] + params = [p for p in params if not p.startswith("_")] + + used_names: set[str] = set() + for node in ast.walk(test_method): + if isinstance(node, ast.Name): + used_names.add(node.id) + + skip_params = { + "monkeypatch", "capsys", "capfd", "caplog", "tmp_path", + "tmp_path_factory", "request", "pytestconfig", "record_property", + "record_testsuite_property", "recwarn", "event_loop", + } + + for param in params: + if param not in used_names and param not in skip_params: + violations.append( + Violation( + rule="unused_fixture", + relative_path=rel_path, + identifier=test_method.name, + detail=param, + ) + ) + + return violations + + +def collect_fixture_scope_too_narrow() -> list[Violation]: + """Collect fixtures with potentially wrong scope violations.""" + expensive_patterns = [ + r"asyncpg\.connect", + r"create_async_engine", + r"aiohttp\.ClientSession", + r"httpx\.AsyncClient", + r"subprocess\.Popen", + r"docker\.", + r"testcontainers\.", + ] + violations: list[Violation] = [] + + for py_file in find_test_files(): + tree, error = parse_file_safe(py_file) + if error or tree is None: + continue + + content, _ = read_file_safe(py_file) + if content is None: + continue + + rel_path = relative_path(py_file) + + for fixture in _get_fixtures(tree): + scope = _get_fixture_scope(fixture) + fixture_source = ast.get_source_segment(content, fixture) + + if fixture_source: + if scope is None or scope == "function": + for pattern in expensive_patterns: + if re.search(pattern, fixture_source): + violations.append( + Violation( + rule="fixture_scope_too_narrow", + relative_path=rel_path, + identifier=fixture.name, + detail="expensive_setup", + ) + ) + break + + return violations + + +def collect_raises_without_match() -> list[Violation]: + """Collect pytest.raises without match violations.""" + violations: 
list[Violation] = [] + + for py_file in find_test_files(): + tree, error = parse_file_safe(py_file) + if error or tree is None: + continue + + rel_path = relative_path(py_file) + + for test_method in _get_test_methods(tree): + for node in ast.walk(test_method): + if isinstance(node, ast.Call): + if isinstance(node.func, ast.Attribute): + if node.func.attr == "raises": + if isinstance(node.func.value, ast.Name): + if node.func.value.id == "pytest": + has_match = any( + kw.arg == "match" for kw in node.keywords + ) + if not has_match: + violations.append( + Violation( + rule="raises_without_match", + relative_path=rel_path, + identifier=test_method.name, + detail=f"line={node.lineno}", + ) + ) + + return violations diff --git a/tests/quality/baselines.json b/tests/quality/baselines.json index a11424d..a65767c 100644 --- a/tests/quality/baselines.json +++ b/tests/quality/baselines.json @@ -1,13 +1,112 @@ { - "generated_at": "2025-12-31T15:22:31.401267+00:00", + "generated_at": "2025-12-31T15:28:38.066948+00:00", "rules": { "alias_import": [ "alias_import|src/noteflow/domain/auth/oidc.py|cc2f0972|datetime->dt", "alias_import|src/noteflow/grpc/service.py|d8a43a4a|__version__->NOTEFLOW_VERSION" ], + "assertion_roulette": [ + "assertion_roulette|tests/domain/test_meeting.py|test_immediate_stop_after_start_zero_duration|assertions=5", + "assertion_roulette|tests/domain/test_meeting.py|test_state_transition_does_not_modify_segments|assertions=4", + "assertion_roulette|tests/domain/test_project.py|test_default_values|assertions=4", + "assertion_roulette|tests/domain/test_project.py|test_default_values|assertions=4", + "assertion_roulette|tests/domain/test_project.py|test_settings_with_full_configuration|assertions=4", + "assertion_roulette|tests/domain/test_project.py|test_timestamps_are_set|assertions=4", + "assertion_roulette|tests/domain/test_project.py|test_with_nested_rules|assertions=4", + "assertion_roulette|tests/domain/test_project.py|test_with_values|assertions=4", + "assertion_roulette|tests/domain/test_summary.py|test_action_item_with_all_fields|assertions=4", + "assertion_roulette|tests/grpc/test_entities_mixin.py|test_returns_extracted_entities|assertions=4", + "assertion_roulette|tests/infrastructure/asr/test_engine.py|test_load_model_with_stub_sets_state|assertions=4", + "assertion_roulette|tests/infrastructure/asr/test_engine.py|test_transcribe_with_stubbed_model|assertions=5", + "assertion_roulette|tests/infrastructure/audio/test_writer.py|test_flush_writes_buffered_data|assertions=5", + "assertion_roulette|tests/infrastructure/audio/test_writer.py|test_manifest_contains_correct_metadata|assertions=6", + "assertion_roulette|tests/infrastructure/audio/test_writer.py|test_periodic_flush_thread_starts_on_open|assertions=4", + "assertion_roulette|tests/infrastructure/audio/test_writer.py|test_write_chunk_converts_float32_to_pcm16|assertions=4", + "assertion_roulette|tests/infrastructure/calendar/test_google_adapter.py|test_list_events_returns_calendar_events|assertions=7", + "assertion_roulette|tests/infrastructure/calendar/test_oauth_manager.py|test_initiate_google_auth_returns_url_and_state|assertions=6", + "assertion_roulette|tests/infrastructure/export/test_html.py|test_export_escapes_html|assertions=5", + "assertion_roulette|tests/infrastructure/export/test_markdown.py|test_export_includes_sections|assertions=10", + "assertion_roulette|tests/infrastructure/summarization/test_ollama_provider.py|test_ollama_summarize_returns_result|assertions=8", + 
"assertion_roulette|tests/infrastructure/test_calendar_converters.py|test_calendar_event_info_to_orm_kwargs|assertions=5", + "assertion_roulette|tests/infrastructure/test_observability.py|test_collect_now_returns_metrics|assertions=5", + "assertion_roulette|tests/infrastructure/test_orm_converters.py|test_converts_orm_to_domain_annotation|assertions=5", + "assertion_roulette|tests/infrastructure/triggers/test_foreground_app.py|test_foreground_app_settings_lowercases_apps|assertions=7", + "assertion_roulette|tests/integration/test_e2e_annotations.py|test_add_annotation_persists_to_database|assertions=8", + "assertion_roulette|tests/integration/test_e2e_annotations.py|test_update_annotation_modifies_database|assertions=4", + "assertion_roulette|tests/integration/test_e2e_ner.py|test_delete_does_not_affect_other_entities|assertions=4", + "assertion_roulette|tests/integration/test_grpc_servicer_database.py|test_create_meeting_persists_to_database|assertions=6", + "assertion_roulette|tests/integration/test_grpc_servicer_database.py|test_get_diarization_job_status_retrieves_from_database|assertions=4", + "assertion_roulette|tests/integration/test_grpc_servicer_database.py|test_get_meeting_with_segments|assertions=4", + "assertion_roulette|tests/integration/test_grpc_servicer_database.py|test_refine_speaker_diarization_creates_job_in_database|assertions=5", + "assertion_roulette|tests/integration/test_grpc_servicer_database.py|test_rename_speaker_updates_segments_in_database|assertions=4", + "assertion_roulette|tests/integration/test_preferences_repository.py|test_set_bulk_creates_multiple_preferences|assertions=4", + "assertion_roulette|tests/integration/test_signal_handling.py|test_cleanup_all_active_streams|assertions=7", + "assertion_roulette|tests/integration/test_signal_handling.py|test_shutdown_cancels_diarization_tasks|assertions=4", + "assertion_roulette|tests/integration/test_unit_of_work_advanced.py|test_diarization_job_workflow|assertions=4", + "assertion_roulette|tests/integration/test_unit_of_work_advanced.py|test_meeting_lifecycle_workflow|assertions=11", + "assertion_roulette|tests/integration/test_unit_of_work_advanced.py|test_repository_instances_cached_within_context|assertions=6", + "assertion_roulette|tests/integration/test_webhook_integration.py|test_stop_meeting_triggers_meeting_completed_webhook|assertions=6", + "assertion_roulette|tests/scripts/test_migrate_logging.py|test_adds_get_logger_to_existing_import|assertions=4", + "assertion_roulette|tests/scripts/test_migrate_logging.py|test_grpc_service_pattern|assertions=4", + "assertion_roulette|tests/scripts/test_migrate_logging.py|test_keeps_import_logging_when_constants_used|assertions=5", + "assertion_roulette|tests/scripts/test_migrate_logging.py|test_transforms_complex_module|assertions=5", + "assertion_roulette|tests/scripts/test_migrate_logging.py|test_transforms_simple_module|assertions=4", + "assertion_roulette|tests/scripts/test_migrate_logging.py|test_uses_logging_constants_detection|assertions=4" + ], + "conditional_test_logic": [ + "conditional_test_logic|tests/application/test_meeting_service.py|test_meeting_state_transitions|if@122", + "conditional_test_logic|tests/grpc/test_sprint_15_1_critical_bugs.py|test_no_datetime_now_in_diarization_mixin|for@234", + "conditional_test_logic|tests/grpc/test_stream_lifecycle.py|test_double_start_same_meeting_id_detected|if@454", + "conditional_test_logic|tests/infrastructure/audio/test_capture.py|test_get_default_device_returns_device_or_none|if@42", + 
"conditional_test_logic|tests/infrastructure/audio/test_ring_buffer.py|test_chunk_count_property|for@173", + "conditional_test_logic|tests/infrastructure/audio/test_ring_buffer.py|test_get_window_chronological_order|for@132", + "conditional_test_logic|tests/infrastructure/audio/test_ring_buffer.py|test_ring_buffer_duration_property|for@164", + "conditional_test_logic|tests/infrastructure/ner/test_engine.py|test_confidence_is_set|for@126", + "conditional_test_logic|tests/infrastructure/ner/test_engine.py|test_normalized_text_is_lowercase|for@119", + "conditional_test_logic|tests/infrastructure/persistence/test_migrations.py|test_all_migrations_have_down_revision|for@54", + "conditional_test_logic|tests/infrastructure/persistence/test_migrations.py|test_all_migrations_have_downgrade_function|for@85", + "conditional_test_logic|tests/infrastructure/persistence/test_migrations.py|test_all_migrations_have_revision|for@34", + "conditional_test_logic|tests/infrastructure/persistence/test_migrations.py|test_all_migrations_have_upgrade_function|for@74", + "conditional_test_logic|tests/infrastructure/test_observability.py|test_rapid_collection_maintains_order|for@403", + "conditional_test_logic|tests/infrastructure/test_observability.py|test_rapid_sequential_logging|for@356", + "conditional_test_logic|tests/infrastructure/triggers/test_calendar.py|test_datetime_parsing_formats|if@315", + "conditional_test_logic|tests/infrastructure/triggers/test_calendar.py|test_overlap_scenarios|if@177", + "conditional_test_logic|tests/integration/test_crash_scenarios.py|test_concurrent_recovery_calls|for@359", + "conditional_test_logic|tests/integration/test_database_resilience.py|test_concurrent_creates_unique_ids|for@235", + "conditional_test_logic|tests/integration/test_entity_repository.py|test_saves_multiple_entities|for@193", + "conditional_test_logic|tests/integration/test_recovery_service.py|test_recovers_multiple_meetings|for@146", + "conditional_test_logic|tests/integration/test_signal_handling.py|test_shutdown_cancels_diarization_tasks|for@85" + ], "deprecated_pattern": [ "deprecated_pattern|src/noteflow/infrastructure/export/html.py|b089eb78|str.format()" ], + "duplicate_test_name": [ + "duplicate_test_name|tests/application/test_recovery_service.py|test_audio_validation_skipped_without_meetings_dir|count=2", + "duplicate_test_name|tests/config/test_feature_flags.py|test_default_values|count=4", + "duplicate_test_name|tests/domain/test_project.py|test_is_frozen|count=2", + "duplicate_test_name|tests/domain/test_project.py|test_with_values|count=2", + "duplicate_test_name|tests/grpc/test_annotation_mixin.py|test_aborts_on_invalid_annotation_id|count=3", + "duplicate_test_name|tests/grpc/test_annotation_mixin.py|test_aborts_on_invalid_meeting_id|count=2", + "duplicate_test_name|tests/grpc/test_annotation_mixin.py|test_aborts_when_annotation_not_found|count=3", + "duplicate_test_name|tests/grpc/test_entities_mixin.py|test_aborts_when_entity_belongs_to_different_meeting|count=2", + "duplicate_test_name|tests/grpc/test_entities_mixin.py|test_aborts_when_entity_not_found|count=2", + "duplicate_test_name|tests/grpc/test_entities_mixin.py|test_aborts_with_invalid_entity_id_format|count=2", + "duplicate_test_name|tests/grpc/test_entities_mixin.py|test_aborts_with_invalid_meeting_id_format|count=2", + "duplicate_test_name|tests/grpc/test_entities_mixin.py|test_aborts_with_invalid_meeting_id|count=2", + "duplicate_test_name|tests/grpc/test_project_mixin.py|test_delete_project_not_found|count=2", + 
"duplicate_test_name|tests/infrastructure/summarization/test_cloud_provider.py|test_raises_invalid_response_on_empty_content|count=2", + "duplicate_test_name|tests/infrastructure/summarization/test_cloud_provider.py|test_summarize_returns_result|count=2" + ], + "eager_test": [ + "eager_test|tests/infrastructure/audio/test_writer.py|test_audio_roundtrip_encryption_decryption|methods=15", + "eager_test|tests/infrastructure/audio/test_writer.py|test_flush_is_thread_safe|methods=12", + "eager_test|tests/infrastructure/audio/test_writer.py|test_manifest_wrapped_dek_can_decrypt_audio|methods=11", + "eager_test|tests/infrastructure/audio/test_writer.py|test_write_chunk_clamps_audio_range|methods=11" + ], + "exception_handling": [ + "exception_handling|tests/integration/test_memory_fallback.py|test_concurrent_reads_and_writes|catches_Exception", + "exception_handling|tests/integration/test_memory_fallback.py|test_concurrent_reads_and_writes|catches_Exception" + ], "high_complexity": [ "high_complexity|src/noteflow/infrastructure/observability/usage.py|record|complexity=20" ], @@ -37,6 +136,59 @@ "long_parameter_list|src/noteflow/infrastructure/observability/usage.py|record_simple|params=9", "long_parameter_list|src/noteflow/infrastructure/webhooks/executor.py|_create_delivery|params=9" ], + "long_test": [ + "long_test|tests/infrastructure/audio/test_capture.py|test_start_with_stubbed_stream_invokes_callback|lines=54", + "long_test|tests/integration/test_e2e_streaming.py|test_segments_persisted_to_database|lines=70", + "long_test|tests/integration/test_unit_of_work_advanced.py|test_meeting_lifecycle_workflow|lines=63" + ], + "magic_number_test": [ + "magic_number_test|tests/domain/test_annotation.py|test_annotation_very_long_duration|value=7200.0", + "magic_number_test|tests/domain/test_meeting.py|test_duration_seconds_with_times|value=1800.0", + "magic_number_test|tests/domain/test_segment.py|test_segment_very_long_duration|value=36000.0", + "magic_number_test|tests/domain/test_summary.py|test_key_point_with_many_segment_ids|value=50", + "magic_number_test|tests/domain/test_summary.py|test_key_point_with_timing|value=10.5", + "magic_number_test|tests/domain/test_summary.py|test_key_point_with_timing|value=25.0", + "magic_number_test|tests/domain/test_summary.py|test_summary_very_long_executive_summary|value=10000", + "magic_number_test|tests/grpc/test_annotation_mixin.py|test_returns_annotation_when_found|value=120.0", + "magic_number_test|tests/grpc/test_annotation_mixin.py|test_updates_annotation_successfully|value=15.0", + "magic_number_test|tests/grpc/test_annotation_mixin.py|test_updates_annotation_successfully|value=25.0", + "magic_number_test|tests/grpc/test_annotation_mixin.py|test_updates_text_only|value=20.0", + "magic_number_test|tests/grpc/test_diarization_cancel.py|test_progress_percent_running|value=50.0", + "magic_number_test|tests/grpc/test_diarization_mixin.py|test_status_progress_running_is_time_based|value=50.0", + "magic_number_test|tests/grpc/test_meeting_mixin.py|test_list_meetings_respects_limit|value=25", + "magic_number_test|tests/grpc/test_meeting_mixin.py|test_list_meetings_respects_offset|value=50", + "magic_number_test|tests/grpc/test_preferences_mixin.py|test_computes_deterministic_etag|value=32", + "magic_number_test|tests/grpc/test_stream_lifecycle.py|test_partial_buffers_cleared_on_cleanup|value=3200", + "magic_number_test|tests/grpc/test_timestamp_converters.py|test_epoch_seconds_to_datetime_returns_utc|value=2024", + 
"magic_number_test|tests/grpc/test_timestamp_converters.py|test_iso_string_with_z_suffix_parsed_as_utc|value=14", + "magic_number_test|tests/grpc/test_timestamp_converters.py|test_iso_string_with_z_suffix_parsed_as_utc|value=15", + "magic_number_test|tests/grpc/test_timestamp_converters.py|test_iso_string_with_z_suffix_parsed_as_utc|value=2024", + "magic_number_test|tests/grpc/test_timestamp_converters.py|test_iso_string_with_z_suffix_parsed_as_utc|value=30", + "magic_number_test|tests/grpc/test_timestamp_converters.py|test_iso_string_with_z_suffix_parsed_as_utc|value=45", + "magic_number_test|tests/infrastructure/asr/test_segmenter.py|test_custom_config|value=44100", + "magic_number_test|tests/infrastructure/asr/test_segmenter.py|test_custom_config|value=60.0", + "magic_number_test|tests/infrastructure/audio/test_capture.py|test_properties_after_start|value=44100", + "magic_number_test|tests/infrastructure/audio/test_dto.py|test_timestamped_audio_creation|value=1600", + "magic_number_test|tests/infrastructure/audio/test_reader.py|test_reader_uses_manifest_sample_rate|value=1600", + "magic_number_test|tests/infrastructure/audio/test_reader.py|test_reader_uses_manifest_sample_rate|value=48000", + "magic_number_test|tests/infrastructure/audio/test_reader.py|test_reader_uses_manifest_sample_rate|value=48000", + "magic_number_test|tests/infrastructure/audio/test_ring_buffer.py|test_init_with_default_duration|value=30.0", + "magic_number_test|tests/infrastructure/audio/test_ring_buffer.py|test_max_duration_property|value=15.0", + "magic_number_test|tests/infrastructure/audio/test_writer.py|test_write_chunk_converts_float32_to_pcm16|value=3200", + "magic_number_test|tests/infrastructure/summarization/test_cloud_provider.py|test_summarize_returns_result|value=150", + "magic_number_test|tests/infrastructure/summarization/test_cloud_provider.py|test_summarize_returns_result|value=150", + "magic_number_test|tests/infrastructure/test_diarization.py|test_overlap_duration_full_overlap|value=15.0", + "magic_number_test|tests/infrastructure/test_diarization.py|test_overlap_duration_no_overlap|value=12.0", + "magic_number_test|tests/infrastructure/test_diarization.py|test_overlap_duration_no_overlap|value=20.0", + "magic_number_test|tests/infrastructure/test_diarization.py|test_overlap_duration_partial_overlap_right|value=15.0", + "magic_number_test|tests/infrastructure/test_integration_converters.py|test_converts_stats_dict|value=15", + "magic_number_test|tests/infrastructure/test_integration_converters.py|test_sync_run_orm_to_domain|value=5000", + "magic_number_test|tests/infrastructure/test_integration_converters.py|test_sync_run_to_orm_kwargs|value=10000", + "magic_number_test|tests/infrastructure/test_integration_converters.py|test_sync_run_to_orm_kwargs|value=25", + "magic_number_test|tests/infrastructure/test_observability.py|test_log_with_large_details|value=50", + "magic_number_test|tests/infrastructure/triggers/test_calendar.py|test_non_iterable_returns_empty|value=12345", + "magic_number_test|tests/integration/test_e2e_annotations.py|test_add_annotation_persists_to_database|value=15.0" + ], "module_size_soft": [ "module_size_soft|src/noteflow/config/settings.py|module|lines=566", "module_size_soft|src/noteflow/domain/ports/repositories/identity.py|module|lines=599", @@ -45,6 +197,58 @@ "orphaned_import": [ "orphaned_import|src/noteflow/infrastructure/observability/otel.py|opentelemetry" ], + "raises_without_match": [ + 
"raises_without_match|tests/domain/test_project.py|test_archive_default_project_raises|line=261", + "raises_without_match|tests/domain/test_project.py|test_default_project_cannot_be_archived|line=544", + "raises_without_match|tests/grpc/test_stream_lifecycle.py|test_cancelled_error_propagation_in_stream|line=647", + "raises_without_match|tests/infrastructure/asr/test_dto.py|test_word_timing_frozen|line=41", + "raises_without_match|tests/infrastructure/audio/test_dto.py|test_audio_device_info_frozen|line=42", + "raises_without_match|tests/infrastructure/auth/test_oidc_registry.py|test_create_provider_discovery_failure|line=154", + "raises_without_match|tests/integration/test_e2e_annotations.py|test_add_annotation_invalid_meeting_id|line=341", + "raises_without_match|tests/integration/test_e2e_annotations.py|test_annotations_deleted_with_meeting|line=460", + "raises_without_match|tests/integration/test_e2e_annotations.py|test_delete_annotation_not_found_e2e|line=386", + "raises_without_match|tests/integration/test_e2e_annotations.py|test_get_annotation_not_found|line=355", + "raises_without_match|tests/integration/test_e2e_annotations.py|test_update_annotation_not_found|line=372", + "raises_without_match|tests/integration/test_e2e_export.py|test_export_transcript_invalid_meeting_id|line=427", + "raises_without_match|tests/integration/test_e2e_export.py|test_export_transcript_nonexistent_meeting|line=410", + "raises_without_match|tests/integration/test_e2e_streaming.py|test_concurrent_streams_rejected|line=356", + "raises_without_match|tests/integration/test_e2e_streaming.py|test_stream_init_fails_for_nonexistent_meeting|line=191", + "raises_without_match|tests/integration/test_e2e_streaming.py|test_stream_rejects_invalid_meeting_id|line=213", + "raises_without_match|tests/integration/test_e2e_summarization.py|test_generate_summary_invalid_meeting_id|line=499", + "raises_without_match|tests/integration/test_e2e_summarization.py|test_generate_summary_nonexistent_meeting|line=485", + "raises_without_match|tests/integration/test_error_handling.py|test_delete_nonexistent_annotation|line=600", + "raises_without_match|tests/integration/test_error_handling.py|test_delete_nonexistent_meeting|line=107", + "raises_without_match|tests/integration/test_error_handling.py|test_duplicate_job_id_rejected|line=225", + "raises_without_match|tests/integration/test_error_handling.py|test_empty_meeting_id|line=79", + "raises_without_match|tests/integration/test_error_handling.py|test_export_nonexistent_meeting|line=487", + "raises_without_match|tests/integration/test_error_handling.py|test_get_nonexistent_annotation|line=569", + "raises_without_match|tests/integration/test_error_handling.py|test_get_status_nonexistent_job|line=619", + "raises_without_match|tests/integration/test_error_handling.py|test_invalid_uuid_format_for_meeting_id|line=65", + "raises_without_match|tests/integration/test_error_handling.py|test_nonexistent_meeting_returns_not_found|line=93", + "raises_without_match|tests/integration/test_error_handling.py|test_summarize_nonexistent_meeting|line=534", + "raises_without_match|tests/integration/test_error_handling.py|test_update_nonexistent_annotation|line=586", + "raises_without_match|tests/integration/test_grpc_servicer_database.py|test_get_nonexistent_job_returns_not_found|line=335", + "raises_without_match|tests/integration/test_grpc_servicer_database.py|test_get_nonexistent_meeting_returns_not_found|line=166", + 
"raises_without_match|tests/integration/test_project_repository.py|test_archive_default_project_raises_repository|line=276", + "raises_without_match|tests/integration/test_trigger_settings.py|test_retention_check_interval_validation|line=91", + "raises_without_match|tests/integration/test_trigger_settings.py|test_retention_check_interval_validation|line=98", + "raises_without_match|tests/integration/test_trigger_settings.py|test_retention_days_validation|line=77", + "raises_without_match|tests/integration/test_trigger_settings.py|test_retention_days_validation|line=81", + "raises_without_match|tests/integration/test_unit_of_work_advanced.py|test_exception_during_segment_add_rolls_back_meeting|line=252", + "raises_without_match|tests/stress/test_transaction_boundaries.py|test_batch_segment_add_rollback|line=196", + "raises_without_match|tests/stress/test_transaction_boundaries.py|test_exception_type_does_not_matter|line=78" + ], + "sensitive_equality": [ + "sensitive_equality|tests/domain/test_project.py|test_error_message_includes_project_id|str", + "sensitive_equality|tests/integration/test_e2e_streaming.py|test_active_stream_removed_on_completion|str", + "sensitive_equality|tests/integration/test_grpc_servicer_database.py|test_get_meeting_retrieves_from_database|str", + "sensitive_equality|tests/integration/test_grpc_servicer_database.py|test_refine_speaker_diarization_creates_job_in_database|str" + ], + "sleepy_test": [ + "sleepy_test|tests/integration/test_e2e_streaming.py|test_stop_request_exits_stream_gracefully|line=456", + "sleepy_test|tests/integration/test_unit_of_work_advanced.py|test_concurrent_uow_instances_isolated|line=165", + "sleepy_test|tests/integration/test_unit_of_work_advanced.py|test_concurrent_uow_instances_isolated|line=171" + ], "thin_wrapper": [ "thin_wrapper|src/noteflow/domain/auth/oidc.py|from_dict|cls", "thin_wrapper|src/noteflow/domain/webhooks/events.py|create|cls", @@ -88,6 +292,12 @@ "thin_wrapper|src/noteflow/infrastructure/persistence/memory/repositories/integration.py|get_sync_run|get", "thin_wrapper|src/noteflow/infrastructure/persistence/memory/repositories/webhook.py|get_by_id|get", "thin_wrapper|src/noteflow/infrastructure/security/crypto.py|generate_dek|token_bytes" + ], + "unused_fixture": [ + "unused_fixture|tests/grpc/test_export_mixin.py|test_export_aborts_when_meeting_not_found|mock_meetings_repo", + "unused_fixture|tests/grpc/test_export_mixin.py|test_export_aborts_when_meeting_not_found|mock_segments_repo", + "unused_fixture|tests/grpc/test_stream_lifecycle.py|test_audio_writer_closed_on_cleanup|crypto", + "unused_fixture|tests/grpc/test_stream_lifecycle.py|test_context_cancelled_check_pattern|memory_servicer" ] }, "schema_version": 1 diff --git a/tests/quality/generate_baseline.py b/tests/quality/generate_baseline.py index 5d608d3..a46a24a 100644 --- a/tests/quality/generate_baseline.py +++ b/tests/quality/generate_baseline.py @@ -32,6 +32,23 @@ from tests.quality._helpers import ( read_file_safe, relative_path, ) +from tests.quality._test_smell_collectors import ( + collect_assertion_roulette, + collect_conditional_test_logic, + collect_duplicate_test_names, + collect_eager_tests, + collect_exception_handling, + collect_fixture_missing_type, + collect_fixture_scope_too_narrow, + collect_long_tests, + collect_magic_number_tests, + collect_raises_without_match, + collect_redundant_prints, + collect_sensitive_equality, + collect_sleepy_tests, + collect_unknown_tests, + collect_unused_fixtures, +) def collect_stale_todos() -> list[Violation]: 
@@ -653,7 +670,7 @@ def main() -> None: all_violations: dict[str, list[Violation]] = {} - # Collect from all rules + # Collect from all rules - source code smells collectors = [ ("stale_todo", collect_stale_todos), ("orphaned_import", collect_orphaned_imports), @@ -669,6 +686,22 @@ def main() -> None: ("feature_envy", collect_feature_envy), ("redundant_type_alias", collect_redundant_type_aliases), ("passthrough_class", collect_passthrough_classes), + # Test smell collectors + ("assertion_roulette", collect_assertion_roulette), + ("conditional_test_logic", collect_conditional_test_logic), + ("sleepy_test", collect_sleepy_tests), + ("unknown_test", collect_unknown_tests), + ("redundant_print", collect_redundant_prints), + ("exception_handling", collect_exception_handling), + ("magic_number_test", collect_magic_number_tests), + ("sensitive_equality", collect_sensitive_equality), + ("eager_test", collect_eager_tests), + ("duplicate_test_name", collect_duplicate_test_names), + ("long_test", collect_long_tests), + ("fixture_missing_type", collect_fixture_missing_type), + ("unused_fixture", collect_unused_fixtures), + ("fixture_scope_too_narrow", collect_fixture_scope_too_narrow), + ("raises_without_match", collect_raises_without_match), ] for rule_name, collector in collectors: diff --git a/tests/quality/test_test_smells.py b/tests/quality/test_test_smells.py index a251c71..fe728a2 100644 --- a/tests/quality/test_test_smells.py +++ b/tests/quality/test_test_smells.py @@ -22,32 +22,20 @@ from __future__ import annotations import ast import re from collections import defaultdict -from dataclasses import dataclass from pathlib import Path - -@dataclass -class DetectedSmell: - """Represents a detected test smell.""" - - smell_type: str - test_name: str - file_path: Path - line_number: int - details: str - - -def find_test_files(root: Path) -> list[Path]: - """Find all test files in the project.""" - files: list[Path] = [] - for py_file in root.rglob("test_*.py"): - if ".venv" in py_file.parts or "__pycache__" in py_file.parts: - continue - # Skip quality tests themselves - if "quality" in py_file.parts: - continue - files.append(py_file) - return files +from tests.quality._baseline import ( + Violation, + assert_no_new_violations, + content_hash, +) +from tests.quality._helpers import ( + collect_parse_errors, + find_test_files, + parse_file_safe, + read_file_safe, + relative_path, +) def get_test_methods(tree: ast.AST) -> list[ast.FunctionDef | ast.AsyncFunctionDef]: @@ -100,48 +88,40 @@ def test_no_assertion_roulette() -> None: Assertion Roulette occurs when a test has multiple assertions without messages, making it hard to determine which assertion failed. 
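
    A hedged, hypothetical illustration (names invented for this docstring):

        def test_profile_defaults() -> None:
            profile = {"name": "", "active": True}
            assert profile["name"] == ""      # opaque if it fails
            assert profile["active"] is True  # opaque if it fails
            # With a message, the failing expectation names itself:
            assert profile["active"] is True, "new profiles must start active"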
""" - tests_root = Path(__file__).parent.parent + violations: list[Violation] = [] + parse_errors: list[str] = [] - smells: list[DetectedSmell] = [] - - for py_file in find_test_files(tests_root): - source = py_file.read_text(encoding="utf-8") - try: - tree = ast.parse(source) - except SyntaxError: + for py_file in find_test_files(): + tree, error = parse_file_safe(py_file) + if error: + parse_errors.append(error) continue + if tree is None: + continue + + rel_path = relative_path(py_file) for test_method in get_test_methods(tree): assertions_without_msg = 0 - total_assertions = 0 for node in ast.walk(test_method): if isinstance(node, ast.Assert): - total_assertions += 1 if not has_assertion_message(node): assertions_without_msg += 1 # Flag if >3 assertions without messages if assertions_without_msg > 3: - smells.append( - DetectedSmell( - smell_type="assertion_roulette", - test_name=test_method.name, - file_path=py_file, - line_number=test_method.lineno, - details=f"{assertions_without_msg} assertions without messages", + violations.append( + Violation( + rule="assertion_roulette", + relative_path=rel_path, + identifier=test_method.name, + detail=f"assertions={assertions_without_msg}", ) ) - # Target: reduce assertion roulette by adding messages to complex assertions - # Current baseline: 91 tests. Goal: reduce to 50. - assert len(smells) <= 50, ( - f"Found {len(smells)} tests with assertion roulette (max 50 allowed):\n" - + "\n".join( - f" {s.file_path}:{s.line_number}: {s.test_name} - {s.details}" - for s in smells[:10] - ) - ) + collect_parse_errors(parse_errors) + assert_no_new_violations("assertion_roulette", violations) def _contains_assertion(node: ast.AST) -> bool: @@ -158,20 +138,22 @@ def test_no_conditional_test_logic() -> None: Note: Loops/conditionals used only for setup (without assertions) are allowed. Stress tests are excluded as they intentionally use loops for thorough testing. 
""" - tests_root = Path(__file__).parent.parent + violations: list[Violation] = [] + parse_errors: list[str] = [] - smells: list[DetectedSmell] = [] - - for py_file in find_test_files(tests_root): + for py_file in find_test_files(): # Skip stress tests - they intentionally use loops for thorough testing if "stress" in py_file.parts: continue - source = py_file.read_text(encoding="utf-8") - try: - tree = ast.parse(source) - except SyntaxError: + tree, error = parse_file_safe(py_file) + if error: + parse_errors.append(error) continue + if tree is None: + continue + + rel_path = relative_path(py_file) for test_method in get_test_methods(tree): conditionals: list[str] = [] @@ -179,33 +161,24 @@ def test_no_conditional_test_logic() -> None: for node in ast.walk(test_method): # Only flag conditionals that contain assertions inside them if isinstance(node, ast.If) and _contains_assertion(node): - conditionals.append(f"if at line {node.lineno}") + conditionals.append(f"if@{node.lineno}") elif isinstance(node, ast.For) and _contains_assertion(node): - conditionals.append(f"for at line {node.lineno}") + conditionals.append(f"for@{node.lineno}") elif isinstance(node, ast.While) and _contains_assertion(node): - conditionals.append(f"while at line {node.lineno}") + conditionals.append(f"while@{node.lineno}") if conditionals: - smells.append( - DetectedSmell( - smell_type="conditional_test_logic", - test_name=test_method.name, - file_path=py_file, - line_number=test_method.lineno, - details=", ".join(conditionals[:3]), + violations.append( + Violation( + rule="conditional_test_logic", + relative_path=rel_path, + identifier=test_method.name, + detail=",".join(conditionals[:3]), ) ) - # Target: refactor tests to use parameterization instead of loops with assertions - # Stress tests excluded (they intentionally use loops). - # Setup-only loops (without assertions) are allowed. - assert len(smells) <= 40, ( - f"Found {len(smells)} tests with conditional logic (max 40 allowed):\n" - + "\n".join( - f" {s.file_path}:{s.line_number}: {s.test_name} - {s.details}" - for s in smells[:10] - ) - ) + collect_parse_errors(parse_errors) + assert_no_new_violations("conditional_test_logic", violations) def test_no_empty_tests() -> None: @@ -213,16 +186,18 @@ def test_no_empty_tests() -> None: Empty tests pass silently, giving false confidence in code quality. 
""" - tests_root = Path(__file__).parent.parent + violations: list[Violation] = [] + parse_errors: list[str] = [] - smells: list[DetectedSmell] = [] - - for py_file in find_test_files(tests_root): - source = py_file.read_text(encoding="utf-8") - try: - tree = ast.parse(source) - except SyntaxError: + for py_file in find_test_files(): + tree, error = parse_file_safe(py_file) + if error: + parse_errors.append(error) continue + if tree is None: + continue + + rel_path = relative_path(py_file) for test_method in get_test_methods(tree): # Filter out docstrings and pass statements @@ -234,21 +209,20 @@ def test_no_empty_tests() -> None: ] if not executable_stmts: - smells.append( - DetectedSmell( - smell_type="empty_test", - test_name=test_method.name, - file_path=py_file, - line_number=test_method.lineno, - details="no executable statements", + violations.append( + Violation( + rule="empty_test", + relative_path=rel_path, + identifier=test_method.name, ) ) - assert not smells, ( - f"Found {len(smells)} empty tests (should be removed or implemented):\n" - + "\n".join( - f" {s.file_path}:{s.line_number}: {s.test_name}" for s in smells[:10] - ) + collect_parse_errors(parse_errors) + + # Empty tests should be removed entirely - zero tolerance + assert not violations, ( + f"Found {len(violations)} empty tests (should be removed or implemented):\n" + + "\n".join(str(v) for v in violations[:10]) ) @@ -263,8 +237,6 @@ def test_no_sleepy_tests() -> None: - tests/integration/test_database_resilience.py: Tests connection pool timing - tests/grpc/test_stream_lifecycle.py: Tests streaming cleanup timing """ - tests_root = Path(__file__).parent.parent - # Paths where sleep is legitimately needed for stress/resilience testing allowed_sleepy_paths = { "tests/stress/", @@ -273,18 +245,20 @@ def test_no_sleepy_tests() -> None: "tests/grpc/test_stream_lifecycle.py", } - smells: list[DetectedSmell] = [] + violations: list[Violation] = [] + parse_errors: list[str] = [] - for py_file in find_test_files(tests_root): + for py_file in find_test_files(): # Skip files where sleep is legitimately needed for stress/resilience testing - rel_path = str(py_file.relative_to(tests_root.parent)) + rel_path = relative_path(py_file) if any(allowed in rel_path for allowed in allowed_sleepy_paths): continue - source = py_file.read_text(encoding="utf-8") - try: - tree = ast.parse(source) - except SyntaxError: + tree, error = parse_file_safe(py_file) + if error: + parse_errors.append(error) + continue + if tree is None: continue for test_method in get_test_methods(tree): @@ -295,23 +269,17 @@ def test_no_sleepy_tests() -> None: if node.func.attr == "sleep": if isinstance(node.func.value, ast.Name): if node.func.value.id in {"time", "asyncio"}: - smells.append( - DetectedSmell( - smell_type="sleepy_test", - test_name=test_method.name, - file_path=py_file, - line_number=node.lineno, - details="uses sleep()", + violations.append( + Violation( + rule="sleepy_test", + relative_path=rel_path, + identifier=test_method.name, + detail=f"line={node.lineno}", ) ) - # Sleepy tests cause flakiness. Use mocking instead. Current baseline: 4. 
- assert len(smells) <= 3, ( - f"Found {len(smells)} sleepy tests (max 3 allowed):\n" - + "\n".join( - f" {s.file_path}:{s.line_number}: {s.test_name}" for s in smells[:10] - ) - ) + collect_parse_errors(parse_errors) + assert_no_new_violations("sleepy_test", violations) def test_no_unknown_tests() -> None: @@ -320,16 +288,18 @@ def test_no_unknown_tests() -> None: Tests without assertions pass even when behavior is wrong, providing false confidence. Every test should assert something. """ - tests_root = Path(__file__).parent.parent + violations: list[Violation] = [] + parse_errors: list[str] = [] - smells: list[DetectedSmell] = [] - - for py_file in find_test_files(tests_root): - source = py_file.read_text(encoding="utf-8") - try: - tree = ast.parse(source) - except SyntaxError: + for py_file in find_test_files(): + tree, error = parse_file_safe(py_file) + if error: + parse_errors.append(error) continue + if tree is None: + continue + + rel_path = relative_path(py_file) for test_method in get_test_methods(tree): has_assertion = False @@ -353,23 +323,16 @@ def test_no_unknown_tests() -> None: # These are valid for checking no exceptions are raised has_call = any(isinstance(n, ast.Call) for n in ast.walk(test_method)) if not has_call: - smells.append( - DetectedSmell( - smell_type="unknown_test", - test_name=test_method.name, - file_path=py_file, - line_number=test_method.lineno, - details="no assertions or pytest.raises", + violations.append( + Violation( + rule="unknown_test", + relative_path=rel_path, + identifier=test_method.name, ) ) - # Every test should assert something. Current baseline: 10. - assert len(smells) <= 5, ( - f"Found {len(smells)} tests without assertions (max 5 allowed):\n" - + "\n".join( - f" {s.file_path}:{s.line_number}: {s.test_name}" for s in smells[:10] - ) - ) + collect_parse_errors(parse_errors) + assert_no_new_violations("unknown_test", violations) def test_no_redundant_assertions() -> None: @@ -377,16 +340,18 @@ def test_no_redundant_assertions() -> None: Redundant assertions provide no value and may indicate incomplete tests. 
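
    Both flagged shapes in one hypothetical test:

        def test_redundant() -> None:
            result = [1, 2, 3]
            assert True              # flagged: can never fail
            assert result == result  # flagged: compares a value to itself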
""" - tests_root = Path(__file__).parent.parent + violations: list[Violation] = [] + parse_errors: list[str] = [] - smells: list[DetectedSmell] = [] - - for py_file in find_test_files(tests_root): - source = py_file.read_text(encoding="utf-8") - try: - tree = ast.parse(source) - except SyntaxError: + for py_file in find_test_files(): + tree, error = parse_file_safe(py_file) + if error: + parse_errors.append(error) continue + if tree is None: + continue + + rel_path = relative_path(py_file) for test_method in get_test_methods(tree): for node in ast.walk(test_method): @@ -395,13 +360,12 @@ def test_no_redundant_assertions() -> None: # Check for assert True, assert False (False would fail anyway) if isinstance(test_expr, ast.Constant): if test_expr.value is True: - smells.append( - DetectedSmell( - smell_type="redundant_assertion", - test_name=test_method.name, - file_path=py_file, - line_number=node.lineno, - details="assert True", + violations.append( + Violation( + rule="redundant_assertion", + relative_path=rel_path, + identifier=test_method.name, + detail="assert_true", ) ) # Check for assert x == x @@ -412,22 +376,21 @@ def test_no_redundant_assertions() -> None: left = ast.dump(test_expr.left) right = ast.dump(test_expr.comparators[0]) if left == right: - smells.append( - DetectedSmell( - smell_type="redundant_assertion", - test_name=test_method.name, - file_path=py_file, - line_number=node.lineno, - details="comparing value to itself", + violations.append( + Violation( + rule="redundant_assertion", + relative_path=rel_path, + identifier=test_method.name, + detail="self_comparison", ) ) - assert not smells, ( - f"Found {len(smells)} redundant assertions:\n" - + "\n".join( - f" {s.file_path}:{s.line_number}: {s.test_name} - {s.details}" - for s in smells[:10] - ) + collect_parse_errors(parse_errors) + + # Redundant assertions should be removed - zero tolerance + assert not violations, ( + f"Found {len(violations)} redundant assertions:\n" + + "\n".join(str(v) for v in violations[:10]) ) @@ -437,37 +400,34 @@ def test_no_redundant_prints() -> None: Print statements in tests are noise during automated runs. Use logging or pytest's capture mechanism instead. 
""" - tests_root = Path(__file__).parent.parent + violations: list[Violation] = [] + parse_errors: list[str] = [] - smells: list[DetectedSmell] = [] - - for py_file in find_test_files(tests_root): - source = py_file.read_text(encoding="utf-8") - try: - tree = ast.parse(source) - except SyntaxError: + for py_file in find_test_files(): + tree, error = parse_file_safe(py_file) + if error: + parse_errors.append(error) continue + if tree is None: + continue + + rel_path = relative_path(py_file) for test_method in get_test_methods(tree): for node in ast.walk(test_method): if isinstance(node, ast.Call) and isinstance(node.func, ast.Name): if node.func.id == "print": - smells.append( - DetectedSmell( - smell_type="redundant_print", - test_name=test_method.name, - file_path=py_file, - line_number=node.lineno, - details="print() statement", + violations.append( + Violation( + rule="redundant_print", + relative_path=rel_path, + identifier=test_method.name, + detail=f"line={node.lineno}", ) ) - assert len(smells) <= 5, ( - f"Found {len(smells)} tests with print statements (max 5 allowed):\n" - + "\n".join( - f" {s.file_path}:{s.line_number}: {s.test_name}" for s in smells[:10] - ) - ) + collect_parse_errors(parse_errors) + assert_no_new_violations("redundant_print", violations) def test_no_ignored_tests_without_reason() -> None: @@ -476,9 +436,7 @@ def test_no_ignored_tests_without_reason() -> None: Skipped tests should have a reason explaining why they're skipped, otherwise it's unclear if they should be fixed or removed. """ - tests_root = Path(__file__).parent.parent - - smells: list[DetectedSmell] = [] + violations: list[Violation] = [] # Pattern for skip without reason: @pytest.mark.skip or @pytest.mark.skip() skip_pattern = re.compile(r"@pytest\.mark\.skip\s*(?:\(\s*\))?$", re.MULTILINE) @@ -488,38 +446,41 @@ def test_no_ignored_tests_without_reason() -> None: r"@pytest\.mark\.skipif\s*\([^)]*\)\s*$", re.MULTILINE ) - for py_file in find_test_files(tests_root): - content = py_file.read_text(encoding="utf-8") + for py_file in find_test_files(): + content, error = read_file_safe(py_file) + if error or content is None: + continue + + rel_path = relative_path(py_file) lines = content.splitlines() for i, line in enumerate(lines, start=1): stripped = line.strip() # Check for @pytest.mark.skip without reason if skip_pattern.search(stripped): - smells.append( - DetectedSmell( - smell_type="ignored_test_no_reason", - test_name="", - file_path=py_file, - line_number=i, - details="@pytest.mark.skip without reason", + violations.append( + Violation( + rule="ignored_test_no_reason", + relative_path=rel_path, + identifier=content_hash(f"{i}:{stripped}"), + detail="skip_no_reason", ) ) # Check for @pytest.mark.skipif without reason= elif skipif_pattern.search(stripped) and "reason=" not in stripped: - smells.append( - DetectedSmell( - smell_type="ignored_test_no_reason", - test_name="", - file_path=py_file, - line_number=i, - details="@pytest.mark.skipif without reason", + violations.append( + Violation( + rule="ignored_test_no_reason", + relative_path=rel_path, + identifier=content_hash(f"{i}:{stripped}"), + detail="skipif_no_reason", ) ) - assert not smells, ( - f"Found {len(smells)} skipped tests without reason:\n" - + "\n".join(f" {s.file_path}:{s.line_number}: {s.details}" for s in smells) + # Skipped tests without reason should be fixed - zero tolerance + assert not violations, ( + f"Found {len(violations)} skipped tests without reason:\n" + + "\n".join(str(v) for v in violations) ) @@ -529,16 +490,18 @@ 
def test_no_exception_handling_in_tests() -> None: Tests should use pytest.raises() for expected exceptions, not try/except. Manual exception handling can hide bugs. """ - tests_root = Path(__file__).parent.parent + violations: list[Violation] = [] + parse_errors: list[str] = [] - smells: list[DetectedSmell] = [] - - for py_file in find_test_files(tests_root): - source = py_file.read_text(encoding="utf-8") - try: - tree = ast.parse(source) - except SyntaxError: + for py_file in find_test_files(): + tree, error = parse_file_safe(py_file) + if error: + parse_errors.append(error) continue + if tree is None: + continue + + rel_path = relative_path(py_file) for test_method in get_test_methods(tree): for node in ast.walk(test_method): @@ -546,35 +509,27 @@ def test_no_exception_handling_in_tests() -> None: # Check if it's a bare except or catching specific exceptions for handler in node.handlers: if handler.type is None: - smells.append( - DetectedSmell( - smell_type="exception_handling", - test_name=test_method.name, - file_path=py_file, - line_number=node.lineno, - details="bare except clause", + violations.append( + Violation( + rule="exception_handling", + relative_path=rel_path, + identifier=test_method.name, + detail="bare_except", ) ) elif isinstance(handler.type, ast.Name): if handler.type.id in {"Exception", "BaseException"}: - smells.append( - DetectedSmell( - smell_type="exception_handling", - test_name=test_method.name, - file_path=py_file, - line_number=node.lineno, - details=f"catches {handler.type.id}", + violations.append( + Violation( + rule="exception_handling", + relative_path=rel_path, + identifier=test_method.name, + detail=f"catches_{handler.type.id}", ) ) - # Use pytest.raises instead of try/except. Current baseline: 5. - assert len(smells) <= 3, ( - f"Found {len(smells)} tests with broad exception handling (max 3):\n" - + "\n".join( - f" {s.file_path}:{s.line_number}: {s.test_name} - {s.details}" - for s in smells[:10] - ) - ) + collect_parse_errors(parse_errors) + assert_no_new_violations("exception_handling", violations) def test_no_magic_numbers_in_assertions() -> None: @@ -583,19 +538,21 @@ def test_no_magic_numbers_in_assertions() -> None: Assertions like `assert result == 42` are unclear. Use named constants or variables with descriptive names. """ - tests_root = Path(__file__).parent.parent - # Allowed magic numbers in tests allowed_numbers = {0, 1, 2, -1, 100, 1000} - smells: list[DetectedSmell] = [] + violations: list[Violation] = [] + parse_errors: list[str] = [] - for py_file in find_test_files(tests_root): - source = py_file.read_text(encoding="utf-8") - try: - tree = ast.parse(source) - except SyntaxError: + for py_file in find_test_files(): + tree, error = parse_file_safe(py_file) + if error: + parse_errors.append(error) continue + if tree is None: + continue + + rel_path = relative_path(py_file) for test_method in get_test_methods(tree): for node in ast.walk(test_method): @@ -606,24 +563,17 @@ def test_no_magic_numbers_in_assertions() -> None: if isinstance(child.value, (int, float)): if child.value not in allowed_numbers: if abs(child.value) > 10: - smells.append( - DetectedSmell( - smell_type="magic_number_test", - test_name=test_method.name, - file_path=py_file, - line_number=node.lineno, - details=f"magic number {child.value}", + violations.append( + Violation( + rule="magic_number_test", + relative_path=rel_path, + identifier=test_method.name, + detail=f"value={child.value}", ) ) - # Magic numbers reduce test readability. Use named constants. 
- assert len(smells) <= 50, ( - f"Found {len(smells)} tests with magic numbers (max 50 allowed):\n" - + "\n".join( - f" {s.file_path}:{s.line_number}: {s.test_name} - {s.details}" - for s in smells[:10] - ) - ) + collect_parse_errors(parse_errors) + assert_no_new_violations("magic_number_test", violations) def test_no_sensitive_equality() -> None: @@ -638,8 +588,6 @@ def test_no_sensitive_equality() -> None: - Tests with "conversion" in name - testing type conversion - Tests with "serializ" in name - testing serialization """ - tests_root = Path(__file__).parent.parent - # Test names that legitimately use str() comparisons excluded_test_patterns = { "string", # Testing string conversion behavior @@ -664,18 +612,22 @@ def test_no_sensitive_equality() -> None: file_name_lower = file_path.stem.lower() return any(p in file_name_lower for p in excluded_file_patterns) - smells: list[DetectedSmell] = [] + violations: list[Violation] = [] + parse_errors: list[str] = [] - for py_file in find_test_files(tests_root): + for py_file in find_test_files(): # Skip files that legitimately use str() comparisons if _is_excluded_file(py_file): continue - source = py_file.read_text(encoding="utf-8") - try: - tree = ast.parse(source) - except SyntaxError: + tree, error = parse_file_safe(py_file) + if error: + parse_errors.append(error) continue + if tree is None: + continue + + rel_path = relative_path(py_file) for test_method in get_test_methods(tree): # Skip tests that legitimately test string behavior @@ -692,24 +644,17 @@ def test_no_sensitive_equality() -> None: if isinstance(operand, ast.Call): if isinstance(operand.func, ast.Name): if operand.func.id in {"str", "repr"}: - smells.append( - DetectedSmell( - smell_type="sensitive_equality", - test_name=test_method.name, - file_path=py_file, - line_number=node.lineno, - details=f"comparing {operand.func.id}() output", + violations.append( + Violation( + rule="sensitive_equality", + relative_path=rel_path, + identifier=test_method.name, + detail=operand.func.id, ) ) - # Comparing str()/repr() is fragile. Compare object attributes instead. - assert len(smells) <= 10, ( - f"Found {len(smells)} tests with sensitive equality (max 10 allowed):\n" - + "\n".join( - f" {s.file_path}:{s.line_number}: {s.test_name} - {s.details}" - for s in smells[:10] - ) - ) + collect_parse_errors(parse_errors) + assert_no_new_violations("sensitive_equality", violations) def test_no_eager_tests() -> None: @@ -718,17 +663,19 @@ def test_no_eager_tests() -> None: Eager tests are hard to maintain because failures don't pinpoint the problem. Each test should focus on one behavior. 
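
    Hypothetical sketch of the cure (the ``writer`` fixture and its
    attribute are invented here): split the do-everything test so each
    failure stays local:

        def test_flush_drains_buffer(writer) -> None:
            writer.flush()
            assert writer.pending_bytes == 0, "flush should drain the buffer"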
""" - tests_root = Path(__file__).parent.parent - - smells: list[DetectedSmell] = [] + violations: list[Violation] = [] + parse_errors: list[str] = [] max_method_calls = 10 # Threshold for "too many" method calls - for py_file in find_test_files(tests_root): - source = py_file.read_text(encoding="utf-8") - try: - tree = ast.parse(source) - except SyntaxError: + for py_file in find_test_files(): + tree, error = parse_file_safe(py_file) + if error: + parse_errors.append(error) continue + if tree is None: + continue + + rel_path = relative_path(py_file) for test_method in get_test_methods(tree): method_calls: set[str] = set() @@ -741,24 +688,17 @@ def test_no_eager_tests() -> None: method_calls.add(node.func.attr) if len(method_calls) > max_method_calls: - smells.append( - DetectedSmell( - smell_type="eager_test", - test_name=test_method.name, - file_path=py_file, - line_number=test_method.lineno, - details=f"calls {len(method_calls)} different methods", + violations.append( + Violation( + rule="eager_test", + relative_path=rel_path, + identifier=test_method.name, + detail=f"methods={len(method_calls)}", ) ) - # Allow baseline for integration tests - assert len(smells) <= 10, ( - f"Found {len(smells)} eager tests (max 10 allowed):\n" - + "\n".join( - f" {s.file_path}:{s.line_number}: {s.test_name} - {s.details}" - for s in smells[:10] - ) - ) + collect_parse_errors(parse_errors) + assert_no_new_violations("eager_test", violations) def test_no_duplicate_test_names() -> None: @@ -767,15 +707,15 @@ def test_no_duplicate_test_names() -> None: Duplicate test names can cause confusion and may result in tests being shadowed or not run. """ - tests_root = Path(__file__).parent.parent - test_names: dict[str, list[tuple[Path, int]]] = defaultdict(list) + parse_errors: list[str] = [] - for py_file in find_test_files(tests_root): - source = py_file.read_text(encoding="utf-8") - try: - tree = ast.parse(source) - except SyntaxError: + for py_file in find_test_files(): + tree, error = parse_file_safe(py_file) + if error: + parse_errors.append(error) + continue + if tree is None: continue for test_method in get_test_methods(tree): @@ -783,17 +723,21 @@ def test_no_duplicate_test_names() -> None: duplicates = {name: locs for name, locs in test_names.items() if len(locs) > 1} - violations = [ - f"'{name}' defined in: " + ", ".join(f"{f}:{line}" for f, line in locs) - for name, locs in duplicates.items() - ] + violations: list[Violation] = [] + for name, locs in duplicates.items(): + # Use first location as the identifier + first_path = relative_path(locs[0][0]) + violations.append( + Violation( + rule="duplicate_test_name", + relative_path=first_path, + identifier=name, + detail=f"count={len(locs)}", + ) + ) - # Duplicate test names can cause confusion. Make names more specific. - # Current baseline: 26. Goal: reduce to 15. - assert len(violations) <= 15, ( - f"Found {len(violations)} duplicate test names (max 15 allowed):\n" - + "\n".join(violations[:10]) - ) + collect_parse_errors(parse_errors) + assert_no_new_violations("duplicate_test_name", violations) def test_no_hardcoded_test_data_paths() -> None: @@ -802,8 +746,6 @@ def test_no_hardcoded_test_data_paths() -> None: Hardcoded paths make tests non-portable. Use fixtures, tmp_path, or pathlib relative to __file__. 
""" - tests_root = Path(__file__).parent.parent - # Patterns for hardcoded paths path_patterns = [ r'["\'][A-Za-z]:\\', # Windows paths @@ -812,29 +754,42 @@ def test_no_hardcoded_test_data_paths() -> None: r'["\']\/var\/\w+', # Hardcoded /var paths ] - smells: list[DetectedSmell] = [] + violations: list[Violation] = [] - for py_file in find_test_files(tests_root): - content = py_file.read_text(encoding="utf-8") + for py_file in find_test_files(): + content, error = read_file_safe(py_file) + if error or content is None: + continue + + rel_path = relative_path(py_file) lines = content.splitlines() for i, line in enumerate(lines, start=1): + # Skip comments + stripped = line.strip() + if stripped.startswith("#"): + continue + for pattern in path_patterns: - if re.search(pattern, line): - smells.append( - DetectedSmell( - smell_type="hardcoded_path", - test_name="", - file_path=py_file, - line_number=i, - details="hardcoded file path", + match = re.search(pattern, line) + if match: + # Check if pattern appears before a comment + comment_pos = line.find("#") + if comment_pos != -1 and comment_pos < match.start(): + continue + violations.append( + Violation( + rule="hardcoded_path", + relative_path=rel_path, + identifier=content_hash(f"{i}:{stripped}"), ) ) break - assert not smells, ( - f"Found {len(smells)} hardcoded paths in tests:\n" - + "\n".join(f" {s.file_path}:{s.line_number}" for s in smells[:10]) + # Hardcoded paths should not exist - zero tolerance + assert not violations, ( + f"Found {len(violations)} hardcoded paths in tests:\n" + + "\n".join(str(v) for v in violations[:10]) ) @@ -844,41 +799,36 @@ def test_no_long_test_methods() -> None: Long tests are hard to understand and maintain. Break them into smaller, focused tests or extract helper functions. """ - tests_root = Path(__file__).parent.parent max_lines = 50 - smells: list[DetectedSmell] = [] + violations: list[Violation] = [] + parse_errors: list[str] = [] - for py_file in find_test_files(tests_root): - source = py_file.read_text(encoding="utf-8") - try: - tree = ast.parse(source) - except SyntaxError: + for py_file in find_test_files(): + tree, error = parse_file_safe(py_file) + if error: + parse_errors.append(error) continue + if tree is None: + continue + + rel_path = relative_path(py_file) for test_method in get_test_methods(tree): if test_method.end_lineno: lines = test_method.end_lineno - test_method.lineno + 1 if lines > max_lines: - smells.append( - DetectedSmell( - smell_type="long_test", - test_name=test_method.name, - file_path=py_file, - line_number=test_method.lineno, - details=f"{lines} lines (max {max_lines})", + violations.append( + Violation( + rule="long_test", + relative_path=rel_path, + identifier=test_method.name, + detail=f"lines={lines}", ) ) - # Long tests are hard to understand. Break into smaller focused tests. - # Current baseline: 5. Goal: reduce to 3. 
- assert len(smells) <= 3, ( - f"Found {len(smells)} long test methods (max 3 allowed):\n" - + "\n".join( - f" {s.file_path}:{s.line_number}: {s.test_name} - {s.details}" - for s in smells[:10] - ) - ) + collect_parse_errors(parse_errors) + assert_no_new_violations("long_test", violations) # ============================================================================= @@ -919,14 +869,35 @@ def _get_fixture_scope(node: ast.FunctionDef) -> str | None: return None +def _get_module_level_fixtures(tree: ast.AST) -> list[ast.FunctionDef]: + """Extract only module-level pytest fixtures from AST (not class-scoped).""" + fixtures: list[ast.FunctionDef] = [] + # Only check top-level function definitions, not methods inside classes + for node in tree.body: + if isinstance(node, ast.FunctionDef): + for decorator in node.decorator_list: + # Check for @pytest.fixture or @fixture + if isinstance(decorator, ast.Attribute): + if decorator.attr == "fixture": + fixtures.append(node) + break + elif isinstance(decorator, ast.Call): + if isinstance(decorator.func, ast.Attribute): + if decorator.func.attr == "fixture": + fixtures.append(node) + break + elif isinstance(decorator, ast.Name) and decorator.id == "fixture": + fixtures.append(node) + break + return fixtures + + def test_no_unittest_style_assertions() -> None: """Detect unittest-style assertions instead of plain assert. Pytest works best with plain assert statements. Using unittest-style assertions (self.assertEqual, etc.) loses pytest's assertion introspection. """ - tests_root = Path(__file__).parent.parent - unittest_assertions = { "assertEqual", "assertNotEqual", @@ -957,36 +928,39 @@ def test_no_unittest_style_assertions() -> None: "assertDictEqual", } - smells: list[DetectedSmell] = [] + violations: list[Violation] = [] + parse_errors: list[str] = [] - for py_file in find_test_files(tests_root): - source = py_file.read_text(encoding="utf-8") - try: - tree = ast.parse(source) - except SyntaxError: + for py_file in find_test_files(): + tree, error = parse_file_safe(py_file) + if error: + parse_errors.append(error) continue + if tree is None: + continue + + rel_path = relative_path(py_file) for test_method in get_test_methods(tree): for node in ast.walk(test_method): if isinstance(node, ast.Call): if isinstance(node.func, ast.Attribute): if node.func.attr in unittest_assertions: - smells.append( - DetectedSmell( - smell_type="unittest_assertion", - test_name=test_method.name, - file_path=py_file, - line_number=node.lineno, - details=f"self.{node.func.attr}() - use plain assert", + violations.append( + Violation( + rule="unittest_assertion", + relative_path=rel_path, + identifier=test_method.name, + detail=node.func.attr, ) ) - assert not smells, ( - f"Found {len(smells)} unittest-style assertions (use plain assert):\n" - + "\n".join( - f" {s.file_path}:{s.line_number}: {s.test_name} - {s.details}" - for s in smells[:10] - ) + collect_parse_errors(parse_errors) + + # unittest assertions should not be used - zero tolerance + assert not violations, ( + f"Found {len(violations)} unittest-style assertions (use plain assert):\n" + + "\n".join(str(v) for v in violations[:10]) ) @@ -996,8 +970,6 @@ def test_no_session_scoped_fixtures_with_mutation() -> None: Session-scoped fixtures that mutate lists, dicts, or objects can cause test pollution. They should return immutable or fresh copies. 
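
    A hedged sketch of the safe shapes (hypothetical fixtures):

        import pytest

        @pytest.fixture(scope="session")
        def base_tags() -> tuple[str, ...]:
            return ("alpha", "beta")  # immutable: safe to share per session

        @pytest.fixture()
        def tags(base_tags) -> list[str]:
            return list(base_tags)  # fresh copy per test; mutation stays local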
""" - tests_root = Path(__file__).parent.parent - # Patterns indicating mutation in fixture body mutation_patterns = [ r"\.append\(", @@ -1010,40 +982,47 @@ def test_no_session_scoped_fixtures_with_mutation() -> None: r"\[.+\]\s*=", # dict/list assignment ] - smells: list[DetectedSmell] = [] + violations: list[Violation] = [] + parse_errors: list[str] = [] - for py_file in find_test_files(tests_root): - source = py_file.read_text(encoding="utf-8") - try: - tree = ast.parse(source) - except SyntaxError: + for py_file in find_test_files(): + tree, error = parse_file_safe(py_file) + if error: + parse_errors.append(error) continue + if tree is None: + continue + + content, _ = read_file_safe(py_file) + if content is None: + continue + + rel_path = relative_path(py_file) for fixture in _get_fixtures(tree): scope = _get_fixture_scope(fixture) if scope in ("session", "module"): # Check fixture body for mutation patterns - fixture_source = ast.get_source_segment(source, fixture) + fixture_source = ast.get_source_segment(content, fixture) if fixture_source: for pattern in mutation_patterns: if re.search(pattern, fixture_source): - smells.append( - DetectedSmell( - smell_type="session_fixture_mutation", - test_name=fixture.name, - file_path=py_file, - line_number=fixture.lineno, - details=f"scope={scope} fixture may mutate state", + violations.append( + Violation( + rule="session_fixture_mutation", + relative_path=rel_path, + identifier=fixture.name, + detail=f"scope={scope}", ) ) break - assert not smells, ( - f"Found {len(smells)} session/module fixtures with potential mutation:\n" - + "\n".join( - f" {s.file_path}:{s.line_number}: {s.test_name} - {s.details}" - for s in smells[:10] - ) + collect_parse_errors(parse_errors) + + # Session fixtures with mutation should not exist - zero tolerance + assert not violations, ( + f"Found {len(violations)} session/module fixtures with potential mutation:\n" + + "\n".join(str(v) for v in violations[:10]) ) @@ -1053,39 +1032,34 @@ def test_fixtures_have_type_hints() -> None: Fixtures should have return type annotations for better IDE support and documentation. Use -> T or -> Generator[T, None, None] for yields. 
""" - tests_root = Path(__file__).parent.parent + violations: list[Violation] = [] + parse_errors: list[str] = [] - smells: list[DetectedSmell] = [] - - for py_file in find_test_files(tests_root): - source = py_file.read_text(encoding="utf-8") - try: - tree = ast.parse(source) - except SyntaxError: + for py_file in find_test_files(): + tree, error = parse_file_safe(py_file) + if error: + parse_errors.append(error) continue + if tree is None: + continue + + rel_path = relative_path(py_file) for fixture in _get_fixtures(tree): # Check if fixture has return type annotation if fixture.returns is None: # Skip fixtures that start with _ (internal helpers) if not fixture.name.startswith("_"): - smells.append( - DetectedSmell( - smell_type="fixture_missing_type", - test_name=fixture.name, - file_path=py_file, - line_number=fixture.lineno, - details="fixture missing return type annotation", + violations.append( + Violation( + rule="fixture_missing_type", + relative_path=rel_path, + identifier=fixture.name, ) ) - # Allow some fixtures without types (legacy or complex cases) - assert len(smells) <= 10, ( - f"Found {len(smells)} fixtures without type hints (max 10 allowed):\n" - + "\n".join( - f" {s.file_path}:{s.line_number}: {s.test_name}" for s in smells[:15] - ) - ) + collect_parse_errors(parse_errors) + assert_no_new_violations("fixture_missing_type", violations) def test_no_unused_fixture_parameters() -> None: @@ -1094,16 +1068,18 @@ def test_no_unused_fixture_parameters() -> None: Requesting unused fixtures wastes resources and clutters the test signature. Remove unused fixture parameters or mark them with underscore prefix. """ - tests_root = Path(__file__).parent.parent + violations: list[Violation] = [] + parse_errors: list[str] = [] - smells: list[DetectedSmell] = [] - - for py_file in find_test_files(tests_root): - source = py_file.read_text(encoding="utf-8") - try: - tree = ast.parse(source) - except SyntaxError: + for py_file in find_test_files(): + tree, error = parse_file_safe(py_file) + if error: + parse_errors.append(error) continue + if tree is None: + continue + + rel_path = relative_path(py_file) for test_method in get_test_methods(tree): # Get parameter names (excluding self) @@ -1141,23 +1117,17 @@ def test_no_unused_fixture_parameters() -> None: "event_loop", ): continue - smells.append( - DetectedSmell( - smell_type="unused_fixture", - test_name=test_method.name, - file_path=py_file, - line_number=test_method.lineno, - details=f"unused parameter: {param}", + violations.append( + Violation( + rule="unused_fixture", + relative_path=rel_path, + identifier=test_method.name, + detail=param, ) ) - assert len(smells) <= 5, ( - f"Found {len(smells)} unused fixture parameters (max 5 allowed):\n" - + "\n".join( - f" {s.file_path}:{s.line_number}: {s.test_name} - {s.details}" - for s in smells[:10] - ) - ) + collect_parse_errors(parse_errors) + assert_no_new_violations("unused_fixture", violations) def test_conftest_fixtures_not_duplicated() -> None: @@ -1169,16 +1139,20 @@ def test_conftest_fixtures_not_duplicated() -> None: Only checks conftest files that would be visible to the test file (same directory or parent directories). 
""" - tests_root = Path(__file__).parent.parent + # Use the project root for finding conftest files + project_root = Path(__file__).parent.parent.parent + tests_root = project_root / "tests" # Collect fixtures from conftest files, organized by directory conftest_by_dir: dict[Path, dict[str, int]] = {} for conftest in tests_root.rglob("conftest.py"): if ".venv" in conftest.parts: continue - source = conftest.read_text(encoding="utf-8") + content, error = read_file_safe(conftest) + if error or content is None: + continue try: - tree = ast.parse(source) + tree = ast.parse(content) except SyntaxError: continue conftest_dir = conftest.parent @@ -1187,15 +1161,18 @@ def test_conftest_fixtures_not_duplicated() -> None: conftest_by_dir[conftest_dir][fixture.name] = fixture.lineno # Check test files for duplicate fixture definitions - smells: list[DetectedSmell] = [] + violations: list[Violation] = [] + parse_errors: list[str] = [] - for py_file in find_test_files(tests_root): - source = py_file.read_text(encoding="utf-8") - try: - tree = ast.parse(source) - except SyntaxError: + for py_file in find_test_files(): + tree, error = parse_file_safe(py_file) + if error: + parse_errors.append(error) + continue + if tree is None: continue + rel_path = relative_path(py_file) test_dir = py_file.parent # Find all conftest directories visible to this test file @@ -1214,22 +1191,21 @@ def test_conftest_fixtures_not_duplicated() -> None: # Only check module-level fixtures (class-scoped fixtures are intentional) for fixture in _get_module_level_fixtures(tree): if fixture.name in visible_conftest_fixtures: - smells.append( - DetectedSmell( - smell_type="duplicate_fixture", - test_name=fixture.name, - file_path=py_file, - line_number=fixture.lineno, - details=f"also in {visible_conftest_fixtures[fixture.name]}", + violations.append( + Violation( + rule="duplicate_fixture", + relative_path=rel_path, + identifier=fixture.name, + detail=str(visible_conftest_fixtures[fixture.name]), ) ) - assert not smells, ( - f"Found {len(smells)} fixtures duplicated from conftest:\n" - + "\n".join( - f" {s.file_path}:{s.line_number}: {s.test_name} - {s.details}" - for s in smells[:10] - ) + collect_parse_errors(parse_errors) + + # Duplicate fixtures should be consolidated - zero tolerance + assert not violations, ( + f"Found {len(violations)} fixtures duplicated from conftest:\n" + + "\n".join(str(v) for v in violations[:10]) ) @@ -1239,8 +1215,6 @@ def test_fixture_scope_appropriate() -> None: - Function-scoped fixtures that create expensive resources should use module/session - Session-scoped fixtures that yield mutable objects should use function scope """ - tests_root = Path(__file__).parent.parent - # Patterns suggesting expensive setup expensive_patterns = [ r"asyncpg\.connect", @@ -1252,43 +1226,44 @@ def test_fixture_scope_appropriate() -> None: r"testcontainers\.", ] - smells: list[DetectedSmell] = [] + violations: list[Violation] = [] + parse_errors: list[str] = [] - for py_file in find_test_files(tests_root): - source = py_file.read_text(encoding="utf-8") - try: - tree = ast.parse(source) - except SyntaxError: + for py_file in find_test_files(): + tree, error = parse_file_safe(py_file) + if error: + parse_errors.append(error) continue + if tree is None: + continue + + content, _ = read_file_safe(py_file) + if content is None: + continue + + rel_path = relative_path(py_file) for fixture in _get_fixtures(tree): scope = _get_fixture_scope(fixture) - fixture_source = ast.get_source_segment(source, fixture) + fixture_source = 
@@ -1239,8 +1215,6 @@ def test_fixture_scope_appropriate() -> None:
     - Function-scoped fixtures that create expensive resources should use module/session
     - Session-scoped fixtures that yield mutable objects should use function scope
     """
-    tests_root = Path(__file__).parent.parent
-
     # Patterns suggesting expensive setup
     expensive_patterns = [
         r"asyncpg\.connect",
@@ -1252,43 +1226,44 @@ def test_fixture_scope_appropriate() -> None:
         r"testcontainers\.",
     ]
 
-    smells: list[DetectedSmell] = []
+    violations: list[Violation] = []
+    parse_errors: list[str] = []
 
-    for py_file in find_test_files(tests_root):
-        source = py_file.read_text(encoding="utf-8")
-        try:
-            tree = ast.parse(source)
-        except SyntaxError:
+    for py_file in find_test_files():
+        tree, error = parse_file_safe(py_file)
+        if error:
+            parse_errors.append(error)
             continue
+        if tree is None:
+            continue
+
+        content, _ = read_file_safe(py_file)
+        if content is None:
+            continue
+
+        rel_path = relative_path(py_file)
 
         for fixture in _get_fixtures(tree):
             scope = _get_fixture_scope(fixture)
-            fixture_source = ast.get_source_segment(source, fixture)
+            fixture_source = ast.get_source_segment(content, fixture)
             if fixture_source:
                 # Check for expensive operations in function-scoped fixtures
                 if scope is None or scope == "function":
                     for pattern in expensive_patterns:
                         if re.search(pattern, fixture_source):
-                            smells.append(
-                                DetectedSmell(
-                                    smell_type="fixture_scope_too_narrow",
-                                    test_name=fixture.name,
-                                    file_path=py_file,
-                                    line_number=fixture.lineno,
-                                    details="expensive setup in function-scoped fixture",
+                            violations.append(
+                                Violation(
+                                    rule="fixture_scope_too_narrow",
+                                    relative_path=rel_path,
+                                    identifier=fixture.name,
+                                    detail="expensive_setup",
                                 )
                             )
                             break
 
-    # Allow some fixtures with narrow scope (may be intentional)
-    assert len(smells) <= 5, (
-        f"Found {len(smells)} fixtures with potentially wrong scope (max 5 allowed):\n"
-        + "\n".join(
-            f"  {s.file_path}:{s.line_number}: {s.test_name} - {s.details}"
-            for s in smells[:10]
-        )
-    )
+    collect_parse_errors(parse_errors)
+    assert_no_new_violations("fixture_scope_too_narrow", violations)
 
 
 def test_no_pytest_raises_without_match() -> None:
@@ -1297,16 +1272,18 @@ def test_no_pytest_raises_without_match() -> None:
     Using pytest.raises without match= can hide bugs where the wrong exception
     is raised. Always specify match= to verify the exception message.
     """
-    tests_root = Path(__file__).parent.parent
+    violations: list[Violation] = []
+    parse_errors: list[str] = []
 
-    smells: list[DetectedSmell] = []
-
-    for py_file in find_test_files(tests_root):
-        source = py_file.read_text(encoding="utf-8")
-        try:
-            tree = ast.parse(source)
-        except SyntaxError:
+    for py_file in find_test_files():
+        tree, error = parse_file_safe(py_file)
+        if error:
+            parse_errors.append(error)
             continue
+        if tree is None:
+            continue
+
+        rel_path = relative_path(py_file)
 
         for test_method in get_test_methods(tree):
            for node in ast.walk(test_method):
@@ -1321,47 +1298,17 @@ def test_no_pytest_raises_without_match() -> None:
                         kw.arg == "match" for kw in node.keywords
                     )
                     if not has_match:
-                        smells.append(
-                            DetectedSmell(
-                                smell_type="raises_without_match",
-                                test_name=test_method.name,
-                                file_path=py_file,
-                                line_number=node.lineno,
-                                details="pytest.raises() without match=",
+                        violations.append(
+                            Violation(
+                                rule="raises_without_match",
+                                relative_path=rel_path,
+                                identifier=test_method.name,
+                                detail=f"line={node.lineno}",
                             )
                         )
 
-    # Allow some raises without match (generic exception checks, FrozenInstanceError, etc.)
-    # Current baseline: 48. Goal: reduce to 40 over time.
-    assert len(smells) <= 50, (
-        f"Found {len(smells)} pytest.raises without match (max 50 allowed):\n"
-        + "\n".join(
-            f"  {s.file_path}:{s.line_number}: {s.test_name}" for s in smells[:15]
-        )
-    )
-
-
-def _get_module_level_fixtures(tree: ast.AST) -> list[ast.FunctionDef]:
-    """Extract only module-level pytest fixtures from AST (not class-scoped)."""
-    fixtures: list[ast.FunctionDef] = []
-    # Only check top-level function definitions, not methods inside classes
-    for node in tree.body:
-        if isinstance(node, ast.FunctionDef):
-            for decorator in node.decorator_list:
-                # Check for @pytest.fixture or @fixture
-                if isinstance(decorator, ast.Attribute):
-                    if decorator.attr == "fixture":
-                        fixtures.append(node)
-                        break
-                elif isinstance(decorator, ast.Call):
-                    if isinstance(decorator.func, ast.Attribute):
-                        if decorator.func.attr == "fixture":
-                            fixtures.append(node)
-                            break
-                elif isinstance(decorator, ast.Name) and decorator.id == "fixture":
-                    fixtures.append(node)
-                    break
-    return fixtures
+    collect_parse_errors(parse_errors)
+    assert_no_new_violations("raises_without_match", violations)
 
 
 def test_no_cross_file_fixture_duplicates() -> None:
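The enforced pattern, for reference (example invented): `match=` takes a regular expression that pytest applies with `re.search` to the string form of the raised exception, so the test pins down which failure it expects.

```python
import pytest


def divide(a: float, b: float) -> float:
    if b == 0:
        raise ValueError("division by zero is not allowed")
    return a / b


def test_divide_rejects_zero() -> None:
    # Without match=, any ValueError raised inside the block would pass
    with pytest.raises(ValueError, match="division by zero"):
        divide(1.0, 0.0)
```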
@@ -1372,16 +1319,16 @@ def test_no_cross_file_fixture_duplicates() -> None:
     """Detect identical fixtures defined in multiple test files.
 
     Class-scoped fixtures are excluded as they are intentionally isolated.
     """
-    tests_root = Path(__file__).parent.parent
-
     # Collect module-level fixtures by name
     fixture_locations: dict[str, list[tuple[Path, int]]] = defaultdict(list)
+    parse_errors: list[str] = []
 
-    for py_file in find_test_files(tests_root):
-        source = py_file.read_text(encoding="utf-8")
-        try:
-            tree = ast.parse(source)
-        except SyntaxError:
+    for py_file in find_test_files():
+        tree, error = parse_file_safe(py_file)
+        if error:
+            parse_errors.append(error)
+            continue
+        if tree is None:
             continue
 
         for fixture in _get_module_level_fixtures(tree):
@@ -1392,22 +1339,23 @@ def test_no_cross_file_fixture_duplicates() -> None:
         name: locs for name, locs in fixture_locations.items() if len(locs) > 1
     }
 
-    smells: list[DetectedSmell] = []
+    violations: list[Violation] = []
 
     for name, locations in duplicates.items():
-        files = ", ".join(str(f.relative_to(tests_root)) for f, _ in locations)
-        smells.append(
-            DetectedSmell(
-                smell_type="cross_file_fixture_duplicate",
-                test_name=name,
-                file_path=locations[0][0],
-                line_number=locations[0][1],
-                details=f"defined in {len(locations)} files: {files}",
+        first_path = relative_path(locations[0][0])
+        violations.append(
+            Violation(
+                rule="cross_file_fixture_duplicate",
+                relative_path=first_path,
+                identifier=name,
+                detail=f"files={len(locations)}",
            )
        )
 
-    # These should be consolidated to conftest.py
-    assert not smells, (
-        f"Found {len(smells)} fixtures duplicated across test files "
+    collect_parse_errors(parse_errors)
+
+    # Cross-file duplicates should be consolidated - zero tolerance
+    assert not violations, (
+        f"Found {len(violations)} fixtures duplicated across test files "
         "(consolidate to conftest.py):\n"
-        + "\n".join(f"  {s.test_name}: {s.details}" for s in smells[:10])
+        + "\n".join(str(v) for v in violations[:10])
     )
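A plausible shape for the shared helpers this patch calls throughout (`find_test_files`, `parse_file_safe`, `relative_path`); the real implementations live earlier in `tests/quality/` and may differ:

```python
# Hypothetical reconstruction, assuming the helpers centralize the old inline
# read/parse/except boilerplate behind tuple-returning functions.
import ast
from collections.abc import Iterator
from pathlib import Path

TESTS_ROOT = Path(__file__).resolve().parent.parent


def find_test_files() -> Iterator[Path]:
    for py_file in TESTS_ROOT.rglob("*.py"):
        if ".venv" not in py_file.parts:
            yield py_file


def relative_path(py_file: Path) -> str:
    return str(py_file.relative_to(TESTS_ROOT))


def parse_file_safe(py_file: Path) -> tuple[ast.Module | None, str | None]:
    """Return (tree, None) on success or (None, error description) on failure."""
    try:
        source = py_file.read_text(encoding="utf-8")
        return ast.parse(source), None
    except (OSError, UnicodeDecodeError, SyntaxError) as exc:
        return None, f"{py_file}: {exc}"
```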
diff --git a/uv.lock b/uv.lock
index 1daa322..3832634 100644
--- a/uv.lock
+++ b/uv.lock
@@ -2249,6 +2249,7 @@ dependencies = [
     { name = "pydantic" },
     { name = "pydantic-settings" },
     { name = "rich" },
+    { name = "sounddevice" },
     { name = "sqlalchemy", extra = ["asyncio"] },
     { name = "structlog" },
     { name = "types-psutil" },
@@ -2408,6 +2409,7 @@ requires-dist = [
     { name = "pywinctl", marker = "extra == 'triggers'", specifier = ">=0.3" },
     { name = "rich", specifier = ">=14.2.0" },
     { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.3" },
+    { name = "sounddevice", specifier = ">=0.5.3" },
     { name = "sounddevice", marker = "extra == 'audio'", specifier = ">=0.4.6" },
     { name = "sounddevice", marker = "extra == 'optional'", specifier = ">=0.4.6" },
     { name = "spacy", marker = "extra == 'ner'", specifier = ">=3.8.11" },