Files
claude-scripts/src/quality/config/schemas.py
Travis Vasceannie aff4da0712 feat: enhance code quality checks with file path context and improved messaging
- Added file path parameter to internal duplicate checks and complexity issue analysis for better context in error messages.
- Updated the EnhancedMessageFormatter to provide tailored refactoring suggestions based on whether the file is a test file.
- Improved complexity issue handling to allow moderate cyclomatic complexity while blocking critical issues, enhancing overall code quality feedback.
- Refactored configuration schema to streamline validation settings.
2025-10-11 21:41:52 +00:00

303 lines
9.5 KiB
Python

"""Configuration schemas using Pydantic."""
from __future__ import annotations
from pathlib import Path
from typing import TYPE_CHECKING
import yaml
from pydantic import BaseModel, Field, field_validator
if TYPE_CHECKING:
from types import ModuleType
class SimilarityAlgorithmConfig(BaseModel):
    """Configuration for similarity algorithms."""
    # Algorithm identifier, e.g. "levenshtein", "jaccard", "cosine", "semantic"
    # (see DetectionConfig's defaults for the known names).
    name: str
    # Relative contribution of this algorithm to the combined similarity score.
    weight: float = Field(default=1.0, ge=0.0, le=1.0)
    # Disabled algorithms are skipped and excluded from weight normalization.
    enabled: bool = True
    # Algorithm-specific tuning options (scalar values only).
    parameters: dict[str, str | int | float | bool] = Field(default_factory=dict)
class ComplexityConfig(BaseModel):
    """Configuration for complexity analysis."""
    # Toggles for the individual metric families computed by the analyzer.
    include_cyclomatic: bool = True
    include_cognitive: bool = True
    include_halstead: bool = True
    include_maintainability: bool = True
    # Complexity above this value is flagged as an issue
    # (presumably applies to the cyclomatic metric — confirm in the analyzer).
    complexity_threshold: int = Field(default=10, ge=1)
class DetectionConfig(BaseModel):
    """Configuration for duplicate detection."""
    # Minimum size a code fragment must have to be considered for matching.
    min_lines: int = Field(default=5, ge=1)
    min_tokens: int = Field(default=50, ge=1)
    # Combined similarity score (0..1) required to report a duplicate pair.
    similarity_threshold: float = Field(default=0.8, ge=0.0, le=1.0)
    # Similarity algorithms
    # Weighted ensemble of similarity measures; the default weights sum to 1.0
    # (QualityConfig normalizes enabled weights if they drift too far from 1.0).
    similarity_algorithms: list[SimilarityAlgorithmConfig] = Field(
        default_factory=lambda: [
            SimilarityAlgorithmConfig(name="levenshtein", weight=0.2),
            SimilarityAlgorithmConfig(name="jaccard", weight=0.3),
            SimilarityAlgorithmConfig(name="cosine", weight=0.3),
            SimilarityAlgorithmConfig(name="semantic", weight=0.2),
        ],
    )
    # Performance settings
    # Locality-sensitive hashing pre-filter for large inputs.
    use_lsh: bool = True
    lsh_threshold: int = Field(
        default=1000,
        ge=100,
    )  # Use LSH for datasets larger than this
    parallel_processing: bool = True
    # None -> let the implementation choose the worker count
    # (presumably the CPU count — confirm against the executor setup).
    max_workers: int | None = None
class LanguageConfig(BaseModel):
    """Configuration for language support."""
    # Languages actually analyzed; only Python is enabled by default.
    languages: set[str] = Field(default_factory=lambda: {"python"})
    # Maps each known language name to the file extensions it claims.
    file_extensions: dict[str, list[str]] = Field(
        default_factory=lambda: {
            "python": [".py", ".pyx", ".pyi"],
            "javascript": [".js", ".jsx", ".es6", ".mjs"],
            "typescript": [".ts", ".tsx"],
            "java": [".java"],
            "c": [".c", ".h"],
            "cpp": [".cpp", ".cxx", ".cc", ".hpp", ".hxx"],
            "csharp": [".cs"],
            "go": [".go"],
            "rust": [".rs"],
            "php": [".php"],
            "ruby": [".rb"],
        },
    )
class PathConfig(BaseModel):
    """Configuration for file paths."""
    # Glob patterns selecting files to analyze.
    include_patterns: list[str] = Field(default_factory=lambda: ["**/*.py"])
    # Glob patterns removed from the include set: caches, virtualenvs,
    # package trees, VCS metadata, and build artifacts.
    exclude_patterns: list[str] = Field(
        default_factory=lambda: [
            "**/__pycache__/**",
            "**/*.pyc",
            "**/venv/**",
            "**/.venv/**",
            "**/node_modules/**",
            "**/.git/**",
            "**/build/**",
            "**/dist/**",
        ],
    )
    # Hard cap on the number of files scanned; None means no limit.
    max_files: int | None = None
    follow_symlinks: bool = False
class RefactoringConfig(BaseModel):
    """Configuration for refactoring suggestions."""
    # Master switch for emitting refactoring suggestions at all.
    enabled: bool = True
    # Suggestions scoring below this priority are dropped.
    min_priority_score: float = Field(default=1.0, ge=0.0)
    # Per-pattern toggles for the suggestion kinds the tool can emit.
    suggest_extract_method: bool = True
    suggest_extract_class: bool = True
    suggest_parameter_object: bool = True
    suggest_template_method: bool = True
    # Whether to attach an effort estimate to each suggestion.
    estimate_effort: bool = True
    # Suggestions with estimated risk above this value are treated differently
    # (exact handling lives in the suggestion engine — confirm there).
    risk_threshold: float = Field(default=0.7, ge=0.0, le=1.0)
class ReportingConfig(BaseModel):
    """Configuration for reporting."""
    # Output formats to produce; "console" is the only default.
    formats: list[str] = Field(default_factory=lambda: ["console"])
    # Directory that file-based reports are written into.
    output_dir: Path = Field(default=Path("./quality_reports"))
    # Console reporting
    show_code_preview: bool = True
    show_complexity_metrics: bool = True
    show_refactoring_suggestions: bool = True
    # Dashboard settings
    dashboard_enabled: bool = False
    # Port restricted to the non-privileged range.
    dashboard_port: int = Field(default=8080, ge=1024, le=65535)
    dashboard_host: str = "localhost"
    # Export formats
    export_sarif: bool = False
    export_json: bool = False
    export_html: bool = False
    export_csv: bool = False
class CacheConfig(BaseModel):
    """Configuration for caching."""
    # Master switch for the on-disk cache.
    enabled: bool = True
    # Directory holding cached analysis results.
    cache_dir: Path = Field(default=Path(".quality_cache"))
    # Cached entries older than this many days are considered stale.
    max_age_days: int = Field(default=7, ge=1)
    # Additional in-process cache layered on top of the disk cache.
    use_memory_cache: bool = True
class IntegrationConfig(BaseModel):
    """Configuration for external integrations."""
    # Git integration
    use_git: bool = True
    analyze_git_history: bool = False
    # Attribute duplicated code to authors via git blame.
    blame_duplicates: bool = False
    # JSCPD integration for multi-language support
    use_jscpd: bool = True
    # Explicit path to the jscpd executable; None means rely on PATH lookup
    # (presumably — confirm in the integration code).
    jscpd_path: str | None = None
    # Options passed through to jscpd (scalar values only).
    jscpd_config: dict[str, str | int | float | bool] = Field(default_factory=dict)
class ExceptionRule(BaseModel):
    """Configuration for a single exception rule.

    Describes one suppression: which analysis (and optionally which issue
    type) it applies to, and which files/lines it matches.
    """
    analysis_type: str  # "complexity", "duplicates", "modernization", "code_smells"
    issue_type: str | None = None  # Specific issue type (optional)
    file_patterns: list[str] = Field(default_factory=list)  # File path patterns
    line_patterns: list[str] = Field(default_factory=list)  # Line content patterns
    reason: str | None = None  # Optional reason for the exception
    expires: str | None = None  # Optional expiration date (YYYY-MM-DD)
    # Disabled rules are kept in the config but ignored by the matcher.
    enabled: bool = True
class ExceptionsConfig(BaseModel):
    """Configuration for analysis exceptions."""
    # Master switch for the whole exceptions mechanism.
    enabled: bool = True
    # Fine-grained suppression rules (see ExceptionRule).
    rules: list[ExceptionRule] = Field(default_factory=list)
    # Global file/directory exceptions
    exclude_files: list[str] = Field(default_factory=list)
    exclude_directories: list[str] = Field(default_factory=list)
    # Temporary suppressions (auto-expire)
    temporary_suppressions: dict[str, str] = Field(
        default_factory=dict,
    )  # rule_id -> expiry_date
class QualityConfig(BaseModel):
    """Main configuration for code quality analysis.

    Aggregates all configuration sections plus a few global flags. A field
    validator keeps the enabled similarity-algorithm weights normalized so
    downstream scoring can treat them as summing to 1.0.
    """
    # Core configuration sections
    detection: DetectionConfig = Field(default_factory=DetectionConfig)
    complexity: ComplexityConfig = Field(default_factory=ComplexityConfig)
    languages: LanguageConfig = Field(default_factory=LanguageConfig)
    paths: PathConfig = Field(default_factory=PathConfig)
    refactoring: RefactoringConfig = Field(default_factory=RefactoringConfig)
    reporting: ReportingConfig = Field(default_factory=ReportingConfig)
    cache: CacheConfig = Field(default_factory=CacheConfig)
    integrations: IntegrationConfig = Field(default_factory=IntegrationConfig)
    exceptions: ExceptionsConfig = Field(default_factory=ExceptionsConfig)
    # Global settings
    version: str = "1.0.0"
    debug: bool = False
    verbose: bool = False

    @field_validator("detection")
    @classmethod
    def validate_similarity_weights(cls, v: DetectionConfig) -> DetectionConfig:
        """Ensure similarity algorithm weights sum to approximately 1.0.

        Enabled weights that drift more than 0.1 from 1.0 are normalized in
        place. When every enabled weight is 0 (or no algorithm is enabled)
        the weights are left untouched instead of dividing by zero.
        """
        total_weight = sum(alg.weight for alg in v.similarity_algorithms if alg.enabled)
        # Guard against total_weight == 0: the original normalization raised
        # ZeroDivisionError when all algorithms were disabled or zero-weighted.
        if total_weight > 0 and abs(total_weight - 1.0) > 0.1:
            # Auto-normalize weights so enabled ones sum to exactly 1.0
            for alg in v.similarity_algorithms:
                if alg.enabled:
                    alg.weight /= total_weight
        return v

    model_config = {
        "validate_assignment": True,
        "extra": "forbid",
    }
def load_config(config_path: Path | None = None) -> QualityConfig:
    """Load configuration from file or use defaults.

    When *config_path* is None, the first existing file among a set of
    conventional locations is used; if none exists (or the given path does
    not exist) a default :class:`QualityConfig` is returned.
    """
    if config_path is None:
        # Probe the conventional locations in priority order.
        candidates = (
            Path("quality.yaml"),
            Path("quality.yml"),
            Path(".quality.yaml"),
            Path(".quality.yml"),
            Path("pyproject.toml"),  # Look for [tool.quality] section
        )
        config_path = next((p for p in candidates if p.exists()), None)
    if config_path is not None and config_path.exists():
        return _load_from_file(config_path)
    return QualityConfig()
def _load_from_file(config_path: Path) -> QualityConfig:
"""Load configuration from specific file."""
if config_path.suffix.lower() in {".yaml", ".yml"}:
return _load_from_yaml(config_path)
if config_path.name == "pyproject.toml":
return _load_from_pyproject(config_path)
msg = f"Unsupported config file format: {config_path}"
raise ValueError(msg)
def _load_from_yaml(config_path: Path) -> QualityConfig:
    """Load configuration from YAML file.

    An empty (or all-comments) file parses to a falsy value and yields the
    default configuration.
    """
    text = config_path.read_text(encoding="utf-8")
    data = yaml.safe_load(text)
    if not data:
        return QualityConfig()
    return QualityConfig(**data)
def _load_from_pyproject(config_path: Path) -> QualityConfig:
    """Load configuration from pyproject.toml file.

    Reads the ``[tool.quality]`` table; when the table is absent or empty a
    default configuration is returned. Parsing uses the stdlib ``tomllib``
    (Python 3.11+) with a fallback to the ``tomli`` backport.
    """
    parser: ModuleType
    try:
        import tomllib  # stdlib TOML parser, Python 3.11+
        parser = tomllib
    except ImportError:
        try:
            import tomli  # third-party backport for older interpreters
            parser = tomli
        except ImportError as e:
            msg = (
                "tomli package required to read pyproject.toml. "
                "Install with: pip install tomli"
            )
            raise ImportError(
                msg,
            ) from e
    # TOML must be opened in binary mode per the tomllib API.
    with open(config_path, "rb") as fh:
        document = parser.load(fh)
    # Extract quality configuration
    section = document.get("tool", {}).get("quality", {})
    return QualityConfig(**section) if section else QualityConfig()
def save_config(config: QualityConfig, output_path: Path) -> None:
    """Save configuration to YAML file.

    Only values differing from the defaults are written, keeping the file
    minimal; keys are sorted and emitted in block (non-flow) style.
    """
    payload = config.model_dump(exclude_defaults=True)
    with open(output_path, "w", encoding="utf-8") as stream:
        yaml.dump(
            payload,
            stream,
            default_flow_style=False,
            sort_keys=True,
        )