feat: implement code quality analysis toolkit with modernization, complexity and duplication detection

This commit is contained in:
2025-08-26 12:23:57 -04:00
parent 530c49accd
commit 0475c3cae6
34 changed files with 7273 additions and 0 deletions

161
.gitignore vendored Normal file
View File

@@ -0,0 +1,161 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# IDE specific files
.vscode/
.idea/
*.swp
*.swo
*~
# OS specific files
.DS_Store
.DS_Store?
._*
.Spotlight-V100
.Trashes
ehthumbs.db
Thumbs.db
# Temporary files
*.tmp
*.temp
*.bak
*.backup
# Log files
*.log
# Database files
*.db
*.sqlite
*.sqlite3
# Configuration files with secrets
.env.local
.env.production
config.local.yaml
secrets.yaml
# UV specific
.uv_cache/
# Ruff cache
.ruff_cache/
# Test artifacts
.coverage
.pytest_cache/
htmlcov/
# Build artifacts
dist/
build/
*.egg-info/

21
LICENSE Normal file
View File

@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2024 IntelliKit Team
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

61
pyproject.toml Normal file
View File

@@ -0,0 +1,61 @@
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
[project]
name = "claude-scripts"
version = "0.1.0"
description = "A comprehensive Python code quality analysis toolkit for detecting duplicates, complexity metrics, and modernization opportunities"
authors = [{name = "Your Name", email = "your.email@example.com"}]
readme = "README.md"
license = {file = "LICENSE"}
requires-python = ">=3.12"
classifiers = [
"Development Status :: 4 - Beta",
"Intended Audience :: Developers",
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
"Topic :: Software Development :: Quality Assurance",
"Topic :: Software Development :: Libraries :: Python Modules",
]
keywords = ["code-quality", "static-analysis", "duplicate-detection", "complexity", "refactoring"]
dependencies = [
"click>=8.0.0",
"pyyaml>=6.0",
"pydantic>=2.0.0",
"radon>=6.0.0",
]
[project.optional-dependencies]
dev = [
"pytest>=7.0.0",
"pytest-cov>=4.0.0",
"ruff>=0.1.0",
"mypy>=1.5.0",
"pre-commit>=3.0.0",
]
[project.urls]
Homepage = "https://github.com/yourusername/claude-scripts"
Repository = "https://github.com/yourusername/claude-scripts"
Issues = "https://github.com/yourusername/claude-scripts/issues"
Documentation = "https://github.com/yourusername/claude-scripts#readme"
[project.scripts]
claude-quality = "quality.cli.main:cli"
[tool.hatch.build.targets.sdist]
exclude = [
"/.github",
"/docs",
"/.vscode",
"/.pytest_cache",
"/.mypy_cache",
"/.ruff_cache",
]
[tool.hatch.build.targets.wheel]
packages = ["src/quality"]

View File

@@ -0,0 +1,131 @@
# Quality Analysis Exceptions Configuration
# This file allows you to suppress specific analysis results based on file patterns,
# line patterns, issue types, and other criteria.
exceptions:
enabled: true
# Global file and directory exclusions
# These patterns will suppress all issues for matching files/directories
exclude_files:
- "*/tests/*"
- "*/test_*"
- "*/__pycache__/*"
- "*/migrations/*"
- "*/conftest.py"
- "*/.pytest_cache/*"
exclude_directories:
- "*/venv/*"
- "*/.venv/*"
- "*/node_modules/*"
- "*/.git/*"
- "*/build/*"
- "*/dist/*"
# Specific exception rules
rules:
# Example: Suppress complexity issues in legacy code
- analysis_type: "complexity"
issue_type: "high_complexity"
file_patterns:
- "*/legacy/*"
- "*/third_party/*"
- "*/vendor/*"
reason: "Legacy code with known complexity - migration planned"
# Example: Allow intentional Pydantic v1 usage in compatibility layers
- analysis_type: "modernization"
issue_type: "pydantic_v1_pattern"
file_patterns:
- "*/compatibility/*"
- "*/adapters/*"
line_patterns:
- "# pydantic v1 required"
- "# TODO: migrate to v2"
- "# legacy compatibility"
reason: "Intentional Pydantic v1 usage for compatibility with legacy systems"
# Example: Suppress typing imports for external compatibility
- analysis_type: "modernization"
issue_type: "legacy_typing_import"
file_patterns:
- "*/external/*"
- "*/integrations/*"
reason: "External library compatibility requirements"
# Example: Allow duplicates in generated/template code
- analysis_type: "duplicates"
file_patterns:
- "*/templates/*"
- "*/generated/*"
- "*/auto_generated/*"
- "*/schemas/auto/*"
reason: "Generated or template code - duplication expected and acceptable"
# Example: Suppress modernization issues in scripts
- analysis_type: "modernization"
file_patterns:
- "*/scripts/*"
- "*/migrations/*"
reason: "Scripts and migrations prioritize backward compatibility"
# Example: Temporary suppression with expiration for gradual refactoring
- analysis_type: "complexity"
issue_type: "high_complexity"
file_patterns:
- "*/parsers/*"
- "*/processors/*"
reason: "Complex parsing logic - refactoring scheduled for Q2 2024"
expires: "2024-06-30"
enabled: true
# Example: Suppress specific modernization patterns in test files
- analysis_type: "modernization"
issue_type: "legacy_typing_import"
file_patterns:
- "**/test_*.py"
- "**/tests/*.py"
reason: "Tests may use legacy patterns for compatibility testing"
# Example: Allow specific duplicates in configuration files
- analysis_type: "duplicates"
file_patterns:
- "*/config/*"
- "*/settings/*"
line_patterns:
- "# duplicate config acceptable"
- "# shared configuration"
reason: "Configuration files may have intentional duplication"
# Analysis Types Available:
# - "complexity" - Code complexity issues (high cyclomatic/cognitive complexity)
# - "duplicates" - Duplicate code detection
# - "modernization" - Modern Python pattern suggestions
# - "code_smells" - General code smell detection (if implemented)
# Common Issue Types:
# Complexity:
# - "high_complexity" - General high complexity
# - "cyclomatic_complexity" - High cyclomatic complexity
# - "cognitive_complexity" - High cognitive complexity
#
# Modernization:
# - "legacy_typing_import" - from typing import List, Dict, etc.
# - "pydantic_v1_pattern" - Pydantic v1 usage patterns
# - "old_string_formatting" - % string formatting
# - "format_to_fstring" - .format() that could be f-strings
# - "unnecessary_object_inheritance" - class Foo(object):
#
# Duplicates:
# - "duplicate_code" - General duplicate code blocks
# Pattern Syntax:
# - file_patterns: Unix shell-style wildcards (*, **, ?, [seq])
# - line_patterns: Python regex patterns
# - Use "*" for analysis_type to match all analysis types
# - Leave issue_type empty to match all issues of that analysis type
# Expiration Format:
# - expires: "YYYY-MM-DD" format
# - Rules with past expiration dates are automatically disabled

9
src/quality/__init__.py Normal file
View File

@@ -0,0 +1,9 @@
"""Enhanced code quality analysis package."""
__version__ = "1.0.0"
__author__ = "IntelliKit Team"
__email__ = "team@intellikit.com"
# Minimal imports to prevent pre-commit failures
# Full imports can be added later when all modules are properly set up
__all__ = []

View File

@@ -0,0 +1 @@
"""Code analyzers for various quality checks."""

View File

@@ -0,0 +1,831 @@
"""Modern Python patterns analyzer."""
import ast
import re
from dataclasses import dataclass
from pathlib import Path
from typing import Any
from ..config.schemas import QualityConfig
from ..core.exceptions import ExceptionFilter
@dataclass
class ModernizationIssue:
    """Represents a modernization issue in code."""

    # Where the issue was found.
    file_path: str
    line_number: int
    column: int
    # Machine-readable category, e.g. "replaceable_typing_import", "bare_except".
    issue_type: str
    # Human-readable before/after snippets shown to the user.
    old_pattern: str
    suggested_fix: str
    severity: str  # "error", "warning", "info"
    description: str
    # Whether an automated fixer could safely apply suggested_fix.
    can_auto_fix: bool = True
class ModernizationAnalyzer(ast.NodeVisitor):
"""Analyzes code for non-modern Python patterns."""
# Types that can be replaced with modern built-in equivalents
REPLACEABLE_TYPING_IMPORTS = {
"List",
"Dict",
"Tuple",
"Set",
"FrozenSet",
"Union",
"Optional",
}
# Types that moved to collections module but are replaceable
COLLECTIONS_TYPING_IMPORTS = {
"Deque",
"Counter",
"DefaultDict",
"ChainMap",
"OrderedDict",
}
# Types that moved to other modules
MOVED_TYPING_IMPORTS = {
"Callable": "collections.abc",
"Coroutine": "collections.abc",
"Awaitable": "collections.abc",
"AsyncIterable": "collections.abc",
"AsyncIterator": "collections.abc",
"Iterable": "collections.abc",
"Iterator": "collections.abc",
"Generator": "collections.abc",
"Hashable": "collections.abc",
"Reversible": "collections.abc",
"Container": "collections.abc",
"Collection": "collections.abc",
"Sequence": "collections.abc",
"MutableSequence": "collections.abc",
"Set": "collections.abc",
"MutableSet": "collections.abc",
"Mapping": "collections.abc",
"MutableMapping": "collections.abc",
"Sized": "collections.abc",
"Pattern": "re",
"Match": "re",
}
# Types that must remain in typing module (no modern replacement)
REQUIRED_TYPING_IMPORTS = {
"ClassVar",
"TypeVar",
"Generic",
"Protocol",
"Final",
"Literal",
"Type",
"TypedDict",
"NewType",
"NoReturn",
"Never",
"Self",
"Unpack",
"TypeAlias",
"TypeGuard",
"TypeIs",
"Annotated",
"Any",
"overload",
"runtime_checkable",
"TYPE_CHECKING",
}
# Combined set of all recognized typing imports
ALL_TYPING_IMPORTS = (
REPLACEABLE_TYPING_IMPORTS
| COLLECTIONS_TYPING_IMPORTS
| set(MOVED_TYPING_IMPORTS.keys())
| REQUIRED_TYPING_IMPORTS
)
# Mapping for truly replaceable types
REPLACEABLE_TO_MODERN = {
"List": "list",
"Dict": "dict",
"Tuple": "tuple",
"Set": "set",
"FrozenSet": "frozenset",
"Union": "|",
"Optional": "| None",
}
# Mapping for collections types
COLLECTIONS_TO_MODERN = {
"Deque": "collections.deque",
"Counter": "collections.Counter",
"DefaultDict": "collections.defaultdict",
"ChainMap": "collections.ChainMap",
"OrderedDict": "collections.OrderedDict",
}
def __init__(
self, file_path: str, content: str, config: QualityConfig | None = None
):
self.file_path = file_path
self.content = content
self.content_lines = content.splitlines()
self.config = config or QualityConfig()
self.issues: list[ModernizationIssue] = []
self.imports: dict[str, str] = {} # name -> module
self.typing_imports: set[str] = set()
self.has_future_annotations = False
def analyze(self) -> list[ModernizationIssue]:
"""Run the modernization analysis."""
try:
tree = ast.parse(self.content)
self.visit(tree)
# Additional pattern-based checks
self._check_string_patterns()
self._check_exception_patterns()
self._check_super_patterns()
except SyntaxError:
pass # Skip files with syntax errors
return self.issues
def visit_ImportFrom(self, node: ast.ImportFrom) -> None:
"""Check for typing imports that can be modernized."""
if node.module == "typing":
for alias in node.names:
name = alias.asname or alias.name
if alias.name in self.ALL_TYPING_IMPORTS:
self.typing_imports.add(name)
# Only flag imports that can be modernized
if alias.name in self.REPLACEABLE_TYPING_IMPORTS:
self._add_replaceable_typing_import_issue(node, alias.name, name)
elif alias.name in self.COLLECTIONS_TYPING_IMPORTS:
self._add_collections_typing_import_issue(node, alias.name, name)
elif alias.name in self.MOVED_TYPING_IMPORTS:
self._add_moved_typing_import_issue(node, alias.name, name)
# Note: REQUIRED_TYPING_IMPORTS are not flagged as issues
elif node.module == "__future__" and any(
alias.name == "annotations" for alias in node.names
):
self.has_future_annotations = True
# Track all imports for context
if node.module:
for alias in node.names:
name = alias.asname or alias.name
if name is not None and node.module is not None:
self.imports[name] = node.module
self.generic_visit(node)
def visit_Import(self, node: ast.Import) -> None:
"""Track regular imports."""
for alias in node.names:
name = alias.asname or alias.name
self.imports[name] = alias.name
self.generic_visit(node)
def visit_Subscript(self, node: ast.Subscript) -> None:
"""Check for typing usage in type annotations that can be modernized."""
typing_name = None
if isinstance(node.value, ast.Name) and node.value.id in self.typing_imports:
typing_name = node.value.id
elif (
isinstance(node.value, ast.Attribute)
and isinstance(node.value.value, ast.Name)
and node.value.value.id == "typing"
and node.value.attr in self.ALL_TYPING_IMPORTS
):
# Handle typing.List, typing.Dict etc.
typing_name = node.value.attr
if typing_name:
# Only flag usage of types that can be modernized
if typing_name in (
self.REPLACEABLE_TYPING_IMPORTS | self.COLLECTIONS_TYPING_IMPORTS
):
self._add_typing_usage_issue(node, typing_name)
elif typing_name in self.MOVED_TYPING_IMPORTS:
self._add_moved_typing_usage_issue(node, typing_name)
# Note: REQUIRED_TYPING_IMPORTS usage is not flagged
self.generic_visit(node)
def visit_BinOp(self, node: ast.BinOp) -> None:
"""Check for Union usage that could be modernized."""
if isinstance(node.op, ast.BitOr):
# This is already modern syntax (X | Y)
pass
self.generic_visit(node)
def visit_FunctionDef(self, node: ast.FunctionDef) -> None:
"""Check function definitions for modernization opportunities."""
# Check for missing return type annotations
if not node.returns and not self._is_dunder_method(node.name):
self._add_missing_return_type_issue(node)
# Check for untyped parameters
for arg in node.args.args:
if not arg.annotation and arg.arg != "self" and arg.arg != "cls":
self._add_missing_param_type_issue(node, arg)
self.generic_visit(node)
def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None:
"""Check async function definitions."""
# Same checks as regular functions
if not node.returns and not self._is_dunder_method(node.name):
self._add_missing_return_type_issue(node)
for arg in node.args.args:
if not arg.annotation and arg.arg != "self" and arg.arg != "cls":
self._add_missing_param_type_issue(node, arg)
self.generic_visit(node)
def visit_ClassDef(self, node: ast.ClassDef) -> None:
"""Check class definitions for modernization opportunities."""
# Check if class inherits from object (unnecessary in Python 3)
for base in node.bases:
if isinstance(base, ast.Name) and base.id == "object":
self._add_unnecessary_object_inheritance_issue(node, base)
self.generic_visit(node)
def _add_replaceable_typing_import_issue(
self, node: ast.ImportFrom, typing_name: str, import_name: str
) -> None:
"""Add issue for typing import that can be replaced with built-ins."""
modern_replacement = self.REPLACEABLE_TO_MODERN[typing_name]
if typing_name in ["List", "Dict", "Tuple", "Set", "FrozenSet"]:
description = f"Use built-in '{modern_replacement}' instead of 'typing.{typing_name}' (Python 3.9+)"
severity = "warning"
elif typing_name == "Union":
description = (
"Use '|' union operator instead of 'typing.Union' (Python 3.10+)"
)
severity = "warning"
elif typing_name == "Optional":
description = "Use '| None' instead of 'typing.Optional' (Python 3.10+)"
severity = "warning"
else:
description = (
f"Use '{modern_replacement}' instead of 'typing.{typing_name}'"
)
severity = "warning"
self.issues.append(
ModernizationIssue(
file_path=self.file_path,
line_number=node.lineno,
column=node.col_offset,
issue_type="replaceable_typing_import",
old_pattern=f"from typing import {typing_name}",
suggested_fix=f"# Remove this import and use {modern_replacement} directly",
severity=severity,
description=description,
)
)
def _add_collections_typing_import_issue(
self, node: ast.ImportFrom, typing_name: str, import_name: str
) -> None:
"""Add issue for typing import that moved to collections."""
self.issues.append(
ModernizationIssue(
file_path=self.file_path,
line_number=node.lineno,
column=node.col_offset,
issue_type="collections_typing_import",
old_pattern=f"from typing import {typing_name}",
suggested_fix=f"from collections import {typing_name.lower()}",
severity="info",
description=f"Use 'from collections import {typing_name.lower()}' instead of 'typing.{typing_name}'",
)
)
def _add_moved_typing_import_issue(
self, node: ast.ImportFrom, typing_name: str, import_name: str
) -> None:
"""Add issue for typing import that moved to another module."""
target_module = self.MOVED_TYPING_IMPORTS[typing_name]
self.issues.append(
ModernizationIssue(
file_path=self.file_path,
line_number=node.lineno,
column=node.col_offset,
issue_type="moved_typing_import",
old_pattern=f"from typing import {typing_name}",
suggested_fix=f"from {target_module} import {typing_name}",
severity="info",
description=f"'{typing_name}' moved from 'typing' to '{target_module}' module",
)
)
def _add_typing_usage_issue(self, node: ast.Subscript, typing_name: str) -> None:
"""Add issue for typing usage that can be modernized."""
if typing_name in self.REPLACEABLE_TYPING_IMPORTS:
modern_replacement = self.REPLACEABLE_TO_MODERN[typing_name]
if typing_name in ["List", "Dict", "Tuple", "Set", "FrozenSet"]:
old_pattern = f"{typing_name}[...]"
new_pattern = f"{modern_replacement.lower()}[...]"
description = f"Use built-in '{modern_replacement}' instead of 'typing.{typing_name}'"
severity = "warning"
elif typing_name == "Union":
old_pattern = "Union[...]"
new_pattern = "... | ..."
description = "Use '|' union operator instead of 'typing.Union'"
severity = "warning"
elif typing_name == "Optional":
old_pattern = "Optional[...]"
new_pattern = "... | None"
description = "Use '| None' instead of 'typing.Optional'"
severity = "warning"
else:
return # Skip unknown replaceable types
elif typing_name in self.COLLECTIONS_TYPING_IMPORTS:
modern_replacement = self.COLLECTIONS_TO_MODERN[typing_name]
old_pattern = f"{typing_name}[...]"
new_pattern = f"{modern_replacement}[...]"
description = (
f"Use '{modern_replacement}' instead of 'typing.{typing_name}'"
)
severity = "info"
else:
return # Skip unknown types
self.issues.append(
ModernizationIssue(
file_path=self.file_path,
line_number=node.lineno,
column=node.col_offset,
issue_type="typing_usage",
old_pattern=old_pattern,
suggested_fix=new_pattern,
severity=severity,
description=description,
)
)
def _add_moved_typing_usage_issue(
self, node: ast.Subscript, typing_name: str
) -> None:
"""Add issue for typing usage that moved to another module."""
target_module = self.MOVED_TYPING_IMPORTS[typing_name]
self.issues.append(
ModernizationIssue(
file_path=self.file_path,
line_number=node.lineno,
column=node.col_offset,
issue_type="moved_typing_usage",
old_pattern=f"typing.{typing_name}[...]",
suggested_fix=f"{target_module}.{typing_name}[...]",
severity="info",
description=f"Use '{target_module}.{typing_name}' instead of 'typing.{typing_name}'",
)
)
def _add_missing_return_type_issue(
self, node: ast.FunctionDef | ast.AsyncFunctionDef
) -> None:
"""Add issue for missing return type annotation."""
self.issues.append(
ModernizationIssue(
file_path=self.file_path,
line_number=node.lineno,
column=node.col_offset,
issue_type="missing_return_type",
old_pattern=f"def {node.name}(...)",
suggested_fix=f"def {node.name}(...) -> ReturnType",
severity="info",
description="Consider adding return type annotation for better type safety",
can_auto_fix=False,
)
)
def _add_missing_param_type_issue(
self, node: ast.FunctionDef | ast.AsyncFunctionDef, arg: ast.arg
) -> None:
"""Add issue for missing parameter type annotation."""
self.issues.append(
ModernizationIssue(
file_path=self.file_path,
line_number=node.lineno,
column=node.col_offset,
issue_type="missing_param_type",
old_pattern=f"{arg.arg}",
suggested_fix=f"{arg.arg}: ParamType",
severity="info",
description=f"Consider adding type annotation for parameter '{arg.arg}'",
can_auto_fix=False,
)
)
def _add_unnecessary_object_inheritance_issue(
self, node: ast.ClassDef, base: ast.Name
) -> None:
"""Add issue for unnecessary object inheritance."""
self.issues.append(
ModernizationIssue(
file_path=self.file_path,
line_number=node.lineno,
column=node.col_offset,
issue_type="unnecessary_object_inheritance",
old_pattern=f"class {node.name}(object)",
suggested_fix=f"class {node.name}",
severity="info",
description="Inheriting from 'object' is unnecessary in Python 3",
can_auto_fix=True,
)
)
def _check_string_patterns(self) -> None:
"""Check for old-style string formatting."""
for i, line in enumerate(self.content_lines, 1):
# Check for % formatting
if re.search(r'["\'].*%[sd].*["\'].*%', line):
self.issues.append(
ModernizationIssue(
file_path=self.file_path,
line_number=i,
column=0,
issue_type="old_string_formatting",
old_pattern="'...' % (...)",
suggested_fix="f'...' or '...'.format(...)",
severity="info",
description="Consider using f-strings or .format() instead of % formatting",
)
)
# Check for .format() that could be f-string
if re.search(r'["\'].*\{.*\}.*["\']\.format\(', line):
self.issues.append(
ModernizationIssue(
file_path=self.file_path,
line_number=i,
column=0,
issue_type="format_to_fstring",
old_pattern="'...{}'.format(...)",
suggested_fix="f'...{...}'",
severity="info",
description="Consider using f-strings instead of .format() for better readability",
)
)
def _check_exception_patterns(self) -> None:
"""Check for old-style exception handling."""
for i, line in enumerate(self.content_lines, 1):
# Check for bare except
if re.search(r"except\s*:", line.strip()):
self.issues.append(
ModernizationIssue(
file_path=self.file_path,
line_number=i,
column=0,
issue_type="bare_except",
old_pattern="except:",
suggested_fix="except Exception:",
severity="warning",
description="Use specific exception types instead of bare except",
)
)
def _check_super_patterns(self) -> None:
"""Check for old-style super() calls."""
for i, line in enumerate(self.content_lines, 1):
# Check for old-style super calls
if re.search(r"super\(\s*\w+\s*,\s*self\s*\)", line):
self.issues.append(
ModernizationIssue(
file_path=self.file_path,
line_number=i,
column=0,
issue_type="old_super_call",
old_pattern="super(ClassName, self)",
suggested_fix="super()",
severity="info",
description="Use super() without arguments (Python 3+)",
)
)
def _is_dunder_method(self, name: str) -> bool:
"""Check if method name is a dunder method."""
return name.startswith("__") and name.endswith("__")
class PydanticAnalyzer:
    """Analyzes Pydantic usage patterns and migration opportunities.

    Purely line/regex based (no AST), so results are heuristic: v1 patterns
    are only reported in files that mention pydantic in their first 20 lines,
    and lines containing known v2 method calls are skipped.
    """

    # Regex pattern -> explanation of the Pydantic v1 idiom it detects.
    V1_PATTERNS = {
        # Model configuration patterns
        r"class\s+Config:": "Use model_config instead of Config class (Pydantic v2)",
        # Field patterns
        r"Field\([^)]*allow_mutation=": "allow_mutation is deprecated, use frozen instead",
        r"Field\([^)]*regex=": "regex parameter is deprecated, use pattern instead",
        r"Field\([^)]*min_length=": "Consider using StringConstraints for string validation",
        r"Field\([^)]*max_length=": "Consider using StringConstraints for string validation",
        # Validator patterns
        r"@validator": "@validator is deprecated, use @field_validator instead",
        r"@root_validator": "@root_validator is deprecated, use @model_validator instead",
        r"pre=True": "pre parameter syntax changed in Pydantic v2",
        # Model methods
        r"\.dict\(\)": "Use .model_dump() instead of .dict() (Pydantic v2)",
        r"\.json\(\)": "Use .model_dump_json() instead of .json() (Pydantic v2)",
        r"\.parse_obj\(": "Use model_validate() instead of parse_obj() (Pydantic v2)",
        r"\.parse_raw\(": "Use model_validate_json() instead of parse_raw() (Pydantic v2)",
        r"\.schema\(\)": "Use model_json_schema() instead of schema() (Pydantic v2)",
        r"\.copy\(\)": "Use model_copy() instead of copy() (Pydantic v2)",
        # Import patterns
        r"from pydantic import.*BaseSettings": "BaseSettings moved to pydantic-settings package",
    }

    # Pydantic v2 methods that should NEVER be flagged as issues when used with model classes
    V2_METHODS = {
        "model_validate",
        "model_validate_json",
        "model_dump",
        "model_dump_json",
        "model_copy",
        "model_json_schema",
        "model_rebuild",
        "model_fields",
        "model_fields_set",
        "model_computed_fields",
        "model_config",
        "model_extra",
    }

    # Substrings suggesting intentional v1 usage.  Entries MUST be lowercase:
    # they are matched against content.lower() in _is_intentional_v1_usage().
    INTENTIONAL_V1_CONTEXTS = {
        "pydantic.v1",  # Explicit v1 import
        "pydantic_v1",  # Common alias for v1
        "__pydantic_model__",  # v1 compatibility marker
        "model_rebuild",  # Sometimes used in migration contexts
        "# pydantic v1",  # Comment indicating intentional v1 usage
        # Fix: was "# TODO: migrate" (uppercase), which can never appear in
        # lowercased content and therefore never matched.
        "# todo: migrate",  # Comment indicating planned migration
    }

    def __init__(self, file_path: str, content: str):
        self.file_path = file_path
        self.content = content
        self.content_lines = content.splitlines()
        self.issues: list[ModernizationIssue] = []

    def analyze(self) -> "list[ModernizationIssue]":
        """Analyze Pydantic usage patterns and return collected issues."""
        has_pydantic_import = self._has_pydantic_import()
        if not has_pydantic_import:
            return []
        # Check if this looks like intentional v1 usage (downgrades severity)
        is_intentional_v1 = self._is_intentional_v1_usage()
        for i, line in enumerate(self.content_lines, 1):
            # Skip lines that contain valid Pydantic v2 patterns
            if self._is_valid_v2_pattern(line):
                continue
            for pattern, description in self.V1_PATTERNS.items():
                if re.search(pattern, line):
                    severity = "info" if is_intentional_v1 else "warning"
                    # Determine suggested fix based on pattern
                    suggested_fix = self._get_suggested_fix(pattern, line)
                    self.issues.append(
                        ModernizationIssue(
                            file_path=self.file_path,
                            line_number=i,
                            column=0,
                            issue_type="pydantic_v1_pattern",
                            old_pattern=pattern,
                            suggested_fix=suggested_fix,
                            severity=severity,
                            description=description,
                            can_auto_fix=pattern
                            in [r"\.dict\(\)", r"\.json\(\)", r"\.copy\(\)"],
                        )
                    )
        return self.issues

    def _has_pydantic_import(self) -> bool:
        """Check if file imports Pydantic."""
        return any(
            "pydantic" in line for line in self.content_lines[:20]
        )  # Check first 20 lines

    def _is_intentional_v1_usage(self) -> bool:
        """Check if this appears to be intentional v1 usage."""
        content_lower = self.content.lower()
        return any(context in content_lower for context in self.INTENTIONAL_V1_CONTEXTS)

    def _is_valid_v2_pattern(self, line: str) -> bool:
        """Check if line contains valid Pydantic v2 patterns that should not be flagged."""
        # Check if line contains any valid v2 methods
        return any(f".{v2_method}(" in line for v2_method in self.V2_METHODS)

    def _get_suggested_fix(self, pattern: str, line: str) -> str:
        """Get suggested fix for a Pydantic pattern."""
        fixes = {
            r"\.dict\(\)": line.replace(".dict()", ".model_dump()"),
            r"\.json\(\)": line.replace(".json()", ".model_dump_json()"),
            r"\.copy\(\)": line.replace(".copy()", ".model_copy()"),
            r"@validator": line.replace("@validator", "@field_validator"),
            r"@root_validator": line.replace("@root_validator", "@model_validator"),
        }
        for fix_pattern, fix_line in fixes.items():
            if re.search(fix_pattern, line):
                return fix_line.strip()
        return "See Pydantic v2 migration guide"
class ModernizationEngine:
"""Main engine for running modernization analysis."""
def __init__(self, config: QualityConfig | None = None):
self.config = config or QualityConfig()
# Import here to avoid circular imports
self.exception_filter = ExceptionFilter(self.config)
def analyze_file(self, file_path: Path) -> list[ModernizationIssue]:
"""Analyze a single file for modernization opportunities."""
try:
with open(file_path, encoding="utf-8") as f:
content = f.read()
except (OSError, UnicodeDecodeError):
return []
issues = []
# Python modernization analysis
python_analyzer = ModernizationAnalyzer(str(file_path), content, self.config)
issues.extend(python_analyzer.analyze())
# Pydantic analysis
pydantic_analyzer = PydanticAnalyzer(str(file_path), content)
issues.extend(pydantic_analyzer.analyze())
return issues
def analyze_files(
self, file_paths: list[Path]
) -> dict[Path, list[ModernizationIssue]]:
"""Analyze multiple files for modernization opportunities."""
results = {}
for file_path in file_paths:
if file_path.suffix.lower() == ".py":
issues = self.analyze_file(file_path)
# Apply exception filtering
filtered_issues = self.exception_filter.filter_issues(
"modernization",
issues,
get_file_path_fn=lambda issue: issue.file_path,
get_line_number_fn=lambda issue: issue.line_number,
get_issue_type_fn=lambda issue: issue.issue_type,
get_line_content_fn=lambda issue: self._get_line_content(
issue.file_path, issue.line_number
),
)
if filtered_issues: # Only include files with remaining issues
results[file_path] = filtered_issues
return results
def _get_line_content(self, file_path: str, line_number: int) -> str:
"""Get the content of a specific line from a file."""
try:
with open(file_path, encoding="utf-8") as f:
lines = f.readlines()
if 1 <= line_number <= len(lines):
return lines[line_number - 1].strip()
except (OSError, UnicodeDecodeError):
pass
return ""
def get_summary(
self, results: dict[Path, list[ModernizationIssue]]
) -> dict[str, Any]:
"""Generate summary of modernization analysis."""
all_issues = []
for issues in results.values():
if issues is not None:
all_issues.extend(issues)
# Group by issue type
by_type: dict[str, list[ModernizationIssue]] = {}
by_severity = {"error": 0, "warning": 0, "info": 0}
for issue in all_issues:
by_type.setdefault(issue.issue_type, []).append(issue)
by_severity[issue.severity] += 1
# Top files with most issues
file_counts = {}
for file_path, issues in results.items():
if issues:
file_counts[file_path] = len(issues)
top_files = sorted(file_counts.items(), key=lambda x: x[1], reverse=True)[:10]
# Auto-fixable issues
auto_fixable = sum(1 for issue in all_issues if issue.can_auto_fix)
return {
"total_files_analyzed": len(results),
"files_with_issues": len(
[
f
for f, issues in results.items()
if issues is not None and len(issues) > 0
]
),
"total_issues": len(all_issues),
"by_severity": by_severity,
"by_type": {k: len(v) for k, v in by_type.items()},
"auto_fixable_count": auto_fixable,
"top_files_with_issues": [(str(f), count) for f, count in top_files],
"recommendations": self._generate_recommendations(by_type, by_severity),
}
def _generate_recommendations(
    self, by_type: dict[str, list[ModernizationIssue]], by_severity: dict[str, int]
) -> list[str]:
    """Generate recommendations based on analysis results."""
    recommendations: list[str] = []

    # Count-gated rules: only emitted when at least one issue of the type
    # was found.
    counted_rules = (
        (
            "replaceable_typing_import",
            "🔄 Update {n} typing imports to use modern built-in types (Python 3.9+)",
        ),
        (
            "collections_typing_import",
            "📦 Update {n} typing imports to use collections module",
        ),
        (
            "moved_typing_import",
            "🔀 Update {n} typing imports that moved to other modules",
        ),
        (
            "typing_usage",
            "⚡ Modernize {n} type annotations to use built-ins or | union syntax",
        ),
        (
            "moved_typing_usage",
            "🔀 Update {n} type annotations that moved to other modules",
        ),
    )
    for issue_type, template in counted_rules:
        found = len(by_type.get(issue_type, []))
        if found > 0:
            recommendations.append(template.format(n=found))

    # Presence-gated rules: emitted whenever the key exists in by_type,
    # mirroring the original membership checks.
    presence_rules = (
        ("pydantic_v1_pattern", "📦 Migrate {n} Pydantic v1 patterns to v2 API"),
        (
            "old_string_formatting",
            "✨ Replace {n} old string formatting patterns with f-strings",
        ),
        (
            "bare_except",
            "⚠️ Fix {n} bare except clauses for better error handling",
        ),
    )
    for issue_type, template in presence_rules:
        if issue_type in by_type:
            recommendations.append(template.format(n=len(by_type[issue_type])))

    warning_total = by_severity["warning"]
    if warning_total > 10:
        recommendations.append(
            f"🚨 Address {warning_total} warning-level issues for better code quality"
        )
    return recommendations

View File

@@ -0,0 +1 @@
"""CLI interface for the quality analysis package."""

691
src/quality/cli/main.py Normal file
View File

@@ -0,0 +1,691 @@
#!/usr/bin/env python3
"""Main CLI interface for code quality analysis."""
import ast
import csv
import json
import sys
from pathlib import Path
from typing import Any
import click
from ..analyzers.modernization import ModernizationEngine
from ..complexity.analyzer import ComplexityAnalyzer
from ..config.schemas import QualityConfig, _load_from_yaml, load_config
from ..core.ast_analyzer import ASTAnalyzer
from ..core.exceptions import create_exceptions_config_template
from ..detection.engine import DuplicateDetectionEngine
from ..utils.file_finder import FileFinder
@click.group()
@click.option(
    "--config",
    "-c",
    type=click.Path(exists=True, path_type=Path),
    help="Path to configuration file",
)
@click.option(
    "--exceptions-file",
    "-e",
    type=click.Path(exists=True, path_type=Path),
    help="Path to exceptions configuration file",
)
@click.option("--verbose", "-v", is_flag=True, help="Enable verbose output")
@click.pass_context
def cli(
    ctx: click.Context, config: Path | None, exceptions_file: Path | None, verbose: bool
) -> None:
    """Code quality analysis toolkit."""
    # Shared state for all subcommands lives in ctx.obj.
    ctx.ensure_object(dict)
    # Load configuration (load_config handles a None path).
    quality_config = load_config(config)
    quality_config.verbose = verbose
    # Load exceptions configuration if provided.
    # NOTE(review): the hasattr guard silently ignores files without an
    # "exceptions" attribute on the loaded object — confirm that is the
    # intended behavior for malformed exception files.
    if exceptions_file:
        exceptions_data = _load_from_yaml(exceptions_file)
        if hasattr(exceptions_data, "exceptions"):
            quality_config.exceptions = exceptions_data.exceptions
    ctx.obj["config"] = quality_config
    ctx.obj["verbose"] = verbose
@cli.command()
@click.argument(
    "paths", nargs=-1, required=True, type=click.Path(exists=True, path_type=Path)
)
@click.option("--threshold", "-t", default=0.8, help="Similarity threshold (0.0-1.0)")
@click.option("--min-lines", default=5, help="Minimum lines for duplicate detection")
@click.option("--min-tokens", default=50, help="Minimum tokens for duplicate detection")
@click.option("--output", "-o", type=click.File("w"), help="Output file for results")
@click.option(
    "--format",
    "output_format",
    default="json",
    type=click.Choice(["json", "console", "csv"]),
    help="Output format",
)
@click.pass_context
def duplicates(
    ctx: click.Context,
    paths: tuple[Path, ...],
    threshold: float,
    min_lines: int,
    min_tokens: int,
    output: Any,
    output_format: str,
) -> None:
    """Detect duplicate code patterns."""
    config: QualityConfig = ctx.obj["config"]
    verbose: bool = ctx.obj["verbose"]
    # CLI options override whatever the config file specified.
    config.detection.similarity_threshold = threshold
    config.detection.min_lines = min_lines
    config.detection.min_tokens = min_tokens
    if verbose:
        click.echo(f"🔍 Analyzing paths: {', '.join(str(p) for p in paths)}")
        click.echo(f"📊 Similarity threshold: {threshold}")
        click.echo(f"📏 Min lines: {min_lines}, Min tokens: {min_tokens}")
    # Expand directory arguments into contained Python files; keep explicit
    # file arguments as-is.
    file_finder = FileFinder(config.paths, config.languages)
    all_files = []
    for path in paths:
        if path.is_file():
            all_files.append(path)
        else:
            files = file_finder.find_files(path)
            all_files.extend(files)
    if not all_files:
        click.echo("❌ No Python files found in the specified paths.", err=True)
        return
    if verbose:
        click.echo(f"📂 Found {len(all_files)} Python files")
    # Run duplicate detection
    engine = DuplicateDetectionEngine(config)
    duplicates_found = engine.detect_duplicates_in_files(all_files)
    if verbose:
        click.echo(f"🔍 Found {len(duplicates_found)} duplicate groups")
    # Build the serializable result structure shared by all output formats.
    results: dict[str, Any] = {
        "summary": {
            "total_files_analyzed": len(all_files),
            "duplicate_groups_found": len(duplicates_found),
            "total_duplicate_blocks": sum(
                len(match.blocks) for match in duplicates_found
            ),
            "configuration": {
                "similarity_threshold": threshold,
                "min_lines": min_lines,
                "min_tokens": min_tokens,
            },
        },
        "duplicates": [],
    }
    for i, match in enumerate(duplicates_found, 1):
        detailed_analysis = engine.get_detailed_analysis(match)
        results["duplicates"].append({"group_id": i, "analysis": detailed_analysis})
    # Emit in the requested format.
    if output_format == "json":
        if output:
            json.dump(results, output, indent=2, default=str)
        else:
            click.echo(json.dumps(results, indent=2, default=str))
    elif output_format == "console":
        _print_console_duplicates(results, verbose)
    elif output_format == "csv":
        _print_csv_duplicates(results, output)
@cli.command()
@click.argument(
    "paths", nargs=-1, required=True, type=click.Path(exists=True, path_type=Path)
)
@click.option("--threshold", default=10, help="Complexity threshold")
@click.option("--output", "-o", type=click.File("w"), help="Output file for results")
@click.option(
    "--format",
    "output_format",
    default="json",
    type=click.Choice(["json", "console"]),
    help="Output format",
)
@click.pass_context
def complexity(
    ctx: click.Context,
    paths: tuple[Path, ...],
    threshold: int,
    output: Any,
    output_format: str,
) -> None:
    """Analyze code complexity."""
    config: QualityConfig = ctx.obj["config"]
    verbose: bool = ctx.obj["verbose"]
    # CLI threshold overrides the configured one.
    config.complexity.complexity_threshold = threshold
    if verbose:
        click.echo(f"🔍 Analyzing complexity in: {', '.join(str(p) for p in paths)}")
        click.echo(f"📊 Complexity threshold: {threshold}")
    # Expand directory arguments into contained Python files; keep explicit
    # file arguments as-is.
    file_finder = FileFinder(config.paths, config.languages)
    all_files = []
    for path in paths:
        if path.is_file():
            all_files.append(path)
        else:
            files = file_finder.find_files(path)
            all_files.extend(files)
    if not all_files:
        click.echo("❌ No Python files found in the specified paths.", err=True)
        return
    if verbose:
        click.echo(f"📂 Found {len(all_files)} Python files")
    # Run complexity analysis
    analyzer = ComplexityAnalyzer(config.complexity)
    overview = analyzer.get_project_complexity_overview(all_files)
    # Emit in the requested format.
    if output_format == "json":
        if output:
            json.dump(overview, output, indent=2, default=str)
        else:
            click.echo(json.dumps(overview, indent=2, default=str))
    elif output_format == "console":
        _print_console_complexity(overview, verbose)
@cli.command()
@click.argument(
    "paths", nargs=-1, required=True, type=click.Path(exists=True, path_type=Path)
)
@click.option(
    "--include-type-hints", is_flag=True, help="Include missing type hint analysis"
)
@click.option("--pydantic-only", is_flag=True, help="Only analyze Pydantic patterns")
@click.option("--output", "-o", type=click.File("w"), help="Output file for results")
@click.option(
    "--format",
    "output_format",
    default="json",
    type=click.Choice(["json", "console"]),
    help="Output format",
)
@click.pass_context
def modernization(
    ctx: click.Context,
    paths: tuple[Path, ...],
    include_type_hints: bool,
    pydantic_only: bool,
    output: Any,
    output_format: str,
) -> None:
    """Analyze code for modernization opportunities."""
    config: QualityConfig = ctx.obj["config"]
    verbose: bool = ctx.obj["verbose"]
    if verbose:
        click.echo(
            f"🔍 Analyzing modernization opportunities in: {', '.join(str(p) for p in paths)}"
        )
        if include_type_hints:
            click.echo("📝 Including type hint analysis")
        if pydantic_only:
            click.echo("📦 Pydantic-only analysis mode")
    # Expand directory arguments into contained Python files; keep explicit
    # file arguments as-is.
    file_finder = FileFinder(config.paths, config.languages)
    all_files = []
    for path in paths:
        if path.is_file():
            all_files.append(path)
        else:
            files = file_finder.find_files(path)
            all_files.extend(files)
    if not all_files:
        click.echo("❌ No Python files found in the specified paths.", err=True)
        return
    if verbose:
        click.echo(f"📂 Found {len(all_files)} Python files")
    # Run modernization analysis
    engine = ModernizationEngine(config)
    results = engine.analyze_files(all_files)
    summary = engine.get_summary(results)
    # Restrict to Pydantic v1 findings when requested.
    if pydantic_only:
        filtered_results = {}
        for file_path, issues in results.items():
            pydantic_issues = [
                issue for issue in issues if issue.issue_type == "pydantic_v1_pattern"
            ]
            if pydantic_issues:
                filtered_results[file_path] = pydantic_issues
        results = filtered_results
        # Recalculate summary over the filtered result set.
        summary = engine.get_summary(results)
    # Output results
    final_results = {
        "summary": summary,
        "files": {
            str(file_path): [issue.__dict__ for issue in issues]
            for file_path, issues in results.items()
            if issues
        },
    }
    if output_format == "json":
        if output:
            json.dump(final_results, output, indent=2, default=str)
        else:
            click.echo(json.dumps(final_results, indent=2, default=str))
    elif output_format == "console":
        _print_console_modernization(final_results, verbose, include_type_hints)
@cli.command()
@click.argument(
    "paths", nargs=-1, required=True, type=click.Path(exists=True, path_type=Path)
)
@click.option("--output", "-o", type=click.File("w"), help="Output file for results")
@click.option(
    "--format",
    "output_format",
    default="json",
    type=click.Choice(["json", "console"]),
    help="Output format",
)
@click.pass_context
def full_analysis(
    ctx: click.Context, paths: tuple[Path, ...], output: Any, output_format: str
) -> None:
    """Run comprehensive code quality analysis."""
    config: QualityConfig = ctx.obj["config"]
    verbose: bool = ctx.obj["verbose"]
    if verbose:
        click.echo(
            f"🔍 Running full quality analysis on: {', '.join(str(p) for p in paths)}"
        )
    # Expand directory arguments into contained Python files; keep explicit
    # file arguments as-is.
    file_finder = FileFinder(config.paths, config.languages)
    all_files = []
    for path in paths:
        if path.is_file():
            all_files.append(path)
        else:
            files = file_finder.find_files(path)
            all_files.extend(files)
    if not all_files:
        click.echo("❌ No Python files found in the specified paths.", err=True)
        return
    if verbose:
        click.echo(f"📂 Found {len(all_files)} Python files")
    # Run all analyses
    results: dict[str, Any] = {
        "metadata": {
            "total_files": len(all_files),
            "analyzed_paths": [str(p) for p in paths],
            "configuration": config.dict(),
        }
    }
    # Complexity analysis
    if verbose:
        click.echo("📊 Running complexity analysis...")
    complexity_analyzer = ComplexityAnalyzer(config.complexity)
    results["complexity"] = complexity_analyzer.get_project_complexity_overview(
        all_files
    )
    # Duplicate detection
    if verbose:
        click.echo("🔍 Running duplicate detection...")
    duplicate_engine = DuplicateDetectionEngine(config)
    duplicates_found = duplicate_engine.detect_duplicates_in_files(all_files)
    results["duplicates"] = {
        "summary": {
            "duplicate_groups_found": len(duplicates_found),
            "total_duplicate_blocks": sum(
                len(match.blocks) for match in duplicates_found
            ),
        },
        "details": [],
    }
    for i, match in enumerate(duplicates_found, 1):
        detailed_analysis = duplicate_engine.get_detailed_analysis(match)
        duplicate_details = results["duplicates"]["details"]
        if isinstance(duplicate_details, list):
            duplicate_details.append({"group_id": i, "analysis": detailed_analysis})
    # Code smells detection
    if verbose:
        click.echo("👃 Detecting code smells...")
    all_smells = []
    for file_path in all_files:
        try:
            with open(file_path, encoding="utf-8") as f:
                content = f.read()
            ast_analyzer = ASTAnalyzer(str(file_path), content)
            # Parse the AST and analyze
            tree = ast.parse(content)
            ast_analyzer.visit(tree)
            smells = ast_analyzer.detect_code_smells()
            if smells:
                all_smells.extend(
                    [{"file": str(file_path), "smell": smell} for smell in smells]
                )
        # NOTE(review): deliberately broad best-effort catch — unreadable or
        # unparseable files are skipped silently; consider logging when
        # verbose is set.
        except Exception:
            continue
    results["code_smells"] = {"total_smells": len(all_smells), "details": all_smells}
    # Generate overall quality score
    results["quality_score"] = _calculate_overall_quality_score(results)
    # Output results
    if output_format == "json":
        if output:
            json.dump(results, output, indent=2, default=str)
        else:
            click.echo(json.dumps(results, indent=2, default=str))
    elif output_format == "console":
        _print_console_full_analysis(results, verbose)
def _print_console_duplicates(results: dict[str, Any], verbose: bool) -> None:
    """Print duplicate results in console format.

    Args:
        results: Result dict built by the ``duplicates`` command
            ("summary" plus a "duplicates" list).
        verbose: When True, also prints refactoring suggestions per group.
    """
    summary = results["summary"]
    click.echo("\n🔍 DUPLICATE CODE ANALYSIS")
    click.echo("=" * 50)
    click.echo(f"📂 Files analyzed: {summary['total_files_analyzed']}")
    click.echo(f"🔄 Duplicate groups: {summary['duplicate_groups_found']}")
    click.echo(f"📊 Total duplicate blocks: {summary['total_duplicate_blocks']}")
    if not results["duplicates"]:
        click.echo("\n✅ No significant duplicate code patterns found!")
        return
    click.echo(f"\n🚨 Found {len(results['duplicates'])} duplicate groups:")
    for dup in results["duplicates"]:
        analysis = dup["analysis"]
        match_info = analysis["match_info"]
        click.echo(f"\n📋 Group #{dup['group_id']}")
        click.echo(f" Similarity: {match_info['similarity_score']:.2%}")
        click.echo(f" Priority: {match_info['priority_score']:.2f}")
        click.echo(f" Type: {match_info['match_type']}")
        click.echo(" 📁 Affected files:")
        for block in analysis["blocks"]:
            click.echo(f"{block['file_path']} (lines {block['line_range']})")
        if verbose and analysis["refactoring_suggestions"]:
            click.echo(" 💡 Refactoring suggestions:")
            for suggestion in analysis["refactoring_suggestions"]:
                click.echo(f"{suggestion}")
def _print_csv_duplicates(results: dict[str, Any], output: Any) -> None:
"""Print duplicate results in CSV format."""
if not output:
output = sys.stdout
writer = csv.writer(output)
writer.writerow(
[
"Group ID",
"Similarity Score",
"Priority Score",
"Match Type",
"File Path",
"Line Range",
"Lines of Code",
"Estimated Effort",
"Risk Level",
]
)
for dup in results["duplicates"]:
analysis = dup["analysis"]
match_info = analysis["match_info"]
for block in analysis["blocks"]:
writer.writerow(
[
dup["group_id"],
f"{match_info['similarity_score']:.2%}",
f"{match_info['priority_score']:.2f}",
match_info["match_type"],
block["file_path"],
block["line_range"],
block["lines_of_code"],
analysis.get("estimated_effort", "Unknown"),
analysis.get("risk_assessment", "Unknown"),
]
)
def _print_console_complexity(results: dict[str, Any], verbose: bool) -> None:
    """Print complexity results in console format.

    Args:
        results: Project overview produced by
            ``ComplexityAnalyzer.get_project_complexity_overview``.
        verbose: Accepted for signature parity with the other printers;
            not currently used in this function.
    """
    click.echo("\n📊 COMPLEXITY ANALYSIS")
    click.echo("=" * 50)
    summary = results["summary"]
    click.echo(f"📂 Total files: {results['total_files']}")
    click.echo(f"📏 Total lines: {results['total_lines_of_code']}")
    click.echo(f"⚙️ Total functions: {results['total_functions']}")
    click.echo(f"🏗️ Total classes: {results['total_classes']}")
    click.echo("\n📈 Average metrics:")
    click.echo(f" Complexity score: {summary['average_complexity_score']}")
    click.echo(f" Cyclomatic complexity: {summary['average_cyclomatic_complexity']}")
    click.echo(f" Maintainability index: {summary['average_maintainability_index']}")
    click.echo("\n📊 Complexity distribution:")
    for level, count in results["distribution"].items():
        click.echo(f" {level}: {count} files")
    if results["high_complexity_files"]:
        click.echo(
            f"\n🚨 High complexity files (top {len(results['high_complexity_files'])}):"
        )
        for file_info in results["high_complexity_files"]:
            click.echo(
                f"{file_info['file']} (score: {file_info['score']:.1f}, level: {file_info['level']})"
            )
    if results["recommendations"]:
        click.echo("\n💡 Recommendations:")
        for rec in results["recommendations"]:
            click.echo(f" {rec}")
def _print_console_modernization(
    results: dict[str, Any], verbose: bool, include_type_hints: bool
) -> None:
    """Print modernization results in console format.

    Args:
        results: Dict with "summary" and "files" sections built by the
            ``modernization`` command.
        verbose: When True, prints per-file issue details.
        include_type_hints: Accepted for signature parity; not used in the
            current rendering.
    """
    summary = results["summary"]
    click.echo("\n🔄 MODERNIZATION ANALYSIS")
    click.echo("=" * 50)
    click.echo(f"📂 Files analyzed: {summary['total_files_analyzed']}")
    click.echo(f"⚠️ Files with issues: {summary['files_with_issues']}")
    click.echo(f"🔧 Total issues: {summary['total_issues']}")
    click.echo(f"✅ Auto-fixable: {summary['auto_fixable_count']}")
    click.echo("\n📊 Issues by severity:")
    for severity, count in summary["by_severity"].items():
        if count > 0:
            icon = (
                "🚨" if severity == "error" else "⚠️" if severity == "warning" else ""
            )
            click.echo(f" {icon} {severity.title()}: {count}")
    click.echo("\n📋 Issues by type:")
    for issue_type, count in summary["by_type"].items():
        click.echo(f"{issue_type.replace('_', ' ').title()}: {count}")
    if summary["top_files_with_issues"]:
        click.echo("\n🗂️ Files with most issues:")
        for file_path, count in summary["top_files_with_issues"][:5]:
            click.echo(f"{file_path}: {count} issues")
    if summary["recommendations"]:
        click.echo("\n💡 Recommendations:")
        for rec in summary["recommendations"]:
            click.echo(f" {rec}")
    if verbose and results["files"]:
        click.echo("\n📝 Detailed issues:")
        # Only a handful of files/issues are shown to keep output short.
        for file_path, issues in list(results["files"].items())[:5]:  # Show top 5 files
            click.echo(f"\n 📁 {file_path}:")
            for issue in issues[:3]:  # Show first 3 issues per file
                severity_icon = (
                    "🚨"
                    if issue["severity"] == "error"
                    else "⚠️"
                    if issue["severity"] == "warning"
                    else ""
                )
                click.echo(
                    f" {severity_icon} Line {issue['line_number']}: {issue['description']}"
                )
                if issue["can_auto_fix"]:
                    click.echo(f" 🔧 Suggested fix: {issue['suggested_fix']}")
            if len(issues) > 3:
                click.echo(f" ... and {len(issues) - 3} more issues")
def _print_console_full_analysis(results: dict[str, Any], verbose: bool) -> None:
    """Print full analysis results in console format.

    Args:
        results: Combined results dict built by the ``full_analysis``
            command (metadata, complexity, duplicates, code_smells,
            quality_score).
        verbose: When True, lists the first few individual code smells.
    """
    click.echo("\n🎯 COMPREHENSIVE CODE QUALITY ANALYSIS")
    click.echo("=" * 60)
    metadata = results["metadata"]
    click.echo(f"📂 Total files analyzed: {metadata['total_files']}")
    click.echo(f"📍 Paths: {', '.join(metadata['analyzed_paths'])}")
    click.echo(f"🎯 Overall quality score: {results['quality_score']:.1f}/100")
    # Complexity summary.  NOTE: the local names `complexity`/`duplicates`
    # shadow the CLI commands of the same name, but only inside this scope.
    complexity = results["complexity"]
    click.echo("\n📊 COMPLEXITY METRICS")
    click.echo(f" Average score: {complexity['summary']['average_complexity_score']}")
    click.echo(f" High complexity files: {len(complexity['high_complexity_files'])}")
    # Duplicates summary
    duplicates = results["duplicates"]
    click.echo("\n🔄 DUPLICATE DETECTION")
    click.echo(
        f" Duplicate groups: {duplicates['summary']['duplicate_groups_found']}"
    )
    click.echo(
        f" Total duplicate blocks: {duplicates['summary']['total_duplicate_blocks']}"
    )
    # Code smells summary
    smells = results["code_smells"]
    click.echo("\n👃 CODE SMELLS")
    click.echo(f" Total issues: {smells['total_smells']}")
    if verbose and smells["details"]:
        click.echo(" Details:")
        for smell in smells["details"][:10]:  # Show first 10
            click.echo(f"{smell['file']}: {smell['smell']}")
        if len(smells["details"]) > 10:
            click.echo(f" ... and {len(smells['details']) - 10} more")
def _calculate_overall_quality_score(results: dict[str, Any]) -> float:
"""Calculate an overall quality score based on all metrics."""
score = 100.0
# Complexity penalty (max -30 points)
complexity = results["complexity"]
avg_complexity = complexity["summary"]["average_complexity_score"]
if avg_complexity > 50:
score -= min(30, (avg_complexity - 50) * 0.6)
# Duplicate penalty (max -30 points)
duplicates = results["duplicates"]
if duplicates["summary"]["duplicate_groups_found"] > 0:
penalty = min(30, duplicates["summary"]["duplicate_groups_found"] * 3)
score -= penalty
# Code smells penalty (max -20 points)
smells = results["code_smells"]
if smells["total_smells"] > 0:
penalty = min(20, smells["total_smells"] * 2)
score -= penalty
# Maintainability bonus/penalty (max ±20 points)
avg_maintainability = complexity["summary"]["average_maintainability_index"]
if avg_maintainability > 70:
score += min(20.0, (avg_maintainability - 70) * 0.5)
elif avg_maintainability < 30:
score -= min(20.0, (30 - avg_maintainability) * 0.5)
return max(0.0, score)
@cli.command()
@click.option(
    "--output-path",
    "-o",
    default=".quality-exceptions.yaml",
    type=click.Path(path_type=Path),
    help="Output path for exceptions configuration file",
)
def create_exceptions_template(output_path: Path) -> None:
    """Create a template exceptions configuration file."""
    template_content = create_exceptions_config_template()
    # Never clobber an existing file without explicit confirmation.
    overwrite_declined = output_path.exists() and not click.confirm(
        f"File {output_path} already exists. Overwrite?"
    )
    if overwrite_declined:
        click.echo("Aborted.")
        return
    output_path.write_text(template_content, encoding="utf-8")
    click.echo(f"✅ Created exceptions configuration template at: {output_path}")
    click.echo("📝 Edit this file to configure exception rules for your project")
    click.echo(f"🔧 Use with: --exceptions-file {output_path}")
# Allow invoking the CLI directly via `python main.py`.
if __name__ == "__main__":
    cli()

View File

@@ -0,0 +1,13 @@
"""Code complexity analysis module."""
from .analyzer import ComplexityAnalyzer
from .calculator import ComplexityCalculator
from .metrics import ComplexityMetrics
from .radon_integration import RadonComplexityAnalyzer
__all__ = [
"ComplexityAnalyzer",
"ComplexityCalculator",
"ComplexityMetrics",
"RadonComplexityAnalyzer",
]

View File

@@ -0,0 +1,311 @@
"""High-level complexity analysis interface."""
from pathlib import Path
from typing import Any
from .metrics import ComplexityMetrics
from .radon_integration import RadonComplexityAnalyzer
from ..config.schemas import ComplexityConfig
# Optional import: ExceptionFilter lives in a module that can create a
# circular import at load time, so fall back to None here; the class below
# re-imports it lazily when it is actually needed.
# NOTE(review): despite the original comment, this is a runtime fallback,
# not a typing.TYPE_CHECKING guard — confirm whether a TYPE_CHECKING-only
# import was intended.
try:
    from ..core.exceptions import ExceptionFilter
except ImportError:
    ExceptionFilter = None
class ComplexityAnalyzer:
    """High-level interface for code complexity analysis.

    Delegates metric extraction to :class:`RadonComplexityAnalyzer` and
    filters/aggregates the results according to a :class:`ComplexityConfig`.
    """

    def __init__(self, config: ComplexityConfig | None = None, full_config: Any = None):
        """Initialize the analyzer.

        Args:
            config: Complexity-specific settings; defaults when None.
            full_config: Optional full quality configuration; when given, an
                ExceptionFilter is created so findings can be suppressed.
        """
        self.config = config or ComplexityConfig()
        self.radon_analyzer = RadonComplexityAnalyzer(fallback_to_manual=True)
        # Declare the attribute exactly once: annotating it in both branches
        # (as before) is reported as a redefinition by type checkers.
        self.exception_filter: ExceptionFilter | None = None
        if full_config:
            # Imported lazily to avoid a circular import at module load time.
            from ..core.exceptions import ExceptionFilter

            self.exception_filter = ExceptionFilter(full_config)

    def analyze_code(self, code: str, filename: str = "<string>") -> ComplexityMetrics:
        """Analyze complexity of a code string."""
        metrics = self.radon_analyzer.analyze_code(code, filename)
        return self._filter_metrics_by_config(metrics)

    def analyze_file(self, file_path: Path) -> ComplexityMetrics:
        """Analyze complexity of a file."""
        metrics = self.radon_analyzer.analyze_file(file_path)
        return self._filter_metrics_by_config(metrics)

    def batch_analyze_files(
        self, file_paths: list[Path], max_workers: int | None = None
    ) -> dict[Path, ComplexityMetrics]:
        """Analyze multiple files in parallel, filtering each result."""
        raw_results = self.radon_analyzer.batch_analyze_files(file_paths, max_workers)
        # Filter metrics based on configuration.
        return {
            path: self._filter_metrics_by_config(metrics)
            for path, metrics in raw_results.items()
        }

    def get_complexity_summary(self, metrics: ComplexityMetrics) -> dict[str, Any]:
        """Get a human-readable summary of complexity metrics."""
        return {
            "overall_score": metrics.get_overall_score(),
            "complexity_level": metrics.get_complexity_level(),
            "priority_score": metrics.get_priority_score(),
            "recommendations": metrics.get_recommendations(),
            "key_metrics": {
                "cyclomatic_complexity": metrics.cyclomatic_complexity,
                "cognitive_complexity": metrics.cognitive_complexity,
                "maintainability_index": metrics.maintainability_index,
                "max_nesting_depth": metrics.max_nesting_depth,
                "lines_of_code": metrics.lines_of_code,
                "function_count": metrics.function_count,
                "class_count": metrics.class_count,
            },
            "flags": self._get_complexity_flags(metrics),
        }

    def get_detailed_report(
        self, code: str, filename: str = "<string>"
    ) -> dict[str, Any]:
        """Get detailed complexity report including function-level analysis."""
        report = self.radon_analyzer.get_detailed_complexity_report(code, filename)
        # Add summary information derived from the file-level metrics.
        if "file_metrics" in report:
            metrics = ComplexityMetrics.from_dict(report["file_metrics"])
            report["summary"] = self.get_complexity_summary(metrics)
        # Surface functions that exceed the configured threshold.
        if "functions" in report:
            report["high_complexity_functions"] = [
                func
                for func in report["functions"]
                if func["complexity"] >= self.config.complexity_threshold
            ]
        return report

    def find_complex_code(
        self, file_paths: list[Path], max_workers: int | None = None
    ) -> list[dict[str, Any]]:
        """Find files exceeding complexity thresholds, sorted by priority."""
        results = self.batch_analyze_files(file_paths, max_workers)
        complex_files = []
        for path, metrics in results.items():
            if not self._is_complex(metrics):
                continue
            # Honor configured suppression rules, if any.  The returned
            # reason is not used here.
            if self.exception_filter:
                should_suppress, _reason = self.exception_filter.should_suppress_issue(
                    "complexity", "high_complexity", str(path), 1, ""
                )
                if should_suppress:
                    continue
            summary = self.get_complexity_summary(metrics)
            complex_files.append(
                {
                    "file_path": str(path),
                    "metrics": metrics.to_dict(),
                    "summary": summary,
                    "priority": summary["priority_score"],
                }
            )
        # Sort by priority (highest first).
        complex_files.sort(key=lambda x: x["priority"], reverse=True)
        return complex_files

    def get_project_complexity_overview(
        self, file_paths: list[Path], max_workers: int | None = None
    ) -> dict[str, Any]:
        """Get overall project complexity statistics."""
        results = self.batch_analyze_files(file_paths, max_workers)
        if not results:
            return {
                "total_files": 0,
                "summary": {},
                "distribution": {},
                "recommendations": [],
            }
        # Aggregate statistics; results is guaranteed non-empty past this
        # point, so the former `if total_files > 0` guards were dead code.
        total_files = len(results)
        total_lines = sum(m.lines_of_code for m in results.values())
        total_functions = sum(m.function_count for m in results.values())
        total_classes = sum(m.class_count for m in results.values())
        # Compute each overall score once and reuse it everywhere below.
        overall_scores = {path: m.get_overall_score() for path, m in results.items()}
        # Complexity distribution
        complexity_levels = {
            "Low": 0,
            "Moderate": 0,
            "High": 0,
            "Very High": 0,
            "Extreme": 0,
        }
        high_complexity_files = []
        for path, metrics in results.items():
            level = metrics.get_complexity_level()
            complexity_levels[level] += 1
            if overall_scores[path] >= 50:  # High complexity threshold
                high_complexity_files.append(
                    {
                        "file": str(path),
                        "score": overall_scores[path],
                        "level": level,
                    }
                )
        # Sort high complexity files by score (highest first)
        high_complexity_files.sort(key=lambda x: x["score"], reverse=True)
        # Project-level recommendations
        recommendations = []
        if complexity_levels["Extreme"] > 0:
            recommendations.append(
                f"🚨 {complexity_levels['Extreme']} files with extreme complexity need immediate attention"
            )
        if complexity_levels["Very High"] > 0:
            recommendations.append(
                f"⚠️ {complexity_levels['Very High']} files with very high complexity should be refactored"
            )
        avg_complexity = sum(overall_scores.values()) / total_files
        if avg_complexity > 40:
            recommendations.append(
                "📈 Overall project complexity is high - consider architectural improvements"
            )
        return {
            "total_files": total_files,
            "total_lines_of_code": total_lines,
            "total_functions": total_functions,
            "total_classes": total_classes,
            "summary": {
                "average_complexity_score": round(avg_complexity, 2),
                "average_cyclomatic_complexity": round(
                    sum(m.cyclomatic_complexity for m in results.values())
                    / total_files,
                    2,
                ),
                "average_maintainability_index": round(
                    sum(m.maintainability_index for m in results.values())
                    / total_files,
                    2,
                ),
            },
            "distribution": complexity_levels,
            "high_complexity_files": high_complexity_files[:10],  # Top 10
            "recommendations": recommendations,
            "config": {
                "complexity_threshold": self.config.complexity_threshold,
                "radon_available": self.radon_analyzer.is_available(),
                "metrics_included": {
                    "cyclomatic_complexity": self.config.include_cyclomatic,
                    "cognitive_complexity": self.config.include_cognitive,
                    "halstead_metrics": self.config.include_halstead,
                    "maintainability_index": self.config.include_maintainability,
                },
            },
        }

    def _filter_metrics_by_config(
        self, metrics: ComplexityMetrics
    ) -> ComplexityMetrics:
        """Return a copy of *metrics* restricted to configured metric groups."""
        filtered = ComplexityMetrics()
        # Always include basic size/structure metrics.
        filtered.lines_of_code = metrics.lines_of_code
        filtered.source_lines_of_code = metrics.source_lines_of_code
        filtered.logical_lines_of_code = metrics.logical_lines_of_code
        filtered.comment_lines = metrics.comment_lines
        filtered.blank_lines = metrics.blank_lines
        filtered.function_count = metrics.function_count
        filtered.class_count = metrics.class_count
        filtered.method_count = metrics.method_count
        # Optional metric groups, gated by configuration flags.
        if self.config.include_cyclomatic:
            filtered.cyclomatic_complexity = metrics.cyclomatic_complexity
        if self.config.include_cognitive:
            filtered.cognitive_complexity = metrics.cognitive_complexity
            filtered.max_nesting_depth = metrics.max_nesting_depth
            filtered.average_nesting_depth = metrics.average_nesting_depth
        if self.config.include_halstead:
            filtered.halstead_difficulty = metrics.halstead_difficulty
            filtered.halstead_effort = metrics.halstead_effort
            filtered.halstead_volume = metrics.halstead_volume
            filtered.halstead_time = metrics.halstead_time
            filtered.halstead_bugs = metrics.halstead_bugs
        if self.config.include_maintainability:
            filtered.maintainability_index = metrics.maintainability_index
        # Additional metrics that are always carried over.
        filtered.parameters_count = metrics.parameters_count
        filtered.variables_count = metrics.variables_count
        filtered.returns_count = metrics.returns_count
        return filtered

    def _is_complex(self, metrics: ComplexityMetrics) -> bool:
        """Check whether *metrics* exceeds any configured complexity threshold."""
        return (
            metrics.cyclomatic_complexity >= self.config.complexity_threshold
            or metrics.cognitive_complexity >= self.config.complexity_threshold * 1.5
            or metrics.max_nesting_depth > 4
            or metrics.maintainability_index < 20
        )

    def _get_complexity_flags(self, metrics: ComplexityMetrics) -> list[str]:
        """Get the list of complexity warning flags raised by *metrics*."""
        flags = []
        if metrics.cyclomatic_complexity > self.config.complexity_threshold:
            flags.append("HIGH_CYCLOMATIC_COMPLEXITY")
        if metrics.cognitive_complexity > self.config.complexity_threshold * 1.5:
            flags.append("HIGH_COGNITIVE_COMPLEXITY")
        if metrics.max_nesting_depth > 4:
            flags.append("DEEP_NESTING")
        if metrics.maintainability_index < 20:
            flags.append("LOW_MAINTAINABILITY")
        if metrics.halstead_difficulty > 20:
            flags.append("HIGH_HALSTEAD_DIFFICULTY")
        if metrics.function_count == 0 and metrics.lines_of_code > 50:
            flags.append("LARGE_MONOLITHIC_CODE")
        if metrics.parameters_count > 5:
            flags.append("TOO_MANY_PARAMETERS")
        return flags

View File

@@ -0,0 +1,358 @@
"""Manual complexity calculation algorithms."""
import ast
import re
from collections import Counter
from .metrics import ComplexityMetrics
class ComplexityCalculator:
"""Manual complexity calculator using AST analysis."""
def calculate_complexity(self, code: str) -> ComplexityMetrics:
    """Calculate all complexity metrics for the given source *code*.

    Falls back to text-only line metrics when the source cannot be parsed.
    """
    try:
        syntax_tree = ast.parse(code)
    except SyntaxError:
        # Malformed code: only line-based metrics can be computed.
        return self._analyze_text_metrics(code)
    return self._analyze_ast(syntax_tree, code)
def _analyze_ast(self, tree: ast.AST, code: str) -> ComplexityMetrics:
"""Analyze AST to extract complexity metrics."""
metrics = ComplexityMetrics()
# Basic line counts
lines = code.split("\n")
metrics.lines_of_code = len(lines)
metrics.blank_lines = len([line for line in lines if not line.strip()])
metrics.comment_lines = len(
[line for line in lines if line.strip().startswith("#")]
)
metrics.source_lines_of_code = (
metrics.lines_of_code - metrics.blank_lines - metrics.comment_lines
)
# AST-based metrics
for node in ast.walk(tree):
if isinstance(node, ast.FunctionDef):
metrics.function_count += 1
# Count parameters
metrics.parameters_count += len(node.args.args)
# Count returns
metrics.returns_count += len(
[n for n in ast.walk(node) if isinstance(n, ast.Return)]
)
elif isinstance(node, ast.ClassDef):
metrics.class_count += 1
elif isinstance(node, ast.AsyncFunctionDef):
metrics.function_count += 1
metrics.parameters_count += len(node.args.args)
metrics.returns_count += len(
[n for n in ast.walk(node) if isinstance(n, ast.Return)]
)
# Calculate cyclomatic complexity
metrics.cyclomatic_complexity = self._calculate_cyclomatic_complexity(tree)
# Calculate cognitive complexity
metrics.cognitive_complexity = self._calculate_cognitive_complexity(tree)
# Calculate nesting metrics
metrics.max_nesting_depth, metrics.average_nesting_depth = (
self._calculate_nesting_metrics(tree)
)
# Calculate Halstead metrics
halstead = self._calculate_halstead_metrics(tree)
metrics.halstead_difficulty = halstead.get("difficulty", 0.0)
metrics.halstead_volume = halstead.get("volume", 0.0)
metrics.halstead_effort = halstead.get("effort", 0.0)
metrics.halstead_time = halstead.get("time", 0.0)
metrics.halstead_bugs = halstead.get("bugs", 0.0)
# Calculate maintainability index
metrics.maintainability_index = self._calculate_maintainability_index(metrics)
# Logical lines of code (non-empty, non-comment)
metrics.logical_lines_of_code = self._count_logical_lines(tree)
# Count variables
metrics.variables_count = self._count_variables(tree)
# Count methods in classes
metrics.method_count = self._count_methods(tree)
return metrics
def _analyze_text_metrics(self, code: str) -> ComplexityMetrics:
"""Fallback text-based analysis for malformed code."""
metrics = ComplexityMetrics()
lines = code.split("\n")
metrics.lines_of_code = len(lines)
metrics.blank_lines = len([line for line in lines if not line.strip()])
metrics.comment_lines = len(
[line for line in lines if line.strip().startswith("#")]
)
metrics.source_lines_of_code = (
metrics.lines_of_code - metrics.blank_lines - metrics.comment_lines
)
# Basic pattern matching
metrics.function_count = len(re.findall(r"^\s*def\s+\w+", code, re.MULTILINE))
metrics.class_count = len(re.findall(r"^\s*class\s+\w+", code, re.MULTILINE))
return metrics
def _calculate_cyclomatic_complexity(self, tree: ast.AST) -> int:
"""Calculate McCabe cyclomatic complexity."""
complexity = 1 # Base complexity
for node in ast.walk(tree):
if isinstance(
node,
(
ast.If,
ast.While,
ast.For,
ast.AsyncFor,
ast.ExceptHandler,
ast.With,
ast.Assert,
),
):
complexity += 1
elif isinstance(node, ast.BoolOp):
# Add complexity for boolean operations (and, or)
complexity += len(node.values) - 1
elif isinstance(node, ast.Expr) and isinstance(node.value, ast.IfExp):
# Ternary operator
complexity += 1
return complexity
def _calculate_cognitive_complexity(self, tree: ast.AST) -> int:
"""Calculate cognitive complexity (similar to SonarQube)."""
complexity = 0
def visit_node(node: ast.AST, depth: int = 0) -> int:
nonlocal complexity
local_complexity = 0
if isinstance(
node,
(
ast.If,
ast.While,
ast.For,
ast.AsyncFor,
ast.ExceptHandler,
ast.With,
),
):
local_complexity += 1 + depth
elif isinstance(node, ast.BoolOp):
# Logical operators add complexity
local_complexity += len(node.values) - 1
elif (
isinstance(node, ast.Lambda)
or isinstance(node, ast.Expr)
and isinstance(node.value, ast.IfExp)
):
local_complexity += 1
complexity += local_complexity
# Increase nesting for control structures
new_depth = (
depth + 1
if isinstance(
node,
(
ast.If,
ast.While,
ast.For,
ast.AsyncFor,
ast.ExceptHandler,
ast.With,
),
)
else depth
)
# Recursively visit children
for child in ast.iter_child_nodes(node):
visit_node(child, new_depth)
return complexity
visit_node(tree)
return complexity
def _calculate_nesting_metrics(self, tree: ast.AST) -> tuple[int, float]:
"""Calculate nesting depth metrics."""
depths = []
def visit_node(node: ast.AST, depth: int = 0) -> None:
current_depth = depth
if isinstance(
node, (ast.If, ast.While, ast.For, ast.AsyncFor, ast.With, ast.Try)
):
current_depth += 1
depths.append(current_depth)
for child in ast.iter_child_nodes(node):
visit_node(child, current_depth)
visit_node(tree)
max_depth = max(depths) if depths else 0
avg_depth = sum(depths) / len(depths) if depths else 0.0
return max_depth, round(avg_depth, 2)
def _calculate_halstead_metrics(self, tree: ast.AST) -> dict[str, float]:
"""Calculate Halstead complexity metrics."""
operators = Counter()
operands = Counter()
for node in ast.walk(tree):
# Operators
if isinstance(node, (ast.BinOp, ast.UnaryOp)):
operators[type(node.op).__name__] += 1
elif isinstance(node, ast.Compare):
for op in node.ops:
operators[type(op).__name__] += 1
elif isinstance(node, ast.BoolOp):
operators[type(node.op).__name__] += 1
elif isinstance(node, (ast.If, ast.While, ast.For, ast.AsyncFor)):
operators["control"] += 1
elif isinstance(node, ast.Call):
operators["call"] += 1
elif isinstance(node, (ast.Assign, ast.AugAssign)):
operators["assign"] += 1
# Operands
if isinstance(node, ast.Name):
operands[node.id] += 1
elif isinstance(node, ast.Constant):
operands[str(node.value)] += 1
elif isinstance(node, ast.Attribute):
operands[node.attr] += 1
# Halstead metrics
n1 = len(operators) # Number of unique operators
n2 = len(operands) # Number of unique operands
N1 = sum(operators.values()) # Total operators
N2 = sum(operands.values()) # Total operands
vocabulary = n1 + n2
length = N1 + N2
if n2 == 0:
return {
"difficulty": 0.0,
"volume": 0.0,
"effort": 0.0,
"time": 0.0,
"bugs": 0.0,
}
# Prevent division by zero and invalid log
if vocabulary <= 1:
volume = 0.0
else:
import math
volume = length * math.log2(vocabulary)
difficulty = (n1 / 2) * (N2 / n2) if n2 > 0 else 0.0
effort = difficulty * volume
time = effort / 18 # Seconds
bugs = volume / 3000 # Delivered bugs estimation
return {
"difficulty": round(difficulty, 2),
"volume": round(volume, 2),
"effort": round(effort, 2),
"time": round(time, 2),
"bugs": round(bugs, 4),
}
def _calculate_maintainability_index(self, metrics: ComplexityMetrics) -> float:
"""Calculate maintainability index."""
import math
# Original Microsoft formula adapted
# MI = 171 - 5.2 * ln(HV) - 0.23 * CC - 16.2 * ln(LOC)
# Where HV = Halstead Volume, CC = Cyclomatic Complexity, LOC = Lines of Code
if metrics.halstead_volume <= 0 or metrics.source_lines_of_code <= 0:
return 100.0 # Default high maintainability for simple code
try:
mi: float = (
171
- 5.2 * math.log(metrics.halstead_volume)
- 0.23 * metrics.cyclomatic_complexity
- 16.2 * math.log(metrics.source_lines_of_code)
)
# Normalize to 0-100 scale
mi = max(0.0, min(100.0, mi))
return round(mi, 2)
except (ValueError, ZeroDivisionError):
return 50.0 # Default moderate maintainability
def _count_logical_lines(self, tree: ast.AST) -> int:
"""Count logical lines of code (AST nodes that represent statements)."""
count = 0
for node in ast.walk(tree):
if isinstance(
node,
ast.Assign
| ast.AugAssign
| ast.Return
| ast.Yield
| ast.YieldFrom
| ast.Expr
| ast.Import
| ast.ImportFrom
| ast.Pass
| ast.Break
| ast.Continue
| ast.Global
| ast.Nonlocal
| ast.Assert,
):
count += 1
return count
def _count_variables(self, tree: ast.AST) -> int:
"""Count unique variable names."""
variables = set()
for node in ast.walk(tree):
if isinstance(node, ast.Name) and isinstance(
node.ctx, (ast.Store, ast.Del)
):
variables.add(node.id)
return len(variables)
def _count_methods(self, tree: ast.AST) -> int:
"""Count methods inside classes."""
method_count = 0
for node in ast.walk(tree):
if isinstance(node, ast.ClassDef):
for child in node.body:
if isinstance(child, (ast.FunctionDef, ast.AsyncFunctionDef)):
method_count += 1
return method_count

View File

@@ -0,0 +1,186 @@
"""Complexity metrics data structures and calculations."""
from dataclasses import dataclass
from typing import Any
@dataclass
class ComplexityMetrics:
    """Container for various complexity metrics.

    Plain data holder produced by the complexity analyzers. All fields
    default to zero, so a freshly constructed instance represents
    "no complexity measured yet".
    """

    # Decision-point counts.
    cyclomatic_complexity: int = 0
    cognitive_complexity: int = 0
    # Halstead metrics.
    halstead_difficulty: float = 0.0
    halstead_effort: float = 0.0
    halstead_volume: float = 0.0
    halstead_time: float = 0.0
    halstead_bugs: float = 0.0
    # Maintainability index (0-100, higher is better).
    maintainability_index: float = 0.0
    # Raw line counts.
    lines_of_code: int = 0
    source_lines_of_code: int = 0
    logical_lines_of_code: int = 0
    comment_lines: int = 0
    blank_lines: int = 0
    # Definition counts.
    function_count: int = 0
    class_count: int = 0
    method_count: int = 0
    # Nesting and depth metrics.
    max_nesting_depth: int = 0
    average_nesting_depth: float = 0.0
    # Additional metrics.
    parameters_count: int = 0
    variables_count: int = 0
    returns_count: int = 0

    def to_dict(self) -> dict[str, Any]:
        """Convert to a dictionary representation (field name -> value)."""
        from dataclasses import asdict

        # asdict preserves field declaration order, matching the previous
        # hand-written mapping while never drifting out of sync with the
        # declared fields.
        return asdict(self)

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "ComplexityMetrics":
        """Create an instance from a dictionary representation.

        Unknown keys are ignored so dictionaries produced by newer or older
        versions of the toolkit still load instead of raising TypeError.
        """
        from dataclasses import fields

        known = {f.name for f in fields(cls)}
        return cls(**{k: v for k, v in data.items() if k in known})

    def get_overall_score(self) -> float:
        """Calculate overall complexity score (0-100, lower is better).

        Weighted blend: cyclomatic 30%, cognitive 30%, inverted
        maintainability 20%, nesting depth 10%, Halstead difficulty 10%.
        Each component is capped at 100 before weighting.
        """
        score = 0.0
        score += min(self.cyclomatic_complexity * 2, 100) * 0.3
        score += min(self.cognitive_complexity * 2, 100) * 0.3
        # Maintainability is inverted: a higher MI means lower complexity.
        score += max(100 - self.maintainability_index, 0) * 0.2
        score += min(self.max_nesting_depth * 10, 100) * 0.1
        score += min(self.halstead_difficulty * 3, 100) * 0.1
        return round(score, 2)

    def get_complexity_level(self) -> str:
        """Get the human-readable level for the overall score."""
        score = self.get_overall_score()
        if score < 20:
            return "Low"
        if score < 40:
            return "Moderate"
        if score < 60:
            return "High"
        if score < 80:
            return "Very High"
        return "Extreme"

    def get_priority_score(self) -> float:
        """Get refactoring priority (0-1, higher means more urgent).

        Derived from the overall score with extra boosts for extreme
        cyclomatic/cognitive complexity and very deep nesting, capped at 1.
        """
        priority = self.get_overall_score() / 100.0
        if self.cyclomatic_complexity > 20:
            priority = min(priority + 0.2, 1.0)
        if self.cognitive_complexity > 25:
            priority = min(priority + 0.2, 1.0)
        if self.max_nesting_depth > 5:
            priority = min(priority + 0.1, 1.0)
        return round(priority, 3)

    def get_recommendations(self) -> list[str]:
        """Get one textual recommendation per exceeded threshold."""
        recommendations = []
        if self.cyclomatic_complexity > 10:
            recommendations.append(
                f"High cyclomatic complexity ({self.cyclomatic_complexity}). "
                "Consider breaking down complex conditional logic."
            )
        if self.cognitive_complexity > 15:
            recommendations.append(
                f"High cognitive complexity ({self.cognitive_complexity}). "
                "Consider extracting nested logic into separate methods."
            )
        if self.max_nesting_depth > 4:
            recommendations.append(
                f"Deep nesting detected ({self.max_nesting_depth} levels). "
                "Consider using guard clauses or early returns."
            )
        if self.maintainability_index < 20:
            recommendations.append(
                f"Low maintainability index ({self.maintainability_index:.1f}). "
                "Consider refactoring for better readability and simplicity."
            )
        if self.halstead_difficulty > 20:
            recommendations.append(
                f"High Halstead difficulty ({self.halstead_difficulty:.1f}). "
                "Code may be hard to understand and maintain."
            )
        if self.function_count == 0 and self.lines_of_code > 50:
            recommendations.append(
                "Large code block without functions. "
                "Consider extracting reusable functions."
            )
        if self.parameters_count > 5:
            recommendations.append(
                f"Many parameters ({self.parameters_count}). "
                "Consider using parameter objects or configuration classes."
            )
        return recommendations

View File

@@ -0,0 +1,348 @@
"""Radon integration for professional complexity analysis."""
import ast
from pathlib import Path
from typing import Any
try:
from radon.complexity import cc_rank, cc_visit
from radon.metrics import h_visit, mi_visit
from radon.raw import analyze
RADON_AVAILABLE = True
except ImportError:
RADON_AVAILABLE = False
from .calculator import ComplexityCalculator
from .metrics import ComplexityMetrics
class RadonComplexityAnalyzer:
    """Professional complexity analyzer using the Radon library.

    Wraps Radon's raw/cyclomatic/Halstead/MI visitors and augments the
    result with metrics Radon does not provide (cognitive complexity,
    nesting depth, parameter/variable/return counts). When Radon is not
    installed, the analyzer can fall back to the pure-Python
    ComplexityCalculator.
    """

    def __init__(self, fallback_to_manual: bool = True):
        # When True, a missing or failing Radon degrades to the manual
        # AST-based calculator instead of raising.
        self.fallback_to_manual = fallback_to_manual
        self.manual_calculator = ComplexityCalculator()

    def analyze_code(self, code: str, filename: str = "<string>") -> ComplexityMetrics:
        """Analyze code complexity with Radon or the manual fallback.

        Raises ImportError when Radon is unavailable and fallback disabled.
        """
        if RADON_AVAILABLE:
            return self._analyze_with_radon(code, filename)
        if self.fallback_to_manual:
            return self.manual_calculator.calculate_complexity(code)
        raise ImportError("Radon is not available and fallback is disabled")

    def analyze_file(self, file_path: Path) -> ComplexityMetrics:
        """Analyze one file; unreadable files yield empty metrics."""
        try:
            with open(file_path, encoding="utf-8") as f:
                code = f.read()
            return self.analyze_code(code, str(file_path))
        except Exception:
            # Unreadable/undecodable files must not abort a batch run.
            return ComplexityMetrics()

    def _analyze_with_radon(self, code: str, filename: str) -> ComplexityMetrics:
        """Analyze code using the Radon library."""
        metrics = ComplexityMetrics()
        try:
            # Raw metrics (line counts).
            raw_metrics = analyze(code)
            if raw_metrics:
                metrics.lines_of_code = raw_metrics.loc
                metrics.logical_lines_of_code = raw_metrics.lloc
                metrics.source_lines_of_code = raw_metrics.sloc
                metrics.comment_lines = raw_metrics.comments
                metrics.blank_lines = raw_metrics.blank
            # Cyclomatic complexity, summed over all functions/methods.
            cc_results = cc_visit(code)
            if cc_results:
                metrics.cyclomatic_complexity = sum(
                    block.complexity for block in cc_results
                )
                metrics.function_count = len(
                    [b for b in cc_results if b.is_method or b.type == "function"]
                )
                metrics.class_count = len([b for b in cc_results if b.type == "class"])
                metrics.method_count = len([b for b in cc_results if b.is_method])
            # Halstead metrics.
            try:
                halstead_data = h_visit(code)
                # BUG FIX: radon >= 4 returns a report whose file-level
                # numbers live on `.total`; older versions exposed them on
                # the result itself. Support both instead of always hitting
                # AttributeError and silently leaving the metrics at zero.
                report = getattr(halstead_data, "total", halstead_data)
                if report is not None:
                    metrics.halstead_difficulty = report.difficulty
                    metrics.halstead_effort = report.effort
                    metrics.halstead_volume = report.volume
                    metrics.halstead_time = report.time
                    metrics.halstead_bugs = report.bugs
            except Exception:
                # Halstead calculation can fail for some code patterns.
                pass
            # Maintainability index.
            try:
                mi_value = mi_visit(code, multi=True)
                # BUG FIX: mi_visit returns a plain float, not an object
                # with an `.mi` attribute, so the old hasattr check never
                # matched and the MI was always left at 0.
                if isinstance(mi_value, (int, float)):
                    metrics.maintainability_index = float(mi_value)
                else:
                    metrics.maintainability_index = self._calculate_mi_fallback(
                        metrics
                    )
            except Exception:
                # MI calculation can fail; compute it manually instead.
                metrics.maintainability_index = self._calculate_mi_fallback(metrics)
            # Add the metrics Radon does not provide.
            metrics = self._enhance_with_manual_metrics(code, metrics)
        except Exception:
            # If Radon fails completely, fall back to manual calculation.
            if self.fallback_to_manual:
                return self.manual_calculator.calculate_complexity(code)
            raise
        return metrics

    def _enhance_with_manual_metrics(
        self, code: str, metrics: ComplexityMetrics
    ) -> ComplexityMetrics:
        """Add metrics not provided by Radon using manual AST analysis."""
        try:
            tree = ast.parse(code)
        except SyntaxError:
            # Radon already produced what it could; keep those values.
            return metrics
        metrics.cognitive_complexity = self._calculate_cognitive_complexity(tree)
        metrics.max_nesting_depth, metrics.average_nesting_depth = (
            self._calculate_nesting_metrics(tree)
        )
        # Positional parameters / return statements per function, plus the
        # set of names that are ever assigned or deleted.
        assigned_names: set = set()
        for node in ast.walk(tree):
            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                metrics.parameters_count += len(node.args.args)
                metrics.returns_count += sum(
                    1 for n in ast.walk(node) if isinstance(n, ast.Return)
                )
            elif isinstance(node, ast.Name) and isinstance(
                node.ctx, (ast.Store, ast.Del)
            ):
                assigned_names.add(node.id)
        metrics.variables_count = len(assigned_names)
        return metrics

    def _calculate_cognitive_complexity(self, tree: ast.AST) -> int:
        """Calculate cognitive complexity manually (SonarQube-style)."""
        nesting_nodes = (
            ast.If,
            ast.While,
            ast.For,
            ast.AsyncFor,
            ast.ExceptHandler,
            ast.With,
        )
        complexity = 0

        def visit_node(node: ast.AST, depth: int = 0) -> None:
            nonlocal complexity
            if isinstance(node, nesting_nodes):
                # Nested control flow costs more the deeper it sits.
                complexity += 1 + depth
            elif isinstance(node, ast.BoolOp):
                complexity += len(node.values) - 1
            elif isinstance(node, (ast.Lambda, ast.IfExp)):
                # BUG FIX: count ternaries wherever they appear, not only
                # as bare expression statements.
                complexity += 1
            child_depth = depth + 1 if isinstance(node, nesting_nodes) else depth
            for child in ast.iter_child_nodes(node):
                visit_node(child, child_depth)

        visit_node(tree)
        return complexity

    def _calculate_nesting_metrics(self, tree: ast.AST) -> tuple[int, float]:
        """Return (max depth, average depth) over all AST nodes."""
        nesting_nodes = (ast.If, ast.While, ast.For, ast.AsyncFor, ast.With, ast.Try)
        depths: list = []

        def visit_node(node: ast.AST, depth: int = 0) -> None:
            current_depth = depth + 1 if isinstance(node, nesting_nodes) else depth
            depths.append(current_depth)
            for child in ast.iter_child_nodes(node):
                visit_node(child, current_depth)

        visit_node(tree)
        if not depths:
            return 0, 0.0
        return max(depths), round(sum(depths) / len(depths), 2)

    def _calculate_mi_fallback(self, metrics: ComplexityMetrics) -> float:
        """Calculate the maintainability index when Radon cannot.

        Same clamped Microsoft formula used by the manual calculator.
        """
        import math

        if metrics.halstead_volume <= 0 or metrics.source_lines_of_code <= 0:
            return 100.0  # Trivial code defaults to fully maintainable.
        try:
            mi = (
                171
                - 5.2 * math.log(metrics.halstead_volume)
                - 0.23 * metrics.cyclomatic_complexity
                - 16.2 * math.log(metrics.source_lines_of_code)
            )
        except (ValueError, ZeroDivisionError):
            return 50.0
        # Clamp to [0, 100] and keep the result a float.
        return float(max(0.0, min(100.0, round(mi, 2))))

    def get_complexity_rank(self, complexity_score: int) -> str:
        """Map a cyclomatic score to a letter rank (A best .. F worst)."""
        if RADON_AVAILABLE:
            return cc_rank(complexity_score)
        # Manual buckets approximating Radon's A-F ranking.
        if complexity_score <= 5:
            return "A"  # Low
        if complexity_score <= 10:
            return "B"  # Moderate
        if complexity_score <= 20:
            return "C"  # High
        if complexity_score <= 30:
            return "D"  # Very High
        return "F"  # Extreme

    def batch_analyze_files(
        self, file_paths: list[Path], max_workers: int | None = None
    ) -> dict[Path, ComplexityMetrics]:
        """Analyze multiple files in parallel threads.

        Files that fail map to empty ComplexityMetrics instead of raising.
        """
        import concurrent.futures
        import os

        if max_workers is None:
            max_workers = os.cpu_count() or 4
        results: dict[Path, ComplexityMetrics] = {}
        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
            future_to_path = {
                executor.submit(self.analyze_file, path): path for path in file_paths
            }
            for future in concurrent.futures.as_completed(future_to_path):
                path = future_to_path[future]
                try:
                    results[path] = future.result()
                except Exception:
                    # One bad file must not sink the whole batch.
                    results[path] = ComplexityMetrics()
        return results

    def get_detailed_complexity_report(
        self, code: str, filename: str = "<string>"
    ) -> dict[str, Any]:
        """Get file metrics plus per-function/per-class complexity details."""
        if not RADON_AVAILABLE:
            metrics = self.manual_calculator.calculate_complexity(code)
            return {
                "file_metrics": metrics.to_dict(),
                "functions": [],
                "classes": [],
                "radon_available": False,
            }
        metrics = self._analyze_with_radon(code, filename)
        functions: list[dict[str, Any]] = []
        classes: list[dict[str, Any]] = []
        try:
            for block in cc_visit(code):
                item = {
                    "name": block.name,
                    "complexity": block.complexity,
                    "rank": self.get_complexity_rank(block.complexity),
                    "line_number": block.lineno,
                    "end_line": getattr(block, "endline", None),
                    "type": block.type,
                    "is_method": getattr(block, "is_method", False),
                }
                if block.type == "function" or getattr(block, "is_method", False):
                    functions.append(item)
                elif block.type == "class":
                    classes.append(item)
        except Exception:
            # Block-level details are best effort; file metrics still apply.
            pass
        return {
            "file_metrics": metrics.to_dict(),
            "functions": functions,
            "classes": classes,
            "radon_available": True,
        }

    @staticmethod
    def is_available() -> bool:
        """Check whether Radon is importable in this environment."""
        return RADON_AVAILABLE

    @staticmethod
    def get_radon_version() -> str | None:
        """Get the installed Radon version, or None if unavailable."""
        if not RADON_AVAILABLE:
            return None
        import radon

        # getattr with a default never raises; no except clause needed.
        return getattr(radon, "__version__", "unknown")

View File

@@ -0,0 +1,5 @@
"""Configuration management for code quality analysis."""
from .schemas import QualityConfig, load_config
__all__ = ["QualityConfig", "load_config"]

View File

@@ -0,0 +1,128 @@
# Default configuration for IntelliKit Quality Analysis
# Detection settings
detection:
min_lines: 5 # Minimum lines for duplicate detection
min_tokens: 50 # Minimum tokens for duplicate detection
similarity_threshold: 0.8 # Similarity threshold (0.0-1.0)
# Similarity algorithm weights (should sum to ~1.0) - Optimized for better accuracy
similarity_algorithms:
- name: structural
weight: 0.5
enabled: true
- name: cosine
weight: 0.2
enabled: true
- name: jaccard
weight: 0.15
enabled: true
- name: levenshtein
weight: 0.1
enabled: true
- name: semantic
weight: 0.05
enabled: true
# Performance settings - Optimized LSH parameters
use_lsh: true # Use LSH for large codebases
lsh_threshold: 500 # Use LSH when blocks > this number (reduced for better coverage)
lsh_bands: 20 # Number of LSH bands (increased for better precision)
lsh_rows: 4 # Rows per band (decreased to balance precision/recall)
lsh_num_perm: 256 # Number of permutations (increased for better accuracy)
parallel_processing: true # Enable parallel processing
max_workers: null # Auto-detect CPU cores
# Complexity analysis
complexity:
include_cyclomatic: true # Include McCabe complexity
include_cognitive: true # Include cognitive complexity
include_halstead: true # Include Halstead metrics
include_maintainability: true # Include maintainability index
complexity_threshold: 10 # Threshold for flagging complex code
# Language support
languages:
languages:
- python
- javascript
- typescript
file_extensions:
python: [".py", ".pyx", ".pyi"]
javascript: [".js", ".jsx", ".es6", ".mjs"]
typescript: [".ts", ".tsx"]
# File path configuration
paths:
include_patterns:
- "**/*.py"
- "**/*.js"
- "**/*.ts"
exclude_patterns:
- "**/__pycache__/**"
- "**/*.pyc"
- "**/venv/**"
- "**/.venv/**"
- "**/node_modules/**"
- "**/.git/**"
- "**/build/**"
- "**/dist/**"
- "**/migrations/**"
max_files: null # No limit
follow_symlinks: false
# Refactoring suggestions
refactoring:
enabled: true
min_priority_score: 1.0 # Minimum priority for suggestions
suggest_extract_method: true
suggest_extract_class: true
suggest_parameter_object: true
suggest_template_method: true
estimate_effort: true # Include effort estimates
risk_threshold: 0.7 # Risk threshold for suggestions
# Reporting configuration
reporting:
formats: ["console"] # Output formats
output_dir: "./quality_reports"
# Console output settings
show_code_preview: true
show_complexity_metrics: true
show_refactoring_suggestions: true
# Dashboard settings
dashboard_enabled: false
dashboard_port: 8080
dashboard_host: "localhost"
# Export formats
export_sarif: false # IDE integration format
export_json: false
export_html: false
export_csv: false
# Cache configuration
cache:
enabled: true
cache_dir: ".quality_cache"
max_age_days: 7 # Cache expiry in days
use_memory_cache: true
# External integrations
integrations:
# Git integration
use_git: true
analyze_git_history: false # Analyze historical changes
blame_duplicates: false # Show git blame for duplicates
# JSCPD for multi-language support
use_jscpd: true
jscpd_path: null # Auto-detect jscpd path
jscpd_config: {} # Additional jscpd options
# Global settings
version: "1.0.0"
debug: false
verbose: false

View File

@@ -0,0 +1,294 @@
"""Configuration schemas using Pydantic."""
from pathlib import Path
import yaml
from pydantic import BaseModel, field_validator
try:
from pydantic import Field
except ImportError:
from pydantic.v1 import Field
class SimilarityAlgorithmConfig(BaseModel):
    """Configuration for one similarity algorithm used in duplicate detection."""

    name: str  # Algorithm identifier, e.g. "levenshtein", "jaccard", "cosine".
    weight: float = Field(default=1.0, ge=0.0, le=1.0)  # Relative weight in the blended score.
    enabled: bool = True  # Disabled algorithms contribute nothing.
    parameters: dict[str, str | int | float | bool] = Field(default_factory=dict)  # Algorithm-specific options.
class ComplexityConfig(BaseModel):
    """Toggles and threshold for the complexity analysis stage."""

    include_cyclomatic: bool = True  # McCabe cyclomatic complexity.
    include_cognitive: bool = True  # Cognitive complexity.
    include_halstead: bool = True  # Halstead difficulty/volume/effort metrics.
    include_maintainability: bool = True  # Maintainability index.
    complexity_threshold: int = Field(default=10, ge=1)  # Threshold for flagging complex code.
class DetectionConfig(BaseModel):
    """Configuration for duplicate detection.

    Controls the minimum size of candidate blocks, the similarity cutoff,
    the blend of similarity algorithms, and performance knobs (LSH,
    parallelism).
    """

    min_lines: int = Field(default=5, ge=1)  # Minimum lines for a candidate block.
    min_tokens: int = Field(default=50, ge=1)  # Minimum tokens for a candidate block.
    similarity_threshold: float = Field(default=0.8, ge=0.0, le=1.0)  # Match cutoff (0-1).
    # Similarity algorithms (weights are blended; see QualityConfig validator).
    similarity_algorithms: list[SimilarityAlgorithmConfig] = Field(
        default_factory=lambda: [
            SimilarityAlgorithmConfig(name="levenshtein", weight=0.2),
            SimilarityAlgorithmConfig(name="jaccard", weight=0.3),
            SimilarityAlgorithmConfig(name="cosine", weight=0.3),
            SimilarityAlgorithmConfig(name="semantic", weight=0.2),
        ]
    )
    # Performance settings
    use_lsh: bool = True  # Locality-sensitive hashing for large codebases.
    lsh_threshold: int = Field(
        default=1000, ge=100
    )  # Use LSH for datasets larger than this
    parallel_processing: bool = True
    max_workers: int | None = None  # None means auto-detect.
class LanguageConfig(BaseModel):
    """Configuration for language support.

    ``languages`` selects which of the known languages to analyze;
    ``file_extensions`` maps each language name to its file suffixes.
    """

    languages: set[str] = Field(default_factory=lambda: {"python"})  # Enabled languages.
    file_extensions: dict[str, list[str]] = Field(
        default_factory=lambda: {
            "python": [".py", ".pyx", ".pyi"],
            "javascript": [".js", ".jsx", ".es6", ".mjs"],
            "typescript": [".ts", ".tsx"],
            "java": [".java"],
            "c": [".c", ".h"],
            "cpp": [".cpp", ".cxx", ".cc", ".hpp", ".hxx"],
            "csharp": [".cs"],
            "go": [".go"],
            "rust": [".rs"],
            "php": [".php"],
            "ruby": [".rb"],
        }
    )
class PathConfig(BaseModel):
    """Configuration for file discovery (glob include/exclude patterns)."""

    include_patterns: list[str] = Field(default_factory=lambda: ["**/*.py"])  # Globs to scan.
    exclude_patterns: list[str] = Field(
        default_factory=lambda: [
            "**/__pycache__/**",
            "**/*.pyc",
            "**/venv/**",
            "**/.venv/**",
            "**/node_modules/**",
            "**/.git/**",
            "**/build/**",
            "**/dist/**",
        ]
    )
    max_files: int | None = None  # None means no limit.
    follow_symlinks: bool = False
class RefactoringConfig(BaseModel):
    """Configuration for refactoring suggestions.

    Individual ``suggest_*`` flags enable specific suggestion kinds.
    """

    enabled: bool = True
    min_priority_score: float = Field(default=1.0, ge=0.0)  # Minimum priority to report.
    suggest_extract_method: bool = True
    suggest_extract_class: bool = True
    suggest_parameter_object: bool = True
    suggest_template_method: bool = True
    estimate_effort: bool = True  # Include effort estimates in suggestions.
    risk_threshold: float = Field(default=0.7, ge=0.0, le=1.0)  # Risk cutoff for suggestions.
class ReportingConfig(BaseModel):
    """Configuration for reporting (console output, dashboard, exports)."""

    formats: list[str] = Field(default_factory=lambda: ["console"])  # Enabled output formats.
    output_dir: Path = Field(default=Path("./quality_reports"))  # Where report files are written.
    # Console reporting
    show_code_preview: bool = True
    show_complexity_metrics: bool = True
    show_refactoring_suggestions: bool = True
    # Dashboard settings
    dashboard_enabled: bool = False
    dashboard_port: int = Field(default=8080, ge=1024, le=65535)  # Non-privileged ports only.
    dashboard_host: str = "localhost"
    # Export formats
    export_sarif: bool = False  # SARIF for IDE/CI integration.
    export_json: bool = False
    export_html: bool = False
    export_csv: bool = False
class CacheConfig(BaseModel):
    """Configuration for analysis-result caching."""

    enabled: bool = True
    cache_dir: Path = Field(default=Path(".quality_cache"))  # On-disk cache location.
    max_age_days: int = Field(default=7, ge=1)  # Cache entry expiry.
    use_memory_cache: bool = True  # Additional in-process cache.
class IntegrationConfig(BaseModel):
    """Configuration for external integrations (git, jscpd)."""

    # Git integration
    use_git: bool = True
    analyze_git_history: bool = False  # Analyze historical changes.
    blame_duplicates: bool = False  # Attach git blame info to duplicates.
    # JSCPD integration for multi-language support
    use_jscpd: bool = True
    jscpd_path: str | None = None  # None means auto-detect the executable.
    jscpd_config: dict[str, str | int | float | bool] = Field(default_factory=dict)  # Extra jscpd options.
class ExceptionRule(BaseModel):
    """Configuration for a single exception (suppression) rule.

    A rule suppresses findings of one analysis type, optionally narrowed
    to an issue type and to file/line patterns, and may carry a reason
    and an expiration date.
    """

    analysis_type: str  # "complexity", "duplicates", "modernization", "code_smells"
    issue_type: str | None = None  # Specific issue type (optional)
    file_patterns: list[str] = Field(default_factory=list)  # File path patterns
    line_patterns: list[str] = Field(default_factory=list)  # Line content patterns
    reason: str | None = None  # Optional reason for the exception
    expires: str | None = None  # Optional expiration date (YYYY-MM-DD)
    enabled: bool = True
class ExceptionsConfig(BaseModel):
    """Configuration for analysis exceptions (suppressions)."""

    enabled: bool = True
    rules: list[ExceptionRule] = Field(default_factory=list)  # Fine-grained rules.
    # Global file/directory exceptions
    exclude_files: list[str] = Field(default_factory=list)
    exclude_directories: list[str] = Field(default_factory=list)
    # Temporary suppressions (auto-expire)
    temporary_suppressions: dict[str, str] = Field(
        default_factory=dict
    )  # rule_id -> expiry_date
class QualityConfig(BaseModel):
    """Main configuration for code quality analysis.

    Aggregates every analysis sub-configuration; all sections have
    defaults, so an empty config file still yields a usable setup.
    """

    # Core configuration sections
    detection: DetectionConfig = Field(default_factory=DetectionConfig)
    complexity: ComplexityConfig = Field(default_factory=ComplexityConfig)
    languages: LanguageConfig = Field(default_factory=LanguageConfig)
    paths: PathConfig = Field(default_factory=PathConfig)
    refactoring: RefactoringConfig = Field(default_factory=RefactoringConfig)
    reporting: ReportingConfig = Field(default_factory=ReportingConfig)
    cache: CacheConfig = Field(default_factory=CacheConfig)
    integrations: IntegrationConfig = Field(default_factory=IntegrationConfig)
    exceptions: ExceptionsConfig = Field(default_factory=ExceptionsConfig)
    # Global settings
    version: str = "1.0.0"
    debug: bool = False
    verbose: bool = False

    @field_validator("detection")
    def validate_similarity_weights(cls, v):
        """Ensure similarity algorithm weights sum to approximately 1.0.

        If the enabled algorithms' weights deviate from 1.0 by more than
        0.1, each enabled weight is divided by the running total so the
        final weights sum to 1.

        NOTE(review): pydantic v2's field_validator conventionally needs
        @classmethod, and the v1-style inner Config class below is
        deprecated under v2 — confirm against the pinned pydantic version.
        """
        total_weight = sum(alg.weight for alg in v.similarity_algorithms if alg.enabled)
        if abs(total_weight - 1.0) > 0.1:
            # Auto-normalize weights
            for alg in v.similarity_algorithms:
                if alg.enabled:
                    alg.weight = alg.weight / total_weight
        return v

    class Config:
        """Pydantic v1-style model configuration."""

        validate_assignment = True
        extra = "forbid"
def load_config(config_path: Path | None = None) -> QualityConfig:
    """Load configuration from *config_path* or from a well-known location.

    When no explicit path is given, the first existing candidate among
    quality.yaml, quality.yml, .quality.yaml, .quality.yml and
    pyproject.toml is used; when none exists, defaults are returned.
    """
    if config_path is None:
        candidates = (
            Path("quality.yaml"),
            Path("quality.yml"),
            Path(".quality.yaml"),
            Path(".quality.yml"),
            Path("pyproject.toml"),  # Read from its [tool.quality] section.
        )
        config_path = next((p for p in candidates if p.exists()), None)
    if config_path is not None and config_path.exists():
        return _load_from_file(config_path)
    return QualityConfig()
def _load_from_file(config_path: Path) -> QualityConfig:
"""Load configuration from specific file."""
if config_path.suffix.lower() in [".yaml", ".yml"]:
return _load_from_yaml(config_path)
elif config_path.name == "pyproject.toml":
return _load_from_pyproject(config_path)
else:
raise ValueError(f"Unsupported config file format: {config_path}")
def _load_from_yaml(config_path: Path) -> QualityConfig:
    """Parse a YAML config file into a QualityConfig.

    An empty document (safe_load yields None/empty) produces an
    all-defaults configuration.
    """
    with open(config_path, encoding="utf-8") as fh:
        raw = yaml.safe_load(fh)
    if not raw:
        return QualityConfig()
    return QualityConfig(**raw)
def _load_from_pyproject(config_path: Path) -> QualityConfig:
    """Load configuration from the [tool.quality] table of a pyproject.toml.

    Prefers the standard-library ``tomllib`` (Python 3.11+) and only falls
    back to the third-party ``tomli`` backport on older interpreters — the
    original order (tomli first) shadowed the stdlib parser unnecessarily.

    Raises:
        ImportError: if neither TOML parser is available.
    """
    try:
        import tomllib as toml_reader  # stdlib on Python 3.11+
    except ImportError:
        try:
            import tomli as toml_reader  # backport for older Pythons
        except ImportError as e:
            raise ImportError(
                "tomli package required to read pyproject.toml. "
                "Install with: pip install tomli"
            ) from e
    with open(config_path, "rb") as f:
        data = toml_reader.load(f)
    # Only the [tool.quality] table is relevant; all other tool config is
    # ignored.  An absent/empty table yields an all-defaults config.
    quality_config = data.get("tool", {}).get("quality", {})
    return QualityConfig(**quality_config) if quality_config else QualityConfig()
def save_config(config: QualityConfig, output_path: Path) -> None:
    """Serialize *config* to *output_path* as YAML, omitting default values.

    Only fields that differ from their defaults are written so the emitted
    file stays minimal.  Keys are sorted for stable, diff-friendly output.
    """
    # Pydantic v2 renamed .dict() to .model_dump(); support both so this
    # works regardless of the installed major version (the module already
    # uses the v2 @field_validator API elsewhere).
    dump = getattr(config, "model_dump", None) or config.dict
    with open(output_path, "w", encoding="utf-8") as f:
        yaml.dump(
            dump(exclude_defaults=True),
            f,
            default_flow_style=False,
            sort_keys=True,
        )

View File

@@ -0,0 +1,23 @@
"""Core components for code quality analysis."""
from .ast_analyzer import ASTAnalyzer
from .base import (
AnalysisResult,
CodeBlock,
ComplexityMetrics,
DuplicateMatch,
RefactoringSuggestion,
SimilarityAlgorithm,
)
from .cache import CacheManager
__all__ = [
"AnalysisResult",
"ASTAnalyzer",
"CacheManager",
"CodeBlock",
"ComplexityMetrics",
"DuplicateMatch",
"RefactoringSuggestion",
"SimilarityAlgorithm",
]

View File

@@ -0,0 +1,281 @@
"""Enhanced AST analysis for code quality detection."""
import ast
from .base import CodeBlock, ComplexityMetrics
class ASTAnalyzer(ast.NodeVisitor):
"""Enhanced AST visitor for extracting code structure and complexity metrics."""
def __init__(self, file_path: str = "", content: str = ""):
self.file_path = file_path
self.content_lines = content.splitlines() if content else []
self.functions: list[CodeBlock] = []
self.classes: list[CodeBlock] = []
self.code_blocks: list[CodeBlock] = []
self.imports: list[str] = []
self.global_variables: set[str] = set()
self.call_graph: dict[str, list[str]] = {}
def extract_code_blocks(self, file_path, min_lines: int = 5) -> list[CodeBlock]:
"""Extract code blocks from a file."""
try:
with open(file_path, encoding="utf-8") as f:
content = f.read()
except (OSError, UnicodeDecodeError):
return []
# Reset analyzer state
self.__init__(str(file_path), content)
try:
tree = ast.parse(content)
except SyntaxError:
return []
else:
self.visit(tree)
# Filter blocks by minimum size
filtered_blocks = []
for block in self.code_blocks:
if (block.end_line - block.start_line + 1) >= min_lines:
filtered_blocks.append(block)
return filtered_blocks
def visit_FunctionDef(self, node: ast.FunctionDef) -> None:
"""Visit function definitions with complexity analysis."""
complexity = self._calculate_cyclomatic_complexity(node)
cognitive_complexity = self._calculate_cognitive_complexity(node)
metrics = ComplexityMetrics(
cyclomatic_complexity=complexity, cognitive_complexity=cognitive_complexity
)
block = self._extract_code_block(node, node.name, "function", metrics)
self.functions.append(block)
self._extract_function_calls(node, node.name)
self.generic_visit(node)
def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None:
"""Visit async function definitions."""
complexity = self._calculate_cyclomatic_complexity(node)
cognitive_complexity = self._calculate_cognitive_complexity(node)
metrics = ComplexityMetrics(
cyclomatic_complexity=complexity, cognitive_complexity=cognitive_complexity
)
block = self._extract_code_block(node, node.name, "function", metrics)
self.functions.append(block)
self._extract_function_calls(node, node.name)
self.generic_visit(node)
def visit_ClassDef(self, node: ast.ClassDef) -> None:
"""Visit class definitions."""
# Class complexity is sum of method complexities
methods = [
n
for n in ast.walk(node)
if isinstance(n, ast.FunctionDef | ast.AsyncFunctionDef)
]
total_complexity = sum(
self._calculate_cyclomatic_complexity(method) for method in methods
)
metrics = ComplexityMetrics(cyclomatic_complexity=total_complexity)
block = self._extract_code_block(node, node.name, "class", metrics)
self.classes.append(block)
self.generic_visit(node)
def visit_Import(self, node: ast.Import) -> None:
"""Track imports."""
for alias in node.names:
self.imports.append(alias.name)
self.generic_visit(node)
def visit_ImportFrom(self, node: ast.ImportFrom) -> None:
"""Track from imports."""
if node.module:
for alias in node.names:
self.imports.append(f"{node.module}.{alias.name}")
self.generic_visit(node)
def visit_Assign(self, node: ast.Assign) -> None:
"""Track global variable assignments."""
for target in node.targets:
if isinstance(target, ast.Name):
self.global_variables.add(target.id)
self.generic_visit(node)
def _extract_code_block(
self,
node: ast.AST,
name: str,
block_type: str,
complexity_metrics: ComplexityMetrics | None = None,
) -> CodeBlock:
"""Extract code block from AST node with enhanced metadata."""
start_line = node.lineno
end_line = getattr(node, "end_lineno", start_line)
if end_line is None:
end_line = start_line
content = "\n".join(self.content_lines[start_line - 1 : end_line])
block = CodeBlock(
file_path=self.file_path,
start_line=start_line,
end_line=end_line,
content=content,
complexity_metrics=complexity_metrics,
function_name=name if block_type == "function" else None,
class_name=name if block_type == "class" else None,
)
self.code_blocks.append(block)
return block
def _calculate_cyclomatic_complexity(self, node: ast.AST) -> int:
"""Calculate McCabe cyclomatic complexity."""
complexity = 1 # Base complexity
for child in ast.walk(node):
if isinstance(
child,
ast.If
| ast.While
| ast.For
| ast.AsyncFor
| ast.ExceptHandler
| ast.With
| ast.AsyncWith
| ast.Assert,
):
complexity += 1
elif isinstance(child, ast.BoolOp):
complexity += len(child.values) - 1
elif isinstance(child, ast.Break | ast.Continue):
complexity += 1
return complexity
def _calculate_cognitive_complexity(self, node: ast.AST) -> int:
"""Calculate cognitive complexity (more human-oriented)."""
complexity = 0
nesting_level = 0
def visit_node(n: ast.AST, level: int) -> int:
nonlocal complexity
local_complexity = 0
if isinstance(
n, ast.If | ast.While | ast.For | ast.AsyncFor | ast.ExceptHandler
):
local_complexity += 1 + level
elif isinstance(n, ast.Break | ast.Continue):
local_complexity += 1
elif isinstance(n, ast.BoolOp):
local_complexity += len(n.values) - 1
# Increase nesting for certain constructs
if isinstance(
n,
ast.If
| ast.While
| ast.For
| ast.AsyncFor
| ast.With
| ast.AsyncWith
| ast.Try,
):
level += 1
for child in ast.iter_child_nodes(n):
local_complexity += visit_node(child, level)
return local_complexity
return visit_node(node, nesting_level)
def _extract_function_calls(self, node: ast.AST, function_name: str) -> None:
"""Extract function calls to build call graph."""
calls = []
for child in ast.walk(node):
if isinstance(child, ast.Call):
if isinstance(child.func, ast.Name):
calls.append(child.func.id)
elif isinstance(child.func, ast.Attribute):
calls.append(child.func.attr)
self.call_graph[function_name] = calls
def get_code_structure_signature(self, node: ast.AST) -> str:
"""Generate structure signature for semantic comparison."""
structure_elements = []
for child in ast.walk(node):
if isinstance(child, ast.FunctionDef):
structure_elements.append(f"func:{len(child.args.args)}")
elif isinstance(child, ast.ClassDef):
structure_elements.append(f"class:{len(child.bases)}")
elif isinstance(child, ast.If):
structure_elements.append("if")
elif isinstance(child, ast.For):
structure_elements.append("for")
elif isinstance(child, ast.While):
structure_elements.append("while")
elif isinstance(child, ast.Try):
structure_elements.append("try")
return "|".join(structure_elements)
def get_variable_usage_pattern(self, node: ast.AST) -> dict[str, int]:
"""Analyze variable usage patterns."""
variable_usage = {}
for child in ast.walk(node):
if isinstance(child, ast.Name):
name = child.id
variable_usage[name] = variable_usage.get(name, 0) + 1
return variable_usage
def detect_code_smells(self) -> list[str]:
"""Detect common code smells."""
smells = []
# Long methods
long_methods = [f for f in self.functions if f.lines_count > 30]
if long_methods:
smells.append(
f"Long methods detected: {len(long_methods)} methods > 30 lines"
)
# Complex methods
complex_methods = [
f
for f in self.functions
if f.complexity_metrics and f.complexity_metrics.cyclomatic_complexity > 10
]
if complex_methods:
smells.append(
f"Complex methods detected: {len(complex_methods)} methods with complexity > 10"
)
# Many parameters
for func in self.functions:
try:
tree = ast.parse(func.content)
for node in ast.walk(tree):
if isinstance(node, ast.FunctionDef) and len(node.args.args) > 5:
smells.append(
f"Method with many parameters: {func.function_name} ({len(node.args.args)} parameters)"
)
except Exception:
pass
return smells

268
src/quality/core/base.py Normal file
View File

@@ -0,0 +1,268 @@
"""Base classes and interfaces for code quality analysis."""
import hashlib
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from enum import Enum
from pathlib import Path
from typing import Protocol
class SimilarityAlgorithm(Protocol):
    """Protocol for similarity calculation algorithms.

    Structural typing: any object exposing a matching ``calculate`` method
    satisfies this protocol; no inheritance is required.
    """

    def calculate(self, text1: str, text2: str) -> float:
        """Calculate similarity between two text strings.

        Args:
            text1: First text string
            text2: Second text string

        Returns:
            Similarity score between 0.0 and 1.0
        """
        ...
class RefactoringType(Enum):
    """Types of refactoring suggestions."""

    EXTRACT_METHOD = "extract_method"  # pull duplicated code into a shared method
    EXTRACT_CLASS = "extract_class"  # move related duplication into a new class
    INTRODUCE_PARAMETER_OBJECT = "introduce_parameter_object"  # bundle long parameter lists
    TEMPLATE_METHOD = "template_method"  # share a skeleton, override varying steps
    CONSOLIDATE_CONDITIONAL = "consolidate_conditional"  # merge duplicated branch logic
    REMOVE_DUPLICATE = "remove_duplicate"  # delete a redundant copy outright
@dataclass
class ComplexityMetrics:
    """Code complexity metrics.

    Only the cyclomatic number is mandatory; the remaining metrics are
    optional and simply contribute nothing when absent.
    """

    cyclomatic_complexity: int
    cognitive_complexity: int | None = None
    halstead_difficulty: float | None = None
    halstead_effort: float | None = None
    maintainability_index: float | None = None

    @property
    def complexity_score(self) -> float:
        """Weighted overall score: cyclomatic + 0.5*cognitive + 0.3*Halstead.

        Components that are None (or zero) add nothing to the score.
        """
        cognitive = self.cognitive_complexity or 0
        halstead = self.halstead_difficulty or 0.0
        return self.cyclomatic_complexity + cognitive * 0.5 + halstead * 0.3
@dataclass
class CodeBlock:
    """Represents a block of code with metadata.

    ``content_hash`` and ``normalized_content`` are derived from ``content``
    in ``__post_init__`` and must not be supplied by callers.
    """

    file_path: str
    start_line: int
    end_line: int
    content: str
    content_hash: str = field(init=False)
    normalized_content: str = field(init=False)
    complexity_metrics: ComplexityMetrics | None = None
    function_name: str | None = None
    class_name: str | None = None

    def __post_init__(self) -> None:
        """Initialize computed fields."""
        # MD5 is used purely as a cheap content fingerprint, not for security.
        self.content_hash = hashlib.md5(self.content.encode()).hexdigest()
        self.normalized_content = self._normalize_content()

    def _normalize_content(self) -> str:
        """Normalize content for comparison with enhanced identifier abstraction.

        Strips comments, collapses string/numeric literals to placeholders,
        and abstracts user-defined identifiers to ``VAR`` so structurally
        identical code compares equal regardless of naming.
        """
        import re

        content = self.content
        # Remove comments (Python, JavaScript, TypeScript)
        content = re.sub(r"#.*$", "", content, flags=re.MULTILINE)  # Python comments
        content = re.sub(r"//.*$", "", content, flags=re.MULTILINE)  # JS/TS single-line
        content = re.sub(r"/\*.*?\*/", "", content, flags=re.DOTALL)  # JS/TS multi-line
        # Remove string literals but preserve their structure
        content = re.sub(r'""".*?"""', '"""STRING"""', content, flags=re.DOTALL)
        content = re.sub(r"'''.*?'''", "'''STRING'''", content, flags=re.DOTALL)
        content = re.sub(r'"[^"]*"', '"STRING"', content)
        content = re.sub(r"'[^']*'", "'STRING'", content)
        # Normalize numeric literals
        content = re.sub(r'\b\d+\.?\d*\b', 'NUM', content)
        # Keywords and singletons to keep verbatim.  Membership is checked
        # case-sensitively: Python keywords are case-sensitive, so "IF" is an
        # ordinary identifier and must be abstracted, while "None" must be
        # preserved.  (The previous ``token.lower() in ...`` comparison could
        # never match the capitalized entries None/True/False, wrongly
        # collapsing them to VAR.)
        python_keywords = {
            'def', 'class', 'if', 'else', 'elif', 'for', 'while', 'try', 'except',
            'finally', 'with', 'as', 'import', 'from', 'return', 'yield', 'pass',
            'break', 'continue', 'and', 'or', 'not', 'in', 'is', 'lambda', 'None',
            'True', 'False', 'self', 'cls'
        }
        # Split into tokens and normalize identifiers
        tokens = re.findall(r'\b\w+\b|[^\w\s]', content)
        normalized_tokens = []
        for token in tokens:
            if token in python_keywords or not re.match(r'^[a-zA-Z_]\w*$', token):
                # Keep keywords and non-identifiers as-is
                normalized_tokens.append(token)
            else:
                # Abstract user-defined identifiers
                normalized_tokens.append('VAR')
        content = ' '.join(normalized_tokens)
        # Collapse whitespace runs so formatting differences don't matter.
        content = re.sub(r"\s+", " ", content)
        return content.strip()

    @property
    def lines_count(self) -> int:
        """Number of (inclusive) source lines covered by this block."""
        return self.end_line - self.start_line + 1

    @property
    def relative_path(self) -> str:
        """Path relative to the current working directory, when possible."""
        try:
            return str(Path(self.file_path).relative_to(Path.cwd()))
        except ValueError:
            # File lives outside the cwd; fall back to the stored path.
            return self.file_path
@dataclass
class RefactoringSuggestion:
    """Suggestion for refactoring duplicated code."""

    refactoring_type: RefactoringType
    description: str
    affected_blocks: list[CodeBlock]
    effort_estimate: float  # Hours
    risk_score: float  # 0-1, higher = riskier
    expected_benefit: str
    implementation_steps: list[str] = field(default_factory=list)

    @property
    def priority_score(self) -> float:
        """Benefit (lines saved + complexity removed) over effort plus risk."""
        total_lines = sum(b.lines_count for b in self.affected_blocks)
        lines_saved = total_lines - 1
        complexity_reduction = 0.0
        for b in self.affected_blocks:
            if b.complexity_metrics:
                complexity_reduction += b.complexity_metrics.complexity_score
        benefit = lines_saved * 0.1 + complexity_reduction * 0.5
        # Risk is weighted double relative to raw effort hours; the floor
        # keeps near-zero-cost suggestions from dividing by zero.
        cost = self.effort_estimate + self.risk_score * 2
        return benefit / max(cost, 0.1)
@dataclass
class DuplicateMatch:
    """Represents a duplicate code match."""

    blocks: list[CodeBlock]
    similarity_score: float
    match_type: str  # 'exact', 'similar', 'structural', 'semantic'
    description: str
    complexity_score: float = 0.0
    priority_score: float = 0.0
    refactoring_suggestion: RefactoringSuggestion | None = None

    def __post_init__(self) -> None:
        """Derive complexity and priority scores from the matched blocks."""
        if not self.blocks:
            return
        # Mean complexity over blocks that carry metrics; 0.0 when none do.
        scores = [
            b.complexity_metrics.complexity_score
            for b in self.blocks
            if b.complexity_metrics
        ]
        self.complexity_score = sum(scores) / len(scores) if scores else 0.0
        # Priority grows with similarity, complexity, and total block size.
        total_lines = sum(b.lines_count for b in self.blocks)
        self.priority_score = (
            self.similarity_score * self.complexity_score * (total_lines / 10)
        )
@dataclass
class AnalysisResult:
    """Result of code quality analysis."""

    duplicate_matches: list[DuplicateMatch]
    total_files_analyzed: int
    total_lines_analyzed: int
    total_duplicated_lines: int
    analysis_duration: float  # wall-clock seconds
    summary_stats: dict[str, int | float] = field(default_factory=dict)
    refactoring_suggestions: list[RefactoringSuggestion] = field(default_factory=list)

    @property
    def duplication_percentage(self) -> float:
        """Share of analyzed lines that are duplicated, as a percentage.

        Returns 0.0 when nothing was analyzed (avoids division by zero).
        """
        if not self.total_lines_analyzed:
            return 0.0
        return (self.total_duplicated_lines / self.total_lines_analyzed) * 100

    @property
    def high_priority_matches(self) -> list[DuplicateMatch]:
        """Matches whose priority score exceeds 5.0."""
        return [m for m in self.duplicate_matches if m.priority_score > 5.0]
class CodeAnalyzer(ABC):
    """Abstract base class for code analyzers.

    Concrete subclasses implement language-specific analysis and report
    which languages they support.
    """

    @abstractmethod
    def analyze(self, code: str, file_path: str) -> AnalysisResult:
        """Analyze code and return results."""
        ...

    @abstractmethod
    def supports_language(self, language: str) -> bool:
        """Check if analyzer supports the given language."""
        ...
class QualityMetricsCalculator(ABC):
    """Abstract base class for quality metrics calculation.

    Implementations compute complexity and maintainability figures for a
    raw source string.
    """

    @abstractmethod
    def calculate_complexity(self, code: str) -> ComplexityMetrics:
        """Calculate complexity metrics for code."""
        ...

    @abstractmethod
    def calculate_maintainability_index(self, code: str) -> float:
        """Calculate maintainability index."""
        ...
@dataclass
class AnalysisConfig:
    """Configuration for analysis."""

    # Size floors a block must reach before it is considered at all.
    min_lines: int = 5
    min_tokens: int = 50
    # Similarity score (0.0-1.0) above which two blocks count as duplicates.
    similarity_threshold: float = 0.8
    # Cyclomatic complexity above which a block is flagged as complex.
    complexity_threshold: int = 10
    # Languages to analyze plus glob patterns selecting/skipping files.
    languages: set[str] = field(default_factory=lambda: {"python"})
    exclude_patterns: list[str] = field(default_factory=list)
    include_patterns: list[str] = field(default_factory=lambda: ["**/*.py"])
    # Feature toggles.
    enable_semantic_analysis: bool = True
    enable_refactoring_suggestions: bool = True
    max_files: int | None = None  # None = no limit on analyzed files
    parallel_processing: bool = True
    cache_enabled: bool = True

131
src/quality/core/cache.py Normal file
View File

@@ -0,0 +1,131 @@
"""Caching system for performance optimization."""
import hashlib
import pickle
from pathlib import Path
from typing import Any, Generic, TypeVar
from .base import CodeBlock
T = TypeVar("T")
class CacheManager(Generic[T]):
    """Generic cache manager for storing analysis results.

    Two-tier cache: an in-memory dict in front of pickle files stored in
    ``cache_dir``.  All disk errors are deliberately swallowed so a broken
    cache can never break an analysis run.

    Security note: entries are loaded with ``pickle``; only point
    ``cache_dir`` at a directory the current user controls, since
    unpickling untrusted data can execute arbitrary code.
    """

    def __init__(self, cache_dir: Path = Path(".quality_cache")):
        self.cache_dir = cache_dir
        # parents=True: allow nested cache locations (e.g. build/tmp/cache)
        # whose intermediate directories do not exist yet; the original
        # mkdir(exist_ok=True) crashed in that case.
        self.cache_dir.mkdir(parents=True, exist_ok=True)
        self.memory_cache: dict[str, T] = {}

    def _get_cache_key(self, data: str, prefix: str = "") -> str:
        """Generate a short, stable cache key from *data*."""
        hash_obj = hashlib.sha256(data.encode())
        # 16 hex chars (64 bits) keeps file names short; collisions are
        # practically irrelevant at this scale.
        return f"{prefix}_{hash_obj.hexdigest()[:16]}"

    def get(self, key: str, use_memory: bool = True) -> T | None:
        """Get item from cache; returns None on a miss."""
        # Check memory cache first
        if use_memory and key in self.memory_cache:
            return self.memory_cache[key]
        # Fall back to the disk cache
        cache_file = self.cache_dir / f"{key}.pickle"
        if cache_file.exists():
            try:
                with open(cache_file, "rb") as f:
                    data = pickle.load(f)
                if use_memory:
                    self.memory_cache[key] = data
                return data
            except Exception:
                # Corrupted/unreadable entry: drop it and treat as a miss.
                cache_file.unlink(missing_ok=True)
        return None

    def set(self, key: str, value: T, use_memory: bool = True) -> None:
        """Store item in both cache tiers (the disk write is best-effort)."""
        if use_memory:
            self.memory_cache[key] = value
        # Store on disk
        cache_file = self.cache_dir / f"{key}.pickle"
        try:
            with open(cache_file, "wb") as f:
                pickle.dump(value, f)
        except Exception:
            pass  # Fail silently if can't write to disk

    def get_file_hash(self, file_path: Path) -> str:
        """Hash of file contents plus mtime; empty string when unreadable."""
        try:
            stat = file_path.stat()
            # MD5 is a cheap fingerprint here, not a security boundary.
            content_hash = hashlib.md5(file_path.read_bytes()).hexdigest()
        except Exception:
            return ""
        else:
            return f"{content_hash}_{stat.st_mtime}"

    def is_file_cached(self, file_path: Path) -> bool:
        """Check if file analysis is cached and up-to-date."""
        file_hash = self.get_file_hash(file_path)
        if not file_hash:
            return False
        cache_key = self._get_cache_key(str(file_path), "file")
        cached_result = self.get(f"{cache_key}_meta")
        return cached_result == file_hash

    def cache_file_analysis(self, file_path: Path, blocks: list[CodeBlock]) -> None:
        """Cache file analysis results together with a freshness marker."""
        file_hash = self.get_file_hash(file_path)
        cache_key = self._get_cache_key(str(file_path), "file")
        # Cache the blocks, then the metadata consulted by is_file_cached().
        self.set(cache_key, blocks)
        self.set(f"{cache_key}_meta", file_hash)

    def get_cached_file_analysis(self, file_path: Path) -> list[CodeBlock] | None:
        """Get cached file analysis if up-to-date, else None."""
        if not self.is_file_cached(file_path):
            return None
        cache_key = self._get_cache_key(str(file_path), "file")
        return self.get(cache_key)

    def clear(self) -> None:
        """Clear both the memory and the disk cache."""
        self.memory_cache.clear()
        for cache_file in self.cache_dir.glob("*.pickle"):
            cache_file.unlink(missing_ok=True)

    def clear_old_entries(self, max_age_days: int = 7) -> None:
        """Delete disk entries older than *max_age_days* (best-effort)."""
        import time

        max_age_seconds = max_age_days * 24 * 3600
        current_time = time.time()
        for cache_file in self.cache_dir.glob("*.pickle"):
            try:
                if (current_time - cache_file.stat().st_mtime) > max_age_seconds:
                    cache_file.unlink()
            except Exception:
                # Entry may have been removed concurrently; ignore.
                pass

    def get_cache_stats(self) -> dict[str, Any]:
        """Get cache statistics (item counts, total disk size in MB)."""
        disk_files = list(self.cache_dir.glob("*.pickle"))
        total_size = sum(f.stat().st_size for f in disk_files if f.exists())
        return {
            "memory_items": len(self.memory_cache),
            "disk_files": len(disk_files),
            "total_size_mb": total_size / (1024 * 1024),
            "cache_dir": str(self.cache_dir),
        }

View File

@@ -0,0 +1,354 @@
"""Exception handling system for quality analysis."""
import fnmatch
import re
from datetime import datetime
from pathlib import Path
from typing import Any
from ..config.schemas import (
ExceptionRule,
QualityConfig,
)
class ExceptionFilter:
    """Filters analysis results based on configured exception rules.

    Rules and global exclusions come from ``QualityConfig.exceptions``;
    disabled and expired rules are dropped once at construction time.
    """

    def __init__(self, config: QualityConfig):
        self.config = config
        self.exceptions_config = config.exceptions
        # Resolved once here; this instance does not pick up later config edits.
        self.active_rules = self._get_active_rules()

    def _get_active_rules(self) -> list[ExceptionRule]:
        """Get currently active exception rules.

        Skips disabled rules, expired rules, and rules whose ``expires``
        field is not a valid YYYY-MM-DD date.
        """
        if not self.exceptions_config.enabled:
            return []
        active_rules = []
        current_date = datetime.now().date()
        for rule in self.exceptions_config.rules:
            if not rule.enabled:
                continue
            # Check if rule has expired
            if rule.expires:
                try:
                    expire_date = datetime.strptime(rule.expires, "%Y-%m-%d").date()
                    if current_date > expire_date:
                        continue
                except ValueError:
                    # Invalid date format, skip rule
                    continue
            active_rules.append(rule)
        return active_rules

    def should_suppress_issue(
        self,
        analysis_type: str,
        issue_type: str | None,
        file_path: str,
        line_number: int,
        line_content: str = "",
    ) -> tuple[bool, str | None]:
        """
        Check if an issue should be suppressed.

        Global file/directory exclusions take precedence over rules; the
        first matching rule wins.

        Returns:
            (should_suppress, reason)
        """
        # Check global file/directory exclusions first
        if self._is_globally_excluded(file_path):
            return True, "File/directory globally excluded"
        # Check exception rules
        for rule in self.active_rules:
            if self._rule_matches(
                rule, analysis_type, issue_type, file_path, line_number, line_content
            ):
                return (
                    True,
                    rule.reason or f"Matched exception rule: {rule.analysis_type}",
                )
        return False, None

    def _is_globally_excluded(self, file_path: str) -> bool:
        """Check if file is globally excluded via config glob patterns."""
        normalized_path = str(Path(file_path).resolve())
        # Check excluded files (match both the absolute and as-given path)
        for pattern in self.exceptions_config.exclude_files:
            if fnmatch.fnmatch(normalized_path, pattern) or fnmatch.fnmatch(
                file_path, pattern
            ):
                return True
        # Check excluded directories
        for pattern in self.exceptions_config.exclude_directories:
            if fnmatch.fnmatch(str(Path(file_path).parent), pattern):
                return True
            # Also check if any parent directory matches
            path_parts = Path(file_path).parts
            for i in range(len(path_parts)):
                partial_path = "/".join(path_parts[: i + 1])
                if fnmatch.fnmatch(partial_path, pattern):
                    return True
        return False

    def _rule_matches(
        self,
        rule: ExceptionRule,
        analysis_type: str,
        issue_type: str | None,
        file_path: str,
        line_number: int,
        line_content: str,
    ) -> bool:
        """Check if a rule matches the current issue.

        A rule matches only when every constraint it specifies (analysis
        type, issue type, file patterns, line patterns) matches; constraints
        left unset match everything.
        """
        # Check analysis type ("*" acts as a wildcard)
        if rule.analysis_type != analysis_type and rule.analysis_type != "*":
            return False
        # Check issue type if specified
        if rule.issue_type and rule.issue_type != issue_type:
            return False
        # Check file patterns (against the full path or the bare file name)
        if rule.file_patterns:
            file_matches = False
            for pattern in rule.file_patterns:
                if fnmatch.fnmatch(file_path, pattern) or fnmatch.fnmatch(
                    str(Path(file_path).name), pattern
                ):
                    file_matches = True
                    break
            if not file_matches:
                return False
        # Check line patterns (regex search against the offending line text)
        if rule.line_patterns and line_content:
            line_matches = False
            for pattern in rule.line_patterns:
                if re.search(pattern, line_content):
                    line_matches = True
                    break
            if not line_matches:
                return False
        return True

    # NOTE(review): the extractor parameters below are annotated with the
    # builtin ``callable`` and default to None; ``Callable | None`` would be
    # the accurate annotation (requires a typing import) — confirm intent.
    def filter_issues(
        self,
        analysis_type: str,
        issues: list[Any],
        get_file_path_fn: callable = None,
        get_line_number_fn: callable = None,
        get_line_content_fn: callable = None,
        get_issue_type_fn: callable = None,
    ) -> list[Any]:
        """
        Filter a list of issues based on exception rules.

        Args:
            analysis_type: Type of analysis ("complexity", "duplicates", etc.)
            issues: List of issues to filter
            get_file_path_fn: Function to extract file path from issue
            get_line_number_fn: Function to extract line number from issue
            get_line_content_fn: Function to extract line content from issue
            get_issue_type_fn: Function to extract issue type from issue

        Returns:
            The issues that were not suppressed, in their original order.
        """
        if not self.exceptions_config.enabled or not issues:
            return issues
        filtered_issues = []
        for issue in issues:
            # Extract issue details, falling back to attributes on the issue
            # object when no extractor function was supplied.
            file_path = (
                get_file_path_fn(issue)
                if get_file_path_fn
                else getattr(issue, "file_path", "")
            )
            line_number = (
                get_line_number_fn(issue)
                if get_line_number_fn
                else getattr(issue, "line_number", 0)
            )
            line_content = (
                get_line_content_fn(issue)
                if get_line_content_fn
                else getattr(issue, "line_content", "")
            )
            issue_type = (
                get_issue_type_fn(issue)
                if get_issue_type_fn
                else getattr(issue, "issue_type", None)
            )
            should_suppress, reason = self.should_suppress_issue(
                analysis_type, issue_type, file_path, line_number, line_content
            )
            if not should_suppress:
                filtered_issues.append(issue)
            elif self.config.debug:
                # Debug-only trace of what was filtered and why.
                print(
                    f"Suppressed {analysis_type} issue in {file_path}:{line_number} - {reason}"
                )
        return filtered_issues

    def get_suppression_summary(self) -> dict[str, Any]:
        """Get summary of active suppressions for reporting."""
        return {
            "enabled": self.exceptions_config.enabled,
            "active_rules": len(self.active_rules),
            "global_exclusions": {
                "files": len(self.exceptions_config.exclude_files),
                "directories": len(self.exceptions_config.exclude_directories),
            },
            "rules_by_type": self._summarize_rules_by_type(),
        }

    def _summarize_rules_by_type(self) -> dict[str, int]:
        """Summarize active rule counts keyed by analysis type."""
        summary = {}
        for rule in self.active_rules:
            analysis_type = rule.analysis_type
            summary[analysis_type] = summary.get(analysis_type, 0) + 1
        return summary
def create_example_exceptions_config() -> dict[str, Any]:
    """Create an example exceptions configuration.

    Mirrors the YAML template: global exclusions plus five illustrative
    rules covering complexity, modernization, duplicates and code smells.
    """
    file_excludes = [
        "*/tests/*",
        "*/test_*",
        "*/__pycache__/*",
        "*/migrations/*",
    ]
    directory_excludes = [
        "*/venv/*",
        "*/.venv/*",
        "*/node_modules/*",
        "*/.git/*",
    ]
    example_rules = [
        {
            "analysis_type": "complexity",
            "issue_type": "high_complexity",
            "file_patterns": ["*/legacy/*", "*/third_party/*"],
            "reason": "Legacy code with known complexity - migration planned",
        },
        {
            "analysis_type": "modernization",
            "issue_type": "pydantic_v1_pattern",
            "file_patterns": ["*/compatibility/*"],
            "line_patterns": ["# pydantic v1 required", "# TODO: migrate"],
            "reason": "Intentional Pydantic v1 usage for compatibility",
        },
        {
            "analysis_type": "modernization",
            "issue_type": "legacy_typing_import",
            "file_patterns": ["*/external/*"],
            "reason": "External library compatibility requirements",
        },
        {
            "analysis_type": "duplicates",
            "file_patterns": ["*/templates/*", "*/generated/*"],
            "reason": "Generated or template code - duplication expected",
        },
        {
            "analysis_type": "code_smells",
            "issue_type": "long_method",
            "file_patterns": ["*/parsers/*"],
            "reason": "Parser methods intentionally long for readability",
            "expires": "2024-12-31",
        },
    ]
    return {
        "exceptions": {
            "enabled": True,
            "exclude_files": file_excludes,
            "exclude_directories": directory_excludes,
            "rules": example_rules,
        }
    }
def create_exceptions_config_template() -> str:
    """Create a YAML template for exceptions configuration.

    Returns:
        A ready-to-save, commented YAML document that users can copy into a
        project and edit.
    """
    # The literal below is returned verbatim and parsed by users as YAML;
    # keep it valid YAML when editing.
    return """# Quality Analysis Exceptions Configuration
# This file allows you to suppress specific analysis results

exceptions:
  enabled: true

  # Global file and directory exclusions
  exclude_files:
    - "*/tests/*"
    - "*/test_*"
    - "*/__pycache__/*"
    - "*/migrations/*"

  exclude_directories:
    - "*/venv/*"
    - "*/.venv/*"
    - "*/node_modules/*"
    - "*/.git/*"

  # Specific exception rules
  rules:
    # Example: Suppress complexity issues in legacy code
    - analysis_type: "complexity"
      issue_type: "high_complexity"
      file_patterns:
        - "*/legacy/*"
        - "*/third_party/*"
      reason: "Legacy code with known complexity - migration planned"

    # Example: Allow intentional Pydantic v1 usage
    - analysis_type: "modernization"
      issue_type: "pydantic_v1_pattern"
      file_patterns:
        - "*/compatibility/*"
      line_patterns:
        - "# pydantic v1 required"
        - "# TODO: migrate"
      reason: "Intentional Pydantic v1 usage for compatibility"

    # Example: Suppress typing imports for external compatibility
    - analysis_type: "modernization"
      issue_type: "legacy_typing_import"
      file_patterns:
        - "*/external/*"
      reason: "External library compatibility requirements"

    # Example: Allow duplicates in generated/template code
    - analysis_type: "duplicates"
      file_patterns:
        - "*/templates/*"
        - "*/generated/*"
      reason: "Generated or template code - duplication expected"

    # Example: Temporary suppression with expiration
    - analysis_type: "code_smells"
      issue_type: "long_method"
      file_patterns:
        - "*/parsers/*"
      reason: "Parser methods intentionally long for readability"
      expires: "2024-12-31"
      enabled: true

# Analysis Types:
# - "complexity" - Code complexity issues
# - "duplicates" - Duplicate code detection
# - "modernization" - Modern Python pattern suggestions
# - "code_smells" - General code smell detection

# Issue Types vary by analysis - see CLI output for specific types
# Use "*" for analysis_type to match all analysis types
# Leave issue_type empty to match all issues of that analysis type
"""

View File

@@ -0,0 +1,9 @@
"""Code duplicate detection engine."""
from .engine import DuplicateDetectionEngine
from .matcher import DuplicateMatcher
__all__ = [
"DuplicateDetectionEngine",
"DuplicateMatcher",
]

View File

@@ -0,0 +1,420 @@
"""Enhanced duplicate detection engine with multiple algorithms."""
import ast
from pathlib import Path
from typing import Any
from ..complexity.analyzer import ComplexityAnalyzer
from ..config.schemas import QualityConfig
from ..core.ast_analyzer import ASTAnalyzer
from ..core.base import CodeBlock, DuplicateMatch
from .matcher import DuplicateMatcher
from ..similarity.base import SimilarityCalculator
from ..similarity.lsh import LSHDuplicateDetector
class DuplicateDetectionEngine:
"""Advanced duplicate detection engine with configurable algorithms."""
    def __init__(self, config: QualityConfig | None = None):
        """Build the engine; *config* defaults to a stock QualityConfig."""
        self.config = config or QualityConfig()
        self.detection_config = self.config.detection
        # Initialize exception filter
        # NOTE(review): imported locally, presumably to break a circular
        # import between the engine and core.exceptions — confirm.
        from ..core.exceptions import ExceptionFilter

        self.exception_filter = ExceptionFilter(self.config)
        # Initialize components
        self.ast_analyzer = ASTAnalyzer()
        self.complexity_analyzer = ComplexityAnalyzer(
            self.config.complexity, self.config
        )
        self.similarity_calculator = self._create_similarity_calculator()
        self.matcher = DuplicateMatcher(
            self.similarity_calculator, self.detection_config
        )
        # LSH (locality-sensitive hashing) detector for large-scale
        # detection; only constructed when enabled in the detection config.
        self.lsh_detector = None
        if self.detection_config.use_lsh:
            self.lsh_detector = LSHDuplicateDetector(
                threshold=self.detection_config.similarity_threshold,
                num_perm=128,  # assumed MinHash permutation count — TODO confirm
                bands=16,
                rows=8,
            )
    def detect_duplicates_in_files(
        self, file_paths: list[Path], max_workers: int | None = None
    ) -> list[DuplicateMatch]:
        """Detect duplicates across multiple files.

        Files that fail extraction are skipped silently.  ``max_workers`` is
        accepted but not used in this implementation.
        """
        # Extract code blocks from all files
        all_blocks = []
        for file_path in file_paths:
            try:
                blocks = self.ast_analyzer.extract_code_blocks(file_path)
                # Keep only blocks meeting both size floors from the config:
                # line count and whitespace-token count.
                filtered_blocks = [
                    block
                    for block in blocks
                    if (block.end_line - block.start_line + 1)
                    >= self.detection_config.min_lines
                    and len(block.content.split()) >= self.detection_config.min_tokens
                ]
                all_blocks.extend(filtered_blocks)
            except Exception:
                # Skip files that can't be parsed
                continue
        return self.detect_duplicates_in_blocks(all_blocks)
    def detect_duplicates_in_blocks(
        self, blocks: list[CodeBlock]
    ) -> list[DuplicateMatch]:
        """Detect duplicates in a list of code blocks.

        Uses the LSH index for large inputs (when enabled and past the
        configured threshold) and direct pairwise similarity otherwise.
        """
        if not blocks:
            return []
        # Use LSH once the dataset is large enough to amortize index build.
        if (
            self.detection_config.use_lsh
            and len(blocks) >= self.detection_config.lsh_threshold
            and self.lsh_detector
        ):
            return self._detect_with_lsh(blocks)
        else:
            return self._detect_with_similarity(blocks)
def find_duplicates_of_block(
self, target_block: CodeBlock, candidate_blocks: list[CodeBlock]
) -> list[DuplicateMatch]:
"""Find duplicates of a specific code block."""
matches = []
for candidate in candidate_blocks:
if candidate == target_block: # Skip self
continue
similarity = self.similarity_calculator.calculate_similarity(
target_block, candidate
)
if similarity >= self.detection_config.similarity_threshold:
# Calculate complexity metrics
target_complexity = self.complexity_analyzer.analyze_code(
target_block.content
)
candidate_complexity = self.complexity_analyzer.analyze_code(
candidate.content
)
match_type = "exact" if similarity >= 0.95 else "similar"
match = DuplicateMatch(
blocks=[target_block, candidate],
similarity_score=similarity,
match_type=match_type,
description=f"{match_type.title()} duplicate detected (similarity: {similarity:.3f})",
complexity_score=max(
target_complexity.get_overall_score(),
candidate_complexity.get_overall_score(),
),
priority_score=self._calculate_priority_score(
similarity,
target_complexity.get_overall_score(),
len([target_block, candidate]),
),
)
matches.append(match)
return matches
def get_detailed_analysis(self, duplicate_match: DuplicateMatch) -> dict[str, Any]:
"""Get detailed analysis of a duplicate match."""
if not duplicate_match.blocks:
return {}
# Analyze each block
block_analyses = []
for block in duplicate_match.blocks:
complexity = self.complexity_analyzer.analyze_code(block.content)
summary = self.complexity_analyzer.get_complexity_summary(complexity)
block_analyses.append(
{
"file_path": str(block.file_path),
"line_range": f"{block.start_line}-{block.end_line}",
"lines_of_code": block.end_line - block.start_line + 1,
"complexity": summary,
"content_preview": self._get_content_preview(block.content),
}
)
# Calculate similarity breakdown
similarity_breakdown = {}
if len(duplicate_match.blocks) >= 2:
similarity_breakdown = (
self.similarity_calculator.calculate_detailed_similarity(
duplicate_match.blocks[0], duplicate_match.blocks[1]
)
)
# Generate refactoring suggestions
suggestions = self._generate_refactoring_suggestions(duplicate_match)
return {
"match_info": {
"similarity_score": duplicate_match.similarity_score,
"match_type": duplicate_match.match_type,
"priority_score": duplicate_match.priority_score,
"complexity_score": duplicate_match.complexity_score,
},
"blocks": block_analyses,
"similarity_breakdown": similarity_breakdown,
"refactoring_suggestions": suggestions,
"estimated_effort": self._estimate_refactoring_effort(duplicate_match),
"risk_assessment": self._assess_refactoring_risk(duplicate_match),
}
def _create_similarity_calculator(self) -> SimilarityCalculator:
"""Create similarity calculator with configured algorithms."""
from ..similarity import (
CosineSimilarity,
JaccardSimilarity,
LevenshteinSimilarity,
SemanticSimilarity,
StructuralSimilarity,
)
algorithms = []
for algo_config in self.detection_config.similarity_algorithms:
if not algo_config.enabled:
continue
if algo_config.name == "levenshtein":
algorithms.append(LevenshteinSimilarity(algo_config))
elif algo_config.name == "jaccard":
algorithms.append(JaccardSimilarity(algo_config))
elif algo_config.name == "cosine":
algorithms.append(CosineSimilarity(algo_config))
elif algo_config.name == "semantic":
algorithms.append(SemanticSimilarity(algo_config))
elif algo_config.name == "structural":
algorithms.append(StructuralSimilarity(algo_config))
return SimilarityCalculator(algorithms)
def _detect_with_lsh(self, blocks: list[CodeBlock]) -> list[DuplicateMatch]:
"""Detect duplicates using LSH for performance."""
if not self.lsh_detector:
return []
# Add all blocks to LSH index
for block in blocks:
self.lsh_detector.add_code_block(block)
# Find duplicate groups
duplicate_groups = self.lsh_detector.find_all_duplicates()
# Convert to DuplicateMatch objects
matches = []
for group in duplicate_groups:
if len(group) < 2:
continue
# Calculate exact similarity for the group
representative = group[0]
similarities = []
for other in group[1:]:
similarity = self.similarity_calculator.calculate_similarity(
representative, other
)
similarities.append(similarity)
avg_similarity = (
sum(similarities) / len(similarities) if similarities else 0.0
)
# Calculate complexity metrics
complexities = []
for block in group:
complexity = self.complexity_analyzer.analyze_code(block.content)
complexities.append(complexity.get_overall_score())
max_complexity = max(complexities) if complexities else 0.0
match = DuplicateMatch(
blocks=group,
similarity_score=avg_similarity,
match_type="lsh_cluster",
description=f"LSH cluster with {len(group)} blocks (similarity: {avg_similarity:.3f})",
complexity_score=max_complexity,
priority_score=self._calculate_priority_score(
avg_similarity, max_complexity, len(group)
),
)
matches.append(match)
return self._filter_duplicate_matches(matches)
def _detect_with_similarity(self, blocks: list[CodeBlock]) -> list[DuplicateMatch]:
"""Detect duplicates using similarity algorithms."""
matches = self.matcher.find_all_duplicates(blocks)
return self._filter_duplicate_matches(matches)
def _filter_duplicate_matches(
self, matches: list[DuplicateMatch]
) -> list[DuplicateMatch]:
"""Filter duplicate matches based on exception rules."""
if not self.exception_filter:
return matches
filtered_matches = []
for match in matches:
# Check if any block in the match should be suppressed
should_suppress_match = False
for block in match.blocks:
should_suppress, reason = self.exception_filter.should_suppress_issue(
"duplicates",
"duplicate_code",
block.file_path,
block.start_line,
block.content,
)
if should_suppress:
should_suppress_match = True
break
if not should_suppress_match:
filtered_matches.append(match)
return filtered_matches
def _calculate_priority_score(
self, similarity: float, complexity: float, block_count: int
) -> float:
"""Calculate priority score for refactoring."""
# Base score from similarity
priority = similarity
# Boost for high complexity
if complexity > 50:
priority += 0.2
# Boost for more duplicates
if block_count > 2:
priority += 0.1 * (block_count - 2)
return min(priority, 1.0)
def _generate_refactoring_suggestions(
self, duplicate_match: DuplicateMatch
) -> list[str]:
"""Generate refactoring suggestions for duplicate code."""
suggestions = []
if len(duplicate_match.blocks) < 2:
return suggestions
first_block = duplicate_match.blocks[0]
# Analyze code structure
try:
tree = ast.parse(first_block.content)
# Check if it's a function
has_function = any(
isinstance(node, ast.FunctionDef) for node in ast.walk(tree)
)
has_class = any(isinstance(node, ast.ClassDef) for node in ast.walk(tree))
if has_function:
suggestions.append(
"Extract common function into a shared utility module"
)
suggestions.append(
"Consider creating a base function with configurable parameters"
)
elif has_class:
suggestions.append("Extract common class into a base class or mixin")
suggestions.append("Consider using inheritance or composition patterns")
else:
suggestions.append("Extract duplicate code into a reusable function")
suggestions.append(
"Consider creating a utility module for shared logic"
)
# Complexity-based suggestions
if duplicate_match.complexity_score > 60:
suggestions.append(
"High complexity detected - consider breaking down into smaller functions"
)
# Similarity-based suggestions
if duplicate_match.similarity_score > 0.95:
suggestions.append(
"Nearly identical code - prioritize for immediate refactoring"
)
elif duplicate_match.similarity_score > 0.8:
suggestions.append("Similar code - consider parameterizing differences")
except SyntaxError:
suggestions.append("Extract duplicate code into a reusable component")
return suggestions
def _estimate_refactoring_effort(self, duplicate_match: DuplicateMatch) -> str:
"""Estimate effort required for refactoring."""
if not duplicate_match.blocks:
return "Unknown"
total_lines = sum(
block.end_line - block.start_line + 1 for block in duplicate_match.blocks
)
if total_lines < 20:
return "Low (1-2 hours)"
elif total_lines < 100:
return "Medium (0.5-1 day)"
elif total_lines < 500:
return "High (1-3 days)"
else:
return "Very High (1+ weeks)"
def _assess_refactoring_risk(self, duplicate_match: DuplicateMatch) -> str:
"""Assess risk level of refactoring."""
risk_factors = []
if duplicate_match.complexity_score > 70:
risk_factors.append("High complexity")
if len(duplicate_match.blocks) > 5:
risk_factors.append("Many duplicates")
if duplicate_match.similarity_score < 0.85:
risk_factors.append("Moderate differences between duplicates")
# Check if duplicates span multiple files
unique_files = len(set(block.file_path for block in duplicate_match.blocks))
if unique_files > 3:
risk_factors.append("Cross-module dependencies")
if not risk_factors:
return "Low"
elif len(risk_factors) <= 2:
return "Medium"
else:
return "High"
def _get_content_preview(self, content: str, max_lines: int = 5) -> str:
"""Get a preview of code content."""
lines = content.split("\n")
if len(lines) <= max_lines:
return content
preview_lines = lines[:max_lines]
return "\n".join(preview_lines) + f"\n... ({len(lines) - max_lines} more lines)"

View File

@@ -0,0 +1,296 @@
"""Duplicate matching algorithms and strategies."""
from collections import defaultdict
from typing import Any
from ..config.schemas import DetectionConfig
from ..core.base import CodeBlock, DuplicateMatch
from ..similarity.base import SimilarityCalculator
class DuplicateMatcher:
    """Handles matching logic for finding duplicates.

    Wraps a ``SimilarityCalculator`` plus the detection thresholds and offers
    pairwise matching, clustering (union-find), confidence scoring, and
    merging of matches that share blocks.
    """

    def __init__(
        self, similarity_calculator: SimilarityCalculator, config: DetectionConfig
    ):
        """Store the pairwise similarity scorer and detection settings."""
        self.similarity_calculator = similarity_calculator
        self.config = config

    def find_all_duplicates(self, blocks: list[CodeBlock]) -> list[DuplicateMatch]:
        """Find all duplicate matches in a list of code blocks.

        Scores every unordered pair once (O(n^2) similarity calls), keeps
        pairs at or above the configured threshold, then merges matches that
        share blocks into clusters.
        """
        if len(blocks) < 2:
            return []
        matches = []
        # Each unordered pair (i, j) with i < j is generated exactly once by
        # this loop shape, so no "already processed" bookkeeping is needed
        # (a previous revision kept a redundant processed-pairs set).
        for i, block1 in enumerate(blocks):
            for block2 in blocks[i + 1 :]:
                similarity = self.similarity_calculator.calculate_similarity(
                    block1, block2
                )
                if similarity >= self.config.similarity_threshold:
                    match_type = "exact" if similarity >= 0.95 else "similar"
                    match = DuplicateMatch(
                        blocks=[block1, block2],
                        similarity_score=similarity,
                        match_type=match_type,
                        description=f"{match_type.title()} match between 2 blocks (similarity: {similarity:.3f})",
                        complexity_score=0.0,  # Will be calculated by engine
                        priority_score=similarity,
                    )
                    matches.append(match)
        return self._merge_overlapping_matches(matches)

    def find_duplicates_of_block(
        self, target_block: CodeBlock, candidate_blocks: list[CodeBlock]
    ) -> list[DuplicateMatch]:
        """Find duplicates of a specific block.

        Returns one match per candidate whose similarity to *target_block*
        meets the configured threshold. The target itself is skipped.
        """
        matches = []
        for candidate in candidate_blocks:
            if candidate == target_block:
                continue
            similarity = self.similarity_calculator.calculate_similarity(
                target_block, candidate
            )
            if similarity >= self.config.similarity_threshold:
                match_type = "exact" if similarity >= 0.95 else "similar"
                match = DuplicateMatch(
                    blocks=[target_block, candidate],
                    similarity_score=similarity,
                    match_type=match_type,
                    description=f"{match_type.title()} match with target block (similarity: {similarity:.3f})",
                    complexity_score=0.0,
                    priority_score=similarity,
                )
                matches.append(match)
        return matches

    def find_similar_blocks(
        self,
        target_block: CodeBlock,
        candidate_blocks: list[CodeBlock],
        threshold: float,
    ) -> list[tuple[CodeBlock, float]]:
        """Find blocks similar to target with a caller-supplied threshold.

        Returns ``(block, similarity)`` pairs sorted by similarity,
        highest first.
        """
        similar_blocks = []
        for candidate in candidate_blocks:
            if candidate == target_block:
                continue
            similarity = self.similarity_calculator.calculate_similarity(
                target_block, candidate
            )
            if similarity >= threshold:
                similar_blocks.append((candidate, similarity))
        # Sort by similarity descending
        similar_blocks.sort(key=lambda pair: pair[1], reverse=True)
        return similar_blocks

    def group_similar_blocks(self, blocks: list[CodeBlock]) -> list[list[CodeBlock]]:
        """Group blocks into clusters of similar code.

        Builds the full pairwise similarity matrix (O(n^2)) and unions blocks
        whose similarity meets the threshold, so each cluster is a transitive
        closure of the "similar" relation.
        """
        if len(blocks) < 2:
            return [[block] for block in blocks]
        # Build similarity matrix
        similarity_matrix = {}
        for i, block1 in enumerate(blocks):
            for j, block2 in enumerate(blocks[i + 1 :], i + 1):
                similarity_matrix[(i, j)] = (
                    self.similarity_calculator.calculate_similarity(block1, block2)
                )
        # Union-Find with path compression to group similar blocks.
        parent = list(range(len(blocks)))

        def find(x: int) -> int:
            # Path-compressing root lookup.
            if parent[x] != x:
                parent[x] = find(parent[x])
            return parent[x]

        def union(x: int, y: int) -> None:
            # Merge the two sets containing x and y.
            px, py = find(x), find(y)
            if px != py:
                parent[px] = py

        # Union blocks that are similar enough
        for (i, j), similarity in similarity_matrix.items():
            if similarity >= self.config.similarity_threshold:
                union(i, j)
        # Group blocks by their root parent
        groups = defaultdict(list)
        for i, block in enumerate(blocks):
            groups[find(i)].append(block)
        return list(groups.values())

    def calculate_match_confidence(self, match: DuplicateMatch) -> dict[str, Any]:
        """Calculate confidence metrics for a duplicate match.

        Combines four weighted, normalized factors — similarity (0.4), code
        length (0.2), token count (0.2) and complexity (0.2) — into a single
        [0, 1] confidence with a per-factor breakdown.
        """
        if len(match.blocks) < 2:
            return {"confidence": 0.0, "factors": []}
        block_count = len(match.blocks)
        # Longer matches with more tokens are more reliable signals.
        avg_length = sum(len(block.content) for block in match.blocks) / block_count
        avg_tokens = (
            sum(len(block.content.split()) for block in match.blocks) / block_count
        )
        # (factor name, raw value, normalized confidence in [0, 1], weight)
        raw_factors = [
            ("similarity_score", match.similarity_score, match.similarity_score, 0.4),
            ("code_length", avg_length, min(avg_length / 1000, 1.0), 0.2),
            ("token_count", avg_tokens, min(avg_tokens / 100, 1.0), 0.2),
            (
                "complexity_score",
                match.complexity_score,
                min(match.complexity_score / 100, 1.0),
                0.2,
            ),
        ]
        confidence_factors = [
            {
                "factor": name,
                "value": value,
                "weight": weight,
                "contribution": normalized * weight,
            }
            for name, value, normalized, weight in raw_factors
        ]
        total_confidence = sum(f["contribution"] for f in confidence_factors)
        return {
            "confidence": round(total_confidence, 3),
            "level": self._get_confidence_level(total_confidence),
            "factors": confidence_factors,
        }

    def _merge_overlapping_matches(
        self, matches: list[DuplicateMatch]
    ) -> list[DuplicateMatch]:
        """Merge matches that share code blocks.

        Matches transitively connected through shared block objects (by
        identity) collapse into one ``merged_cluster`` match with
        de-duplicated blocks and an averaged similarity score.
        """
        if len(matches) <= 1:
            return matches
        # Map each block (by object identity) to the matches containing it.
        block_to_matches = defaultdict(list)
        for i, match in enumerate(matches):
            for block in match.blocks:
                block_to_matches[id(block)].append(i)
        processed = set()
        merged_matches = []
        for i, match in enumerate(matches):
            if i in processed:
                continue
            # Flood-fill every match connected to this one via shared blocks.
            overlapping = {i}
            to_check = [i]
            while to_check:
                current = to_check.pop()
                processed.add(current)
                for block in matches[current].blocks:
                    for match_idx in block_to_matches[id(block)]:
                        if match_idx not in overlapping:
                            overlapping.add(match_idx)
                            to_check.append(match_idx)
            if len(overlapping) == 1:
                # No overlaps: keep the original match untouched.
                merged_matches.append(match)
                continue
            # Merge overlapping matches into one cluster.
            all_blocks = []
            similarities = []
            complexity_scores = []
            for idx in overlapping:
                all_blocks.extend(matches[idx].blocks)
                similarities.append(matches[idx].similarity_score)
                complexity_scores.append(matches[idx].complexity_score)
            # Remove duplicate blocks (keyed by file path and line range).
            unique_blocks = []
            seen_blocks = set()
            for block in all_blocks:
                block_id = (block.file_path, block.start_line, block.end_line)
                if block_id not in seen_blocks:
                    unique_blocks.append(block)
                    seen_blocks.add(block_id)
            avg_score = sum(similarities) / len(similarities)
            merged_match = DuplicateMatch(
                blocks=unique_blocks,
                similarity_score=avg_score,
                match_type="merged_cluster",
                description=f"Merged cluster with {len(unique_blocks)} blocks (avg similarity: {avg_score:.3f})",
                complexity_score=max(complexity_scores)
                if complexity_scores
                else 0.0,
                priority_score=avg_score,
            )
            merged_matches.append(merged_match)
        return merged_matches

    def _get_confidence_level(self, confidence: float) -> str:
        """Map a numeric confidence to a human-readable label."""
        if confidence >= 0.8:
            return "High"
        if confidence >= 0.6:
            return "Medium"
        if confidence >= 0.4:
            return "Low"
        return "Very Low"

View File

@@ -0,0 +1,63 @@
"""Similarity algorithms for code analysis."""
from .base import (
BaseSimilarityAlgorithm,
SimilarityCalculator,
)
from .lsh import (
BandingLSH,
LSHDuplicateDetector,
LSHSimilarity,
)
from .semantic import (
FunctionalSimilarity,
HashSimilarity,
SemanticSimilarity,
)
from .structural import (
DependencySimilarity,
IdentifierSimilarity,
StructuralSimilarity,
TreeEditDistance,
)
from .text_based import (
DifflibSimilarity,
LevenshteinSimilarity,
LongestCommonSubsequence,
NGramSimilarity,
)
from .token_based import (
CosineSimilarity,
JaccardSimilarity,
ShingleSimilarity,
TFIDFSimilarity,
)
# Public API of the similarity package, grouped by algorithm family.
__all__ = [
    # Base classes
    "BaseSimilarityAlgorithm",
    "SimilarityCalculator",
    # Text-based algorithms
    "LevenshteinSimilarity",
    "DifflibSimilarity",
    "LongestCommonSubsequence",
    "NGramSimilarity",
    # Token-based algorithms
    "JaccardSimilarity",
    "CosineSimilarity",
    "TFIDFSimilarity",
    "ShingleSimilarity",
    # Structural algorithms
    "StructuralSimilarity",
    "TreeEditDistance",
    "DependencySimilarity",
    "IdentifierSimilarity",
    # Semantic algorithms
    "SemanticSimilarity",
    "FunctionalSimilarity",
    "HashSimilarity",
    # LSH algorithms
    "LSHSimilarity",
    "LSHDuplicateDetector",
    "BandingLSH",
]

View File

@@ -0,0 +1,130 @@
"""Base similarity calculation framework."""
from abc import ABC, abstractmethod
from typing import Any
from ..config.schemas import SimilarityAlgorithmConfig
from ..core.base import CodeBlock
class BaseSimilarityAlgorithm(ABC):
    """Abstract interface for pairwise text-similarity algorithms.

    Concrete subclasses implement :meth:`calculate`; name, weight and the
    enabled flag are read from the attached ``SimilarityAlgorithmConfig``.
    """

    def __init__(self, config: SimilarityAlgorithmConfig | None = None):
        # Default config is named after the concrete class (lowercased).
        self.config = (
            config
            if config
            else SimilarityAlgorithmConfig(name=self.__class__.__name__.lower())
        )

    @abstractmethod
    def calculate(self, text1: str, text2: str) -> float:
        """Return a similarity score for the two strings."""
        ...

    @property
    def name(self) -> str:
        """Configured algorithm name."""
        return self.config.name

    @property
    def weight(self) -> float:
        """Relative weight used when combining algorithms."""
        return self.config.weight

    @property
    def enabled(self) -> bool:
        """Whether this algorithm participates in scoring."""
        return self.config.enabled
class SimilarityCalculator:
    """Main similarity calculator that combines multiple algorithms."""

    def __init__(self, algorithms: list[BaseSimilarityAlgorithm] | None = None):
        """Start with *algorithms* (possibly empty) and normalize weights."""
        self.algorithms = algorithms or []
        self._normalize_weights()

    def add_algorithm(self, algorithm: BaseSimilarityAlgorithm) -> None:
        """Register *algorithm* and re-normalize all enabled weights."""
        self.algorithms.append(algorithm)
        self._normalize_weights()

    def calculate_similarity(self, block1: CodeBlock, block2: CodeBlock) -> float:
        """Weighted average of enabled algorithms' scores for the two blocks."""
        if not self.algorithms:
            return 0.0
        score_sum = 0.0
        weight_sum = 0.0
        for alg in self.algorithms:
            if not alg.enabled:
                continue
            try:
                value = alg.calculate(
                    block1.normalized_content, block2.normalized_content
                )
                score_sum += value * alg.weight
                weight_sum += alg.weight
            except Exception:
                # A failing algorithm simply contributes nothing.
                continue
        if weight_sum > 0:
            return score_sum / weight_sum
        return 0.0

    def calculate_detailed_similarity(
        self, block1: CodeBlock, block2: CodeBlock
    ) -> dict[str, float]:
        """Per-algorithm scores plus their weighted average.

        Algorithms that raise are reported with a score of 0.0; the result
        also contains a ``weighted_average`` entry.
        """
        breakdown: dict[str, float] = {}
        for alg in self.algorithms:
            if not alg.enabled:
                continue
            try:
                breakdown[alg.name] = alg.calculate(
                    block1.normalized_content, block2.normalized_content
                )
            except Exception:
                breakdown[alg.name] = 0.0
        # Weighted average over all enabled algorithms.
        numerator = sum(
            breakdown[alg.name] * alg.weight
            for alg in self.algorithms
            if alg.enabled and alg.name in breakdown
        )
        denominator = sum(alg.weight for alg in self.algorithms if alg.enabled)
        breakdown["weighted_average"] = (
            numerator / denominator if denominator > 0 else 0.0
        )
        return breakdown

    def _normalize_weights(self) -> None:
        """Rescale enabled algorithms' weights in place to sum to 1.0."""
        active = [alg for alg in self.algorithms if alg.enabled]
        if not active:
            return
        combined = sum(alg.weight for alg in active)
        if combined > 0:
            for alg in active:
                alg.config.weight = alg.weight / combined

    def get_algorithm_info(self) -> list[dict[str, Any]]:
        """Describe every registered algorithm (name, weight, enabled, class)."""
        info = []
        for alg in self.algorithms:
            info.append(
                {
                    "name": alg.name,
                    "weight": alg.weight,
                    "enabled": alg.enabled,
                    "class": alg.__class__.__name__,
                }
            )
        return info

View File

@@ -0,0 +1,326 @@
"""LSH-based similarity for efficient large-scale duplicate detection."""
import hashlib
from collections import defaultdict
from typing import Any
# Optional dependency: datasketch provides MinHash/MinHashLSH. When it is
# missing, the LSH classes below fall back to pure-Python behavior (or no-op).
try:
    from datasketch import MinHash, MinHashLSH
    LSH_AVAILABLE = True
except ImportError:
    LSH_AVAILABLE = False
from ..config.schemas import SimilarityAlgorithmConfig
from ..core.base import CodeBlock
from .base import BaseSimilarityAlgorithm
class LSHSimilarity(BaseSimilarityAlgorithm):
    """MinHash-based similarity for efficient approximate matching.

    Scores are Jaccard estimates over character 4-shingles. When the
    optional ``datasketch`` package is missing, an exact shingle-Jaccard
    fallback is used instead.
    """
    def __init__(self, config: SimilarityAlgorithmConfig | None = None):
        if config is None:
            config = SimilarityAlgorithmConfig(
                name="lsh",
                weight=0.1,
                parameters={"threshold": 0.8, "num_perm": 128, "bands": 16, "rows": 8},
            )
        super().__init__(config)
        # LSH tuning knobs pulled from the config parameters.
        params = self.config.parameters
        self.threshold = params.get("threshold", 0.8)
        self.num_perm = params.get("num_perm", 128)
        self.bands = params.get("bands", 16)
        self.rows = params.get("rows", 8)
        # Index state kept for API compatibility; note that calculate()
        # compares two MinHashes directly and never queries this index.
        self.lsh_index = None
        self.minhashes = {}
        if LSH_AVAILABLE:
            self._initialize_lsh()
    def _initialize_lsh(self) -> None:
        """Create the MinHashLSH index when datasketch is installed."""
        if LSH_AVAILABLE:
            self.lsh_index = MinHashLSH(
                threshold=self.threshold, num_perm=self.num_perm
            )
    def calculate(self, text1: str, text2: str) -> float:
        """Estimate the Jaccard similarity of the two texts via MinHash."""
        if not LSH_AVAILABLE:
            # No datasketch: use the exact shingle-based fallback.
            return self._fallback_similarity(text1, text2)
        if not text1 and not text2:
            return 1.0
        if not text1 or not text2:
            return 0.0
        first = self._create_minhash(text1)
        second = self._create_minhash(text2)
        return first.jaccard(second)
    def _create_minhash(self, text: str) -> Any:
        """Build a MinHash over the character shingles of *text*."""
        if not LSH_AVAILABLE:
            return None
        minhash = MinHash(num_perm=self.num_perm)
        for shingle in self._get_shingles(text):
            minhash.update(shingle.encode("utf-8"))
        return minhash
    def _get_shingles(self, text: str, k: int = 4) -> set[str]:
        """Generate character k-shingles from lowercased, whitespace-free text."""
        # Drop spaces, newlines and tabs in one translate pass.
        compact = text.lower().translate(str.maketrans("", "", " \n\t"))
        if len(compact) < k:
            return {compact}
        return {compact[pos : pos + k] for pos in range(len(compact) - k + 1)}
    def _fallback_similarity(self, text1: str, text2: str) -> float:
        """Exact Jaccard similarity over character 4-grams (no datasketch)."""
        if not text1 and not text2:
            return 1.0
        if not text1 or not text2:
            return 0.0
        grams1 = self._get_shingles(text1)
        grams2 = self._get_shingles(text2)
        if not grams1 and not grams2:
            return 1.0
        if not grams1 or not grams2:
            return 0.0
        overlap = len(grams1 & grams2)
        combined = len(grams1 | grams2)
        return overlap / combined if combined > 0 else 0.0
class LSHDuplicateDetector:
    """High-performance duplicate detection using LSH.

    Maintains a MinHashLSH index over added code blocks; every public method
    degrades to a no-op / empty result when datasketch is not installed.
    """
    def __init__(
        self,
        threshold: float = 0.8,
        num_perm: int = 128,
        bands: int = 16,
        rows: int = 8,
    ):
        # NOTE(review): bands/rows are stored but not passed to MinHashLSH
        # below — confirm whether they are meant to configure the index.
        self.threshold = threshold
        self.num_perm = num_perm
        self.bands = bands
        self.rows = rows
        self.lsh_index = None
        # block_id -> MinHash, and block_id -> original CodeBlock.
        self.minhashes = {}
        self.code_blocks = {}
        if LSH_AVAILABLE:
            self.lsh_index = MinHashLSH(threshold=threshold, num_perm=num_perm)
    def add_code_block(self, block: CodeBlock) -> None:
        """Add a code block to the LSH index (no-op without datasketch)."""
        if not LSH_AVAILABLE:
            return
        block_id = self._get_block_id(block)
        minhash = self._create_minhash(block.normalized_content)
        self.minhashes[block_id] = minhash
        self.code_blocks[block_id] = block
        if self.lsh_index:
            self.lsh_index.insert(block_id, minhash)
    def find_similar_blocks(self, block: CodeBlock) -> list[tuple[CodeBlock, float]]:
        """Find similar blocks using LSH.

        Queries the index for candidates, re-scores each candidate with an
        exact MinHash Jaccard estimate, and returns ``(block, similarity)``
        pairs above the threshold, sorted by similarity descending.
        """
        if not LSH_AVAILABLE or not self.lsh_index:
            return []
        block_id = self._get_block_id(block)
        query_minhash = self._create_minhash(block.normalized_content)
        # Get candidate similar blocks
        candidates = self.lsh_index.query(query_minhash)
        similar_blocks = []
        for candidate_id in candidates:
            if candidate_id == block_id:
                continue
            candidate_block = self.code_blocks.get(candidate_id)
            if candidate_block:
                # Calculate exact similarity
                similarity = query_minhash.jaccard(self.minhashes[candidate_id])
                if similarity >= self.threshold:
                    similar_blocks.append((candidate_block, similarity))
        # Sort by similarity descending
        similar_blocks.sort(key=lambda x: x[1], reverse=True)
        return similar_blocks
    def find_all_duplicates(self) -> list[list[CodeBlock]]:
        """Find all duplicate groups using LSH.

        Greedy grouping: each unprocessed block seeds a group with its
        similar neighbors, and all members are marked processed so a block
        appears in at most one group.
        """
        if not LSH_AVAILABLE or not self.lsh_index:
            return []
        duplicate_groups = []
        processed = set()
        for block_id, block in self.code_blocks.items():
            if block_id in processed:
                continue
            similar_blocks = self.find_similar_blocks(block)
            if similar_blocks:
                # Create group with original block and similar blocks
                group = [block]
                group.extend([similar_block for similar_block, _ in similar_blocks])
                # Mark all blocks in group as processed
                processed.add(block_id)
                for similar_block, _ in similar_blocks:
                    similar_id = self._get_block_id(similar_block)
                    processed.add(similar_id)
                duplicate_groups.append(group)
        return duplicate_groups
    def get_statistics(self) -> dict[str, Any]:
        """Get LSH index statistics (or an error entry without datasketch)."""
        if not LSH_AVAILABLE or not self.lsh_index:
            return {"error": "LSH not available"}
        return {
            "total_blocks": len(self.code_blocks),
            "threshold": self.threshold,
            "num_perm": self.num_perm,
            "lsh_available": LSH_AVAILABLE,
            "index_keys": len(self.lsh_index.keys)
            if hasattr(self.lsh_index, "keys")
            else 0,
        }
    def _create_minhash(self, text: str) -> Any:
        """Create MinHash for text from token 3-shingles."""
        if not LSH_AVAILABLE:
            return None
        minhash = MinHash(num_perm=self.num_perm)
        # Create token-based shingles
        shingles = self._get_token_shingles(text)
        for shingle in shingles:
            minhash.update(shingle.encode("utf-8"))
        return minhash
    def _get_token_shingles(self, text: str, k: int = 3) -> set[str]:
        """Generate token k-shingles (space-joined word runs) from text."""
        import re
        # Tokenize text
        tokens = re.findall(r"\w+", text.lower())
        if len(tokens) < k:
            return {" ".join(tokens)}
        return {" ".join(tokens[i : i + k]) for i in range(len(tokens) - k + 1)}
    def _get_block_id(self, block: CodeBlock) -> str:
        """Generate unique ID for code block.

        md5 is used purely as a non-cryptographic fingerprint of the block's
        file path and line range.
        """
        content = f"{block.file_path}:{block.start_line}:{block.end_line}"
        return hashlib.md5(content.encode()).hexdigest()
class BandingLSH:
"""Custom LSH implementation with banding technique."""
def __init__(self, bands: int = 20, rows: int = 5, threshold: float = 0.8):
self.bands = bands
self.rows = rows
self.threshold = threshold
self.hash_tables: list[defaultdict[int, set[str]]] = [
defaultdict(set) for _ in range(bands)
]
self.signatures: dict[str, list[int]] = {}
def add_signature(self, item_id: str, signature: list[int]) -> None:
"""Add signature to LSH buckets."""
if len(signature) != self.bands * self.rows:
raise ValueError(
f"Signature length {len(signature)} != {self.bands * self.rows}"
)
self.signatures[item_id] = signature
# Hash each band
for band_idx in range(self.bands):
start = band_idx * self.rows
end = start + self.rows
band_signature = tuple(signature[start:end])
# Hash the band
band_hash = hash(band_signature)
self.hash_tables[band_idx][band_hash].add(item_id)
def find_candidates(self, query_id: str) -> set[str]:
"""Find candidate similar items."""
if query_id not in self.signatures:
return set()
candidates = set()
query_signature = self.signatures[query_id]
# Check each band
for band_idx in range(self.bands):
start = band_idx * self.rows
end = start + self.rows
band_signature = tuple(query_signature[start:end])
band_hash = hash(band_signature)
candidates.update(self.hash_tables[band_idx][band_hash])
# Remove query item itself
candidates.discard(query_id)
return candidates
def estimate_jaccard(self, sig1: list[int], sig2: list[int]) -> float:
"""Estimate Jaccard similarity from signatures."""
if len(sig1) != len(sig2):
return 0.0
matches = sum(1 for a, b in zip(sig1, sig2, strict=False) if a == b)
return matches / len(sig1)
def get_statistics(self) -> dict[str, Any]:
"""Get LSH statistics."""
total_buckets = sum(len(table) for table in self.hash_tables)
avg_bucket_size = total_buckets / self.bands if self.bands > 0 else 0
return {
"bands": self.bands,
"rows": self.rows,
"total_items": len(self.signatures),
"total_buckets": total_buckets,
"avg_bucket_size": avg_bucket_size,
"threshold": self.threshold,
}

View File

@@ -0,0 +1,398 @@
"""Semantic similarity algorithms for code analysis."""
import ast
import hashlib
import re
from collections import Counter
from ..config.schemas import SimilarityAlgorithmConfig
from .base import BaseSimilarityAlgorithm
class SemanticSimilarity(BaseSimilarityAlgorithm):
"""Semantic similarity algorithm based on normalized code patterns."""
def __init__(self, config: SimilarityAlgorithmConfig | None = None):
if config is None:
config = SimilarityAlgorithmConfig(name="semantic", weight=0.2)
super().__init__(config)
def calculate(self, text1: str, text2: str) -> float:
"""Calculate similarity based on semantic patterns."""
if not text1 and not text2:
return 1.0
if not text1 or not text2:
return 0.0
# Normalize both texts for semantic comparison
normalized1 = self._normalize_code(text1)
normalized2 = self._normalize_code(text2)
# Calculate multiple semantic similarities
pattern_sim = self._pattern_similarity(normalized1, normalized2)
concept_sim = self._concept_similarity(text1, text2)
structure_sim = self._semantic_structure_similarity(text1, text2)
# Weighted combination
return pattern_sim * 0.4 + concept_sim * 0.4 + structure_sim * 0.2
def _normalize_code(self, code: str) -> str:
"""Normalize code for semantic comparison."""
# Remove comments
code = re.sub(r"#.*$", "", code, flags=re.MULTILINE)
code = re.sub(r'""".*?"""', "", code, flags=re.DOTALL)
code = re.sub(r"'''.*?'''", "", code, flags=re.DOTALL)
# Normalize whitespace
code = re.sub(r"\s+", " ", code).strip()
# Normalize variable names to generic patterns
code = re.sub(r"\b[a-z_][a-z0-9_]*\b", "VAR", code)
# Normalize string literals
code = re.sub(r'"[^"]*"', "STR", code)
code = re.sub(r"'[^']*'", "STR", code)
# Normalize numbers
code = re.sub(r"\b\d+\.?\d*\b", "NUM", code)
return code
def _pattern_similarity(self, normalized1: str, normalized2: str) -> float:
"""Compare normalized code patterns."""
if not normalized1 and not normalized2:
return 1.0
if not normalized1 or not normalized2:
return 0.0
import difflib
return difflib.SequenceMatcher(None, normalized1, normalized2).ratio()
def _concept_similarity(self, code1: str, code2: str) -> float:
"""Compare conceptual similarity using keywords and operations."""
concepts1 = self._extract_concepts(code1)
concepts2 = self._extract_concepts(code2)
if not concepts1 and not concepts2:
return 1.0
if not concepts1 or not concepts2:
return 0.0
# Calculate cosine similarity on concept frequencies
all_concepts = set(concepts1.keys()) | set(concepts2.keys())
dot_product = sum(
concepts1.get(concept, 0) * concepts2.get(concept, 0)
for concept in all_concepts
)
magnitude1 = (
sum(concepts1.get(concept, 0) ** 2 for concept in all_concepts) ** 0.5
)
magnitude2 = (
sum(concepts2.get(concept, 0) ** 2 for concept in all_concepts) ** 0.5
)
if magnitude1 == 0 or magnitude2 == 0:
return 0.0
return dot_product / (magnitude1 * magnitude2)
def _extract_concepts(self, code: str) -> Counter[str]:
"""Extract conceptual elements from code."""
concepts = Counter()
# Python keywords and operations
python_concepts = {
"def",
"class",
"if",
"else",
"elif",
"for",
"while",
"try",
"except",
"finally",
"with",
"return",
"yield",
"import",
"from",
"as",
"and",
"or",
"not",
"in",
"is",
"lambda",
"pass",
"break",
"continue",
}
# Extract words
words = re.findall(r"\b\w+\b", code.lower())
for word in words:
if word in python_concepts:
concepts[f"keyword:{word}"] += 1
elif word in ["len", "str", "int", "float", "list", "dict", "set", "tuple"]:
concepts[f"builtin:{word}"] += 1
elif word.endswith("error") or word.endswith("exception"):
concepts["error_handling"] += 1
elif word in ["print", "log", "debug", "info", "warn", "error"]:
concepts["logging"] += 1
elif word in ["open", "read", "write", "close", "file"]:
concepts["file_io"] += 1
elif word in ["get", "post", "put", "delete", "request", "response"]:
concepts["http"] += 1
elif word in ["query", "select", "insert", "update", "delete", "database"]:
concepts["database"] += 1
# Extract operators and patterns
operators = re.findall(r"[+\-*/=<>!&|^~%]", code)
for op in operators:
concepts[f"operator:{op}"] += 1
return concepts
def _semantic_structure_similarity(self, code1: str, code2: str) -> float:
    """Similarity of semantic AST patterns; 0.0 when either snippet is unparseable."""
    import ast

    try:
        parsed_a = ast.parse(code1)
        parsed_b = ast.parse(code2)
    except SyntaxError:
        return 0.0
    return self._compare_pattern_sets(
        self._extract_semantic_patterns(parsed_a),
        self._extract_semantic_patterns(parsed_b),
    )
def _extract_semantic_patterns(self, tree: ast.AST) -> set[str]:
"""Extract semantic patterns from AST."""
patterns = set()
for node in ast.walk(tree):
if isinstance(node, ast.FunctionDef):
# Function signature patterns
arg_count = len(node.args.args)
patterns.add(f"function_args:{arg_count}")
# Check for common patterns
if any(isinstance(n, ast.Return) for n in ast.walk(node)):
patterns.add("function_returns")
if any(isinstance(n, ast.Yield) for n in ast.walk(node)):
patterns.add("generator_function")
elif isinstance(node, ast.ClassDef):
# Class patterns
base_count = len(node.bases)
patterns.add(f"class_inheritance:{base_count}")
elif isinstance(node, ast.Try):
# Exception handling patterns
patterns.add("exception_handling")
if node.finalbody:
patterns.add("finally_block")
elif isinstance(node, ast.With):
# Context manager pattern
patterns.add("context_manager")
elif isinstance(node, ast.ListComp):
patterns.add("list_comprehension")
elif isinstance(node, ast.DictComp):
patterns.add("dict_comprehension")
elif isinstance(node, ast.SetComp):
patterns.add("set_comprehension")
elif isinstance(node, ast.Lambda):
patterns.add("lambda_function")
elif isinstance(
node, (ast.FunctionDef, ast.ClassDef, ast.AsyncFunctionDef)
):
if node.decorator_list:
patterns.add("decorator_usage")
return patterns
def _compare_pattern_sets(self, patterns1: set[str], patterns2: set[str]) -> float:
"""Compare two sets of semantic patterns."""
if not patterns1 and not patterns2:
return 1.0
if not patterns1 or not patterns2:
return 0.0
intersection = len(patterns1.intersection(patterns2))
union = len(patterns1.union(patterns2))
return intersection / union if union > 0 else 0.0
class FunctionalSimilarity(BaseSimilarityAlgorithm):
    """Similarity based on functional behavior patterns.

    Profiles each snippet by counting broad behavioral categories in its
    AST and compares the two profiles with cosine similarity.
    """

    def __init__(self, config: SimilarityAlgorithmConfig | None = None):
        effective = (
            SimilarityAlgorithmConfig(name="functional", weight=0.15)
            if config is None
            else config
        )
        super().__init__(effective)

    def calculate(self, text1: str, text2: str) -> float:
        """Calculate similarity based on functional patterns."""
        if not text1 and not text2:
            return 1.0
        if not text1 or not text2:
            return 0.0
        try:
            import ast

            profile_a = self._extract_behavioral_patterns(ast.parse(text1))
            profile_b = self._extract_behavioral_patterns(ast.parse(text2))
        except SyntaxError:
            return 0.0
        return self._compare_behaviors(profile_a, profile_b)

    def _extract_behavioral_patterns(self, tree: ast.AST) -> dict[str, int]:
        """Count behavioral categories (access, mutation, control flow, ...) in the AST."""
        counts = dict.fromkeys(
            (
                "data_access",        # Reading/accessing data
                "data_mutation",      # Modifying data
                "control_flow",       # Conditional logic
                "iteration",          # Loops and iteration
                "function_calls",     # Function invocations
                "exception_handling", # Error handling
                "io_operations",      # Input/output
                "mathematical",       # Math operations
            ),
            0,
        )
        io_call_names = {"print", "input", "open", "read", "write"}
        math_operators = (ast.Add, ast.Sub, ast.Mult, ast.Div, ast.Mod, ast.Pow)
        for node in ast.walk(tree):
            if isinstance(node, (ast.Subscript, ast.Attribute)):
                counts["data_access"] += 1
            elif isinstance(node, (ast.Assign, ast.AugAssign)):
                counts["data_mutation"] += 1
            elif isinstance(node, ast.If):
                counts["control_flow"] += 1
            elif isinstance(node, (ast.For, ast.While)):
                counts["iteration"] += 1
            elif isinstance(node, ast.Call):
                counts["function_calls"] += 1
                # Direct-name calls to well-known I/O routines count twice
                if isinstance(node.func, ast.Name) and node.func.id.lower() in io_call_names:
                    counts["io_operations"] += 1
            elif isinstance(node, ast.Try):
                counts["exception_handling"] += 1
            elif isinstance(node, ast.BinOp) and isinstance(node.op, math_operators):
                counts["mathematical"] += 1
        return counts

    def _compare_behaviors(
        self, behavior1: dict[str, int], behavior2: dict[str, int]
    ) -> float:
        """Cosine similarity between two behavior-count profiles."""
        if not any(behavior1.values()) and not any(behavior2.values()):
            return 1.0
        if not any(behavior1.values()) or not any(behavior2.values()):
            return 0.0
        categories = set(behavior1) | set(behavior2)
        dot = sum(behavior1.get(cat, 0) * behavior2.get(cat, 0) for cat in categories)
        norm_a = sum(behavior1.get(cat, 0) ** 2 for cat in categories) ** 0.5
        norm_b = sum(behavior2.get(cat, 0) ** 2 for cat in categories) ** 0.5
        if norm_a == 0 or norm_b == 0:
            return 0.0
        return dot / (norm_a * norm_b)
class HashSimilarity(BaseSimilarityAlgorithm):
    """Similarity based on code content hashing.

    Tries exact, whitespace/comment-normalized, and fuzzy n-gram hash
    comparisons; each returns a binary 1.0/0.0 verdict.
    """

    def __init__(self, config: SimilarityAlgorithmConfig | None = None):
        super().__init__(
            SimilarityAlgorithmConfig(name="hash", weight=0.1)
            if config is None
            else config
        )

    def calculate(self, text1: str, text2: str) -> float:
        """Calculate similarity using various hash comparisons."""
        if not text1 and not text2:
            return 1.0
        if not text1 or not text2:
            return 0.0
        exact = self._exact_hash_similarity(text1, text2)
        if exact == 1.0:
            return 1.0
        # Otherwise take the best of the looser comparisons.
        return max(
            exact,
            self._normalized_hash_similarity(text1, text2),
            self._fuzzy_hash_similarity(text1, text2),
        )

    def _exact_hash_similarity(self, text1: str, text2: str) -> float:
        """1.0 when the raw contents hash identically, else 0.0."""
        digest_a = hashlib.md5(text1.encode()).hexdigest()
        digest_b = hashlib.md5(text2.encode()).hexdigest()
        return 1.0 if digest_a == digest_b else 0.0

    def _normalized_hash_similarity(self, text1: str, text2: str) -> float:
        """1.0 when contents match after stripping comments and collapsing whitespace."""

        def canonical(src: str) -> str:
            without_comments = re.sub(r"#.*$", "", src, flags=re.MULTILINE)
            return re.sub(r"\s+", " ", without_comments).strip()

        digest_a = hashlib.md5(canonical(text1).encode()).hexdigest()
        digest_b = hashlib.md5(canonical(text2).encode()).hexdigest()
        return 1.0 if digest_a == digest_b else 0.0

    def _fuzzy_hash_similarity(self, text1: str, text2: str) -> float:
        """1.0 when the character 4-gram Jaccard similarity exceeds 0.95."""
        grams_a = {text1[i : i + 4] for i in range(len(text1) - 3)}
        grams_b = {text2[i : i + 4] for i in range(len(text2) - 3)}
        if not grams_a and not grams_b:
            return 1.0
        if not grams_a or not grams_b:
            return 0.0
        shared = len(grams_a & grams_b)
        combined = len(grams_a | grams_b)
        jaccard = shared / combined if combined > 0 else 0.0
        # Return 1.0 only for very high similarity
        return 1.0 if jaccard > 0.95 else 0.0

View File

@@ -0,0 +1,399 @@
"""Structural similarity algorithms for code analysis."""
import ast
from collections import Counter
from ..config.schemas import SimilarityAlgorithmConfig
from .base import BaseSimilarityAlgorithm
class StructuralSimilarity(BaseSimilarityAlgorithm):
    """AST-based structural similarity algorithm.

    Compares two snippets by the shape of their ASTs: definition
    signatures, control-flow layout, and nesting depth.
    """

    def __init__(self, config: SimilarityAlgorithmConfig | None = None):
        if config is None:
            config = SimilarityAlgorithmConfig(name="structural", weight=0.25)
        super().__init__(config)

    def calculate(self, text1: str, text2: str) -> float:
        """Calculate similarity based on AST structure.

        Falls back to a plain text ratio when either snippet fails to parse.
        """
        if not text1 and not text2:
            return 1.0
        if not text1 or not text2:
            return 0.0
        try:
            tree1 = ast.parse(text1)
            tree2 = ast.parse(text2)
        except SyntaxError:
            # Fallback to text-based comparison for malformed code
            return self._fallback_similarity(text1, text2)
        structure1 = self._extract_structure(tree1)
        structure2 = self._extract_structure(tree2)
        return self._compare_structures(structure1, structure2)

    @staticmethod
    def _contains_nested_loop(loop: ast.AST) -> bool:
        """True when *loop* has another For/While anywhere inside it.

        BUGFIX: ast.walk() yields the root node first, so it must be
        excluded — the previous check counted the loop itself and reported
        every single loop as nested.
        """
        return any(
            isinstance(inner, (ast.For, ast.While))
            for inner in ast.walk(loop)
            if inner is not loop
        )

    def _extract_structure(self, tree: ast.AST) -> list[str]:
        """Extract enhanced structural patterns from AST."""
        structure: list[str] = []

        def visit_node(node: ast.AST, depth: int = 0) -> None:
            # Tag entries with nesting depth so deeply nested constructs
            # only match other constructs at a similar depth.
            depth_prefix = f"d{depth}:" if depth > 0 else ""
            if isinstance(node, ast.FunctionDef):
                # Abstract function names but keep structural information
                arg_count = len(node.args.args)
                has_decorators = len(node.decorator_list) > 0
                structure.append(
                    f"{depth_prefix}function:args{arg_count}:dec{has_decorators}"
                )
                # Record the sequence of top-level statement kinds (max 5)
                body_patterns = []
                for child in node.body:
                    if isinstance(child, ast.If):
                        body_patterns.append("if")
                    elif isinstance(child, ast.For):
                        body_patterns.append("for")
                    elif isinstance(child, ast.While):
                        body_patterns.append("while")
                    elif isinstance(child, ast.Try):
                        body_patterns.append("try")
                    elif isinstance(child, ast.Return):
                        body_patterns.append("return")
                if body_patterns:
                    structure.append(
                        f"{depth_prefix}body_pattern:{'_'.join(body_patterns[:5])}"
                    )
                for child in ast.iter_child_nodes(node):
                    visit_node(child, depth + 1)
            elif isinstance(node, ast.AsyncFunctionDef):
                arg_count = len(node.args.args)
                has_decorators = len(node.decorator_list) > 0
                structure.append(
                    f"{depth_prefix}async_function:args{arg_count}:dec{has_decorators}"
                )
                for child in ast.iter_child_nodes(node):
                    visit_node(child, depth + 1)
            elif isinstance(node, ast.ClassDef):
                # Abstract class names but keep inheritance and structure info
                base_count = len(node.bases)
                has_decorators = len(node.decorator_list) > 0
                structure.append(
                    f"{depth_prefix}class:bases{base_count}:dec{has_decorators}"
                )
                # Count methods in class
                method_count = sum(
                    1
                    for child in node.body
                    if isinstance(child, (ast.FunctionDef, ast.AsyncFunctionDef))
                )
                structure.append(f"{depth_prefix}class_methods:{method_count}")
                for child in ast.iter_child_nodes(node):
                    visit_node(child, depth + 1)
            elif isinstance(node, ast.If):
                # Track conditional structure complexity
                elif_count = len([n for n in node.orelse if isinstance(n, ast.If)])
                has_else = any(not isinstance(n, ast.If) for n in node.orelse)
                structure.append(f"{depth_prefix}if:elif{elif_count}:else{has_else}")
                for child in ast.iter_child_nodes(node):
                    visit_node(child, depth + 1)
            elif isinstance(node, ast.For):
                is_nested = self._contains_nested_loop(node)
                structure.append(f"{depth_prefix}for:nested{is_nested}")
                for child in ast.iter_child_nodes(node):
                    visit_node(child, depth + 1)
            elif isinstance(node, ast.While):
                is_nested = self._contains_nested_loop(node)
                structure.append(f"{depth_prefix}while:nested{is_nested}")
                for child in ast.iter_child_nodes(node):
                    visit_node(child, depth + 1)
            elif isinstance(node, ast.Try):
                except_count = len(node.handlers)
                has_finally = bool(node.finalbody)
                has_else = bool(node.orelse)
                structure.append(
                    f"{depth_prefix}try:except{except_count}:finally{has_finally}:else{has_else}"
                )
                for child in ast.iter_child_nodes(node):
                    visit_node(child, depth + 1)
            elif isinstance(node, ast.With):
                item_count = len(node.items)
                structure.append(f"{depth_prefix}with:items{item_count}")
                for child in ast.iter_child_nodes(node):
                    visit_node(child, depth + 1)
            elif isinstance(node, ast.Return):
                has_value = node.value is not None
                structure.append(f"{depth_prefix}return:value{has_value}")
            elif isinstance(node, ast.Assign):
                target_count = len(node.targets)
                structure.append(f"{depth_prefix}assign:targets{target_count}")
            elif isinstance(node, ast.Call):
                # Abstract function calls but keep argument structure
                arg_count = len(node.args)
                kwarg_count = len(node.keywords)
                structure.append(
                    f"{depth_prefix}call:args{arg_count}:kwargs{kwarg_count}"
                )
            else:
                # Visit other node types without adding to structure
                for child in ast.iter_child_nodes(node):
                    visit_node(child, depth)

        visit_node(tree)
        return structure

    def _compare_structures(
        self, structure1: list[str], structure2: list[str]
    ) -> float:
        """Blend set-overlap (Jaccard) and order-aware (LCS) similarity."""
        if not structure1 and not structure2:
            return 1.0
        if not structure1 or not structure2:
            return 0.0
        # Jaccard on the unordered pattern sets
        set1 = set(structure1)
        set2 = set(structure2)
        intersection = len(set1.intersection(set2))
        union = len(set1.union(set2))
        jaccard = intersection / union if union > 0 else 0.0
        # Also consider sequence similarity
        sequence_sim = self._sequence_similarity(structure1, structure2)
        return (jaccard + sequence_sim) / 2

    def _sequence_similarity(self, seq1: list[str], seq2: list[str]) -> float:
        """LCS length divided by the longer sequence's length."""
        if not seq1 and not seq2:
            return 1.0
        if not seq1 or not seq2:
            return 0.0
        lcs_length = self._lcs_length(seq1, seq2)
        max_length = max(len(seq1), len(seq2))
        return lcs_length / max_length if max_length > 0 else 0.0

    def _lcs_length(self, seq1: list[str], seq2: list[str]) -> int:
        """Length of the longest common subsequence (O(m*n) DP)."""
        m, n = len(seq1), len(seq2)
        dp = [[0] * (n + 1) for _ in range(m + 1)]
        for i in range(1, m + 1):
            for j in range(1, n + 1):
                if seq1[i - 1] == seq2[j - 1]:
                    dp[i][j] = dp[i - 1][j - 1] + 1
                else:
                    dp[i][j] = max(dp[i - 1][j], dp[i][j - 1])
        return dp[m][n]

    def _fallback_similarity(self, text1: str, text2: str) -> float:
        """Fallback to simple text similarity for malformed code."""
        import difflib

        return difflib.SequenceMatcher(None, text1, text2).ratio()
class TreeEditDistance(BaseSimilarityAlgorithm):
    """Tree edit distance-based similarity algorithm.

    Approximates tree edit distance with Levenshtein distance over the
    walk-ordered sequence of AST node type names.
    """

    def __init__(self, config: SimilarityAlgorithmConfig | None = None):
        if config is None:
            config = SimilarityAlgorithmConfig(name="tree_edit", weight=0.2)
        super().__init__(config)

    def calculate(self, text1: str, text2: str) -> float:
        """Calculate similarity using simplified tree edit distance."""
        if not text1 and not text2:
            return 1.0
        if not text1 or not text2:
            return 0.0
        try:
            types_a = self._get_node_types(ast.parse(text1))
            types_b = self._get_node_types(ast.parse(text2))
        except SyntaxError:
            # Fallback to text-based comparison
            import difflib

            return difflib.SequenceMatcher(None, text1, text2).ratio()
        longest = max(len(types_a), len(types_b))
        if longest == 0:
            return 1.0
        # Normalize edit distance into a similarity score
        return 1 - (self._edit_distance(types_a, types_b) / longest)

    def _get_node_types(self, tree: ast.AST) -> list[str]:
        """Flatten the AST into a walk-ordered list of node type names."""
        return [type(node).__name__ for node in ast.walk(tree)]

    def _edit_distance(self, seq1: list[str], seq2: list[str]) -> int:
        """Levenshtein distance between two sequences (row-rolling DP)."""
        previous = list(range(len(seq2) + 1))
        for i, left in enumerate(seq1, start=1):
            current = [i]
            for j, right in enumerate(seq2, start=1):
                if left == right:
                    current.append(previous[j - 1])
                else:
                    # deletion, insertion, substitution
                    current.append(1 + min(previous[j], current[-1], previous[j - 1]))
            previous = current
        return previous[-1]
class DependencySimilarity(BaseSimilarityAlgorithm):
    """Import and dependency-based similarity algorithm."""

    def __init__(self, config: SimilarityAlgorithmConfig | None = None):
        if config is None:
            config = SimilarityAlgorithmConfig(name="dependency", weight=0.15)
        super().__init__(config)

    def calculate(self, text1: str, text2: str) -> float:
        """Calculate similarity based on imports and dependencies.

        Returns 0.0 when either snippet is unparseable. BUGFIX: previously
        _extract_dependencies swallowed the SyntaxError itself and returned
        an empty set, which left this handler unreachable and made two
        malformed (or import-free) snippets compare as 1.0.
        """
        if not text1 and not text2:
            return 1.0
        if not text1 or not text2:
            return 0.0
        try:
            deps1 = self._extract_dependencies(text1)
            deps2 = self._extract_dependencies(text2)
        except SyntaxError:
            return 0.0
        return self._compare_dependencies(deps1, deps2)

    def _extract_dependencies(self, code: str) -> set[str]:
        """Extract import dependencies from code.

        Raises:
            SyntaxError: if *code* is not valid Python (handled by calculate).
        """
        tree = ast.parse(code)
        dependencies: set[str] = set()
        for node in ast.walk(tree):
            if isinstance(node, ast.Import):
                # Only the top-level package matters for plain imports
                for alias in node.names:
                    dependencies.add(alias.name.split(".")[0])
            elif isinstance(node, ast.ImportFrom):
                if node.module:
                    dependencies.add(node.module.split(".")[0])
                # Imported names count as dependencies too
                for alias in node.names:
                    dependencies.add(alias.name)
        return dependencies

    def _compare_dependencies(self, deps1: set[str], deps2: set[str]) -> float:
        """Jaccard similarity between two dependency sets."""
        if not deps1 and not deps2:
            return 1.0
        if not deps1 or not deps2:
            return 0.0
        intersection = len(deps1.intersection(deps2))
        union = len(deps1.union(deps2))
        return intersection / union if union > 0 else 0.0
class IdentifierSimilarity(BaseSimilarityAlgorithm):
    """Variable and function name-based similarity algorithm."""

    def __init__(self, config: SimilarityAlgorithmConfig | None = None):
        if config is None:
            config = SimilarityAlgorithmConfig(name="identifier", weight=0.2)
        super().__init__(config)

    def calculate(self, text1: str, text2: str) -> float:
        """Calculate similarity based on identifier names.

        Returns 0.0 when either snippet is unparseable. BUGFIX: previously
        _extract_identifiers swallowed the SyntaxError and returned an empty
        Counter, which left this handler unreachable and made two malformed
        snippets compare as 1.0.
        """
        if not text1 and not text2:
            return 1.0
        if not text1 or not text2:
            return 0.0
        try:
            identifiers1 = self._extract_identifiers(text1)
            identifiers2 = self._extract_identifiers(text2)
        except SyntaxError:
            return 0.0
        return self._compare_identifiers(identifiers1, identifiers2)

    def _extract_identifiers(self, code: str) -> Counter[str]:
        """Extract all identifiers (names, def/class names, attribute names).

        Raises:
            SyntaxError: if *code* is not valid Python (handled by calculate).
        """
        tree = ast.parse(code)
        identifiers = []
        for node in ast.walk(tree):
            if isinstance(node, ast.Name):
                identifiers.append(node.id)
            elif isinstance(
                node, ast.FunctionDef | ast.AsyncFunctionDef | ast.ClassDef
            ):
                # Async function names were previously skipped; they count too.
                identifiers.append(node.name)
            elif isinstance(node, ast.Attribute):
                identifiers.append(node.attr)
        return Counter(identifiers)

    def _compare_identifiers(self, ids1: Counter[str], ids2: Counter[str]) -> float:
        """Cosine similarity over identifier frequency vectors."""
        if not ids1 and not ids2:
            return 1.0
        if not ids1 or not ids2:
            return 0.0
        all_ids = set(ids1.keys()) | set(ids2.keys())
        # Counters return 0 for missing keys, so no .get() is needed.
        dot_product = sum(ids1[id_] * ids2[id_] for id_ in all_ids)
        magnitude1 = sum(ids1[id_] ** 2 for id_ in all_ids) ** 0.5
        magnitude2 = sum(ids2[id_] ** 2 for id_ in all_ids) ** 0.5
        if magnitude1 == 0 or magnitude2 == 0:
            return 0.0
        return dot_product / (magnitude1 * magnitude2)

View File

@@ -0,0 +1,131 @@
"""Text-based similarity algorithms."""
import difflib
try:
from Levenshtein import ratio as levenshtein_ratio
LEVENSHTEIN_AVAILABLE = True
except ImportError:
LEVENSHTEIN_AVAILABLE = False
from ..config.schemas import SimilarityAlgorithmConfig
from .base import BaseSimilarityAlgorithm
class LevenshteinSimilarity(BaseSimilarityAlgorithm):
    """Levenshtein distance-based similarity algorithm.

    Uses python-Levenshtein when installed; otherwise falls back to
    difflib's SequenceMatcher ratio.
    """

    def __init__(self, config: SimilarityAlgorithmConfig | None = None):
        super().__init__(
            SimilarityAlgorithmConfig(name="levenshtein", weight=0.2)
            if config is None
            else config
        )

    def calculate(self, text1: str, text2: str) -> float:
        """Calculate similarity using Levenshtein distance."""
        if not text1 and not text2:
            return 1.0
        if not text1 or not text2:
            return 0.0
        if not LEVENSHTEIN_AVAILABLE:
            # Fallback to difflib implementation
            return difflib.SequenceMatcher(None, text1, text2).ratio()
        return levenshtein_ratio(text1, text2)
class DifflibSimilarity(BaseSimilarityAlgorithm):
    """Python difflib-based similarity algorithm."""

    def __init__(self, config: SimilarityAlgorithmConfig | None = None):
        effective = config
        if effective is None:
            effective = SimilarityAlgorithmConfig(name="difflib", weight=0.25)
        super().__init__(effective)

    def calculate(self, text1: str, text2: str) -> float:
        """Calculate similarity using difflib's SequenceMatcher ratio."""
        if not text1 and not text2:
            return 1.0
        if not text1 or not text2:
            return 0.0
        matcher = difflib.SequenceMatcher(None, text1, text2)
        return matcher.ratio()
class LongestCommonSubsequence(BaseSimilarityAlgorithm):
    """Longest Common Subsequence-based similarity."""

    def __init__(self, config: SimilarityAlgorithmConfig | None = None):
        if config is None:
            config = SimilarityAlgorithmConfig(name="lcs", weight=0.15)
        super().__init__(config)

    def calculate(self, text1: str, text2: str) -> float:
        """Similarity = LCS length divided by the longer text's length."""
        if not text1 and not text2:
            return 1.0
        if not text1 or not text2:
            return 0.0
        longest = max(len(text1), len(text2))
        if longest == 0:
            return 0.0
        return self._lcs_length(text1, text2) / longest

    def _lcs_length(self, text1: str, text2: str) -> int:
        """Length of the longest common subsequence (row-rolling DP)."""
        previous = [0] * (len(text2) + 1)
        for ch_a in text1:
            current = [0]
            for j, ch_b in enumerate(text2, start=1):
                if ch_a == ch_b:
                    current.append(previous[j - 1] + 1)
                else:
                    current.append(max(previous[j], current[-1]))
            previous = current
        return previous[-1]
class NGramSimilarity(BaseSimilarityAlgorithm):
    """N-gram based similarity algorithm (Jaccard over character n-grams)."""

    def __init__(self, config: SimilarityAlgorithmConfig | None = None):
        if config is None:
            config = SimilarityAlgorithmConfig(
                name="ngram", weight=0.2, parameters={"n": 3}
            )
        super().__init__(config)
        raw_n = self.config.parameters.get("n", 3)
        # Coerce the configured n to int; fall back to trigrams on bad types.
        self.n: int = int(raw_n) if isinstance(raw_n, (int, float, str)) else 3

    def calculate(self, text1: str, text2: str) -> float:
        """Calculate similarity using n-grams."""
        if not text1 and not text2:
            return 1.0
        if not text1 or not text2:
            return 0.0
        grams_a = set(self._get_ngrams(text1))
        grams_b = set(self._get_ngrams(text2))
        if not grams_a and not grams_b:
            return 1.0
        if not grams_a or not grams_b:
            return 0.0
        overlap = len(grams_a & grams_b)
        total = len(grams_a | grams_b)
        return overlap / total if total > 0 else 0.0

    def _get_ngrams(self, text: str) -> list[str]:
        """Generate n-grams; a too-short text yields itself as one gram."""
        if len(text) < self.n:
            return [text]
        return [text[i : i + self.n] for i in range(len(text) - self.n + 1)]

View File

@@ -0,0 +1,271 @@
"""Token-based similarity algorithms."""
import math
from collections import Counter
from ..config.schemas import SimilarityAlgorithmConfig
from .base import BaseSimilarityAlgorithm
class JaccardSimilarity(BaseSimilarityAlgorithm):
    """Jaccard similarity coefficient algorithm with enhanced tokenization."""

    def __init__(self, config: SimilarityAlgorithmConfig | None = None):
        if config is None:
            config = SimilarityAlgorithmConfig(name="jaccard", weight=0.3)
        super().__init__(config)

    def calculate(self, text1: str, text2: str) -> float:
        """Calculate similarity using Jaccard coefficient."""
        if not text1 and not text2:
            return 1.0
        if not text1 or not text2:
            return 0.0
        tokens1 = set(self._tokenize(text1))
        tokens2 = set(self._tokenize(text2))
        if not tokens1 and not tokens2:
            return 1.0
        if not tokens1 or not tokens2:
            return 0.0
        intersection = len(tokens1.intersection(tokens2))
        union = len(tokens1.union(tokens2))
        return intersection / union if union > 0 else 0.0

    def _tokenize(self, text: str) -> list[str]:
        """Enhanced tokenization with semantic grouping for better duplicate detection.

        BUGFIXES versus the original pass:
        * single-character tokens were discarded BEFORE semantic grouping,
          so the loop indices i/j/k explicitly listed in the INDEX_VAR
          pattern could never match — grouping now runs first;
        * tokens are lowercased by text.lower(), so the 'None'/'True'/'False'
          entries in the keyword set could never match — their lowercase
          forms are included.
        """
        import re

        # Python keywords and built-ins that should be preserved exactly
        keywords = {
            'def', 'class', 'if', 'else', 'elif', 'for', 'while', 'try', 'except',
            'finally', 'with', 'as', 'import', 'from', 'return', 'yield', 'pass',
            'break', 'continue', 'and', 'or', 'not', 'in', 'is', 'lambda', 'None',
            'True', 'False', 'self', 'cls', 'len', 'range', 'str', 'int', 'float',
            'list', 'dict', 'tuple', 'set', 'bool', 'append', 'extend', 'remove',
            # lowercase forms actually produced by text.lower()
            'none', 'true', 'false',
        }
        # Semantic variable name patterns (group similar names)
        semantic_patterns = [
            (r'\b(data|item|element|val|value|obj|object|thing)\w*\b', 'DATA_VAR'),
            (r'\b(result|output|ret|return|res|response)\w*\b', 'RESULT_VAR'),
            (r'\b(index|idx|i|j|k|counter|count|num|number)\w*\b', 'INDEX_VAR'),
            (r'\b(name|id|key|identifier|label)\w*\b', 'ID_VAR'),
            (r'\b(config|settings|options|params?|args?|kwargs?)\w*\b', 'CONFIG_VAR'),
            (r'\b(path|file|dir|directory|filename)\w*\b', 'PATH_VAR'),
            (r'\b(error|err|exception|ex)\w*\b', 'ERROR_VAR'),
            (r'\b(temp|tmp|buffer|buf|cache)\w*\b', 'TEMP_VAR'),
            (r'\b(min|max|avg|sum|total|count)\w*\b', 'CALC_VAR'),
            (r'\b(user|person|client|customer)\w*\b', 'USER_VAR'),
            (r'\b(width|height|size|length|dimension)\w*\b', 'SIZE_VAR'),
        ]
        # First pass: extract all tokens
        tokens = re.findall(r"\b\w+\b", text.lower())
        # Second pass: apply semantic grouping and filtering
        processed_tokens = []
        for token in tokens:
            # Keep keywords and built-ins as-is
            if token in keywords:
                processed_tokens.append(token)
                continue
            # Apply semantic patterns to group similar variable names
            replacement = next(
                (
                    label
                    for pattern, label in semantic_patterns
                    if re.match(pattern, token)
                ),
                None,
            )
            if replacement is not None:
                processed_tokens.append(replacement)
                continue
            # Remaining single-character tokens carry no signal
            if len(token) <= 1:
                continue
            # Generic variable abstraction for remaining identifiers
            if re.match(r'^[a-zA-Z_]\w*$', token):
                processed_tokens.append('VAR')
            else:
                processed_tokens.append(token)
        return processed_tokens
class CosineSimilarity(BaseSimilarityAlgorithm):
    """Cosine similarity algorithm using TF-IDF vectors."""

    def __init__(self, config: SimilarityAlgorithmConfig | None = None):
        if config is None:
            config = SimilarityAlgorithmConfig(name="cosine", weight=0.3)
        super().__init__(config)

    def calculate(self, text1: str, text2: str) -> float:
        """Calculate similarity as the cosine of the term-count vectors."""
        if not text1 and not text2:
            return 1.0
        if not text1 or not text2:
            return 0.0
        words_a = self._tokenize(text1)
        words_b = self._tokenize(text2)
        if not words_a and not words_b:
            return 1.0
        if not words_a or not words_b:
            return 0.0
        # Term-frequency vectors over the shared vocabulary
        counts_a = Counter(words_a)
        counts_b = Counter(words_b)
        vocabulary = set(counts_a) | set(counts_b)
        dot = sum(counts_a[word] * counts_b[word] for word in vocabulary)
        norm_a = math.sqrt(sum(counts_a[word] ** 2 for word in vocabulary))
        norm_b = math.sqrt(sum(counts_b[word] ** 2 for word in vocabulary))
        if norm_a == 0 or norm_b == 0:
            return 0.0
        return dot / (norm_a * norm_b)

    def _tokenize(self, text: str) -> list[str]:
        """Lowercase word tokens longer than one character."""
        import re

        return [tok for tok in re.findall(r"\b\w+\b", text.lower()) if len(tok) > 1]
class TFIDFSimilarity(BaseSimilarityAlgorithm):
    """TF-IDF based cosine similarity algorithm."""

    def __init__(self, config: SimilarityAlgorithmConfig | None = None):
        if config is None:
            config = SimilarityAlgorithmConfig(name="tfidf", weight=0.25)
        super().__init__(config)

    def calculate(self, text1: str, text2: str) -> float:
        """Calculate similarity using TF-IDF weighted cosine similarity."""
        if not text1 and not text2:
            return 1.0
        if not text1 or not text2:
            return 0.0
        tokens1 = self._tokenize(text1)
        tokens2 = self._tokenize(text2)
        if not tokens1 and not tokens2:
            return 1.0
        if not tokens1 or not tokens2:
            return 0.0
        # Calculate TF for both texts
        tf1 = self._calculate_tf(tokens1)
        tf2 = self._calculate_tf(tokens2)
        # Calculate IDF over the two-document corpus
        all_terms = set(tf1.keys()) | set(tf2.keys())
        idf = self._calculate_idf(all_terms, [tokens1, tokens2])
        # TF-IDF vectors
        tfidf1 = {term: tf1.get(term, 0) * idf[term] for term in all_terms}
        tfidf2 = {term: tf2.get(term, 0) * idf[term] for term in all_terms}
        # Cosine similarity
        dot_product = sum(tfidf1[term] * tfidf2[term] for term in all_terms)
        magnitude1 = math.sqrt(sum(tfidf1[term] ** 2 for term in all_terms))
        magnitude2 = math.sqrt(sum(tfidf2[term] ** 2 for term in all_terms))
        if magnitude1 == 0 or magnitude2 == 0:
            return 0.0
        return dot_product / (magnitude1 * magnitude2)

    def _tokenize(self, text: str) -> list[str]:
        """Lowercase word tokens longer than one character."""
        import re

        tokens = re.findall(r"\b\w+\b", text.lower())
        return [token for token in tokens if len(token) > 1]

    def _calculate_tf(self, tokens: list[str]) -> dict[str, float]:
        """Term frequency: count divided by total token count."""
        tf = Counter(tokens)
        total_terms = len(tokens)
        return {term: count / total_terms for term, count in tf.items()}

    def _calculate_idf(
        self, terms: set[str], documents: list[list[str]]
    ) -> dict[str, float]:
        """Smoothed inverse document frequency.

        BUGFIX: uses ln((1 + N) / (1 + df)) + 1 (scikit-learn's smooth_idf
        formula), which is strictly positive. The previous ln(N / (df + 1))
        was 0 for terms in exactly one of the two documents and negative for
        shared terms, so only shared terms carried weight and any two texts
        sharing a single token scored ~1.0.
        """
        idf = {}
        total_docs = len(documents)
        for term in terms:
            docs_containing_term = sum(1 for doc in documents if term in doc)
            idf[term] = math.log((1 + total_docs) / (1 + docs_containing_term)) + 1
        return idf
class ShingleSimilarity(BaseSimilarityAlgorithm):
    """Shingle-based similarity algorithm (Jaccard over k-token shingles)."""

    def __init__(self, config: SimilarityAlgorithmConfig | None = None):
        if config is None:
            config = SimilarityAlgorithmConfig(
                name="shingle", weight=0.2, parameters={"k": 4}
            )
        super().__init__(config)
        configured_k = self.config.parameters.get("k", 4)
        # Coerce the configured shingle size to int; fall back to 4.
        self.k: int = (
            int(configured_k) if isinstance(configured_k, (int, float, str)) else 4
        )

    def calculate(self, text1: str, text2: str) -> float:
        """Calculate similarity using k-shingles."""
        if not text1 and not text2:
            return 1.0
        if not text1 or not text2:
            return 0.0
        shingle_set_a = set(self._get_shingles(text1))
        shingle_set_b = set(self._get_shingles(text2))
        if not shingle_set_a and not shingle_set_b:
            return 1.0
        if not shingle_set_a or not shingle_set_b:
            return 0.0
        shared = len(shingle_set_a & shingle_set_b)
        combined = len(shingle_set_a | shingle_set_b)
        return shared / combined if combined > 0 else 0.0

    def _get_shingles(self, text: str) -> list[str]:
        """Generate k-token shingles; short texts yield a single joined shingle."""
        tokens = self._tokenize(text)
        if len(tokens) < self.k:
            return [" ".join(tokens)]
        return [
            " ".join(tokens[i : i + self.k]) for i in range(len(tokens) - self.k + 1)
        ]

    def _tokenize(self, text: str) -> list[str]:
        """Lowercase word tokens longer than one character."""
        import re

        return [tok for tok in re.findall(r"\b\w+\b", text.lower()) if len(tok) > 1]

View File

@@ -0,0 +1 @@
"""Utility modules for the quality analysis package."""

View File

@@ -0,0 +1,222 @@
"""File discovery utilities for quality analysis."""
import fnmatch
from pathlib import Path
from typing import Any
from ..config.schemas import LanguageConfig, PathConfig
class FileFinder:
"""Finds relevant source files for analysis."""
def __init__(self, path_config: PathConfig, language_config: LanguageConfig):
    """Store the configuration objects used for file discovery."""
    # Include/exclude glob patterns and the optional max_files cap.
    self.path_config = path_config
    # Enabled languages and their recognized file extensions.
    self.language_config = language_config
def find_files(self, root_path: Path) -> list[Path]:
    """Find all relevant source files in the given path.

    Returns a single-element list when *root_path* is itself an eligible
    file, an empty list when the path does not exist, and otherwise every
    matching file from a recursive walk, capped by ``path_config.max_files``
    when that limit is set.

    FIX: removed a dead computation — the original built a set of supported
    extensions here and never used it (the extension check is delegated to
    _should_include_file via _has_supported_extension).
    """
    if not root_path.exists():
        return []
    if root_path.is_file():
        return [root_path] if self._should_include_file(root_path) else []
    found_files: list[Path] = []
    max_files = self.path_config.max_files
    # Walk the tree; include/exclude filtering (plus the supported-extension
    # check) is handled entirely by _should_include_file.
    for file_path in root_path.rglob("*"):
        if not file_path.is_file():
            continue
        # Stop once the configured file-count limit is reached
        if max_files is not None and len(found_files) >= max_files:
            break
        if self._should_include_file(file_path):
            found_files.append(file_path)
    return found_files
def find_python_files(self, root_path: Path) -> list[Path]:
"""Find only Python files."""
if not root_path.exists():
return []
if root_path.is_file():
return [root_path] if self._is_python_file(root_path) else []
found_files = []
for file_path in root_path.rglob("*.py"):
if self._should_include_file(file_path) and self._is_python_file(file_path):
found_files.append(file_path)
return found_files
def _should_include_file(self, file_path: Path) -> bool:
"""Check if a file should be included in analysis."""
path_str = str(file_path)
# Check exclude patterns first
for pattern in self.path_config.exclude_patterns:
if fnmatch.fnmatch(path_str, pattern) or fnmatch.fnmatch(
file_path.name, pattern
):
return False
# Check include patterns
for pattern in self.path_config.include_patterns:
if fnmatch.fnmatch(path_str, pattern) or fnmatch.fnmatch(
file_path.name, pattern
):
# Check if it's a supported file type
return self._has_supported_extension(file_path)
return False
def _has_supported_extension(self, file_path: Path) -> bool:
"""Check if file has a supported extension."""
suffix = file_path.suffix.lower()
for lang in self.language_config.languages:
if (
lang in self.language_config.file_extensions
and suffix in self.language_config.file_extensions[lang]
):
return True
return False
def _is_python_file(self, file_path: Path) -> bool:
"""Check if file is a Python file."""
return file_path.suffix.lower() in [".py", ".pyx", ".pyi"]
def get_file_language(self, file_path: Path) -> str | None:
"""Determine the programming language of a file."""
suffix = file_path.suffix.lower()
for lang, extensions in self.language_config.file_extensions.items():
if suffix in extensions:
return lang
return None
def get_project_stats(self, root_path: Path) -> dict[str, Any]:
"""Get statistics about files in the project."""
stats = {
"total_files": 0,
"supported_files": 0,
"excluded_files": 0,
"by_language": {},
}
if not root_path.exists():
return stats
# Initialize language counters
for lang in self.language_config.languages:
stats["by_language"][lang] = 0
# Walk through all files
for file_path in root_path.rglob("*"):
if not file_path.is_file():
continue
stats["total_files"] += 1
if self._should_include_file(file_path):
stats["supported_files"] += 1
lang = self.get_file_language(file_path)
if lang and lang in stats["by_language"]:
stats["by_language"][lang] += 1
else:
stats["excluded_files"] += 1
return stats
def filter_files_by_patterns(
self,
files: list[Path],
include_patterns: list[str] | None = None,
exclude_patterns: list[str] | None = None,
) -> list[Path]:
"""Filter files by additional patterns."""
filtered = []
for file_path in files:
path_str = str(file_path)
include = True
# Apply exclude patterns
if exclude_patterns:
for pattern in exclude_patterns:
if fnmatch.fnmatch(path_str, pattern) or fnmatch.fnmatch(
file_path.name, pattern
):
include = False
break
# Apply include patterns
if include and include_patterns:
include = False
for pattern in include_patterns:
if fnmatch.fnmatch(path_str, pattern) or fnmatch.fnmatch(
file_path.name, pattern
):
include = True
break
if include:
filtered.append(file_path)
return filtered
def get_file_size_stats(self, files: list[Path]) -> dict[str, int]:
"""Get file size statistics."""
sizes = []
total_size = 0
total_lines = 0
for file_path in files:
try:
size = file_path.stat().st_size
sizes.append(size)
total_size += size
# Count lines
with open(file_path, encoding="utf-8") as f:
lines = sum(1 for _ in f)
total_lines += lines
except (OSError, UnicodeDecodeError):
continue
if not sizes:
return {
"total_files": 0,
"total_size_bytes": 0,
"total_lines": 0,
"average_size_bytes": 0,
"average_lines_per_file": 0,
}
return {
"total_files": len(sizes),
"total_size_bytes": total_size,
"total_lines": total_lines,
"average_size_bytes": total_size // len(sizes),
"average_lines_per_file": total_lines // len(sizes),
"largest_file_bytes": max(sizes),
"smallest_file_bytes": min(sizes),
}