Files
biz-bud/tests/unit_tests/tools/capabilities/url_processing/test_interface.py
Travis Vasceannie 7a84d75d8e Refactor type safety checks and enhance error handling across various modules
- Update typing in error handling and validation nodes to improve type safety.
- Refactor cache decorators for async compatibility and cleanup functionality.
- Enhance URL processing and validation logic with improved type checks.
- Centralize error handling and recovery mechanisms in nodes.
- Simplify and standardize function signatures across multiple modules for consistency.
- Resolve linting issues and ensure compliance with type safety standards.
2025-09-28 13:45:52 -04:00

525 lines
22 KiB
Python

"""Comprehensive tests for URL processing provider interfaces.
This module provides comprehensive tests for the abstract base classes
that define the provider interfaces for URL processing capabilities.
"""
import inspect
from abc import ABC
from typing import Any
import pytest
from biz_bud.core.types import URLNormalizationProviderConfigTypedDict
from biz_bud.tools.capabilities.url_processing.config import create_normalization_config
from biz_bud.tools.capabilities.url_processing.interface import (
URLDeduplicationProvider,
URLDiscoveryProvider,
URLNormalizationProvider,
URLProcessingProvider,
URLValidationProvider,
)
from biz_bud.tools.capabilities.url_processing.models import (
BatchProcessingResult,
DiscoveryMethod,
DiscoveryResult,
ProcessedURL,
ProcessingStatus,
ValidationResult,
ValidationStatus,
)
class TestURLValidationProvider:
    """Contract checks for the URLValidationProvider interface."""

    def test_is_abstract_base_class(self):
        """URLValidationProvider derives from ABC and declares its abstract methods."""
        required_methods = {'validate_url', 'get_validation_level'}
        assert issubclass(URLValidationProvider, ABC)
        assert URLValidationProvider.__abstractmethods__ == required_methods

    def test_cannot_instantiate_directly(self):
        """Instantiating the bare interface raises TypeError."""
        abstract_error = "Can't instantiate abstract class"
        with pytest.raises(TypeError, match=abstract_error):
            URLValidationProvider()  # type: ignore[abstract]

    def test_validate_url_method_signature(self):
        """validate_url takes (self, url: str) and returns ValidationResult."""
        signature = inspect.signature(URLValidationProvider.validate_url)
        parameters = signature.parameters
        assert len(parameters) == 2  # self plus url
        assert 'url' in parameters
        # Annotations may be real objects or strings (postponed evaluation).
        assert parameters['url'].annotation in (str, 'str')
        assert signature.return_annotation in (ValidationResult, 'ValidationResult')

    def test_get_validation_level_method_signature(self):
        """get_validation_level takes only self and returns str."""
        signature = inspect.signature(URLValidationProvider.get_validation_level)
        assert len(signature.parameters) == 1  # self only
        assert signature.return_annotation in (str, 'str')

    def test_concrete_implementation_requirements(self):
        """A subclass that omits an abstract method cannot be instantiated."""

        class IncompleteProvider(URLValidationProvider):
            # get_validation_level is deliberately left unimplemented.
            async def validate_url(self, url: str) -> ValidationResult:
                return ValidationResult(
                    url=url,
                    status=ValidationStatus.VALID,
                    is_valid=True,
                    validation_level="test",
                )

        abstract_error = "Can't instantiate abstract class"
        with pytest.raises(TypeError, match=abstract_error):
            IncompleteProvider()  # type: ignore[abstract]

    def test_complete_implementation_works(self):
        """A subclass implementing both abstract methods can be instantiated."""

        class CompleteProvider(URLValidationProvider):
            async def validate_url(self, url: str) -> ValidationResult:
                return ValidationResult(
                    url=url,
                    status=ValidationStatus.VALID,
                    is_valid=True,
                    validation_level="test",
                )

            def get_validation_level(self) -> str:
                return "test"

        # Construction itself must not raise.
        instance = CompleteProvider()
        level = instance.get_validation_level()
        assert level == "test"
class TestURLNormalizationProvider:
    """Contract checks for the URLNormalizationProvider interface."""

    def test_is_abstract_base_class(self):
        """URLNormalizationProvider derives from ABC and declares its abstract methods."""
        required_methods = {'normalize_url', 'get_normalization_config'}
        assert issubclass(URLNormalizationProvider, ABC)
        assert URLNormalizationProvider.__abstractmethods__ == required_methods

    def test_cannot_instantiate_directly(self):
        """Instantiating the bare interface raises TypeError."""
        abstract_error = "Can't instantiate abstract class"
        with pytest.raises(TypeError, match=abstract_error):
            URLNormalizationProvider()  # type: ignore[abstract]

    def test_normalize_url_method_signature(self):
        """normalize_url takes (self, url: str) and returns str."""
        signature = inspect.signature(URLNormalizationProvider.normalize_url)
        parameters = signature.parameters
        assert len(parameters) == 2  # self plus url
        assert 'url' in parameters
        # Annotations may be real objects or strings (postponed evaluation).
        assert parameters['url'].annotation in (str, 'str')
        assert signature.return_annotation in (str, 'str')

    def test_get_normalization_config_method_signature(self):
        """get_normalization_config takes only self and returns the config TypedDict."""
        signature = inspect.signature(URLNormalizationProvider.get_normalization_config)
        assert len(signature.parameters) == 1  # self only
        accepted = (
            URLNormalizationProviderConfigTypedDict,
            "URLNormalizationProviderConfigTypedDict",
        )
        assert signature.return_annotation in accepted

    def test_complete_implementation_works(self):
        """A subclass implementing both abstract methods can be instantiated and used."""

        class CompleteProvider(URLNormalizationProvider):
            def normalize_url(self, url: str) -> str:
                return url.lower()

            def get_normalization_config(self) -> URLNormalizationProviderConfigTypedDict:
                return create_normalization_config()

        # Construction itself must not raise.
        instance = CompleteProvider()
        normalized = instance.normalize_url("HTTP://EXAMPLE.COM")
        assert normalized == "http://example.com"
        config = instance.get_normalization_config()
        assert config.get("lowercase_domain") is True
class TestURLDiscoveryProvider:
    """Contract checks for the URLDiscoveryProvider interface."""

    def test_is_abstract_base_class(self):
        """URLDiscoveryProvider derives from ABC and declares its abstract methods."""
        required_methods = {'discover_urls', 'get_discovery_methods'}
        assert issubclass(URLDiscoveryProvider, ABC)
        assert URLDiscoveryProvider.__abstractmethods__ == required_methods

    def test_cannot_instantiate_directly(self):
        """Instantiating the bare interface raises TypeError."""
        abstract_error = "Can't instantiate abstract class"
        with pytest.raises(TypeError, match=abstract_error):
            URLDiscoveryProvider()  # type: ignore[abstract]

    def test_discover_urls_method_signature(self):
        """discover_urls takes (self, base_url: str) and returns DiscoveryResult."""
        signature = inspect.signature(URLDiscoveryProvider.discover_urls)
        parameters = signature.parameters
        assert len(parameters) == 2  # self plus base_url
        assert 'base_url' in parameters
        # Annotations may be real objects or strings (postponed evaluation).
        assert parameters['base_url'].annotation in (str, 'str')
        assert signature.return_annotation in (DiscoveryResult, 'DiscoveryResult')

    def test_get_discovery_methods_method_signature(self):
        """get_discovery_methods takes only self and returns list[str]."""
        signature = inspect.signature(URLDiscoveryProvider.get_discovery_methods)
        assert len(signature.parameters) == 1  # self only
        assert signature.return_annotation in (list[str], 'list[str]')

    def test_complete_implementation_works(self):
        """A subclass implementing both abstract methods can be instantiated."""

        class CompleteProvider(URLDiscoveryProvider):
            async def discover_urls(self, base_url: str) -> DiscoveryResult:
                return DiscoveryResult(
                    base_url=base_url,
                    discovered_urls=[base_url],
                    method=DiscoveryMethod.DIRECT,
                )

            def get_discovery_methods(self) -> list[str]:
                return ["test"]

        # Construction itself must not raise.
        instance = CompleteProvider()
        methods = instance.get_discovery_methods()
        assert methods == ["test"]
class TestURLDeduplicationProvider:
    """Contract checks for the URLDeduplicationProvider interface."""

    def test_is_abstract_base_class(self):
        """URLDeduplicationProvider derives from ABC and declares its abstract methods."""
        required_methods = {'deduplicate_urls', 'get_deduplication_method'}
        assert issubclass(URLDeduplicationProvider, ABC)
        assert URLDeduplicationProvider.__abstractmethods__ == required_methods

    def test_cannot_instantiate_directly(self):
        """Instantiating the bare interface raises TypeError."""
        abstract_error = "Can't instantiate abstract class"
        with pytest.raises(TypeError, match=abstract_error):
            URLDeduplicationProvider()  # type: ignore[abstract]

    def test_deduplicate_urls_method_signature(self):
        """deduplicate_urls takes (self, urls: list[str]) and returns list[str]."""
        signature = inspect.signature(URLDeduplicationProvider.deduplicate_urls)
        parameters = signature.parameters
        assert len(parameters) == 2  # self plus urls
        assert 'urls' in parameters
        # Annotations may be real objects or strings (postponed evaluation).
        assert parameters['urls'].annotation in (list[str], 'list[str]')
        assert signature.return_annotation in (list[str], 'list[str]')

    def test_get_deduplication_method_signature(self):
        """get_deduplication_method takes only self and returns str."""
        signature = inspect.signature(URLDeduplicationProvider.get_deduplication_method)
        assert len(signature.parameters) == 1  # self only
        assert signature.return_annotation in (str, 'str')

    def test_complete_implementation_works(self):
        """A subclass implementing both abstract methods can be instantiated."""

        class CompleteProvider(URLDeduplicationProvider):
            async def deduplicate_urls(self, urls: list[str]) -> list[str]:
                # Simple set-based deduplication.
                return list(set(urls))

            def get_deduplication_method(self) -> str:
                return "set_based"

        # Construction itself must not raise.
        instance = CompleteProvider()
        method_name = instance.get_deduplication_method()
        assert method_name == "set_based"
class TestURLProcessingProvider:
    """Contract checks for the URLProcessingProvider interface."""

    def test_is_abstract_base_class(self):
        """URLProcessingProvider derives from ABC and declares its three abstract methods."""
        assert issubclass(URLProcessingProvider, ABC)
        required_methods = {'process_urls', 'process_single_url', 'get_provider_capabilities'}
        assert URLProcessingProvider.__abstractmethods__ == required_methods

    def test_cannot_instantiate_directly(self):
        """Instantiating the bare interface raises TypeError."""
        abstract_error = "Can't instantiate abstract class"
        with pytest.raises(TypeError, match=abstract_error):
            URLProcessingProvider()  # type: ignore[abstract]

    def test_process_urls_method_signature(self):
        """process_urls takes (self, urls: list[str]) and returns BatchProcessingResult."""
        signature = inspect.signature(URLProcessingProvider.process_urls)
        parameters = signature.parameters
        assert len(parameters) == 2  # self plus urls
        assert 'urls' in parameters
        # Annotations may be real objects or strings (postponed evaluation).
        assert parameters['urls'].annotation in (list[str], 'list[str]')
        assert signature.return_annotation in (BatchProcessingResult, 'BatchProcessingResult')

    def test_process_single_url_method_signature(self):
        """process_single_url takes (self, url: str) and returns ProcessedURL."""
        signature = inspect.signature(URLProcessingProvider.process_single_url)
        parameters = signature.parameters
        assert len(parameters) == 2  # self plus url
        assert 'url' in parameters
        assert parameters['url'].annotation in (str, 'str')
        assert signature.return_annotation in (ProcessedURL, 'ProcessedURL')

    def test_get_provider_capabilities_method_signature(self):
        """get_provider_capabilities takes only self and returns dict[str, Any]."""
        signature = inspect.signature(URLProcessingProvider.get_provider_capabilities)
        assert len(signature.parameters) == 1  # self only
        annotation = signature.return_annotation
        # Accept either the evaluated generic alias or one of its textual spellings.
        accepted_spellings = {"dict[str, typing.Any]", "dict[str, Any]", "'dict[str, Any]'"}
        assert annotation == dict[str, Any] or str(annotation) in accepted_spellings

    def test_complete_implementation_works(self):
        """A subclass implementing all three abstract methods can be instantiated."""

        class CompleteProvider(URLProcessingProvider):
            async def process_urls(self, urls: list[str]) -> BatchProcessingResult:
                return BatchProcessingResult(total_urls=len(urls))

            async def process_single_url(self, url: str) -> ProcessedURL:
                return ProcessedURL(
                    original_url=url,
                    normalized_url=url,
                    final_url=url,
                    status=ProcessingStatus.SUCCESS,
                )

            def get_provider_capabilities(self) -> dict[str, "Any"]:
                return {"type": "test", "features": ["basic"]}

        # Construction itself must not raise.
        instance = CompleteProvider()
        reported = instance.get_provider_capabilities()
        assert reported["type"] == "test"
        assert "features" in reported
class TestInterfaceInheritance:
    """Tests for inheritance relationships among the provider interfaces."""

    @staticmethod
    def _provider_interfaces():
        """Return the provider interfaces checked by the tests in this class."""
        return (
            URLValidationProvider,
            URLNormalizationProvider,
            URLDiscoveryProvider,
            URLDeduplicationProvider,
            URLProcessingProvider,
        )

    def test_all_interfaces_inherit_from_abc(self):
        """Test that all provider interfaces inherit from ABC."""
        # Evaluate issubclass for every interface so that a class which stops
        # deriving from ABC actually makes this test fail.
        not_abc = [
            interface.__name__
            for interface in self._provider_interfaces()
            if not issubclass(interface, ABC)
        ]
        assert not not_abc, f"All interfaces should inherit from ABC: {not_abc}"

    def test_interfaces_are_independent(self):
        """Test that provider interfaces are independent (no cross-inheritance)."""
        interfaces = self._provider_interfaces()
        multiple_bases = [
            interface.__name__ for interface in interfaces if len(interface.__bases__) != 1
        ]
        # Identity comparison: the sole direct base must be ABC itself.
        not_direct_abc = [
            interface.__name__ for interface in interfaces if interface.__bases__[0] is not ABC
        ]
        assert not multiple_bases, f"Interfaces with multiple bases: {multiple_bases}"
        assert not not_direct_abc, (
            f"Interfaces not directly inheriting from ABC: {not_direct_abc}"
        )

    def test_interface_method_consistency(self):
        """Test that interface methods follow consistent naming patterns."""
        # (interface, verb_noun operation method, get_noun metadata method)
        expected_names = (
            (URLValidationProvider, 'validate_url', 'get_validation_level'),
            (URLNormalizationProvider, 'normalize_url', 'get_normalization_config'),
            (URLDiscoveryProvider, 'discover_urls', 'get_discovery_methods'),
            (URLDeduplicationProvider, 'deduplicate_urls', 'get_deduplication_method'),
            (URLProcessingProvider, 'process_urls', 'get_provider_capabilities'),
        )
        for interface, operation, metadata in expected_names:
            abstract_methods = interface.__abstractmethods__
            assert operation in abstract_methods, (
                f"{interface.__name__} is missing abstract method {operation}"
            )
            assert metadata in abstract_methods, (
                f"{interface.__name__} is missing abstract method {metadata}"
            )
class TestInterfaceDocstrings:
    """Documentation checks for the provider interfaces and their abstract methods."""

    def test_all_interfaces_have_docstrings(self):
        """Every provider interface has a non-empty class docstring."""
        interfaces = (
            URLValidationProvider,
            URLNormalizationProvider,
            URLDiscoveryProvider,
            URLDeduplicationProvider,
            URLProcessingProvider,
        )
        missing_docstrings = []
        empty_docstrings = []
        for interface in interfaces:
            doc = interface.__doc__
            if doc is None:
                missing_docstrings.append(interface.__name__)
            # A missing docstring also counts as an empty one.
            if not doc or not doc.strip():
                empty_docstrings.append(interface.__name__)

        assert not missing_docstrings, f"Interfaces missing docstrings: {missing_docstrings}"
        assert not empty_docstrings, f"Interfaces with empty docstrings: {empty_docstrings}"

    def test_all_abstract_methods_have_docstrings(self):
        """Every abstract method has a docstring with the required sections."""
        interfaces = (
            URLValidationProvider,
            URLNormalizationProvider,
            URLDiscoveryProvider,
            URLDeduplicationProvider,
            URLProcessingProvider,
        )
        missing_docstrings = []
        empty_docstrings = []
        missing_args = []
        missing_returns = []

        for interface in interfaces:
            for method_name in interface.__abstractmethods__:
                method = getattr(interface, method_name)
                qualified_name = f"{interface.__name__}.{method_name}"
                doc = method.__doc__

                if doc is None:
                    missing_docstrings.append(qualified_name)
                if not doc or not doc.strip():
                    empty_docstrings.append(qualified_name)

                # "Args:" is only required when the method takes more than self
                # and has a docstring to inspect.
                takes_arguments = len(inspect.signature(method).parameters) > 1
                if takes_arguments and doc and "Args:" not in doc:
                    missing_args.append(qualified_name)

                # "Returns:" is required on every method.
                if not doc or "Returns:" not in doc:
                    missing_returns.append(qualified_name)

        assert not missing_docstrings, f"Methods missing docstrings: {missing_docstrings}"
        assert not empty_docstrings, f"Methods with empty docstrings: {empty_docstrings}"
        assert not missing_args, f"Methods missing Args section: {missing_args}"
        assert not missing_returns, f"Methods missing Returns section: {missing_returns}"
class TestMultipleInheritanceScenarios:
    """Checks for classes that combine several provider interfaces."""

    def test_can_inherit_from_multiple_interfaces(self):
        """A class implementing two interfaces in full can be instantiated and used."""

        class MultiProvider(URLValidationProvider, URLNormalizationProvider):
            async def validate_url(self, url: str) -> ValidationResult:
                return ValidationResult(
                    url=url,
                    status=ValidationStatus.VALID,
                    is_valid=True,
                    validation_level="test",
                )

            def get_validation_level(self) -> str:
                return "test"

            def normalize_url(self, url: str) -> str:
                return url.lower()

            def get_normalization_config(self) -> URLNormalizationProviderConfigTypedDict:
                return create_normalization_config()

        # Construction must succeed because every abstract method is implemented.
        instance = MultiProvider()
        assert instance.get_validation_level() == "test"
        assert instance.normalize_url("TEST") == "test"
        config = instance.get_normalization_config()
        assert config.get("lowercase_domain") is True

    def test_partial_implementation_fails(self):
        """Implementing only one of two inherited interfaces leaves the class abstract."""

        class PartialProvider(URLValidationProvider, URLNormalizationProvider):
            # Validation methods only; the normalization methods are left out.
            async def validate_url(self, url: str) -> ValidationResult:
                return ValidationResult(
                    url=url,
                    status=ValidationStatus.VALID,
                    is_valid=True,
                    validation_level="test",
                )

            def get_validation_level(self) -> str:
                return "test"

        abstract_error = "Can't instantiate abstract class"
        with pytest.raises(TypeError, match=abstract_error):
            PartialProvider()  # type: ignore[abstract]

    def test_interface_method_resolution_order(self):
        """The MRO of a multi-interface class contains both interfaces and ABC."""

        class OrderedProvider(URLValidationProvider, URLNormalizationProvider):
            async def validate_url(self, url: str) -> ValidationResult:
                return ValidationResult(
                    url=url,
                    status=ValidationStatus.VALID,
                    is_valid=True,
                    validation_level="test",
                )

            def get_validation_level(self) -> str:
                return "test"

            def normalize_url(self, url: str) -> str:
                return url.lower()

            def get_normalization_config(self) -> URLNormalizationProviderConfigTypedDict:
                return create_normalization_config()

        instance = OrderedProvider()
        config = instance.get_normalization_config()
        assert config.get("lowercase_domain") is True

        # Both interfaces and ABC must appear in the resolution order.
        resolution_order = OrderedProvider.__mro__
        assert URLValidationProvider in resolution_order
        assert URLNormalizationProvider in resolution_order
        assert ABC in resolution_order