- Update typing in error handling and validation nodes to improve type safety.
- Refactor cache decorators for async compatibility and cleanup functionality.
- Enhance URL processing and validation logic with improved type checks.
- Centralize error handling and recovery mechanisms in nodes.
- Simplify and standardize function signatures across multiple modules for consistency.
- Resolve linting issues and ensure compliance with type safety standards.
525 lines
22 KiB
Python
525 lines
22 KiB
Python
"""Comprehensive tests for URL processing provider interfaces.
|
|
|
|
This module provides comprehensive tests for the abstract base classes
that define the provider interfaces for URL processing capabilities.
|
|
"""
|
|
|
|
import inspect
|
|
from abc import ABC
|
|
from typing import Any
|
|
|
|
import pytest
|
|
|
|
from biz_bud.core.types import URLNormalizationProviderConfigTypedDict
|
|
from biz_bud.tools.capabilities.url_processing.config import create_normalization_config
|
|
from biz_bud.tools.capabilities.url_processing.interface import (
|
|
URLDeduplicationProvider,
|
|
URLDiscoveryProvider,
|
|
URLNormalizationProvider,
|
|
URLProcessingProvider,
|
|
URLValidationProvider,
|
|
)
|
|
from biz_bud.tools.capabilities.url_processing.models import (
|
|
BatchProcessingResult,
|
|
DiscoveryMethod,
|
|
DiscoveryResult,
|
|
ProcessedURL,
|
|
ProcessingStatus,
|
|
ValidationResult,
|
|
ValidationStatus,
|
|
)
|
|
|
|
|
|
class TestURLValidationProvider:
    """Contract checks for the URLValidationProvider abstract interface."""

    def test_is_abstract_base_class(self):
        """The interface subclasses ABC and declares exactly two abstract methods."""
        required = {'validate_url', 'get_validation_level'}

        assert issubclass(URLValidationProvider, ABC)
        assert URLValidationProvider.__abstractmethods__ == required

    def test_cannot_instantiate_directly(self):
        """Instantiating the bare interface raises TypeError."""
        with pytest.raises(TypeError, match="Can't instantiate abstract class"):
            URLValidationProvider()  # type: ignore[abstract]

    def test_validate_url_method_signature(self):
        """validate_url accepts (self, url) and is annotated with str / ValidationResult."""
        signature = inspect.signature(URLValidationProvider.validate_url)
        params = signature.parameters

        assert len(params) == 2  # self and url
        assert 'url' in params
        # Each annotation may be the object itself or its string spelling.
        assert params['url'].annotation in (str, 'str')
        assert signature.return_annotation in (ValidationResult, 'ValidationResult')

    def test_get_validation_level_method_signature(self):
        """get_validation_level accepts only self and is annotated to return str."""
        signature = inspect.signature(URLValidationProvider.get_validation_level)

        assert len(signature.parameters) == 1  # self only
        assert signature.return_annotation in (str, 'str')

    def test_concrete_implementation_requirements(self):
        """A subclass that omits one abstract method still cannot be instantiated."""

        class IncompleteProvider(URLValidationProvider):
            # get_validation_level is deliberately left unimplemented.
            async def validate_url(self, url: str) -> ValidationResult:
                result = ValidationResult(
                    url=url,
                    status=ValidationStatus.VALID,
                    is_valid=True,
                    validation_level="test",
                )
                return result

        with pytest.raises(TypeError, match="Can't instantiate abstract class"):
            IncompleteProvider()  # type: ignore[abstract]

    def test_complete_implementation_works(self):
        """A subclass that implements both abstract methods can be instantiated."""

        class CompleteProvider(URLValidationProvider):
            async def validate_url(self, url: str) -> ValidationResult:
                result = ValidationResult(
                    url=url,
                    status=ValidationStatus.VALID,
                    is_valid=True,
                    validation_level="test",
                )
                return result

            def get_validation_level(self) -> str:
                return "test"

        # Construction itself must succeed without raising.
        instance = CompleteProvider()
        level = instance.get_validation_level()
        assert level == "test"
|
|
|
|
|
|
class TestURLNormalizationProvider:
    """Contract checks for the URLNormalizationProvider abstract interface."""

    def test_is_abstract_base_class(self):
        """The interface subclasses ABC and declares exactly two abstract methods."""
        required = {'normalize_url', 'get_normalization_config'}

        assert issubclass(URLNormalizationProvider, ABC)
        assert URLNormalizationProvider.__abstractmethods__ == required

    def test_cannot_instantiate_directly(self):
        """Instantiating the bare interface raises TypeError."""
        with pytest.raises(TypeError, match="Can't instantiate abstract class"):
            URLNormalizationProvider()  # type: ignore[abstract]

    def test_normalize_url_method_signature(self):
        """normalize_url accepts (self, url) with str annotations in and out."""
        signature = inspect.signature(URLNormalizationProvider.normalize_url)
        params = signature.parameters

        assert len(params) == 2  # self and url
        assert 'url' in params
        # Each annotation may be the object itself or its string spelling.
        assert params['url'].annotation in (str, 'str')
        assert signature.return_annotation in (str, 'str')

    def test_get_normalization_config_method_signature(self):
        """get_normalization_config accepts only self and returns the config TypedDict."""
        signature = inspect.signature(URLNormalizationProvider.get_normalization_config)
        accepted = (
            URLNormalizationProviderConfigTypedDict,
            "URLNormalizationProviderConfigTypedDict",
        )

        assert len(signature.parameters) == 1  # self only
        assert signature.return_annotation in accepted

    def test_complete_implementation_works(self):
        """A subclass that implements both abstract methods can be instantiated."""

        class CompleteProvider(URLNormalizationProvider):
            def normalize_url(self, url: str) -> str:
                lowered = url.lower()
                return lowered

            def get_normalization_config(self) -> URLNormalizationProviderConfigTypedDict:
                return create_normalization_config()

        # Construction itself must succeed without raising.
        instance = CompleteProvider()
        assert instance.normalize_url("HTTP://EXAMPLE.COM") == "http://example.com"

        config = instance.get_normalization_config()
        assert config.get("lowercase_domain") is True
|
|
|
|
|
|
class TestURLDiscoveryProvider:
    """Contract checks for the URLDiscoveryProvider abstract interface."""

    def test_is_abstract_base_class(self):
        """The interface subclasses ABC and declares exactly two abstract methods."""
        required = {'discover_urls', 'get_discovery_methods'}

        assert issubclass(URLDiscoveryProvider, ABC)
        assert URLDiscoveryProvider.__abstractmethods__ == required

    def test_cannot_instantiate_directly(self):
        """Instantiating the bare interface raises TypeError."""
        with pytest.raises(TypeError, match="Can't instantiate abstract class"):
            URLDiscoveryProvider()  # type: ignore[abstract]

    def test_discover_urls_method_signature(self):
        """discover_urls accepts (self, base_url) and is annotated to return DiscoveryResult."""
        signature = inspect.signature(URLDiscoveryProvider.discover_urls)
        params = signature.parameters

        assert len(params) == 2  # self and base_url
        assert 'base_url' in params
        # Each annotation may be the object itself or its string spelling.
        assert params['base_url'].annotation in (str, 'str')
        assert signature.return_annotation in (DiscoveryResult, 'DiscoveryResult')

    def test_get_discovery_methods_method_signature(self):
        """get_discovery_methods accepts only self and is annotated to return list[str]."""
        signature = inspect.signature(URLDiscoveryProvider.get_discovery_methods)

        assert len(signature.parameters) == 1  # self only
        assert signature.return_annotation in (list[str], 'list[str]')

    def test_complete_implementation_works(self):
        """A subclass that implements both abstract methods can be instantiated."""

        class CompleteProvider(URLDiscoveryProvider):
            async def discover_urls(self, base_url: str) -> DiscoveryResult:
                result = DiscoveryResult(
                    base_url=base_url,
                    discovered_urls=[base_url],
                    method=DiscoveryMethod.DIRECT,
                )
                return result

            def get_discovery_methods(self) -> list[str]:
                return ["test"]

        # Construction itself must succeed without raising.
        instance = CompleteProvider()
        methods = instance.get_discovery_methods()
        assert methods == ["test"]
|
|
|
|
|
|
class TestURLDeduplicationProvider:
    """Contract checks for the URLDeduplicationProvider abstract interface."""

    def test_is_abstract_base_class(self):
        """The interface subclasses ABC and declares exactly two abstract methods."""
        required = {'deduplicate_urls', 'get_deduplication_method'}

        assert issubclass(URLDeduplicationProvider, ABC)
        assert URLDeduplicationProvider.__abstractmethods__ == required

    def test_cannot_instantiate_directly(self):
        """Instantiating the bare interface raises TypeError."""
        with pytest.raises(TypeError, match="Can't instantiate abstract class"):
            URLDeduplicationProvider()  # type: ignore[abstract]

    def test_deduplicate_urls_method_signature(self):
        """deduplicate_urls accepts (self, urls) with list[str] annotations in and out."""
        signature = inspect.signature(URLDeduplicationProvider.deduplicate_urls)
        params = signature.parameters
        accepted = (list[str], 'list[str]')

        assert len(params) == 2  # self and urls
        assert 'urls' in params
        # Each annotation may be the object itself or its string spelling.
        assert params['urls'].annotation in accepted
        assert signature.return_annotation in accepted

    def test_get_deduplication_method_signature(self):
        """get_deduplication_method accepts only self and is annotated to return str."""
        signature = inspect.signature(URLDeduplicationProvider.get_deduplication_method)

        assert len(signature.parameters) == 1  # self only
        assert signature.return_annotation in (str, 'str')

    def test_complete_implementation_works(self):
        """A subclass that implements both abstract methods can be instantiated."""

        class CompleteProvider(URLDeduplicationProvider):
            async def deduplicate_urls(self, urls: list[str]) -> list[str]:
                # Simple set-based deduplication.
                unique = set(urls)
                return list(unique)

            def get_deduplication_method(self) -> str:
                return "set_based"

        # Construction itself must succeed without raising.
        instance = CompleteProvider()
        method_name = instance.get_deduplication_method()
        assert method_name == "set_based"
|
|
|
|
|
|
class TestURLProcessingProvider:
    """Contract checks for the URLProcessingProvider abstract interface."""

    def test_is_abstract_base_class(self):
        """The interface subclasses ABC and declares exactly three abstract methods."""
        required = {'process_urls', 'process_single_url', 'get_provider_capabilities'}

        assert issubclass(URLProcessingProvider, ABC)
        assert URLProcessingProvider.__abstractmethods__ == required

    def test_cannot_instantiate_directly(self):
        """Instantiating the bare interface raises TypeError."""
        with pytest.raises(TypeError, match="Can't instantiate abstract class"):
            URLProcessingProvider()  # type: ignore[abstract]

    def test_process_urls_method_signature(self):
        """process_urls accepts (self, urls) and is annotated to return BatchProcessingResult."""
        signature = inspect.signature(URLProcessingProvider.process_urls)
        params = signature.parameters

        assert len(params) == 2  # self and urls
        assert 'urls' in params
        # Each annotation may be the object itself or its string spelling.
        assert params['urls'].annotation in (list[str], 'list[str]')
        assert signature.return_annotation in (BatchProcessingResult, 'BatchProcessingResult')

    def test_process_single_url_method_signature(self):
        """process_single_url accepts (self, url) and is annotated to return ProcessedURL."""
        signature = inspect.signature(URLProcessingProvider.process_single_url)
        params = signature.parameters

        assert len(params) == 2  # self and url
        assert 'url' in params
        assert params['url'].annotation in (str, 'str')
        assert signature.return_annotation in (ProcessedURL, 'ProcessedURL')

    def test_get_provider_capabilities_method_signature(self):
        """get_provider_capabilities accepts only self and is annotated dict[str, Any]."""
        signature = inspect.signature(URLProcessingProvider.get_provider_capabilities)
        annotation = signature.return_annotation

        assert len(signature.parameters) == 1  # self only

        # Accept the real generic alias, or any of these textual renderings of it.
        accepted_spellings = {"dict[str, typing.Any]", "dict[str, Any]", "'dict[str, Any]'"}
        assert annotation == dict[str, Any] or str(annotation) in accepted_spellings

    def test_complete_implementation_works(self):
        """A subclass that implements all three abstract methods can be instantiated."""

        class CompleteProvider(URLProcessingProvider):
            async def process_urls(self, urls: list[str]) -> BatchProcessingResult:
                return BatchProcessingResult(total_urls=len(urls))

            async def process_single_url(self, url: str) -> ProcessedURL:
                processed = ProcessedURL(
                    original_url=url,
                    normalized_url=url,
                    final_url=url,
                    status=ProcessingStatus.SUCCESS,
                )
                return processed

            def get_provider_capabilities(self) -> dict[str, "Any"]:
                return {"type": "test", "features": ["basic"]}

        # Construction itself must succeed without raising.
        instance = CompleteProvider()
        reported = instance.get_provider_capabilities()
        assert reported["type"] == "test"
        assert "features" in reported
|
|
|
|
|
|
class TestInterfaceInheritance:
    """Tests for interface inheritance relationships."""

    @staticmethod
    def _provider_interfaces():
        """Return the five provider interfaces under test, in a fixed order.

        Built lazily inside a helper so the interface names are resolved when
        a test runs rather than when this class body is evaluated.
        """
        return [
            URLValidationProvider,
            URLNormalizationProvider,
            URLDiscoveryProvider,
            URLDeduplicationProvider,
            URLProcessingProvider,
        ]

    def test_all_interfaces_inherit_from_abc(self):
        """Test that all provider interfaces inherit from ABC.

        Each interface is checked with ``issubclass`` so the test fails, and
        names the offenders, if any interface stops deriving from ABC.
        """
        not_abc = [
            interface.__name__
            for interface in self._provider_interfaces()
            if not issubclass(interface, ABC)
        ]

        assert not not_abc, f"All interfaces should inherit from ABC; offenders: {not_abc}"

    def test_interfaces_are_independent(self):
        """Test that provider interfaces are independent (no cross-inheritance).

        Every interface must have exactly one direct base, and that base must
        be ABC itself.
        """
        interfaces = self._provider_interfaces()

        failed_single_base = [
            interface.__name__
            for interface in interfaces
            if len(interface.__bases__) != 1
        ]
        failed_abc_base = [
            interface.__name__
            for interface in interfaces
            if interface.__bases__[0] is not ABC
        ]

        assert not failed_single_base, f"Interfaces with multiple bases: {failed_single_base}"
        assert not failed_abc_base, f"Interfaces not directly inheriting from ABC: {failed_abc_base}"

    def test_interface_method_consistency(self):
        """Test that interface methods follow consistent naming patterns.

        Each interface must declare one main operation method (verb_noun) and
        one metadata accessor (get_noun) among its abstract methods.
        """
        # (interface, main operation method, metadata method)
        expected = [
            (URLValidationProvider, 'validate_url', 'get_validation_level'),
            (URLNormalizationProvider, 'normalize_url', 'get_normalization_config'),
            (URLDiscoveryProvider, 'discover_urls', 'get_discovery_methods'),
            (URLDeduplicationProvider, 'deduplicate_urls', 'get_deduplication_method'),
            (URLProcessingProvider, 'process_urls', 'get_provider_capabilities'),
        ]

        # Check that main operation methods follow verb_noun pattern
        for interface, operation, _ in expected:
            assert operation in interface.__abstractmethods__, (
                f"{interface.__name__} is missing abstract method {operation}"
            )

        # Check that metadata methods follow get_noun pattern
        for interface, _, metadata in expected:
            assert metadata in interface.__abstractmethods__, (
                f"{interface.__name__} is missing abstract method {metadata}"
            )
|
|
|
|
|
|
class TestInterfaceDocstrings:
    """Documentation checks for the provider interfaces and their abstract methods."""

    def test_all_interfaces_have_docstrings(self):
        """Every provider interface carries a non-empty class docstring."""
        interfaces = (
            URLValidationProvider,
            URLNormalizationProvider,
            URLDiscoveryProvider,
            URLDeduplicationProvider,
            URLProcessingProvider,
        )

        missing_docstrings = []
        empty_docstrings = []
        for interface in interfaces:
            doc = interface.__doc__
            if doc is None:
                missing_docstrings.append(interface.__name__)
            # A missing docstring also counts as empty.
            if not doc or len(doc.strip()) == 0:
                empty_docstrings.append(interface.__name__)

        assert not missing_docstrings, f"Interfaces missing docstrings: {missing_docstrings}"
        assert not empty_docstrings, f"Interfaces with empty docstrings: {empty_docstrings}"

    def test_all_abstract_methods_have_docstrings(self):
        """Every abstract method has a docstring with the expected sections.

        A "Returns:" section is always required; an "Args:" section is required
        only when the method takes more than one parameter and has a docstring.
        """
        interfaces = (
            URLValidationProvider,
            URLNormalizationProvider,
            URLDiscoveryProvider,
            URLDeduplicationProvider,
            URLProcessingProvider,
        )

        missing_docstrings = []
        empty_docstrings = []
        missing_args = []
        missing_returns = []

        for interface in interfaces:
            for method_name in interface.__abstractmethods__:
                method = getattr(interface, method_name)
                qualified = f"{interface.__name__}.{method_name}"
                doc = method.__doc__
                param_count = len(inspect.signature(method).parameters)

                if doc is None:
                    missing_docstrings.append(qualified)
                if not doc or len(doc.strip()) == 0:
                    empty_docstrings.append(qualified)
                # Methods without a docstring are reported above, not here.
                if param_count > 1 and doc and "Args:" not in doc:
                    missing_args.append(qualified)
                if not doc or "Returns:" not in doc:
                    missing_returns.append(qualified)

        assert not missing_docstrings, f"Methods missing docstrings: {missing_docstrings}"
        assert not empty_docstrings, f"Methods with empty docstrings: {empty_docstrings}"
        assert not missing_args, f"Methods missing Args section: {missing_args}"
        assert not missing_returns, f"Methods missing Returns section: {missing_returns}"
|
|
|
|
|
|
class TestMultipleInheritanceScenarios:
    """Tests for multiple inheritance scenarios with provider interfaces."""

    def test_can_inherit_from_multiple_interfaces(self):
        """Test that a class can inherit from multiple provider interfaces."""

        class MultiProvider(URLValidationProvider, URLNormalizationProvider):
            async def validate_url(self, url: str) -> ValidationResult:
                return ValidationResult(
                    url=url,
                    status=ValidationStatus.VALID,
                    is_valid=True,
                    validation_level="test",
                )

            def get_validation_level(self) -> str:
                return "test"

            def normalize_url(self, url: str) -> str:
                return url.lower()

            def get_normalization_config(self) -> URLNormalizationProviderConfigTypedDict:
                return create_normalization_config()

        # Should be able to instantiate
        provider = MultiProvider()
        assert provider.get_validation_level() == "test"
        assert provider.normalize_url("TEST") == "test"
        assert provider.get_normalization_config().get("lowercase_domain") is True

    def test_partial_implementation_fails(self):
        """Test that partial implementation of multiple interfaces fails."""

        class PartialProvider(URLValidationProvider, URLNormalizationProvider):
            # Only implement validation methods, missing normalization
            async def validate_url(self, url: str) -> ValidationResult:
                return ValidationResult(
                    url=url,
                    status=ValidationStatus.VALID,
                    is_valid=True,
                    validation_level="test",
                )

            def get_validation_level(self) -> str:
                return "test"

        with pytest.raises(TypeError, match="Can't instantiate abstract class"):
            PartialProvider()  # type: ignore[abstract]

    def test_interface_method_resolution_order(self):
        """Test method resolution order with multiple interface inheritance.

        Checks that both interfaces and ABC appear in the MRO, and that they
        appear in the order implied by the class declaration: the subclass
        first, then its bases in the order listed, then their shared ABC base.
        """

        class OrderedProvider(URLValidationProvider, URLNormalizationProvider):
            async def validate_url(self, url: str) -> ValidationResult:
                return ValidationResult(
                    url=url,
                    status=ValidationStatus.VALID,
                    is_valid=True,
                    validation_level="test",
                )

            def get_validation_level(self) -> str:
                return "test"

            def normalize_url(self, url: str) -> str:
                return url.lower()

            def get_normalization_config(self) -> URLNormalizationProviderConfigTypedDict:
                return create_normalization_config()

        provider = OrderedProvider()
        ordered_config = provider.get_normalization_config()
        assert ordered_config.get("lowercase_domain") is True

        # Check MRO includes all expected classes
        mro = OrderedProvider.__mro__
        assert URLValidationProvider in mro
        assert URLNormalizationProvider in mro
        assert ABC in mro

        # Check the actual ordering, not just membership: direct bases keep
        # their declaration order and a shared base comes after its subclasses.
        assert mro[0] is OrderedProvider
        validation_pos = mro.index(URLValidationProvider)
        normalization_pos = mro.index(URLNormalizationProvider)
        assert validation_pos < normalization_pos, (
            "URLValidationProvider should precede URLNormalizationProvider in the MRO"
        )
        assert normalization_pos < mro.index(ABC), (
            "ABC should follow both provider interfaces in the MRO"
        )
|