Add script and certificate for git.lab integration

- Introduce add-git-lab-cert.sh to automate the installation of the git.lab certificate into system stores for Ubuntu, CentOS, and Alpine Linux.
- Add git.lab.crt certificate file for secure connections.
- Enhance error handling in paperless agent to provide user-friendly fallback messages during LLM access issues.
- Improve receipt.py to refine price pattern matching and avoid treating unit-annotated numbers as prices.
This commit is contained in:
2025-09-28 20:14:12 -04:00
parent 911b0aed0e
commit 8d09d6efb2
7 changed files with 220 additions and 72 deletions

66
add-git-lab-cert.sh Executable file
View File

@@ -0,0 +1,66 @@
#!/bin/bash
# Script to add git.lab certificate to system certificate store
# This script needs to be run with sudo privileges
#
# Installs the git.lab TLS certificate into the system trust store
# (Alpine, Ubuntu/Debian, CentOS/RHEL/Fedora), adds it to a Git-specific
# CA bundle as a fallback, then verifies that Git can reach the server
# over HTTPS.
set -e
CERT_FILE="git.lab.crt"
CERT_NAME="git.lab"
echo "Adding git.lab certificate to system certificate store..."
# Check if certificate file exists; if not, fetch it from the server.
if [ ! -f "$CERT_FILE" ]; then
    echo "Downloading certificate from git.lab..."
    # Run the pipeline inside the `if` so a failure doesn't trip `set -e`
    # before we can report it; s_client errors are silenced, so also verify
    # the output file is non-empty before trusting it.
    if ! openssl s_client -connect git.lab:443 -servername git.lab < /dev/null 2>/dev/null \
            | openssl x509 -outform PEM > "$CERT_FILE" || [ ! -s "$CERT_FILE" ]; then
        echo "ERROR: failed to download certificate from git.lab" >&2
        rm -f "$CERT_FILE"
        exit 1
    fi
    echo "Certificate saved to $CERT_FILE"
fi
# For Alpine Linux (check first: Alpine shares /usr/local/share/ca-certificates
# with Debian-style systems, so testing the directory alone would double-install
# with a misleading message)
if [ -f "/etc/alpine-release" ] && [ -d "/usr/local/share/ca-certificates" ]; then
    echo "Installing certificate for Alpine Linux..."
    sudo cp "$CERT_FILE" "/usr/local/share/ca-certificates/${CERT_NAME}.crt"
    sudo update-ca-certificates
    echo "Certificate added to Alpine certificate store"
# For Ubuntu/Debian systems
elif [ -d "/usr/local/share/ca-certificates" ]; then
    echo "Installing certificate for Ubuntu/Debian..."
    sudo cp "$CERT_FILE" "/usr/local/share/ca-certificates/${CERT_NAME}.crt"
    sudo update-ca-certificates
    echo "Certificate added to Ubuntu/Debian certificate store"
fi
# For CentOS/RHEL/Fedora systems
if [ -d "/etc/pki/ca-trust/source/anchors" ]; then
    echo "Installing certificate for CentOS/RHEL/Fedora..."
    sudo cp "$CERT_FILE" "/etc/pki/ca-trust/source/anchors/${CERT_NAME}.crt"
    sudo update-ca-trust
    echo "Certificate added to CentOS/RHEL/Fedora certificate store"
fi
# Also add to Git's certificate bundle as backup
echo "Creating Git-specific certificate bundle..."
if [ -f "/etc/ssl/certs/ca-certificates.crt" ]; then
    cat /etc/ssl/certs/ca-certificates.crt "$CERT_FILE" > ~/.gitcerts.pem
    git config --global http.sslCAInfo ~/.gitcerts.pem
    echo "Git certificate bundle created at ~/.gitcerts.pem"
fi
echo ""
echo "Certificate installation complete!"
echo ""
echo "Testing Git connection..."
# Must test over HTTPS: a plain-HTTP probe never performs a TLS handshake,
# so it would "succeed" regardless of whether the certificate works.
if git ls-remote --heads https://git.lab/vasceannie/biz-bud.git >/dev/null 2>&1; then
    echo "✅ SUCCESS: Git can now connect to git.lab"
else
    echo "❌ Git connection still failing. You may need to:"
    echo "   1. Restart your terminal/shell"
    echo "   2. Check if git.lab should be accessed via a different hostname"
    echo "   3. Contact your GitLab administrator"
fi
echo ""
echo "To clean up the certificate file, run: rm $CERT_FILE"

20
git.lab.crt Normal file
View File

@@ -0,0 +1,20 @@
-----BEGIN CERTIFICATE-----
MIIDWDCCAkCgAwIBAgIUOake/moDwMcL5KxFnUmmvXLhvSkwDQYJKoZIhvcNAQEL
BQAwMDEOMAwGA1UEAwwFKi5sYWIxETAPBgNVBAoMCEludGVybmFsMQswCQYDVQQG
EwJVUzAeFw0yNTA3MTMxODQzNDdaFw0zNTA3MTExODQzNDdaMDAxDjAMBgNVBAMM
BSoubGFiMREwDwYDVQQKDAhJbnRlcm5hbDELMAkGA1UEBhMCVVMwggEiMA0GCSqG
SIb3DQEBAQUAA4IBDwAwggEKAoIBAQCyPSwUX/gDVsNQkYXE1LPq9DjZQ8uh/5RB
+NP8jQgFJ6ztPwRBMer4U0rfd3PVMoF7ko1ZZdoo2BotUbCLHiEwgWWbOupiWq1F
R11jpmRW2tQJHEEez6YGtqkZRs9YQN9p0jjAfizW+dwRkyn1n8BojXyVK1PUulQ+
TcBxWl18zRNf+JVqY4lyyT3aHp8OCThqzY+Mnyb57mTIypmA0EsfbrK55UY4D1QT
dcOSW3lAmNZfLEOrhl8rVP93/P9pDBISaol8YlhEsk/Q5PCsM/AX7k3pjI57C8iT
SeryjYCi6tbEnHRFd40oWtu8n9sUQ9yggktsshRvno/LJLEMo5LvAgMBAAGjajBo
MB0GA1UdDgQWBBTDiclF3GIA6B0Rgb3QOYSXWMDY/DAfBgNVHSMEGDAWgBTDiclF
3GIA6B0Rgb3QOYSXWMDY/DAPBgNVHRMBAf8EBTADAQH/MBUGA1UdEQQOMAyCBSou
bGFiggNsYWIwDQYJKoZIhvcNAQELBQADggEBABZiygqVi3lcUnlMcalfgb0acKLF
YCThpOQN7Df/BQMNxBZ/t3K+rL1bU0ghpcZ38uCj/UdSlnvYa8cGFHjAS6bhbdpk
ZMm0ZzJszQjdKBHtYnQk7sn5KZ/BOB6bPdXlxl556B2Ybc0wKH8f3xUeVgLheHMr
+Cxu24+FOMqZeBJQ87BsPY/Pmy8QBQDncaffnscYAS4Jq+hKVwxtomP9UT1wkQGd
0wwYMIEw+6y3Ya9/dPAirFEWEGQizATJZRUnG9AEh1GJlIkkHm9ieQtuS+8G0vUw
brw/fl/LcyK7S1U7bnz9CCGBeaa3kE45dc2MmiSI9TrMiReOItaO2zfbcyc=
-----END CERTIFICATE-----

View File

@@ -508,8 +508,23 @@ def categorize_error(
"broken pipe",
"address already in use",
]
# Common Python runtime/type phrases that indicate logic issues rather than user validation mistakes
runtime_type_indicators = [
"not subscriptable",
"has no attribute",
"unsupported operand type",
"cannot concatenate",
"division by zero",
"object is not iterable",
"index out of range",
"list index out of range",
"key error",
"unhashable type",
]
if any(indicator in message for indicator in system_indicators):
return ErrorCategory.UNKNOWN, None
if any(indicator in message for indicator in runtime_type_indicators):
return ErrorCategory.STATE, ErrorNamespace.STATE_INVALID_KEY
if "missing" in message and any(
term in message for term in ["field", "parameter", "param", "argument", "key"]
):

View File

@@ -174,10 +174,8 @@ class ServiceError(MetadataInjectMixin, BusinessBuddyError):
context: ErrorContext | None = None,
cause: Exception | None = None,
):
base_context = self._prepare_context(
context=context, service_name=service_name
)
# Shallow copy to avoid mutating shared context instances
base_context = context or ErrorContext()
# Create isolated copy first to avoid mutating shared context instances
isolated_context = ErrorContext(
node_name=base_context.node_name,
tool_name=base_context.tool_name,
@@ -186,6 +184,10 @@ class ServiceError(MetadataInjectMixin, BusinessBuddyError):
operation=base_context.operation,
metadata=dict(base_context.metadata),
)
# Inject metadata only on the isolated context
isolated_context = self._prepare_context(
context=isolated_context, service_name=service_name
)
if operation:
isolated_context.operation = operation
super().__init__(
@@ -585,7 +587,9 @@ class ParameterValidationError(ValidationError):
if parameter_name:
self.context.metadata["parameter_name"] = parameter_name
if parameter_value is not None:
self.context.metadata["parameter_value"] = str(parameter_value)
self.context.metadata["parameter_value"] = coerce_json_value(
parameter_value
)
if expected_range:
self.context.metadata["expected_range"] = expected_range
if validation_type:
@@ -671,15 +675,28 @@ class URLProcessingError(BusinessBuddyError):
cause: Exception | None = None,
) -> None:
"""Initialize URL processing error with additional details."""
# Create an isolated copy to avoid mutating shared contexts
base_context = context or ErrorContext()
# Sanitize and cap details before storing
safe_details: JSONObject = {}
total_size = 0
if details:
for key, value in details.items():
v = coerce_json_value(value)
if isinstance(v, str) and len(v) > 1000:
v = v[:1000]
safe_details[key] = v
total_size += len(str(key)) + len(str(v))
if total_size > 8000:
safe_details["__truncated__"] = True
break
isolated_context = ErrorContext(
node_name=base_context.node_name,
tool_name=base_context.tool_name,
retry_count=base_context.retry_count,
max_retries=base_context.max_retries,
operation=base_context.operation,
metadata=dict(base_context.metadata),
metadata={**base_context.metadata, **safe_details},
)
super().__init__(
message,
@@ -689,17 +706,8 @@ class URLProcessingError(BusinessBuddyError):
cause or original_error,
ErrorNamespace.VAL_SCHEMA_ERROR,
)
# Store additional details
self.details = details or {}
self.details = safe_details
self.original_error = original_error
if details:
safe_details: JSONObject = {}
for key, value in details.items():
safe_details[key] = coerce_json_value(value)
for key, value in list(safe_details.items()):
if isinstance(value, str) and len(value) > 1000:
safe_details[key] = value[:1000]
self.context.metadata.update(safe_details)
class URLValidationError(URLProcessingError):
@@ -1235,44 +1243,58 @@ class JsonParsingError(BusinessBuddyError):
parse_error: str | None = None,
context: ErrorContext | None = None,
cause: Exception | None = None,
# Additional parameters for compatibility with llm/utils.py
error_type: JsonParsingErrorType | str | None = None,
response_preview: str | None = None,
recovery_attempted: bool = False,
original_error: Exception | None = None,
):
"""Initialize JSON parsing error with parsing details."""
base_context = context or ErrorContext()
# Normalize and truncate previews up front
preview_source = json_text if json_text is not None else response_preview
safe_preview = (preview_source or "")[:2000] or None
safe_error_type = (
error_type.value
if isinstance(error_type, JsonParsingErrorType)
else (str(error_type) if error_type else None)
)
# Build metadata before base init
pre_metadata: JSONObject = {}
if safe_error_type:
pre_metadata["error_type"] = safe_error_type
if safe_preview:
pre_metadata["response_preview"] = safe_preview[:200]
if json_text is not None:
pre_metadata["json_length"] = len(json_text)
if recovery_attempted:
pre_metadata["recovery_attempted"] = recovery_attempted
isolated_context = ErrorContext(
node_name=base_context.node_name,
tool_name=base_context.tool_name,
retry_count=base_context.retry_count,
max_retries=base_context.max_retries,
operation=base_context.operation,
metadata={**base_context.metadata, **pre_metadata},
)
super().__init__(
message,
ErrorSeverity.ERROR,
ErrorCategory.PARSING,
context,
isolated_context,
cause or original_error,
ErrorNamespace.PAR_JSON_INVALID,
)
preview_text = (json_text or response_preview) or ""
self.json_text = json_text
self.parse_error = parse_error
self.error_type: JsonParsingErrorType | str | None = error_type
self.response_preview = preview_text[:2000] if preview_text else None
self.error_type = error_type
self.response_preview = safe_preview
self.recovery_attempted = recovery_attempted
self.original_error = original_error
# Add additional context
if error_type:
type_value = (
error_type.value
if isinstance(error_type, JsonParsingErrorType)
else str(error_type)
)
self.context.metadata["error_type"] = type_value
if self.response_preview:
self.context.metadata["response_preview"] = self.response_preview[:200]
if json_text is not None:
self.context.metadata["json_length"] = len(json_text)
if recovery_attempted:
self.context.metadata["recovery_attempted"] = recovery_attempted
def to_log_context(self) -> JSONObject:
"""Generate log context for structured logging."""
# Use json_text for preview if response_preview is not available or short

View File

@@ -277,32 +277,36 @@ class SecurityValidator:
)
def _calculate_nesting_depth(
self, obj: JSONValue | tuple[JSONValue, ...], current_depth: int = 0
self,
obj: JSONValue | tuple[JSONValue, ...],
current_depth: int = 0,
_visited: set[int] | None = None,
) -> int:
"""Calculate the maximum nesting depth of a data structure.
"""Calculate the maximum nesting depth of a data structure with cycle safety."""
if _visited is None:
_visited = set()
Args:
obj: Object to analyze
current_depth: Current depth level
Returns:
Maximum nesting depth
"""
if current_depth > 20: # Prevent infinite recursion
return current_depth
obj_id = id(obj)
if obj_id in _visited:
return current_depth
_visited.add(obj_id)
if isinstance(obj, dict):
if not obj:
return current_depth
return max(
self._calculate_nesting_depth(value, current_depth + 1)
self._calculate_nesting_depth(value, current_depth + 1, _visited.copy())
for value in obj.values()
)
elif isinstance(obj, (list, tuple)):
if not obj:
return current_depth
return max(
self._calculate_nesting_depth(item, current_depth + 1) for item in obj
self._calculate_nesting_depth(item, current_depth + 1, _visited.copy())
for item in obj
)
else:
return current_depth

View File

@@ -328,10 +328,11 @@ async def paperless_agent_node(
llm = llm_client.llm
if llm is None:
logger.error("Failed to obtain LLM instance from service")
raise ToolError(
"Failed to obtain LLM instance from service; aborting agent step.",
tool_name="paperless_agent_node",
# Return a helpful AI message instead of raising
fallback_msg = AIMessage(
content="I'm having trouble accessing the language model right now. Please try again shortly."
)
return {"messages": [fallback_msg]}
# Bind tools to the LLM
llm_with_tools = llm.bind_tools(PAPERLESS_TOOLS)
@@ -371,22 +372,30 @@ async def paperless_agent_node(
llm_minimal = llm.bind_tools(essential_tools)
response = await llm_minimal.ainvoke(messages)
retry_elapsed = time.time() - retry_start
logger.info(
f"Retry successful with minimal tools in {retry_elapsed:.2f}s"
)
logger.info(f"Retry successful with minimal tools in {retry_elapsed:.2f}s")
except Exception as retry_e:
retry_elapsed = time.time() - retry_start
logger.error(
f"Retry with minimal tools failed after {retry_elapsed:.2f}s: {retry_e}"
logger.error(f"Retry with minimal tools failed after {retry_elapsed:.2f}s: {retry_e}")
# Fallback AI message to keep graph response shape valid
fallback = AIMessage(
content="I'm experiencing an upstream service issue. Please try again later."
)
raise retry_e
return {"messages": [fallback]}
elif "context_length_exceeded" in err_text or "maximum context length" in err_text:
logger.error(
f"Context length exceeded with {token_count} tokens - message history management may have failed"
)
raise
# Return a helpful AI message instead of raising
fallback = AIMessage(
content="This conversation is too long to process right now. Please shorten your request or start a new thread."
)
return {"messages": [fallback]}
else:
raise
# Unknown error path: return safe fallback to avoid graph breakage
fallback = AIMessage(
content="An unexpected error occurred while generating a response. Please try again."
)
return {"messages": [fallback]}
# Apply message history management AFTER generating response

View File

@@ -88,15 +88,23 @@ def generate_intelligent_search_variations(original_desc: str) -> list[str]:
expanded_desc = desc
expanded_words: list[str] = []
for word in desc.split():
# Check if word matches any abbreviation
# Check if word matches any abbreviation directly
expanded_word = expansions.get(word, word)
# Also try partial matches for compound abbreviations
if word not in expansions:
# Only attempt partial expansion when the token looks alphabetic and manageable in length
if (
word not in expansions
and word.isalpha()
and 2 < len(word) <= 20
):
for abbr, full in expansions.items():
if abbr in word and len(abbr) > 2:
expanded_word = word.replace(abbr, full)
break
# Append the final expanded word
if len(abbr) <= 2:
continue
if re.search(rf"(^|[^A-Z0-9]){re.escape(abbr)}([^A-Z0-9]|$)", word):
candidate = re.sub(rf"{re.escape(abbr)}", full, word)
if candidate != word and len(candidate) <= 40:
expanded_word = candidate
break
expanded_words.append(expanded_word)
expanded_desc = ' '.join(expanded_words)
@@ -310,13 +318,17 @@ def extract_structured_line_item_data(
if unit_price is None and total_price is None:
price_text = price_info if price_info else desc
# Avoid treating unit-annotated numbers as prices
unit_lookahead = r"(?:LB|LBS|OZ|OZS|CT|COUNT|EA|EACH)"
price_patterns = [
# Prefer explicit currency-prefixed amounts
r"\$\s*(\d{1,3}(?:,\d{3})*(?:\.\d{2})?|\d+(?:\.\d{2})?)",
# Bare decimals only when preceded by price cues
r"(?:(?:price|total|amount|cost)\s*[:\-]?\s*)\b(\d{1,3}(?:,\d{3})*\.\d{2}|\d+\.\d{2})\b",
# Split integer and cents with cues
r"(?:(?:price|total|amount|cost)\s*[:\-]?\s*)\b(\d{1,3}(?:,\d{3})*|\d+)\.(\d{2})\b",
# Prefer explicit currency-prefixed amounts with cents
rf"(?:\$\s*|USD\s*)\b(\d{{1,3}}(?:,\d{{3}})*\.\d{{2}})\b(?!\s*{unit_lookahead}\b)",
# Allow integer dollars when explicitly marked as currency with safe trailing context
rf"(?:\$\s*|USD\s*)\b(\d{{1,3}}(?:,\d{{3}})*)\b(?=\s*(?:usd|dollars?)?\b|$)(?!\s*{unit_lookahead}\b)",
# Bare decimals only when preceded by price cues; avoid unit-followed numbers
rf"(?:(?:price|total|amount|cost)\s*[:\-]?\s*)\b(\d{{1,3}}(?:,\d{{3}})*\.\d{{2}}|\d+\.\d{{2}})\b(?!\s*{unit_lookahead}\b)",
# Split integer and cents with cues; avoid unit-followed numbers
rf"(?:(?:price|total|amount|cost)\s*[:\-]?\s*)\b(\d{{1,3}}(?:,\d{{3}})*|\d+)\.(\d{{2}})\b(?!\s*{unit_lookahead}\b)",
]
for pattern in price_patterns: