Add script and certificate for git.lab integration

- Introduce add-git-lab-cert.sh to automate the installation of the git.lab certificate into system stores for Ubuntu, CentOS, and Alpine Linux.
- Add git.lab.crt certificate file for secure connections.
- Enhance error handling in paperless agent to provide user-friendly fallback messages during LLM access issues.
- Improve receipt.py to refine price pattern matching and avoid treating unit-annotated numbers as prices.
This commit is contained in:
2025-09-28 20:14:12 -04:00
parent 911b0aed0e
commit 8d09d6efb2
7 changed files with 220 additions and 72 deletions

66
add-git-lab-cert.sh Executable file
View File

@@ -0,0 +1,66 @@
#!/bin/bash
# Script to add git.lab certificate to system certificate store
# This script needs to be run with sudo privileges
#
# Installs the git.lab TLS certificate into the system trust store
# (Alpine, Ubuntu/Debian, CentOS/RHEL/Fedora), adds it to a Git-specific
# CA bundle as a fallback, then verifies that Git can reach the server
# over HTTPS.
set -e
CERT_FILE="git.lab.crt"
CERT_NAME="git.lab"
echo "Adding git.lab certificate to system certificate store..."
# Check if certificate file exists; if not, fetch it from the server.
if [ ! -f "$CERT_FILE" ]; then
    echo "Downloading certificate from git.lab..."
    # Run the pipeline inside the `if` so a failure doesn't trip `set -e`
    # before we can report it; s_client errors are silenced, so also verify
    # the output file is non-empty before trusting it.
    if ! openssl s_client -connect git.lab:443 -servername git.lab < /dev/null 2>/dev/null \
            | openssl x509 -outform PEM > "$CERT_FILE" || [ ! -s "$CERT_FILE" ]; then
        echo "ERROR: failed to download certificate from git.lab" >&2
        rm -f "$CERT_FILE"
        exit 1
    fi
    echo "Certificate saved to $CERT_FILE"
fi
# For Alpine Linux (check first: Alpine shares /usr/local/share/ca-certificates
# with Debian-style systems, so testing the directory alone would double-install
# with a misleading message)
if [ -f "/etc/alpine-release" ] && [ -d "/usr/local/share/ca-certificates" ]; then
    echo "Installing certificate for Alpine Linux..."
    sudo cp "$CERT_FILE" "/usr/local/share/ca-certificates/${CERT_NAME}.crt"
    sudo update-ca-certificates
    echo "Certificate added to Alpine certificate store"
# For Ubuntu/Debian systems
elif [ -d "/usr/local/share/ca-certificates" ]; then
    echo "Installing certificate for Ubuntu/Debian..."
    sudo cp "$CERT_FILE" "/usr/local/share/ca-certificates/${CERT_NAME}.crt"
    sudo update-ca-certificates
    echo "Certificate added to Ubuntu/Debian certificate store"
fi
# For CentOS/RHEL/Fedora systems
if [ -d "/etc/pki/ca-trust/source/anchors" ]; then
    echo "Installing certificate for CentOS/RHEL/Fedora..."
    sudo cp "$CERT_FILE" "/etc/pki/ca-trust/source/anchors/${CERT_NAME}.crt"
    sudo update-ca-trust
    echo "Certificate added to CentOS/RHEL/Fedora certificate store"
fi
# Also add to Git's certificate bundle as backup
echo "Creating Git-specific certificate bundle..."
if [ -f "/etc/ssl/certs/ca-certificates.crt" ]; then
    cat /etc/ssl/certs/ca-certificates.crt "$CERT_FILE" > ~/.gitcerts.pem
    git config --global http.sslCAInfo ~/.gitcerts.pem
    echo "Git certificate bundle created at ~/.gitcerts.pem"
fi
echo ""
echo "Certificate installation complete!"
echo ""
echo "Testing Git connection..."
# Must test over HTTPS: a plain-HTTP probe never performs a TLS handshake,
# so it would "succeed" regardless of whether the certificate works.
if git ls-remote --heads https://git.lab/vasceannie/biz-bud.git >/dev/null 2>&1; then
    echo "✅ SUCCESS: Git can now connect to git.lab"
else
    echo "❌ Git connection still failing. You may need to:"
    echo "   1. Restart your terminal/shell"
    echo "   2. Check if git.lab should be accessed via a different hostname"
    echo "   3. Contact your GitLab administrator"
fi
echo ""
echo "To clean up the certificate file, run: rm $CERT_FILE"

20
git.lab.crt Normal file
View File

@@ -0,0 +1,20 @@
-----BEGIN CERTIFICATE-----
MIIDWDCCAkCgAwIBAgIUOake/moDwMcL5KxFnUmmvXLhvSkwDQYJKoZIhvcNAQEL
BQAwMDEOMAwGA1UEAwwFKi5sYWIxETAPBgNVBAoMCEludGVybmFsMQswCQYDVQQG
EwJVUzAeFw0yNTA3MTMxODQzNDdaFw0zNTA3MTExODQzNDdaMDAxDjAMBgNVBAMM
BSoubGFiMREwDwYDVQQKDAhJbnRlcm5hbDELMAkGA1UEBhMCVVMwggEiMA0GCSqG
SIb3DQEBAQUAA4IBDwAwggEKAoIBAQCyPSwUX/gDVsNQkYXE1LPq9DjZQ8uh/5RB
+NP8jQgFJ6ztPwRBMer4U0rfd3PVMoF7ko1ZZdoo2BotUbCLHiEwgWWbOupiWq1F
R11jpmRW2tQJHEEez6YGtqkZRs9YQN9p0jjAfizW+dwRkyn1n8BojXyVK1PUulQ+
TcBxWl18zRNf+JVqY4lyyT3aHp8OCThqzY+Mnyb57mTIypmA0EsfbrK55UY4D1QT
dcOSW3lAmNZfLEOrhl8rVP93/P9pDBISaol8YlhEsk/Q5PCsM/AX7k3pjI57C8iT
SeryjYCi6tbEnHRFd40oWtu8n9sUQ9yggktsshRvno/LJLEMo5LvAgMBAAGjajBo
MB0GA1UdDgQWBBTDiclF3GIA6B0Rgb3QOYSXWMDY/DAfBgNVHSMEGDAWgBTDiclF
3GIA6B0Rgb3QOYSXWMDY/DAPBgNVHRMBAf8EBTADAQH/MBUGA1UdEQQOMAyCBSou
bGFiggNsYWIwDQYJKoZIhvcNAQELBQADggEBABZiygqVi3lcUnlMcalfgb0acKLF
YCThpOQN7Df/BQMNxBZ/t3K+rL1bU0ghpcZ38uCj/UdSlnvYa8cGFHjAS6bhbdpk
ZMm0ZzJszQjdKBHtYnQk7sn5KZ/BOB6bPdXlxl556B2Ybc0wKH8f3xUeVgLheHMr
+Cxu24+FOMqZeBJQ87BsPY/Pmy8QBQDncaffnscYAS4Jq+hKVwxtomP9UT1wkQGd
0wwYMIEw+6y3Ya9/dPAirFEWEGQizATJZRUnG9AEh1GJlIkkHm9ieQtuS+8G0vUw
brw/fl/LcyK7S1U7bnz9CCGBeaa3kE45dc2MmiSI9TrMiReOItaO2zfbcyc=
-----END CERTIFICATE-----

View File

@@ -508,8 +508,23 @@ def categorize_error(
"broken pipe",
"address already in use",
]
# Common Python runtime/type phrases that indicate logic issues rather than user validation mistakes
runtime_type_indicators = [
"not subscriptable",
"has no attribute",
"unsupported operand type",
"cannot concatenate",
"division by zero",
"object is not iterable",
"index out of range",
"list index out of range",
"key error",
"unhashable type",
]
if any(indicator in message for indicator in system_indicators):
return ErrorCategory.UNKNOWN, None
if any(indicator in message for indicator in runtime_type_indicators):
return ErrorCategory.STATE, ErrorNamespace.STATE_INVALID_KEY
if "missing" in message and any(
term in message for term in ["field", "parameter", "param", "argument", "key"]
):

View File

@@ -174,10 +174,8 @@ class ServiceError(MetadataInjectMixin, BusinessBuddyError):
context: ErrorContext | None = None,
cause: Exception | None = None,
):
base_context = self._prepare_context(
context=context, service_name=service_name
)
# Shallow copy to avoid mutating shared context instances
base_context = context or ErrorContext()
# Create isolated copy first to avoid mutating shared context instances
isolated_context = ErrorContext(
node_name=base_context.node_name,
tool_name=base_context.tool_name,
@@ -186,6 +184,10 @@ class ServiceError(MetadataInjectMixin, BusinessBuddyError):
operation=base_context.operation,
metadata=dict(base_context.metadata),
)
# Inject metadata only on the isolated context
isolated_context = self._prepare_context(
context=isolated_context, service_name=service_name
)
if operation:
isolated_context.operation = operation
super().__init__(
@@ -585,7 +587,9 @@ class ParameterValidationError(ValidationError):
if parameter_name:
self.context.metadata["parameter_name"] = parameter_name
if parameter_value is not None:
self.context.metadata["parameter_value"] = str(parameter_value)
self.context.metadata["parameter_value"] = coerce_json_value(
parameter_value
)
if expected_range:
self.context.metadata["expected_range"] = expected_range
if validation_type:
@@ -671,15 +675,28 @@ class URLProcessingError(BusinessBuddyError):
cause: Exception | None = None,
) -> None:
"""Initialize URL processing error with additional details."""
# Create an isolated copy to avoid mutating shared contexts
base_context = context or ErrorContext()
# Sanitize and cap details before storing
safe_details: JSONObject = {}
total_size = 0
if details:
for key, value in details.items():
v = coerce_json_value(value)
if isinstance(v, str) and len(v) > 1000:
v = v[:1000]
safe_details[key] = v
total_size += len(str(key)) + len(str(v))
if total_size > 8000:
safe_details["__truncated__"] = True
break
isolated_context = ErrorContext(
node_name=base_context.node_name,
tool_name=base_context.tool_name,
retry_count=base_context.retry_count,
max_retries=base_context.max_retries,
operation=base_context.operation,
metadata=dict(base_context.metadata),
metadata={**base_context.metadata, **safe_details},
)
super().__init__(
message,
@@ -689,17 +706,8 @@ class URLProcessingError(BusinessBuddyError):
cause or original_error,
ErrorNamespace.VAL_SCHEMA_ERROR,
)
# Store additional details
self.details = details or {}
self.details = safe_details
self.original_error = original_error
if details:
safe_details: JSONObject = {}
for key, value in details.items():
safe_details[key] = coerce_json_value(value)
for key, value in list(safe_details.items()):
if isinstance(value, str) and len(value) > 1000:
safe_details[key] = value[:1000]
self.context.metadata.update(safe_details)
class URLValidationError(URLProcessingError):
@@ -1235,44 +1243,58 @@ class JsonParsingError(BusinessBuddyError):
parse_error: str | None = None,
context: ErrorContext | None = None,
cause: Exception | None = None,
# Additional parameters for compatibility with llm/utils.py
error_type: JsonParsingErrorType | str | None = None,
response_preview: str | None = None,
recovery_attempted: bool = False,
original_error: Exception | None = None,
):
"""Initialize JSON parsing error with parsing details."""
base_context = context or ErrorContext()
# Normalize and truncate previews up front
preview_source = json_text if json_text is not None else response_preview
safe_preview = (preview_source or "")[:2000] or None
safe_error_type = (
error_type.value
if isinstance(error_type, JsonParsingErrorType)
else (str(error_type) if error_type else None)
)
# Build metadata before base init
pre_metadata: JSONObject = {}
if safe_error_type:
pre_metadata["error_type"] = safe_error_type
if safe_preview:
pre_metadata["response_preview"] = safe_preview[:200]
if json_text is not None:
pre_metadata["json_length"] = len(json_text)
if recovery_attempted:
pre_metadata["recovery_attempted"] = recovery_attempted
isolated_context = ErrorContext(
node_name=base_context.node_name,
tool_name=base_context.tool_name,
retry_count=base_context.retry_count,
max_retries=base_context.max_retries,
operation=base_context.operation,
metadata={**base_context.metadata, **pre_metadata},
)
super().__init__(
message,
ErrorSeverity.ERROR,
ErrorCategory.PARSING,
context,
isolated_context,
cause or original_error,
ErrorNamespace.PAR_JSON_INVALID,
)
preview_text = (json_text or response_preview) or ""
self.json_text = json_text
self.parse_error = parse_error
self.error_type: JsonParsingErrorType | str | None = error_type
self.response_preview = preview_text[:2000] if preview_text else None
self.error_type = error_type
self.response_preview = safe_preview
self.recovery_attempted = recovery_attempted
self.original_error = original_error
# Add additional context
if error_type:
type_value = (
error_type.value
if isinstance(error_type, JsonParsingErrorType)
else str(error_type)
)
self.context.metadata["error_type"] = type_value
if self.response_preview:
self.context.metadata["response_preview"] = self.response_preview[:200]
if json_text is not None:
self.context.metadata["json_length"] = len(json_text)
if recovery_attempted:
self.context.metadata["recovery_attempted"] = recovery_attempted
def to_log_context(self) -> JSONObject:
"""Generate log context for structured logging."""
# Use json_text for preview if response_preview is not available or short

View File

@@ -277,32 +277,36 @@ class SecurityValidator:
)
def _calculate_nesting_depth(
self, obj: JSONValue | tuple[JSONValue, ...], current_depth: int = 0
self,
obj: JSONValue | tuple[JSONValue, ...],
current_depth: int = 0,
_visited: set[int] | None = None,
) -> int:
"""Calculate the maximum nesting depth of a data structure.
"""Calculate the maximum nesting depth of a data structure with cycle safety."""
if _visited is None:
_visited = set()
Args:
obj: Object to analyze
current_depth: Current depth level
Returns:
Maximum nesting depth
"""
if current_depth > 20: # Prevent infinite recursion
return current_depth
obj_id = id(obj)
if obj_id in _visited:
return current_depth
_visited.add(obj_id)
if isinstance(obj, dict):
if not obj:
return current_depth
return max(
self._calculate_nesting_depth(value, current_depth + 1)
self._calculate_nesting_depth(value, current_depth + 1, _visited.copy())
for value in obj.values()
)
elif isinstance(obj, (list, tuple)):
if not obj:
return current_depth
return max(
self._calculate_nesting_depth(item, current_depth + 1) for item in obj
self._calculate_nesting_depth(item, current_depth + 1, _visited.copy())
for item in obj
)
else:
return current_depth

View File

@@ -328,10 +328,11 @@ async def paperless_agent_node(
llm = llm_client.llm
if llm is None:
logger.error("Failed to obtain LLM instance from service")
raise ToolError(
"Failed to obtain LLM instance from service; aborting agent step.",
tool_name="paperless_agent_node",
# Return a helpful AI message instead of raising
fallback_msg = AIMessage(
content="I'm having trouble accessing the language model right now. Please try again shortly."
)
return {"messages": [fallback_msg]}
# Bind tools to the LLM
llm_with_tools = llm.bind_tools(PAPERLESS_TOOLS)
@@ -371,22 +372,30 @@ async def paperless_agent_node(
llm_minimal = llm.bind_tools(essential_tools)
response = await llm_minimal.ainvoke(messages)
retry_elapsed = time.time() - retry_start
logger.info(
f"Retry successful with minimal tools in {retry_elapsed:.2f}s"
)
logger.info(f"Retry successful with minimal tools in {retry_elapsed:.2f}s")
except Exception as retry_e:
retry_elapsed = time.time() - retry_start
logger.error(
f"Retry with minimal tools failed after {retry_elapsed:.2f}s: {retry_e}"
logger.error(f"Retry with minimal tools failed after {retry_elapsed:.2f}s: {retry_e}")
# Fallback AI message to keep graph response shape valid
fallback = AIMessage(
content="I'm experiencing an upstream service issue. Please try again later."
)
raise retry_e
return {"messages": [fallback]}
elif "context_length_exceeded" in err_text or "maximum context length" in err_text:
logger.error(
f"Context length exceeded with {token_count} tokens - message history management may have failed"
)
raise
# Return a helpful AI message instead of raising
fallback = AIMessage(
content="This conversation is too long to process right now. Please shorten your request or start a new thread."
)
return {"messages": [fallback]}
else:
raise
# Unknown error path: return safe fallback to avoid graph breakage
fallback = AIMessage(
content="An unexpected error occurred while generating a response. Please try again."
)
return {"messages": [fallback]}
# Apply message history management AFTER generating response

View File

@@ -88,15 +88,23 @@ def generate_intelligent_search_variations(original_desc: str) -> list[str]:
expanded_desc = desc
expanded_words: list[str] = []
for word in desc.split():
# Check if word matches any abbreviation
# Check if word matches any abbreviation directly
expanded_word = expansions.get(word, word)
# Also try partial matches for compound abbreviations
if word not in expansions:
# Only attempt partial expansion when the token looks alphabetic and manageable in length
if (
word not in expansions
and word.isalpha()
and 2 < len(word) <= 20
):
for abbr, full in expansions.items():
if abbr in word and len(abbr) > 2:
expanded_word = word.replace(abbr, full)
break
# Append the final expanded word
if len(abbr) <= 2:
continue
if re.search(rf"(^|[^A-Z0-9]){re.escape(abbr)}([^A-Z0-9]|$)", word):
candidate = re.sub(rf"{re.escape(abbr)}", full, word)
if candidate != word and len(candidate) <= 40:
expanded_word = candidate
break
expanded_words.append(expanded_word)
expanded_desc = ' '.join(expanded_words)
@@ -310,13 +318,17 @@ def extract_structured_line_item_data(
if unit_price is None and total_price is None:
price_text = price_info if price_info else desc
# Avoid treating unit-annotated numbers as prices
unit_lookahead = r"(?:LB|LBS|OZ|OZS|CT|COUNT|EA|EACH)"
price_patterns = [
# Prefer explicit currency-prefixed amounts
r"\$\s*(\d{1,3}(?:,\d{3})*(?:\.\d{2})?|\d+(?:\.\d{2})?)",
# Bare decimals only when preceded by price cues
r"(?:(?:price|total|amount|cost)\s*[:\-]?\s*)\b(\d{1,3}(?:,\d{3})*\.\d{2}|\d+\.\d{2})\b",
# Split integer and cents with cues
r"(?:(?:price|total|amount|cost)\s*[:\-]?\s*)\b(\d{1,3}(?:,\d{3})*|\d+)\.(\d{2})\b",
# Prefer explicit currency-prefixed amounts with cents
rf"(?:\$\s*|USD\s*)\b(\d{{1,3}}(?:,\d{{3}})*\.\d{{2}})\b(?!\s*{unit_lookahead}\b)",
# Allow integer dollars when explicitly marked as currency with safe trailing context
rf"(?:\$\s*|USD\s*)\b(\d{{1,3}}(?:,\d{{3}})*)\b(?=\s*(?:usd|dollars?)?\b|$)(?!\s*{unit_lookahead}\b)",
# Bare decimals only when preceded by price cues; avoid unit-followed numbers
rf"(?:(?:price|total|amount|cost)\s*[:\-]?\s*)\b(\d{{1,3}}(?:,\d{{3}})*\.\d{{2}}|\d+\.\d{{2}})\b(?!\s*{unit_lookahead}\b)",
# Split integer and cents with cues; avoid unit-followed numbers
rf"(?:(?:price|total|amount|cost)\s*[:\-]?\s*)\b(\d{{1,3}}(?:,\d{{3}})*|\d+)\.(\d{{2}})\b(?!\s*{unit_lookahead}\b)",
]
for pattern in price_patterns: