UN-2893 [FIX] Fix duplicate process handling status updates and UI error logs (#1594)
* UN-2893 [FIX] Fix duplicate process handling status updates and UI error logs

  Prevent duplicate worker processes from updating file execution status and showing UI error logs during GKE race conditions.

  - Added is_duplicate_skip flag to FileProcessingResult dataclass
  - Fixed destination_processed default value for correct duplicate detection
  - Skip status updates and UI logs when duplicate is detected
  - Only first worker updates status, second worker silently exits

* logger.error converted to logger.exception

* error to exception in logs
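For context, a minimal sketch of the pattern this change applies (hypothetical names such as FileResult and handle_result, not the repository's actual classes): the per-file result carries an is_duplicate_skip flag, and the result handler returns early without touching status, counters, or UI logs whenever the flag is set.

    from dataclasses import dataclass

    @dataclass
    class FileResult:  # hypothetical stand-in for FileProcessingResult
        file_name: str
        destination_processed: bool = True
        destination_error: str | None = None
        is_duplicate_skip: bool = False  # True when another worker already owns this file

    def handle_result(result: FileResult) -> None:
        """Only the first worker updates DB status; a duplicate worker exits silently."""
        if result.is_duplicate_skip:
            # No status update, no counter increment, no UI error log - silent skip.
            return
        # ... normal path: update file execution status, counters, and UI logs ...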
@@ -786,6 +786,9 @@ class FileProcessingResult:
     destination_processed: bool = True
     destination_error: str | None = None

+    # Duplicate detection indicator
+    is_duplicate_skip: bool = False  # True when file skipped due to duplicate detection
+
     def to_dict(self) -> dict[str, Any]:
         """Convert to dictionary for backward compatibility."""
         return serialize_dataclass_to_dict(self)

@@ -807,6 +810,7 @@ class FileProcessingResult:
             review_result=data.get("review_result"),
             destination_processed=data.get("destination_processed", True),
             destination_error=data.get("destination_error"),
+            is_duplicate_skip=data.get("is_duplicate_skip", False),
         )

     def is_successful(self) -> bool:

@@ -760,6 +760,21 @@ def _handle_file_processing_result(
         )
         return

+    # CRITICAL: Handle duplicate skip - no DB updates, silent skip
+    # When is_duplicate_skip=True, another worker is already processing this file
+    # We should skip ALL processing and not update any database status or counters
+    if getattr(file_execution_result, "is_duplicate_skip", False):
+        # Enhanced debug log with full context for internal debugging
+        logger.info(
+            f"DUPLICATE SKIP: File '{file_name}' skipped as duplicate - another worker is processing it. "
+            f"execution_id={execution_id}, workflow_id={workflow_id}, "
+            f"file_execution_id={file_execution_id}, celery_task_id={celery_task_id}. "
+            f"No DB status updates, no counter increments - silent skip to avoid interfering with active worker. "
+            f"First worker will handle all status updates and counter increments."
+        )
+        # Exit early without any DB updates - the first worker will handle all updates
+        return
+
     # Calculate execution time
     file_execution_time = _calculate_execution_time(file_name, file_start_time)

@@ -358,10 +358,8 @@ class WorkflowExecutionProcessor:
                 logger.info(
                     f"Updated file execution {context.workflow_file_execution_id} status to ERROR"
                 )
-            except Exception as status_error:
-                logger.error(
-                    f"Failed to update file execution status: {status_error}"
-                )
+            except Exception:
+                logger.exception("Failed to update file execution status")

         return FileProcessingResult(
             file_name=context.file_name,

@@ -380,6 +378,21 @@ class WorkflowExecutionProcessor:
                 f"✓ Workflow execution completed successfully for {context.file_name}"
             )

+            # Check if this was a duplicate skip (destination not processed due to duplicate detection)
+            destination_processed = (
+                execution_result.metadata.destination_processed
+                if execution_result.metadata
+                else False
+            )
+            destination_error = (
+                execution_result.metadata.destination_error
+                if execution_result.metadata
+                else None
+            )
+
+            # Duplicate detection: destination not processed AND no error
+            is_duplicate = not destination_processed and not destination_error
+
             return FileProcessingResult(
                 file_name=context.file_name,
                 file_execution_id=context.workflow_file_execution_id,

@@ -390,12 +403,9 @@ class WorkflowExecutionProcessor:
                 if execution_result.metadata
                 else {},
                 execution_time=context.get_processing_duration(),
-                destination_processed=execution_result.metadata.destination_processed
-                if execution_result.metadata
-                else True,
-                destination_error=execution_result.metadata.destination_error
-                if execution_result.metadata
-                else None,
+                destination_processed=destination_processed,
+                destination_error=destination_error,
+                is_duplicate_skip=is_duplicate,
             )

         except Exception as execution_error:

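As a standalone illustration of the inference rule used in the two hunks above (assumed behavior pieced together from this diff, not code taken from the repository): destination_processed=False with no destination_error is treated as a duplicate skip, while any destination_error remains a genuine failure.

    def classify(destination_processed: bool, destination_error: str | None) -> str:
        """Hypothetical helper mirroring `is_duplicate = not destination_processed and not destination_error`."""
        if not destination_processed and not destination_error:
            return "duplicate_skip"       # another worker owns the file; stay silent
        if destination_error or not destination_processed:
            return "destination_failure"  # real failure; mark the file execution as ERROR
        return "processed"                # normal success path

    assert classify(True, None) == "processed"
    assert classify(False, None) == "duplicate_skip"
    assert classify(False, "connector timeout") == "destination_failure"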
@@ -597,37 +607,47 @@ class FileProcessor:
             # Check if destination processing failed
             destination_error = workflow_result.destination_error
             destination_processed = workflow_result.destination_processed
+            is_duplicate_skip = getattr(workflow_result, "is_duplicate_skip", False)

             if destination_error or not destination_processed:
-                # Log destination failure
-                error_msg = destination_error or "Destination processing failed"
-                log_file_error(
-                    workflow_logger,
-                    workflow_file_execution_id,
-                    f"❌ File '{context.file_name}' destination processing failed: {error_msg}",
-                )
-
-                # Update file execution status to ERROR
-                logger.info(
-                    f"Updating file execution status to ERROR for {context.workflow_file_execution_id} due to destination failure"
-                )
-                try:
-                    context.api_client.update_file_execution_status(
-                        file_execution_id=context.workflow_file_execution_id,
-                        status=ExecutionStatus.ERROR.value,
-                        error_message=error_msg,
-                    )
-                    logger.info(
-                        f"Updated file execution {context.workflow_file_execution_id} status to ERROR"
-                    )
-                except Exception as status_error:
-                    logger.error(
-                        f"Failed to update file execution status: {status_error}"
-                    )
-
-                # Update workflow result since destination failed
-                workflow_result.success = False
-                workflow_result.error = error_msg
+                # Skip UI error log and DB updates for duplicates (internal race condition, not user error)
+                if not is_duplicate_skip:
+                    # Log destination failure to UI
+                    error_msg = destination_error or "Destination processing failed"
+                    log_file_error(
+                        workflow_logger,
+                        workflow_file_execution_id,
+                        f"❌ File '{context.file_name}' destination processing failed: {error_msg}",
+                    )
+
+                    # Update file execution status to ERROR
+                    logger.info(
+                        f"Updating file execution status to ERROR for {context.workflow_file_execution_id} due to destination failure"
+                    )
+                    try:
+                        context.api_client.update_file_execution_status(
+                            file_execution_id=context.workflow_file_execution_id,
+                            status=ExecutionStatus.ERROR.value,
+                            error_message=error_msg,
+                        )
+                        logger.info(
+                            f"Updated file execution {context.workflow_file_execution_id} status to ERROR"
+                        )
+                    except Exception:
+                        logger.exception("Failed to update file execution status")
+
+                    # Update workflow result since destination failed
+                    workflow_result.success = False
+                    workflow_result.error = error_msg
+                else:
+                    # Debug log for duplicate detection (internal use only)
+                    logger.info(
+                        f"DUPLICATE SKIP: File '{context.file_name}' identified as duplicate in processor. "
+                        f"destination_processed=False, destination_error=None, "
+                        f"file_execution_id={context.workflow_file_execution_id}. "
+                        f"Skipping UI error log and status updates - this is an internal race condition, not a user-facing error. "
+                        f"First worker will handle all status updates."
+                    )
             else:
                 log_file_info(
                     workflow_logger,

@@ -639,7 +659,7 @@ class FileProcessor:
             return workflow_result

         except Exception as e:
-            logger.error(f"File processing failed for {context.file_name}: {e}")
+            logger.exception(f"File processing failed for {context.file_name}")

             # Send file processing error log to UI
             log_file_processing_error(

@@ -343,15 +343,14 @@ class WorkerDestinationConnector:
                 FileExecutionStage.FINALIZATION,
                 FileExecutionStage.COMPLETED,
             ]:
+                # Enhanced debug log with full context for internal debugging
                 logger.warning(
-                    f"Destination already processed for file '{file_ctx.file_name}' "
-                    f"at stage {current_stage.value}. Skipping duplicate processing."
-                )
-                log_file_info(
-                    exec_ctx.workflow_log,
-                    exec_ctx.file_execution_id,
-                    f"File '{file_ctx.file_name}' destination already processed - skipping duplicate",
+                    f"DUPLICATE DETECTION: File '{file_ctx.file_name}' destination already at stage "
+                    f"{current_stage.value}. execution_id={exec_ctx.execution_id}, "
+                    f"file_execution_id={exec_ctx.file_execution_id}, worker_pid={os.getpid()}. "
+                    f"Skipping duplicate processing - another worker completed this already."
                 )
+                # UI log removed - duplication is internal detail, not user-facing error
                 return False  # Duplicate detected

             # Atomically acquire lock using Redis SET NX

@@ -381,16 +380,14 @@ class WorkerDestinationConnector:
             )

             if not lock_acquired:
-                # Another worker already has the lock
+                # Enhanced debug log with full context for internal debugging
                 logger.warning(
-                    f"Lock acquisition failed for file '{file_ctx.file_name}': "
-                    f"Another worker holds the lock. Skipping duplicate processing."
-                )
-                log_file_info(
-                    exec_ctx.workflow_log,
-                    exec_ctx.file_execution_id,
-                    f"File '{file_ctx.file_name}' destination lock held by another worker - skipping duplicate",
+                    f"DUPLICATE DETECTION: Lock acquisition failed for file '{file_ctx.file_name}'. "
+                    f"execution_id={exec_ctx.execution_id}, file_execution_id={exec_ctx.file_execution_id}, "
+                    f"lock_key={lock_key}, worker_pid={os.getpid()}. "
+                    f"Another worker holds the lock - skipping duplicate processing."
                 )
+                # UI log removed - duplication is internal detail, not user-facing error
                 return False

             # Lock acquired successfully - now set DESTINATION_PROCESSING stage

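A minimal sketch of the SET NX locking these two hunks rely on (assuming a plain redis-py client and an illustrative key layout; the repository's actual lock helper, key format, and TTL are not shown in this diff): only the worker whose SET succeeds proceeds to destination processing, and every other worker backs off as a duplicate.

    import os

    import redis

    r = redis.Redis()  # assumed connection; the real client and config are not part of this diff

    def try_acquire_destination_lock(
        execution_id: str, file_execution_id: str, ttl_seconds: int = 300
    ) -> bool:
        """Return True only for the first worker; SET NX is atomic on the Redis server."""
        lock_key = f"dest_lock:{execution_id}:{file_execution_id}"  # illustrative key format
        owner = f"pid-{os.getpid()}"
        # nx=True sets the key only if it does not already exist; ex=ttl auto-expires it
        # so a crashed worker cannot deadlock the others.
        return bool(r.set(lock_key, owner, nx=True, ex=ttl_seconds))

If this returns False, the caller mirrors the diff above: log an internal warning, skip the UI log, and return False to signal a duplicate.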
@@ -528,13 +528,19 @@ class WorkerWorkflowExecutionService:
             final_error = destination_result.error

         # Build metadata
+        # CRITICAL: Check destination_result.processed (not just existence) to detect duplicate skips
+        # destination_result.processed=False means duplicate was detected and skipped
         metadata = WorkflowExecutionMetadata(
            workflow_id=workflow_id,
            execution_id=execution_id,
            execution_time=execution_time,
            tool_count=tool_count,
            workflow_executed=workflow_success,
-           destination_processed=destination_result is not None,
+           destination_processed=(
+               getattr(destination_result, "processed", True)
+               if destination_result
+               else False
+           ),
            destination_error=destination_result.error if destination_result else None,
        )

@@ -1156,9 +1162,13 @@ class WorkerWorkflowExecutionService:

         # Check if handle_output returned None (duplicate detected)
         if handle_output_result is None:
+            # Enhanced debug log with full context for internal debugging
             logger.info(
-                f"Duplicate detected for {file_hash.file_name} - "
-                f"skipping file history and all downstream processing"
+                f"DUPLICATE SKIP: File '{file_hash.file_name}' duplicate detected at destination phase. "
+                f"execution_id={execution_id}, file_execution_id={workflow_file_execution_id}, "
+                f"workflow_id={workflow_id}. "
+                f"Returning FinalOutputResult with processed=False, no file history will be created. "
+                f"This is an internal race condition, not an error."
             )
             # Return special result to indicate duplicate (not an error, not processed)
             return FinalOutputResult(