# unstract/workers/sample.env

# =============================================================================
# Unstract Workers Environment Configuration
# =============================================================================
# Copy this file to .env and update the values for your environment
# =============================================================================
# Core Configuration (REQUIRED)
# =============================================================================
# Django Backend URL - REQUIRED
# Docker (default): http://unstract-backend:8000
# Local development: http://localhost:8000
DJANGO_APP_BACKEND_URL=http://unstract-backend:8000
# Internal API Base URL - REQUIRED
# This is the full URL with /internal suffix for worker→backend communication
# Docker: http://unstract-backend:8000/internal
# Local: http://localhost:8000/internal
# In this sample the value equals DJANGO_APP_BACKEND_URL followed by
# INTERNAL_API_PREFIX; when you change one of them, update the others to match.
INTERNAL_API_BASE_URL=http://unstract-backend:8000/internal
# Internal API Configuration
INTERNAL_API_PREFIX=/internal
INTERNAL_API_VERSION=v1
# Internal Service API Key - REQUIRED
# The value below is a development placeholder; replace it with a real secret
# for any shared or production deployment.
# NOTE(review): presumably must match the key configured on the backend - confirm.
INTERNAL_SERVICE_API_KEY=dev-internal-key-123
# Internal API Connection Settings
# NOTE(review): the timeout is presumably in seconds - confirm against the worker code.
INTERNAL_API_TIMEOUT=120
INTERNAL_API_RETRY_ATTEMPTS=3
INTERNAL_API_RETRY_BACKOFF_FACTOR=1.0
# Internal API Endpoint Prefixes
# Each value is a relative path that already includes the "v1/" version segment
# (same version as INTERNAL_API_VERSION above) and ends with a trailing slash.
INTERNAL_API_HEALTH_PREFIX=v1/health/
INTERNAL_API_TOOL_PREFIX=v1/tool-execution/
INTERNAL_API_EXECUTION_PREFIX=v1/execution/
INTERNAL_API_WEBHOOK_PREFIX=v1/webhook/
INTERNAL_API_FILE_HISTORY_PREFIX=v1/file-history/
INTERNAL_API_WORKFLOW_PREFIX=v1/workflow-execution/
INTERNAL_API_ORGANIZATION_PREFIX=v1/organization/
# =============================================================================
# Celery Configuration
# =============================================================================
# Celery Broker (RabbitMQ) - REQUIRED
# These credentials must match your RabbitMQ configuration
# The base URL carries no credentials; the user name and password are supplied
# separately through CELERY_BROKER_USER and CELERY_BROKER_PASS.
# NOTE(review): the trailing "//" presumably selects the default "/" virtual
# host - confirm against the code that assembles the final broker URL.
CELERY_BROKER_BASE_URL=amqp://unstract-rabbitmq:5672//
# Sample development credentials - change them outside local development.
CELERY_BROKER_USER=admin
CELERY_BROKER_PASS=password
# =============================================================================
# Database Configuration (REQUIRED)
# =============================================================================
# PostgreSQL (for Celery result backend) - REQUIRED
# These credentials must match your PostgreSQL configuration
# The values below are sample development credentials.
DB_HOST=unstract-db
DB_USER=unstract_dev
DB_PASSWORD=unstract_pass
DB_NAME=unstract_db
DB_PORT=5432
DB_SCHEMA=unstract
# Celery Backend Database Schema
# Note that this differs from DB_SCHEMA above (public vs. unstract).
CELERY_BACKEND_DB_SCHEMA=public
# Redis (for caching and queues) - REQUIRED
# REDIS_PASSWORD is intentionally left empty in this sample.
REDIS_HOST=unstract-redis
REDIS_PORT=6379
REDIS_PASSWORD=
REDIS_USER=default
REDIS_DB=0
# Cache-Specific Redis Configuration
# In this sample the cache points at the same host, port and DB index as the
# REDIS_* settings above. Note the naming difference: the user key here is
# CACHE_REDIS_USERNAME, whereas the general one is REDIS_USER.
CACHE_REDIS_ENABLED=true
CACHE_REDIS_HOST=unstract-redis
CACHE_REDIS_PORT=6379
CACHE_REDIS_DB=0
CACHE_REDIS_PASSWORD=
CACHE_REDIS_USERNAME=
CACHE_REDIS_SSL=false
# NOTE(review): presumably only consulted when CACHE_REDIS_SSL=true - confirm.
CACHE_REDIS_SSL_CERT_REQS=required
# Database URL (for fallback usage)
# This repeats DB_USER, DB_PASSWORD, DB_HOST, DB_PORT and DB_NAME from above;
# update it whenever any of those values change.
DATABASE_URL=postgresql://unstract_dev:unstract_pass@unstract-db:5432/unstract_db
# =============================================================================
# Worker Infrastructure Settings
# =============================================================================
# Worker Singleton Infrastructure - Controls shared resource management
ENABLE_API_CLIENT_SINGLETON=true
DEBUG_API_CLIENT_INIT=false
WORKER_INFRASTRUCTURE_HEALTH_CHECK=true
# API Client Configuration
API_CLIENT_POOL_SIZE=3
# Config Caching
# NOTE(review): CONFIG_CACHE_TTL is presumably in seconds - confirm.
ENABLE_CONFIG_CACHE=true
CONFIG_CACHE_TTL=300
# Debug Settings
ENABLE_DEBUG_LOGGING=false
DEBUG_ORGANIZATION_CONTEXT=false
# Worker Concurrency
MAX_CONCURRENT_TASKS=10
# =============================================================================
# Worker Performance Settings
# =============================================================================
# NOTE(review): these names mirror the Celery settings worker_prefetch_multiplier,
# task_acks_late and worker_max_tasks_per_child; presumably they are passed
# through to Celery unchanged - confirm against the worker bootstrap code.
CELERY_WORKER_PREFETCH_MULTIPLIER=1
CELERY_TASK_ACKS_LATE=true
CELERY_WORKER_MAX_TASKS_PER_CHILD=1000
# =============================================================================
# Task Timeout Configuration (Celery Standard Naming Convention)
# =============================================================================
# Uses format: {WORKER_TYPE}_TASK_TIME_LIMIT and {WORKER_TYPE}_TASK_SOFT_TIME_LIMIT
#
# Resolution hierarchy:
# 1. Worker-specific: FILE_PROCESSING_TASK_TIME_LIMIT (highest priority)
# 2. General: TASK_TIME_LIMIT (fallback for all workers)
# 3. Code defaults (lowest priority)
# General Task Timeouts - Applies to all workers without specific overrides
# All values are in seconds. Descriptions are kept on their own lines because
# some env-file loaders (for example `docker run --env-file`) do not strip
# inline comments and would treat "# ..." after a value as part of the value.
# General hard timeout: 1 hour
TASK_TIME_LIMIT=3600
# General soft timeout: 55 minutes
TASK_SOFT_TIME_LIMIT=3300
# Worker-Specific Timeouts - Overrides general timeouts for specific worker types
# File processing hard timeout: 2 hours
FILE_PROCESSING_TASK_TIME_LIMIT=7200
# File processing soft timeout: 1h 45m
FILE_PROCESSING_TASK_SOFT_TIME_LIMIT=6300
# Callback hard timeout: 1 hour
CALLBACK_TASK_TIME_LIMIT=3600
# Callback soft timeout: 55 minutes
CALLBACK_TASK_SOFT_TIME_LIMIT=3300
# Retry Configuration
# NOTE(review): the retry delay is presumably in seconds - confirm.
CELERY_TASK_DEFAULT_RETRY_DELAY=60
CELERY_TASK_MAX_RETRIES=3
CELERY_TASK_REJECT_ON_WORKER_LOST=false
# Advanced Celery Configuration
# NOTE(review): these names mirror the Celery settings worker_pool_restarts,
# broker_connection_retry_on_startup and result_chord_retry_interval;
# presumably they are forwarded to Celery as-is - confirm.
CELERY_WORKER_POOL_RESTARTS=true
CELERY_BROKER_CONNECTION_RETRY_ON_STARTUP=true
CELERY_RESULT_CHORD_RETRY_INTERVAL=3.0
# =============================================================================
# Worker-Specific Configuration
# =============================================================================
# Each worker type below has three settings: a name, a health port and an
# autoscale pair. The health ports in this sample are unique per worker
# (8080-8083 and 8085-8087; 8084 is not assigned here).
# NOTE(review): the *_AUTOSCALE values look like Celery's "--autoscale=max,min"
# format (larger number first) - confirm against the worker start-up scripts.
# API Deployment Worker
API_DEPLOYMENT_WORKER_NAME=api-deployment-worker
API_DEPLOYMENT_HEALTH_PORT=8080
API_DEPLOYMENT_AUTOSCALE=4,1
# General Worker
GENERAL_WORKER_NAME=general-worker
GENERAL_HEALTH_PORT=8081
GENERAL_AUTOSCALE=6,2
# File Processing Worker
FILE_PROCESSING_WORKER_NAME=file-processing-worker
FILE_PROCESSING_HEALTH_PORT=8082
FILE_PROCESSING_AUTOSCALE=8,2
# Callback Worker
CALLBACK_WORKER_NAME=callback-worker
CALLBACK_HEALTH_PORT=8083
CALLBACK_AUTOSCALE=4,1
# Scheduler Worker
# Uses port 8087, out of sequence with the other workers.
SCHEDULER_WORKER_NAME=scheduler-worker
SCHEDULER_HEALTH_PORT=8087
SCHEDULER_AUTOSCALE=2,1
# Notification Worker
NOTIFICATION_WORKER_NAME=notification-worker
NOTIFICATION_HEALTH_PORT=8085
NOTIFICATION_AUTOSCALE=4,1
# Log Consumer Worker
LOG_CONSUMER_WORKER_NAME=log-consumer-worker
LOG_CONSUMER_HEALTH_PORT=8086
LOG_CONSUMER_AUTOSCALE=2,1
# =============================================================================
# Logging Configuration
# =============================================================================
# NOTE(review): both LOG_LEVEL and DEFAULT_LOG_LEVEL are set in this file; which
# one takes precedence is not visible here - confirm against the worker code.
LOG_LEVEL=INFO
# Allowed values for LOG_FORMAT: structured or django
LOG_FORMAT=django
DEFAULT_LOG_LEVEL=INFO
WORKER_VERSION=1.0.0
WORKER_INSTANCE_ID=dev-01
# Log History Configuration
# LOGS_EXPIRATION_TIME_IN_SECOND is in seconds, as the name states
# (86400 = 24 hours).
# NOTE(review): LOG_HISTORY_CONSUMER_INTERVAL is presumably also in seconds - confirm.
ENABLE_LOG_HISTORY=true
LOG_HISTORY_CONSUMER_INTERVAL=30
LOGS_BATCH_LIMIT=30
LOGS_EXPIRATION_TIME_IN_SECOND=86400
LOG_HISTORY_QUEUE_NAME=log_history_queue
# Log Queue Size Protection
# Maximum number of logs in Redis queue before dropping new logs
LOG_QUEUE_MAX_SIZE=10000
# =============================================================================
# Queue Configuration
# =============================================================================
# Notification Queue Name
NOTIFICATION_QUEUE_NAME=notifications
# =============================================================================
# Backend Services
# =============================================================================
# For every service below, the *_HOST value includes the URL scheme (http://)
# and the port is given separately in the matching *_PORT variable.
# Platform Service
PLATFORM_SERVICE_HOST=http://unstract-platform-service
PLATFORM_SERVICE_PORT=3001
# Prompt Service
PROMPT_HOST=http://unstract-prompt-service
PROMPT_PORT=3003
# X2Text Service
X2TEXT_HOST=http://unstract-x2text-service
X2TEXT_PORT=3004
# Tool Runner
UNSTRACT_RUNNER_HOST=http://unstract-runner
UNSTRACT_RUNNER_PORT=5002
# NOTE(review): the API timeout is presumably in seconds - confirm.
UNSTRACT_RUNNER_API_TIMEOUT=300
UNSTRACT_RUNNER_API_RETRY_COUNT=5
UNSTRACT_RUNNER_API_BACKOFF_FACTOR=3
# =============================================================================
# File Storage Configuration
# =============================================================================
# File Storage Credentials (MinIO)
# Each value is a JSON object wrapped in single quotes so that the embedded
# double quotes and spaces stay part of one value. Keep the single quotes when
# editing. The key/secret shown are sample development credentials.
WORKFLOW_EXECUTION_FILE_STORAGE_CREDENTIALS='{"provider": "minio", "credentials": {"endpoint_url": "http://unstract-minio:9000", "key": "minio", "secret": "minio123"}}'
API_FILE_STORAGE_CREDENTIALS='{"provider": "minio", "credentials": {"endpoint_url": "http://unstract-minio:9000", "key": "minio", "secret": "minio123"}}'
# File Execution Configuration
WORKFLOW_EXECUTION_DIR_PREFIX=unstract/execution
API_EXECUTION_DIR_PREFIX=unstract/api
MAX_PARALLEL_FILE_BATCHES=1
# File Execution TTL Configuration
# The *_IN_SECOND and *_SECONDS values in this section are in seconds, as their
# names state (18000 = 5 hours, 300 = 5 minutes).
FILE_EXECUTION_TRACKER_TTL_IN_SECOND=18000
FILE_EXECUTION_TRACKER_COMPLETED_TTL_IN_SECOND=300
# Destination Processing TTL Configuration
DESTINATION_PROCESSING_STAGE_TTL_IN_SECOND=600
# Actual Redis lock TTL
DESTINATION_PROCESSING_LOCK_TTL_IN_SECOND=10
# 86400 seconds = 24 hours
EXECUTION_RESULT_TTL_SECONDS=86400
EXECUTION_CACHE_TTL_SECONDS=86400
# NOTE(review): the polling timeout is presumably in seconds - confirm.
INSTANT_WF_POLLING_TIMEOUT=300
# Active File Execution cache in seconds
ACTIVE_FILE_CACHE_TTL=300
# =============================================================================
# Development Settings
# =============================================================================
# General runtime toggles; all are plain true/false flags.
DEBUG=false
TESTING=false
ENABLE_METRICS=true
ENABLE_FILE_HISTORY=true
ENABLE_WEBHOOK_DELIVERY=true
# Tool Registry
# The config path is relative.
# NOTE(review): presumably resolved from the workers directory - confirm.
TOOL_REGISTRY_CONFIG_PATH=../unstract/tool-registry/tool_registry_config
# JSON value wrapped in single quotes; keep the quotes when editing.
TOOL_REGISTRY_STORAGE_CREDENTIALS='{"provider":"local"}'
# =============================================================================
# Optional Advanced Settings
# =============================================================================
# Health Checks
# NOTE(review): interval and timeout are presumably in seconds - confirm.
HEALTH_CHECK_INTERVAL=30
HEALTH_CHECK_TIMEOUT=10
# NOTE(review): 8080 is also the value of API_DEPLOYMENT_HEALTH_PORT in this
# file - confirm the two listeners never run in the same process/container,
# or pick a distinct port.
METRICS_PORT=8080
# Circuit Breaker
# NOTE(review): the recovery timeout is presumably in seconds - confirm.
CIRCUIT_BREAKER_FAILURE_THRESHOLD=5
CIRCUIT_BREAKER_RECOVERY_TIMEOUT=60
# Notifications
NOTIFICATION_TIMEOUT=5
# Cache
# TTL in seconds, as the _SEC suffix states (10800 = 3 hours).
CACHE_TTL_SEC=10800
# Connection Pooling
CONNECTION_POOL_SIZE=10
CONNECTION_POOL_MAX_OVERFLOW=20
# Task Routing and Backup
ENABLE_PRIORITY_ROUTING=false
HIGH_PRIORITY_QUEUE_SUFFIX=_high
LOW_PRIORITY_QUEUE_SUFFIX=_low
ENABLE_TASK_BACKUP=false
BACKUP_INTERVAL=3600
# Feature Flags
ENABLE_DESTINATION_CONNECTORS=true
ENABLE_CLEANUP_TASKS=true
# Security (for production)
# All three are false in this sample; review them before a production deployment.
SECURE_SSL_REDIRECT=false
SESSION_COOKIE_SECURE=false
CSRF_COOKIE_SECURE=false
# Monitoring
# SENTRY_DSN is left empty in this sample.
SENTRY_DSN=
SENTRY_ENVIRONMENT=development
# =============================================================================
# Local Development Overrides
# =============================================================================
# For local development (all services on host), change Docker service names to localhost.
# The lines below are examples only: uncomment the ones you need, or edit the
# corresponding active settings earlier in this file.
# DJANGO_APP_BACKEND_URL=http://localhost:8000
# INTERNAL_API_BASE_URL=http://localhost:8000/internal
# CELERY_BROKER_BASE_URL=amqp://localhost:5672//
# DB_HOST=localhost
# REDIS_HOST=localhost
# CACHE_REDIS_HOST=localhost
# PLATFORM_SERVICE_HOST=http://localhost
# PROMPT_HOST=http://localhost
# X2TEXT_HOST=http://localhost
# UNSTRACT_RUNNER_HOST=http://localhost
# The JSON values are single-quoted, like the active credentials settings, so
# that the spaces and double quotes survive shell-style parsing when uncommented.
# WORKFLOW_EXECUTION_FILE_STORAGE_CREDENTIALS='{"provider": "minio", "credentials": {"endpoint_url": "http://localhost:9000", "key": "minio", "secret": "minio123"}}'
# API_FILE_STORAGE_CREDENTIALS='{"provider": "minio", "credentials": {"endpoint_url": "http://localhost:9000", "key": "minio", "secret": "minio123"}}'
# Active setting (not a commented-out override).
# NOTE(review): this is the protobuf runtime's backend selector; "python"
# presumably forces the pure-Python implementation - confirm it is still needed.
PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python