Files
unstract/backend/sample.env
ali 0c5997f9a9 UN-2470 [FEAT] Remove Django dependency from Celery workers with internal APIs (#1494)
* UN-2470 [MISC] Remove Django dependency from Celery workers

This commit introduces a new worker architecture that decouples
Celery workers from Django where possible, enabling support for
gevent/eventlet pool types and reducing worker startup overhead.

Key changes:
- Created separate worker modules (api-deployment, callback, file_processing, general)
- Added internal API endpoints for worker communication
- Implemented Django-free task execution where appropriate
- Added shared utilities and client facades
- Updated container configurations for new worker architecture
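
The sketch below illustrates the shape of such a Django-free worker: a Celery app whose tasks read state over an authenticated internal HTTP API instead of importing Django models. Module, task, and endpoint names are illustrative assumptions, not the PR's actual layout; only `CELERY_BROKER_BASE_URL` and `INTERNAL_SERVICE_API_KEY` appear in the sample.env below.

```python
# Hedged sketch of a Django-free Celery worker (names are illustrative).
import os

import requests
from celery import Celery

app = Celery("file_processing", broker=os.environ["CELERY_BROKER_BASE_URL"])

INTERNAL_API_BASE = os.environ.get(
    "INTERNAL_API_BASE_URL", "http://unstract-backend:8000/internal"
)  # hypothetical env var and URL
INTERNAL_API_KEY = os.environ["INTERNAL_SERVICE_API_KEY"]


@app.task(name="process_file")
def process_file(file_execution_id: str) -> dict:
    # No Django ORM here: worker state comes from an internal HTTP API,
    # which is what enables gevent/eventlet pools and faster startup.
    resp = requests.get(
        f"{INTERNAL_API_BASE}/file-executions/{file_execution_id}/",
        headers={"Authorization": f"Bearer {INTERNAL_API_KEY}"},
        timeout=30,
    )
    resp.raise_for_status()
    return resp.json()
```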

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>

* Fix pre-commit issues: file permissions and ruff errors

Set up Docker for the new workers

- Add executable permissions to worker entrypoint files
- Fix import order in namespace package __init__.py
- Remove unused variable api_status in general worker
- Address ruff E402 and F841 errors

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>

* refactored Dockerfiles, fixes

* flexibility in Celery run commands

* added debug logs

* handled file history for API

* cleanup

* cleanup

* cloud plugin structure

* minor changes in import plugin

* added notification and logger workers under new worker module

* add docker compatibility for new workers

* handled docker issues

* log consumer worker fixes

* added scheduler worker

* minor env changes

* cleanup the logs

* minor changes in logs

* resolved scheduler worker issues

* cleanup and refactor

* ensuring backward compatibility with existing workers

* added configuration internal APIs and cache utils

* optimization

* Fix API client singleton pattern to share HTTP sessions

- Fix flawed singleton implementation that was trying to share BaseAPIClient instances
- Now properly shares HTTP sessions between specialized clients
- Eliminates 6x BaseAPIClient initialization by reusing the same underlying session
- Should reduce API deployment orchestration time by ~135ms (from 6 clients to 1 session)
- Added debug logging to verify singleton pattern activation
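
A minimal sketch of this session-sharing pattern, assuming the specialized clients all derive from `BaseAPIClient` (class and method names beyond that are illustrative):

```python
# Share one requests.Session across all specialized API clients.
import threading

import requests


class BaseAPIClient:
    _shared_session = None
    _lock = threading.Lock()

    def __init__(self, base_url: str) -> None:
        self.base_url = base_url
        # Reuse a single underlying session so each specialized client
        # avoids re-initializing its own connection pool.
        with BaseAPIClient._lock:
            if BaseAPIClient._shared_session is None:
                BaseAPIClient._shared_session = requests.Session()
        self.session = BaseAPIClient._shared_session


class WorkflowAPIClient(BaseAPIClient):  # hypothetical specialized client
    def get_workflow(self, workflow_id: str) -> dict:
        resp = self.session.get(
            f"{self.base_url}/workflows/{workflow_id}/", timeout=30
        )
        resp.raise_for_status()
        return resp.json()
```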

* cleanup and structuring

* cleanup in callback

* file system connectors issue

* celery env values changes

* optional gossip

* variables for sync, mingle and gossip
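
For instance, those toggles might be wired from env vars to Celery worker flags roughly as below; the `CELERY_ENABLE_*` variable names are illustrative assumptions:

```python
# Hedged sketch: opt out of gossip/mingle/heartbeat via env vars.
import os

from celery import Celery

app = Celery("worker", broker=os.environ["CELERY_BROKER_BASE_URL"])


def build_worker_argv() -> list[str]:
    argv = ["worker", "--loglevel=INFO"]
    # Each flag is opt-out; unset or "true" keeps Celery's default behaviour.
    if os.environ.get("CELERY_ENABLE_GOSSIP", "true").lower() == "false":
        argv.append("--without-gossip")
    if os.environ.get("CELERY_ENABLE_MINGLE", "true").lower() == "false":
        argv.append("--without-mingle")
    if os.environ.get("CELERY_ENABLE_HEARTBEAT", "true").lower() == "false":
        argv.append("--without-heartbeat")
    return argv


if __name__ == "__main__":
    app.worker_main(argv=build_worker_argv())
```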

* Fix for file type check

* resolving task pipeline issue

* handled API deployment failure response

* Task pipeline fixes

* updated file history cleanup with active file execution

* pipeline status update and workflow UI page execution

* cleanup and resolving conflicts

* remove unstract-core from connectors

* Commit uv.lock changes

* uv lock updates

* resolve migration issues

* defer connector-metadata

* Fix connector migration for production scale

- Add encryption key handling with defer() to prevent decryption failures
- Add final cleanup step to fix duplicate connector names
- Optimize for large datasets with batch processing and bulk operations
- Ensure unique constraint in migration 0004 can be created successfully
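
A hedged sketch of that migration strategy (the app label, model, and field names are placeholders, not the real migration):

```python
# defer() the encrypted column and de-duplicate names in batches.
from django.db import migrations

BATCH_SIZE = 1000


def dedupe_connector_names(apps, schema_editor):
    ConnectorInstance = apps.get_model("connector_v2", "ConnectorInstance")
    # defer() keeps the encrypted metadata out of the SELECT, so rows
    # whose payloads can't be decrypted don't abort the migration.
    qs = ConnectorInstance.objects.defer("connector_metadata").order_by("pk")
    seen = set()
    to_update = []
    for row in qs.iterator(chunk_size=BATCH_SIZE):
        if row.connector_name in seen:
            row.connector_name = f"{row.connector_name} ({row.pk})"
            to_update.append(row)
        else:
            seen.add(row.connector_name)
        # Flush in batches so large datasets don't blow up memory.
        if len(to_update) >= BATCH_SIZE:
            ConnectorInstance.objects.bulk_update(to_update, ["connector_name"])
            to_update.clear()
    if to_update:
        ConnectorInstance.objects.bulk_update(to_update, ["connector_name"])


class Migration(migrations.Migration):
    dependencies = [("connector_v2", "0003_previous_migration")]  # placeholder
    operations = [
        migrations.RunPython(dedupe_connector_names, migrations.RunPython.noop)
    ]
```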

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>

* hitl fixes

* minor fixes on hitl

* api_hub related changes

* dockerfile fixes

* api client cache fixes with actual response class

* fix: tags and llm_profile_id

* optimized clear cache

* cleanup

* enhanced logs

* added more handling around file/directory checks and added loggers

* cleaned up the run-platform script

* internal APIs are exempted from CSRF

* SonarCloud issues

* SonarCloud issues

* resolving SonarCloud issues

* resolving SonarCloud issues

* Delta: added batch size fix in workers

* comments addressed

* Celery configuration changes for new workers

* fixes in callback regarding the pipeline type check

* change internal url registry logic

* gitignore changes

* gitignore changes

* addressing PR comments and cleaning up the code

* adding missed profiles for v2

* SonarCloud blocker issues resolved

* implement OTel

* Commit uv.lock changes

* handle execution time and some cleanup

* adding user_data in metadata. PR: https://github.com/Zipstack/unstract/pull/1544

* scheduler backward compatibility

* replace user_data with custom_data

* Commit uv.lock changes

* celery worker command issue resolved

* enhance package imports in connectors by changing to lazy imports
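
The lazy-import change likely amounts to a PEP 562 module-level `__getattr__` in the connectors package; the sketch below shows the general pattern with illustrative connector names:

```python
# connectors/__init__.py -- import heavy connector deps only on first use.
import importlib

_LAZY_EXPORTS = {
    # public name -> (module path, attribute); entries are illustrative
    "GCSFS": ("unstract.connectors.filesystems.gcs", "GCSFS"),
    "S3FS": ("unstract.connectors.filesystems.s3", "S3FS"),
}


def __getattr__(name: str):
    try:
        module_path, attr = _LAZY_EXPORTS[name]
    except KeyError:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
    # Importing here (not at package import time) keeps worker startup fast.
    return getattr(importlib.import_module(module_path), attr)
```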

* Update runner.py, removing OTel from it

Signed-off-by: ali <117142933+muhammad-ali-e@users.noreply.github.com>

* added delta changes

* handle errors to destination DB

* resolve tool instance ID validation and HITL queue name in API

* handled direct execution from workflow page to worker and logs

* handle cost logs

* Update health.py

Signed-off-by: Ritwik G <100672805+ritwik-g@users.noreply.github.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* minor log changes

* introducing log consumer scheduler for bulk creates, and socket.emit from worker for WebSocket updates
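
A sketch of the bulk-create side, assuming logs are buffered in a Redis list and flushed by a beat-scheduled task; the buffer key, task name, and insert helper are illustrative, while `LOGS_BATCH_LIMIT` and `LOG_HISTORY_CONSUMER_INTERVAL` come from the sample.env below:

```python
# Periodically drain buffered logs and insert them in one batch.
import json
import os

import redis
from celery import Celery

app = Celery("log_consumer", broker=os.environ["CELERY_BROKER_BASE_URL"])

BATCH_LIMIT = int(os.environ.get("LOGS_BATCH_LIMIT", "30"))
LOG_BUFFER_KEY = "unified_logs:buffer"  # hypothetical Redis key

app.conf.beat_schedule = {
    "flush-execution-logs": {
        "task": "consume_log_history",
        "schedule": float(os.environ.get("LOG_HISTORY_CONSUMER_INTERVAL", "30")),
    }
}


@app.task(name="consume_log_history")
def consume_log_history() -> int:
    r = redis.Redis(host=os.environ.get("REDIS_HOST", "unstract-redis"))
    pipe = r.pipeline()
    pipe.lrange(LOG_BUFFER_KEY, 0, BATCH_LIMIT - 1)
    pipe.ltrim(LOG_BUFFER_KEY, BATCH_LIMIT, -1)
    raw_logs, _ = pipe.execute()
    logs = [json.loads(item) for item in raw_logs]
    if logs:
        # One bulk insert (e.g. Django bulk_create behind an internal
        # API) instead of one INSERT per log line.
        post_logs_via_internal_api(logs)  # hypothetical helper
    return len(logs)
```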

* Commit uv.lock changes

* Celery time limit/timeout config cleanup

* implemented Redis client class in worker

* pipeline status enum mismatch

* notification worker fixes

* resolve uv lock conflicts

* workflow log fixes

* WS channel name issue resolved; handling Redis downtime in status tracker and removing Redis keys
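
The Redis-down handling presumably makes status tracking best-effort; a minimal sketch of that idea (the wrapper class and method names are illustrative, the env vars come from the sample.env below):

```python
# Swallow Redis outages in the status tracker instead of failing the run.
import logging
import os

import redis

logger = logging.getLogger(__name__)


class WorkerRedisClient:  # hypothetical wrapper class
    def __init__(self) -> None:
        self._client = redis.Redis(
            host=os.environ.get("REDIS_HOST", "unstract-redis"),
            port=int(os.environ.get("REDIS_PORT", "6379")),
            username=os.environ.get("REDIS_USER", "default"),
            password=os.environ.get("REDIS_PASSWORD") or None,
            socket_connect_timeout=2,
        )

    def set_status(self, key: str, value: str, ttl: int) -> bool:
        try:
            self._client.set(key, value, ex=ttl)
            return True
        except redis.RedisError:
            # Status tracking is best-effort: log and continue so the
            # file execution itself is not failed by a Redis outage.
            logger.warning("Redis unavailable; skipped status update for %s", key)
            return False
```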

* default TTL changed for unified logs

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Signed-off-by: ali <117142933+muhammad-ali-e@users.noreply.github.com>
Signed-off-by: Ritwik G <100672805+ritwik-g@users.noreply.github.com>
Co-authored-by: Claude <noreply@anthropic.com>
Co-authored-by: Ritwik G <100672805+ritwik-g@users.noreply.github.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2025-10-03 11:24:07 +05:30


DJANGO_SETTINGS_MODULE='backend.settings.dev'
# NOTE: Change below to True if you are running in HTTPS mode.
SESSION_COOKIE_SECURE=False
CSRF_COOKIE_SECURE=False
# Default log level
DEFAULT_LOG_LEVEL="INFO"
# Common
PATH_PREFIX="api/v1"
# Django settings
DJANGO_APP_BACKEND_URL=http://frontend.unstract.localhost
DJANGO_SECRET_KEY="1(xf&nc6!y7!l&!5xe&i_rx7e^m@fcut9fduv86ft=-b@2g6"
# Postgres DB envs
DB_HOST='unstract-db'
DB_USER='unstract_dev'
DB_PASSWORD='unstract_pass'
DB_NAME='unstract_db'
DB_PORT=5432
DB_SCHEMA="unstract"
# Celery Backend Database (optional - defaults to DB_NAME if unset)
# Example:
# CELERY_BACKEND_DB_NAME=unstract_celery_db
# Redis
REDIS_HOST="unstract-redis"
REDIS_PORT=6379
REDIS_PASSWORD=""
REDIS_USER=default
# Connector OAuth
SOCIAL_AUTH_EXTRA_DATA_EXPIRATION_TIME_IN_SECOND=3600
GOOGLE_OAUTH2_KEY=
GOOGLE_OAUTH2_SECRET=
# User session
SESSION_EXPIRATION_TIME_IN_SECOND=7200
# FE Web Application Dependencies
WEB_APP_ORIGIN_URL="http://frontend.unstract.localhost"
# API keys for trusted services
INTERNAL_SERVICE_API_KEY=
# Unstract Core envs
BUILTIN_FUNCTIONS_API_KEY=
FREE_STORAGE_AWS_ACCESS_KEY_ID=
FREE_STORAGE_AWS_SECRET_ACCESS_KEY=
UNSTRACT_FREE_STORAGE_BUCKET_NAME=
GDRIVE_GOOGLE_SERVICE_ACCOUNT=
GDRIVE_GOOGLE_PROJECT_ID=
GOOGLE_STORAGE_ACCESS_KEY_ID=
GOOGLE_STORAGE_SECRET_ACCESS_KEY=
GOOGLE_STORAGE_BASE_URL=https://storage.googleapis.com
# Platform Service
PLATFORM_SERVICE_HOST=http://unstract-platform-service
PLATFORM_SERVICE_PORT=3001
# Tool Runner
UNSTRACT_RUNNER_HOST=http://unstract-runner
UNSTRACT_RUNNER_PORT=5002
UNSTRACT_RUNNER_API_TIMEOUT=240 # (in seconds) 4 mins
UNSTRACT_RUNNER_API_RETRY_COUNT=5 # Number of retries for failed requests
UNSTRACT_RUNNER_API_BACKOFF_FACTOR=3 # Exponential backoff factor for retries
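# NOTE (assumption): with urllib3-style exponential backoff, sleeps grow as
# factor * 2^(n-1), i.e. roughly 3s, 6s, 12s, 24s, 48s across the 5 retries.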
# Prompt Service
PROMPT_HOST=http://unstract-prompt-service
PROMPT_PORT=3003
# Prompt Studio
PROMPT_STUDIO_FILE_PATH=/app/prompt-studio-data
# Structure Tool Image (Runs prompt studio exported tools)
# https://hub.docker.com/r/unstract/tool-structure
STRUCTURE_TOOL_IMAGE_URL="docker:unstract/tool-structure:0.0.88"
STRUCTURE_TOOL_IMAGE_NAME="unstract/tool-structure"
STRUCTURE_TOOL_IMAGE_TAG="0.0.88"
# Feature Flags
EVALUATION_SERVER_IP=unstract-flipt
EVALUATION_SERVER_PORT=9000
PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
# X2Text Service
X2TEXT_HOST=http://unstract-x2text-service
X2TEXT_PORT=3004
# Encryption Key
# Key must be 32 url-safe base64-encoded bytes. Check the README.md for details
ENCRYPTION_KEY="Sample-Key"
# Cache TTL
CACHE_TTL_SEC=10800
# Default user auth credentials
DEFAULT_AUTH_USERNAME=
DEFAULT_AUTH_PASSWORD=
# System admin credentials
SYSTEM_ADMIN_USERNAME="admin"
SYSTEM_ADMIN_PASSWORD="admin"
SYSTEM_ADMIN_EMAIL="admin@abc.com"
# Set Django Session Expiry Time (in seconds)
SESSION_COOKIE_AGE=86400
# Control async extraction of LLMWhisperer
# Time in seconds to wait before polling LLMWhisperer's status API
ADAPTER_LLMW_POLL_INTERVAL=30
# Total number of times to poll the status API.
# 500 mins to allow 1500 (max pages limit) * 20 (approx time in sec to process a page)
ADAPTER_LLMW_MAX_POLLS=1000
# Number of times to retry the /whisper-status API before failing the extraction
ADAPTER_LLMW_STATUS_RETRIES=5
# Enable logging of workflow history.
ENABLE_LOG_HISTORY=True
# Interval in seconds for periodic consumer operations.
LOG_HISTORY_CONSUMER_INTERVAL=30
# Maximum number of logs to insert in a single batch.
LOGS_BATCH_LIMIT=30
# Logs Expiry of 24 hours
LOGS_EXPIRATION_TIME_IN_SECOND=86400
# Celery Configuration
# Used by celery and to connect to queue to push logs
CELERY_BROKER_BASE_URL="amqp://unstract-rabbitmq:5672//"
CELERY_BROKER_USER=admin
CELERY_BROKER_PASS=password
# Indexing flag to prevent re-index
INDEXING_FLAG_TTL=1800
# Notification Timeout in Seconds
NOTIFICATION_TIMEOUT=5
# Path where public and private tools are registered
# with a YAML and JSONs
TOOL_REGISTRY_CONFIG_PATH="/data/tool_registry_config"
# Flipt Service
FLIPT_SERVICE_AVAILABLE=False
# File System Configuration for Workflow and API Execution
# Directory Prefixes for storing execution files
WORKFLOW_EXECUTION_DIR_PREFIX="unstract/execution"
API_EXECUTION_DIR_PREFIX="unstract/api"
# Storage Provider for Workflow Execution
# Valid options: MINIO, S3, etc.
WORKFLOW_EXECUTION_FILE_STORAGE_CREDENTIALS='{"provider": "minio", "credentials": {"endpoint_url": "http://unstract-minio:9000", "key": "minio", "secret": "minio123"}}'
# Storage Provider for API Execution
API_FILE_STORAGE_CREDENTIALS='{"provider": "minio", "credentials": {"endpoint_url": "http://unstract-minio:9000", "key": "minio", "secret": "minio123"}}'
# Remote storage related envs
PERMANENT_REMOTE_STORAGE='{"provider": "minio", "credentials": {"endpoint_url": "http://unstract-minio:9000", "key": "minio", "secret": "minio123"}}'
REMOTE_PROMPT_STUDIO_FILE_PATH="unstract/prompt-studio-data"
# Storage Provider for Tool registry
TOOL_REGISTRY_STORAGE_CREDENTIALS='{"provider":"local"}'
# Highlight data to be available in api deployment
ENABLE_HIGHLIGHT_API_DEPLOYMENT=False
# Execution result and cache expire time
# For API results cached per workflow execution (24 hours)
EXECUTION_RESULT_TTL_SECONDS=86400
# For execution metadata cached per workflow execution (24 hours)
EXECUTION_CACHE_TTL_SECONDS=86400
# Instant workflow polling timeout in seconds (5 minutes)
INSTANT_WF_POLLING_TIMEOUT=300
# Maximum number of batches (i.e., parallel tasks) created for a single workflow execution (1 file at a time)
MAX_PARALLEL_FILE_BATCHES=1
# Maximum allowed value for MAX_PARALLEL_FILE_BATCHES (upper limit for validation)
MAX_PARALLEL_FILE_BATCHES_MAX_VALUE=100
# Maximum number of files allowed per workflow page execution
WORKFLOW_PAGE_MAX_FILES=2
# File execution tracker TTL in seconds (5 hours)
FILE_EXECUTION_TRACKER_TTL_IN_SECOND=18000
# File execution tracker completed TTL in seconds (10 minutes)
FILE_EXECUTION_TRACKER_COMPLETED_TTL_IN_SECOND=600
# Runner polling timeout (3 hours)
MAX_RUNNER_POLLING_WAIT_SECONDS=10800
# Runner polling interval (2 seconds)
RUNNER_POLLING_INTERVAL_SECONDS=2
# ETL Pipeline minimum schedule interval (in seconds)
# Default: 1800 seconds (30 minutes)
# Examples: 900 (15 min), 1800 (30 min), 3600 (60 min)
MIN_SCHEDULE_INTERVAL_SECONDS=1800