* UN-2470 [MISC] Remove Django dependency from Celery workers This commit introduces a new worker architecture that decouples Celery workers from Django where possible, enabling support for gevent/eventlet pool types and reducing worker startup overhead. Key changes: - Created separate worker modules (api-deployment, callback, file_processing, general) - Added internal API endpoints for worker communication - Implemented Django-free task execution where appropriate - Added shared utilities and client facades - Updated container configurations for new worker architecture 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com> * Fix pre-commit issues: file permissions and ruff errors Setup the docker for new workers - Add executable permissions to worker entrypoint files - Fix import order in namespace package __init__.py - Remove unused variable api_status in general worker - Address ruff E402 and F841 errors 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com> * refactored, Dockerfiles, fixes * flexibility on celery run commands * added debug logs * handled filehistory for API * cleanup * cleanup * cloud plugin structure * minor changes in import plugin * added notification and logger workers under new worker module * add docker compatibility for new workers * handled docker issues * log consumer worker fixes * added scheduler worker * minor env changes * cleanup the logs * minor changes in logs * resolved scheduler worker issues * cleanup and refactor * ensuring backward compatibility with existing workers * added configuration internal apis and cache utils * optimization * Fix API client singleton pattern to share HTTP sessions - Fix flawed singleton implementation that was trying to share BaseAPIClient instances - Now properly shares HTTP sessions between specialized clients - Eliminates 6x BaseAPIClient initialization by reusing the same underlying session - Should 
reduce API deployment orchestration time by ~135ms (from 6 clients to 1 session) - Added debug logging to verify singleton pattern activation * cleanup and structuring * cleanup in callback * file system connectors issue * celery env values changes * optional gossip * variables for sync, mingle and gossip * Fix for file type check * Task pipeline issue resolving * api deployment failed response handled * Task pipeline fixes * updated file history cleanup with active file execution * pipeline status update and workflow ui page execution * cleanup and resolving conflicts * remove unstract-core from connectors * Commit uv.lock changes * uv locks updates * resolve migration issues * defer connector-metadata * Fix connector migration for production scale - Add encryption key handling with defer() to prevent decryption failures - Add final cleanup step to fix duplicate connector names - Optimize for large datasets with batch processing and bulk operations - Ensure unique constraint in migration 0004 can be created successfully 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com> * hitl fixes * minor fixes on hitl * api_hub related changes * dockerfile fixes * api client cache fixes with actual response class * fix: tags and llm_profile_id * optimized clear cache * cleanup * enhanced logs * added more handling on is file dir and added loggers * cleanup the runplatform script * internal apis are exempted from csrf * sonar cloud issues * sonar-cloud issues * resolving sonar cloud issues * resolving sonar cloud issues * Delta: added Batch size fix in workers * comments addressed * celery configurational changes for new workers * fixes in callback regarding the pipeline type check * change internal url registry logic * gitignore changes * gitignore changes * addressing pr comments and cleaning up the code * adding missed profiles for v2 * sonar cloud blocker issues resolved * implement otel * Commit uv.lock changes * handle 
execution time and some cleanup * adding user_data in metadata Pr: https://github.com/Zipstack/unstract/pull/1544 * scheduler backward compatibility * replace user_data with custom_data * Commit uv.lock changes * celery worker command issue resolved * enhance package imports in connectors by changing to lazy imports * Update runner.py by removing the otel from it Update runner.py by removing the otel from it Signed-off-by: ali <117142933+muhammad-ali-e@users.noreply.github.com> * added delta changes * handle error to destination db * resolve tool instances id validation and hitl queue name in API * handled direct execution from workflow page to worker and logs * handle cost logs * Update health.py Signed-off-by: Ritwik G <100672805+ritwik-g@users.noreply.github.com> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * minor log changes * introducing log consumer scheduler to bulk create, and socket .emit from worker for ws * Commit uv.lock changes * time limit or timeout celery config cleanup * implemented redis client class in worker * pipeline status enum mismatch * notification worker fixes * resolve uv lock conflicts * workflow log fixes * ws channel name issue resolved. and handling redis down in status tracker, and removing redis keys * default TTL changed for unified logs * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: ali <117142933+muhammad-ali-e@users.noreply.github.com> Signed-off-by: Ritwik G <100672805+ritwik-g@users.noreply.github.com> Co-authored-by: Claude <noreply@anthropic.com> Co-authored-by: Ritwik G <100672805+ritwik-g@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
131 lines
3.9 KiB
Python
131 lines
3.9 KiB
Python
import http
|
|
import logging
|
|
import os
|
|
from typing import Any
|
|
|
|
import redis
|
|
import socketio
|
|
from django.conf import settings
|
|
from django.core.wsgi import WSGIHandler
|
|
|
|
from unstract.core.data_models import LogDataDTO
|
|
from unstract.core.log_utils import get_validated_log_data, store_execution_log
|
|
from utils.constants import ExecutionLogConstants
|
|
|
|
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Shared Socket.IO server instance. The connect/disconnect handlers in this
# module register on it via @sio.event, _emit_websocket_event() emits through
# it, and start_server() wraps it into a WSGI application.
sio = socketio.Server(
    # Allowed values: {threading, eventlet, gevent, gevent_uwsgi}
    async_mode="threading",
    # Allowed CORS origins come from the Django settings.
    cors_allowed_origins=settings.CORS_ALLOWED_ORIGINS,
    # Both library-level logging flags are disabled.
    logger=False,
    engineio_logger=False,
    # NOTE(review): per the python-socketio docs this accepts a connection
    # before the connect handler has run — confirm that is intended, given
    # that connect() rejects clients by calling sio.disconnect().
    always_connect=True,
    # Kombu-based client manager configured from settings.SOCKET_IO_MANAGER_URL.
    client_manager=socketio.KombuManager(url=settings.SOCKET_IO_MANAGER_URL),
)
|
|
|
|
# Redis client built from the Django settings at import time. It is handed to
# store_execution_log() by _store_execution_log() in this module.
redis_conn = redis.Redis(
    host=settings.REDIS_HOST,
    # Explicit int() cast — presumably the setting may arrive as a string
    # (e.g. from an environment variable); confirm against the settings module.
    port=int(settings.REDIS_PORT),
    username=settings.REDIS_USER,
    password=settings.REDIS_PASSWORD,
)
|
|
|
|
|
|
@sio.event
def connect(sid: str, environ: Any, auth: Any) -> None:
    """Handle a newly connected Socket.IO client.

    The user session ID is read from the request cookies. When one is found,
    the socket joins the room named after that session ID; otherwise the
    client is disconnected.

    Args:
        sid (str): The socket ID of the client.
        environ (Any): The request environment of the client.
        auth (Any): Authentication data supplied by the client (not used here).
    """
    logger.info(f"[{os.getpid()}] Client with SID:{sid} connected")

    session_id = _get_user_session_id_from_cookies(sid, environ)
    if not session_id:
        # No usable session cookie: drop the connection.
        sio.disconnect(sid)
        return

    sio.enter_room(sid, session_id)
    logger.info(f"Entered room {session_id} for socket {sid}")
|
|
|
|
|
|
@sio.event
def disconnect(sid: str) -> None:
    """Log that the client identified by ``sid`` has disconnected.

    Args:
        sid (str): The socket ID of the client.
    """
    message = f"[{os.getpid()}] Client with SID:{sid} disconnected"
    logger.info(message)
|
|
|
|
|
|
def _get_user_session_id_from_cookies(sid: str, environ: Any) -> str | None:
    """Get the user session ID from cookies.

    Args:
        sid (str): The socket ID of the client, used only in log messages.
        environ (Any): The request environment of the client; the raw cookie
            header is read from its ``HTTP_COOKIE`` key.

    Returns:
        str | None: The value of the cookie named
            ``settings.SESSION_COOKIE_NAME``, or None when the cookie header
            is missing, cannot be parsed, or does not contain that cookie.
    """
    # Import the submodule explicitly: a bare ``import http`` does not load
    # ``http.cookies``, so ``http.cookies.SimpleCookie`` only resolves when
    # some other module happens to have imported it already.
    from http.cookies import CookieError, SimpleCookie

    cookie_str = environ.get("HTTP_COOKIE")
    if not cookie_str:
        # The environ is deliberately not logged: it carries every request
        # header and would leak them into the application logs.
        logger.warning(f"No cookies found for the sid {sid}")
        return None

    try:
        cookie = SimpleCookie(cookie_str)
    except CookieError as e:
        # The header is client-controlled; SimpleCookie raises on some
        # malformed input (e.g. an illegal cookie name). Treat that like a
        # missing session instead of letting it escape the connect handler.
        logger.warning(f"Could not parse cookies for SID {sid}: {e}")
        return None

    session_id = cookie.get(settings.SESSION_COOKIE_NAME)
    if not session_id:
        logger.warning(f"No session ID found in cookies for SID {sid}")
        return None

    return session_id.value
|
|
|
|
|
|
# Functions moved to unstract.core.log_utils for sharing with workers
|
|
# Keep these as wrapper functions for backward compatibility
|
|
|
|
|
|
def _get_validated_log_data(json_data: Any) -> LogDataDTO | None:
    """Backward-compatibility wrapper around ``get_validated_log_data``.

    Args:
        json_data (Any): Raw log data to validate.

    Returns:
        LogDataDTO | None: Whatever the shared helper returns for
            ``json_data``.
    """
    validated = get_validated_log_data(json_data)
    return validated
|
|
|
|
|
|
def _store_execution_log(data: dict[str, Any]) -> None:
    """Backward-compatibility wrapper around ``store_execution_log``.

    Forwards ``data`` to the shared helper together with this module's Redis
    connection and the queue name / enabled flag from
    ``ExecutionLogConstants``.

    Args:
        data (dict[str, Any]): Execution log entry to store.
    """
    queue_name = ExecutionLogConstants.LOG_QUEUE_NAME
    enabled = ExecutionLogConstants.IS_ENABLED
    store_execution_log(
        data=data,
        redis_client=redis_conn,
        log_queue_name=queue_name,
        is_enabled=enabled,
    )
|
|
|
|
|
|
def _emit_websocket_event(room: str, event: str, data: dict[str, Any]) -> None:
    """Emit a websocket event to a room.

    The data is wrapped as ``{"data": data}`` before being emitted. Emission
    is best-effort: any exception is logged and not re-raised.

    Args:
        room (str): Room to emit event to
        event (str): Event name
        data (dict[str, Any]): Data to emit
    """
    payload = {"data": data}
    try:
        # Lazy %-style arguments: the payload is only rendered into the
        # message when DEBUG logging is actually enabled, instead of being
        # stringified for every emitted event.
        logger.debug(
            "[%s] Push websocket event: %s, %s", os.getpid(), event, payload
        )
        sio.emit(event, data=payload, room=room)
    except Exception as e:
        # exc_info keeps the traceback, which the message alone would lose.
        logger.error(f"Error emitting WebSocket event: {e}", exc_info=True)
|
|
|
|
|
|
def handle_user_logs(room: str, event: str, message: dict[str, Any]) -> None:
    """Handle user logs from applications.

    A message that arrives with both a room and an event is first stored as
    an execution log and then emitted as a websocket event. A message
    missing either one is dropped with a warning.

    Args:
        room (str): Room the websocket event is emitted to
        event (str): Name of the websocket event
        message (dict[str, Any]): log message
    """
    if room and event:
        _store_execution_log(message)
        _emit_websocket_event(room, event, message)
        return

    logger.warning(f"Message received without room and event: {message}")
|
|
|
|
|
|
def start_server(django_app: WSGIHandler, namespace: str) -> WSGIHandler:
    """Wrap the Django WSGI application with the Socket.IO server.

    Args:
        django_app (WSGIHandler): The Django WSGI application to wrap.
        namespace (str): Passed to ``socketio.WSGIApp`` as ``socketio_path``.

    Returns:
        WSGIHandler: The ``socketio.WSGIApp`` built from ``sio`` and
            ``django_app``.
    """
    return socketio.WSGIApp(sio, django_app, socketio_path=namespace)
|