From e7979bc15c911623f3ea18bf50c8427dddf31dda Mon Sep 17 00:00:00 2001 From: Travis Vasceannie Date: Sat, 22 Nov 2025 21:36:43 +0000 Subject: [PATCH] x --- compose/swarm/docker-compose.litellm.yml | 195 ++++++++++++----------- compose/swarm/docker-compose.nhost.yml | 8 +- traefik/traefik.yaml | 18 +++ 3 files changed, 126 insertions(+), 95 deletions(-) diff --git a/compose/swarm/docker-compose.litellm.yml b/compose/swarm/docker-compose.litellm.yml index 07ce5d7..a9f6289 100755 --- a/compose/swarm/docker-compose.litellm.yml +++ b/compose/swarm/docker-compose.litellm.yml @@ -1,96 +1,109 @@ +version: '3.2' + services: - litellm: - image: litellm/litellm:latest - restart: unless-stopped - # volumes: - # - /home/litellm/config.yaml:/app/config.yaml - # command: - # - "--config=/app/config.yaml" - environment: - DATABASE_URL: ${DATABASE_URL} - LITELLM_MASTER_KEY: ${LITELLM_MASTER_KEY} - LITELLM_SALT_KEY: ${LITELLM_SALT_KEY} - UI_USERNAME: ${UI_USERNAME} - UI_PASSWORD: ${UI_PASSWORD} - STORE_MODEL_IN_DB: "True" - # Provider Keys - OPENAI_API_KEY: ${OPENAI_API_KEY} - OPENAI_BASE_URL: ${OPENAI_BASE_URL} - COHERE_API_KEY: ${COHERE_API_KEY} - OR_SITE_URL: ${OR_SITE_URL} - OR_APP_NAME: ${OR_APP_NAME} - OR_API_KEY: ${OR_API_KEY} - AZURE_API_BASE: ${AZURE_API_BASE} - AZURE_API_VERSION: ${AZURE_API_VERSION} - AZURE_API_KEY: ${AZURE_API_KEY} - REPLICATE_API_KEY: ${REPLICATE_API_KEY} - REPLICATE_API_TOKEN: ${REPLICATE_API_TOKEN} - ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY} - INFISICAL_TOKEN: ${INFISICAL_TOKEN} - NOVITA_API_KEY: ${NOVITA_API_KEY} - INFINITY_API_KEY: ${INFINITY_API_KEY} - LITELLM_LOG: ${LITELLM_LOG:-WARN} - LANGFUSE_PUBLIC_KEY: ${LANGFUSE_PUBLIC_KEY} - LANGFUSE_SECRET_KEY: ${LANGFUSE_SECRET_KEY} - LANGFUSE_OTEL_HOST: ${LANGFUSE_OTEL_HOST} - networks: - - litellm - - badge-net - - public - expose: - - 4000 - deploy: - replicas: 1 - update_config: - parallelism: 1 - delay: 10s - failure_action: rollback - placement: - constraints: - - node.role == worker + litellm: + image: litellm/litellm:latest + restart: unless-stopped + # volumes: + # - /home/litellm/config.yaml:/app/config.yaml + # command: + # - "--config=/app/config.yaml" + environment: + DATABASE_URL: ${DATABASE_URL} + LITELLM_MASTER_KEY: ${LITELLM_MASTER_KEY} + LITELLM_SALT_KEY: ${LITELLM_SALT_KEY} + UI_USERNAME: ${UI_USERNAME} + UI_PASSWORD: ${UI_PASSWORD} + STORE_MODEL_IN_DB: "True" + # Provider Keys + OPENAI_API_KEY: ${OPENAI_API_KEY} + OPENAI_BASE_URL: ${OPENAI_BASE_URL} + COHERE_API_KEY: ${COHERE_API_KEY} + OR_SITE_URL: ${OR_SITE_URL} + OR_APP_NAME: ${OR_APP_NAME} + OR_API_KEY: ${OR_API_KEY} + AZURE_API_BASE: ${AZURE_API_BASE} + AZURE_API_VERSION: ${AZURE_API_VERSION} + AZURE_API_KEY: ${AZURE_API_KEY} + REPLICATE_API_KEY: ${REPLICATE_API_KEY} + REPLICATE_API_TOKEN: ${REPLICATE_API_TOKEN} + ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY} + INFISICAL_TOKEN: ${INFISICAL_TOKEN} + NOVITA_API_KEY: ${NOVITA_API_KEY} + INFINITY_API_KEY: ${INFINITY_API_KEY} + LITELLM_LOG: ${LITELLM_LOG:-WARN} + LANGFUSE_PUBLIC_KEY: ${LANGFUSE_PUBLIC_KEY} + LANGFUSE_SECRET_KEY: ${LANGFUSE_SECRET_KEY} + LANGFUSE_OTEL_HOST: ${LANGFUSE_OTEL_HOST} + networks: + - litellm + - badge-net + - public + expose: + - 4000 + healthcheck: + # Simple TCP port check using Python - no auth or external tools required + test: + ["CMD", "python", "-c", "import socket; s = socket.socket(); s.settimeout(5); s.connect(('localhost', 4000)); s.close()"] + interval: 30s + timeout: 10s + retries: 3 + deploy: + replicas: 1 + update_config: + parallelism: 1 + delay: 10s + failure_action: rollback + placement: + constraints: + - node.hostname == crackbox + labels: + - "traefik.enable=true" + - "traefik.swarm.network=public" + - "traefik.http.routers.litellm.entrypoints=web" + - "traefik.http.routers.litellm.rule=Host(`llm.lab`) || Host(`llm.toy`)" + - "traefik.http.routers.litellm.service=litellm" + - "traefik.http.services.litellm.loadbalancer.server.port=4000" - litellm-db: - image: postgres:17-alpine - restart: unless-stopped - healthcheck: - test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER} -d ${POSTGRES_DB}"] - interval: 5s - timeout: 5s - retries: 5 - volumes: - - litellm-db-data:/var/lib/postgresql/data - networks: - - litellm - environment: - POSTGRES_DB: ${POSTGRES_DB} - POSTGRES_USER: ${POSTGRES_USER} - POSTGRES_PASSWORD: ${POSTGRES_PASSWORD} - deploy: - replicas: 1 - update_config: - parallelism: 1 - delay: 10s - failure_action: rollback - placement: - constraints: - - node.hostname == crackbox + litellm-db: + image: postgres:17-alpine + restart: unless-stopped + healthcheck: + test: + [ + "CMD-SHELL", + "pg_isready -U ${POSTGRES_USER} -d ${POSTGRES_DB}", + ] + interval: 5s + timeout: 5s + retries: 5 + volumes: + - /home/litellm/db/data:/var/lib/postgresql/data + networks: + - litellm + environment: + POSTGRES_DB: ${POSTGRES_DB} + POSTGRES_USER: ${POSTGRES_USER} + POSTGRES_PASSWORD: ${POSTGRES_PASSWORD} + deploy: + replicas: 1 + update_config: + parallelism: 1 + delay: 10s + failure_action: rollback + placement: + constraints: + - node.hostname == crackbox -volumes: - litellm-db-data: - name: litellm-db-data - driver: local - driver_opts: - type: tmpfs - device: tmpfs +# volumes: +# litellm-db-data: +# driver: local networks: - litellm: - name: litellm - internal: true - driver: overlay - badge-net: - name: badge-net - external: true - public: - name: public - external: true \ No newline at end of file + litellm: + driver: overlay + attachable: true + badge-net: + external: true + public: + external: true diff --git a/compose/swarm/docker-compose.nhost.yml b/compose/swarm/docker-compose.nhost.yml index b9e34d8..7610cd3 100644 --- a/compose/swarm/docker-compose.nhost.yml +++ b/compose/swarm/docker-compose.nhost.yml @@ -12,7 +12,7 @@ services: labels: traefik.enable: "true" traefik.http.routers.auth.entrypoints: web - traefik.http.routers.auth.rule: Host(``) + traefik.http.routers.auth.rule: Host(`auth.nhost.toy`) traefik.http.routers.auth.service: auth traefik.http.routers.auth.tls: "false" traefik.http.services.auth.loadbalancer.server.port: "4000" @@ -119,11 +119,11 @@ services: labels: traefik.enable: "true" traefik.http.routers.console.entrypoints: web - traefik.http.routers.console.rule: Host(``) + traefik.http.routers.console.rule: Host(`hasura.nhost.toy`) && PathPrefix(`/console`) traefik.http.routers.console.service: console traefik.http.routers.console.tls: "false" traefik.http.routers.migrate.entrypoints: web - traefik.http.routers.migrate.rule: Host(``) && PathPrefix(`/apis/`) + traefik.http.routers.migrate.rule: Host(`hasura.nhost.toy`) && PathPrefix(`/apis/`) traefik.http.routers.migrate.service: migrate traefik.http.routers.migrate.tls: "false" traefik.http.services.console.loadbalancer.server.port: "9695" @@ -496,4 +496,4 @@ volumes: pgdata: name: swarm_pgdata root_node_modules: - name: swarm_root_node_modules \ No newline at end of file + name: swarm_root_node_modules diff --git a/traefik/traefik.yaml b/traefik/traefik.yaml index e73243d..a2ebe84 100755 --- a/traefik/traefik.yaml +++ b/traefik/traefik.yaml @@ -10,6 +10,22 @@ api: dashboard: true debug: false +metrics: + prometheus: + addEntryPointsLabels: true + addServicesLabels: true + +tls: + stores: + default: + defaultCertificate: + certFile: /etc/traefik/tls/local.crt + keyFile: /etc/traefik/tls/local.key + +serversTransport: + # Allow self-signed certs from internal services; change to false if you add real TLS + insecureSkipVerify: true + entryPoints: web: address: ":80" @@ -23,3 +39,5 @@ providers: endpoint: "unix:///var/run/docker.sock" exposedByDefault: false network: public + refreshSeconds: 15 + watch: true