diff --git a/.dockerignore b/.dockerignore
index f1a82ffa..f738d586 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -28,6 +28,12 @@ Makefile
 # Exclude other projects
 /tests
 /scripts
+/data
+/dickens
+/reproduce
+/output_complete
+/rag_storage
+/inputs
 
 # Python version manager file
 .python-version
diff --git a/.github/workflows/docker-build-offline.yml b/.github/workflows/docker-build-offline.yml
new file mode 100644
index 00000000..91bf723d
--- /dev/null
+++ b/.github/workflows/docker-build-offline.yml
@@ -0,0 +1,88 @@
+name: Build Offline Docker Image
+
+on:
+  workflow_dispatch:
+
+permissions:
+  contents: read
+  packages: write
+
+jobs:
+  build-and-push-offline:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Get latest tag
+        id: get_tag
+        run: |
+          LATEST_TAG=$(git describe --tags --abbrev=0 2>/dev/null || echo "")
+          if [ -z "$LATEST_TAG" ]; then
+            LATEST_TAG="sha-$(git rev-parse --short HEAD)"
+            echo "No tags found, using commit SHA: $LATEST_TAG"
+          else
+            echo "Latest tag found: $LATEST_TAG"
+          fi
+          echo "tag=$LATEST_TAG" >> "$GITHUB_OUTPUT"
+
+      - name: Prepare offline tag
+        id: offline_tag
+        # Step outputs reach the script via env instead of being expanded into its text
+        env:
+          BASE_TAG: ${{ steps.get_tag.outputs.tag }}
+        run: |
+          OFFLINE_TAG="${BASE_TAG}-offline"
+          echo "Offline image tag: $OFFLINE_TAG"
+          echo "offline_tag=$OFFLINE_TAG" >> "$GITHUB_OUTPUT"
+
+      - name: Update version in __init__.py
+        env:
+          BASE_TAG: ${{ steps.get_tag.outputs.tag }}
+        run: |
+          sed -i "s/__version__ = \".*\"/__version__ = \"${BASE_TAG}\"/" lightrag/__init__.py
+          grep __version__ lightrag/__init__.py
+
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Login to GitHub Container Registry
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Extract metadata for Docker
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ghcr.io/${{ github.repository }}
+          tags: |
+            type=raw,value=${{ steps.offline_tag.outputs.offline_tag }}
+
+      - name: Build and push offline Docker image
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          file: ./Dockerfile.offline
+          platforms: linux/amd64,linux/arm64
+          push: true
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
+
+      - name: Output image details
+        env:
+          IMAGE_REF: ghcr.io/${{ github.repository }}:${{ steps.offline_tag.outputs.offline_tag }}
+          BASE_TAG: ${{ steps.get_tag.outputs.tag }}
+        run: |
+          echo "Offline Docker image built and pushed successfully!"
+          echo "Image tag: $IMAGE_REF"
+          echo "Base Git tag used: $BASE_TAG"
diff --git a/Dockerfile.offline b/Dockerfile.offline
new file mode 100644
index 00000000..b2bd5d58
--- /dev/null
+++ b/Dockerfile.offline
@@ -0,0 +1,94 @@
+# Frontend build stage
+FROM oven/bun:1 AS frontend-builder
+
+WORKDIR /app
+
+# Copy frontend source code
+COPY lightrag_webui/ ./lightrag_webui/
+
+# Build frontend assets for inclusion in the API package
+WORKDIR /app/lightrag_webui
+RUN bun install --frozen-lockfile \
+    && bun run build
+
+# Python build stage
+FROM python:3.12-slim AS builder
+
+# Build-time only: keeps apt non-interactive without persisting in the image env
+ARG DEBIAN_FRONTEND=noninteractive
+
+WORKDIR /app
+
+# Upgrade packaging tools and install system deps (Rust is required by some wheels).
+# The rustup installer is saved to a file before it is run so that a failed download
+# aborts the build; piping curl straight into sh would hide curl's exit status.
+RUN pip install --no-cache-dir --upgrade pip setuptools wheel \
+    && apt-get update \
+    && apt-get install -y --no-install-recommends \
+        build-essential \
+        curl \
+        pkg-config \
+    && rm -rf /var/lib/apt/lists/* \
+    && curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs -o /tmp/rustup-init.sh \
+    && sh /tmp/rustup-init.sh -y \
+    && rm -f /tmp/rustup-init.sh
+
+ENV PATH="/root/.cargo/bin:/root/.local/bin:${PATH}"
+
+# Copy project metadata and sources
+COPY pyproject.toml .
+COPY setup.py .
+COPY requirements-offline*.txt ./
+COPY lightrag/ ./lightrag/
+
+# Include pre-built frontend assets from the previous stage
+COPY --from=frontend-builder /app/lightrag/api/webui ./lightrag/api/webui
+
+# Install LightRAG with API extras and all offline dependencies
+# (the extras spec is quoted so the shell never treats [api] as a glob pattern)
+RUN pip install --user --no-cache-dir --use-pep517 ".[api]"
+RUN pip install --user --no-cache-dir -r requirements-offline.txt
+
+# Prepare offline cache directory and pre-populate tiktoken data
+# (an exit status of 2 is tolerated; any other non-zero status aborts the build)
+RUN mkdir -p /app/data/tiktoken \
+    && lightrag-download-cache --cache-dir /app/data/tiktoken || status=$?; \
+    if [ -n "${status:-}" ] && [ "$status" -ne 0 ] && [ "$status" -ne 2 ]; then exit "$status"; fi
+
+# Final stage
+FROM python:3.12-slim
+
+WORKDIR /app
+
+RUN pip install --no-cache-dir --upgrade pip setuptools wheel
+
+# Copy installed packages and application code
+COPY --from=builder /root/.local /root/.local
+COPY --from=builder /app/lightrag ./lightrag
+COPY pyproject.toml .
+COPY setup.py .
+COPY requirements-offline*.txt ./
+
+# Ensure the installed scripts are on PATH
+ENV PATH=/root/.local/bin:$PATH
+
+# Install the package for runtime (a regular, non-editable install) and verify extras
+# IMPORTANT: Must be done BEFORE creating data/ directory to avoid setuptools error
+RUN pip install --no-cache-dir --use-pep517 ".[api]"
+RUN pip install --no-cache-dir -r requirements-offline.txt
+
+# Create persistent data directories AFTER package installation
+RUN mkdir -p /app/data/rag_storage /app/data/inputs /app/data/tiktoken
+
+# Copy offline cache into the newly created directory
+COPY --from=builder /app/data/tiktoken /app/data/tiktoken
+
+# Point to the prepared cache
+ENV TIKTOKEN_CACHE_DIR=/app/data/tiktoken
+ENV WORKING_DIR=/app/data/rag_storage
+ENV INPUT_DIR=/app/data/inputs
+
+# Expose API port
+EXPOSE 9621
+
+ENTRYPOINT ["python", "-m", "lightrag.api.lightrag_server"]
diff --git a/docker-compose.yml b/docker-compose.yml
index 2881b5c4..e315e31b 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -12,9 +12,9 @@ services:
     volumes:
       - ./data/rag_storage:/app/data/rag_storage
       - ./data/inputs:/app/data/inputs
-      - ./data/tiktoken:/app/data/tiktoken
       - ./config.ini:/app/config.ini
       - ./.env:/app/.env
+      # - ./data/tiktoken:/app/data/tiktoken
     env_file:
       - .env
     environment:
diff --git a/pyproject.toml b/pyproject.toml
index f7008c9a..b31e58fa 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -130,6 +130,7 @@ Repository = "https://github.com/HKUDS/LightRAG"
 
 [tool.setuptools.packages.find]
 include = ["lightrag*"]
+exclude = ["data*", "tests*", "scripts*", "examples*", "dickens*", "reproduce*", "output_complete*", "rag_storage*", "inputs*"]
 
 [tool.setuptools]
 include-package-data = true