Add offline Docker build support with embedded models and cache

- Add offline Dockerfile with tiktoken cache
- Create GitHub workflow for offline builds
- Update dockerignore for cleaner builds
- Exclude dev dirs from package setup
- Remove tiktoken volume from compose
This commit is contained in:
yangdx
2025-10-15 15:40:30 +08:00
parent 83b10a52ad
commit 6d1ae40478
5 changed files with 172 additions and 1 deletions

View File

@@ -28,6 +28,12 @@ Makefile
# Exclude other projects
/tests
/scripts
# Directories named like local data, sample corpora and run output.
# NOTE(review): presumably created by local runs/examples and not needed in
# the Docker build context — confirm before relying on this.
/data
/dickens
/reproduce
/output_complete
/rag_storage
/inputs
# Python version manager file
.python-version

View File

@@ -0,0 +1,77 @@
# Manually triggered workflow: builds the offline Docker image from
# ./Dockerfile.offline for linux/amd64 and linux/arm64 and pushes it to the
# GitHub Container Registry, tagged "<latest git tag>-offline".
name: Build Offline Docker Image

on:
  workflow_dispatch:

permissions:
  contents: read
  packages: write

jobs:
  build-and-push-offline:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Full history (including tags) so `git describe` can find a tag.
          fetch-depth: 0

      # Resolve the most recent tag reachable from HEAD; fall back to
      # "sha-<short commit>" when the repository has no tags.
      - name: Get latest tag
        id: get_tag
        run: |
          LATEST_TAG=$(git describe --tags --abbrev=0 2>/dev/null || echo "")
          if [ -z "$LATEST_TAG" ]; then
            LATEST_TAG="sha-$(git rev-parse --short HEAD)"
            echo "No tags found, using commit SHA: $LATEST_TAG"
          else
            echo "Latest tag found: $LATEST_TAG"
          fi
          echo "tag=$LATEST_TAG" >> "$GITHUB_OUTPUT"

      # Step outputs are handed to the shell through `env:` instead of being
      # interpolated into the script text, so a tag name containing shell
      # metacharacters cannot alter the script.
      - name: Prepare offline tag
        id: offline_tag
        env:
          BASE_TAG: ${{ steps.get_tag.outputs.tag }}
        run: |
          OFFLINE_TAG="${BASE_TAG}-offline"
          echo "Offline image tag: $OFFLINE_TAG"
          echo "offline_tag=$OFFLINE_TAG" >> "$GITHUB_OUTPUT"

      # Stamp the resolved tag into the package before it is built into the
      # image. grep exits non-zero (failing the step) if no __version__ line
      # is present.
      - name: Update version in __init__.py
        env:
          BASE_TAG: ${{ steps.get_tag.outputs.tag }}
        run: |
          sed -i "s/__version__ = \".*\"/__version__ = \"${BASE_TAG}\"/" lightrag/__init__.py
          grep __version__ lightrag/__init__.py

      # Registers QEMU emulators so the linux/arm64 RUN steps can execute on
      # the amd64 runner.
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Login to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Extract metadata for Docker
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ghcr.io/${{ github.repository }}
          tags: |
            type=raw,value=${{ steps.offline_tag.outputs.offline_tag }}

      - name: Build and push offline Docker image
        uses: docker/build-push-action@v5
        with:
          context: .
          file: ./Dockerfile.offline
          platforms: linux/amd64,linux/arm64
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          # Reuse and refresh the GitHub Actions layer cache.
          cache-from: type=gha
          cache-to: type=gha,mode=max

      - name: Output image details
        env:
          IMAGE_REPO: ${{ github.repository }}
          OFFLINE_TAG: ${{ steps.offline_tag.outputs.offline_tag }}
          BASE_TAG: ${{ steps.get_tag.outputs.tag }}
        run: |
          echo "Offline Docker image built and pushed successfully!"
          echo "Image tag: ghcr.io/${IMAGE_REPO}:${OFFLINE_TAG}"
          echo "Base Git tag used: ${BASE_TAG}"

87
Dockerfile.offline Normal file
View File

@@ -0,0 +1,87 @@
# Offline image: three stages.
#   1. frontend-builder - builds the web UI with bun.
#   2. builder          - installs LightRAG plus the offline requirements and
#                         pre-populates the tiktoken cache.
#   3. final            - runtime image carrying the installed packages and
#                         the prepared cache.

# Frontend build stage
FROM oven/bun:1 AS frontend-builder

WORKDIR /app

# Copy frontend source code
COPY lightrag_webui/ ./lightrag_webui/

# Build frontend assets for inclusion in the API package
RUN cd lightrag_webui \
    && bun install --frozen-lockfile \
    && bun run build

# Python build stage
FROM python:3.12-slim AS builder

ENV DEBIAN_FRONTEND=noninteractive

WORKDIR /app

# Upgrade packaging tools and install system deps (Rust is required by some wheels)
RUN pip install --upgrade pip setuptools wheel \
    && apt-get update \
    && apt-get install -y --no-install-recommends \
        curl \
        build-essential \
        pkg-config \
    && rm -rf /var/lib/apt/lists/* \
    && curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y

# Make the rustup-installed toolchain and `pip install --user` scripts reachable
ENV PATH="/root/.cargo/bin:/root/.local/bin:${PATH}"

# Copy project metadata and sources
COPY pyproject.toml .
COPY setup.py .
COPY requirements-offline*.txt ./
COPY lightrag/ ./lightrag/

# Include pre-built frontend assets from the previous stage
COPY --from=frontend-builder /app/lightrag/api/webui ./lightrag/api/webui

# Install LightRAG with API extras and all offline dependencies.
# The requirement is quoted so the shell never treats ".[api]" as a glob.
RUN pip install --user --no-cache-dir --use-pep517 ".[api]"
RUN pip install --user --no-cache-dir -r requirements-offline.txt

# Prepare offline cache directory and pre-populate tiktoken data.
# Exit status 2 from the download step is tolerated; any other non-zero status
# fails the build.
# NOTE(review): presumably 2 signals a partial/non-fatal download result -
# confirm against lightrag-download-cache.
RUN mkdir -p /app/data/tiktoken \
    && lightrag-download-cache --cache-dir /app/data/tiktoken || status=$?; \
    if [ -n "${status:-}" ] && [ "$status" -ne 0 ] && [ "$status" -ne 2 ]; then exit "$status"; fi

# Final stage
FROM python:3.12-slim

WORKDIR /app

RUN pip install --upgrade pip setuptools wheel

# Copy installed packages and application code
COPY --from=builder /root/.local /root/.local
COPY --from=builder /app/lightrag ./lightrag
COPY pyproject.toml .
COPY setup.py .
COPY requirements-offline*.txt ./

# Ensure the installed scripts are on PATH
ENV PATH=/root/.local/bin:$PATH

# Install the package for runtime (a regular, non-editable install) and verify extras
# IMPORTANT: Must be done BEFORE creating data/ directory to avoid setuptools error
RUN pip install --no-cache-dir --use-pep517 ".[api]"
RUN pip install --no-cache-dir -r requirements-offline.txt

# Create persistent data directories AFTER package installation
RUN mkdir -p /app/data/rag_storage /app/data/inputs /app/data/tiktoken

# Copy offline cache into the newly created directory
COPY --from=builder /app/data/tiktoken /app/data/tiktoken

# Point to the prepared cache
ENV TIKTOKEN_CACHE_DIR=/app/data/tiktoken
ENV WORKING_DIR=/app/data/rag_storage
ENV INPUT_DIR=/app/data/inputs

# Expose API port
EXPOSE 9621

ENTRYPOINT ["python", "-m", "lightrag.api.lightrag_server"]

View File

@@ -12,9 +12,9 @@ services:
volumes:
- ./data/rag_storage:/app/data/rag_storage
- ./data/inputs:/app/data/inputs
- ./data/tiktoken:/app/data/tiktoken
- ./config.ini:/app/config.ini
- ./.env:/app/.env
# - ./data/tiktoken:/app/data/tiktoken
env_file:
- .env
environment:

View File

@@ -130,6 +130,7 @@ Repository = "https://github.com/HKUDS/LightRAG"
# Package discovery: only packages matching "lightrag*" are included.
# The exclude list names top-level directories (tests, scripts, examples and
# local data/output directories) that must not be picked up as packages.
[tool.setuptools.packages.find]
include = ["lightrag*"]
exclude = ["data*", "tests*", "scripts*", "examples*", "dickens*", "reproduce*", "output_complete*", "rag_storage*", "inputs*"]
[tool.setuptools]
include-package-data = true