Add offline Docker build support with embedded models and cache
- Add offline Dockerfile with tiktoken cache
- Create GitHub workflow for offline builds
- Update dockerignore for cleaner builds
- Exclude dev dirs from package setup
- Remove tiktoken volume from compose
This commit is contained in:
@@ -28,6 +28,12 @@ Makefile
|
||||
# Exclude other projects
|
||||
/tests
|
||||
/scripts
|
||||
/data
|
||||
/dickens
|
||||
/reproduce
|
||||
/output_complete
|
||||
/rag_storage
|
||||
/inputs
|
||||
|
||||
# Python version manager file
|
||||
.python-version
|
||||
|
||||
77
.github/workflows/docker-build-offline.yml
vendored
Normal file
77
.github/workflows/docker-build-offline.yml
vendored
Normal file
@@ -0,0 +1,77 @@
|
||||
# Manually triggered workflow: builds the image described by
# Dockerfile.offline for amd64 and arm64 and pushes it to GHCR, tagged
# "<latest git tag>-offline" (or "sha-<short sha>-offline" when the
# repository has no tags).
name: Build Offline Docker Image

on:
  workflow_dispatch:

permissions:
  contents: read
  packages: write

jobs:
  build-and-push-offline:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Full history so that `git describe --tags` can find tags.
          fetch-depth: 0

      - name: Get latest tag
        id: get_tag
        run: |
          LATEST_TAG=$(git describe --tags --abbrev=0 2>/dev/null || echo "")
          if [ -z "$LATEST_TAG" ]; then
            LATEST_TAG="sha-$(git rev-parse --short HEAD)"
            echo "No tags found, using commit SHA: $LATEST_TAG"
          else
            echo "Latest tag found: $LATEST_TAG"
          fi
          echo "tag=$LATEST_TAG" >> "$GITHUB_OUTPUT"

      # Step outputs are handed to the scripts below through `env:` rather
      # than being expanded inside the script text, so a tag containing
      # shell metacharacters cannot alter the command being run.
      - name: Prepare offline tag
        id: offline_tag
        env:
          BASE_TAG: ${{ steps.get_tag.outputs.tag }}
        run: |
          OFFLINE_TAG="${BASE_TAG}-offline"
          echo "Offline image tag: $OFFLINE_TAG"
          echo "offline_tag=$OFFLINE_TAG" >> "$GITHUB_OUTPUT"

      - name: Update version in __init__.py
        env:
          BASE_TAG: ${{ steps.get_tag.outputs.tag }}
        run: |
          sed -i "s/__version__ = \".*\"/__version__ = \"${BASE_TAG}\"/" lightrag/__init__.py
          grep __version__ lightrag/__init__.py

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Login to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Extract metadata for Docker
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ghcr.io/${{ github.repository }}
          tags: |
            type=raw,value=${{ steps.offline_tag.outputs.offline_tag }}

      - name: Build and push offline Docker image
        uses: docker/build-push-action@v5
        with:
          context: .
          file: ./Dockerfile.offline
          platforms: linux/amd64,linux/arm64
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          cache-from: type=gha
          cache-to: type=gha,mode=max

      - name: Output image details
        env:
          IMAGE_REPOSITORY: ghcr.io/${{ github.repository }}
          OFFLINE_TAG: ${{ steps.offline_tag.outputs.offline_tag }}
          BASE_TAG: ${{ steps.get_tag.outputs.tag }}
        run: |
          echo "Offline Docker image built and pushed successfully!"
          echo "Image tag: ${IMAGE_REPOSITORY}:${OFFLINE_TAG}"
          echo "Base Git tag used: ${BASE_TAG}"
|
||||
87
Dockerfile.offline
Normal file
87
Dockerfile.offline
Normal file
@@ -0,0 +1,87 @@
|
||||
# Frontend build stage: builds the web UI with Bun. A later stage copies
# the result from /app/lightrag/api/webui.
FROM oven/bun:1 AS frontend-builder

# Work inside the web UI sources directly instead of `cd`-ing in RUN.
WORKDIR /app/lightrag_webui

# Copy frontend source code
COPY lightrag_webui/ ./

# Build frontend assets for inclusion in the API package.
# --frozen-lockfile makes the install fail rather than rewrite the lockfile.
RUN bun install --frozen-lockfile \
    && bun run build
|
||||
|
||||
# Python build stage: installs LightRAG and its offline dependencies with
# `pip --user` (so they land under /root/.local) and pre-populates the
# tiktoken cache in /app/data/tiktoken.
FROM python:3.12-slim AS builder

ENV DEBIAN_FRONTEND=noninteractive

WORKDIR /app

# Upgrade packaging tools and install system deps (Rust is required by some wheels).
# The rustup installer is downloaded to a file and then executed instead of
# being piped into `sh`: without pipefail a failed download would be
# swallowed by the pipe and the build would continue without Rust.
RUN pip install --upgrade pip setuptools wheel \
    && apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        curl \
        pkg-config \
    && rm -rf /var/lib/apt/lists/* \
    && curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs -o /tmp/rustup-init.sh \
    && sh /tmp/rustup-init.sh -y \
    && rm -f /tmp/rustup-init.sh

ENV PATH="/root/.cargo/bin:/root/.local/bin:${PATH}"

# Copy project metadata and sources
COPY pyproject.toml setup.py ./
COPY requirements-offline*.txt ./
COPY lightrag/ ./lightrag/

# Include pre-built frontend assets from the previous stage
COPY --from=frontend-builder /app/lightrag/api/webui ./lightrag/api/webui

# Install LightRAG with API extras and all offline dependencies.
# The extras spec is quoted so the shell can never treat `[api]` as a glob.
RUN pip install --user --no-cache-dir --use-pep517 ".[api]"
RUN pip install --user --no-cache-dir -r requirements-offline.txt

# Prepare offline cache directory and pre-populate tiktoken data.
# Any failure aborts the build except exit status 2, which is tolerated.
# NOTE(review): presumably status 2 is a non-fatal/partial result of
# lightrag-download-cache -- confirm against that tool.
RUN mkdir -p /app/data/tiktoken \
    && lightrag-download-cache --cache-dir /app/data/tiktoken || status=$?; \
    if [ -n "${status:-}" ] && [ "$status" -ne 0 ] && [ "$status" -ne 2 ]; then exit "$status"; fi
|
||||
|
||||
# Final stage: runtime image that receives the packages installed by the
# builder stage together with the pre-populated tiktoken cache.
FROM python:3.12-slim

WORKDIR /app

RUN pip install --upgrade pip setuptools wheel

# Bring over the builder's user-site packages and the application sources,
# plus the project metadata needed by the install step below.
COPY --from=builder /root/.local /root/.local
COPY --from=builder /app/lightrag /app/lightrag
COPY pyproject.toml setup.py ./
COPY requirements-offline*.txt ./

# Scripts installed under /root/.local/bin must be resolvable.
ENV PATH="/root/.local/bin:${PATH}"

# Install the package with its API extras, then the offline requirements.
# Keep this ahead of the data/ directory creation below to avoid a
# setuptools error.
RUN pip install --no-cache-dir --use-pep517 ".[api]"
RUN pip install --no-cache-dir -r requirements-offline.txt

# Persistent data directories are created only after the package install.
RUN mkdir -p /app/data/rag_storage /app/data/inputs /app/data/tiktoken

# Drop the cache prepared in the builder stage into its directory.
COPY --from=builder /app/data/tiktoken /app/data/tiktoken

# Runtime locations: tiktoken cache, working storage and input documents.
ENV TIKTOKEN_CACHE_DIR=/app/data/tiktoken \
    WORKING_DIR=/app/data/rag_storage \
    INPUT_DIR=/app/data/inputs

# Expose API port
EXPOSE 9621

ENTRYPOINT ["python", "-m", "lightrag.api.lightrag_server"]
|
||||
@@ -12,9 +12,9 @@ services:
|
||||
volumes:
|
||||
- ./data/rag_storage:/app/data/rag_storage
|
||||
- ./data/inputs:/app/data/inputs
|
||||
- ./data/tiktoken:/app/data/tiktoken
|
||||
- ./config.ini:/app/config.ini
|
||||
- ./.env:/app/.env
|
||||
# - ./data/tiktoken:/app/data/tiktoken
|
||||
env_file:
|
||||
- .env
|
||||
environment:
|
||||
|
||||
@@ -130,6 +130,7 @@ Repository = "https://github.com/HKUDS/LightRAG"
|
||||
|
||||
[tool.setuptools.packages.find]
|
||||
include = ["lightrag*"]
|
||||
exclude = ["data*", "tests*", "scripts*", "examples*", "dickens*", "reproduce*", "output_complete*", "rag_storage*", "inputs*"]
|
||||
|
||||
[tool.setuptools]
|
||||
include-package-data = true
|
||||
|
||||
Reference in New Issue
Block a user