haoheliu · tuxx · Mar 10, 2025 · Mar 10, 2025 · Mar 10, 2025 · Mar 10, 2025
diff --git a/.github/workflows/docker-build-publish.yml b/.github/workflows/docker-build-publish.yml
@@ -0,0 +1,59 @@
+# Builds the repository Dockerfile and publishes the image to the GitHub
+# Container Registry (ghcr.io) whenever a version tag is pushed.
+name: Build and Publish Docker Images
+
+on:
+  push:
+    tags:
+      # Only tags starting with "v" (e.g. v1.2.3) trigger a publish.
+      - 'v*'
+
+jobs:
+  build-and-push:
+    name: Build and Push Docker Images
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      # Lets GITHUB_TOKEN push packages to ghcr.io.
+      packages: write
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Log in to GitHub Container Registry
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      # Image references must be lowercase; the repository name may not be.
+      - name: Convert repository name to lowercase
+        id: repo-name
+        run: |
+          echo "REPO_LOWERCASE=${GITHUB_REPOSITORY,,}" >> "$GITHUB_OUTPUT"
+
+      # refs/tags/v1.2.3 -> 1.2.3
+      - name: Extract version from tag
+        id: version
+        run: |
+          VERSION="${GITHUB_REF#refs/tags/v}"
+          echo "VERSION=$VERSION" >> "$GITHUB_OUTPUT"
+
+      - name: Build and push docker container
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          push: true
+          file: Dockerfile
+          # Publish as ghcr.io/<owner>/<repo>, the name the README tells users to pull.
+          tags: |
+            ghcr.io/${{ steps.repo-name.outputs.REPO_LOWERCASE }}:${{ steps.version.outputs.VERSION }}
+            ghcr.io/${{ steps.repo-name.outputs.REPO_LOWERCASE }}:latest
+          # Reuse layers across runs through the GitHub Actions cache backend.
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
diff --git a/Dockerfile b/Dockerfile
@@ -0,0 +1,117 @@
+# syntax=docker/dockerfile:1
+# Two-stage build: the builder stage installs AudioLDM and its Python
+# dependencies, the final stage copies the result into a clean runtime image.
+FROM nvidia/cuda:11.6.2-cudnn8-runtime-ubuntu20.04 AS builder
+
+# Build-time only: keep apt from prompting without leaking into the environment
+ARG DEBIAN_FRONTEND=noninteractive
+ENV PYTHONUNBUFFERED=1
+
+# Install build dependencies
+RUN apt-get update && apt-get install -y \
+    python3.8 \
+    python3-pip \
+    python3.8-dev \
+    git \
+    ffmpeg \
+    libsndfile1 \
+    wget \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+# Set Python 3.8 as default
+RUN ln -sf /usr/bin/python3.8 /usr/bin/python && \
+    ln -sf /usr/bin/python3.8 /usr/bin/python3
+
+# Set working directory
+WORKDIR /app
+
+# Install PyTorch with CUDA support
+RUN pip3 install --no-cache-dir torch==1.13.1+cu116 torchvision==0.14.1+cu116 torchaudio==0.13.1 --extra-index-url https://download.pytorch.org/whl/cu116
+
+# Install all AudioLDM dependencies
+RUN pip3 install --no-cache-dir \
+    tqdm \
+    pyyaml \
+    einops \
+    chardet \
+    numpy==1.23.5 \
+    soundfile \
+    librosa==0.9.2 \
+    scipy \
+    pandas \
+    torchlibrosa==0.0.9 \
+    transformers==4.29.0 \
+    progressbar \
+    ftfy \
+    diffusers \
+    gradio==3.22.1
+
+# Copy the sources from the build context, so the image contains exactly the
+# revision being built (e.g. the release tag) rather than the default branch
+COPY . .
+
+# Install AudioLDM from those sources
+RUN pip3 install --no-cache-dir . && \
+    rm -rf .git
+
+# Clean up bytecode caches (even non-empty ones) and the pip cache
+RUN find /usr/local/lib/python3.8/dist-packages -type d -name "__pycache__" -prune -exec rm -rf {} + && \
+    find /usr/local/lib/python3.8/dist-packages -name "*.pyc" -delete && \
+    rm -rf /root/.cache/pip
+
+# Create a smaller final image
+FROM nvidia/cuda:11.6.2-cudnn8-runtime-ubuntu20.04
+
+# Build-time only: keep apt from prompting without leaking into the environment
+ARG DEBIAN_FRONTEND=noninteractive
+ENV PYTHONUNBUFFERED=1
+# Gradio listens on 127.0.0.1 by default, which a published port
+# (docker run -p 7860:7860) cannot reach; bind to all interfaces instead
+ENV GRADIO_SERVER_NAME=0.0.0.0
+
+# Install runtime dependencies
+RUN apt-get update && apt-get install -y \
+    python3.8 \
+    python3-pip \
+    ffmpeg \
+    libsndfile1 \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+# Set Python 3.8 as default
+RUN ln -sf /usr/bin/python3.8 /usr/bin/python && \
+    ln -sf /usr/bin/python3.8 /usr/bin/python3
+
+# Set working directory
+WORKDIR /app
+
+# Copy installed Python packages and application files from builder stage
+COPY --from=builder /usr/local/lib/python3.8/dist-packages /usr/local/lib/python3.8/dist-packages
+COPY --from=builder /usr/local/bin/audioldm /usr/local/bin/audioldm
+COPY --from=builder /app /app
+
+# Make the entrypoint script: "webapp" starts the Gradio app, anything else is
+# passed to the audioldm CLI; exec lets the process receive container signals
+COPY <<EOF /app/entrypoint.sh
+#!/bin/bash
+if [ "\$1" = "webapp" ]; then
+    # Run the Gradio web app
+    exec python app.py
+else
+    # Run audioldm with arguments
+    exec audioldm "\$@"
+fi
+EOF
+
+# Make the entrypoint script executable
+RUN chmod +x /app/entrypoint.sh
+
+# Port the README publishes for the Gradio web app
+EXPOSE 7860
+
+# Set the entrypoint
+ENTRYPOINT ["/app/entrypoint.sh"]
+
+# Default command (can be overridden)
+CMD ["--help"]
diff --git a/README.md b/README.md
@@ -134,6 +134,44 @@ optional arguments:
 
 For the evaluation of audio generative model, please refer to [audioldm_eval](https://github.com/haoheliu/audioldm_eval).
 
+
+## Using Docker
+
+AudioLDM is also available as a Docker image for easier deployment and consistent behavior across platforms.
+
+### Prerequisites
+
+- Docker installed on your system
+- For GPU support: NVIDIA GPU with [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html) installed
+
+### Quick Start
+
+Pull and run the Docker image (generated audio is written to `./output` on the host through the bind mount):
+
+```shell
+docker run --gpus all -v $(pwd)/output:/app/output ghcr.io/haoheliu/audioldm:latest --text "A hammer is hitting a wooden surface" --save_path /app/output
+```
+
+### Running the Web App with Docker
+To run the Gradio web interface:
+```shell
+docker run --gpus all -p 7860:7860 ghcr.io/haoheliu/audioldm:latest webapp
+```
+
+### All AudioLDM Options
+
+Any `audioldm` command-line option can be passed after the image name:
+
+```shell
+docker run --gpus all -v $(pwd)/output:/app/output ghcr.io/haoheliu/audioldm:latest \
+  --mode "generation" \
+  --text "Rain falling on a window" \
+  --model_name "audioldm-m-full" \
+  --guidance_scale 3.5 \
+  --duration 10 \
+  --save_path /app/output
+```
+
 # Hugging Face 🧨 Diffusers
 
 AudioLDM is available in the Hugging Face [🧨 Diffusers](https://github.com/huggingface/diffusers) library from v0.15.0 onwards. The official checkpoints can be found on the [Hugging Face Hub](https://huggingface.co/cvssp), alongside [documentation](https://huggingface.co/docs/diffusers/main/en/api/pipelines/audioldm) and [examples scripts](https://huggingface.co/docs/diffusers/main/en/api/pipelines/audioldm).