feat: optional restoring of ComfyUI snapshots to bake custom nodes into the Docker Image #30

Open · wants to merge 16 commits into base: `main`
8 changes: 4 additions & 4 deletions .github/workflows/release.yml
@@ -8,13 +8,13 @@ on:

jobs:
release:
runs-on: ubuntu-latest
runs-on: ubuntu-latest-m
if: ${{ !contains(github.event.head_commit.message, 'chore(release)') }}
permissions:
contents: write # to be able to publish a GitHub release
issues: write # to be able to comment on released issues
pull-requests: write # to be able to comment on released pull requests

steps:
- name: Checkout
uses: actions/checkout@v3
@@ -55,7 +55,7 @@ jobs:
- name: Semantic release
uses: codfish/semantic-release-action@v3
id: semanticrelease
with:
with:
additional-packages: |
['@semantic-release/git', '@semantic-release/changelog']
env:
@@ -69,7 +69,7 @@ jobs:
tags: |
${{ secrets.DOCKERHUB_REPO }}/${{ secrets.DOCKERHUB_IMG }}:${{ steps.semanticrelease.outputs.release-version }}
${{ secrets.DOCKERHUB_REPO }}/${{ secrets.DOCKERHUB_IMG }}:latest

- name: Update description on Docker Hub
if: steps.semanticrelease.outputs.new-release-published == 'true'
uses: peter-evans/dockerhub-description@v3
3 changes: 2 additions & 1 deletion .gitignore
@@ -4,4 +4,5 @@ venv
data
models
simulated_uploaded
__pycache__
__pycache__
snapshot.json
24 changes: 24 additions & 0 deletions .releaserc
@@ -0,0 +1,24 @@
{
"branches": [
"main"
],
"tagFormat": "${version}",
"plugins": [
"@semantic-release/commit-analyzer",
"@semantic-release/release-notes-generator",
[
"@semantic-release/changelog",
{
"changelogFile": "CHANGELOG.md"
}
],
[
"@semantic-release/git",
{
"assets": ["README.md", "CHANGELOG.md"],
"message": "chore(release): ${nextRelease.version} \n\n${nextRelease.notes}"
}
],
"@semantic-release/github"
]
}
29 changes: 16 additions & 13 deletions Dockerfile
@@ -7,6 +7,8 @@ ENV DEBIAN_FRONTEND=noninteractive
ENV PIP_PREFER_BINARY=1
# Ensures output from python is printed immediately to the terminal without buffering
ENV PYTHONUNBUFFERED=1
# Speed up some cmake builds
ENV CMAKE_BUILD_PARALLEL_LEVEL=8

# Install Python, git and other necessary tools
RUN apt-get update && apt-get install -y \
@@ -32,22 +34,23 @@ RUN pip3 install --no-cache-dir torch torchvision torchaudio --index-url https:/
# Install runpod
RUN pip3 install runpod requests

# Download checkpoints/vae/LoRA to include in image
RUN wget -O models/checkpoints/sd_xl_base_1.0.safetensors https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/resolve/main/sd_xl_base_1.0.safetensors
RUN wget -O models/vae/sdxl_vae.safetensors https://huggingface.co/stabilityai/sdxl-vae/resolve/main/sdxl_vae.safetensors
RUN wget -O models/vae/sdxl-vae-fp16-fix.safetensors https://huggingface.co/madebyollin/sdxl-vae-fp16-fix/resolve/main/sdxl_vae.safetensors
RUN wget -O models/loras/xl_more_art-full_v1.safetensors https://civitai.com/api/download/models/152309
ARG SKIP_DEFAULT_MODELS
# Download checkpoints/vae/LoRA to include in image.
RUN if [ -z "$SKIP_DEFAULT_MODELS" ]; then wget -O models/checkpoints/sd_xl_base_1.0.safetensors https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/resolve/main/sd_xl_base_1.0.safetensors; fi
RUN if [ -z "$SKIP_DEFAULT_MODELS" ]; then wget -O models/vae/sdxl_vae.safetensors https://huggingface.co/stabilityai/sdxl-vae/resolve/main/sdxl_vae.safetensors; fi
RUN if [ -z "$SKIP_DEFAULT_MODELS" ]; then wget -O models/vae/sdxl-vae-fp16-fix.safetensors https://huggingface.co/madebyollin/sdxl-vae-fp16-fix/resolve/main/sdxl_vae.safetensors; fi
RUN if [ -z "$SKIP_DEFAULT_MODELS" ]; then wget -O models/loras/xl_more_art-full_v1.safetensors https://civitai.com/api/download/models/152309; fi

# Example for adding specific models into image
# ADD models/checkpoints/sd_xl_base_1.0.safetensors models/checkpoints/
# ADD models/vae/sdxl_vae.safetensors models/vae/

# Go back to the root
WORKDIR /

# Add the start and the handler
ADD src/start.sh src/rp_handler.py test_input.json ./
RUN chmod +x /start.sh
# Add scripts
ADD src/start.sh src/restore_snapshot.sh src/rp_handler.py test_input.json ./
RUN chmod +x /start.sh /restore_snapshot.sh

# Optionally copy snapshot file
ADD snapshot.jso[n] /
> **Review comment (Member):** Why would we have `jso[n]` here when we always expect a json-file?
RUN /restore_snapshot.sh

# Start the container
CMD /start.sh
113 changes: 86 additions & 27 deletions README.md
@@ -10,31 +10,34 @@ Read our article here: https://blib.la/blog/comfyui-on-runpod

[![Discord](https://img.shields.io/discord/1091306623819059300?color=7289da&label=Discord&logo=discord&logoColor=fff&style=for-the-badge)](https://discord.com/invite/m3TBB9XEkb)


---

<!-- toc -->

- [Quickstart](#quickstart)
- [Features](#features)
- [Config](#config)
* [Upload image to AWS S3](#upload-image-to-aws-s3)
- [Upload image to AWS S3](#upload-image-to-aws-s3)
- [Use the Docker image on RunPod](#use-the-docker-image-on-runpod)
- [Bring your own models](#bring-your-own-models)
- [Network Volume](#network-volume)
- [Custom Docker Image](#custom-docker-image)
- [API specification](#api-specification)
* [JSON Request Body](#json-request-body)
* [Fields](#fields)
+ ["input.images"](#inputimages)
- [JSON Request Body](#json-request-body)
- [Fields](#fields)
- ["input.images"](#inputimages)
- [Interact with your RunPod API](#interact-with-your-runpod-api)
* [Health status](#health-status)
* [Generate an image](#generate-an-image)
+ [Example request with cURL](#example-request-with-curl)
- [Health status](#health-status)
- [Generate an image](#generate-an-image)
- [Example request with cURL](#example-request-with-curl)
- [How to get the workflow from ComfyUI?](#how-to-get-the-workflow-from-comfyui)
- [Build the image](#build-the-image)
- [Restoring ComfyUI snapshots](#restoring-comfyui-snapshots)
- [Local testing](#local-testing)
* [Setup](#setup)
+ [Setup for Windows](#setup-for-windows)
* [Test: handler](#test-handler)
* [Test: docker image](#test-docker-image)
- [Setup](#setup)
- [Setup for Windows](#setup-for-windows)
- [Test: handler](#test-handler)
- [Test: docker image](#test-docker-image)
- [Automatically deploy to Docker hub with Github Actions](#automatically-deploy-to-docker-hub-with-github-actions)
- [Acknowledgments](#acknowledgments)

@@ -62,13 +65,16 @@ Read our article here: https://blib.la/blog/comfyui-on-runpod
- [sdxl-vae-fp16-fix](https://huggingface.co/madebyollin/sdxl-vae-fp16-fix/)
- Build-in LoRA:
- [xl_more_art-full_v1.safetensors](https://civitai.com/models/124347?modelVersionId=152309) (Enhancer)
- [Bring your own models](#bring-your-own-models)
- Based on [Ubuntu + NVIDIA CUDA](https://hub.docker.com/r/nvidia/cuda)

## Config

| Environment Variable | Description | Default |
| -------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------- |
| `REFRESH_WORKER` | When you want stop the worker after each finished job to have a clean state, see [official documentation](https://docs.runpod.io/docs/handler-additional-controls#refresh-worker). | `false` |
| Environment Variable | Description | Default |
| --------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------- |
| `REFRESH_WORKER` | When you want to stop the worker after each finished job to have a clean state, see [official documentation](https://docs.runpod.io/docs/handler-additional-controls#refresh-worker). | `false` |
| `COMFY_POLLING_INTERVAL_MS` | Time to wait between poll attempts in milliseconds. | `250` |
| `COMFY_POLLING_MAX_RETRIES` | Maximum number of poll attempts. Increase this when you have long-running workflows. | `500` |
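
Taken together, the two polling variables bound how long the worker waits for a workflow to finish. A quick sketch of the worst-case wait with the documented defaults:

```shell
# Worst-case wait before the worker gives up polling ComfyUI,
# using the documented defaults (250 ms interval, 500 retries).
interval_ms=250
max_retries=500
echo "max wait: $(( interval_ms * max_retries / 1000 )) seconds"   # 125 seconds
```

So a workflow that needs more than roughly two minutes requires raising `COMFY_POLLING_MAX_RETRIES`.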

### Upload image to AWS S3

@@ -104,15 +110,49 @@ This is only needed if you want to upload the generated picture to AWS S3. If yo
- Max Workers: `3` (whatever makes sense for you)
- Idle Timeout: `5` (you can leave the default)
- Flash Boot: `enabled` (doesn't cost more, but provides faster boot of our worker, which is good)
- Advanced: Leave the defaults
- Advanced: If you are using a Network Volume, select it under `Select Network Volume`. Otherwise leave the defaults.
- Select a GPU that has some availability
- GPUs/Worker: `1`
- Click `deploy`
- Your endpoint will be created, you can click on it to see the dashboard

### Bring your own models

#### Network Volume

This is possible because of [RunPod Network Volumes](https://docs.runpod.io/docs/create-a-network-volume), which also works for [serverless](https://docs.runpod.io/serverless/references/endpoint-configurations#select-network-volume).

- [Create a Network Volume](https://docs.runpod.io/docs/create-a-network-volume)
- Create a temporary GPU instance to populate the volume.
Navigate to `Manage > Storage`, click `Deploy` under the volume, deploy any GPU instance
- Navigate to `Manage > Pods`. Under the new GPU instance, click `Connect`. This
will give you either a Jupyter notebook where you can select `Shell` or an address you can ssh to.
- Within a shell on the GPU instance, populate the Network Volume. By default, the volume
is mounted at `/workspace`. In this example, we create the ComfyUI model
structure and download a single checkpoint.
```bash
cd /workspace
for i in checkpoints clip clip_vision configs controlnet embeddings loras upscale_models vae; do mkdir -p models/$i; done
wget -O models/checkpoints/sd_xl_turbo_1.0_fp16.safetensors https://huggingface.co/stabilityai/sdxl-turbo/resolve/main/sd_xl_turbo_1.0_fp16.safetensors
```
- [Delete the temporary GPU instance](https://docs.runpod.io/docs/pods#terminating-a-pod)
- Configure your Endpoint to use the Network Volume. Either [create a new endpoint](#use-the-docker-image-on-runpod) or update
`Advanced > Select Network Volume (optional)` on an existing endpoint
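
One pitfall when populating the volume by hand: Hugging Face `blob/` URLs return an HTML preview page, while `resolve/` URLs return the raw file. A small sanity check (illustrative helper, not part of this repository) can catch that:

```shell
# check_download: warn if a "model" file is actually an HTML error/preview page,
# which typically happens when a Hugging Face /blob/ URL was used instead of /resolve/.
check_download() {
  if head -c 200 "$1" | grep -qi '<html'; then
    echo "HTML page detected: re-download $1 using a /resolve/ URL"
  else
    echo "ok"
  fi
}
```

For example: `check_download models/checkpoints/sd_xl_turbo_1.0_fp16.safetensors`.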

#### Custom Docker Image

- Fork the repository
- Add your models directly into the [Dockerfile](./Dockerfile) like this:

```Dockerfile
RUN wget -O models/checkpoints/sd_xl_base_1.0.safetensors https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/resolve/main/sd_xl_base_1.0.safetensors
```

- [Build your Docker Image](#build-the-image)

## API specification

The following describes which fields exist when doing requests to the API. We only describe the fields that are sent via `input` as those are needed by the worker itself. For a full list of fields, please take a look at the [official documentation](https://docs.runpod.io/docs/serverless-usage).
The following describes which fields exist when doing requests to the API. We only describe the fields that are sent via `input` as those are needed by the worker itself. For a full list of fields, please take a look at the [official documentation](https://docs.runpod.io/docs/serverless-usage).

### JSON Request Body

@@ -138,10 +178,9 @@ The following describes which fields exist when doing requests to the API. We on
| `input.workflow` | Object | Yes | Contains the ComfyUI workflow configuration. |
| `input.images` | Array | No | An array of images. Each image will be added into the `input` folder of ComfyUI and can then be used in the workflow by using its `name` |


#### "input.images"

An array of images, where each image should have a different name.
An array of images, where each image should have a different name.

🚨 The request body for a RunPod endpoint is 10 MB for `/run` and 20 MB for `/runsync`, so make sure that your input images are not super huge as this will be blocked by RunPod otherwise, see the [official documentation](https://docs.runpod.io/docs/serverless-endpoint-urls)

@@ -150,8 +189,6 @@ An array of images, where each image should have a different name.
| `name` | String | Yes | The name of the image. Please use the same name in your workflow to reference the image. |
| `image` | String | Yes | A base64 encoded string of the image. |
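
As a sketch of how such a body can be assembled in a shell (the file name and its contents are illustrative placeholders, not files from this repository):

```shell
# Build a request body with one base64-encoded input image.
# "my-image.png" and its contents are stand-ins for a real image file.
printf 'fake-image-bytes' > my-image.png
IMG_B64=$(base64 my-image.png | tr -d '\n')   # strip line wraps added by base64
BODY=$(printf '{"input": {"workflow": {}, "images": [{"name": "my-image.png", "image": "%s"}]}}' "$IMG_B64")
echo "$BODY"
```

The resulting `BODY` can then be sent to `/run` or `/runsync`, keeping the size limits above in mind.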



## Interact with your RunPod API

- In the [User Settings](https://www.runpod.io/console/serverless/user/settings) click on `API Keys` and then on the `API Key` button
@@ -160,8 +197,6 @@ An array of images, where each image should have a different name.
- Replace `<api_key>` with your key
- Replace `<endpoint_id>` with the ID of the endpoint; you find it when you click on your endpoint, as part of the URLs shown at the bottom of the first box



### Health status

```bash
curl -H "Authorization: Bearer <api_key>" https://api.runpod.ai/v2/<endpoint_id>/health
```

You can either create a new job asynchronously by using `/run`, or synchronously by using `/runsync`. The example here uses a sync job and waits until the response is delivered.

The API expects a [JSON in this form](#json-request-body), where `workflow` is the [workflow from ComfyUI, exported as JSON](#how-to-get-the-workflow-from-comfyui) and `images` is optional.
The API expects a [JSON in this form](#json-request-body), where `workflow` is the [workflow from ComfyUI, exported as JSON](#how-to-get-the-workflow-from-comfyui) and `images` is optional.

Please also take a look at the [test_input.json](./test_input.json) to see what the API input should look like.

@@ -203,8 +238,20 @@ You can now take the content of this file and put it into your `workflow` when i

You can build the image locally: `docker build -t timpietruskyblibla/runpod-worker-comfy:dev --platform linux/amd64 .`

If you plan to bring your own ComfyUI models, you can add the `SKIP_DEFAULT_MODELS` build arg to reduce image size:
`docker build --build-arg SKIP_DEFAULT_MODELS=1 -t timpietruskyblibla/runpod-worker-comfy:dev --platform linux/amd64 .`.
This will skip downloading the default models for this image.

🚨 It's important to specify the `--platform linux/amd64`, otherwise you will get an error on RunPod, see [#13](https://github.com/blib-la/runpod-worker-comfy/issues/13)

### Restoring ComfyUI snapshots

The popular [ComfyUI Manager](https://github.com/ltdrdata/ComfyUI-Manager) extension allows exporting a snapshot of ComfyUI, including all installed extensions, as a JSON file. See [here](https://github.com/ltdrdata/ComfyUI-Manager?tab=readme-ov-file#snapshot-manager) for how to use this feature.

To restore a snapshot within the Docker build process and make all listed extensions available in the RunPod worker, place the snapshot file, named `snapshot.json`, in the root directory and trigger an image build.

🚨 Some custom nodes and extensions may download models as part of the installation process. This can considerably increase the image size. Having many custom nodes and extensions may also increase the initialization time of ComfyUI. You should therefore be careful about which extensions you add to your worker.
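
For orientation, a snapshot file is roughly shaped like the following (abridged and illustrative; the exact schema is defined by ComfyUI-Manager, and all values below are placeholders):

```json
{
  "comfyui": "<comfyui-commit-hash>",
  "git_custom_nodes": {
    "https://github.com/<author>/<custom-node-repo>": {
      "hash": "<commit-hash>",
      "disabled": false
    }
  },
  "file_custom_nodes": []
}
```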

## Local testing

Both tests will use the data from [test_input.json](./test_input.json), so make your changes in there to test this properly.
@@ -244,9 +291,22 @@ To run the Docker image on Windows, we need to have WSL2 and a Linux distro (lik
You can also start the handler itself to have the local server running: `python src/rp_handler.py`
To get this to work you will also need to start "ComfyUI", otherwise the handler will not work.

### Test: docker image
### Local API

For enhanced local development, you can start an API server that simulates the RunPod worker environment. This feature is particularly useful for debugging and testing your integrations locally.

- If you want to run the Docker container, you can use: `docker-compose up`
#### Starting local endpoint

Set the `SERVE_API_LOCALLY` environment variable to `true` to activate the local API server when running your Docker container. This is already the default value in the `docker-compose.yml`, so you can get it running by executing:

```bash
docker-compose up
```

#### Accessing the API

- With the local API server running, it's accessible at: [http://localhost:8000](http://localhost:8000)
- When you open this in your browser, you can also see the API documentation and can interact with the API directly

## Automatically deploy to Docker hub with Github Actions

Expand All @@ -264,7 +324,6 @@ If you want to use this, you should add these secrets to your repository:
| `DOCKERHUB_REPO` | The repository on Docker Hub where the image will be pushed. | `timpietruskyblibla` |
| `DOCKERHUB_IMG` | The name of the image to be pushed to Docker Hub. | `runpod-worker-comfy` |


## Acknowledgments

- Thanks to [all contributors](https://github.com/blib-la/runpod-worker-comfy/graphs/contributors) for your awesome work