
Commit d7c096f

rickstaa and RUFFY-369 authored
feat(models): add Depth Anything V2 Large model (#79)
This commit adds the large variant of the Depth Anything V2 model to the comfystream container.

Co-authored-by: RUFFY-369 <[email protected]>
1 parent a9080db · commit d7c096f

File tree

4 files changed: +235 −5 lines changed


configs/models.yaml

Lines changed: 6 additions & 2 deletions
@@ -16,11 +16,15 @@ models:
     - url: "https://huggingface.co/aaronb/dreamshaper-8-dmd-1kstep/raw/main/config.json"
       path: "unet/dreamshaper-8-dmd-1kstep.json"
 
-  # Depth Anything ONNX model
+  # Depth Anything V2 ONNX models
   depthanything-onnx:
     name: "DepthAnything ONNX"
     url: "https://huggingface.co/yuvraj108c/Depth-Anything-2-Onnx/resolve/main/depth_anything_v2_vitb.onnx?download=true"
     path: "tensorrt/depth-anything/depth_anything_vitl14.onnx"
+  depth-anything-v2-large-onnx:
+    name: "DepthAnything V2 Large ONNX"
+    url: "https://huggingface.co/yuvraj108c/Depth-Anything-2-Onnx/resolve/main/depth_anything_v2_vitl.onnx?download=true"
+    path: "tensorrt/depth-anything/depth_anything_v2_vitl.onnx"
 
   # TAESD models
   taesd:
@@ -68,4 +72,4 @@ models:
     name: "ClipTextModel"
     url: "https://huggingface.co/Lykon/dreamshaper-8/resolve/main/text_encoder/model.fp16.safetensors"
     path: "text_encoders/CLIPText/model.fp16.safetensors"
-    type: "text_encoder"
+    type: "text_encoder"

configs/nodes.yaml

Lines changed: 3 additions & 2 deletions
@@ -11,7 +11,8 @@ nodes:
 
   comfyui-depthanything-tensorrt:
     name: "ComfyUI DepthAnything TensorRT"
-    url: "https://github.com/yuvraj108c/ComfyUI-Depth-Anything-Tensorrt"
+    url: "https://github.com/rickstaa/ComfyUI-Depth-Anything-Tensorrt"
+    branch: "feature/add-export-trt-args"
     type: "tensorrt"
 
   # Ryan's nodes
@@ -74,4 +75,4 @@ nodes:
     name: "ComfyUI Stream Pack"
     url: "https://github.com/livepeer/ComfyUI-Stream-Pack"
     branch: "main"
-    type: "utility"
+    type: "utility"

docker/entrypoint.sh

Lines changed: 9 additions & 1 deletion
@@ -49,7 +49,7 @@ if [ "$1" = "--build-engines" ]; then
     # Build Static Engine for Dreamshaper
     python src/comfystream/scripts/build_trt.py --model /workspace/ComfyUI/models/unet/dreamshaper-8-dmd-1kstep.safetensors --out-engine /workspace/ComfyUI/output/tensorrt/static-dreamshaper8_SD15_\$stat-b-1-h-512-w-512_00001_.engine
 
-    # Build Engine for DepthAnything2
+    # Build Engine for Depth Anything V2
     if [ ! -f "$DEPTH_ANYTHING_DIR/depth_anything_vitl14-fp16.engine" ]; then
         if [ ! -d "$DEPTH_ANYTHING_DIR" ]; then
             mkdir -p "$DEPTH_ANYTHING_DIR"
@@ -59,6 +59,14 @@ if [ "$1" = "--build-engines" ]; then
     else
         echo "Engine for DepthAnything2 already exists, skipping..."
     fi
+
+    # Build Engine for Depth Anything2 (large)
+    if [ ! -f "$DEPTH_ANYTHING_DIR/depth_anything_v2_vitl-fp16.engine" ]; then
+        cd "$DEPTH_ANYTHING_DIR"
+        python /workspace/ComfyUI/custom_nodes/ComfyUI-Depth-Anything-Tensorrt/export_trt.py --trt-path "${DEPTH_ANYTHING_DIR}/depth_anything_v2_vitl-fp16.engine" --onnx-path "${DEPTH_ANYTHING_DIR}/depth_anything_v2_vitl.onnx"
+    else
+        echo "Engine for DepthAnything2 (large) already exists, skipping..."
+    fi
     shift
 fi
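The new block only runs when the container entrypoint receives the --build-engines flag, so the large engine gets built at startup roughly like this. A sketch only: the image name comfystream is an assumption, so substitute whatever tag the container is built under.

    # Hypothetical invocation; replace "comfystream" with the actual image tag.
    docker run --gpus all comfystream --build-engines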

Lines changed: 217 additions & 0 deletions
@@ -0,0 +1,217 @@
{
  "1": {
    "inputs": {
      "image": "example.png"
    },
    "class_type": "LoadImage",
    "_meta": {
      "title": "Load Image"
    }
  },
  "2": {
    "inputs": {
      "engine": "depth_anything_v2_vitl-fp16.engine",
      "images": [
        "1",
        0
      ]
    },
    "class_type": "DepthAnythingTensorrt",
    "_meta": {
      "title": "Depth Anything Tensorrt"
    }
  },
  "3": {
    "inputs": {
      "unet_name": "static-dreamshaper8_SD15_$stat-b-1-h-512-w-512_00001_.engine",
      "model_type": "SD15"
    },
    "class_type": "TensorRTLoader",
    "_meta": {
      "title": "TensorRT Loader"
    }
  },
  "5": {
    "inputs": {
      "text": "the hulk",
      "clip": [
        "23",
        0
      ]
    },
    "class_type": "CLIPTextEncode",
    "_meta": {
      "title": "CLIP Text Encode (Prompt)"
    }
  },
  "6": {
    "inputs": {
      "text": "",
      "clip": [
        "23",
        0
      ]
    },
    "class_type": "CLIPTextEncode",
    "_meta": {
      "title": "CLIP Text Encode (Prompt)"
    }
  },
  "7": {
    "inputs": {
      "seed": 446080115054598,
      "steps": 1,
      "cfg": 1,
      "sampler_name": "lcm",
      "scheduler": "normal",
      "denoise": 1,
      "model": [
        "3",
        0
      ],
      "positive": [
        "9",
        0
      ],
      "negative": [
        "9",
        1
      ],
      "latent_image": [
        "16",
        0
      ]
    },
    "class_type": "KSampler",
    "_meta": {
      "title": "KSampler"
    }
  },
  "8": {
    "inputs": {
      "control_net_name": "control_v11f1p_sd15_depth_fp16.safetensors"
    },
    "class_type": "ControlNetLoader",
    "_meta": {
      "title": "Load ControlNet Model"
    }
  },
  "9": {
    "inputs": {
      "strength": 1.0000000000000002,
      "start_percent": 0,
      "end_percent": 1,
      "positive": [
        "5",
        0
      ],
      "negative": [
        "6",
        0
      ],
      "control_net": [
        "10",
        0
      ],
      "image": [
        "2",
        0
      ]
    },
    "class_type": "ControlNetApplyAdvanced",
    "_meta": {
      "title": "Apply ControlNet"
    }
  },
  "10": {
    "inputs": {
      "backend": "inductor",
      "fullgraph": false,
      "mode": "reduce-overhead",
      "controlnet": [
        "8",
        0
      ]
    },
    "class_type": "TorchCompileLoadControlNet",
    "_meta": {
      "title": "TorchCompileLoadControlNet"
    }
  },
  "11": {
    "inputs": {
      "vae_name": "taesd"
    },
    "class_type": "VAELoader",
    "_meta": {
      "title": "Load VAE"
    }
  },
  "13": {
    "inputs": {
      "backend": "inductor",
      "fullgraph": true,
      "mode": "reduce-overhead",
      "compile_encoder": true,
      "compile_decoder": true,
      "vae": [
        "11",
        0
      ]
    },
    "class_type": "TorchCompileLoadVAE",
    "_meta": {
      "title": "TorchCompileLoadVAE"
    }
  },
  "14": {
    "inputs": {
      "samples": [
        "7",
        0
      ],
      "vae": [
        "13",
        0
      ]
    },
    "class_type": "VAEDecode",
    "_meta": {
      "title": "VAE Decode"
    }
  },
  "15": {
    "inputs": {
      "images": [
        "14",
        0
      ]
    },
    "class_type": "PreviewImage",
    "_meta": {
      "title": "Preview Image"
    }
  },
  "16": {
    "inputs": {
      "width": 512,
      "height": 512,
      "batch_size": 1
    },
    "class_type": "EmptyLatentImage",
    "_meta": {
      "title": "Empty Latent Image"
    }
  },
  "23": {
    "inputs": {
      "clip_name": "CLIPText/model.fp16.safetensors",
      "type": "stable_diffusion",
      "device": "default"
    },
    "class_type": "CLIPLoader",
    "_meta": {
      "title": "Load CLIP"
    }
  }
}
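The new file is a ComfyUI API-format workflow that runs the Depth Anything V2 large TensorRT engine on the loaded image, feeds the resulting depth map into a depth ControlNet, and samples with the Dreamshaper TensorRT UNet. To queue it against a running ComfyUI instance, wrap it in a {"prompt": ...} envelope and POST it to the /prompt endpoint. This is a sketch assuming the default port 8188 and jq being available; workflow.json is a placeholder name, since the file's path is not shown on this page.

    # Queue the workflow above (saved locally as workflow.json -- placeholder name) on a local ComfyUI server.
    jq -n --slurpfile wf workflow.json '{prompt: $wf[0]}' \
      | curl -s -X POST "http://127.0.0.1:8188/prompt" \
          -H "Content-Type: application/json" \
          -d @-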

0 commit comments
