huggingface · lerignoux · Jan 15, 2024 · Jan 16, 2024
diff --git a/README.md b/README.md
@@ -113,4 +113,6 @@ processed_image_dwpose = dwpose(img)
 
 ### Image resolution
 
-In order to maintain the image aspect ratio, `detect_resolution`, `image_resolution` and images sizes need to be using multiple of `64`.
+To maintain the image aspect ratio, `detect_resolution`, `image_resolution` and the resolution passed to `resize_image` must be multiples of `32`.  
+Otherwise, the image dimensions are rounded to the nearest multiple of `32` so that processing works correctly.  
+A resolution can be set to `None` to skip resizing. This may lead to a `RuntimeError`.  
diff --git a/src/controlnet_aux/canny/__init__.py b/src/controlnet_aux/canny/__init__.py
@@ -20,12 +20,13 @@ def __call__(self, input_image=None, low_threshold=100, high_threshold=200, dete
             output_type = output_type or "np"
 
         input_image = HWC3(input_image)
-        input_image = resize_image(input_image, detect_resolution)
+        if detect_resolution is not None:
+            input_image = resize_image(input_image, detect_resolution)
 
         detected_map = cv2.Canny(input_image, low_threshold, high_threshold)
         detected_map = HWC3(detected_map)      
 
-        img = resize_image(input_image, image_resolution)
+        img = resize_image(input_image, image_resolution) if image_resolution is not None else input_image
         H, W, C = img.shape
 
         detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_LINEAR)

diff --git a/src/controlnet_aux/dwpose/__init__.py b/src/controlnet_aux/dwpose/__init__.py
@@ -45,7 +45,8 @@ def __call__(self, input_image, detect_resolution=512, image_resolution=512, out
         input_image = cv2.cvtColor(np.array(input_image, dtype=np.uint8), cv2.COLOR_RGB2BGR)
 
         input_image = HWC3(input_image)
-        input_image = resize_image(input_image, detect_resolution)
+        if detect_resolution is not None:
+            input_image = resize_image(input_image, detect_resolution)
         H, W, C = input_image.shape
 
         with torch.no_grad():
@@ -80,7 +81,7 @@ def __call__(self, input_image, detect_resolution=512, image_resolution=512, out
             detected_map = draw_pose(pose, H, W)
             detected_map = HWC3(detected_map)
 
-            img = resize_image(input_image, image_resolution)
+            img = resize_image(input_image, image_resolution) if image_resolution is not None else input_image
             H, W, C = img.shape
 
             detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_LINEAR)

diff --git a/src/controlnet_aux/hed/__init__.py b/src/controlnet_aux/hed/__init__.py
@@ -93,7 +93,8 @@ def __call__(self, input_image, detect_resolution=512, image_resolution=512, saf
             input_image = np.array(input_image, dtype=np.uint8)
 
         input_image = HWC3(input_image)
-        input_image = resize_image(input_image, detect_resolution)
+        if detect_resolution is not None:
+            input_image = resize_image(input_image, detect_resolution)
 
         assert input_image.ndim == 3
         H, W, C = input_image.shape
@@ -112,7 +113,7 @@ def __call__(self, input_image, detect_resolution=512, image_resolution=512, saf
         detected_map = edge
         detected_map = HWC3(detected_map)
 
-        img = resize_image(input_image, image_resolution)
+        img = resize_image(input_image, image_resolution) if image_resolution is not None else input_image
         H, W, C = img.shape
 
         detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_LINEAR)

diff --git a/src/controlnet_aux/leres/__init__.py b/src/controlnet_aux/leres/__init__.py
@@ -62,7 +62,8 @@ def __call__(self, input_image, thr_a=0, thr_b=0, boost=False, detect_resolution
             input_image = np.array(input_image, dtype=np.uint8)
 
         input_image = HWC3(input_image)
-        input_image = resize_image(input_image, detect_resolution)
+        if detect_resolution is not None:
+            input_image = resize_image(input_image, detect_resolution)
 
         assert input_image.ndim == 3
         height, width, dim = input_image.shape
@@ -107,7 +108,7 @@ def __call__(self, input_image, thr_a=0, thr_b=0, boost=False, detect_resolution
         detected_map = depth_image
         detected_map = HWC3(detected_map)      
 
-        img = resize_image(input_image, image_resolution)
+        img = resize_image(input_image, image_resolution) if image_resolution is not None else input_image
         H, W, C = img.shape
 
         detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_LINEAR)

diff --git a/src/controlnet_aux/lineart/__init__.py b/src/controlnet_aux/lineart/__init__.py
@@ -137,7 +137,8 @@ def __call__(self, input_image, coarse=False, detect_resolution=512, image_resol
             input_image = np.array(input_image, dtype=np.uint8)
 
         input_image = HWC3(input_image)
-        input_image = resize_image(input_image, detect_resolution)
+        if detect_resolution is not None:
+            input_image = resize_image(input_image, detect_resolution)
 
         model = self.model_coarse if coarse else self.model
         assert input_image.ndim == 3
@@ -155,7 +156,7 @@ def __call__(self, input_image, coarse=False, detect_resolution=512, image_resol
 
         detected_map = HWC3(detected_map)
 
-        img = resize_image(input_image, image_resolution)
+        img = resize_image(input_image, image_resolution) if image_resolution is not None else input_image
         H, W, C = img.shape
 
         detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_LINEAR)

diff --git a/src/controlnet_aux/lineart_anime/__init__.py b/src/controlnet_aux/lineart_anime/__init__.py
@@ -156,7 +156,8 @@ def __call__(self, input_image, detect_resolution=512, image_resolution=512, out
             input_image = np.array(input_image, dtype=np.uint8)
 
         input_image = HWC3(input_image)
-        input_image = resize_image(input_image, detect_resolution)
+        if detect_resolution is not None:
+            input_image = resize_image(input_image, detect_resolution)
 
         H, W, C = input_image.shape
         Hn = 256 * int(np.ceil(float(H) / 256.0))
@@ -177,7 +178,7 @@ def __call__(self, input_image, detect_resolution=512, image_resolution=512, out
 
         detected_map = HWC3(detected_map)
 
-        img = resize_image(input_image, image_resolution)
+        img = resize_image(input_image, image_resolution) if image_resolution is not None else input_image
         H, W, C = img.shape
 
         detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_LINEAR)

diff --git a/src/controlnet_aux/mediapipe_face/__init__.py b/src/controlnet_aux/mediapipe_face/__init__.py
@@ -37,12 +37,13 @@ def __call__(self,
             input_image = np.array(input_image, dtype=np.uint8)
 
         input_image = HWC3(input_image)
-        input_image = resize_image(input_image, detect_resolution)
+        if detect_resolution is not None:
+            input_image = resize_image(input_image, detect_resolution)
 
         detected_map = generate_annotation(input_image, max_faces, min_confidence)
         detected_map = HWC3(detected_map)
 
-        img = resize_image(input_image, image_resolution)
+        img = resize_image(input_image, image_resolution) if image_resolution is not None else input_image
         H, W, C = img.shape
 
         detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_LINEAR)

diff --git a/src/controlnet_aux/midas/__init__.py b/src/controlnet_aux/midas/__init__.py
@@ -45,7 +45,8 @@ def __call__(self, input_image, a=np.pi * 2.0, bg_th=0.1, depth_and_normal=False
             output_type = output_type or "np"
 
         input_image = HWC3(input_image)
-        input_image = resize_image(input_image, detect_resolution)
+        if detect_resolution is not None:
+            input_image = resize_image(input_image, detect_resolution)
 
         assert input_image.ndim == 3
         image_depth = input_image
@@ -77,7 +78,7 @@ def __call__(self, input_image, a=np.pi * 2.0, bg_th=0.1, depth_and_normal=False
         if depth_and_normal:
             normal_image = HWC3(normal_image)
 
-        img = resize_image(input_image, image_resolution)
+        img = resize_image(input_image, image_resolution) if image_resolution is not None else input_image
         H, W, C = img.shape
 
         depth_image = cv2.resize(depth_image, (W, H), interpolation=cv2.INTER_LINEAR)

diff --git a/src/controlnet_aux/mlsd/__init__.py b/src/controlnet_aux/mlsd/__init__.py
@@ -51,7 +51,8 @@ def __call__(self, input_image, thr_v=0.1, thr_d=0.1, detect_resolution=512, ima
             input_image = np.array(input_image, dtype=np.uint8)
 
         input_image = HWC3(input_image)
-        input_image = resize_image(input_image, detect_resolution)
+        if detect_resolution is not None:
+            input_image = resize_image(input_image, detect_resolution)
 
         assert input_image.ndim == 3
         img = input_image
@@ -68,7 +69,7 @@ def __call__(self, input_image, thr_v=0.1, thr_d=0.1, detect_resolution=512, ima
         detected_map = img_output[:, :, 0]
         detected_map = HWC3(detected_map)
 
-        img = resize_image(input_image, image_resolution)
+        img = resize_image(input_image, image_resolution) if image_resolution is not None else input_image
         H, W, C = img.shape
 
         detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_LINEAR)

diff --git a/src/controlnet_aux/normalbae/__init__.py b/src/controlnet_aux/normalbae/__init__.py
@@ -74,7 +74,8 @@ def __call__(self, input_image, detect_resolution=512, image_resolution=512, out
             input_image = np.array(input_image, dtype=np.uint8)
 
         input_image = HWC3(input_image)
-        input_image = resize_image(input_image, detect_resolution)
+        if detect_resolution is not None:
+            input_image = resize_image(input_image, detect_resolution)
 
         assert input_image.ndim == 3
         image_normal = input_image
@@ -97,7 +98,7 @@ def __call__(self, input_image, detect_resolution=512, image_resolution=512, out
         detected_map = normal_image
         detected_map = HWC3(detected_map)      
 
-        img = resize_image(input_image, image_resolution)
+        img = resize_image(input_image, image_resolution) if image_resolution is not None else input_image
         H, W, C = img.shape
 
         detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_LINEAR)

diff --git a/src/controlnet_aux/open_pose/__init__.py b/src/controlnet_aux/open_pose/__init__.py
@@ -214,7 +214,8 @@ def __call__(self, input_image, detect_resolution=512, image_resolution=512, inc
             input_image = np.array(input_image, dtype=np.uint8)
 
         input_image = HWC3(input_image)
-        input_image = resize_image(input_image, detect_resolution)
+        if detect_resolution is not None:
+            input_image = resize_image(input_image, detect_resolution)
         H, W, C = input_image.shape
 
         poses = self.detect_poses(input_image, include_hand, include_face)
@@ -223,7 +224,7 @@ def __call__(self, input_image, detect_resolution=512, image_resolution=512, inc
         detected_map = canvas
         detected_map = HWC3(detected_map)
 
-        img = resize_image(input_image, image_resolution)
+        img = resize_image(input_image, image_resolution) if image_resolution is not None else input_image
         H, W, C = img.shape
 
         detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_LINEAR)

diff --git a/src/controlnet_aux/pidi/__init__.py b/src/controlnet_aux/pidi/__init__.py
@@ -49,7 +49,8 @@ def __call__(self, input_image, detect_resolution=512, image_resolution=512, saf
             input_image = np.array(input_image, dtype=np.uint8)
 
         input_image = HWC3(input_image)
-        input_image = resize_image(input_image, detect_resolution)
+        if detect_resolution is not None:
+            input_image = resize_image(input_image, detect_resolution)
         assert input_image.ndim == 3
         input_image = input_image[:, :, ::-1].copy()
         with torch.no_grad():
@@ -67,7 +68,7 @@ def __call__(self, input_image, detect_resolution=512, image_resolution=512, saf
         detected_map = edge[0, 0]
         detected_map = HWC3(detected_map)
 
-        img = resize_image(input_image, image_resolution)
+        img = resize_image(input_image, image_resolution) if image_resolution is not None else input_image
         H, W, C = img.shape
 
         detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_LINEAR)

diff --git a/src/controlnet_aux/segment_anything/__init__.py b/src/controlnet_aux/segment_anything/__init__.py
@@ -71,7 +71,8 @@ def __call__(self, input_image: Union[np.ndarray, Image.Image]=None, detect_reso
             input_image = np.array(input_image, dtype=np.uint8)
 
         input_image = HWC3(input_image)
-        input_image = resize_image(input_image, detect_resolution)
+        if detect_resolution is not None:
+            input_image = resize_image(input_image, detect_resolution)
 
         # Generate Masks
         masks = self.mask_generator.generate(input_image)
@@ -81,7 +82,7 @@ def __call__(self, input_image: Union[np.ndarray, Image.Image]=None, detect_reso
         detected_map = map
         detected_map = HWC3(detected_map)
 
-        img = resize_image(input_image, image_resolution)
+        img = resize_image(input_image, image_resolution) if image_resolution is not None else input_image
         H, W, C = img.shape
 
         detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_LINEAR)

diff --git a/src/controlnet_aux/shuffle/__init__.py b/src/controlnet_aux/shuffle/__init__.py
@@ -21,7 +21,8 @@ def __call__(self, input_image, h=None, w=None, f=None, detect_resolution=512, i
             input_image = np.array(input_image, dtype=np.uint8)
 
         input_image = HWC3(input_image)
-        input_image = resize_image(input_image, detect_resolution)
+        if detect_resolution is not None:
+            input_image = resize_image(input_image, detect_resolution)
 
         H, W, C = input_image.shape
         if h is None:
@@ -35,7 +36,7 @@ def __call__(self, input_image, h=None, w=None, f=None, detect_resolution=512, i
         flow = np.concatenate([x, y], axis=2).astype(np.float32)
         detected_map = cv2.remap(input_image, flow, None, cv2.INTER_LINEAR)
 
-        img = resize_image(input_image, image_resolution)
+        img = resize_image(input_image, image_resolution) if image_resolution is not None else input_image
         H, W, C = img.shape
 
         detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_LINEAR)

diff --git a/src/controlnet_aux/util.py b/src/controlnet_aux/util.py
@@ -91,8 +91,10 @@ def resize_image(input_image, resolution):
     k = float(resolution) / min(H, W)
     H *= k
     W *= k
-    H = int(np.round(H / 64.0)) * 64
-    W = int(np.round(W / 64.0)) * 64
+    # Ensure both image dimensions are multiples of 32. Otherwise this leads to a RuntimeError such as:
+    # "The size of tensor a (X) must match the size of tensor b (Y) at non-singleton dimension Z"
+    H = int(np.round(H / 32.0)) * 32
+    W = int(np.round(W / 32.0)) * 32
     img = cv2.resize(input_image, (W, H), interpolation=cv2.INTER_LANCZOS4 if k > 1 else cv2.INTER_AREA)
     return img
 

diff --git a/src/controlnet_aux/zoe/__init__.py b/src/controlnet_aux/zoe/__init__.py
@@ -47,7 +47,8 @@ def __call__(self, input_image, detect_resolution=512, image_resolution=512, out
             output_type = output_type or "np"
 
         input_image = HWC3(input_image)
-        input_image = resize_image(input_image, detect_resolution)
+        if detect_resolution is not None:
+            input_image = resize_image(input_image, detect_resolution)
 
         assert input_image.ndim == 3
         image_depth = input_image
@@ -73,7 +74,7 @@ def __call__(self, input_image, detect_resolution=512, image_resolution=512, out
         detected_map = depth_image
         detected_map = HWC3(detected_map)      
 
-        img = resize_image(input_image, image_resolution)
+        img = resize_image(input_image, image_resolution) if image_resolution is not None else input_image
         H, W, C = img.shape
 
         detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_LINEAR)