Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ensuring input images are multiple of 32 as lower leads to RunTimeErr… #90

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -113,4 +113,6 @@ processed_image_dwpose = dwpose(img)

### Image resolution

To maintain the image aspect ratio, `detect_resolution`, `image_resolution`, and image sizes need to be multiples of `64`.
To maintain the image aspect ratio, `detect_resolution`, `image_resolution`, and the resolution passed to `resize_image` need to be multiples of `32`.
Otherwise, image dimensions will be rounded to the nearest multiple of `32` so that the models work correctly.
A resolution can be set to `None` to skip resizing, but this may lead to a `RuntimeError` when the image dimensions are not multiples of `32`.
5 changes: 3 additions & 2 deletions src/controlnet_aux/canny/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,13 @@ def __call__(self, input_image=None, low_threshold=100, high_threshold=200, dete
output_type = output_type or "np"

input_image = HWC3(input_image)
input_image = resize_image(input_image, detect_resolution)
if detect_resolution is not None:
input_image = resize_image(input_image, detect_resolution)

detected_map = cv2.Canny(input_image, low_threshold, high_threshold)
detected_map = HWC3(detected_map)

img = resize_image(input_image, image_resolution)
img = resize_image(input_image, image_resolution) if image_resolution is not None else input_image
H, W, C = img.shape

detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_LINEAR)
Expand Down
5 changes: 3 additions & 2 deletions src/controlnet_aux/dwpose/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@ def __call__(self, input_image, detect_resolution=512, image_resolution=512, out
input_image = cv2.cvtColor(np.array(input_image, dtype=np.uint8), cv2.COLOR_RGB2BGR)

input_image = HWC3(input_image)
input_image = resize_image(input_image, detect_resolution)
if detect_resolution is not None:
input_image = resize_image(input_image, detect_resolution)
H, W, C = input_image.shape

with torch.no_grad():
Expand Down Expand Up @@ -80,7 +81,7 @@ def __call__(self, input_image, detect_resolution=512, image_resolution=512, out
detected_map = draw_pose(pose, H, W)
detected_map = HWC3(detected_map)

img = resize_image(input_image, image_resolution)
img = resize_image(input_image, image_resolution) if image_resolution is not None else input_image
H, W, C = img.shape

detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_LINEAR)
Expand Down
5 changes: 3 additions & 2 deletions src/controlnet_aux/hed/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,8 @@ def __call__(self, input_image, detect_resolution=512, image_resolution=512, saf
input_image = np.array(input_image, dtype=np.uint8)

input_image = HWC3(input_image)
input_image = resize_image(input_image, detect_resolution)
if detect_resolution is not None:
input_image = resize_image(input_image, detect_resolution)

assert input_image.ndim == 3
H, W, C = input_image.shape
Expand All @@ -112,7 +113,7 @@ def __call__(self, input_image, detect_resolution=512, image_resolution=512, saf
detected_map = edge
detected_map = HWC3(detected_map)

img = resize_image(input_image, image_resolution)
img = resize_image(input_image, image_resolution) if image_resolution is not None else input_image
H, W, C = img.shape

detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_LINEAR)
Expand Down
5 changes: 3 additions & 2 deletions src/controlnet_aux/leres/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,8 @@ def __call__(self, input_image, thr_a=0, thr_b=0, boost=False, detect_resolution
input_image = np.array(input_image, dtype=np.uint8)

input_image = HWC3(input_image)
input_image = resize_image(input_image, detect_resolution)
if detect_resolution is not None:
input_image = resize_image(input_image, detect_resolution)

assert input_image.ndim == 3
height, width, dim = input_image.shape
Expand Down Expand Up @@ -107,7 +108,7 @@ def __call__(self, input_image, thr_a=0, thr_b=0, boost=False, detect_resolution
detected_map = depth_image
detected_map = HWC3(detected_map)

img = resize_image(input_image, image_resolution)
img = resize_image(input_image, image_resolution) if image_resolution is not None else input_image
H, W, C = img.shape

detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_LINEAR)
Expand Down
5 changes: 3 additions & 2 deletions src/controlnet_aux/lineart/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,8 @@ def __call__(self, input_image, coarse=False, detect_resolution=512, image_resol
input_image = np.array(input_image, dtype=np.uint8)

input_image = HWC3(input_image)
input_image = resize_image(input_image, detect_resolution)
if detect_resolution is not None:
input_image = resize_image(input_image, detect_resolution)

model = self.model_coarse if coarse else self.model
assert input_image.ndim == 3
Expand All @@ -155,7 +156,7 @@ def __call__(self, input_image, coarse=False, detect_resolution=512, image_resol

detected_map = HWC3(detected_map)

img = resize_image(input_image, image_resolution)
img = resize_image(input_image, image_resolution) if image_resolution is not None else input_image
H, W, C = img.shape

detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_LINEAR)
Expand Down
5 changes: 3 additions & 2 deletions src/controlnet_aux/lineart_anime/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,8 @@ def __call__(self, input_image, detect_resolution=512, image_resolution=512, out
input_image = np.array(input_image, dtype=np.uint8)

input_image = HWC3(input_image)
input_image = resize_image(input_image, detect_resolution)
if detect_resolution is not None:
input_image = resize_image(input_image, detect_resolution)

H, W, C = input_image.shape
Hn = 256 * int(np.ceil(float(H) / 256.0))
Expand All @@ -177,7 +178,7 @@ def __call__(self, input_image, detect_resolution=512, image_resolution=512, out

detected_map = HWC3(detected_map)

img = resize_image(input_image, image_resolution)
img = resize_image(input_image, image_resolution) if image_resolution is not None else input_image
H, W, C = img.shape

detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_LINEAR)
Expand Down
5 changes: 3 additions & 2 deletions src/controlnet_aux/mediapipe_face/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,13 @@ def __call__(self,
input_image = np.array(input_image, dtype=np.uint8)

input_image = HWC3(input_image)
input_image = resize_image(input_image, detect_resolution)
if detect_resolution is not None:
input_image = resize_image(input_image, detect_resolution)

detected_map = generate_annotation(input_image, max_faces, min_confidence)
detected_map = HWC3(detected_map)

img = resize_image(input_image, image_resolution)
img = resize_image(input_image, image_resolution) if image_resolution is not None else input_image
H, W, C = img.shape

detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_LINEAR)
Expand Down
5 changes: 3 additions & 2 deletions src/controlnet_aux/midas/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@ def __call__(self, input_image, a=np.pi * 2.0, bg_th=0.1, depth_and_normal=False
output_type = output_type or "np"

input_image = HWC3(input_image)
input_image = resize_image(input_image, detect_resolution)
if detect_resolution is not None:
input_image = resize_image(input_image, detect_resolution)

assert input_image.ndim == 3
image_depth = input_image
Expand Down Expand Up @@ -77,7 +78,7 @@ def __call__(self, input_image, a=np.pi * 2.0, bg_th=0.1, depth_and_normal=False
if depth_and_normal:
normal_image = HWC3(normal_image)

img = resize_image(input_image, image_resolution)
img = resize_image(input_image, image_resolution) if image_resolution is not None else input_image
H, W, C = img.shape

depth_image = cv2.resize(depth_image, (W, H), interpolation=cv2.INTER_LINEAR)
Expand Down
5 changes: 3 additions & 2 deletions src/controlnet_aux/mlsd/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,8 @@ def __call__(self, input_image, thr_v=0.1, thr_d=0.1, detect_resolution=512, ima
input_image = np.array(input_image, dtype=np.uint8)

input_image = HWC3(input_image)
input_image = resize_image(input_image, detect_resolution)
if detect_resolution is not None:
input_image = resize_image(input_image, detect_resolution)

assert input_image.ndim == 3
img = input_image
Expand All @@ -68,7 +69,7 @@ def __call__(self, input_image, thr_v=0.1, thr_d=0.1, detect_resolution=512, ima
detected_map = img_output[:, :, 0]
detected_map = HWC3(detected_map)

img = resize_image(input_image, image_resolution)
img = resize_image(input_image, image_resolution) if image_resolution is not None else input_image
H, W, C = img.shape

detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_LINEAR)
Expand Down
5 changes: 3 additions & 2 deletions src/controlnet_aux/normalbae/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,8 @@ def __call__(self, input_image, detect_resolution=512, image_resolution=512, out
input_image = np.array(input_image, dtype=np.uint8)

input_image = HWC3(input_image)
input_image = resize_image(input_image, detect_resolution)
if detect_resolution is not None:
input_image = resize_image(input_image, detect_resolution)

assert input_image.ndim == 3
image_normal = input_image
Expand All @@ -97,7 +98,7 @@ def __call__(self, input_image, detect_resolution=512, image_resolution=512, out
detected_map = normal_image
detected_map = HWC3(detected_map)

img = resize_image(input_image, image_resolution)
img = resize_image(input_image, image_resolution) if image_resolution is not None else input_image
H, W, C = img.shape

detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_LINEAR)
Expand Down
5 changes: 3 additions & 2 deletions src/controlnet_aux/open_pose/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,8 @@ def __call__(self, input_image, detect_resolution=512, image_resolution=512, inc
input_image = np.array(input_image, dtype=np.uint8)

input_image = HWC3(input_image)
input_image = resize_image(input_image, detect_resolution)
if detect_resolution is not None:
input_image = resize_image(input_image, detect_resolution)
H, W, C = input_image.shape

poses = self.detect_poses(input_image, include_hand, include_face)
Expand All @@ -223,7 +224,7 @@ def __call__(self, input_image, detect_resolution=512, image_resolution=512, inc
detected_map = canvas
detected_map = HWC3(detected_map)

img = resize_image(input_image, image_resolution)
img = resize_image(input_image, image_resolution) if image_resolution is not None else input_image
H, W, C = img.shape

detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_LINEAR)
Expand Down
5 changes: 3 additions & 2 deletions src/controlnet_aux/pidi/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,8 @@ def __call__(self, input_image, detect_resolution=512, image_resolution=512, saf
input_image = np.array(input_image, dtype=np.uint8)

input_image = HWC3(input_image)
input_image = resize_image(input_image, detect_resolution)
if detect_resolution is not None:
input_image = resize_image(input_image, detect_resolution)
assert input_image.ndim == 3
input_image = input_image[:, :, ::-1].copy()
with torch.no_grad():
Expand All @@ -67,7 +68,7 @@ def __call__(self, input_image, detect_resolution=512, image_resolution=512, saf
detected_map = edge[0, 0]
detected_map = HWC3(detected_map)

img = resize_image(input_image, image_resolution)
img = resize_image(input_image, image_resolution) if image_resolution is not None else input_image
H, W, C = img.shape

detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_LINEAR)
Expand Down
5 changes: 3 additions & 2 deletions src/controlnet_aux/segment_anything/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,8 @@ def __call__(self, input_image: Union[np.ndarray, Image.Image]=None, detect_reso
input_image = np.array(input_image, dtype=np.uint8)

input_image = HWC3(input_image)
input_image = resize_image(input_image, detect_resolution)
if detect_resolution is not None:
input_image = resize_image(input_image, detect_resolution)

# Generate Masks
masks = self.mask_generator.generate(input_image)
Expand All @@ -81,7 +82,7 @@ def __call__(self, input_image: Union[np.ndarray, Image.Image]=None, detect_reso
detected_map = map
detected_map = HWC3(detected_map)

img = resize_image(input_image, image_resolution)
img = resize_image(input_image, image_resolution) if image_resolution is not None else input_image
H, W, C = img.shape

detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_LINEAR)
Expand Down
5 changes: 3 additions & 2 deletions src/controlnet_aux/shuffle/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ def __call__(self, input_image, h=None, w=None, f=None, detect_resolution=512, i
input_image = np.array(input_image, dtype=np.uint8)

input_image = HWC3(input_image)
input_image = resize_image(input_image, detect_resolution)
if detect_resolution is not None:
input_image = resize_image(input_image, detect_resolution)

H, W, C = input_image.shape
if h is None:
Expand All @@ -35,7 +36,7 @@ def __call__(self, input_image, h=None, w=None, f=None, detect_resolution=512, i
flow = np.concatenate([x, y], axis=2).astype(np.float32)
detected_map = cv2.remap(input_image, flow, None, cv2.INTER_LINEAR)

img = resize_image(input_image, image_resolution)
img = resize_image(input_image, image_resolution) if image_resolution is not None else input_image
H, W, C = img.shape

detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_LINEAR)
Expand Down
6 changes: 4 additions & 2 deletions src/controlnet_aux/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,10 @@ def resize_image(input_image, resolution):
k = float(resolution) / min(H, W)
H *= k
W *= k
H = int(np.round(H / 64.0)) * 64
W = int(np.round(W / 64.0)) * 64
# Ensure the image size is a multiple of 32. Otherwise this leads to a RuntimeError such as:
# The size of tensor a (X) must match the size of tensor b (Y) at non-singleton dimension Z
H = int(np.round(H / 32.0)) * 32
W = int(np.round(W / 32.0)) * 32
img = cv2.resize(input_image, (W, H), interpolation=cv2.INTER_LANCZOS4 if k > 1 else cv2.INTER_AREA)
return img

Expand Down
5 changes: 3 additions & 2 deletions src/controlnet_aux/zoe/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,8 @@ def __call__(self, input_image, detect_resolution=512, image_resolution=512, out
output_type = output_type or "np"

input_image = HWC3(input_image)
input_image = resize_image(input_image, detect_resolution)
if detect_resolution is not None:
input_image = resize_image(input_image, detect_resolution)

assert input_image.ndim == 3
image_depth = input_image
Expand All @@ -73,7 +74,7 @@ def __call__(self, input_image, detect_resolution=512, image_resolution=512, out
detected_map = depth_image
detected_map = HWC3(detected_map)

img = resize_image(input_image, image_resolution)
img = resize_image(input_image, image_resolution) if image_resolution is not None else input_image
H, W, C = img.shape

detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_LINEAR)
Expand Down