feature: densepose controlnet (#481)
parent ce37e60b11
commit df86aa6668

# adapted from https://github.com/Mikubill/sd-webui-controlnet/blob/0b90426254debf78bfc09d88c064d2caf0935282/annotator/densepose/densepose.py
import logging
import math
from enum import IntEnum
from functools import lru_cache
from typing import List, Tuple, Union

import cv2
import numpy as np
import torch
from torch.nn import functional as F

from imaginairy import config
from imaginairy.utils.downloads import get_cached_url_path

logger = logging.getLogger(__name__)

N_PART_LABELS = 24

_RawBoxType = Union[List[float], Tuple[float, ...], torch.Tensor, np.ndarray]
IntTupleBox = Tuple[int, int, int, int]


def safer_memory(x):
    # Fix many MAC/AMD problems
    return np.ascontiguousarray(x.copy()).copy()


def pad64(x):
    # smallest non-negative padding that rounds x up to a multiple of 64
    return int(np.ceil(float(x) / 64.0) * 64 - x)
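
# Worked example (illustrative): pad64(683) == 21, since 683 + 21 = 704 is the
# next multiple of 64.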


def resize_image_with_pad_torch(
    img, resolution, upscale_method="bicubic", mode="constant"
):
    B, C, H_raw, W_raw = img.shape
    k = float(resolution) / float(min(H_raw, W_raw))
    H_target = int(math.ceil(float(H_raw) * k))
    W_target = int(math.ceil(float(W_raw) * k))

    if k > 1:
        img = F.interpolate(
            img,
            size=(H_target, W_target),
            mode=upscale_method,
            align_corners=False,
        )
    else:
        img = F.interpolate(img, size=(H_target, W_target), mode="area")

    # pad right/bottom so both spatial dims become multiples of 64
    H_pad, W_pad = pad64(H_target), pad64(W_target)
    img_padded = F.pad(img, (0, W_pad, 0, H_pad), mode=mode)

    def remove_pad(x):
        # crops an HWC array back to the pre-padding target size
        return safer_memory(x[:H_target, :W_target, ...])

    return img_padded, remove_pad
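
# Usage sketch (illustrative; the shapes below are assumptions, not from the
# original code):
#
#   img = torch.rand(1, 3, 480, 640) * 2.0 - 1.0  # float BCHW in [-1, 1]
#   padded, remove_pad = resize_image_with_pad_torch(img, 512)
#   padded.shape            # -> (1, 3, 512, 704): 512x683 target, padded to 64s
#   hwc = padded[0].permute(1, 2, 0).numpy()
#   remove_pad(hwc).shape   # -> (512, 683, 3)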


def HWC3(x: np.ndarray) -> np.ndarray:
    assert x.dtype == np.uint8
    if x.ndim == 2:
        x = x[:, :, None]
    assert x.ndim == 3
    H, W, C = x.shape
    assert C == 1 or C == 3 or C == 4
    if C == 3:
        return x
    if C == 1:
        return np.concatenate([x, x, x], axis=2)
    if C == 4:
        # composite RGBA over a white background
        color = x[:, :, 0:3].astype(np.float32)
        alpha = x[:, :, 3:4].astype(np.float32) / 255.0
        y = color * alpha + 255.0 * (1.0 - alpha)
        y = y.clip(0, 255).astype(np.uint8)
        return y
    raise RuntimeError("unreachable")


@lru_cache(maxsize=1)
def get_densepose_model(
    filename="densepose_r101_fpn_dl.torchscript", base_url=config.DENSEPOSE_REPO_URL
):
    # imported for its side effects; it may register ops the TorchScript model needs
    import torchvision  # noqa

    url = f"{base_url}/{filename}"
    torchscript_model_path = get_cached_url_path(url)
    logger.info(f"Loading densepose model {url} from {torchscript_model_path}")
    densepose = torch.jit.load(torchscript_model_path, map_location="cpu")
    return densepose
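
# Consumption sketch (shapes are illustrative assumptions): the TorchScript
# model takes a uint8 CHW image and returns per-detection DensePose heads, as
# used in _generate_densepose_image below.
#
#   model = get_densepose_model()
#   chw = torch.zeros(3, 512, 704, dtype=torch.uint8)
#   pred_boxes, coarse_seg, fine_segm, u, v = model(chw)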


@lru_cache(maxsize=1)
def get_segment_result_visualizer():
    return DensePoseMaskedColormapResultsVisualizer(
        alpha=1,
        data_extractor=_extract_i_from_iuvarr,
        segm_extractor=_extract_i_from_iuvarr,
        val_scale=255.0 / N_PART_LABELS,
    )


def mask_to_bbox(mask_img_t):
    # tight (x0, y0, x1, y1) bounding box of the nonzero region, or None
    m = mask_img_t.nonzero()
    if m.numel() == 0:
        return None
    y0 = torch.min(m[:, 0])
    y1 = torch.max(m[:, 0])
    x0 = torch.min(m[:, 1])
    x1 = torch.max(m[:, 1])
    return x0, y0, x1, y1


def pad_bbox(bbox, max_height, max_width, pad=1):
    x0, y0, x1, y1 = bbox
    x0 = max(0, x0 - pad)
    y0 = max(0, y0 - pad)
    x1 = min(max_width, x1 + pad)
    y1 = min(max_height, y1 + pad)
    return x0, y0, x1, y1


def square_bbox(bbox, max_height, max_width):
    """
    Adjusts the bounding box to make it as close to a square as possible while
    ensuring it does not exceed the image bounds and still includes the
    original bounding box contents.

    Args:
    - bbox: A tuple of (x0, y0, x1, y1) for the original bounding box.
    - max_height, max_width: The image size.

    Returns:
    - A tuple of (x0, y0, x1, y1) for the adjusted bounding box.
    """
    x0, y0, x1, y1 = bbox
    width = x1 - x0
    height = y1 - y0

    # Determine how much to adjust to make the bounding box square
    if width > height:
        diff = width - height
        half_diff = diff // 2
        y0 = max(0, y0 - half_diff)
        y1 = min(max_height, y1 + half_diff + (diff % 2))  # Add 1 if diff is odd
    elif height > width:
        diff = height - width
        half_diff = diff // 2
        x0 = max(0, x0 - half_diff)
        x1 = min(max_width, x1 + half_diff + (diff % 2))  # Add 1 if diff is odd

    # Ensure the bounding box is within the image boundaries
    x0 = max(0, min(x0, max_width - 1))
    y0 = max(0, min(y0, max_height - 1))
    x1 = max(0, min(x1, max_width))
    y1 = max(0, min(y1, max_height))

    return x0, y0, x1, y1
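
# Worked example (values assumed for illustration): a 100x40 box in a 512x512
# image grows vertically toward a square:
#
#   square_bbox((10, 200, 110, 240), max_height=512, max_width=512)
#   # -> (10, 170, 110, 270), i.e. 100x100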


def _np_to_t(img_np):
    # HWC uint8 [0, 255] -> float BCHW [0, 1]
    img_t = torch.from_numpy(img_np) / 255.0
    img_t = img_t.permute(2, 0, 1)
    img_t = img_t.unsqueeze(0)
    return img_t


def generate_densepose_image(
    img: torch.Tensor,
    detect_resolution=512,
    upscale_method="bicubic",
    cmap="viridis",
    double_pass=False,
):
    assert_tensor_float_11_bchw(img)
    input_h, input_w = img.shape[-2:]
    if double_pass:
        # First pass finds the person; the second pass re-runs detection on a
        # tight square crop for higher effective resolution.
        first_densepose_img_np = _generate_densepose_image(
            img, detect_resolution, upscale_method, cmap, adapt_viridis_bg=False
        )
        first_densepose_img_t = _np_to_t(first_densepose_img_np)
        # convert the densepose image into a mask (every color other than black is part of the mask)
        densepose_img_mask = first_densepose_img_t[0].sum(dim=0) > 0
        bbox = mask_to_bbox(densepose_img_mask)

        if bbox is None:
            densepose_np = first_densepose_img_np
        else:
            bbox = pad_bbox(bbox, max_height=input_h, max_width=input_w, pad=10)
            bbox = square_bbox(bbox, max_height=input_h, max_width=input_w)
            x0, y0, x1, y1 = bbox

            cropped_img = img[:, :, y0:y1, x0:x1]

            densepose_np = _generate_densepose_image(
                cropped_img,
                detect_resolution,
                upscale_method,
                cmap,
                adapt_viridis_bg=False,
            )
            # paste densepose_np back into first_densepose_img_np using bbox
            first_densepose_img_np[y0:y1, x0:x1] = densepose_np
            densepose_np = first_densepose_img_np
    else:
        densepose_np = _generate_densepose_image(
            img, detect_resolution, upscale_method, cmap, adapt_viridis_bg=False
        )

    if cmap == "viridis":
        # remap zero-valued channels to the viridis zero color (68, 1, 84)
        # so the black background matches the colormap
        densepose_np[:, :, 0][densepose_np[:, :, 0] == 0] = 68
        densepose_np[:, :, 1][densepose_np[:, :, 1] == 0] = 1
        densepose_np[:, :, 2][densepose_np[:, :, 2] == 0] = 84

    return densepose_np
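
# Usage sketch (illustrative; the input here is a placeholder, not a real
# photo):
#
#   img = torch.zeros(1, 3, 768, 512)  # float BCHW in [-1, 1]
#   dp = generate_densepose_image(img, double_pass=True)
#   dp.shape                           # -> (768, 512, 3) uint8 RGB map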


def _generate_densepose_image(
    img: torch.Tensor,
    detect_resolution=512,
    upscale_method="bicubic",
    cmap="viridis",
    adapt_viridis_bg=True,
) -> np.ndarray:
    assert_tensor_float_11_bchw(img)
    input_h, input_w = img.shape[-2:]
    img, remove_pad = resize_image_with_pad_torch(
        img, detect_resolution, upscale_method
    )
    # [-1, 1] float -> [0, 255] uint8
    img = ((img + 1.0) * 127.5).to(torch.uint8)
    assert_tensor_uint8_255_bchw(img)
    H, W = img.shape[-2:]
    hint_image_canvas = np.zeros([H, W], dtype=np.uint8)
    hint_image_canvas = np.tile(hint_image_canvas[:, :, np.newaxis], [1, 1, 3])
    densepose_model = get_densepose_model()
    pred_boxes, coarse_seg, fine_segm, u, v = densepose_model(img.squeeze(0))
    densepose_results = list(
        map(
            densepose_chart_predictor_output_to_result,
            pred_boxes,
            coarse_seg,
            fine_segm,
            u,
            v,
        )
    )
    cmaps = {
        "viridis": cv2.COLORMAP_VIRIDIS,
        "parula": cv2.COLORMAP_PARULA,
        "jet": cv2.COLORMAP_JET,
    }
    cv2_cmap = cmaps.get(cmap, cv2.COLORMAP_PARULA)
    result_visualizer = get_segment_result_visualizer()
    result_visualizer.mask_visualizer.cmap = cv2_cmap
    hint_image = result_visualizer.visualize(hint_image_canvas, densepose_results)
    hint_image = cv2.cvtColor(hint_image, cv2.COLOR_BGR2RGB)

    if cv2_cmap == cv2.COLORMAP_VIRIDIS and adapt_viridis_bg:
        # remap zero-valued channels to the viridis zero color (68, 1, 84)
        hint_image[:, :, 0][hint_image[:, :, 0] == 0] = 68
        hint_image[:, :, 1][hint_image[:, :, 1] == 0] = 1
        hint_image[:, :, 2][hint_image[:, :, 2] == 0] = 84

    detected_map = remove_pad(HWC3(hint_image))
    # resize the map back to the original input size
    if detected_map.shape[0] < input_h or detected_map.shape[1] < input_w:
        # scale it up
        detected_map = cv2.resize(
            detected_map, (input_w, input_h), interpolation=cv2.INTER_NEAREST
        )
    else:
        # scale it down
        detected_map = cv2.resize(
            detected_map, (input_w, input_h), interpolation=cv2.INTER_AREA
        )
    return detected_map
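
# Colormap note: unrecognized cmap names fall back to cv2.COLORMAP_PARULA.
# Minimal call sketch (placeholder input, illustrative only):
#
#   dp = _generate_densepose_image(torch.zeros(1, 3, 512, 512), cmap="jet")
#   dp.dtype, dp.shape  # -> uint8, (512, 512, 3)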


def assert_ndarray_uint8_255_hwc(img):
    # assert input_image is an HWC uint8 ndarray with colors 0-255
    assert img.dtype == np.uint8
    assert img.ndim == 3
    assert img.shape[2] == 3
    assert img.max() <= 255
    assert img.min() >= 0


def assert_tensor_uint8_255_bchw(img):
    # assert input_image is a PyTorch tensor with colors 0-255 and dimensions (B, C, H, W)
    assert isinstance(img, torch.Tensor)
    assert img.dtype == torch.uint8
    assert img.ndim == 4
    assert img.shape[1] == 3
    assert img.max() <= 255
    assert img.min() >= 0


def assert_tensor_float_11_bchw(img):
    # assert input_image is a PyTorch tensor with colors -1 to 1 and dimensions (B, C, H, W)
    if not isinstance(img, torch.Tensor):
        msg = f"Input image must be a PyTorch tensor, but got {type(img)}"
        raise TypeError(msg)

    if img.dtype not in (torch.float32, torch.float64, torch.float16):
        msg = f"Input image must be a float tensor, but got {img.dtype}"
        raise ValueError(msg)

    if img.ndim != 4:
        msg = f"Input image must be 4D (B, C, H, W), but got {img.ndim}D"
        raise ValueError(msg)

    if img.shape[1] != 3:
        msg = f"Input image must have 3 channels, but got {img.shape[1]}"
        raise ValueError(msg)

    if img.max() > 1 or img.min() < -1:
        msg = f"Input image must have values in [-1, 1], but got {img.min()} .. {img.max()}"
        raise ValueError(msg)


class BoxMode(IntEnum):
    """
    Enum of different ways to represent a box.
    """

    XYXY_ABS = 0
    """
    (x0, y0, x1, y1) in absolute floating-point coordinates.
    The coordinates are in the range [0, width or height].
    """
    XYWH_ABS = 1
    """
    (x0, y0, w, h) in absolute floating-point coordinates.
    """
    XYXY_REL = 2
    """
    Not yet supported!
    (x0, y0, x1, y1) in range [0, 1]. They are relative to the size of the image.
    """
    XYWH_REL = 3
    """
    Not yet supported!
    (x0, y0, w, h) in range [0, 1]. They are relative to the size of the image.
    """
    XYWHA_ABS = 4
    """
    (xc, yc, w, h, a) in absolute floating-point coordinates.
    (xc, yc) is the center of the rotated box, and the angle a is in degrees ccw.
    """

    @staticmethod
    def convert(
        box: _RawBoxType, from_mode: "BoxMode", to_mode: "BoxMode"
    ) -> _RawBoxType:
        """
        Args:
            box: can be a k-tuple, k-list or an Nxk array/tensor, where k = 4 or 5
            from_mode, to_mode (BoxMode)

        Returns:
            The converted box of the same type.
        """
        if from_mode == to_mode:
            return box

        original_type = type(box)
        is_numpy = isinstance(box, np.ndarray)
        single_box = isinstance(box, (list, tuple))
        if single_box:
            assert len(box) == 4 or len(box) == 5, (
                "BoxMode.convert takes either a k-tuple/list or an Nxk array/tensor,"
                " where k == 4 or 5"
            )
            arr = torch.tensor(box)[None, :]
        else:
            # avoid modifying the input box
            arr = torch.from_numpy(np.asarray(box)).clone() if is_numpy else box.clone()  # type: ignore

        assert to_mode not in [
            BoxMode.XYXY_REL,
            BoxMode.XYWH_REL,
        ], "Relative mode not yet supported!"
        assert from_mode not in [
            BoxMode.XYXY_REL,
            BoxMode.XYWH_REL,
        ], "Relative mode not yet supported!"

        if from_mode == BoxMode.XYWHA_ABS and to_mode == BoxMode.XYXY_ABS:
            assert (
                arr.shape[-1] == 5
            ), "The last dimension of input shape must be 5 for XYWHA format"
            original_dtype = arr.dtype
            arr = arr.double()

            w = arr[:, 2]
            h = arr[:, 3]
            a = arr[:, 4]
            c = torch.abs(torch.cos(a * math.pi / 180.0))
            s = torch.abs(torch.sin(a * math.pi / 180.0))
            # This basically computes the horizontal bounding rectangle of the rotated box
            new_w = c * w + s * h
            new_h = c * h + s * w

            # convert center to top-left corner
            arr[:, 0] -= new_w / 2.0
            arr[:, 1] -= new_h / 2.0
            # bottom-right corner
            arr[:, 2] = arr[:, 0] + new_w
            arr[:, 3] = arr[:, 1] + new_h

            arr = arr[:, :4].to(dtype=original_dtype)
        elif from_mode == BoxMode.XYWH_ABS and to_mode == BoxMode.XYWHA_ABS:
            original_dtype = arr.dtype
            arr = arr.double()
            arr[:, 0] += arr[:, 2] / 2.0
            arr[:, 1] += arr[:, 3] / 2.0
            angles = torch.zeros((arr.shape[0], 1), dtype=arr.dtype)
            arr = torch.cat((arr, angles), dim=1).to(dtype=original_dtype)
        else:
            if to_mode == BoxMode.XYXY_ABS and from_mode == BoxMode.XYWH_ABS:
                arr[:, 2] += arr[:, 0]
                arr[:, 3] += arr[:, 1]
            elif from_mode == BoxMode.XYXY_ABS and to_mode == BoxMode.XYWH_ABS:
                arr[:, 2] -= arr[:, 0]
                arr[:, 3] -= arr[:, 1]
            else:
                msg = f"Conversion from BoxMode {from_mode} to {to_mode} is not supported yet"
                raise NotImplementedError(msg)

        if single_box:
            return original_type(arr.flatten().tolist())
        if is_numpy:
            return arr.numpy()
        return arr
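
# Worked example (illustrative): converting a plain XYWH tuple to XYXY:
#
#   BoxMode.convert((10, 20, 30, 40), BoxMode.XYWH_ABS, BoxMode.XYXY_ABS)
#   # -> (10, 20, 40, 60)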


class MatrixVisualizer:
    def __init__(
        self,
        inplace=True,
        cmap=cv2.COLORMAP_PARULA,
        val_scale=1.0,
        alpha=0.7,
        interp_method_matrix=cv2.INTER_LINEAR,
        interp_method_mask=cv2.INTER_NEAREST,
    ):
        self.inplace = inplace
        self.cmap = cmap
        self.val_scale = val_scale
        self.alpha = alpha
        self.interp_method_matrix = interp_method_matrix
        self.interp_method_mask = interp_method_mask

    def visualize(self, image_bgr: np.ndarray, mask: np.ndarray, matrix, bbox_xywh):
        self._check_image(image_bgr)
        self._check_mask_matrix(mask, matrix)
        image_target_bgr = image_bgr if self.inplace else image_bgr * 0

        x, y, w, h = (int(v) for v in bbox_xywh)
        if w <= 0 or h <= 0:
            return image_bgr
        mask, matrix = self._resize(mask, matrix, w, h)
        mask_bg = np.tile((mask == 0)[:, :, np.newaxis], [1, 1, 3])
        matrix_scaled = matrix.astype(np.float32) * self.val_scale
        _EPSILON = 1e-6
        if np.any(matrix_scaled > 255 + _EPSILON):
            logger.warning(
                f"Matrix has values > {255 + _EPSILON} after "
                f"scaling, clipping to [0..255]"
            )
        matrix_scaled_8u = matrix_scaled.clip(0, 255).astype(np.uint8)
        matrix_vis = cv2.applyColorMap(matrix_scaled_8u, self.cmap)
        # keep the original background where the mask is zero, then alpha-blend
        matrix_vis[mask_bg] = image_target_bgr[y : y + h, x : x + w, :][mask_bg]
        image_target_bgr[y : y + h, x : x + w, :] = (
            image_target_bgr[y : y + h, x : x + w, :] * (1.0 - self.alpha)
            + matrix_vis * self.alpha
        )
        return image_target_bgr.astype(np.uint8)

    def _resize(self, mask, matrix, w, h):
        # NB: interpolation must be passed by keyword; the third positional
        # argument of cv2.resize is `dst`, not `interpolation`
        if (w != mask.shape[1]) or (h != mask.shape[0]):
            mask = cv2.resize(mask, (w, h), interpolation=self.interp_method_mask)
        if (w != matrix.shape[1]) or (h != matrix.shape[0]):
            matrix = cv2.resize(matrix, (w, h), interpolation=self.interp_method_matrix)
        return mask, matrix

    def _check_image(self, image_rgb):
        assert len(image_rgb.shape) == 3
        assert image_rgb.shape[2] == 3
        assert image_rgb.dtype == np.uint8

    def _check_mask_matrix(self, mask, matrix):
        assert len(matrix.shape) == 2
        assert len(mask.shape) == 2
        assert mask.dtype == np.uint8
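
# Sketch (values assumed): paint a tiny 2x2 label matrix into a blank canvas:
#
#   vis = MatrixVisualizer(inplace=False, val_scale=255.0 / N_PART_LABELS, alpha=1)
#   canvas = np.zeros((64, 64, 3), dtype=np.uint8)
#   mask = np.ones((2, 2), dtype=np.uint8)
#   matrix = np.array([[1, 2], [3, 4]], dtype=np.uint8)
#   out = vis.visualize(canvas, mask, matrix, (0, 0, 32, 32))  # colored 32x32 patch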


class DensePoseMaskedColormapResultsVisualizer:
    def __init__(
        self,
        data_extractor,
        segm_extractor,
        inplace=True,
        cmap=cv2.COLORMAP_PARULA,
        alpha=0.7,
        val_scale=1.0,
    ):
        self.mask_visualizer = MatrixVisualizer(
            inplace=inplace, cmap=cmap, val_scale=val_scale, alpha=alpha
        )
        self.data_extractor = data_extractor
        self.segm_extractor = segm_extractor

    def visualize(
        self,
        image_bgr: np.ndarray,
        results,
    ) -> np.ndarray:
        for result in results:
            boxes_xywh, labels, uv = result
            # pack part labels and U/V (scaled to 0-255) into a single uint8 IUV array
            iuv_array = torch.cat((labels[None].type(torch.float32), uv * 255.0)).type(
                torch.uint8
            )
            self.visualize_iuv_arr(image_bgr, iuv_array.cpu().numpy(), boxes_xywh)
        return image_bgr

    def visualize_iuv_arr(self, image_bgr, iuv_arr: np.ndarray, bbox_xywh) -> None:
        matrix = self.data_extractor(iuv_arr)
        segm = self.segm_extractor(iuv_arr)
        mask = (segm > 0).astype(np.uint8)
        self.mask_visualizer.visualize(image_bgr, mask, matrix, bbox_xywh)
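
# Each result consumed above is a (box_xywh, labels, uv) triple as produced by
# densepose_chart_predictor_output_to_result below; with the module defaults
# both extractors pull the I (part label) channel from the IUV array.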


def _extract_i_from_iuvarr(iuv_arr):
    return iuv_arr[0, :, :]


def _extract_u_from_iuvarr(iuv_arr):
    return iuv_arr[1, :, :]


def _extract_v_from_iuvarr(iuv_arr):
    return iuv_arr[2, :, :]


def make_int_box(box: torch.Tensor) -> IntTupleBox:
    x0, y0, x1, y1 = (int(v) for v in box.long().tolist())
    return x0, y0, x1, y1


def densepose_chart_predictor_output_to_result(
    boxes: torch.Tensor, coarse_segm: torch.Tensor, fine_segm, u, v
):
    # add a leading batch dimension; this function handles one detection at a time
    boxes = boxes.unsqueeze(0)
    coarse_segm = coarse_segm.unsqueeze(0)
    fine_segm = fine_segm.unsqueeze(0)
    u = u.unsqueeze(0)
    v = v.unsqueeze(0)
    boxes_xyxy_abs = boxes.clone()
    boxes_xywh_abs = BoxMode.convert(boxes_xyxy_abs, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
    box_xywh = make_int_box(boxes_xywh_abs[0])  # type: ignore

    labels = resample_fine_and_coarse_segm_tensors_to_bbox(
        fine_segm, coarse_segm, box_xywh
    ).squeeze(0)
    uv = resample_uv_tensors_to_bbox(u, v, labels, box_xywh)
    return box_xywh, labels, uv
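
# Shape summary (sizes illustrative): for a detection whose box is W x H
# pixels, the returned triple is (box_xywh, labels [H, W] long, uv [2, H, W]
# float).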


def resample_fine_and_coarse_segm_tensors_to_bbox(
    fine_segm: torch.Tensor, coarse_segm: torch.Tensor, box_xywh_abs: IntTupleBox
):
    """
    Resample fine and coarse segmentation tensors to the given
    bounding box and derive labels for each pixel of the bounding box

    Args:
        fine_segm: float tensor of shape [1, C, Hout, Wout]
        coarse_segm: float tensor of shape [1, K, Hout, Wout]
        box_xywh_abs (tuple of 4 int): bounding box given by its upper-left
            corner coordinates, width (W) and height (H)
    Return:
        Labels for each pixel of the bounding box, a long tensor of size [1, H, W]
    """
    x, y, w, h = box_xywh_abs
    w = max(int(w), 1)
    h = max(int(h), 1)
    # coarse segmentation
    coarse_segm_bbox = F.interpolate(
        coarse_segm,
        (h, w),
        mode="bilinear",
        align_corners=False,
    ).argmax(dim=1)
    # combined coarse and fine segmentation
    labels = (
        F.interpolate(fine_segm, (h, w), mode="bilinear", align_corners=False).argmax(
            dim=1
        )
        * (coarse_segm_bbox > 0).long()
    )
    return labels


def resample_uv_tensors_to_bbox(
    u: torch.Tensor,
    v: torch.Tensor,
    labels: torch.Tensor,
    box_xywh_abs: IntTupleBox,
) -> torch.Tensor:
    """
    Resamples U and V coordinate estimates for the given bounding box

    Args:
        u (tensor [1, C, H, W] of float): U coordinates
        v (tensor [1, C, H, W] of float): V coordinates
        labels (tensor [H, W] of long): labels obtained by resampling segmentation
            outputs for the given bounding box
        box_xywh_abs (tuple of 4 int): bounding box that corresponds to predictor outputs
    Return:
        Resampled U and V coordinates - a tensor [2, H, W] of float
    """
    x, y, w, h = box_xywh_abs
    w = max(int(w), 1)
    h = max(int(h), 1)
    u_bbox = F.interpolate(u, (h, w), mode="bilinear", align_corners=False)
    v_bbox = F.interpolate(v, (h, w), mode="bilinear", align_corners=False)
    uv = torch.zeros([2, h, w], dtype=torch.float32, device=u.device)
    for part_id in range(1, u_bbox.size(1)):
        # copy U/V only where the pixel's part label matches this part
        uv[0][labels == part_id] = u_bbox[0, part_id][labels == part_id]
        uv[1][labels == part_id] = v_bbox[0, part_id][labels == part_id]
    return uv