feature: inpaint image preparation methods; better organization

bd/improved-inpaint
Bryce 1 month ago
parent 3c1c695f76
commit 86aed3520a

@@ -41,13 +41,15 @@ def _generate_single_image(
    MaskMode,
)
from imaginairy.utils import get_device, randn_seeded
from imaginairy.utils.img_utils import (
    add_caption_to_image,
    pillow_fit_image_within,
from imaginairy.utils.img_convert import (
    pillow_img_to_torch_image,
    pillow_mask_to_latent_mask,
    torch_img_to_pillow_img,
)
from imaginairy.utils.img_utils import (
    add_caption_to_image,
    pillow_fit_image_within,
)
from imaginairy.utils.log_utils import (
    ImageLoggingContext,
    log_conditioning,

@@ -39,12 +39,14 @@ def generate_single_image(
from imaginairy.samplers import SolverName
from imaginairy.schema import ImagineResult
from imaginairy.utils import get_device, randn_seeded
from imaginairy.utils.img_convert import (
    pillow_img_to_torch_image,
    pillow_mask_to_latent_mask,
)
from imaginairy.utils.img_utils import (
    add_caption_to_image,
    combine_image,
    pillow_fit_image_within,
    pillow_img_to_torch_image,
    pillow_mask_to_latent_mask,
)
from imaginairy.utils.log_utils import (
    ImageLoggingContext,
@@ -470,9 +472,9 @@ def prep_control_input(
from PIL import ImageOps
from imaginairy.utils import get_device
from imaginairy.utils.img_convert import pillow_img_to_torch_image
from imaginairy.utils.img_utils import (
    pillow_fit_image_within,
    pillow_img_to_torch_image,
)
from imaginairy.utils.log_utils import (
    log_img,

@@ -2,7 +2,7 @@
import cv2
from imaginairy.utils.img_utils import pillow_img_to_opencv_img
from imaginairy.utils.img_convert import pillow_img_to_opencv_img
def calculate_blurriness_level(img):

@@ -12,6 +12,10 @@ from torch.nn import functional as F
from imaginairy import config
from imaginairy.utils.downloads import get_cached_url_path
from imaginairy.utils.img_convert import (
    assert_tensor_float_11_bchw,
    assert_tensor_uint8_255_bchw,
)
logger = logging.getLogger(__name__)
@@ -240,6 +244,7 @@ def _generate_densepose_image(
) -> np.ndarray:
    assert_tensor_float_11_bchw(img)
    input_h, input_w = img.shape[-2:]
    img = img.to("cpu")
    # print(f"input_h: {input_h}, input_w: {input_w}")
    img, remove_pad = resize_image_with_pad_torch(
        img, detect_resolution, upscale_method
@@ -295,47 +300,6 @@ def _generate_densepose_image(
    return detected_map


def assert_ndarray_uint8_255_hwc(img):
    # assert input_image is an ndarray with values 0-255 and dimensions (H, W, C)
    assert img.dtype == np.uint8
    assert img.ndim == 3
    assert img.shape[2] == 3
    assert img.max() <= 255
    assert img.min() >= 0


def assert_tensor_uint8_255_bchw(img):
    # assert input_image is a PyTorch tensor with values 0-255 and dimensions (B, C, H, W)
    assert isinstance(img, torch.Tensor)
    assert img.dtype == torch.uint8
    assert img.ndim == 4
    assert img.shape[1] == 3
    assert img.max() <= 255
    assert img.min() >= 0


def assert_tensor_float_11_bchw(img):
    # assert input_image is a PyTorch tensor with values -1 to 1 and dimensions (B, C, H, W)
    if not isinstance(img, torch.Tensor):
        msg = f"Input image must be a PyTorch tensor, but got {type(img)}"
        raise TypeError(msg)
    if img.dtype not in (torch.float32, torch.float64, torch.float16):
        msg = f"Input image must be a float tensor, but got {img.dtype}"
        raise ValueError(msg)
    if img.ndim != 4:
        msg = f"Input image must be 4D (B, C, H, W), but got {img.ndim}D"
        raise ValueError(msg)
    if img.shape[1] != 3:
        msg = f"Input image must have 3 channels, but got {img.shape[1]}"
        raise ValueError(msg)
    if img.max() > 1 or img.min() < -1:
        msg = f"Input image must have values in [-1, 1], but got {img.min()} .. {img.max()}"
        raise ValueError(msg)


class BoxMode(IntEnum):
    """
    Enum of different ways to represent a box.

@@ -12,7 +12,7 @@ from torch import nn
from imaginairy.utils import get_device
from imaginairy.utils.downloads import get_cached_url_path
from imaginairy.utils.img_utils import torch_image_to_openvcv_img
from imaginairy.utils.img_convert import torch_image_to_openvcv_img
def pad_right_down_corner(img, stride, padValue):

@@ -834,7 +834,7 @@ class ImagineResult:
import torch
from imaginairy.utils import get_device, get_hardware_description
from imaginairy.utils.img_utils import (
from imaginairy.utils.img_convert import (
    model_latent_to_pillow_img,
    torch_img_to_pillow_img,
)

@@ -8,11 +8,13 @@ import cv2
import torch
from imaginairy.utils import shrink_list
from imaginairy.utils.img_convert import (
    model_latents_to_pillow_imgs,
    pillow_img_to_opencv_img,
)
from imaginairy.utils.img_utils import (
    add_caption_to_image,
    imgpaths_to_imgs,
    model_latents_to_pillow_imgs,
    pillow_img_to_opencv_img,
)
if TYPE_CHECKING:

@@ -0,0 +1,202 @@
"""
Library format cheat sheet:

Library    Dim Order      Channel Order   Value Range    Type
Pillow                    R, G, B, A      0-255          PIL.Image.Image
OpenCV                    B, G, R, A      0-255          np.ndarray
Torch      (B), C, H, W   R, G, B         -1.0 to 1.0    torch.Tensor

(A round-trip usage sketch follows this file listing.)
"""
from typing import Sequence
import numpy as np
import PIL
import torch
from einops import rearrange, repeat
from PIL import Image
from torch import Tensor
from imaginairy.schema import LazyLoadingImage
from imaginairy.utils import get_device
def assert_bc3hw(t: Tensor):
    assert isinstance(t, torch.Tensor)
    assert t.ndim == 4
    assert t.shape[1] == 3


def assert_b1c3hw(t: Tensor):
    if not isinstance(t, torch.Tensor):
        raise TypeError("Expected a torch.Tensor")
    if t.ndim != 4:
        msg = f"Expected 4 dimensions (Batch, Channel, Height, Width), got {t.ndim}"
        raise ValueError(msg)
    if t.shape[1] != 3:
        msg = f"Expected 3 channels, got {t.shape[1]}"
        raise ValueError(msg)


def assert_torch_mask(t: Tensor):
    if not isinstance(t, torch.Tensor):
        raise TypeError("Expected a torch.Tensor")
    if t.ndim != 4:
        msg = f"Expected 4 dimensions (Batch, Channel, Height, Width), got {t.ndim}"
        raise ValueError(msg)
    if t.shape[1] != 1:
        msg = f"Expected 1 channel, got {t.shape[1]}"
        raise ValueError(msg)
def pillow_img_to_torch_image(
    img: PIL.Image.Image | LazyLoadingImage, convert="RGB"
) -> torch.Tensor:
    if convert:
        img = img.convert(convert)
    img_np = np.array(img).astype(np.float32) / 255.0
    if len(img_np.shape) == 2:
        # add channel at end if missing
        img_np = img_np[:, :, None]
    # b, h, w, c => b, c, h, w
    img_np = img_np[None].transpose(0, 3, 1, 2)
    img_t = torch.from_numpy(img_np)
    return 2.0 * img_t - 1.0


def pillow_mask_255_to_torch_mask(
    mask: PIL.Image.Image | LazyLoadingImage,
) -> torch.Tensor:
    mask_np = np.array(mask).astype(np.float32) / 255.0
    mask_np = mask_np[None, None]
    mask_t = torch.from_numpy(mask_np)
    return mask_t


def pillow_mask_to_latent_mask(
    mask_img: PIL.Image.Image | LazyLoadingImage, downsampling_factor
) -> torch.Tensor:
    mask_img = mask_img.resize(
        (
            mask_img.width // downsampling_factor,
            mask_img.height // downsampling_factor,
        ),
        resample=Image.Resampling.LANCZOS,
    )
    mask = np.array(mask_img).astype(np.float32) / 255.0
    mask = mask[None, None]
    mask_t = torch.from_numpy(mask)
    return mask_t
def pillow_img_to_opencv_img(img: PIL.Image.Image | LazyLoadingImage):
    open_cv_image = np.array(img)
    # Convert RGB to BGR
    open_cv_image = open_cv_image[:, :, ::-1].copy()
    return open_cv_image


def torch_image_to_openvcv_img(img: torch.Tensor) -> np.ndarray:
    img = (img + 1) / 2
    img_np = img.detach().cpu().numpy()
    # assert there is only one image
    assert img_np.shape[0] == 1
    img_np = img_np[0]
    img_np = img_np.transpose(1, 2, 0)
    img_np = (img_np * 255).astype(np.uint8)
    # RGB to BGR
    img_np = img_np[:, :, ::-1]
    return img_np
def torch_img_to_pillow_img(img_t: torch.Tensor) -> PIL.Image.Image:
    img_t = img_t.to(torch.float32).detach().cpu()
    if len(img_t.shape) == 3:
        img_t = img_t.unsqueeze(0)
    if img_t.shape[0] != 1:
        raise ValueError("Only batch size 1 supported")
    if img_t.shape[1] == 1:
        colorspace = "L"
    elif img_t.shape[1] == 3:
        colorspace = "RGB"
    else:
        msg = (
            f"Unsupported colorspace. {img_t.shape[1]} channels in {img_t.shape} shape"
        )
        raise ValueError(msg)
    img_t = rearrange(img_t, "b c h w -> b h w c")
    img_t = torch.clamp((img_t + 1.0) / 2.0, min=0.0, max=1.0)
    img_np = (255.0 * img_t).cpu().numpy().astype(np.uint8)[0]
    if colorspace == "L":
        img_np = img_np[:, :, 0]
    return Image.fromarray(img_np, colorspace)
def model_latent_to_pillow_img(latent: torch.Tensor) -> PIL.Image.Image:
    from imaginairy.utils.model_manager import get_current_diffusion_model

    if len(latent.shape) == 3:
        latent = latent.unsqueeze(0)
    if latent.shape[0] != 1:
        raise ValueError("Only batch size 1 supported")
    model = get_current_diffusion_model()
    img_t = model.lda.decode(latent)
    return torch_img_to_pillow_img(img_t)


def model_latents_to_pillow_imgs(latents: torch.Tensor) -> Sequence[PIL.Image.Image]:
    return [model_latent_to_pillow_img(latent) for latent in latents]


def pillow_img_to_model_latent(
    model, img: PIL.Image.Image | LazyLoadingImage, batch_size=1, half=True
):
    init_image = pillow_img_to_torch_image(img).to(get_device())
    init_image = repeat(init_image, "1 ... -> b ...", b=batch_size)
    if half:
        return model.get_first_stage_encoding(
            model.encode_first_stage(init_image.half())
        )
    return model.get_first_stage_encoding(model.encode_first_stage(init_image))
def assert_ndarray_uint8_255_hwc(img):
    # assert input_image is an ndarray with values 0-255 and dimensions (H, W, C)
    assert img.dtype == np.uint8
    assert img.ndim == 3
    assert img.shape[2] == 3
    assert img.max() <= 255
    assert img.min() >= 0


def assert_tensor_uint8_255_bchw(img):
    # assert input_image is a PyTorch tensor with values 0-255 and dimensions (B, C, H, W)
    assert isinstance(img, torch.Tensor)
    assert img.dtype == torch.uint8
    assert img.ndim == 4
    assert img.shape[1] == 3
    assert img.max() <= 255
    assert img.min() >= 0


def assert_tensor_float_11_bchw(img):
    # assert input_image is a PyTorch tensor with values -1 to 1 and dimensions (B, C, H, W)
    if not isinstance(img, torch.Tensor):
        msg = f"Input image must be a PyTorch tensor, but got {type(img)}"
        raise TypeError(msg)
    if img.dtype not in (torch.float32, torch.float64, torch.float16):
        msg = f"Input image must be a float tensor, but got {img.dtype}"
        raise ValueError(msg)
    if img.ndim != 4:
        msg = f"Input image must be 4D (B, C, H, W), but got {img.ndim}D"
        raise ValueError(msg)
    if img.shape[1] != 3:
        msg = f"Input image must have 3 channels, but got {img.shape[1]}"
        raise ValueError(msg)
    if img.max() > 1 or img.min() < -1:
        msg = f"Input image must have values in [-1, 1], but got {img.min()} .. {img.max()}"
        raise ValueError(msg)
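A minimal round-trip sketch of the conventions above (the file names here are hypothetical; any RGB image on disk works):

from PIL import Image

from imaginairy.utils.img_convert import (
    pillow_img_to_torch_image,
    torch_img_to_pillow_img,
)

# hypothetical input path; substitute any RGB image
img = Image.open("photo.png").convert("RGB")
img_t = pillow_img_to_torch_image(img)  # (1, 3, H, W), float32 in [-1, 1]
assert img_t.ndim == 4 and img_t.shape[1] == 3
assert img_t.min() >= -1.0 and img_t.max() <= 1.0
round_tripped = torch_img_to_pillow_img(img_t)  # back to a 0-255 PIL image
round_tripped.save("photo_roundtrip.png")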

@@ -1,25 +1,17 @@
"""
image utils.

Library format cheat sheet:

Library    Dim Order      Channel Order   Value Range    Type
Pillow                    R, G, B, A      0-255          PIL.Image.Image
OpenCV                    B, G, R, A      0-255          np.ndarray
Torch      (B), C, H, W   R, G, B         -1.0 to 1.0    torch.Tensor
"""
from typing import Sequence
import numpy as np
import PIL
import torch
from einops import rearrange, repeat
from PIL import Image, ImageDraw, ImageFont
from torch import Tensor
from torch.nn import functional as F
from imaginairy.schema import LazyLoadingImage
from imaginairy.utils import get_device
from imaginairy.utils import img_convert
from imaginairy.utils.mask_helpers import binary_erosion
from imaginairy.utils.mathy import make_odd
from imaginairy.utils.named_resolutions import normalize_image_size
from imaginairy.utils.paths import PKG_ROOT
@@ -51,106 +43,6 @@ def pillow_fit_image_within(
    return image


def pillow_img_to_torch_image(
    img: PIL.Image.Image | LazyLoadingImage, convert="RGB"
) -> torch.Tensor:
    if convert:
        img = img.convert(convert)
    img_np = np.array(img).astype(np.float32) / 255.0
    # b, h, w, c => b, c, h, w
    img_np = img_np[None].transpose(0, 3, 1, 2)
    img_t = torch.from_numpy(img_np)
    return 2.0 * img_t - 1.0
def pillow_mask_to_latent_mask(
    mask_img: PIL.Image.Image | LazyLoadingImage, downsampling_factor
) -> torch.Tensor:
    mask_img = mask_img.resize(
        (
            mask_img.width // downsampling_factor,
            mask_img.height // downsampling_factor,
        ),
        resample=Image.Resampling.LANCZOS,
    )
    mask = np.array(mask_img).astype(np.float32) / 255.0
    mask = mask[None, None]
    mask_t = torch.from_numpy(mask)
    return mask_t


def pillow_img_to_opencv_img(img: PIL.Image.Image | LazyLoadingImage):
    open_cv_image = np.array(img)
    # Convert RGB to BGR
    open_cv_image = open_cv_image[:, :, ::-1].copy()
    return open_cv_image


def torch_image_to_openvcv_img(img: torch.Tensor) -> np.ndarray:
    img = (img + 1) / 2
    img_np = img.detach().cpu().numpy()
    # assert there is only one image
    assert img_np.shape[0] == 1
    img_np = img_np[0]
    img_np = img_np.transpose(1, 2, 0)
    img_np = (img_np * 255).astype(np.uint8)
    # RGB to BGR
    img_np = img_np[:, :, ::-1]
    return img_np
def torch_img_to_pillow_img(img_t: torch.Tensor) -> PIL.Image.Image:
    img_t = img_t.to(torch.float32).detach().cpu()
    if len(img_t.shape) == 3:
        img_t = img_t.unsqueeze(0)
    if img_t.shape[0] != 1:
        raise ValueError("Only batch size 1 supported")
    if img_t.shape[1] == 1:
        colorspace = "L"
    elif img_t.shape[1] == 3:
        colorspace = "RGB"
    else:
        msg = (
            f"Unsupported colorspace. {img_t.shape[1]} channels in {img_t.shape} shape"
        )
        raise ValueError(msg)
    img_t = rearrange(img_t, "b c h w -> b h w c")
    img_t = torch.clamp((img_t + 1.0) / 2.0, min=0.0, max=1.0)
    img_np = (255.0 * img_t).cpu().numpy().astype(np.uint8)[0]
    if colorspace == "L":
        img_np = img_np[:, :, 0]
    return Image.fromarray(img_np, colorspace)


def model_latent_to_pillow_img(latent: torch.Tensor) -> PIL.Image.Image:
    from imaginairy.utils.model_manager import get_current_diffusion_model

    if len(latent.shape) == 3:
        latent = latent.unsqueeze(0)
    if latent.shape[0] != 1:
        raise ValueError("Only batch size 1 supported")
    model = get_current_diffusion_model()
    img_t = model.lda.decode(latent)
    return torch_img_to_pillow_img(img_t)


def model_latents_to_pillow_imgs(latents: torch.Tensor) -> Sequence[PIL.Image.Image]:
    return [model_latent_to_pillow_img(latent) for latent in latents]


def pillow_img_to_model_latent(
    model, img: PIL.Image.Image | LazyLoadingImage, batch_size=1, half=True
):
    init_image = pillow_img_to_torch_image(img).to(get_device())
    init_image = repeat(init_image, "1 ... -> b ...", b=batch_size)
    if half:
        return model.get_first_stage_encoding(
            model.encode_first_stage(init_image.half())
        )
    return model.get_first_stage_encoding(model.encode_first_stage(init_image))
def imgpaths_to_imgs(imgpaths):
    imgs = []
    for imgpath in imgpaths:
@@ -250,6 +142,28 @@ def combine_image(original_img, generated_img, mask_img):
    return rebuilt_orig_img


def combine_img_torch(
    target_img: torch.Tensor,
    source_img: torch.Tensor,
    mask_img: torch.Tensor,
) -> torch.Tensor:
    """Combine the source image with the target image using the mask image."""
    img_convert.assert_b1c3hw(target_img)
    img_convert.assert_b1c3hw(source_img)
    img_convert.assert_torch_mask(mask_img)

    # assert mask and img are the same size
    if mask_img.shape[-2:] != source_img.shape[-2:]:
        msg = "Mask and image must have the same height and width."
        raise ValueError(msg)

    # Using the mask, combine the images
    combined_img = target_img * (1 - mask_img) + source_img * mask_img
    img_convert.assert_b1c3hw(combined_img)
    return combined_img


def calc_scale_to_fit_within(height: int, width: int, max_size) -> float:
    max_width, max_height = normalize_image_size(max_size)
    if width <= max_width and height <= max_height:
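A small sketch of combine_img_torch above, on synthetic tensors (shapes and values invented for illustration):

import torch

from imaginairy.utils.img_utils import combine_img_torch

target = torch.zeros(1, 3, 64, 64)   # flat gray target, [-1, 1] convention
source = torch.ones(1, 3, 64, 64)    # flat white source
mask = torch.zeros(1, 1, 64, 64)
mask[:, :, 16:48, 16:48] = 1.0       # paste a centered square of source
combined = combine_img_torch(target, source, mask)
assert combined[0, 0, 32, 32] == 1.0  # inside the mask: source pixel
assert combined[0, 0, 0, 0] == 0.0    # outside the mask: target pixel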
@@ -282,3 +196,40 @@ def aspect_ratio(width, height):
    y = height // divisor
    return f"{x}:{y}"
def blur_fill(image: torch.Tensor, mask: torch.Tensor, blur: int, falloff: int):
    blur = make_odd(blur)
    falloff = min(make_odd(falloff), blur - 2)

    original = image.clone()
    alpha = mask.floor()
    if falloff > 0:
        erosion = binary_erosion(alpha, falloff)
        alpha = alpha * gaussian_blur(erosion, falloff)
    alpha = alpha.repeat(1, 3, 1, 1)

    image = gaussian_blur(image, blur)
    image = original + (image - original) * alpha
    return image


def gaussian_blur(image: Tensor, radius: int, sigma: float = 0):
    c = image.shape[-3]
    if sigma <= 0:
        sigma = 0.3 * (radius - 1) + 0.8

    kernel = _gaussian_kernel(radius, sigma).to(image.device)
    kernel_x = kernel[..., None, :].repeat(c, 1, 1).unsqueeze(1)
    kernel_y = kernel[..., None].repeat(c, 1, 1).unsqueeze(1)
    image = F.pad(image, (radius, radius, radius, radius), mode="reflect")
    image = F.conv2d(image, kernel_x, groups=c)
    image = F.conv2d(image, kernel_y, groups=c)
    return image


def _gaussian_kernel(radius: int, sigma: float):
    x = torch.linspace(-radius, radius, steps=radius * 2 + 1)
    pdf = torch.exp(-0.5 * (x / sigma).pow(2))
    return pdf / pdf.sum()
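A usage sketch for gaussian_blur and blur_fill (the radius, blur, and falloff values are arbitrary):

import torch

from imaginairy.utils.img_utils import blur_fill, gaussian_blur

img = torch.rand(1, 3, 64, 64) * 2 - 1  # fake image in [-1, 1]
blurred = gaussian_blur(img, radius=5)  # softened copy, same shape
assert blurred.shape == img.shape

mask = torch.zeros(1, 1, 64, 64)
mask[:, :, 20:44, 20:44] = 1.0
# blur only the masked region, feathering the edge over `falloff` pixels
result = blur_fill(img, mask=mask, blur=9, falloff=3)
assert result.shape == img.shape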

@@ -331,7 +331,7 @@ class ImageLoggingContext:
        )

    def log_progress_latent(self, latent):
        from imaginairy.utils.img_utils import model_latents_to_pillow_imgs
        from imaginairy.utils.img_convert import model_latents_to_pillow_imgs

        if not self.progress_img_callback:
            return

@@ -0,0 +1,143 @@
from typing import Union

import numpy as np
import torch
from torch import Tensor
from torch.nn import functional as F

from imaginairy.utils import img_convert
from imaginairy.utils.img_convert import assert_bc3hw
from imaginairy.utils.mathy import make_odd


def binary_erosion(mask: Tensor, radius: int):
    # a pixel stays masked only if its full (2*radius+1) x (2*radius+1) neighborhood is masked
    kernel = torch.ones(1, 1, radius * 2 + 1, radius * 2 + 1, device=mask.device)
    mask = F.pad(mask, (radius, radius, radius, radius), mode="constant", value=1)
    mask = F.conv2d(mask, kernel, groups=1)
    mask = (mask == kernel.numel()).to(mask.dtype)
    return mask
def highlight_masked_area(
    img: Tensor,
    mask: Tensor,
    color: Union[tuple[int, int, int], None] = None,
    highlight_strength: float = 0.5,
) -> Tensor:
    """
    Highlights the masked area of an image tensor with a specified color.
    """
    from imaginairy.utils.img_utils import combine_img_torch

    img_convert.assert_b1c3hw(img)
    img_convert.assert_torch_mask(mask)

    # Ensure mask is on the same device as the image tensor
    mask = mask.to(img.device)

    if color is None:
        color = tuple(np.random.randint(0, 256, 3))
    elif any(c > 255 or c < 0 for c in color):
        raise ValueError("Color values must be in the range [0, 255].")

    # Convert color to a tensor and normalize to [0, 1]
    color_tensor = torch.tensor(color, device=img.device, dtype=img.dtype) / 255.0
    solid_color = torch.ones_like(img)
    for channel in range(3):
        solid_color[:, channel, :, :] *= color_tensor[channel]

    highlighted_image = combine_img_torch(img, solid_color, mask * highlight_strength)
    return highlighted_image
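An illustrative call (tensor shapes invented; color is optional and chosen randomly when omitted):

import torch

from imaginairy.utils.mask_helpers import highlight_masked_area

img = torch.zeros(1, 3, 32, 32)  # flat gray image, [-1, 1] convention
mask = torch.zeros(1, 1, 32, 32)
mask[:, :, 8:24, 8:24] = 1.0
# tint the masked square red at half strength
highlighted = highlight_masked_area(img, mask, color=(255, 0, 0), highlight_strength=0.5)
assert highlighted.shape == img.shape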
def fill_neutral(image: Tensor, mask: Tensor, falloff: int = 1) -> Tensor:
    img_convert.assert_bc3hw(image)
    img_convert.assert_torch_mask(mask)
    mask = mask_falloff(mask, falloff)
    filled_img = image.detach().clone()
    m = (1.0 - mask).squeeze(0).squeeze(0)
    for i in range(3):
        filled_img[:, i, :, :] -= 0.5
        filled_img[:, i, :, :] *= m
        filled_img[:, i, :, :] += 0.5
    img_convert.assert_bc3hw(filled_img)
    return filled_img
def fill_noise(image: Tensor, mask: Tensor, falloff: int = 1, seed=1) -> Tensor:
    """
    Fills a masked area in an image with random noise.
    """
    img_convert.assert_bc3hw(image)
    img_convert.assert_torch_mask(mask)
    mask = mask_falloff(mask, falloff)
    filled_img = image.detach().clone()
    noise = torch.rand_like(filled_img) * 2 - 1
    filled_img = filled_img * (1 - mask) + noise * mask
    img_convert.assert_bc3hw(filled_img)
    return filled_img
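A sketch contrasting the two fills (synthetic tensors, arbitrary falloff):

import torch

from imaginairy.utils.mask_helpers import fill_neutral, fill_noise

img = torch.rand(1, 3, 64, 64) * 2 - 1
mask = torch.zeros(1, 1, 64, 64)
mask[:, :, 16:48, 16:48] = 1.0

neutral = fill_neutral(img, mask, falloff=1)  # masked pixels pulled to 0.5
noisy = fill_noise(img, mask, falloff=1)      # masked pixels replaced with noise
assert neutral.shape == noisy.shape == img.shape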
# def expand_mask(mask, expand, tapered_corners):
#     c = 0 if tapered_corners else 1
#     kernel = np.array([[c, 1, c], [1, 1, 1], [c, 1, c]])
#     mask = mask.reshape((-1, mask.shape[-2], mask.shape[-1]))
#     out = []
#     for m in mask:
#         output = m.numpy()
#         for _ in range(abs(expand)):
#             if expand < 0:
#                 output = scipy.ndimage.grey_erosion(output, footprint=kernel)
#             else:
#                 output = scipy.ndimage.grey_dilation(output, footprint=kernel)
#         output = torch.from_numpy(output)
#         out.append(output)
#     return (torch.stack(out, dim=0),)
def mask_falloff(mask: Tensor, falloff: int) -> Tensor:
    """
    Applies a falloff effect to a binary mask tensor to create smooth transitions at its edges.
    """
    from imaginairy.utils.img_utils import gaussian_blur

    alpha = mask.expand(1, *mask.shape[-3:]).floor()
    if falloff > 0:
        falloff = make_odd(falloff)
        erosion = binary_erosion(alpha, falloff)
        alpha = alpha * gaussian_blur(erosion, falloff)
    return alpha
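The falloff is an erosion followed by a Gaussian blur, so a hard mask ramps smoothly from 1 deep inside the masked region to 0 outside it; a quick check:

import torch

from imaginairy.utils.mask_helpers import mask_falloff

mask = torch.zeros(1, 1, 64, 64)
mask[:, :, 16:48, 16:48] = 1.0
soft = mask_falloff(mask, falloff=5)
assert soft.shape == mask.shape
assert soft[0, 0, 32, 32] > 0.9  # interior stays fully masked
assert soft[0, 0, 0, 0] == 0.0   # far outside stays zero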
def fill_navier_stokes(image: Tensor, mask: Tensor, falloff: int = 1) -> Tensor:
    """
    Fills a masked area in an image using Navier-Stokes inpainting.

    https://docs.opencv.org/3.4/df/d3d/tutorial_py_inpainting.html
    """
    import cv2

    assert_bc3hw(image)
    alpha = mask_falloff(mask, falloff)
    filled_img = image.detach().clone()
    alpha_np = alpha.squeeze(0).squeeze(0).cpu().numpy()
    alpha_bc = alpha_np.reshape(*alpha_np.shape)
    filled_img = filled_img.squeeze(0)
    for channel_slice in filled_img:
        image_np = channel_slice.cpu().numpy()
        filled_np = cv2.inpaint(
            (255.0 * (image_np + 1) / 2).astype(np.uint8),
            (255.0 * alpha_np).astype(np.uint8),
            3,
            cv2.INPAINT_NS,
        )
        filled_np = (filled_np.astype(np.float32) / 255.0) * 2 - 1
        filled_np = image_np * (1.0 - alpha_bc) + filled_np * alpha_bc
        channel_slice.copy_(torch.from_numpy(filled_np))
    filled_img = filled_img.unsqueeze(0)
    assert_bc3hw(filled_img)
    return filled_img
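An end-to-end sketch of the Navier-Stokes fill (requires opencv-python; the file names are hypothetical, and the mask must be a white-on-black image the same size as the photo):

from PIL import Image

from imaginairy.utils.img_convert import (
    pillow_img_to_torch_image,
    pillow_mask_255_to_torch_mask,
    torch_img_to_pillow_img,
)
from imaginairy.utils.mask_helpers import fill_navier_stokes

# hypothetical paths
img_t = pillow_img_to_torch_image(Image.open("photo.png").convert("RGB"))
mask_t = pillow_mask_255_to_torch_mask(Image.open("photo_mask.png").convert("L"))
filled_t = fill_navier_stokes(img_t, mask_t, falloff=1)
torch_img_to_pillow_img(filled_t).save("photo_filled.png")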

@@ -0,0 +1,2 @@
def make_odd(n):
    return int(n + 1 - n % 2)

@@ -6,7 +6,7 @@ import torch
from PIL import Image, ImageDraw
from torch import nn
from imaginairy.utils.img_utils import torch_img_to_pillow_img
from imaginairy.utils.img_convert import torch_img_to_pillow_img
def outpaint_calculations(

@@ -42,6 +42,7 @@ tests/test_api.py::test_tile_mode
tests/test_api/test_generate.py::test_cliptext_inpainting_pearl_doctor
tests/test_api/test_generate.py::test_controlnet[canny]
tests/test_api/test_generate.py::test_controlnet[colorize]
tests/test_api/test_generate.py::test_controlnet[densepose]
tests/test_api/test_generate.py::test_controlnet[depth]
tests/test_api/test_generate.py::test_controlnet[details]
tests/test_api/test_generate.py::test_controlnet[edit]
@@ -91,6 +92,7 @@ tests/test_enhancers/test_upscale_realesrgan.py::test_upscale_textured_image
tests/test_modules/diffusion/test_model.py::test_nonlinearity
tests/test_outpaint.py::test_outpainting_outpaint
tests/test_safety.py::test_is_nsfw
tests/test_utils/test_mask_helpers.py::test_inpaint_prep_dogbench
tests/test_utils/test_model_cache.py::test_cache_ordering
tests/test_utils/test_model_cache.py::test_get_existing_move_to_gpu
tests/test_utils/test_model_cache.py::test_set_cpu_full


Binary file not shown (image added, 2.2 MiB).

@@ -6,7 +6,7 @@ from imaginairy.img_processors.control_modes import CONTROL_MODES, create_depth_
from imaginairy.modules.midas.api import ISL_PATHS
from imaginairy.schema import LazyLoadingImage
from imaginairy.utils import seed_everything
from imaginairy.utils.img_utils import (
from imaginairy.utils.img_convert import (
    pillow_img_to_torch_image,
    torch_img_to_pillow_img,
)

@@ -4,7 +4,7 @@ import pytest
from imaginairy.schema import LazyLoadingImage
from imaginairy.utils.feather_tile import rebuild_image, tile_image, tile_setup
from imaginairy.utils.img_utils import (
from imaginairy.utils.img_convert import (
    pillow_img_to_torch_image,
    torch_img_to_pillow_img,
)

@@ -6,11 +6,13 @@ from torch.nn.functional import interpolate
from imaginairy.enhancers.upscale_riverwing import upscale_latent
from imaginairy.schema import LazyLoadingImage
from imaginairy.utils import get_device
from imaginairy.utils.img_utils import (
    pillow_fit_image_within,
from imaginairy.utils.img_convert import (
    pillow_img_to_torch_image,
    torch_img_to_pillow_img,
)
from imaginairy.utils.img_utils import (
    pillow_fit_image_within,
)
from imaginairy.utils.model_manager import get_diffusion_model
from tests import TESTS_FOLDER

@@ -0,0 +1,173 @@
import torch
from PIL import Image, ImageOps
from imaginairy.api import imagine
from imaginairy.enhancers.clip_masking import get_img_mask
from imaginairy.schema import ImaginePrompt
from imaginairy.utils import img_convert
from imaginairy.utils.img_convert import (
    pillow_img_to_torch_image,
    pillow_mask_255_to_torch_mask,
    torch_img_to_pillow_img,
)
from imaginairy.utils.img_utils import blur_fill, combine_image
from imaginairy.utils.mask_helpers import (
    fill_navier_stokes,
    fill_neutral,
    fill_noise,
    highlight_masked_area,
)
from tests import TESTS_FOLDER
def makemask(mask, offset=0.1, threshold=0.2):
    B, C, H, W = mask.shape
    if C == 3:
        mask = mask.mean(dim=1, keepdim=True)
    assert 0.0 <= offset < threshold <= 1.0, "Threshold must be higher than offset"
    # linearly remap [offset, threshold] -> [0, 1] and clamp everything outside
    mask = (mask - offset) * (1 / (threshold - offset))
    mask = mask.clamp(0, 1)
    return mask
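A toy check of the remap: with the defaults, a soft mask value of 0.15 maps to (0.15 - 0.1) / (0.2 - 0.1) = 0.5.

m = torch.tensor([[[[0.05, 0.15, 0.90]]]])  # (1, 1, 1, 3) toy mask
out = makemask(m)                            # offset=0.1, threshold=0.2
assert torch.allclose(out, torch.tensor([[[[0.0, 0.5, 1.0]]]]))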
def test_fill_neutral(filename_base_for_outputs):
    img = Image.open(f"{TESTS_FOLDER}/data/bench2.png").convert("RGB")
    mask = Image.open(f"{TESTS_FOLDER}/data/bench2_mask.png")
    img_t = pillow_img_to_torch_image(img)
    mask_t = pillow_img_to_torch_image(mask)
    mask_t = makemask(mask_t)

    for falloff in [0, 1, 3, 5, 17]:
        filled_img_t = fill_neutral(img_t, mask_t, falloff=falloff)
        filled_img = torch_img_to_pillow_img(filled_img_t)
        img_path = f"{filename_base_for_outputs}_filled_neutral_falloff_{falloff}.png"
        filled_img.save(img_path)
        # assert_image_similar_to_expectation(filled_img, img_path=img_path, threshold=7000)
def test_fill_navier_stokes(filename_base_for_outputs):
    img = Image.open(f"{TESTS_FOLDER}/data/bench2.png").convert("RGB")
    mask = Image.open(f"{TESTS_FOLDER}/data/bench2_mask.png")
    img_t = pillow_img_to_torch_image(img)
    mask_t = pillow_img_to_torch_image(mask)
    mask_t = makemask(mask_t)

    for falloff in [0, 1, 3, 5, 17]:
        filled_img_t = fill_navier_stokes(img_t, mask_t, falloff=falloff)
        filled_img = torch_img_to_pillow_img(filled_img_t)
        img_path = f"{filename_base_for_outputs}_filled_navier_falloff_{falloff}.png"
        filled_img.save(img_path)
def test_inpaint_prep_dogbench(filename_base_for_outputs):
    save_count = 0

    def save(i, name):
        nonlocal save_count
        if isinstance(i, torch.Tensor):
            i = torch_img_to_pillow_img(i)
        i.save(f"{filename_base_for_outputs}_{save_count:02d}_{name}.png")
        save_count += 1

    img = Image.open(f"{TESTS_FOLDER}/data/dog-on-bench.png").convert("RGB")
    img_t = pillow_img_to_torch_image(img)
    save(img, "original")

    mask_img, mask_img_g = get_img_mask(img, "dog", threshold=0.5)
    save(mask_img_g, "mask_g")
    mask_img_g_t = pillow_mask_255_to_torch_mask(mask_img_g)
    print(
        f"mask_img_g value range: {mask_img_g_t.min().item()} - {mask_img_g_t.max().item()}"
    )
    mask_highlight_g_t = highlight_masked_area(
        img_t, mask_img_g_t, color=(255, 0, 0), highlight_strength=1
    )
    save(mask_highlight_g_t, "highlighted-mask_g")

    save(mask_img, "mask")
    mask_t = pillow_mask_255_to_torch_mask(mask_img)
    mask_t = makemask(mask_t)
    mask_highlight_t = highlight_masked_area(
        img_t,
        mask_t,
        # color=(255, 0, 0)
    )
    save(mask_highlight_t, "highlighted-mask")

    navier_filled_img_t = fill_navier_stokes(img_t, mask_t, falloff=0)
    save(navier_filled_img_t, "filled-navier-stokes")

    # blur the filled area
    blur_filled_img_t = blur_fill(navier_filled_img_t, mask=mask_t, blur=20, falloff=40)
    save(blur_filled_img_t, "navier-blurred-filled")

    # neutral fill the masked area
    neutral_filled_img_t = fill_neutral(img_t, mask_t, falloff=1)
    save(neutral_filled_img_t, "filled-neutral")

    # noise fill the masked area
    noise_filled_img_t = fill_noise(img_t, mask_t)
    save(noise_filled_img_t, "filled-noise")
    seed = 2
    prompt_text = "a red fox on a bench"
    prompts = [
        ImaginePrompt(
            prompt_text,
            init_image=img,
            init_image_strength=0,
            mask_image=mask_img,
            seed=seed,
            model_weights="sdxl",
            caption_text="original-filled",
        ),
        ImaginePrompt(
            prompt_text,
            init_image=img_convert.torch_img_to_pillow_img(neutral_filled_img_t),
            init_image_strength=0,
            mask_image=mask_img,
            seed=seed,
            model_weights="sdxl",
            caption_text="neutral-filled",
        ),
        ImaginePrompt(
            prompt_text,
            init_image=img_convert.torch_img_to_pillow_img(noise_filled_img_t),
            init_image_strength=0,
            mask_image=mask_img,
            seed=seed,
            model_weights="sdxl",
            caption_text="noise-filled",
        ),
        ImaginePrompt(
            prompt_text,
            init_image=img_convert.torch_img_to_pillow_img(blur_filled_img_t),
            init_image_strength=0,
            mask_image=mask_img,
            seed=seed,
            model_weights="sdxl",
            caption_text="navier-stokes-filled",
        ),
    ]

    for result in imagine(prompts):
        generated_img = result.images["pre-reconstitution"]
        save(generated_img, f"{result.prompt.caption_text}_pre-reconstitution")
        rebuilt_img = combine_image(
            original_img=img,
            generated_img=generated_img,
            mask_img=ImageOps.invert(mask_img),
        )
        save(rebuilt_img, f"{result.prompt.caption_text}_rebuilt")

    # for img_name, img in result.images.items():
    #     if "mask" in img_name:
    #         continue
    #
    #     name = f"{result.prompt.caption_text}_{img_name}"
    #     save(img, name)

@@ -0,0 +1,13 @@
from imaginairy.utils.mathy import make_odd


def test_make_odd():
    assert make_odd(0) == 1
    assert make_odd(1) == 1
    assert make_odd(2) == 3
    assert make_odd(3) == 3
    assert make_odd(4) == 5
    assert make_odd(4.1) == 5
    assert make_odd(-1) == -1
    assert make_odd(-2) == -1
    assert make_odd(1000) == 1001