feature: inpaint image preparation methods; better organization

bd/improved-inpaint
Bryce 1 month ago
parent 3c1c695f76
commit 86aed3520a

@@ -41,13 +41,15 @@ def _generate_single_image(
    MaskMode,
)
from imaginairy.utils import get_device, randn_seeded
from imaginairy.utils.img_utils import (
    add_caption_to_image,
    pillow_fit_image_within,
from imaginairy.utils.img_convert import (
    pillow_img_to_torch_image,
    pillow_mask_to_latent_mask,
    torch_img_to_pillow_img,
)
from imaginairy.utils.img_utils import (
    add_caption_to_image,
    pillow_fit_image_within,
)
from imaginairy.utils.log_utils import (
    ImageLoggingContext,
    log_conditioning,

@@ -39,12 +39,14 @@ def generate_single_image(
from imaginairy.samplers import SolverName
from imaginairy.schema import ImagineResult
from imaginairy.utils import get_device, randn_seeded
from imaginairy.utils.img_convert import (
    pillow_img_to_torch_image,
    pillow_mask_to_latent_mask,
)
from imaginairy.utils.img_utils import (
    add_caption_to_image,
    combine_image,
    pillow_fit_image_within,
    pillow_img_to_torch_image,
    pillow_mask_to_latent_mask,
)
from imaginairy.utils.log_utils import (
    ImageLoggingContext,
@@ -470,9 +472,9 @@ def prep_control_input(
from PIL import ImageOps
from imaginairy.utils import get_device
from imaginairy.utils.img_convert import pillow_img_to_torch_image
from imaginairy.utils.img_utils import (
    pillow_fit_image_within,
    pillow_img_to_torch_image,
)
from imaginairy.utils.log_utils import (
    log_img,

@@ -2,7 +2,7 @@
import cv2
from imaginairy.utils.img_utils import pillow_img_to_opencv_img
from imaginairy.utils.img_convert import pillow_img_to_opencv_img
def calculate_blurriness_level(img):

@@ -12,6 +12,10 @@ from torch.nn import functional as F
from imaginairy import config
from imaginairy.utils.downloads import get_cached_url_path
from imaginairy.utils.img_convert import (
    assert_tensor_float_11_bchw,
    assert_tensor_uint8_255_bchw,
)
logger = logging.getLogger(__name__)
@@ -240,6 +244,7 @@ def _generate_densepose_image(
) -> np.ndarray:
    assert_tensor_float_11_bchw(img)
    input_h, input_w = img.shape[-2:]
    img = img.to("cpu")
    # print(f"input_h: {input_h}, input_w: {input_w}")
    img, remove_pad = resize_image_with_pad_torch(
        img, detect_resolution, upscale_method
@@ -295,47 +300,6 @@ def _generate_densepose_image(
    return detected_map


def assert_ndarray_uint8_255_hwc(img):
    # assert input_image is an ndarray with values 0-255 and dimensions (H, W, C)
    assert img.dtype == np.uint8
    assert img.ndim == 3
    assert img.shape[2] == 3
    assert img.max() <= 255
    assert img.min() >= 0


def assert_tensor_uint8_255_bchw(img):
    # assert input_image is a PyTorch tensor with values 0-255 and dimensions (B, C, H, W)
    assert isinstance(img, torch.Tensor)
    assert img.dtype == torch.uint8
    assert img.ndim == 4
    assert img.shape[1] == 3
    assert img.max() <= 255
    assert img.min() >= 0


def assert_tensor_float_11_bchw(img):
    # assert input_image is a PyTorch tensor with values -1 to 1 and dimensions (B, C, H, W)
    if not isinstance(img, torch.Tensor):
        msg = f"Input image must be a PyTorch tensor, but got {type(img)}"
        raise TypeError(msg)
    if img.dtype not in (torch.float32, torch.float64, torch.float16):
        msg = f"Input image must be a float tensor, but got {img.dtype}"
        raise ValueError(msg)
    if img.ndim != 4:
        msg = f"Input image must be 4D (B, C, H, W), but got {img.ndim}D"
        raise ValueError(msg)
    if img.shape[1] != 3:
        msg = f"Input image must have 3 channels, but got {img.shape[1]}"
        raise ValueError(msg)
    if img.max() > 1 or img.min() < -1:
        msg = f"Input image must have values in [-1, 1], but got {img.min()} .. {img.max()}"
        raise ValueError(msg)


class BoxMode(IntEnum):
    """
    Enum of different ways to represent a box.

@@ -12,7 +12,7 @@ from torch import nn
from imaginairy.utils import get_device
from imaginairy.utils.downloads import get_cached_url_path
from imaginairy.utils.img_utils import torch_image_to_openvcv_img
from imaginairy.utils.img_convert import torch_image_to_openvcv_img
def pad_right_down_corner(img, stride, padValue):

@@ -834,7 +834,7 @@ class ImagineResult:
import torch
from imaginairy.utils import get_device, get_hardware_description
from imaginairy.utils.img_utils import (
from imaginairy.utils.img_convert import (
    model_latent_to_pillow_img,
    torch_img_to_pillow_img,
)

@@ -8,11 +8,13 @@ import cv2
import torch
from imaginairy.utils import shrink_list
from imaginairy.utils.img_convert import (
    model_latents_to_pillow_imgs,
    pillow_img_to_opencv_img,
)
from imaginairy.utils.img_utils import (
    add_caption_to_image,
    imgpaths_to_imgs,
    model_latents_to_pillow_imgs,
    pillow_img_to_opencv_img,
)
if TYPE_CHECKING:

@@ -0,0 +1,202 @@
"""
Library format cheat sheet:

Library    Dim Order      Channel Order   Value Range    Type
Pillow                    R, G, B, A      0-255          PIL.Image.Image
OpenCV                    B, G, R, A      0-255          np.ndarray
Torch      (B), C, H, W   R, G, B         -1.0 to 1.0    torch.Tensor

(A round-trip usage sketch follows this file listing.)
"""
from typing import Sequence
import numpy as np
import PIL
import torch
from einops import rearrange, repeat
from PIL import Image
from torch import Tensor
from imaginairy.schema import LazyLoadingImage
from imaginairy.utils import get_device
def assert_bc3hw(t: Tensor):
    assert isinstance(t, torch.Tensor)
    assert t.ndim == 4
    assert t.shape[1] == 3


def assert_b1c3hw(t: Tensor):
    if not isinstance(t, torch.Tensor):
        raise TypeError("Expected a torch.Tensor")
    if t.ndim != 4:
        msg = f"Expected 4 dimensions (Batch, Channel, Height, Width), got {t.ndim}"
        raise ValueError(msg)
    if t.shape[1] != 3:
        msg = f"Expected 3 channels, got {t.shape[1]}"
        raise ValueError(msg)


def assert_torch_mask(t: Tensor):
    if not isinstance(t, torch.Tensor):
        raise TypeError("Expected a torch.Tensor")
    if t.ndim != 4:
        msg = f"Expected 4 dimensions (Batch, Channel, Height, Width), got {t.ndim}"
        raise ValueError(msg)
    if t.shape[1] != 1:
        msg = f"Expected 1 channel, got {t.shape[1]}"
        raise ValueError(msg)
def pillow_img_to_torch_image(
    img: PIL.Image.Image | LazyLoadingImage, convert="RGB"
) -> torch.Tensor:
    if convert:
        img = img.convert(convert)
    img_np = np.array(img).astype(np.float32) / 255.0
    if len(img_np.shape) == 2:
        # add channel at end if missing
        img_np = img_np[:, :, None]
    # b, h, w, c => b, c, h, w
    img_np = img_np[None].transpose(0, 3, 1, 2)
    img_t = torch.from_numpy(img_np)
    return 2.0 * img_t - 1.0


def pillow_mask_255_to_torch_mask(
    mask: PIL.Image.Image | LazyLoadingImage,
) -> torch.Tensor:
    mask_np = np.array(mask).astype(np.float32) / 255.0
    mask_np = mask_np[None, None]
    mask_t = torch.from_numpy(mask_np)
    return mask_t


def pillow_mask_to_latent_mask(
    mask_img: PIL.Image.Image | LazyLoadingImage, downsampling_factor
) -> torch.Tensor:
    mask_img = mask_img.resize(
        (
            mask_img.width // downsampling_factor,
            mask_img.height // downsampling_factor,
        ),
        resample=Image.Resampling.LANCZOS,
    )
    mask = np.array(mask_img).astype(np.float32) / 255.0
    mask = mask[None, None]
    mask_t = torch.from_numpy(mask)
    return mask_t
def pillow_img_to_opencv_img(img: PIL.Image.Image | LazyLoadingImage):
    open_cv_image = np.array(img)
    # Convert RGB to BGR
    open_cv_image = open_cv_image[:, :, ::-1].copy()
    return open_cv_image


def torch_image_to_openvcv_img(img: torch.Tensor) -> np.ndarray:
    img = (img + 1) / 2
    img_np = img.detach().cpu().numpy()
    # assert there is only one image
    assert img_np.shape[0] == 1
    img_np = img_np[0]
    img_np = img_np.transpose(1, 2, 0)
    img_np = (img_np * 255).astype(np.uint8)
    # RGB to BGR
    img_np = img_np[:, :, ::-1]
    return img_np
def torch_img_to_pillow_img(img_t: torch.Tensor) -> PIL.Image.Image:
    img_t = img_t.to(torch.float32).detach().cpu()
    if len(img_t.shape) == 3:
        img_t = img_t.unsqueeze(0)
    if img_t.shape[0] != 1:
        raise ValueError("Only batch size 1 supported")
    if img_t.shape[1] == 1:
        colorspace = "L"
    elif img_t.shape[1] == 3:
        colorspace = "RGB"
    else:
        msg = (
            f"Unsupported colorspace. {img_t.shape[1]} channels in {img_t.shape} shape"
        )
        raise ValueError(msg)
    img_t = rearrange(img_t, "b c h w -> b h w c")
    img_t = torch.clamp((img_t + 1.0) / 2.0, min=0.0, max=1.0)
    img_np = (255.0 * img_t).cpu().numpy().astype(np.uint8)[0]
    if colorspace == "L":
        img_np = img_np[:, :, 0]
    return Image.fromarray(img_np, colorspace)
def model_latent_to_pillow_img(latent: torch.Tensor) -> PIL.Image.Image:
    from imaginairy.utils.model_manager import get_current_diffusion_model

    if len(latent.shape) == 3:
        latent = latent.unsqueeze(0)
    if latent.shape[0] != 1:
        raise ValueError("Only batch size 1 supported")
    model = get_current_diffusion_model()
    img_t = model.lda.decode(latent)
    return torch_img_to_pillow_img(img_t)


def model_latents_to_pillow_imgs(latents: torch.Tensor) -> Sequence[PIL.Image.Image]:
    return [model_latent_to_pillow_img(latent) for latent in latents]


def pillow_img_to_model_latent(
    model, img: PIL.Image.Image | LazyLoadingImage, batch_size=1, half=True
):
    init_image = pillow_img_to_torch_image(img).to(get_device())
    init_image = repeat(init_image, "1 ... -> b ...", b=batch_size)
    if half:
        return model.get_first_stage_encoding(
            model.encode_first_stage(init_image.half())
        )
    return model.get_first_stage_encoding(model.encode_first_stage(init_image))
def assert_ndarray_uint8_255_hwc(img):
    # assert input_image is an ndarray with values 0-255 and dimensions (H, W, C)
    assert img.dtype == np.uint8
    assert img.ndim == 3
    assert img.shape[2] == 3
    assert img.max() <= 255
    assert img.min() >= 0


def assert_tensor_uint8_255_bchw(img):
    # assert input_image is a PyTorch tensor with values 0-255 and dimensions (B, C, H, W)
    assert isinstance(img, torch.Tensor)
    assert img.dtype == torch.uint8
    assert img.ndim == 4
    assert img.shape[1] == 3
    assert img.max() <= 255
    assert img.min() >= 0


def assert_tensor_float_11_bchw(img):
    # assert input_image is a PyTorch tensor with values -1 to 1 and dimensions (B, C, H, W)
    if not isinstance(img, torch.Tensor):
        msg = f"Input image must be a PyTorch tensor, but got {type(img)}"
        raise TypeError(msg)
    if img.dtype not in (torch.float32, torch.float64, torch.float16):
        msg = f"Input image must be a float tensor, but got {img.dtype}"
        raise ValueError(msg)
    if img.ndim != 4:
        msg = f"Input image must be 4D (B, C, H, W), but got {img.ndim}D"
        raise ValueError(msg)
    if img.shape[1] != 3:
        msg = f"Input image must have 3 channels, but got {img.shape[1]}"
        raise ValueError(msg)
    if img.max() > 1 or img.min() < -1:
        msg = f"Input image must have values in [-1, 1], but got {img.min()} .. {img.max()}"
        raise ValueError(msg)
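A minimal round-trip sketch of the conventions above (the file names here are hypothetical; any RGB image on disk works):

from PIL import Image

from imaginairy.utils.img_convert import (
    pillow_img_to_torch_image,
    torch_img_to_pillow_img,
)

# hypothetical input path; substitute any RGB image
img = Image.open("photo.png").convert("RGB")
img_t = pillow_img_to_torch_image(img)  # (1, 3, H, W), float32 in [-1, 1]
assert img_t.ndim == 4 and img_t.shape[1] == 3
assert img_t.min() >= -1.0 and img_t.max() <= 1.0
round_tripped = torch_img_to_pillow_img(img_t)  # back to a 0-255 PIL image
round_tripped.save("photo_roundtrip.png")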

@@ -1,25 +1,17 @@
"""
image utils.

Library format cheat sheet:

Library    Dim Order      Channel Order   Value Range    Type
Pillow                    R, G, B, A      0-255          PIL.Image.Image
OpenCV                    B, G, R, A      0-255          np.ndarray
Torch      (B), C, H, W   R, G, B         -1.0 to 1.0    torch.Tensor
"""
from typing import Sequence
import numpy as np
import PIL
import torch
from einops import rearrange, repeat
from PIL import Image, ImageDraw, ImageFont
from torch import Tensor
from torch.nn import functional as F
from imaginairy.schema import LazyLoadingImage
from imaginairy.utils import get_device
from imaginairy.utils import img_convert
from imaginairy.utils.mask_helpers import binary_erosion
from imaginairy.utils.mathy import make_odd
from imaginairy.utils.named_resolutions import normalize_image_size
from imaginairy.utils.paths import PKG_ROOT
@@ -51,106 +43,6 @@ def pillow_fit_image_within(
    return image


def pillow_img_to_torch_image(
    img: PIL.Image.Image | LazyLoadingImage, convert="RGB"
) -> torch.Tensor:
    if convert:
        img = img.convert(convert)
    img_np = np.array(img).astype(np.float32) / 255.0
    # b, h, w, c => b, c, h, w
    img_np = img_np[None].transpose(0, 3, 1, 2)
    img_t = torch.from_numpy(img_np)
    return 2.0 * img_t - 1.0
def pillow_mask_to_latent_mask(
    mask_img: PIL.Image.Image | LazyLoadingImage, downsampling_factor
) -> torch.Tensor:
    mask_img = mask_img.resize(
        (
            mask_img.width // downsampling_factor,
            mask_img.height // downsampling_factor,
        ),
        resample=Image.Resampling.LANCZOS,
    )
    mask = np.array(mask_img).astype(np.float32) / 255.0
    mask = mask[None, None]
    mask_t = torch.from_numpy(mask)
    return mask_t


def pillow_img_to_opencv_img(img: PIL.Image.Image | LazyLoadingImage):
    open_cv_image = np.array(img)
    # Convert RGB to BGR
    open_cv_image = open_cv_image[:, :, ::-1].copy()
    return open_cv_image


def torch_image_to_openvcv_img(img: torch.Tensor) -> np.ndarray:
    img = (img + 1) / 2
    img_np = img.detach().cpu().numpy()
    # assert there is only one image
    assert img_np.shape[0] == 1
    img_np = img_np[0]
    img_np = img_np.transpose(1, 2, 0)
    img_np = (img_np * 255).astype(np.uint8)
    # RGB to BGR
    img_np = img_np[:, :, ::-1]
    return img_np
def torch_img_to_pillow_img(img_t: torch.Tensor) -> PIL.Image.Image:
    img_t = img_t.to(torch.float32).detach().cpu()
    if len(img_t.shape) == 3:
        img_t = img_t.unsqueeze(0)
    if img_t.shape[0] != 1:
        raise ValueError("Only batch size 1 supported")
    if img_t.shape[1] == 1:
        colorspace = "L"
    elif img_t.shape[1] == 3:
        colorspace = "RGB"
    else:
        msg = (
            f"Unsupported colorspace. {img_t.shape[1]} channels in {img_t.shape} shape"
        )
        raise ValueError(msg)
    img_t = rearrange(img_t, "b c h w -> b h w c")
    img_t = torch.clamp((img_t + 1.0) / 2.0, min=0.0, max=1.0)
    img_np = (255.0 * img_t).cpu().numpy().astype(np.uint8)[0]
    if colorspace == "L":
        img_np = img_np[:, :, 0]
    return Image.fromarray(img_np, colorspace)


def model_latent_to_pillow_img(latent: torch.Tensor) -> PIL.Image.Image:
    from imaginairy.utils.model_manager import get_current_diffusion_model

    if len(latent.shape) == 3:
        latent = latent.unsqueeze(0)
    if latent.shape[0] != 1:
        raise ValueError("Only batch size 1 supported")
    model = get_current_diffusion_model()
    img_t = model.lda.decode(latent)
    return torch_img_to_pillow_img(img_t)


def model_latents_to_pillow_imgs(latents: torch.Tensor) -> Sequence[PIL.Image.Image]:
    return [model_latent_to_pillow_img(latent) for latent in latents]


def pillow_img_to_model_latent(
    model, img: PIL.Image.Image | LazyLoadingImage, batch_size=1, half=True
):
    init_image = pillow_img_to_torch_image(img).to(get_device())
    init_image = repeat(init_image, "1 ... -> b ...", b=batch_size)
    if half:
        return model.get_first_stage_encoding(
            model.encode_first_stage(init_image.half())
        )
    return model.get_first_stage_encoding(model.encode_first_stage(init_image))
def imgpaths_to_imgs(imgpaths):
    imgs = []
    for imgpath in imgpaths:
@@ -250,6 +142,28 @@ def combine_image(original_img, generated_img, mask_img):
    return rebuilt_orig_img


def combine_img_torch(
    target_img: torch.Tensor,
    source_img: torch.Tensor,
    mask_img: torch.Tensor,
) -> torch.Tensor:
    """Combine the source image with the target image using the mask image."""
    img_convert.assert_b1c3hw(target_img)
    img_convert.assert_b1c3hw(source_img)
    img_convert.assert_torch_mask(mask_img)

    # assert mask and img are the same size
    if mask_img.shape[-2:] != source_img.shape[-2:]:
        msg = "Mask and image must have the same height and width."
        raise ValueError(msg)

    # Using the mask, combine the images
    combined_img = target_img * (1 - mask_img) + source_img * mask_img
    img_convert.assert_b1c3hw(combined_img)
    return combined_img


def calc_scale_to_fit_within(height: int, width: int, max_size) -> float:
    max_width, max_height = normalize_image_size(max_size)
    if width <= max_width and height <= max_height:
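A small sketch of combine_img_torch above, on synthetic tensors (shapes and values invented for illustration):

import torch

from imaginairy.utils.img_utils import combine_img_torch

target = torch.zeros(1, 3, 64, 64)   # flat gray target, [-1, 1] convention
source = torch.ones(1, 3, 64, 64)    # flat white source
mask = torch.zeros(1, 1, 64, 64)
mask[:, :, 16:48, 16:48] = 1.0       # paste a centered square of source
combined = combine_img_torch(target, source, mask)
assert combined[0, 0, 32, 32] == 1.0  # inside the mask: source pixel
assert combined[0, 0, 0, 0] == 0.0    # outside the mask: target pixel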
@@ -282,3 +196,40 @@ def aspect_ratio(width, height):
    y = height // divisor
    return f"{x}:{y}"
def blur_fill(image: torch.Tensor, mask: torch.Tensor, blur: int, falloff: int):
    blur = make_odd(blur)
    falloff = min(make_odd(falloff), blur - 2)

    original = image.clone()
    alpha = mask.floor()
    if falloff > 0:
        erosion = binary_erosion(alpha, falloff)
        alpha = alpha * gaussian_blur(erosion, falloff)
    alpha = alpha.repeat(1, 3, 1, 1)

    image = gaussian_blur(image, blur)
    image = original + (image - original) * alpha
    return image


def gaussian_blur(image: Tensor, radius: int, sigma: float = 0):
    c = image.shape[-3]
    if sigma <= 0:
        sigma = 0.3 * (radius - 1) + 0.8

    kernel = _gaussian_kernel(radius, sigma).to(image.device)
    kernel_x = kernel[..., None, :].repeat(c, 1, 1).unsqueeze(1)
    kernel_y = kernel[..., None].repeat(c, 1, 1).unsqueeze(1)
    image = F.pad(image, (radius, radius, radius, radius), mode="reflect")
    image = F.conv2d(image, kernel_x, groups=c)
    image = F.conv2d(image, kernel_y, groups=c)
    return image


def _gaussian_kernel(radius: int, sigma: float):
    x = torch.linspace(-radius, radius, steps=radius * 2 + 1)
    pdf = torch.exp(-0.5 * (x / sigma).pow(2))
    return pdf / pdf.sum()
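A usage sketch for gaussian_blur and blur_fill (the radius, blur, and falloff values are arbitrary):

import torch

from imaginairy.utils.img_utils import blur_fill, gaussian_blur

img = torch.rand(1, 3, 64, 64) * 2 - 1  # fake image in [-1, 1]
blurred = gaussian_blur(img, radius=5)  # softened copy, same shape
assert blurred.shape == img.shape

mask = torch.zeros(1, 1, 64, 64)
mask[:, :, 20:44, 20:44] = 1.0
# blur only the masked region, feathering the edge over `falloff` pixels
result = blur_fill(img, mask=mask, blur=9, falloff=3)
assert result.shape == img.shape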

@@ -331,7 +331,7 @@ class ImageLoggingContext:
        )

    def log_progress_latent(self, latent):
        from imaginairy.utils.img_utils import model_latents_to_pillow_imgs
        from imaginairy.utils.img_convert import model_latents_to_pillow_imgs

        if not self.progress_img_callback:
            return

@@ -0,0 +1,143 @@
from typing import Union

import numpy as np
import torch
from torch import Tensor
from torch.nn import functional as F

from imaginairy.utils import img_convert
from imaginairy.utils.img_convert import assert_bc3hw
from imaginairy.utils.mathy import make_odd


def binary_erosion(mask: Tensor, radius: int):
    # a pixel stays masked only if its full (2*radius+1) x (2*radius+1) neighborhood is masked
    kernel = torch.ones(1, 1, radius * 2 + 1, radius * 2 + 1, device=mask.device)
    mask = F.pad(mask, (radius, radius, radius, radius), mode="constant", value=1)
    mask = F.conv2d(mask, kernel, groups=1)
    mask = (mask == kernel.numel()).to(mask.dtype)
    return mask
def highlight_masked_area(
    img: Tensor,
    mask: Tensor,
    color: Union[tuple[int, int, int], None] = None,
    highlight_strength: float = 0.5,
) -> Tensor:
    """
    Highlights the masked area of an image tensor with a specified color.
    """
    from imaginairy.utils.img_utils import combine_img_torch

    img_convert.assert_b1c3hw(img)
    img_convert.assert_torch_mask(mask)

    # Ensure mask is on the same device as the image tensor
    mask = mask.to(img.device)

    if color is None:
        color = tuple(np.random.randint(0, 256, 3))
    elif any(c > 255 or c < 0 for c in color):
        raise ValueError("Color values must be in the range [0, 255].")

    # Convert color to a tensor and normalize to [0, 1]
    color_tensor = torch.tensor(color, device=img.device, dtype=img.dtype) / 255.0
    solid_color = torch.ones_like(img)
    for channel in range(3):
        solid_color[:, channel, :, :] *= color_tensor[channel]

    highlighted_image = combine_img_torch(img, solid_color, mask * highlight_strength)
    return highlighted_image
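An illustrative call (tensor shapes invented; color is optional and chosen randomly when omitted):

import torch

from imaginairy.utils.mask_helpers import highlight_masked_area

img = torch.zeros(1, 3, 32, 32)  # flat gray image, [-1, 1] convention
mask = torch.zeros(1, 1, 32, 32)
mask[:, :, 8:24, 8:24] = 1.0
# tint the masked square red at half strength
highlighted = highlight_masked_area(img, mask, color=(255, 0, 0), highlight_strength=0.5)
assert highlighted.shape == img.shape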
def fill_neutral(image: Tensor, mask: Tensor, falloff: int = 1) -> Tensor:
    img_convert.assert_bc3hw(image)
    img_convert.assert_torch_mask(mask)
    mask = mask_falloff(mask, falloff)
    filled_img = image.detach().clone()
    m = (1.0 - mask).squeeze(0).squeeze(0)
    for i in range(3):
        filled_img[:, i, :, :] -= 0.5
        filled_img[:, i, :, :] *= m
        filled_img[:, i, :, :] += 0.5
    img_convert.assert_bc3hw(filled_img)
    return filled_img
def fill_noise(image: Tensor, mask: Tensor, falloff: int = 1, seed=1) -> Tensor:
    """
    Fills a masked area in an image with random noise.
    """
    img_convert.assert_bc3hw(image)
    img_convert.assert_torch_mask(mask)
    mask = mask_falloff(mask, falloff)
    filled_img = image.detach().clone()
    noise = torch.rand_like(filled_img) * 2 - 1
    filled_img = filled_img * (1 - mask) + noise * mask
    img_convert.assert_bc3hw(filled_img)
    return filled_img
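A sketch contrasting the two fills (synthetic tensors, arbitrary falloff):

import torch

from imaginairy.utils.mask_helpers import fill_neutral, fill_noise

img = torch.rand(1, 3, 64, 64) * 2 - 1
mask = torch.zeros(1, 1, 64, 64)
mask[:, :, 16:48, 16:48] = 1.0

neutral = fill_neutral(img, mask, falloff=1)  # masked pixels pulled to 0.5
noisy = fill_noise(img, mask, falloff=1)      # masked pixels replaced with noise
assert neutral.shape == noisy.shape == img.shape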
# def expand_mask(mask, expand, tapered_corners):
#     c = 0 if tapered_corners else 1
#     kernel = np.array([[c, 1, c], [1, 1, 1], [c, 1, c]])
#     mask = mask.reshape((-1, mask.shape[-2], mask.shape[-1]))
#     out = []
#     for m in mask:
#         output = m.numpy()
#         for _ in range(abs(expand)):
#             if expand < 0:
#                 output = scipy.ndimage.grey_erosion(output, footprint=kernel)
#             else:
#                 output = scipy.ndimage.grey_dilation(output, footprint=kernel)
#         output = torch.from_numpy(output)
#         out.append(output)
#     return (torch.stack(out, dim=0),)
def mask_falloff(mask: Tensor, falloff: int) -> Tensor:
    """
    Applies a falloff effect to a binary mask tensor to create smooth transitions at its edges.
    """
    from imaginairy.utils.img_utils import gaussian_blur

    alpha = mask.expand(1, *mask.shape[-3:]).floor()
    if falloff > 0:
        falloff = make_odd(falloff)
        erosion = binary_erosion(alpha, falloff)
        alpha = alpha * gaussian_blur(erosion, falloff)
    return alpha
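The falloff is an erosion followed by a Gaussian blur, so a hard mask ramps smoothly from 1 deep inside the masked region to 0 outside it; a quick check:

import torch

from imaginairy.utils.mask_helpers import mask_falloff

mask = torch.zeros(1, 1, 64, 64)
mask[:, :, 16:48, 16:48] = 1.0
soft = mask_falloff(mask, falloff=5)
assert soft.shape == mask.shape
assert soft[0, 0, 32, 32] > 0.9  # interior stays fully masked
assert soft[0, 0, 0, 0] == 0.0   # far outside stays zero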
def fill_navier_stokes(image: Tensor, mask: Tensor, falloff: int = 1) -> Tensor:
    """
    Fills a masked area in an image using Navier-Stokes inpainting.

    https://docs.opencv.org/3.4/df/d3d/tutorial_py_inpainting.html
    """
    import cv2

    assert_bc3hw(image)
    alpha = mask_falloff(mask, falloff)
    filled_img = image.detach().clone()
    alpha_np = alpha.squeeze(0).squeeze(0).cpu().numpy()
    alpha_bc = alpha_np.reshape(*alpha_np.shape)
    filled_img = filled_img.squeeze(0)
    for channel_slice in filled_img:
        image_np = channel_slice.cpu().numpy()
        filled_np = cv2.inpaint(
            (255.0 * (image_np + 1) / 2).astype(np.uint8),
            (255.0 * alpha_np).astype(np.uint8),
            3,
            cv2.INPAINT_NS,
        )
        filled_np = (filled_np.astype(np.float32) / 255.0) * 2 - 1
        filled_np = image_np * (1.0 - alpha_bc) + filled_np * alpha_bc
        channel_slice.copy_(torch.from_numpy(filled_np))
    filled_img = filled_img.unsqueeze(0)
    assert_bc3hw(filled_img)
    return filled_img
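An end-to-end sketch of the Navier-Stokes fill (requires opencv-python; the file names are hypothetical, and the mask must be a white-on-black image the same size as the photo):

from PIL import Image

from imaginairy.utils.img_convert import (
    pillow_img_to_torch_image,
    pillow_mask_255_to_torch_mask,
    torch_img_to_pillow_img,
)
from imaginairy.utils.mask_helpers import fill_navier_stokes

# hypothetical paths
img_t = pillow_img_to_torch_image(Image.open("photo.png").convert("RGB"))
mask_t = pillow_mask_255_to_torch_mask(Image.open("photo_mask.png").convert("L"))
filled_t = fill_navier_stokes(img_t, mask_t, falloff=1)
torch_img_to_pillow_img(filled_t).save("photo_filled.png")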

@@ -0,0 +1,2 @@
def make_odd(n):
    return int(n + 1 - n % 2)

@@ -6,7 +6,7 @@ import torch
from PIL import Image, ImageDraw
from torch import nn
from imaginairy.utils.img_utils import torch_img_to_pillow_img
from imaginairy.utils.img_convert import torch_img_to_pillow_img
def outpaint_calculations(

@@ -42,6 +42,7 @@ tests/test_api.py::test_tile_mode
tests/test_api/test_generate.py::test_cliptext_inpainting_pearl_doctor
tests/test_api/test_generate.py::test_controlnet[canny]
tests/test_api/test_generate.py::test_controlnet[colorize]
tests/test_api/test_generate.py::test_controlnet[densepose]
tests/test_api/test_generate.py::test_controlnet[depth]
tests/test_api/test_generate.py::test_controlnet[details]
tests/test_api/test_generate.py::test_controlnet[edit]
@@ -91,6 +92,7 @@ tests/test_enhancers/test_upscale_realesrgan.py::test_upscale_textured_image
tests/test_modules/diffusion/test_model.py::test_nonlinearity
tests/test_outpaint.py::test_outpainting_outpaint
tests/test_safety.py::test_is_nsfw
tests/test_utils/test_mask_helpers.py::test_inpaint_prep_dogbench
tests/test_utils/test_model_cache.py::test_cache_ordering
tests/test_utils/test_model_cache.py::test_get_existing_move_to_gpu
tests/test_utils/test_model_cache.py::test_set_cpu_full


Binary file not shown (image added, 2.2 MiB).

@@ -6,7 +6,7 @@ from imaginairy.img_processors.control_modes import CONTROL_MODES, create_depth_
from imaginairy.modules.midas.api import ISL_PATHS
from imaginairy.schema import LazyLoadingImage
from imaginairy.utils import seed_everything
from imaginairy.utils.img_utils import (
from imaginairy.utils.img_convert import (
    pillow_img_to_torch_image,
    torch_img_to_pillow_img,
)

@@ -4,7 +4,7 @@ import pytest
from imaginairy.schema import LazyLoadingImage
from imaginairy.utils.feather_tile import rebuild_image, tile_image, tile_setup
from imaginairy.utils.img_utils import (
from imaginairy.utils.img_convert import (
    pillow_img_to_torch_image,
    torch_img_to_pillow_img,
)

@@ -6,11 +6,13 @@ from torch.nn.functional import interpolate
from imaginairy.enhancers.upscale_riverwing import upscale_latent
from imaginairy.schema import LazyLoadingImage
from imaginairy.utils import get_device
from imaginairy.utils.img_utils import (
    pillow_fit_image_within,
from imaginairy.utils.img_convert import (
    pillow_img_to_torch_image,
    torch_img_to_pillow_img,
)
from imaginairy.utils.img_utils import (
    pillow_fit_image_within,
)
from imaginairy.utils.model_manager import get_diffusion_model
from tests import TESTS_FOLDER

@@ -0,0 +1,173 @@
import torch
from PIL import Image, ImageOps
from imaginairy.api import imagine
from imaginairy.enhancers.clip_masking import get_img_mask
from imaginairy.schema import ImaginePrompt
from imaginairy.utils import img_convert
from imaginairy.utils.img_convert import (
    pillow_img_to_torch_image,
    pillow_mask_255_to_torch_mask,
    torch_img_to_pillow_img,
)
from imaginairy.utils.img_utils import blur_fill, combine_image
from imaginairy.utils.mask_helpers import (
    fill_navier_stokes,
    fill_neutral,
    fill_noise,
    highlight_masked_area,
)
from tests import TESTS_FOLDER
def makemask(mask, offset=0.1, threshold=0.2):
    B, C, H, W = mask.shape
    if C == 3:
        mask = mask.mean(dim=1, keepdim=True)
    assert 0.0 <= offset < threshold <= 1.0, "Threshold must be higher than offset"
    # linearly remap [offset, threshold] -> [0, 1] and clamp everything outside
    mask = (mask - offset) * (1 / (threshold - offset))
    mask = mask.clamp(0, 1)
    return mask
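A toy check of the remap: with the defaults, a soft mask value of 0.15 maps to (0.15 - 0.1) / (0.2 - 0.1) = 0.5.

m = torch.tensor([[[[0.05, 0.15, 0.90]]]])  # (1, 1, 1, 3) toy mask
out = makemask(m)                            # offset=0.1, threshold=0.2
assert torch.allclose(out, torch.tensor([[[[0.0, 0.5, 1.0]]]]))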
def test_fill_neutral(filename_base_for_outputs):
    img = Image.open(f"{TESTS_FOLDER}/data/bench2.png").convert("RGB")
    mask = Image.open(f"{TESTS_FOLDER}/data/bench2_mask.png")
    img_t = pillow_img_to_torch_image(img)
    mask_t = pillow_img_to_torch_image(mask)
    mask_t = makemask(mask_t)

    for falloff in [0, 1, 3, 5, 17]:
        filled_img_t = fill_neutral(img_t, mask_t, falloff=falloff)
        filled_img = torch_img_to_pillow_img(filled_img_t)
        img_path = f"{filename_base_for_outputs}_filled_neutral_falloff_{falloff}.png"
        filled_img.save(img_path)
        # assert_image_similar_to_expectation(filled_img, img_path=img_path, threshold=7000)
def test_fill_navier_stokes(filename_base_for_outputs):
    img = Image.open(f"{TESTS_FOLDER}/data/bench2.png").convert("RGB")
    mask = Image.open(f"{TESTS_FOLDER}/data/bench2_mask.png")
    img_t = pillow_img_to_torch_image(img)
    mask_t = pillow_img_to_torch_image(mask)
    mask_t = makemask(mask_t)

    for falloff in [0, 1, 3, 5, 17]:
        filled_img_t = fill_navier_stokes(img_t, mask_t, falloff=falloff)
        filled_img = torch_img_to_pillow_img(filled_img_t)
        img_path = f"{filename_base_for_outputs}_filled_navier_falloff_{falloff}.png"
        filled_img.save(img_path)
def test_inpaint_prep_dogbench(filename_base_for_outputs):
    save_count = 0

    def save(i, name):
        nonlocal save_count
        if isinstance(i, torch.Tensor):
            i = torch_img_to_pillow_img(i)
        i.save(f"{filename_base_for_outputs}_{save_count:02d}_{name}.png")
        save_count += 1

    img = Image.open(f"{TESTS_FOLDER}/data/dog-on-bench.png").convert("RGB")
    img_t = pillow_img_to_torch_image(img)
    save(img, "original")

    mask_img, mask_img_g = get_img_mask(img, "dog", threshold=0.5)
    save(mask_img_g, "mask_g")
    mask_img_g_t = pillow_mask_255_to_torch_mask(mask_img_g)
    print(
        f"mask_img_g value range: {mask_img_g_t.min().item()} - {mask_img_g_t.max().item()}"
    )
    mask_highlight_g_t = highlight_masked_area(
        img_t, mask_img_g_t, color=(255, 0, 0), highlight_strength=1
    )
    save(mask_highlight_g_t, "highlighted-mask_g")

    save(mask_img, "mask")
    mask_t = pillow_mask_255_to_torch_mask(mask_img)
    mask_t = makemask(mask_t)
    mask_highlight_t = highlight_masked_area(
        img_t,
        mask_t,
        # color=(255, 0, 0)
    )
    save(mask_highlight_t, "highlighted-mask")

    navier_filled_img_t = fill_navier_stokes(img_t, mask_t, falloff=0)
    save(navier_filled_img_t, "filled-navier-stokes")

    # blur the filled area
    blur_filled_img_t = blur_fill(navier_filled_img_t, mask=mask_t, blur=20, falloff=40)
    save(blur_filled_img_t, "navier-blurred-filled")

    # neutral fill the masked area
    neutral_filled_img_t = fill_neutral(img_t, mask_t, falloff=1)
    save(neutral_filled_img_t, "filled-neutral")

    # noise fill the masked area
    noise_filled_img_t = fill_noise(img_t, mask_t)
    save(noise_filled_img_t, "filled-noise")
    seed = 2
    prompt_text = "a red fox on a bench"
    prompts = [
        ImaginePrompt(
            prompt_text,
            init_image=img,
            init_image_strength=0,
            mask_image=mask_img,
            seed=seed,
            model_weights="sdxl",
            caption_text="original-filled",
        ),
        ImaginePrompt(
            prompt_text,
            init_image=img_convert.torch_img_to_pillow_img(neutral_filled_img_t),
            init_image_strength=0,
            mask_image=mask_img,
            seed=seed,
            model_weights="sdxl",
            caption_text="neutral-filled",
        ),
        ImaginePrompt(
            prompt_text,
            init_image=img_convert.torch_img_to_pillow_img(noise_filled_img_t),
            init_image_strength=0,
            mask_image=mask_img,
            seed=seed,
            model_weights="sdxl",
            caption_text="noise-filled",
        ),
        ImaginePrompt(
            prompt_text,
            init_image=img_convert.torch_img_to_pillow_img(blur_filled_img_t),
            init_image_strength=0,
            mask_image=mask_img,
            seed=seed,
            model_weights="sdxl",
            caption_text="navier-stokes-filled",
        ),
    ]

    for result in imagine(prompts):
        generated_img = result.images["pre-reconstitution"]
        save(generated_img, f"{result.prompt.caption_text}_pre-reconstitution")
        rebuilt_img = combine_image(
            original_img=img,
            generated_img=generated_img,
            mask_img=ImageOps.invert(mask_img),
        )
        save(rebuilt_img, f"{result.prompt.caption_text}_rebuilt")

    # for img_name, img in result.images.items():
    #     if "mask" in img_name:
    #         continue
    #
    #     name = f"{result.prompt.caption_text}_{img_name}"
    #     save(img, name)

@@ -0,0 +1,13 @@
from imaginairy.utils.mathy import make_odd


def test_make_odd():
    assert make_odd(0) == 1
    assert make_odd(1) == 1
    assert make_odd(2) == 3
    assert make_odd(3) == 3
    assert make_odd(4) == 5
    assert make_odd(4.1) == 5
    assert make_odd(-1) == -1
    assert make_odd(-2) == -1
    assert make_odd(1000) == 1001