mirror of
https://github.com/brycedrennan/imaginAIry
synced 2024-10-31 03:20:40 +00:00
261 lines
7.7 KiB
Python
261 lines
7.7 KiB
Python
"""
|
|
image utils.
|
|
|
|
Library format cheat sheet:
|
|
|
|
Library Dim Order Channel Order Value Range Type
|
|
Pillow R, G, B, A 0-255 PIL.Image.Image
|
|
OpenCV B, G, R, A 0-255 np.ndarray
|
|
Torch (B), C, H, W R, G, B -1.0-1.0 torch.Tensor
|
|
|
|
"""
|
|
from typing import Sequence
|
|
|
|
import numpy as np
|
|
import PIL
|
|
import torch
|
|
from einops import rearrange, repeat
|
|
from PIL import Image, ImageDraw, ImageFont
|
|
|
|
from imaginairy.schema import LazyLoadingImage
|
|
from imaginairy.utils import get_device
|
|
from imaginairy.utils.named_resolutions import normalize_image_size
|
|
from imaginairy.utils.paths import PKG_ROOT
|
|
|
|
|
|
def pillow_fit_image_within(
|
|
image: PIL.Image.Image | LazyLoadingImage,
|
|
max_height=512,
|
|
max_width=512,
|
|
convert="RGB",
|
|
snap_size=8,
|
|
) -> PIL.Image.Image:
|
|
image = image.convert(convert)
|
|
w, h = image.size
|
|
resize_ratio = 1
|
|
if w > max_width or h > max_height:
|
|
resize_ratio = min(max_width / w, max_height / h)
|
|
elif w < max_width and h < max_height:
|
|
# it's smaller than our target image, enlarge
|
|
resize_ratio = max(max_width / w, max_height / h)
|
|
|
|
if resize_ratio != 1:
|
|
w, h = int(w * resize_ratio), int(h * resize_ratio)
|
|
# resize to integer multiple of snap_size
|
|
w -= w % snap_size
|
|
h -= h % snap_size
|
|
|
|
if (w, h) != image.size:
|
|
image = image.resize((w, h), resample=Image.Resampling.LANCZOS)
|
|
return image
|
|
|
|
|
|
def pillow_img_to_torch_image(
|
|
img: PIL.Image.Image | LazyLoadingImage, convert="RGB"
|
|
) -> torch.Tensor:
|
|
if convert:
|
|
img = img.convert(convert)
|
|
img_np = np.array(img).astype(np.float32) / 255.0
|
|
# b, h, w, c => b, c, h, w
|
|
img_np = img_np[None].transpose(0, 3, 1, 2)
|
|
img_t = torch.from_numpy(img_np)
|
|
return 2.0 * img_t - 1.0
|
|
|
|
|
|
def pillow_mask_to_latent_mask(
|
|
mask_img: PIL.Image.Image | LazyLoadingImage, downsampling_factor
|
|
) -> torch.Tensor:
|
|
mask_img = mask_img.resize(
|
|
(
|
|
mask_img.width // downsampling_factor,
|
|
mask_img.height // downsampling_factor,
|
|
),
|
|
resample=Image.Resampling.LANCZOS,
|
|
)
|
|
|
|
mask = np.array(mask_img).astype(np.float32) / 255.0
|
|
mask = mask[None, None]
|
|
mask_t = torch.from_numpy(mask)
|
|
return mask_t
|
|
|
|
|
|
def pillow_img_to_opencv_img(img: PIL.Image.Image | LazyLoadingImage):
|
|
open_cv_image = np.array(img)
|
|
# Convert RGB to BGR
|
|
open_cv_image = open_cv_image[:, :, ::-1].copy()
|
|
return open_cv_image
|
|
|
|
|
|
def torch_image_to_openvcv_img(img: torch.Tensor) -> np.ndarray:
|
|
img = (img + 1) / 2
|
|
img_np = img.detach().cpu().numpy()
|
|
# assert there is only one image
|
|
assert img_np.shape[0] == 1
|
|
img_np = img_np[0]
|
|
img_np = img_np.transpose(1, 2, 0)
|
|
img_np = (img_np * 255).astype(np.uint8)
|
|
# RGB to BGR
|
|
img_np = img_np[:, :, ::-1]
|
|
return img_np
|
|
|
|
|
|
def torch_img_to_pillow_img(img_t: torch.Tensor) -> PIL.Image.Image:
|
|
img_t = img_t.to(torch.float32).detach().cpu()
|
|
if len(img_t.shape) == 3:
|
|
img_t = img_t.unsqueeze(0)
|
|
if img_t.shape[0] != 1:
|
|
raise ValueError("Only batch size 1 supported")
|
|
if img_t.shape[1] == 1:
|
|
colorspace = "L"
|
|
elif img_t.shape[1] == 3:
|
|
colorspace = "RGB"
|
|
else:
|
|
msg = (
|
|
f"Unsupported colorspace. {img_t.shape[1]} channels in {img_t.shape} shape"
|
|
)
|
|
raise ValueError(msg)
|
|
img_t = rearrange(img_t, "b c h w -> b h w c")
|
|
img_t = torch.clamp((img_t + 1.0) / 2.0, min=0.0, max=1.0)
|
|
img_np = (255.0 * img_t).cpu().numpy().astype(np.uint8)[0]
|
|
if colorspace == "L":
|
|
img_np = img_np[:, :, 0]
|
|
return Image.fromarray(img_np, colorspace)
|
|
|
|
|
|
def model_latent_to_pillow_img(latent: torch.Tensor) -> PIL.Image.Image:
|
|
from imaginairy.utils.model_manager import get_current_diffusion_model
|
|
|
|
if len(latent.shape) == 3:
|
|
latent = latent.unsqueeze(0)
|
|
if latent.shape[0] != 1:
|
|
raise ValueError("Only batch size 1 supported")
|
|
model = get_current_diffusion_model()
|
|
img_t = model.lda.decode(latent)
|
|
return torch_img_to_pillow_img(img_t)
|
|
|
|
|
|
def model_latents_to_pillow_imgs(latents: torch.Tensor) -> Sequence[PIL.Image.Image]:
|
|
return [model_latent_to_pillow_img(latent) for latent in latents]
|
|
|
|
|
|
def pillow_img_to_model_latent(
|
|
model, img: PIL.Image.Image | LazyLoadingImage, batch_size=1, half=True
|
|
):
|
|
init_image = pillow_img_to_torch_image(img).to(get_device())
|
|
init_image = repeat(init_image, "1 ... -> b ...", b=batch_size)
|
|
if half:
|
|
return model.get_first_stage_encoding(
|
|
model.encode_first_stage(init_image.half())
|
|
)
|
|
return model.get_first_stage_encoding(model.encode_first_stage(init_image))
|
|
|
|
|
|
def imgpaths_to_imgs(imgpaths):
|
|
imgs = []
|
|
for imgpath in imgpaths:
|
|
if isinstance(imgpath, str):
|
|
img = LazyLoadingImage(filepath=imgpath)
|
|
imgs.append(img)
|
|
else:
|
|
imgs.append(imgpath)
|
|
|
|
return imgs
|
|
|
|
|
|
def add_caption_to_image(
|
|
img: PIL.Image.Image | LazyLoadingImage,
|
|
caption,
|
|
font_size=16,
|
|
font_path=f"{PKG_ROOT}/data/DejaVuSans.ttf",
|
|
):
|
|
img_pil = img.as_pillow() if isinstance(img, LazyLoadingImage) else img
|
|
draw = ImageDraw.Draw(img_pil)
|
|
|
|
font = ImageFont.truetype(font_path, font_size)
|
|
|
|
x = 15
|
|
y = img_pil.height - 15 - font_size
|
|
|
|
draw.text(
|
|
(x, y),
|
|
caption,
|
|
font=font,
|
|
fill=(255, 255, 255),
|
|
stroke_width=3,
|
|
stroke_fill=(0, 0, 0),
|
|
)
|
|
|
|
|
|
def create_halo_effect(
|
|
bw_image: PIL.Image.Image, background_color: tuple
|
|
) -> PIL.Image.Image:
|
|
from PIL import Image, ImageFilter
|
|
|
|
# Step 1: Make white portion of the image transparent
|
|
transparent_image = bw_image.convert("RGBA")
|
|
datas = transparent_image.getdata()
|
|
new_data = []
|
|
for item in datas:
|
|
# Change all white (also shades of whites)
|
|
# to transparent
|
|
if item[0] > 200 and item[1] > 200 and item[2] > 200:
|
|
new_data.append((255, 255, 255, 0))
|
|
else:
|
|
new_data.append(item)
|
|
transparent_image.putdata(new_data) # type: ignore
|
|
|
|
# Step 2: Make a copy of the image
|
|
eroded_image = transparent_image.copy()
|
|
|
|
# Step 3: Erode and blur the copy
|
|
# eroded_image = ImageOps.invert(eroded_image.convert("L")).convert("1")
|
|
# eroded_image = eroded_image.filter(ImageFilter.MinFilter(3)) # Erode
|
|
eroded_image = eroded_image.filter(ImageFilter.GaussianBlur(radius=25))
|
|
|
|
# Step 4: Create new canvas
|
|
new_canvas = Image.new("RGBA", bw_image.size, color=background_color)
|
|
|
|
# Step 5: Paste the blurred copy on the new canvas
|
|
new_canvas.paste(eroded_image, (0, 0), eroded_image)
|
|
|
|
# Step 6: Paste the original sharp image on the new canvas
|
|
new_canvas.paste(transparent_image, (0, 0), transparent_image)
|
|
|
|
return new_canvas
|
|
|
|
|
|
def combine_image(original_img, generated_img, mask_img):
|
|
"""Combine the generated image with the original image using the mask image."""
|
|
from PIL import Image
|
|
|
|
from imaginairy.utils.log_utils import log_img
|
|
|
|
generated_img = generated_img.resize(
|
|
original_img.size,
|
|
resample=Image.Resampling.LANCZOS,
|
|
)
|
|
|
|
mask_for_orig_size = mask_img.resize(
|
|
original_img.size,
|
|
resample=Image.Resampling.LANCZOS,
|
|
)
|
|
log_img(mask_for_orig_size, "mask for original image size")
|
|
|
|
rebuilt_orig_img = Image.composite(
|
|
original_img,
|
|
generated_img,
|
|
mask_for_orig_size,
|
|
)
|
|
return rebuilt_orig_img
|
|
|
|
|
|
def calc_scale_to_fit_within(height: int, width: int, max_size) -> float:
|
|
max_width, max_height = normalize_image_size(max_size)
|
|
if width <= max_width and height <= max_height:
|
|
return 1
|
|
|
|
width_ratio = max_width / width
|
|
height_ratio = max_height / height
|
|
|
|
return min(width_ratio, height_ratio)
|