imaginAIry/imaginairy/img_utils.py

"""
image utils.

Library format cheat sheet:

Library     Dim Order       Channel Order       Value Range     Type
Pillow                      R, G, B, A          0-255           PIL.Image.Image
OpenCV                      B, G, R, A          0-255           np.ndarray
Torch       (B), C, H, W    R, G, B             -1.0-1.0        torch.Tensor

"""
from typing import Sequence

import numpy as np
import PIL
import torch
from einops import rearrange, repeat
from PIL import Image, ImageDraw, ImageFont

from imaginairy.paths import PKG_ROOT
from imaginairy.schema import LazyLoadingImage
from imaginairy.utils import get_device


def pillow_fit_image_within(
    image: PIL.Image.Image, max_height=512, max_width=512, convert="RGB", snap_size=8
):
    """
    Convert an image and rescale it toward a ``max_width`` x ``max_height`` box.

    The image is first converted to the ``convert`` mode, then a scale factor
    is chosen:

    * either side larger than its maximum: shrink by the smaller of the two
      side ratios, so both sides fit inside the box;
    * both sides strictly smaller than their maximums: enlarge by the *larger*
      of the two side ratios. Note that this makes the scaled image cover the
      box, so one side may end up larger than its maximum;
    * anything else: keep the current size.

    Scaled sides are truncated to integers and every side is then rounded
    down to a multiple of ``snap_size``.

    Returns the converted image, LANCZOS-resampled only when the computed size
    differs from the converted image's size.
    """
    image = image.convert(convert)
    src_w, src_h = image.size

    oversized = src_w > max_width or src_h > max_height
    undersized = src_w < max_width and src_h < max_height

    if oversized:
        scale = min(max_width / src_w, max_height / src_h)
    elif undersized:
        # it's smaller than our target image, enlarge
        scale = max(max_width / src_w, max_height / src_h)
    else:
        scale = 1

    new_w, new_h = src_w, src_h
    if scale != 1:
        new_w = int(src_w * scale)
        new_h = int(src_h * scale)

    # round each side down to an integer multiple of snap_size
    new_w = (new_w // snap_size) * snap_size
    new_h = (new_h // snap_size) * snap_size

    if (new_w, new_h) == image.size:
        return image
    return image.resize((new_w, new_h), resample=Image.Resampling.LANCZOS)


def pillow_img_to_torch_image(img: PIL.Image.Image, convert="RGB"):
    """
    Turn a Pillow image into a batched float tensor with values in -1.0..1.0.

    When ``convert`` is truthy the image is converted to that mode first.
    Pixel values are divided by 255, a leading batch axis of size 1 is added,
    and the axes are reordered from (b, h, w, c) to (b, c, h, w). The array
    must therefore have a channel axis; a 2-D (single-channel) pixel array
    fails at the transpose.
    """
    source = img.convert(convert) if convert else img
    unit_pixels = np.array(source).astype(np.float32) / 255.0
    # b, h, w, c => b, c, h, w
    batched = np.expand_dims(unit_pixels, axis=0).transpose(0, 3, 1, 2)
    tensor = torch.from_numpy(batched)
    # rescale 0..1 to -1..1
    return 2.0 * tensor - 1.0


def pillow_mask_to_latent_mask(mask_img: PIL.Image.Image, downsampling_factor):
    """
    Shrink a mask image and return it as a float tensor scaled by 1/255.

    The mask is resized (LANCZOS) to its width and height floor-divided by
    ``downsampling_factor``. The pixel array is converted to float32, divided
    by 255, and given two extra leading axes before being wrapped in a tensor.
    Assumes the mask is single-channel so the result is (1, 1, h, w) —
    TODO confirm against callers.
    """
    latent_size = (
        mask_img.width // downsampling_factor,
        mask_img.height // downsampling_factor,
    )
    shrunk = mask_img.resize(latent_size, resample=Image.Resampling.LANCZOS)

    values = np.array(shrunk).astype(np.float32) / 255.0
    # prepend two axes of size 1
    return torch.from_numpy(values[None, None])


def pillow_img_to_opencv_img(img: PIL.Image.Image):
    """
    Return the image's pixels as a new array with the channel axis reversed.

    For a 3-channel RGB image this yields B, G, R order. Every channel is
    reversed, so a 4-channel R, G, B, A input comes out as A, B, G, R.
    The result is a copy, not a view of the Pillow data.
    """
    pixels = np.array(img)
    # Convert RGB to BGR by flipping the third (channel) axis
    return np.flip(pixels, axis=2).copy()


def torch_image_to_openvcv_img(img: torch.Tensor) -> np.ndarray:
    """
    Convert a single torch image in -1.0..1.0 to an OpenCV-style uint8 array.

    Args:
        img: tensor laid out as (1, C, H, W) or (C, H, W) with R, G, B channel
            order. A 3-D tensor is treated as a batch of one.

    Returns:
        A C-contiguous ``np.uint8`` array of shape (H, W, C) with the channel
        axis reversed (R, G, B -> B, G, R).

    Raises:
        ValueError: if the batch dimension is not 1.
    """
    # work in float32 on the CPU, same as torch_img_to_pillow_img
    img = img.detach().to(torch.float32).cpu()
    if img.ndim == 3:
        img = img.unsqueeze(0)
    # an explicit raise survives `python -O`, unlike an assert
    if img.shape[0] != 1:
        raise ValueError("Only batch size 1 supported")

    # -1..1 -> 0..1; clamp so out-of-range values saturate instead of wrapping
    # around when cast to uint8
    unit = torch.clamp((img + 1.0) / 2.0, min=0.0, max=1.0)
    hwc_rgb = unit[0].numpy().transpose(1, 2, 0)
    pixels = (hwc_rgb * 255).astype(np.uint8)
    # RGB to BGR; return contiguous memory rather than a negative-stride view
    return np.ascontiguousarray(pixels[:, :, ::-1])


def torch_img_to_pillow_img(img_t: torch.Tensor):
    """
    Convert a single torch image in -1.0..1.0 to a Pillow image.

    The tensor is cast to float32 and moved to the CPU. A 3-D tensor gets a
    leading batch axis. One channel produces an "L" image and three channels
    an "RGB" image; values are mapped to 0..1, clamped, and scaled to uint8.

    Raises:
        ValueError: if the batch size is not 1, or the channel count is
            neither 1 nor 3.
    """
    tensor = img_t.to(torch.float32).detach().cpu()
    if tensor.ndim == 3:
        tensor = tensor.unsqueeze(0)
    if tensor.shape[0] != 1:
        raise ValueError("Only batch size 1 supported")

    channel_count = tensor.shape[1]
    if channel_count not in (1, 3):
        msg = (
            f"Unsupported colorspace. {tensor.shape[1]} channels in {tensor.shape} shape"
        )
        raise ValueError(msg)
    colorspace = "L" if channel_count == 1 else "RGB"

    channels_last = rearrange(tensor, "b c h w -> b h w c")
    unit = torch.clamp((channels_last + 1.0) / 2.0, min=0.0, max=1.0)
    # tensor already lives on the CPU, so it can go straight to numpy
    pixels = (255.0 * unit).numpy().astype(np.uint8)[0]
    if colorspace == "L":
        # drop the size-1 channel axis for a grayscale image
        pixels = pixels[:, :, 0]
    return Image.fromarray(pixels, colorspace)


def model_latent_to_pillow_img(latent: torch.Tensor) -> PIL.Image.Image:
    """
    Decode one latent with the current diffusion model into a Pillow image.

    A 3-D latent gets a leading batch axis. The latent is decoded with
    ``model.lda.decode`` on the model returned by
    ``get_current_diffusion_model()`` and the result is converted with
    ``torch_img_to_pillow_img``.

    Raises:
        ValueError: if the batch size is not 1.
    """
    # imported inside the function rather than at module top
    # (presumably to avoid an import cycle — TODO confirm)
    from imaginairy.model_manager import get_current_diffusion_model

    batched = latent.unsqueeze(0) if latent.ndim == 3 else latent
    if batched.shape[0] != 1:
        raise ValueError("Only batch size 1 supported")

    decoded = get_current_diffusion_model().lda.decode(batched)
    return torch_img_to_pillow_img(decoded)


def model_latents_to_pillow_imgs(latents: torch.Tensor) -> Sequence[PIL.Image.Image]:
    """
    Decode each latent along the first axis into its own Pillow image.

    Iterating ``latents`` yields one entry per item of the first axis; each is
    passed to ``model_latent_to_pillow_img`` and the results are returned as
    a list in the same order.
    """
    return list(map(model_latent_to_pillow_img, latents))


def pillow_img_to_model_latent(model, img, batch_size=1, half=True):
    """
    Encode a Pillow image into the model's latent space.

    The image is converted with ``pillow_img_to_torch_image``, moved to the
    device returned by ``get_device()``, and repeated ``batch_size`` times
    along the batch axis. When ``half`` is true the batch is cast to half
    precision before encoding. The batch is passed through
    ``model.encode_first_stage`` and then ``model.get_first_stage_encoding``.

    NOTE(review): ``model_latent_to_pillow_img`` in this file decodes through
    ``model.lda``; confirm the model passed here still exposes
    ``encode_first_stage`` / ``get_first_stage_encoding``.
    """
    pixels = pillow_img_to_torch_image(img).to(get_device())
    batch = repeat(pixels, "1 ... -> b ...", b=batch_size)
    if half:
        batch = batch.half()
    encoded = model.encode_first_stage(batch)
    return model.get_first_stage_encoding(encoded)


def imgpaths_to_imgs(imgpaths):
    """
    Return a list where every string entry is wrapped in a LazyLoadingImage.

    String entries are treated as file paths and become
    ``LazyLoadingImage(filepath=...)``; any other entry is passed through
    unchanged. Order is preserved.
    """
    return [
        LazyLoadingImage(filepath=entry) if isinstance(entry, str) else entry
        for entry in imgpaths
    ]


def add_caption_to_image(
    img, caption, font_size=16, font_path=f"{PKG_ROOT}/data/DejaVuSans.ttf"
):
    """
    Draw ``caption`` onto ``img`` in place, near the bottom-left corner.

    The text is drawn in white with a 3-pixel black stroke, using the TrueType
    font at ``font_path`` at ``font_size``. Its position is 15 pixels from the
    left edge and ``15 + font_size`` pixels above the bottom edge.
    The image is modified directly; nothing is returned.
    """
    margin = 15
    canvas = ImageDraw.Draw(img)
    caption_font = ImageFont.truetype(font_path, font_size)

    position = (margin, img.height - margin - font_size)
    white = (255, 255, 255)
    black = (0, 0, 0)

    canvas.text(
        position,
        caption,
        font=caption_font,
        fill=white,
        stroke_width=3,
        stroke_fill=black,
    )
feature: controlnet 2023-02-12 07:42:19 +00:00			`"""`
			`image utils.`

			`Library format cheat sheet:`

			`Library Dim Order Channel Order Value Range Type`
			`Pillow R, G, B, A 0-255 PIL.Image.Image`
			`OpenCV B, G, R, A 0-255 np.ndarray`
			`Torch (B), C, H, W R, G, B -1.0-1.0 torch.Tensor`

			`"""`
feature: boolean logic masks Specify advanced text based masks using boolean logic and strength modifiers. Mask descriptions must be lowercase. Keywords uppercase. Valid symbols: `AND`, `OR`, `NOT`, `()`, and mask strength modifier `{1.5}` where `+` can be any of `+ - /`. Single-character boolean operators also work. When writing strength modifies know that pixel values are between 0 and 1. - feature: apply mask edits to original files - feature: auto-rotate images if exif data specifies to do so - fix: accept mask images in command line 2022-09-24 05:58:48 +00:00			`from typing import Sequence`

			`import numpy as np`
			`import PIL`
			`import torch`
			`from einops import rearrange, repeat`
feature: add compilation animations (#224) - add generation/compare gifs 2023-01-29 01:16:47 +00:00			`from PIL import Image, ImageDraw, ImageFont`
feature: boolean logic masks Specify advanced text based masks using boolean logic and strength modifiers. Mask descriptions must be lowercase. Keywords uppercase. Valid symbols: `AND`, `OR`, `NOT`, `()`, and mask strength modifier `{1.5}` where `+` can be any of `+ - /`. Single-character boolean operators also work. When writing strength modifies know that pixel values are between 0 and 1. - feature: apply mask edits to original files - feature: auto-rotate images if exif data specifies to do so - fix: accept mask images in command line 2022-09-24 05:58:48 +00:00
feature: add compilation animations (#224) - add generation/compare gifs 2023-01-29 01:16:47 +00:00			`from imaginairy.paths import PKG_ROOT`
			`from imaginairy.schema import LazyLoadingImage`
feature: boolean logic masks Specify advanced text based masks using boolean logic and strength modifiers. Mask descriptions must be lowercase. Keywords uppercase. Valid symbols: `AND`, `OR`, `NOT`, `()`, and mask strength modifier `{1.5}` where `+` can be any of `+ - /`. Single-character boolean operators also work. When writing strength modifies know that pixel values are between 0 and 1. - feature: apply mask edits to original files - feature: auto-rotate images if exif data specifies to do so - fix: accept mask images in command line 2022-09-24 05:58:48 +00:00			`from imaginairy.utils import get_device`


feature: finetuning - feature: finetuning your own image models - feature: image prep command. crops to face or other interesting parts of photo - fix: back-compat for hf_hub_download - feature: add prune-ckpt command - feature: allow specification of model config file 2023-01-01 22:54:49 +00:00			`def pillow_fit_image_within(`
feature: image sizes can now be multiples of 8 instead of 64 from https://github.com/CompVis/stable-diffusion/issues/60#issuecomment-1240294667 2023-01-24 06:25:56 +00:00			`image: PIL.Image.Image, max_height=512, max_width=512, convert="RGB", snap_size=8`
feature: finetuning - feature: finetuning your own image models - feature: image prep command. crops to face or other interesting parts of photo - fix: back-compat for hf_hub_download - feature: add prune-ckpt command - feature: allow specification of model config file 2023-01-01 22:54:49 +00:00			`):`
			`image = image.convert(convert)`
feature: boolean logic masks Specify advanced text based masks using boolean logic and strength modifiers. Mask descriptions must be lowercase. Keywords uppercase. Valid symbols: `AND`, `OR`, `NOT`, `()`, and mask strength modifier `{1.5}` where `+` can be any of `+ - /`. Single-character boolean operators also work. When writing strength modifies know that pixel values are between 0 and 1. - feature: apply mask edits to original files - feature: auto-rotate images if exif data specifies to do so - fix: accept mask images in command line 2022-09-24 05:58:48 +00:00			`w, h = image.size`
fix: handle small input images If input images didn't need resizing because they were already smaller than max width/height then they didn't get normalized to a multiple of 64. This caused an exception like the following: ```Sizes of tensors must match except in dimension 1. Expected size 4 but got size 3 for tensor number 1 in the list. ``` 2022-10-06 06:13:48 +00:00			`resize_ratio = 1`
fix: masking now works properly at strengths 0 and 1 2022-09-24 21:41:25 +00:00			`if w > max_width or h > max_height:`
			`resize_ratio = min(max_width / w, max_height / h)`
fix: handle small input images If input images didn't need resizing because they were already smaller than max width/height then they didn't get normalized to a multiple of 64. This caused an exception like the following: ```Sizes of tensors must match except in dimension 1. Expected size 4 but got size 3 for tensor number 1 in the list. ``` 2022-10-06 06:13:48 +00:00			`elif w < max_width and h < max_height:`
			`# it's smaller than our target image, enlarge`
			`resize_ratio = max(max_width / w, max_height / h)`

			`if resize_ratio != 1:`
fix: masking now works properly at strengths 0 and 1 2022-09-24 21:41:25 +00:00			`w, h = int(w * resize_ratio), int(h * resize_ratio)`
feature: image sizes can now be multiples of 8 instead of 64 from https://github.com/CompVis/stable-diffusion/issues/60#issuecomment-1240294667 2023-01-24 06:25:56 +00:00			`# resize to integer multiple of snap_size`
			`w -= w % snap_size`
			`h -= h % snap_size`
lint: new ruff linter 2023-01-02 04:14:22 +00:00
fix: handle small input images If input images didn't need resizing because they were already smaller than max width/height then they didn't get normalized to a multiple of 64. This caused an exception like the following: ```Sizes of tensors must match except in dimension 1. Expected size 4 but got size 3 for tensor number 1 in the list. ``` 2022-10-06 06:13:48 +00:00			`if (w, h) != image.size:`
fix: masking now works properly at strengths 0 and 1 2022-09-24 21:41:25 +00:00			`image = image.resize((w, h), resample=Image.Resampling.LANCZOS)`
style: fix lint issues 2022-09-24 07:29:45 +00:00			`return image`
feature: boolean logic masks Specify advanced text based masks using boolean logic and strength modifiers. Mask descriptions must be lowercase. Keywords uppercase. Valid symbols: `AND`, `OR`, `NOT`, `()`, and mask strength modifier `{1.5}` where `+` can be any of `+ - /`. Single-character boolean operators also work. When writing strength modifies know that pixel values are between 0 and 1. - feature: apply mask edits to original files - feature: auto-rotate images if exif data specifies to do so - fix: accept mask images in command line 2022-09-24 05:58:48 +00:00

refactor: cleanup image generation code 2023-02-15 16:02:36 +00:00			`def pillow_img_to_torch_image(img: PIL.Image.Image, convert="RGB"):`
			`if convert:`
			`img = img.convert(convert)`
ci: add type checker fix some typehint issues 2023-12-11 02:29:47 +00:00			`img_np = np.array(img).astype(np.float32) / 255.0`
feature: controlnet 2023-02-12 07:42:19 +00:00			`# b, h, w, c => b, c, h, w`
ci: add type checker fix some typehint issues 2023-12-11 02:29:47 +00:00			`img_np = img_np[None].transpose(0, 3, 1, 2)`
			`img_t = torch.from_numpy(img_np)`
			`return 2.0 * img_t - 1.0`
feature: boolean logic masks Specify advanced text based masks using boolean logic and strength modifiers. Mask descriptions must be lowercase. Keywords uppercase. Valid symbols: `AND`, `OR`, `NOT`, `()`, and mask strength modifier `{1.5}` where `+` can be any of `+ - /`. Single-character boolean operators also work. When writing strength modifies know that pixel values are between 0 and 1. - feature: apply mask edits to original files - feature: auto-rotate images if exif data specifies to do so - fix: accept mask images in command line 2022-09-24 05:58:48 +00:00

refactor: cleanup image generation code 2023-02-15 16:02:36 +00:00			`def pillow_mask_to_latent_mask(mask_img: PIL.Image.Image, downsampling_factor):`
			`mask_img = mask_img.resize(`
			`(`
			`mask_img.width // downsampling_factor,`
			`mask_img.height // downsampling_factor,`
			`),`
			`resample=Image.Resampling.LANCZOS,`
			`)`

			`mask = np.array(mask_img).astype(np.float32) / 255.0`
			`mask = mask[None, None]`
			`mask = torch.from_numpy(mask)`
			`return mask`
feature: sliced latent decoding allows generation of bigger images. tile seams can be noticeable occasionally despite the feathering 2023-02-12 08:52:50 +00:00

feature: boolean logic masks Specify advanced text based masks using boolean logic and strength modifiers. Mask descriptions must be lowercase. Keywords uppercase. Valid symbols: `AND`, `OR`, `NOT`, `()`, and mask strength modifier `{1.5}` where `+` can be any of `+ - /`. Single-character boolean operators also work. When writing strength modifies know that pixel values are between 0 and 1. - feature: apply mask edits to original files - feature: auto-rotate images if exif data specifies to do so - fix: accept mask images in command line 2022-09-24 05:58:48 +00:00			`def pillow_img_to_opencv_img(img: PIL.Image.Image):`
			`open_cv_image = np.array(img)`
			`# Convert RGB to BGR`
			`open_cv_image = open_cv_image[:, :, ::-1].copy()`
			`return open_cv_image`


ci: add type checker fix some typehint issues 2023-12-11 02:29:47 +00:00			`def torch_image_to_openvcv_img(img: torch.Tensor) -> np.ndarray:`
feature: controlnet 2023-02-12 07:42:19 +00:00			`img = (img + 1) / 2`
ci: add type checker fix some typehint issues 2023-12-11 02:29:47 +00:00			`img_np = img.detach().cpu().numpy()`
feature: controlnet 2023-02-12 07:42:19 +00:00			`# assert there is only one image`
ci: add type checker fix some typehint issues 2023-12-11 02:29:47 +00:00			`assert img_np.shape[0] == 1`
			`img_np = img_np[0]`
			`img_np = img_np.transpose(1, 2, 0)`
			`img_np = (img_np * 255).astype(np.uint8)`
feature: controlnet 2023-02-12 07:42:19 +00:00			`# RGB to BGR`
ci: add type checker fix some typehint issues 2023-12-11 02:29:47 +00:00			`img_np = img_np[:, :, ::-1]`
			`return img_np`
feature: controlnet 2023-02-12 07:42:19 +00:00

refactor: cleanup image generation code 2023-02-15 16:02:36 +00:00			`def torch_img_to_pillow_img(img_t: torch.Tensor):`
fix: images came out wrong if processed on mps 2023-02-15 20:43:19 +00:00			`img_t = img_t.to(torch.float32).detach().cpu()`
refactor: cleanup image generation code 2023-02-15 16:02:36 +00:00			`if len(img_t.shape) == 3:`
			`img_t = img_t.unsqueeze(0)`
			`if img_t.shape[0] != 1:`
			`raise ValueError("Only batch size 1 supported")`
			`if img_t.shape[1] == 1:`
			`colorspace = "L"`
			`elif img_t.shape[1] == 3:`
			`colorspace = "RGB"`
			`else:`
style: speed up linting and autoformatting. fix lints 2023-09-29 08:13:50 +00:00			`msg = (`
feature: generate large images Added a composition stage so large images are more coherent 2023-02-12 02:23:45 +00:00			`f"Unsupported colorspace. {img_t.shape[1]} channels in {img_t.shape} shape"`
			`)`
style: speed up linting and autoformatting. fix lints 2023-09-29 08:13:50 +00:00			`raise ValueError(msg)`
refactor: cleanup image generation code 2023-02-15 16:02:36 +00:00			`img_t = rearrange(img_t, "b c h w -> b h w c")`
			`img_t = torch.clamp((img_t + 1.0) / 2.0, min=0.0, max=1.0)`
			`img_np = (255.0 * img_t).cpu().numpy().astype(np.uint8)[0]`
			`if colorspace == "L":`
			`img_np = img_np[:, :, 0]`
			`return Image.fromarray(img_np, colorspace)`


			`def model_latent_to_pillow_img(latent: torch.Tensor) -> PIL.Image.Image:`
style: speed up linting and autoformatting. fix lints 2023-09-29 08:13:50 +00:00			`from imaginairy.model_manager import get_current_diffusion_model`
feature: boolean logic masks Specify advanced text based masks using boolean logic and strength modifiers. Mask descriptions must be lowercase. Keywords uppercase. Valid symbols: `AND`, `OR`, `NOT`, `()`, and mask strength modifier `{1.5}` where `+` can be any of `+ - /`. Single-character boolean operators also work. When writing strength modifies know that pixel values are between 0 and 1. - feature: apply mask edits to original files - feature: auto-rotate images if exif data specifies to do so - fix: accept mask images in command line 2022-09-24 05:58:48 +00:00
refactor: cleanup image generation code 2023-02-15 16:02:36 +00:00			`if len(latent.shape) == 3:`
			`latent = latent.unsqueeze(0)`
			`if latent.shape[0] != 1:`
			`raise ValueError("Only batch size 1 supported")`
feature: inpainting model support; improved model manager 2022-10-23 21:46:45 +00:00			`model = get_current_diffusion_model()`
feature: use refiners library for generation BREAKING CHANGE - stable diffusion 1.5 + inpainting working - self-attention guidance working. improves image generation quality - tile-mode working - inpainting self-attention guidance working disable/broken features: - sd 1.4, 2.0, 2.1 - most of the samplers - pix2pix edit - most of the controlnets - memory management - python 3.8 support wip 2023-11-16 03:46:56 +00:00			`img_t = model.lda.decode(latent)`
refactor: cleanup image generation code 2023-02-15 16:02:36 +00:00			`return torch_img_to_pillow_img(img_t)`


			`def model_latents_to_pillow_imgs(latents: torch.Tensor) -> Sequence[PIL.Image.Image]:`
			`return [model_latent_to_pillow_img(latent) for latent in latents]`
feature: boolean logic masks Specify advanced text based masks using boolean logic and strength modifiers. Mask descriptions must be lowercase. Keywords uppercase. Valid symbols: `AND`, `OR`, `NOT`, `()`, and mask strength modifier `{1.5}` where `+` can be any of `+ - /`. Single-character boolean operators also work. When writing strength modifies know that pixel values are between 0 and 1. - feature: apply mask edits to original files - feature: auto-rotate images if exif data specifies to do so - fix: accept mask images in command line 2022-09-24 05:58:48 +00:00

			`def pillow_img_to_model_latent(model, img, batch_size=1, half=True):`
			`init_image = pillow_img_to_torch_image(img).to(get_device())`
			`init_image = repeat(init_image, "1 ... -> b ...", b=batch_size)`
			`if half:`
			`return model.get_first_stage_encoding(`
			`model.encode_first_stage(init_image.half())`
			`)`
			`return model.get_first_stage_encoding(model.encode_first_stage(init_image))`
feature: image edit gifs and demo-reels 2023-01-22 01:36:47 +00:00

feature: add compilation animations (#224) - add generation/compare gifs 2023-01-29 01:16:47 +00:00			`def imgpaths_to_imgs(imgpaths):`
			`imgs = []`
			`for imgpath in imgpaths:`
			`if isinstance(imgpath, str):`
			`img = LazyLoadingImage(filepath=imgpath)`
			`imgs.append(img)`
			`else:`
			`imgs.append(imgpath)`

			`return imgs`


			`def add_caption_to_image(`
			`img, caption, font_size=16, font_path=f"{PKG_ROOT}/data/DejaVuSans.ttf"`
			`):`
			`draw = ImageDraw.Draw(img)`

			`font = ImageFont.truetype(font_path, font_size)`

			`x = 15`
			`y = img.height - 15 - font_size`
feature: image edit gifs and demo-reels 2023-01-22 01:36:47 +00:00
feature: add compilation animations (#224) - add generation/compare gifs 2023-01-29 01:16:47 +00:00			`draw.text(`
			`(x, y),`
			`caption,`
			`font=font,`
			`fill=(255, 255, 255),`
			`stroke_width=3,`
			`stroke_fill=(0, 0, 0),`
feature: image edit gifs and demo-reels 2023-01-22 01:36:47 +00:00			`)`