# imaginAIry/imaginairy/img_utils.py
#
# Listing metadata from the source viewer: 68 lines, 2.3 KiB, Python.

from typing import Sequence
import numpy as np
import PIL
import torch
from einops import rearrange, repeat
from PIL import Image
from imaginairy.utils import get_device
def pillow_fit_image_within(image: PIL.Image.Image, max_height=512, max_width=512):
    """Return an RGB version of `image` scaled to fit within the given bounds.

    The image is scaled (up or down) by the largest aspect-preserving factor
    that keeps it inside ``max_width`` x ``max_height``. Each side is then
    rounded down to a multiple of 64, so the final aspect ratio can differ
    slightly from the input's.

    Args:
        image: The source image. It is converted to RGB before resizing.
        max_height: Upper bound for the output height, in pixels.
        max_width: Upper bound for the output width, in pixels.

    Returns:
        The RGB image, resized with LANCZOS resampling when the target size
        differs from the converted image's size; otherwise the RGB-converted
        image unchanged.
    """
    image = image.convert("RGB")
    w, h = image.size

    # Pick the limiting side by cross-multiplying instead of comparing
    # max_width / w with max_height / h, and scale with integer arithmetic.
    # A float ratio can land just below the exact value (e.g. 512 / 784 * 784
    # evaluates to 511.99999999999994), and the multiple-of-64 snap below would
    # turn that single lost pixel into a lost 64px step.
    if max_width * h <= max_height * w:
        # Width is the limiting side (also covers images already at the bound).
        new_w, new_h = max_width, (h * max_width) // w
    else:
        # Height is the limiting side.
        new_w, new_h = (w * max_height) // h, max_height
    # int() keeps the sizes integral even if the bounds were passed as floats.
    new_w, new_h = int(new_w), int(new_h)

    # Snap each side down to a multiple of 64, but never below 64: for a very
    # thin image the snap would otherwise produce a zero-length side, which is
    # not a usable target size.
    new_w, new_h = (max(64, side - side % 64) for side in (new_w, new_h))

    if (new_w, new_h) != image.size:
        image = image.resize((new_w, new_h), resample=Image.Resampling.LANCZOS)
    return image
def pillow_img_to_torch_image(img: PIL.Image.Image):
    """Convert a PIL image into a float tensor with values in [-1, 1].

    The image is converted to RGB, scaled from 0..255 to 0..1 as float32,
    given a leading batch axis of size 1, and reordered from
    (batch, height, width, channel) to (batch, channel, height, width).

    Args:
        img: The PIL image to convert.

    Returns:
        A ``torch.Tensor`` holding the image rescaled from [0, 1] to [-1, 1].
    """
    rgb = img.convert("RGB")
    pixels = np.array(rgb).astype(np.float32) / 255.0
    # Add the batch axis, then move channels ahead of the spatial axes.
    batched = np.transpose(pixels[np.newaxis], (0, 3, 1, 2))
    tensor = torch.from_numpy(batched)
    # Map [0, 1] onto [-1, 1].
    return 2.0 * tensor - 1.0
def pillow_img_to_opencv_img(img: PIL.Image.Image):
    """Convert a PIL image into a numpy array with BGR channel order.

    The image is converted to RGB first so that the channel reversal below
    is meaningful for every input mode. Without it, single-channel images
    have no third axis to index, and RGBA images would come out as ABGR
    rather than a BGR-ordered image.

    Args:
        img: The PIL image to convert.

    Returns:
        A new three-channel numpy array whose last axis is ordered
        blue, green, red.
    """
    rgb_pixels = np.array(img.convert("RGB"))
    # Reverse the channel axis (RGB -> BGR). The slice is only a view with a
    # negative stride, so copy it to hand back a standalone array.
    return rgb_pixels[:, :, ::-1].copy()
def model_latents_to_pillow_imgs(latents: torch.Tensor) -> Sequence[PIL.Image.Image]:
    """Decode latents with the current diffusion model into PIL images.

    The latents are passed through the current diffusion model's
    ``decode_first_stage``. The result is mapped with ``(x + 1) / 2`` and
    clamped to [0, 1]; each item of the batch is then moved to the CPU,
    reordered from (channel, height, width) to (height, width, channel),
    scaled by 255 and cast to ``uint8`` to build a PIL image.

    Args:
        latents: Batch of latents accepted by the model's
            ``decode_first_stage``.

    Returns:
        A list with one PIL image per item in the decoded batch.
    """
    # Imported here rather than at module level, as in the original code.
    from imaginairy.model_manager import get_current_diffusion_model  # noqa

    decoded = get_current_diffusion_model().decode_first_stage(latents)
    decoded = torch.clamp((decoded + 1.0) / 2.0, min=0.0, max=1.0)

    def _to_pillow(sample):
        # (c, h, w) tensor in [0, 1] -> (h, w, c) uint8 array -> PIL image.
        pixels = 255.0 * rearrange(sample.cpu().numpy(), "c h w -> h w c")
        return Image.fromarray(pixels.astype(np.uint8))

    return [_to_pillow(sample) for sample in decoded]
def pillow_img_to_model_latent(model, img, batch_size=1, half=True):
    """Encode a PIL image into a latent using `model`'s first stage.

    The image is converted with ``pillow_img_to_torch_image``, moved to the
    device returned by ``get_device()``, and repeated along its leading
    (size 1) axis ``batch_size`` times. It is then run through
    ``model.encode_first_stage`` followed by
    ``model.get_first_stage_encoding``.

    Args:
        model: Object providing ``encode_first_stage`` and
            ``get_first_stage_encoding``.
        img: The PIL image to encode.
        batch_size: Number of copies of the image in the encoded batch.
        half: When true, the image tensor is cast with ``.half()`` before
            it is encoded.

    Returns:
        Whatever ``model.get_first_stage_encoding`` returns for the encoded
        batch.
    """
    image_batch = pillow_img_to_torch_image(img).to(get_device())
    image_batch = repeat(image_batch, "1 ... -> b ...", b=batch_size)
    if half:
        image_batch = image_batch.half()
    encoded = model.encode_first_stage(image_batch)
    return model.get_first_stage_encoding(encoded)