# imaginAIry/imaginairy/training_tools/image_prep.py

"""Functions for image preprocessing and generation"""

import logging
import os
import os.path
import re

from PIL import Image
from tqdm import tqdm

from imaginairy.api import imagine
from imaginairy.enhancers.face_restoration_codeformer import enhance_faces
from imaginairy.enhancers.facecrop import detect_faces, generate_face_crops
from imaginairy.enhancers.upscale_realesrgan import upscale_image
from imaginairy.schema import ImaginePrompt, LazyLoadingImage
from imaginairy.vendored.smart_crop import SmartCrop

logger = logging.getLogger(__name__)


def get_image_filenames(folder):
    """
    Return the names of the image entries found directly inside `folder`.

    An entry counts as an image when its name ends in .jpg, .jpeg or .png
    (case-insensitive). Names starting with "." are excluded. The returned
    names do not include the folder path and keep the order produced by
    os.listdir.
    """
    image_suffixes = (".jpg", ".jpeg", ".png")
    return [
        entry
        for entry in os.listdir(folder)
        if entry.lower().endswith(image_suffixes) and not entry.startswith(".")
    ]


def prep_images(
    images_dir, is_person=False, output_folder_name="prepped-images", target_size=512
):
    """
    Crop and resize every image in a directory in preparation for training.

    Results are saved as JPEGs in `output_folder_name` inside `images_dir`. An output file that already
    exists is skipped rather than regenerated.

    If is_person=True, the face is detected and several crops at different zoom levels are produced. Images
    with no face or with more than one face are skipped. Crops smaller than target_size are resized up and
    then run through the face restoration model.

    For non-person images, the image is scaled so its shortest side equals target_size and the smartcrop
    algorithm picks the most interesting square region. If the input image is too small it is upscaled first.

    Prep will go a lot faster if all the images are big enough to not require upscaling.
    """
    output_folder = os.path.join(images_dir, output_folder_name)
    os.makedirs(output_folder, exist_ok=True)
    logger.info(f"Prepping images in {images_dir} to {output_folder}")

    progress = tqdm(get_image_filenames(images_dir))
    for filename in progress:
        progress.set_description(filename)
        source_path = os.path.join(images_dir, filename)
        img = LazyLoadingImage(filepath=source_path).convert("RGB")
        if is_person:
            _save_face_crops(img, filename, output_folder, target_size)
        else:
            _save_smart_crop(img, filename, output_folder, target_size)

    logger.info(f"Image Prep complete. Review output at {output_folder}")


def _save_face_crops(img, filename, output_folder, target_size):
    """Save the square face crops of an image that contains exactly one detected face."""
    face_rois = detect_faces(img)
    face_count = len(face_rois)
    if face_count == 0:
        logger.info(f"No faces detected in image {filename}, skipping")
        return
    if face_count > 1:
        logger.info(f"Multiple faces detected in image {filename}, skipping")
        return

    output_size = (target_size, target_size)
    candidate_crops = generate_face_crops(
        face_rois[0], max_width=img.width, max_height=img.height
    )
    for n, face_roi_crop in enumerate(candidate_crops):
        cropped_output_path = os.path.join(
            output_folder, f"{filename}_[alt-{n:02d}].jpg"
        )
        if os.path.exists(cropped_output_path):
            logger.debug(f"Skipping {cropped_output_path} because it already exists")
            continue

        left, top, right, bottom = face_roi_crop
        crop_width = right - left
        crop_height = bottom - top
        if crop_width != crop_height:
            logger.info(
                f"Face ROI crop for {filename} {crop_width}x{crop_height} is not square, skipping"
            )
            continue

        face_img = img.crop(face_roi_crop)
        # Crops below the target size get enlarged by the resize, so they are
        # additionally passed through face restoration afterwards.
        needs_restoration = crop_width < target_size
        if needs_restoration:
            logger.info(f"Upscaling {filename} {face_roi_crop}")
        face_img = face_img.resize(output_size, resample=Image.Resampling.LANCZOS)
        if needs_restoration:
            face_img = enhance_faces(face_img, fidelity=1)
        face_img.save(cropped_output_path, quality=95)


def _save_smart_crop(img, filename, output_folder, target_size):
    """Save one target_size square of a non-person image, chosen by smartcrop."""
    # Only one crop is written per non-person image, so the index is always 0.
    n = 0
    cropped_output_path = os.path.join(output_folder, f"{filename}_{n}.jpg")
    if os.path.exists(cropped_output_path):
        logger.debug(f"Skipping {cropped_output_path} because it already exists")
        return

    if min(img.width, img.height) < target_size:
        # upscale the image if either side is too small
        logger.info(f"Upscaling {filename}")
        img = upscale_image(img)

    # Scale so the shortest side equals target_size; the longer side is
    # trimmed by the crop below.
    scale_factor = target_size / min(img.width, img.height)
    scaled_size = (
        int(round(img.width * scale_factor)),
        int(round(img.height * scale_factor)),
    )
    img = img.resize(scaled_size, resample=Image.Resampling.LANCZOS)

    top_crop = SmartCrop().crop(img, width=target_size, height=target_size)["top_crop"]
    left, top = top_crop["x"], top_crop["y"]
    box = (left, top, top_crop["width"] + left, top_crop["height"] + top)
    img.crop(box).save(cropped_output_path, quality=95)


def prompt_normalized(prompt):
    """
    Turn a prompt into a short slug usable in a filename.

    Each run of characters other than ASCII letters, digits, ".", ",", "[",
    "]" and "-" is collapsed into a single "-". The result is cut off at
    130 characters.
    """
    disallowed_run = re.compile(r"[^a-zA-Z0-9.,\[\]-]+")
    slug = disallowed_run.sub("-", prompt)
    return slug[:130]


def create_class_images(class_description, output_folder, num_images=200):
    """
    Generate images of class_description until output_folder holds num_images images.

    Images already present in output_folder (as reported by get_image_filenames) count toward
    num_images, so only the missing number is generated. The folder is created if it does not exist.

    Each new image is saved as "<count>_<slug>.jpg", where <count> is the zero-padded number of images
    present before it was added and <slug> is the normalized class description.

    Results flagged as NSFW are not saved and do not count; generation is simply retried, so the loop
    does not end if every result is flagged.

    NOTE(review): because the file index is the running image count, a folder whose existing files are
    not numbered contiguously from 000 could have a file saved over an existing name -- confirm.
    """
    # Create the destination up front: listing a missing folder would raise
    # FileNotFoundError before a single image could be generated.
    os.makedirs(output_folder, exist_ok=True)

    existing_image_count = len(get_image_filenames(output_folder))
    class_slug = prompt_normalized(class_description)

    while existing_image_count < num_images:
        # The prompt is rebuilt on every attempt rather than reused
        # (presumably so each attempt gets a fresh seed -- TODO confirm).
        prompt = ImaginePrompt(class_description, steps=20)
        result = next(iter(imagine([prompt])))
        if result.is_nsfw:
            continue
        dest = os.path.join(
            output_folder, f"{existing_image_count:03d}_{class_slug}.jpg"
        )
        result.save(dest)
        existing_image_count += 1