feature: add colorization controlnet. improve `aimg colorize` command

1 year ago · dc8f8d5a3d
parent 7e62297f73
commit dc8f8d5a3d
8 changed files with 103 additions and 27 deletions
--- a/README.md
+++ b/README.md
@@ -130,6 +130,21 @@ imagine --control-image "assets/wishbone.jpg" --control-mode details "sharp focu
 </p>


+### Image (re)Colorization (using brightness control)
+Colorize black and white images or re-color existing images.
+
+The generated colors will be applied back to the original image. You can either provide a caption or 
+allow the tool to generate one for you.
+
+```bash
+aimg colorize pearl-girl.jpg --caption "photo of a woman"
+```
+<p float="left">
+    <img src="assets/girl_with_a_pearl_earring.jpg" height="256">
+    <img src="assets/pearl-gray.jpg" height="256">
+    <img src="assets/pearl-recolor-a.jpg" height="256">
+</p>
+
 ###  Instruction based image edits [by InstructPix2Pix](https://github.com/timothybrooks/instruct-pix2pix)
 Just tell imaginairy how to edit the image and it will do it for you!
 <p float="left">
@@ -468,10 +483,12 @@ A: The AI models are cached in `~/.cache/` (or `HUGGINGFACE_HUB_CACHE`). To dele

 ## ChangeLog

+**13.0.0**
+- 🎉 feature: multi-controlnet support. pass in multiple `--control-mode`, `--control-image`, and `--control-image-raw` arguments.
+- 🎉 feature: "better" memory management. If GPU is full, least-recently-used model is moved to RAM.
+- feature: add colorization controlnet. improve `aimg colorize` command
 - feature: [disabled] inpainting controlnet can be used instead of finetuned inpainting model
  - The inpainting controlnet doesn't work as well as the finetuned model
- feature: multi-controlnet support. pass in multiple `--control-mode`, `--control-image`, and `--control-image-raw` arguments.
- feature: "better" memory management. If GPU is full, least-recently-used model is moved to RAM.
 - feature: python interface allows configuration of controlnet strength
 - fix: hide the "triton" error messages
 - feature: show full stack trace on error in cli
--- a/assets/pearl-gray.jpg
+++ b/assets/pearl-gray.jpg
--- a/assets/pearl-recolor-a.jpg
+++ b/assets/pearl-recolor-a.jpg
--- a/imaginairy/cli/colorize.py
+++ b/imaginairy/cli/colorize.py
@@ -21,8 +21,14 @@ logger = logging.getLogger(__name__)
    type=int,
    help="How many times to repeat the renders. If you provide two prompts and --repeat=3 then six images will be generated.",
 )
+@click.option(
+    "--caption",
+    default="",
+    show_default=False,
+    help="Description of the photo. If not provided, it will be generated automatically.",
+)
@click.command("colorize")
-def colorize_cmd(image_filepaths, outdir, repeats):
+def colorize_cmd(image_filepaths, outdir, repeats, caption):
    """
    Colorize images using AI. Doesn't work very well yet.
    """
@@ -52,6 +58,6 @@ def colorize_cmd(image_filepaths, outdir, repeats):
                img = LazyLoadingImage(filepath=p)
            logger.info(f"Colorizing {p} and saving it to {savepath}")

-            img = colorize_img(img)
+            img = colorize_img(img, caption=caption)

            img.save(savepath)
--- a/imaginairy/cli/imagine.py
+++ b/imaginairy/cli/imagine.py
@@ -48,6 +48,7 @@ from imaginairy.cli.shared import _imagine_cmd, add_options, common_options
            "edit",
            "inpaint",
            "details",
+            "colorize",
        ]
    ),
    help="how the control image is used as signal",
--- a/imaginairy/colorize.py
+++ b/imaginairy/colorize.py
@@ -1,38 +1,48 @@
-from PIL import Image
+import logging
+
+from PIL import Image, ImageEnhance, ImageStat

 from imaginairy import ImaginePrompt, imagine
 from imaginairy.enhancers.describe_image_blip import generate_caption
 from imaginairy.schema import ControlNetInput

+logger = logging.getLogger(__name__)

-def colorize_img(img):
-    caption = generate_caption(img)
-    caption = caption.replace("black and white", "color")
-    control_input = ControlNetInput(mode="hed", image=img)
-    prompt = ImaginePrompt(
-        prompt=caption,
-        init_image=img,
-        init_image_strength=0.01,
-        control_inputs=[control_input],
-        negative_prompt="black and white",
-        # width=img.width,
-        # height=img.height,
-    )
-    result = list(imagine(prompt))[0]
-    colorized_img = replace_color(img, result.images["generated"])

+def colorize_img(img, max_width=1024, max_height=1024, caption=None):
+    if not caption:
+        caption = generate_caption(img, min_length=10)
+        caption = caption.replace("black and white", "color")
+        caption = caption.replace("old picture", "professional color photo")
+        caption = caption.replace("vintage photograph", "professional color photo")
+        caption = caption.replace("old photo", "professional color photo")
+        caption = caption.replace("vintage photo", "professional color photo")
+        caption = caption.replace("old color", "color")
+        caption = caption.replace(" old fashioned ", " ")
+        caption = caption.replace(" old time ", " ")
+        caption = caption.replace(" old ", " ")
+        logger.info(caption)
+    control_inputs = [
+        ControlNetInput(mode="colorize", image=img, strength=2),
+    ]
+    prompt_add = ". color photo, sharp-focus, highly detailed, intricate, Canon 5D"
    prompt = ImaginePrompt(
-        prompt=caption,
-        init_image=colorized_img,
-        init_image_strength=0.1,
-        control_inputs=[control_input],
-        negative_prompt="black and white",
-        width=min(img.width, 1024),
-        height=min(img.height, 1024),
+        prompt=f"{caption}{prompt_add}",
+        init_image=img,
+        init_image_strength=0.0,
+        control_inputs=control_inputs,
+        width=min(img.width, max_width),
+        height=min(img.height, max_height),
        steps=30,
+        prompt_strength=12,
    )
    result = list(imagine(prompt))[0]
    colorized_img = replace_color(img, result.images["generated"])
+
+    # allows the algorithm some leeway for the overall brightness of the image
+    # results look better with this
+    colorized_img = match_brightness(colorized_img, result.images["generated"])
+
    return colorized_img


@@ -43,3 +53,19 @@ def replace_color(target_img, color_src_img):
    hue, saturation, _ = color_src_img.convert("HSV").split()

    return Image.merge("HSV", (hue, saturation, value)).convert("RGB")
+
+
+def calculate_brightness(image):
+    greyscale_image = image.convert("L")
+    stat = ImageStat.Stat(greyscale_image)
+    return stat.mean[0]
+
+
+def match_brightness(target_img, source_img):
+    target_brightness = calculate_brightness(target_img)
+    source_brightness = calculate_brightness(source_img)
+
+    brightness_factor = source_brightness / target_brightness
+
+    enhancer = ImageEnhance.Brightness(target_img)
+    return enhancer.enhance(brightness_factor)
--- a/imaginairy/config.py
+++ b/imaginairy/config.py
@@ -236,6 +236,13 @@ CONTROLNET_CONFIGS = [
        weights_url="https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/69fc48b9cbd98661f6d0288dc59b59a5ccb32a6b/control_v11f1e_sd15_tile.pth",
        alias="details",
    ),
+    ControlNetConfig(
+        short_name="colorize15",
+        control_type="colorize",
+        config_path="configs/control-net-v15.yaml",
+        weights_url="https://huggingface.co/ioclab/ioc-controlnet/resolve/87b589ef8aa80c37be814fffc31203853a2928c1/models/control_v1p_sd15_brightness.safetensors",
+        alias="colorize",
+    ),
 ]

 CONTROLNET_CONFIG_SHORTCUTS = {m.short_name: m for m in CONTROLNET_CONFIGS}
--- a/imaginairy/img_processors/control_modes.py
+++ b/imaginairy/img_processors/control_modes.py
@@ -207,6 +207,24 @@ def inpaint_prep(mask_image_t, target_image_t):
    return output_image_t


+def to_grayscale(img):
+    # The dimensions of input should be (batch_size, channels, height, width)
+    assert img.dim() == 4 and img.size(1) == 3
+
+    # Apply the formula to convert to grayscale.
+    gray = (
+        0.2989 * img[:, 0, :, :] + 0.5870 * img[:, 1, :, :] + 0.1140 * img[:, 2, :, :]
+    )
+
+    # Expand the dimensions so it's a 1-channel image.
+    gray = gray.unsqueeze(1)
+
+    # Duplicate the single channel to have 3 identical channels
+    gray_3_channels = gray.repeat(1, 3, 1, 1)
+
+    return (gray_3_channels + 1.0) / 2.0
+
+
 def noop(img):
    return (img + 1.0) / 2.0

@@ -223,4 +241,5 @@ CONTROL_MODES = {
    "edit": noop,
    "inpaint": inpaint_prep,
    "details": noop,
+    "colorize": to_grayscale,
 }