feature: adds ability to use qrcode

feature: adds ControlNet support for QR code image generation.
6 months ago · 3f3e080d39
parent 62de446a92
commit 3f3e080d39
8 changed files with 74 additions and 0 deletions
--- a/imaginairy/cli/imagine.py
+++ b/imaginairy/cli/imagine.py
@ -59,6 +59,7 @@ from imaginairy.cli.shared import (
            "inpaint",
            "details",
            "colorize",
+            "qrcode",
        ]
    ),
    help="how the control image is used as signal",
--- a/imaginairy/config.py
+++ b/imaginairy/config.py
@ -276,6 +276,13 @@ CONTROL_CONFIGS = [
        config_path="configs/control-net-v15.yaml",
        weights_location="https://huggingface.co/ioclab/control_v1p_sd15_brightness/resolve/8509361eb1ba89c03839040ed8c75e5f11bbd9c5/diffusion_pytorch_model.safetensors",
    ),
+    ControlConfig(
+        name="qrcode",
+        control_type="qrcode",
+        config_path="configs/control-net-v15.yaml",
+        weights_location="https://huggingface.co/monster-labs/control_v1p_sd15_qrcode_monster/resolve/4a946e610f670c4cd6cf46b8641fca190e4f56c4/diffusion_pytorch_model.safetensors",
+        aliases=["qrcode"],
+    ),
 ]

 CONTROL_CONFIG_SHORTCUTS: dict[str, ControlConfig] = {}
--- a/imaginairy/img_processors/control_modes.py
+++ b/imaginairy/img_processors/control_modes.py
@ -239,6 +239,64 @@ def noop(img: "Tensor") -> "Tensor":

 FunctionType = Union["Callable[[Tensor, Tensor], Tensor]", "Callable[[Tensor], Tensor]"]

+
+def adaptive_threshold_binarize(img: "Tensor") -> "Tensor":
+    """
+    Use adaptive thresholding to binarize the image.
+
+    Using OpenCV for adaptive thresholding as it provides robust and efficient implementation.
+    The output tensor will have values between 0 and 1.
+    """
+    import cv2
+    import numpy as np
+    import torch
+
+    from imaginairy.utils import get_device
+
+    # img = img.to("cpu")
+    # img = img.to(get_device())
+
+    if img.dim() != 4:
+        raise ValueError("Input should be a 4d tensor")
+    if img.size(1) != 3:
+        raise ValueError("Input should have 3 channels")
+
+    if not torch.all((img >= -1) & (img <= 1)):
+        raise ValueError("All tensor values must be between -1 and 1")
+
+    normalized = (img + 1) / 2
+
+    # returns img if it is already grayscale
+    if torch.allclose(
+        normalized[:, 0, :, :], normalized[:, 1, :, :]
+    ) and torch.allclose(normalized[:, 1, :, :], normalized[:, 2, :, :]):
+        return normalized
+
+    # grayscale = normalized.mean(dim=1, keepdim=True)
+    grayscale = to_grayscale(img)
+    grayscale = grayscale[:, 0:1, :, :]
+
+    grayscale_np = grayscale.squeeze(1).numpy()
+
+    blockSize = 129
+    C = 2
+    for i in range(grayscale_np.shape[0]):
+        grayscale_np[i] = cv2.adaptiveThreshold(
+            (grayscale_np[i] * 255).astype(np.uint8),
+            255,
+            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
+            cv2.THRESH_BINARY,
+            blockSize,
+            C,
+        )
+
+    grayscale_np = grayscale_np / 255
+
+    binary = torch.from_numpy(grayscale_np).unsqueeze(1).to(get_device()).float()
+
+    return binary.repeat(1, 3, 1, 1)
+
+
 CONTROL_MODES: Dict[str, FunctionType] = {
    "canny": create_canny_edges,
    "depth": create_depth_map,
@ -252,4 +310,5 @@ CONTROL_MODES: Dict[str, FunctionType] = {
    "inpaint": inpaint_prep,
    # "details": noop,
    "colorize": to_grayscale,
+    "qrcode": adaptive_threshold_binarize,
 }
--- a/tests/data/swirl.jpeg
+++ b/tests/data/swirl.jpeg
--- a/tests/expected_output/test_control_images[qrcode-adaptive_threshold_binarize]_.png
+++ b/tests/expected_output/test_control_images[qrcode-adaptive_threshold_binarize]_.png
--- a/tests/expected_output/test_control_images[qrcode-otsu_binarize]_.png
+++ b/tests/expected_output/test_control_images[qrcode-otsu_binarize]_.png
--- a/tests/expected_output/test_controlnet[qrcode]_.png
+++ b/tests/expected_output/test_controlnet[qrcode]_.png
--- a/tests/test_api.py
+++ b/tests/test_api.py
@ -325,6 +325,13 @@ def test_controlnet(filename_base_for_outputs, control_mode):
            mode=control_mode,
            image=mask_image,
        )
+    elif control_mode == "qrcode":
+        prompt_text = "a fruit salad"
+        swirl_img = LazyLoadingImage(filepath=f"{TESTS_FOLDER}/data/swirl.jpeg")
+        control_input = ControlInput(
+            mode=control_mode,
+            image=swirl_img,
+        )

    prompt = ImaginePrompt(
        prompt_text,