feature: adds ability to use a QR code as the control image

feature: adds controlnet qrcode image generation.
feature: adds control net for qrcode image generation.
pull/414/head
jaydrennan 6 months ago committed by Bryce Drennan
parent 62de446a92
commit 3f3e080d39

@ -59,6 +59,7 @@ from imaginairy.cli.shared import (
"inpaint",
"details",
"colorize",
"qrcode",
]
),
help="how the control image is used as signal",

@ -276,6 +276,13 @@ CONTROL_CONFIGS = [
config_path="configs/control-net-v15.yaml",
weights_location="https://huggingface.co/ioclab/control_v1p_sd15_brightness/resolve/8509361eb1ba89c03839040ed8c75e5f11bbd9c5/diffusion_pytorch_model.safetensors",
),
ControlConfig(
name="qrcode",
control_type="qrcode",
config_path="configs/control-net-v15.yaml",
weights_location="https://huggingface.co/monster-labs/control_v1p_sd15_qrcode_monster/resolve/4a946e610f670c4cd6cf46b8641fca190e4f56c4/diffusion_pytorch_model.safetensors",
aliases=["qrcode"],
),
]
CONTROL_CONFIG_SHORTCUTS: dict[str, ControlConfig] = {}

@ -239,6 +239,64 @@ def noop(img: "Tensor") -> "Tensor":
FunctionType = Union["Callable[[Tensor, Tensor], Tensor]", "Callable[[Tensor], Tensor]"]
def adaptive_threshold_binarize(img: "Tensor") -> "Tensor":
    """
    Binarize a batch of images using OpenCV adaptive (Gaussian) thresholding.

    Args:
        img: 4d tensor whose dim 1 has 3 channels and whose values all lie
            in [-1, 1].

    Returns:
        A 3-channel tensor with values between 0 and 1.

        * If the three input channels are already (approximately) equal, the
          input is only rescaled from [-1, 1] to [0, 1] and returned as is,
          without thresholding.
        * Otherwise each image is thresholded and the single-channel result
          (values 0.0 or 1.0) is repeated across 3 channels and placed on
          the device reported by ``get_device()``.

    Raises:
        ValueError: if ``img`` is not 4d, does not have 3 channels, or holds
            values outside [-1, 1].
    """
    import cv2
    import numpy as np
    import torch

    from imaginairy.utils import get_device

    if img.dim() != 4:
        raise ValueError("Input should be a 4d tensor")
    if img.size(1) != 3:
        raise ValueError("Input should have 3 channels")
    if not torch.all((img >= -1) & (img <= 1)):
        raise ValueError("All tensor values must be between -1 and 1")

    normalized = (img + 1) / 2

    # An image whose channels already match is returned rescaled but
    # un-thresholded.
    channels_match = torch.allclose(
        normalized[:, 0, :, :], normalized[:, 1, :, :]
    ) and torch.allclose(normalized[:, 1, :, :], normalized[:, 2, :, :])
    if channels_match:
        return normalized

    # NOTE(review): assumes to_grayscale() returns values in [0, 1]; anything
    # outside that range would wrap around in the uint8 cast below - confirm.
    grayscale = to_grayscale(img)

    # OpenCV works on host memory, so detach and move to CPU before handing
    # the data to numpy; calling .numpy() directly fails for tensors on an
    # accelerator or tensors that require grad.
    grayscale_np = grayscale[:, 0, :, :].detach().cpu().numpy()

    block_size = 129  # side of the pixel neighbourhood; must be odd
    offset = 2  # constant subtracted from the weighted neighbourhood mean

    # Threshold each image into a fresh array rather than writing back into
    # the buffer shared with the grayscale tensor.
    thresholded = np.stack(
        [
            cv2.adaptiveThreshold(
                (plane * 255).astype(np.uint8),
                255,
                cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                cv2.THRESH_BINARY,
                block_size,
                offset,
            )
            for plane in grayscale_np
        ]
    )

    # Stay in float32 so the tensor can be moved to devices that do not
    # support float64.
    binary_np = thresholded.astype(np.float32) / 255.0
    binary = torch.from_numpy(binary_np).unsqueeze(1).to(get_device()).float()
    return binary.repeat(1, 3, 1, 1)
CONTROL_MODES: Dict[str, FunctionType] = {
"canny": create_canny_edges,
"depth": create_depth_map,
@ -252,4 +310,5 @@ CONTROL_MODES: Dict[str, FunctionType] = {
"inpaint": inpaint_prep,
# "details": noop,
"colorize": to_grayscale,
"qrcode": adaptive_threshold_binarize,
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 39 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 48 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 31 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 405 KiB

@ -325,6 +325,13 @@ def test_controlnet(filename_base_for_outputs, control_mode):
mode=control_mode,
image=mask_image,
)
elif control_mode == "qrcode":
prompt_text = "a fruit salad"
swirl_img = LazyLoadingImage(filepath=f"{TESTS_FOLDER}/data/swirl.jpeg")
control_input = ControlInput(
mode=control_mode,
image=swirl_img,
)
prompt = ImaginePrompt(
prompt_text,

Loading…
Cancel
Save