feature: add "shuffle" control mode

The image is generated from elements of the control image, similar to style transfer.
2024-10-31 03:20:40 +00:00 · 2023-05-05 00:29:43 -07:00 · 2023-05-05 00:29:43 -07:00 · 476a81a967
commit 476a81a967
parent 750d4f7ea8
9 changed files with 168 additions and 3 deletions
--- a/README.md
+++ b/README.md
@ -91,6 +91,19 @@ imagine --control-image bird.jpg  --control-mode normal  "a bird"
    <img src="assets/bird-normal-generated.jpg" height="256">
 </p>

+**Image Shuffle Control**
+
+Generates an image from elements of the control image, somewhat like style transfer.
+```bash
+imagine --control-image pearl-girl.jpg  --control-mode shuffle  "a clown"
+```
+The middle image is the "shuffled" version of the input image.
+<p float="left">
+    <img src="assets/girl_with_a_pearl_earring.jpg" height="256">
+    <img src="assets/pearl_shuffle_019331_1_kdpmpp2m15_PS7.5_img2img-0.0_a_clown.jpg" height="256">
+    <img src="assets/pearl_shuffle_clown_019331_1_kdpmpp2m15_PS7.5_img2img-0.0_a_clown.jpg" height="256">
+</p>
+

 ###  Instruction based image edits [by InstructPix2Pix](https://github.com/timothybrooks/instruct-pix2pix)
 Just tell imaginairy how to edit the image and it will do it for you!
@ -399,8 +412,9 @@ docker run -it --gpus all -v $HOME/.cache/huggingface:/root/.cache/huggingface -

 ## ChangeLog

- feature: upgrade to [controlnet 1.1](https://github.com/lllyasviel/ControlNet-v1-1-nightly)
- fix: controlnet now works with all sd1.5 based models
+- 🎉 feature: add "shuffle" control mode. The image is generated from elements of the control image, similar to style transfer.
+- 🎉 feature: upgrade to [controlnet 1.1](https://github.com/lllyasviel/ControlNet-v1-1-nightly)
+- 🎉 fix: controlnet now works with all SD 1.5 based models
 - fix: raw control images are now properly loaded. fixes #296
 - fix: filenames start numbers after latest image, even if some previous images were deleted

--- a/assets/pearl_shuffle_019331_1_kdpmpp2m15_PS7.5_img2img-0.0_a_clown.jpg
+++ b/assets/pearl_shuffle_019331_1_kdpmpp2m15_PS7.5_img2img-0.0_a_clown.jpg
--- a/assets/pearl_shuffle_clown_019331_1_kdpmpp2m15_PS7.5_img2img-0.0_a_clown.jpg
+++ b/assets/pearl_shuffle_clown_019331_1_kdpmpp2m15_PS7.5_img2img-0.0_a_clown.jpg
--- a/imaginairy/cli/imagine.py
+++ b/imaginairy/cli/imagine.py
@ -40,7 +40,7 @@ from imaginairy.cli.shared import (
    "--control-mode",
    default=None,
    show_default=False,
-    type=click.Choice(["", "canny", "depth", "normal", "hed", "openpose"]),
+    type=click.Choice(["", "canny", "depth", "normal", "hed", "openpose", "shuffle"]),
    help="how the control image is used as signal",
 )
@click.pass_context
--- a/imaginairy/config.py
+++ b/imaginairy/config.py
@ -198,6 +198,13 @@ CONTROLNET_CONFIGS = [
        weights_url="https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/69fc48b9cbd98661f6d0288dc59b59a5ccb32a6b/control_v11p_sd15_openpose.pth",
        alias="openpose",
    ),
+    ControlNetConfig(
+        short_name="shuffle15",
+        control_type="shuffle",
+        config_path="configs/control-net-v15-pool.yaml",
+        weights_url="https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/69fc48b9cbd98661f6d0288dc59b59a5ccb32a6b/control_v11e_sd15_shuffle.pth",
+        alias="shuffle",
+    ),
 ]

 CONTROLNET_CONFIG_SHORTCUTS = {m.short_name: m for m in CONTROLNET_CONFIGS}
--- a/imaginairy/configs/control-net-v15-pool.yaml
+++ b/imaginairy/configs/control-net-v15-pool.yaml
@ -0,0 +1,80 @@
+# Model config for the "shuffle" ControlNet mode.
+# Referenced from imaginairy/config.py as "configs/control-net-v15-pool.yaml"
+# by the ControlNetConfig entry with short_name "shuffle15".
+model:
+  target: imaginairy.modules.cldm.ControlLDM
+  params:
+    linear_start: 0.00085
+    linear_end: 0.0120
+    num_timesteps_cond: 1
+    log_every_t: 200
+    timesteps: 1000
+    first_stage_key: "image"
+    cond_stage_key: "txt"
+    control_key: "hint"
+    image_size: 64
+    channels: 4
+    cond_stage_trainable: false
+    conditioning_key: crossattn
+    monitor: val/loss_simple_ema
+    scale_factor: 0.18215
+    use_ema: False
+    only_mid_control: False
+    # NOTE(review): presumably the "pool" in this file's name and the setting
+    # that sets it apart from the other control-net-v15 config -- confirm.
+    global_average_pooling: True
+
+    # Main (controlled) UNet.
+    unet_config:
+      target: imaginairy.modules.cldm.ControlledUnetModel
+      params:
+        use_checkpoint: True
+        image_size: 32 # unused
+        in_channels: 4
+        out_channels: 4
+        model_channels: 320
+        attention_resolutions: [ 4, 2, 1 ]
+        num_res_blocks: 2
+        channel_mult: [ 1, 2, 4, 4 ]
+        num_heads: 8
+        use_spatial_transformer: True
+        transformer_depth: 1
+        context_dim: 768
+        legacy: False
+
+    # Autoencoder used as the first stage.
+    first_stage_config:
+      target: imaginairy.modules.autoencoder.AutoencoderKL
+      params:
+        embed_dim: 4
+        monitor: val/rec_loss
+        ddconfig:
+          double_z: true
+          z_channels: 4
+          resolution: 256
+          in_channels: 3
+          out_ch: 3
+          ch: 128
+          ch_mult:
+          - 1
+          - 2
+          - 4
+          - 4
+          num_res_blocks: 2
+          attn_resolutions: []
+          dropout: 0.0
+        lossconfig:
+          target: torch.nn.Identity
+
+    # Text conditioning encoder; no params are overridden here.
+    cond_stage_config:
+      target: imaginairy.modules.clip_embedders.FrozenCLIPEmbedder
+
+    # The ControlNet itself; its block layout mirrors unet_config above
+    # (same model_channels, channel_mult, attention_resolutions, context_dim).
+    control_stage_config:
+      target: imaginairy.modules.cldm.ControlNet
+      params:
+        image_size: 32 # unused
+        in_channels: 4
+        hint_channels: 3
+        model_channels: 320
+        attention_resolutions: [ 4, 2, 1 ]
+        num_res_blocks: 2
+        channel_mult: [ 1, 2, 4, 4 ]
+        num_heads: 8
+        use_spatial_transformer: True
+        transformer_depth: 1
+        context_dim: 768
+        use_checkpoint: True
+        legacy: False
--- a/imaginairy/img_processors/control_modes.py
+++ b/imaginairy/img_processors/control_modes.py
@ -127,6 +127,67 @@ def create_pose_map(img_t):
    return pose_t


+def make_noise_disk(H, W, C, F):
+    """Build a smooth random field of size H x W with C channels, scaled to [0, 1].
+
+    A coarse grid of uniform noise is upsampled with bicubic interpolation,
+    cropped to the requested size and then min/max normalised.
+
+    Args:
+        H: output height in pixels.
+        W: output width in pixels.
+        C: number of channels.
+        F: upsampling factor; the coarse grid has roughly one sample per
+            F pixels, so a larger F gives smoother noise.
+
+    Returns:
+        Float array of shape (H, W, C) whose minimum is 0 and maximum is 1.
+
+    Note:
+        Values come from NumPy's global RNG (``np.random``), so the result
+        depends on the global seed.
+    """
+    import cv2
+    import numpy as np
+
+    # Coarse grid: one sample per F pixels plus a margin of two samples.
+    noise = np.random.uniform(low=0, high=1, size=((H // F) + 2, (W // F) + 2, C))
+    # cv2.resize takes the target size as (width, height); upsample to the
+    # requested size plus a border of F pixels on every side.
+    noise = cv2.resize(noise, (W + 2 * F, H + 2 * F), interpolation=cv2.INTER_CUBIC)
+    # Drop the F-pixel border, keeping the central H x W window.
+    noise = noise[F : F + H, F : F + W]
+    # Rescale in place so the values span exactly [0, 1].
+    noise -= np.min(noise)
+    noise /= np.max(noise)
+    if C == 1:
+        # Re-add the channel axis for the single-channel case
+        # (the resized array is expected to be 2-D here).
+        noise = noise[:, :, None]
+    return noise
+
+
+def shuffle_map_np(img, h=None, w=None, f=256):
+    """Warp ``img`` by sampling it through a random, smooth coordinate field.
+
+    Args:
+        img: array of shape (H, W, C).
+        h: output height; defaults to the input height.
+        w: output width; defaults to the input width.
+        f: smoothness factor forwarded to ``make_noise_disk``.
+
+    Returns:
+        The result of ``cv2.remap`` on ``img`` using the random field
+        (bilinear interpolation).
+    """
+    import cv2
+    import numpy as np
+
+    H, W, C = img.shape
+    if h is None:
+        h = H
+    if w is None:
+        w = W
+
+    # Two independent [0, 1] noise fields scaled to source pixel coordinates:
+    # x spans [0, W - 1] and y spans [0, H - 1]. x is drawn before y, so the
+    # order matters for seeded reproducibility.
+    x = make_noise_disk(h, w, 1, f) * float(W - 1)
+    y = make_noise_disk(h, w, 1, f) * float(H - 1)
+    # Stack into an (h, w, 2) float32 map of (x, y) source positions.
+    flow = np.concatenate([x, y], axis=2).astype(np.float32)
+    return cv2.remap(img, flow, None, cv2.INTER_LINEAR)
+
+
+def shuffle_map_torch(tensor, h=None, w=None, f=256):
+    import torch
+
+    # Assuming the input tensor is in shape (B, C, H, W)
+    B, C, H, W = tensor.shape
+    device = tensor.device
+    tensor = tensor.cpu()
+
+    # Create an empty tensor with the same shape as input tensor to store the shuffled images
+    shuffled_tensor = torch.empty_like(tensor)
+
+    # Iterate over the batch and apply the shuffle_map function to each image
+    for b in range(B):
+        # Convert the input torch tensor to a numpy array
+        img_np = tensor[b].numpy().transpose(1, 2, 0)  # Shape (H, W, C)
+
+        # Call the shuffle_map function with the numpy array as input
+        shuffled_np = shuffle_map_np(img_np, h, w, f)
+
+        # Convert the shuffled numpy array back to a torch tensor and store it in the shuffled_tensor
+        shuffled_tensor[b] = torch.from_numpy(
+            shuffled_np.transpose(2, 0, 1)
+        )  # Shape (C, H, W)
+    shuffled_tensor = (shuffled_tensor + 1.0) / 2.0
+    return shuffled_tensor.to(device)
+
+
+def noop(img):
+    """Return ``img`` unchanged (identity function)."""
+    return img
+
+
 CONTROL_MODES = {
    "canny": create_canny_edges,
    "depth": create_depth_map,
@ -135,4 +196,5 @@ CONTROL_MODES = {
    # "mlsd": create_mlsd_edges,
    "openpose": create_pose_map,
    # "scribble": None,
+    "shuffle": shuffle_map_torch,
 }
--- a/tests/expected_output/test_control_images[shuffle-shuffle_map_torch]_.png
+++ b/tests/expected_output/test_control_images[shuffle-shuffle_map_torch]_.png
--- a/tests/img_processors/test_control_modes.py
+++ b/tests/img_processors/test_control_modes.py
@ -1,4 +1,5 @@
 import pytest
+from lightning_fabric import seed_everything

 from imaginairy import LazyLoadingImage
 from imaginairy.img_processors.control_modes import CONTROL_MODES
@ -16,6 +17,7 @@ control_mode_params = list(CONTROL_MODES.items())

@pytest.mark.parametrize("control_name,control_func", control_mode_params)
 def test_control_images(filename_base_for_outputs, control_func, control_name):
+    seed_everything(42)
    img = LazyLoadingImage(filepath=f"{TESTS_FOLDER}/data/bench2.png")
    img_t = pillow_img_to_torch_image(img)