feature: add "shuffle" control mode

The image is generated from elements of the control image, similar to style transfer.
2024-10-31 03:20:40 +00:00 · 2023-05-05 00:29:43 -07:00 · 2023-05-05 00:29:43 -07:00 · 476a81a967
commit 476a81a967
parent 750d4f7ea8
9 changed files with 168 additions and 3 deletions
--- a/README.md
+++ b/README.md
@ -91,6 +91,19 @@ imagine --control-image bird.jpg  --control-mode normal  "a bird"
    <img src="assets/bird-normal-generated.jpg" height="256">
 </p>
 **Image Shuffle Control**
 Generates the image based on elements of the control image. Kind of similar to style transfer.
 ```bash
 imagine --control-image pearl-girl.jpg  --control-mode shuffle  "a clown"
 ```
 The middle image is the "shuffled" input image.
 <p float="left">
    <img src="assets/girl_with_a_pearl_earring.jpg" height="256">
    <img src="assets/pearl_shuffle_019331_1_kdpmpp2m15_PS7.5_img2img-0.0_a_clown.jpg" height="256">
    <img src="assets/pearl_shuffle_clown_019331_1_kdpmpp2m15_PS7.5_img2img-0.0_a_clown.jpg" height="256">
 </p>
 ###  Instruction based image edits [by InstructPix2Pix](https://github.com/timothybrooks/instruct-pix2pix)
 Just tell imaginairy how to edit the image and it will do it for you!
@ -399,8 +412,9 @@ docker run -it --gpus all -v $HOME/.cache/huggingface:/root/.cache/huggingface -
 ## ChangeLog
- feature: upgrade to [controlnet 1.1](https://github.com/lllyasviel/ControlNet-v1-1-nightly)
+- 🎉 feature: add "shuffle" control mode. The image is generated from elements of the control image, similar to style transfer
- fix: controlnet now works with all sd1.5 based models
+- 🎉 feature: upgrade to [controlnet 1.1](https://github.com/lllyasviel/ControlNet-v1-1-nightly)
 - 🎉 fix: controlnet now works with all SD 1.5 based models
 - fix: raw control images are now properly loaded. fixes #296
 - fix: filenames start numbers after latest image, even if some previous images were deleted
--- a/assets/pearl_shuffle_019331_1_kdpmpp2m15_PS7.5_img2img-0.0_a_clown.jpg
+++ b/assets/pearl_shuffle_019331_1_kdpmpp2m15_PS7.5_img2img-0.0_a_clown.jpg
--- a/assets/pearl_shuffle_clown_019331_1_kdpmpp2m15_PS7.5_img2img-0.0_a_clown.jpg
+++ b/assets/pearl_shuffle_clown_019331_1_kdpmpp2m15_PS7.5_img2img-0.0_a_clown.jpg
--- a/imaginairy/cli/imagine.py
+++ b/imaginairy/cli/imagine.py
@ -40,7 +40,7 @@ from imaginairy.cli.shared import (
    "--control-mode",
    default=None,
    show_default=False,
-    type=click.Choice(["", "canny", "depth", "normal", "hed", "openpose"]),
+    type=click.Choice(["", "canny", "depth", "normal", "hed", "openpose", "shuffle"]),
    help="how the control image is used as signal",
 )
@click.pass_context
--- a/imaginairy/config.py
+++ b/imaginairy/config.py
@ -198,6 +198,13 @@ CONTROLNET_CONFIGS = [
        weights_url="https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/69fc48b9cbd98661f6d0288dc59b59a5ccb32a6b/control_v11p_sd15_openpose.pth",
        alias="openpose",
    ),
    ControlNetConfig(
        short_name="shuffle15",
        control_type="shuffle",
        config_path="configs/control-net-v15-pool.yaml",
        weights_url="https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/69fc48b9cbd98661f6d0288dc59b59a5ccb32a6b/control_v11e_sd15_shuffle.pth",
        alias="shuffle",
    ),
 ]
 CONTROLNET_CONFIG_SHORTCUTS = {m.short_name: m for m in CONTROLNET_CONFIGS}
--- a/imaginairy/configs/control-net-v15-pool.yaml
+++ b/imaginairy/configs/control-net-v15-pool.yaml
@ -0,0 +1,80 @@
 model:
  target: imaginairy.modules.cldm.ControlLDM
  params:
    linear_start: 0.00085
    linear_end: 0.0120
    num_timesteps_cond: 1
    log_every_t: 200
    timesteps: 1000
    first_stage_key: "image"
    cond_stage_key: "txt"
    control_key: "hint"
    image_size: 64
    channels: 4
    cond_stage_trainable: false
    conditioning_key: crossattn
    monitor: val/loss_simple_ema
    scale_factor: 0.18215
    use_ema: False
    only_mid_control: False
    global_average_pooling: True
    unet_config:
      target: imaginairy.modules.cldm.ControlledUnetModel
      params:
        use_checkpoint: True
        image_size: 32 # unused
        in_channels: 4
        out_channels: 4
        model_channels: 320
        attention_resolutions: [ 4, 2, 1 ]
        num_res_blocks: 2
        channel_mult: [ 1, 2, 4, 4 ]
        num_heads: 8
        use_spatial_transformer: True
        transformer_depth: 1
        context_dim: 768
        legacy: False
    first_stage_config:
      target: imaginairy.modules.autoencoder.AutoencoderKL
      params:
        embed_dim: 4
        monitor: val/rec_loss
        ddconfig:
          double_z: true
          z_channels: 4
          resolution: 256
          in_channels: 3
          out_ch: 3
          ch: 128
          ch_mult:
          - 1
          - 2
          - 4
          - 4
          num_res_blocks: 2
          attn_resolutions: []
          dropout: 0.0
        lossconfig:
          target: torch.nn.Identity
    cond_stage_config:
      target: imaginairy.modules.clip_embedders.FrozenCLIPEmbedder
    control_stage_config:
      target: imaginairy.modules.cldm.ControlNet
      params:
        image_size: 32 # unused
        in_channels: 4
        hint_channels: 3
        model_channels: 320
        attention_resolutions: [ 4, 2, 1 ]
        num_res_blocks: 2
        channel_mult: [ 1, 2, 4, 4 ]
        num_heads: 8
        use_spatial_transformer: True
        transformer_depth: 1
        context_dim: 768
        use_checkpoint: True
        legacy: False
--- a/imaginairy/img_processors/control_modes.py
+++ b/imaginairy/img_processors/control_modes.py
@ -127,6 +127,67 @@ def create_pose_map(img_t):
    return pose_t
 def make_noise_disk(H, W, C, F):
    """Build a smooth random field of height ``H`` and width ``W`` scaled to [0, 1].

    A coarse grid of uniform random samples (roughly one sample per ``F``
    pixels, plus a border) is upscaled with bicubic interpolation, the
    ``F``-pixel border is cropped away, and the result is shifted and scaled
    so its minimum is 0 and its maximum is 1.

    Args:
        H: Output height in pixels.
        W: Output width in pixels.
        C: Number of channels in the random field.
        F: Pixel spacing between coarse samples; larger values give
            smoother noise.

    Returns:
        A numpy array of shape ``(H, W, C)``.
    """
    import cv2
    import numpy as np

    coarse_shape = ((H // F) + 2, (W // F) + 2, C)
    field = np.random.uniform(low=0, high=1, size=coarse_shape)

    # cv2 takes the target size as (width, height); pad by F on every side so
    # the crop below discards the interpolation border.
    padded_size = (W + 2 * F, H + 2 * F)
    field = cv2.resize(field, padded_size, interpolation=cv2.INTER_CUBIC)
    field = field[F : F + H, F : F + W]

    # Normalize in place to the [0, 1] range.
    field -= np.min(field)
    field /= np.max(field)

    # NOTE(review): presumably cv2.resize returns a 2-D array for
    # single-channel input, hence the channel axis is restored here.
    return field[:, :, None] if C == 1 else field
 def shuffle_map_np(img, h=None, w=None, f=256):
    """Resample ``img`` through a smooth random coordinate map.

    Two independent noise fields give, for every output pixel, the x and y
    coordinate in ``img`` to sample from, so the output is built from
    smoothly displaced regions of the input.

    Args:
        img: Image array of shape ``(H, W, C)``.
        h: Output height; defaults to the input height.
        w: Output width; defaults to the input width.
        f: Noise smoothness factor forwarded to ``make_noise_disk``.

    Returns:
        The remapped image as produced by ``cv2.remap``.
    """
    import cv2
    import numpy as np

    src_h, src_w, _ = img.shape
    out_h = src_h if h is None else h
    out_w = src_w if w is None else w

    # Noise in [0, 1] is stretched to span the valid source pixel indices.
    map_x = make_noise_disk(out_h, out_w, 1, f) * float(src_w - 1)
    map_y = make_noise_disk(out_h, out_w, 1, f) * float(src_h - 1)

    # A single two-channel float32 map carries (x, y) per output pixel.
    flow = np.concatenate([map_x, map_y], axis=2).astype(np.float32)
    return cv2.remap(img, flow, None, cv2.INTER_LINEAR)
 def shuffle_map_torch(tensor, h=None, w=None, f=256):
    """Apply ``shuffle_map_np`` to every image in a batched tensor.

    Args:
        tensor: Image batch of shape ``(B, C, H, W)``.
        h: Output height; defaults to the input height ``H``.
        w: Output width; defaults to the input width ``W``.
        f: Noise smoothness factor forwarded to ``shuffle_map_np``.

    Returns:
        A tensor of shape ``(B, C, h, w)`` on the same device as the input,
        with ``(x + 1) / 2`` applied to every value.
    """
    import torch

    B, C, H, W = tensor.shape
    out_h = H if h is None else h
    out_w = W if w is None else w
    device = tensor.device

    # The shuffle runs through numpy/OpenCV, so work on a CPU copy. detach()
    # lets .numpy() succeed for tensors that require grad.
    tensor = tensor.detach().cpu()

    # Size the buffer from the requested output size, not the input size, so
    # that h/w values different from H/W do not fail on assignment.
    shuffled_tensor = torch.empty((B, C, out_h, out_w), dtype=tensor.dtype)

    for b in range(B):
        # (C, H, W) -> (H, W, C), the layout the numpy helper expects.
        img_np = tensor[b].numpy().transpose(1, 2, 0)
        shuffled_np = shuffle_map_np(img_np, h, w, f)

        # OpenCV returns a 2-D array for single-channel images; restore the
        # channel axis so the transpose below is valid.
        if shuffled_np.ndim == 2:
            shuffled_np = shuffled_np[:, :, None]

        # (h, w, C) -> (C, h, w)
        shuffled_tensor[b] = torch.from_numpy(shuffled_np.transpose(2, 0, 1))

    # NOTE(review): presumably the input is in [-1, 1] and this rescales it
    # to [0, 1] - confirm against the callers.
    shuffled_tensor = (shuffled_tensor + 1.0) / 2.0
    return shuffled_tensor.to(device)
 def noop(img):
    """Return ``img`` unchanged (identity pass-through)."""
    return img
 CONTROL_MODES = {
    "canny": create_canny_edges,
    "depth": create_depth_map,
@ -135,4 +196,5 @@ CONTROL_MODES = {
    # "mlsd": create_mlsd_edges,
    "openpose": create_pose_map,
    # "scribble": None,
    "shuffle": shuffle_map_torch,
 }
--- a/tests/expected_output/test_control_images[shuffle-shuffle_map_torch]_.png
+++ b/tests/expected_output/test_control_images[shuffle-shuffle_map_torch]_.png
--- a/tests/img_processors/test_control_modes.py
+++ b/tests/img_processors/test_control_modes.py
@ -1,4 +1,5 @@
 import pytest
 from lightning_fabric import seed_everything
 from imaginairy import LazyLoadingImage
 from imaginairy.img_processors.control_modes import CONTROL_MODES
@ -16,6 +17,7 @@ control_mode_params = list(CONTROL_MODES.items())
@pytest.mark.parametrize("control_name,control_func", control_mode_params)
 def test_control_images(filename_base_for_outputs, control_func, control_name):
    seed_everything(42)
    img = LazyLoadingImage(filepath=f"{TESTS_FOLDER}/data/bench2.png")
    img_t = pillow_img_to_torch_image(img)