feature: add "shuffle" control mode

The image is generated from elements of the control image, similar to style transfer.
This commit is contained in:
Bryce 2023-05-05 00:29:43 -07:00 committed by Bryce Drennan
parent 750d4f7ea8
commit 476a81a967
9 changed files with 168 additions and 3 deletions

View File

@ -91,6 +91,19 @@ imagine --control-image bird.jpg --control-mode normal "a bird"
<img src="assets/bird-normal-generated.jpg" height="256">
</p>
**Image Shuffle Control**
Generates the image from elements of the control image, somewhat like style transfer.
```bash
imagine --control-image pearl-girl.jpg --control-mode shuffle "a clown"
```
The middle image is the "shuffled" input image
<p float="left">
<img src="assets/girl_with_a_pearl_earring.jpg" height="256">
<img src="assets/pearl_shuffle_019331_1_kdpmpp2m15_PS7.5_img2img-0.0_a_clown.jpg" height="256">
<img src="assets/pearl_shuffle_clown_019331_1_kdpmpp2m15_PS7.5_img2img-0.0_a_clown.jpg" height="256">
</p>
### Instruction based image edits [by InstructPix2Pix](https://github.com/timothybrooks/instruct-pix2pix)
Just tell imaginairy how to edit the image and it will do it for you!
@ -399,8 +412,9 @@ docker run -it --gpus all -v $HOME/.cache/huggingface:/root/.cache/huggingface -
## ChangeLog
- feature: upgrade to [controlnet 1.1](https://github.com/lllyasviel/ControlNet-v1-1-nightly)
- fix: controlnet now works with all sd1.5 based models
- 🎉 feature: add "shuffle" control mode. The image is generated from elements of the control image, similar to style transfer
- 🎉 feature: upgrade to [controlnet 1.1](https://github.com/lllyasviel/ControlNet-v1-1-nightly)
- 🎉 fix: controlnet now works with all SD 1.5 based models
- fix: raw control images are now properly loaded. fixes #296
- fix: filenames start numbers after latest image, even if some previous images were deleted

Binary file not shown.

After

Width:  |  Height:  |  Size: 34 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 34 KiB

View File

@ -40,7 +40,7 @@ from imaginairy.cli.shared import (
"--control-mode",
default=None,
show_default=False,
type=click.Choice(["", "canny", "depth", "normal", "hed", "openpose"]),
type=click.Choice(["", "canny", "depth", "normal", "hed", "openpose", "shuffle"]),
help="how the control image is used as signal",
)
@click.pass_context

View File

@ -198,6 +198,13 @@ CONTROLNET_CONFIGS = [
weights_url="https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/69fc48b9cbd98661f6d0288dc59b59a5ccb32a6b/control_v11p_sd15_openpose.pth",
alias="openpose",
),
ControlNetConfig(
short_name="shuffle15",
control_type="shuffle",
config_path="configs/control-net-v15-pool.yaml",
weights_url="https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/69fc48b9cbd98661f6d0288dc59b59a5ccb32a6b/control_v11e_sd15_shuffle.pth",
alias="shuffle",
),
]
CONTROLNET_CONFIG_SHORTCUTS = {m.short_name: m for m in CONTROLNET_CONFIGS}

View File

@ -0,0 +1,80 @@
model:
target: imaginairy.modules.cldm.ControlLDM
params:
linear_start: 0.00085
linear_end: 0.0120
num_timesteps_cond: 1
log_every_t: 200
timesteps: 1000
first_stage_key: "image"
cond_stage_key: "txt"
control_key: "hint"
image_size: 64
channels: 4
cond_stage_trainable: false
conditioning_key: crossattn
monitor: val/loss_simple_ema
scale_factor: 0.18215
use_ema: False
only_mid_control: False
global_average_pooling: True
unet_config:
target: imaginairy.modules.cldm.ControlledUnetModel
params:
use_checkpoint: True
image_size: 32 # unused
in_channels: 4
out_channels: 4
model_channels: 320
attention_resolutions: [ 4, 2, 1 ]
num_res_blocks: 2
channel_mult: [ 1, 2, 4, 4 ]
num_heads: 8
use_spatial_transformer: True
transformer_depth: 1
context_dim: 768
legacy: False
first_stage_config:
target: imaginairy.modules.autoencoder.AutoencoderKL
params:
embed_dim: 4
monitor: val/rec_loss
ddconfig:
double_z: true
z_channels: 4
resolution: 256
in_channels: 3
out_ch: 3
ch: 128
ch_mult:
- 1
- 2
- 4
- 4
num_res_blocks: 2
attn_resolutions: []
dropout: 0.0
lossconfig:
target: torch.nn.Identity
cond_stage_config:
target: imaginairy.modules.clip_embedders.FrozenCLIPEmbedder
control_stage_config:
target: imaginairy.modules.cldm.ControlNet
params:
image_size: 32 # unused
in_channels: 4
hint_channels: 3
model_channels: 320
attention_resolutions: [ 4, 2, 1 ]
num_res_blocks: 2
channel_mult: [ 1, 2, 4, 4 ]
num_heads: 8
use_spatial_transformer: True
transformer_depth: 1
context_dim: 768
use_checkpoint: True
legacy: False

View File

@ -127,6 +127,67 @@ def create_pose_map(img_t):
return pose_t
def make_noise_disk(H, W, C, F):
    """Return a smooth random field of shape (H, W, C) rescaled to span 0..1.

    Coarse uniform noise (roughly one sample per ``F`` pixels, plus a border
    cell on each side) is upsampled with cubic interpolation onto a canvas
    padded by ``F`` pixels on every side; the padding is then cropped away and
    the result is shifted and scaled so its minimum is 0 and its maximum is 1.

    Args:
        H: output height in pixels.
        W: output width in pixels.
        C: number of channels.
        F: size, in pixels, of one coarse noise cell.

    Returns:
        A numpy array of shape (H, W, C).
    """
    import cv2
    import numpy as np

    coarse_shape = ((H // F) + 2, (W // F) + 2, C)
    coarse = np.random.uniform(low=0, high=1, size=coarse_shape)

    # cv2.resize takes its target size as (width, height)
    padded = cv2.resize(
        coarse, (W + 2 * F, H + 2 * F), interpolation=cv2.INTER_CUBIC
    )
    field = padded[F : F + H, F : F + W]

    field = field - field.min()
    field = field / field.max()

    # Re-add the channel axis for single-channel output
    # (presumably cv2.resize drops it -- confirm against OpenCV docs).
    return field[:, :, None] if C == 1 else field
def shuffle_map_np(img, h=None, w=None, f=256):
    """Resample ``img`` through a smooth random coordinate map.

    Two independent single-channel noise fields are generated and scaled to
    the source image's x and y coordinate ranges; together they form the
    lookup map handed to ``cv2.remap``, so each output pixel is sampled from
    a randomly chosen (but smoothly varying) source location.

    Args:
        img: numpy image of shape (H, W, C).
        h: output height; defaults to the input height.
        w: output width; defaults to the input width.
        f: noise cell size passed through to ``make_noise_disk``.

    Returns:
        The remapped image as returned by ``cv2.remap``.
    """
    import cv2
    import numpy as np

    # Three-way unpack also rejects inputs that are not 3-dimensional
    src_h, src_w, _ = img.shape
    out_h = src_h if h is None else h
    out_w = src_w if w is None else w

    # x map is drawn before y map; order matters for seeded reproducibility
    map_x = make_noise_disk(out_h, out_w, 1, f) * float(src_w - 1)
    map_y = make_noise_disk(out_h, out_w, 1, f) * float(src_h - 1)

    lookup = np.concatenate([map_x, map_y], axis=2).astype(np.float32)
    return cv2.remap(img, lookup, None, cv2.INTER_LINEAR)
def shuffle_map_torch(tensor, h=None, w=None, f=256):
    """Apply ``shuffle_map_np`` to every image of a batched tensor.

    Each batch item is moved to the CPU, converted to a (H, W, C) numpy array,
    shuffled, and written back into a (B, C, h, w) result. The result is then
    mapped with ``(x + 1) / 2`` and returned on the input's original device.

    Args:
        tensor: torch tensor of shape (B, C, H, W).
            NOTE(review): the final ``(x + 1) / 2`` suggests values are
            expected in [-1, 1] -- confirm against callers.
        h: output height; defaults to the input height.
        w: output width; defaults to the input width.
        f: noise cell size passed through to ``shuffle_map_np``.

    Returns:
        A tensor of shape (B, C, h, w) on the same device as ``tensor``.
    """
    import torch

    B, C, H, W = tensor.shape
    out_h = H if h is None else h
    out_w = W if w is None else w

    device = tensor.device
    # detach() so .numpy() below also works for tensors that track gradients
    tensor = tensor.detach().cpu()

    # Size the buffer from the *output* dimensions: with a custom h/w the
    # shuffled images no longer match the input's (H, W).
    shuffled_tensor = torch.empty((B, C, out_h, out_w), dtype=tensor.dtype)

    for b in range(B):
        # (C, H, W) -> (H, W, C), the layout shuffle_map_np expects
        img_np = tensor[b].numpy().transpose(1, 2, 0)
        shuffled_np = shuffle_map_np(img_np, out_h, out_w, f)
        # (h, w, C) -> (C, h, w)
        shuffled_tensor[b] = torch.from_numpy(shuffled_np.transpose(2, 0, 1))

    shuffled_tensor = (shuffled_tensor + 1.0) / 2.0
    return shuffled_tensor.to(device)
def noop(img):
    """Identity function: hand back ``img`` exactly as it was received."""
    unchanged = img
    return unchanged
CONTROL_MODES = {
"canny": create_canny_edges,
"depth": create_depth_map,
@ -135,4 +196,5 @@ CONTROL_MODES = {
# "mlsd": create_mlsd_edges,
"openpose": create_pose_map,
# "scribble": None,
"shuffle": shuffle_map_torch,
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 565 KiB

View File

@ -1,4 +1,5 @@
import pytest
from lightning_fabric import seed_everything
from imaginairy import LazyLoadingImage
from imaginairy.img_processors.control_modes import CONTROL_MODES
@ -16,6 +17,7 @@ control_mode_params = list(CONTROL_MODES.items())
@pytest.mark.parametrize("control_name,control_func", control_mode_params)
def test_control_images(filename_base_for_outputs, control_func, control_name):
seed_everything(42)
img = LazyLoadingImage(filepath=f"{TESTS_FOLDER}/data/bench2.png")
img_t = pillow_img_to_torch_image(img)