Mirror of https://github.com/brycedrennan/imaginAIry (synced 2024-10-31 03:20:40 +00:00)
feature: add "shuffle" control mode

The image is generated from elements of the control image, similar to style transfer.
This commit is contained in: parent 750d4f7ea8 · commit 476a81a967
README.md | 18
@@ -91,6 +91,19 @@ imagine --control-image bird.jpg --control-mode normal "a bird"
 <img src="assets/bird-normal-generated.jpg" height="256">
 </p>

+**Image Shuffle Control**
+
+Generates the image based on elements of the control image. Kind of similar to style transfer.
+```bash
+imagine --control-image pearl-girl.jpg --control-mode shuffle "a clown"
+```
+The middle image is the "shuffled" input image
+<p float="left">
+<img src="assets/girl_with_a_pearl_earring.jpg" height="256">
+<img src="assets/pearl_shuffle_019331_1_kdpmpp2m15_PS7.5_img2img-0.0_a_clown.jpg" height="256">
+<img src="assets/pearl_shuffle_clown_019331_1_kdpmpp2m15_PS7.5_img2img-0.0_a_clown.jpg" height="256">
+</p>
+

 ### Instruction based image edits [by InstructPix2Pix](https://github.com/timothybrooks/instruct-pix2pix)

 Just tell imaginairy how to edit the image and it will do it for you!
@@ -399,8 +412,9 @@ docker run -it --gpus all -v $HOME/.cache/huggingface:/root/.cache/huggingface -

 ## ChangeLog

-- feature: upgrade to [controlnet 1.1](https://github.com/lllyasviel/ControlNet-v1-1-nightly)
-- fix: controlnet now works with all sd1.5 based models
+- 🎉 feature: add "shuffle" control mode. Image is generated from elements of control image. similar to style transfer
+- 🎉 feature: upgrade to [controlnet 1.1](https://github.com/lllyasviel/ControlNet-v1-1-nightly)
+- 🎉 fix: controlnet now works with all SD 1.5 based models
 - fix: raw control images are now properly loaded. fixes #296
 - fix: filenames start numbers after latest image, even if some previous images were deleted

(Two binary image files added, 34 KiB each, not shown; presumably the pearl_shuffle example images referenced in the README above.)
@@ -40,7 +40,7 @@ from imaginairy.cli.shared import (
     "--control-mode",
     default=None,
     show_default=False,
-    type=click.Choice(["", "canny", "depth", "normal", "hed", "openpose"]),
+    type=click.Choice(["", "canny", "depth", "normal", "hed", "openpose", "shuffle"]),
     help="how the control image is used as signal",
 )
 @click.pass_context
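For readers unfamiliar with click, the only effect of this one-line change is that "shuffle" becomes an accepted value for `--control-mode`; anything outside the `click.Choice` list is rejected before imaginairy runs. A minimal, self-contained sketch of that behavior (the `demo` command below is hypothetical, not part of imaginairy's real CLI):

```python
# Standalone sketch of the click.Choice validation used above; "demo" is a
# hypothetical command, not imaginairy's actual entry point.
import click


@click.command()
@click.option(
    "--control-mode",
    default=None,
    show_default=False,
    type=click.Choice(["", "canny", "depth", "normal", "hed", "openpose", "shuffle"]),
    help="how the control image is used as signal",
)
def demo(control_mode):
    click.echo(f"control mode: {control_mode!r}")


if __name__ == "__main__":
    # `python demo.py --control-mode shuffle` prints: control mode: 'shuffle'
    # `python demo.py --control-mode swirl` exits with an "Invalid value" usage error
    # that lists the valid choices.
    demo()
```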
@@ -198,6 +198,13 @@ CONTROLNET_CONFIGS = [
         weights_url="https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/69fc48b9cbd98661f6d0288dc59b59a5ccb32a6b/control_v11p_sd15_openpose.pth",
         alias="openpose",
     ),
+    ControlNetConfig(
+        short_name="shuffle15",
+        control_type="shuffle",
+        config_path="configs/control-net-v15-pool.yaml",
+        weights_url="https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/69fc48b9cbd98661f6d0288dc59b59a5ccb32a6b/control_v11e_sd15_shuffle.pth",
+        alias="shuffle",
+    ),
 ]

 CONTROLNET_CONFIG_SHORTCUTS = {m.short_name: m for m in CONTROLNET_CONFIGS}
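The new entry is picked up by the `CONTROLNET_CONFIG_SHORTCUTS` comprehension shown in the context lines, so `shuffle15` resolves to the v1.1 shuffle weights and the new `-pool` config. A hedged sketch of that lookup pattern with a stand-in dataclass (imaginairy's real `ControlNetConfig` class is defined elsewhere in the repo and may carry more fields, and how the `alias` value is registered is not shown in this hunk):

```python
# Stand-in sketch of the shortcut registration shown above; ControlNetConfig
# here is a plain dataclass, not imaginairy's actual class.
from dataclasses import dataclass


@dataclass
class ControlNetConfig:
    short_name: str
    control_type: str
    config_path: str
    weights_url: str
    alias: str


CONTROLNET_CONFIGS = [
    ControlNetConfig(
        short_name="shuffle15",
        control_type="shuffle",
        config_path="configs/control-net-v15-pool.yaml",
        weights_url="https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/69fc48b9cbd98661f6d0288dc59b59a5ccb32a6b/control_v11e_sd15_shuffle.pth",
        alias="shuffle",
    ),
]

CONTROLNET_CONFIG_SHORTCUTS = {m.short_name: m for m in CONTROLNET_CONFIGS}
print(CONTROLNET_CONFIG_SHORTCUTS["shuffle15"].config_path)  # configs/control-net-v15-pool.yaml
```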
imaginairy/configs/control-net-v15-pool.yaml | 80 (new file)
@@ -0,0 +1,80 @@
model:
  target: imaginairy.modules.cldm.ControlLDM
  params:
    linear_start: 0.00085
    linear_end: 0.0120
    num_timesteps_cond: 1
    log_every_t: 200
    timesteps: 1000
    first_stage_key: "image"
    cond_stage_key: "txt"
    control_key: "hint"
    image_size: 64
    channels: 4
    cond_stage_trainable: false
    conditioning_key: crossattn
    monitor: val/loss_simple_ema
    scale_factor: 0.18215
    use_ema: False
    only_mid_control: False
    global_average_pooling: True

    unet_config:
      target: imaginairy.modules.cldm.ControlledUnetModel
      params:
        use_checkpoint: True
        image_size: 32 # unused
        in_channels: 4
        out_channels: 4
        model_channels: 320
        attention_resolutions: [ 4, 2, 1 ]
        num_res_blocks: 2
        channel_mult: [ 1, 2, 4, 4 ]
        num_heads: 8
        use_spatial_transformer: True
        transformer_depth: 1
        context_dim: 768
        legacy: False

    first_stage_config:
      target: imaginairy.modules.autoencoder.AutoencoderKL
      params:
        embed_dim: 4
        monitor: val/rec_loss
        ddconfig:
          double_z: true
          z_channels: 4
          resolution: 256
          in_channels: 3
          out_ch: 3
          ch: 128
          ch_mult:
          - 1
          - 2
          - 4
          - 4
          num_res_blocks: 2
          attn_resolutions: []
          dropout: 0.0
        lossconfig:
          target: torch.nn.Identity

    cond_stage_config:
      target: imaginairy.modules.clip_embedders.FrozenCLIPEmbedder

    control_stage_config:
      target: imaginairy.modules.cldm.ControlNet
      params:
        image_size: 32 # unused
        in_channels: 4
        hint_channels: 3
        model_channels: 320
        attention_resolutions: [ 4, 2, 1 ]
        num_res_blocks: 2
        channel_mult: [ 1, 2, 4, 4 ]
        num_heads: 8
        use_spatial_transformer: True
        transformer_depth: 1
        context_dim: 768
        use_checkpoint: True
        legacy: False
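The setting that appears to distinguish this file from imaginairy's standard SD 1.5 ControlNet config is `global_average_pooling: True` (hence "-pool" in the filename): for the ControlNet 1.1 shuffle model, each control residual is averaged over its spatial dimensions before being added into the UNet, so the control image steers overall content and palette rather than pixel-aligned structure. A hedged sketch of that pooling step with illustrative shapes (this is not imaginairy's actual `ControlLDM` code):

```python
# Hedged illustration of global average pooling over control residuals;
# tensor shapes are made up for the example, and this is not the real
# ControlLDM implementation.
import torch

# pretend multi-scale control residuals, shaped (batch, channels, height, width)
control_residuals = [torch.randn(1, c, s, s) for c, s in [(320, 64), (640, 32), (1280, 16)]]

# with global_average_pooling, each residual collapses to one value per channel
pooled = [r.mean(dim=(2, 3), keepdim=True) for r in control_residuals]

for r, p in zip(control_residuals, pooled):
    print(tuple(r.shape), "->", tuple(p.shape))  # e.g. (1, 320, 64, 64) -> (1, 320, 1, 1)
```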
@@ -127,6 +127,67 @@ def create_pose_map(img_t):
     return pose_t


+def make_noise_disk(H, W, C, F):
+    import cv2
+    import numpy as np
+
+    noise = np.random.uniform(low=0, high=1, size=((H // F) + 2, (W // F) + 2, C))
+    noise = cv2.resize(noise, (W + 2 * F, H + 2 * F), interpolation=cv2.INTER_CUBIC)
+    noise = noise[F : F + H, F : F + W]
+    noise -= np.min(noise)
+    noise /= np.max(noise)
+    if C == 1:
+        noise = noise[:, :, None]
+    return noise
+
+
+def shuffle_map_np(img, h=None, w=None, f=256):
+    import cv2
+    import numpy as np
+
+    H, W, C = img.shape
+    if h is None:
+        h = H
+    if w is None:
+        w = W
+
+    x = make_noise_disk(h, w, 1, f) * float(W - 1)
+    y = make_noise_disk(h, w, 1, f) * float(H - 1)
+    flow = np.concatenate([x, y], axis=2).astype(np.float32)
+    return cv2.remap(img, flow, None, cv2.INTER_LINEAR)
+
+
+def shuffle_map_torch(tensor, h=None, w=None, f=256):
+    import torch
+
+    # Assuming the input tensor is in shape (B, C, H, W)
+    B, C, H, W = tensor.shape
+    device = tensor.device
+    tensor = tensor.cpu()
+
+    # Create an empty tensor with the same shape as input tensor to store the shuffled images
+    shuffled_tensor = torch.empty_like(tensor)
+
+    # Iterate over the batch and apply the shuffle_map function to each image
+    for b in range(B):
+        # Convert the input torch tensor to a numpy array
+        img_np = tensor[b].numpy().transpose(1, 2, 0)  # Shape (H, W, C)
+
+        # Call the shuffle_map function with the numpy array as input
+        shuffled_np = shuffle_map_np(img_np, h, w, f)
+
+        # Convert the shuffled numpy array back to a torch tensor and store it in the shuffled_tensor
+        shuffled_tensor[b] = torch.from_numpy(
+            shuffled_np.transpose(2, 0, 1)
+        )  # Shape (C, H, W)
+    shuffled_tensor = (shuffled_tensor + 1.0) / 2.0
+    return shuffled_tensor.to(device)
+
+
+def noop(img):
+    return img
+
+
 CONTROL_MODES = {
     "canny": create_canny_edges,
     "depth": create_depth_map,
@@ -135,4 +196,5 @@ CONTROL_MODES = {
     # "mlsd": create_mlsd_edges,
     "openpose": create_pose_map,
     # "scribble": None,
+    "shuffle": shuffle_map_torch,
 }
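A quick way to see what the new preprocessor produces, assuming the module path used by the test import further down (`imaginairy.img_processors.control_modes`) and an image batch scaled to [-1, 1] (the function's final rescale to [0, 1] implies that input range); the random tensor below merely stands in for a real image:

```python
# Hedged usage sketch for the new "shuffle" entry in CONTROL_MODES; the module
# path is taken from the test import below, and the random tensor stands in
# for a real (B, C, H, W) image batch in the [-1, 1] range.
import torch

from imaginairy.img_processors.control_modes import CONTROL_MODES

img_t = torch.rand(1, 3, 512, 512) * 2 - 1  # fake image batch
shuffled = CONTROL_MODES["shuffle"](img_t)  # same as calling shuffle_map_torch

print(shuffled.shape)  # torch.Size([1, 3, 512, 512])
print(float(shuffled.min()), float(shuffled.max()))  # roughly within [0, 1] after the final rescale
```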
(Binary image file added, 565 KiB, not shown.)
@@ -1,4 +1,5 @@
 import pytest
+from lightning_fabric import seed_everything

 from imaginairy import LazyLoadingImage
 from imaginairy.img_processors.control_modes import CONTROL_MODES
@@ -16,6 +17,7 @@ control_mode_params = list(CONTROL_MODES.items())

 @pytest.mark.parametrize("control_name,control_func", control_mode_params)
 def test_control_images(filename_base_for_outputs, control_func, control_name):
+    seed_everything(42)
     img = LazyLoadingImage(filepath=f"{TESTS_FOLDER}/data/bench2.png")
     img_t = pillow_img_to_torch_image(img)

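The new `seed_everything(42)` call matters because the shuffle preprocessor is random: `make_noise_disk` draws its warp field from `np.random`, so the test's reference outputs are only reproducible with a fixed seed (lightning's `seed_everything` seeds the Python, NumPy, and torch RNGs). A minimal illustration of that point using NumPy directly; the seed value and array shape are arbitrary:

```python
# Why the test now pins the seed: the shuffle warp field is random, so two
# runs only match when the RNG is seeded identically beforehand.
import numpy as np

np.random.seed(42)
first = np.random.uniform(low=0, high=1, size=(4, 4, 1))
np.random.seed(42)
second = np.random.uniform(low=0, high=1, size=(4, 4, 1))

print(np.allclose(first, second))  # True: same seed -> same noise -> same shuffled control image
```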