mirror of
https://github.com/brycedrennan/imaginAIry
synced 2024-10-31 03:20:40 +00:00
feature: add "shuffle" control mode
Image is generated from elements of control image. similar to style transfer
This commit is contained in:
parent
750d4f7ea8
commit
476a81a967
18
README.md
18
README.md
@ -91,6 +91,19 @@ imagine --control-image bird.jpg --control-mode normal "a bird"
|
|||||||
<img src="assets/bird-normal-generated.jpg" height="256">
|
<img src="assets/bird-normal-generated.jpg" height="256">
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
|
**Image Shuffle Control**
|
||||||
|
|
||||||
|
Generates the image based on elements of the control image, similar to style transfer.
|
||||||
|
```bash
|
||||||
|
imagine --control-image pearl-girl.jpg --control-mode shuffle "a clown"
|
||||||
|
```
|
||||||
|
The middle image is the "shuffled" input image.
|
||||||
|
<p float="left">
|
||||||
|
<img src="assets/girl_with_a_pearl_earring.jpg" height="256">
|
||||||
|
<img src="assets/pearl_shuffle_019331_1_kdpmpp2m15_PS7.5_img2img-0.0_a_clown.jpg" height="256">
|
||||||
|
<img src="assets/pearl_shuffle_clown_019331_1_kdpmpp2m15_PS7.5_img2img-0.0_a_clown.jpg" height="256">
|
||||||
|
</p>
|
||||||
|
|
||||||
|
|
||||||
### Instruction based image edits [by InstructPix2Pix](https://github.com/timothybrooks/instruct-pix2pix)
|
### Instruction based image edits [by InstructPix2Pix](https://github.com/timothybrooks/instruct-pix2pix)
|
||||||
Just tell imaginairy how to edit the image and it will do it for you!
|
Just tell imaginairy how to edit the image and it will do it for you!
|
||||||
@ -399,8 +412,9 @@ docker run -it --gpus all -v $HOME/.cache/huggingface:/root/.cache/huggingface -
|
|||||||
|
|
||||||
## ChangeLog
|
## ChangeLog
|
||||||
|
|
||||||
- feature: upgrade to [controlnet 1.1](https://github.com/lllyasviel/ControlNet-v1-1-nightly)
|
- 🎉 feature: add "shuffle" control mode. Image is generated from elements of the control image, similar to style transfer
|
||||||
- fix: controlnet now works with all sd1.5 based models
|
- 🎉 feature: upgrade to [controlnet 1.1](https://github.com/lllyasviel/ControlNet-v1-1-nightly)
|
||||||
|
- 🎉 fix: controlnet now works with all SD 1.5 based models
|
||||||
- fix: raw control images are now properly loaded. fixes #296
|
- fix: raw control images are now properly loaded. fixes #296
|
||||||
- fix: filenames start numbers after latest image, even if some previous images were deleted
|
- fix: filenames start numbers after latest image, even if some previous images were deleted
|
||||||
|
|
||||||
|
Binary file not shown.
After Width: | Height: | Size: 34 KiB |
Binary file not shown.
After Width: | Height: | Size: 34 KiB |
@ -40,7 +40,7 @@ from imaginairy.cli.shared import (
|
|||||||
"--control-mode",
|
"--control-mode",
|
||||||
default=None,
|
default=None,
|
||||||
show_default=False,
|
show_default=False,
|
||||||
type=click.Choice(["", "canny", "depth", "normal", "hed", "openpose"]),
|
type=click.Choice(["", "canny", "depth", "normal", "hed", "openpose", "shuffle"]),
|
||||||
help="how the control image is used as signal",
|
help="how the control image is used as signal",
|
||||||
)
|
)
|
||||||
@click.pass_context
|
@click.pass_context
|
||||||
|
@ -198,6 +198,13 @@ CONTROLNET_CONFIGS = [
|
|||||||
weights_url="https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/69fc48b9cbd98661f6d0288dc59b59a5ccb32a6b/control_v11p_sd15_openpose.pth",
|
weights_url="https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/69fc48b9cbd98661f6d0288dc59b59a5ccb32a6b/control_v11p_sd15_openpose.pth",
|
||||||
alias="openpose",
|
alias="openpose",
|
||||||
),
|
),
|
||||||
|
ControlNetConfig(
|
||||||
|
short_name="shuffle15",
|
||||||
|
control_type="shuffle",
|
||||||
|
config_path="configs/control-net-v15-pool.yaml",
|
||||||
|
weights_url="https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/69fc48b9cbd98661f6d0288dc59b59a5ccb32a6b/control_v11e_sd15_shuffle.pth",
|
||||||
|
alias="shuffle",
|
||||||
|
),
|
||||||
]
|
]
|
||||||
|
|
||||||
CONTROLNET_CONFIG_SHORTCUTS = {m.short_name: m for m in CONTROLNET_CONFIGS}
|
CONTROLNET_CONFIG_SHORTCUTS = {m.short_name: m for m in CONTROLNET_CONFIGS}
|
||||||
|
80
imaginairy/configs/control-net-v15-pool.yaml
Normal file
80
imaginairy/configs/control-net-v15-pool.yaml
Normal file
@ -0,0 +1,80 @@
|
|||||||
|
model:
|
||||||
|
target: imaginairy.modules.cldm.ControlLDM
|
||||||
|
params:
|
||||||
|
linear_start: 0.00085
|
||||||
|
linear_end: 0.0120
|
||||||
|
num_timesteps_cond: 1
|
||||||
|
log_every_t: 200
|
||||||
|
timesteps: 1000
|
||||||
|
first_stage_key: "image"
|
||||||
|
cond_stage_key: "txt"
|
||||||
|
control_key: "hint"
|
||||||
|
image_size: 64
|
||||||
|
channels: 4
|
||||||
|
cond_stage_trainable: false
|
||||||
|
conditioning_key: crossattn
|
||||||
|
monitor: val/loss_simple_ema
|
||||||
|
scale_factor: 0.18215
|
||||||
|
use_ema: False
|
||||||
|
only_mid_control: False
|
||||||
|
global_average_pooling: True
|
||||||
|
|
||||||
|
unet_config:
|
||||||
|
target: imaginairy.modules.cldm.ControlledUnetModel
|
||||||
|
params:
|
||||||
|
use_checkpoint: True
|
||||||
|
image_size: 32 # unused
|
||||||
|
in_channels: 4
|
||||||
|
out_channels: 4
|
||||||
|
model_channels: 320
|
||||||
|
attention_resolutions: [ 4, 2, 1 ]
|
||||||
|
num_res_blocks: 2
|
||||||
|
channel_mult: [ 1, 2, 4, 4 ]
|
||||||
|
num_heads: 8
|
||||||
|
use_spatial_transformer: True
|
||||||
|
transformer_depth: 1
|
||||||
|
context_dim: 768
|
||||||
|
legacy: False
|
||||||
|
|
||||||
|
first_stage_config:
|
||||||
|
target: imaginairy.modules.autoencoder.AutoencoderKL
|
||||||
|
params:
|
||||||
|
embed_dim: 4
|
||||||
|
monitor: val/rec_loss
|
||||||
|
ddconfig:
|
||||||
|
double_z: true
|
||||||
|
z_channels: 4
|
||||||
|
resolution: 256
|
||||||
|
in_channels: 3
|
||||||
|
out_ch: 3
|
||||||
|
ch: 128
|
||||||
|
ch_mult:
|
||||||
|
- 1
|
||||||
|
- 2
|
||||||
|
- 4
|
||||||
|
- 4
|
||||||
|
num_res_blocks: 2
|
||||||
|
attn_resolutions: []
|
||||||
|
dropout: 0.0
|
||||||
|
lossconfig:
|
||||||
|
target: torch.nn.Identity
|
||||||
|
|
||||||
|
cond_stage_config:
|
||||||
|
target: imaginairy.modules.clip_embedders.FrozenCLIPEmbedder
|
||||||
|
|
||||||
|
control_stage_config:
|
||||||
|
target: imaginairy.modules.cldm.ControlNet
|
||||||
|
params:
|
||||||
|
image_size: 32 # unused
|
||||||
|
in_channels: 4
|
||||||
|
hint_channels: 3
|
||||||
|
model_channels: 320
|
||||||
|
attention_resolutions: [ 4, 2, 1 ]
|
||||||
|
num_res_blocks: 2
|
||||||
|
channel_mult: [ 1, 2, 4, 4 ]
|
||||||
|
num_heads: 8
|
||||||
|
use_spatial_transformer: True
|
||||||
|
transformer_depth: 1
|
||||||
|
context_dim: 768
|
||||||
|
use_checkpoint: True
|
||||||
|
legacy: False
|
@ -127,6 +127,67 @@ def create_pose_map(img_t):
|
|||||||
return pose_t
|
return pose_t
|
||||||
|
|
||||||
|
|
||||||
|
def make_noise_disk(H, W, C, F):
    """Build a smooth random field of height H, width W and C channels.

    A coarse grid of uniform noise (roughly one cell per F pixels, plus a
    border) is upsampled with bicubic interpolation, cropped to H x W and
    then min-max normalised so the values span 0 to 1.

    Args:
        H: output height in pixels.
        W: output width in pixels.
        C: number of channels in the noise field.
        F: size of one coarse noise cell in pixels; also the crop margin.

    Returns:
        A numpy array indexed as (row, column, channel).
    """
    import cv2
    import numpy as np

    coarse_shape = ((H // F) + 2, (W // F) + 2, C)
    coarse = np.random.uniform(low=0, high=1, size=coarse_shape)

    # cv2.resize takes the target size as (width, height).
    padded_size = (W + 2 * F, H + 2 * F)
    upsampled = cv2.resize(coarse, padded_size, interpolation=cv2.INTER_CUBIC)

    # Drop the F-pixel margin on every side, then rescale to the 0..1 range.
    field = upsampled[F : F + H, F : F + W]
    field = field - np.min(field)
    field = field / np.max(field)

    # Restore the trailing channel axis for the single-channel case.
    return field[:, :, None] if C == 1 else field
|
||||||
|
|
||||||
|
|
||||||
|
def shuffle_map_np(img, h=None, w=None, f=256):
    """Warp an image through a smooth random coordinate field.

    Two independent noise fields from ``make_noise_disk`` are scaled to the
    source image's x and y pixel ranges and used as the sampling map for
    ``cv2.remap``, so each output pixel is read from a random (but locally
    smooth) location in ``img``.

    Args:
        img: numpy image with three dimensions, unpacked as (H, W, C).
        h: output height; defaults to the height of ``img``.
        w: output width; defaults to the width of ``img``.
        f: noise cell size forwarded to ``make_noise_disk``.

    Returns:
        The remapped image produced by ``cv2.remap``.
    """
    import cv2
    import numpy as np

    src_h, src_w, _ = img.shape
    out_h = src_h if h is None else h
    out_w = src_w if w is None else w

    # Draw the x field before the y field so seeded runs stay reproducible.
    map_x = make_noise_disk(out_h, out_w, 1, f) * float(src_w - 1)
    map_y = make_noise_disk(out_h, out_w, 1, f) * float(src_h - 1)

    # Stack into a two-channel float32 map of (x, y) source coordinates.
    sample_map = np.dstack((map_x, map_y)).astype(np.float32)
    return cv2.remap(img, sample_map, None, cv2.INTER_LINEAR)
|
||||||
|
|
||||||
|
|
||||||
|
def shuffle_map_torch(tensor, h=None, w=None, f=256):
    """Apply ``shuffle_map_np`` to every image in a batch tensor.

    Args:
        tensor: batch of images unpacked as (B, C, H, W).
        h: output height; defaults to the input height H.
        w: output width; defaults to the input width W.
        f: noise cell size forwarded to ``shuffle_map_np``.

    Returns:
        A tensor of shape (B, C, h, w) on the same device as the input,
        with every value passed through ``(x + 1) / 2``.
    """
    import torch

    B, C, H, W = tensor.shape
    device = tensor.device
    # The per-image shuffle runs on numpy arrays, so work on the CPU.
    tensor = tensor.cpu()

    # Size the output buffer from the requested output size, not the input
    # size: shuffle_map_np returns (h, w) images, so a buffer shaped like the
    # input would reject (or wrongly broadcast) the result whenever h/w
    # differ from H/W.
    out_h = H if h is None else h
    out_w = W if w is None else w
    shuffled_tensor = tensor.new_empty((B, C, out_h, out_w))

    for b in range(B):
        # (C, H, W) -> (H, W, C) for the numpy/OpenCV side.
        img_np = tensor[b].numpy().transpose(1, 2, 0)

        shuffled_np = shuffle_map_np(img_np, h, w, f)

        # (h, w, C) -> (C, h, w) back into the batch.
        shuffled_tensor[b] = torch.from_numpy(shuffled_np.transpose(2, 0, 1))

    # NOTE(review): presumably maps images from a [-1, 1] range to [0, 1];
    # confirm against the caller's image convention.
    shuffled_tensor = (shuffled_tensor + 1.0) / 2.0
    return shuffled_tensor.to(device)
|
||||||
|
|
||||||
|
|
||||||
|
def noop(img):
    """Return ``img`` unchanged."""
    return img
|
||||||
|
|
||||||
|
|
||||||
CONTROL_MODES = {
|
CONTROL_MODES = {
|
||||||
"canny": create_canny_edges,
|
"canny": create_canny_edges,
|
||||||
"depth": create_depth_map,
|
"depth": create_depth_map,
|
||||||
@ -135,4 +196,5 @@ CONTROL_MODES = {
|
|||||||
# "mlsd": create_mlsd_edges,
|
# "mlsd": create_mlsd_edges,
|
||||||
"openpose": create_pose_map,
|
"openpose": create_pose_map,
|
||||||
# "scribble": None,
|
# "scribble": None,
|
||||||
|
"shuffle": shuffle_map_torch,
|
||||||
}
|
}
|
||||||
|
Binary file not shown.
After Width: | Height: | Size: 565 KiB |
@ -1,4 +1,5 @@
|
|||||||
import pytest
|
import pytest
|
||||||
|
from lightning_fabric import seed_everything
|
||||||
|
|
||||||
from imaginairy import LazyLoadingImage
|
from imaginairy import LazyLoadingImage
|
||||||
from imaginairy.img_processors.control_modes import CONTROL_MODES
|
from imaginairy.img_processors.control_modes import CONTROL_MODES
|
||||||
@ -16,6 +17,7 @@ control_mode_params = list(CONTROL_MODES.items())
|
|||||||
|
|
||||||
@pytest.mark.parametrize("control_name,control_func", control_mode_params)
|
@pytest.mark.parametrize("control_name,control_func", control_mode_params)
|
||||||
def test_control_images(filename_base_for_outputs, control_func, control_name):
|
def test_control_images(filename_base_for_outputs, control_func, control_name):
|
||||||
|
seed_everything(42)
|
||||||
img = LazyLoadingImage(filepath=f"{TESTS_FOLDER}/data/bench2.png")
|
img = LazyLoadingImage(filepath=f"{TESTS_FOLDER}/data/bench2.png")
|
||||||
img_t = pillow_img_to_torch_image(img)
|
img_t = pillow_img_to_torch_image(img)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user