feature: 🎉 Edit Images with Instructions alone!
After Width: | Height: | Size: 39 KiB |
After Width: | Height: | Size: 23 KiB |
After Width: | Height: | Size: 46 KiB |
After Width: | Height: | Size: 91 KiB |
After Width: | Height: | Size: 38 KiB |
After Width: | Height: | Size: 23 KiB |
After Width: | Height: | Size: 252 KiB |
After Width: | Height: | Size: 34 KiB |
model:
  base_learning_rate: 1.0e-04
  target: imaginairy.modules.diffusion.ddpm.LatentDiffusion
  params:
    linear_start: 0.00085
    linear_end: 0.0120
    num_timesteps_cond: 1
    log_every_t: 200
    timesteps: 1000
    first_stage_key: "edited"
    cond_stage_key: "edit"
    image_size: 16
    channels: 4
    cond_stage_trainable: false
    conditioning_key: hybrid
    monitor: val/loss_simple_ema
    scale_factor: 0.18215
    use_ema: false

    scheduler_config:
      target: imaginairy.lr_scheduler.LambdaLinearScheduler
      params:
        warm_up_steps: [ 0 ]
        cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
        f_start: [ 1.e-6 ]
        f_max: [ 1. ]
        f_min: [ 1. ]

    unet_config:
      target: imaginairy.modules.diffusion.openaimodel.UNetModel
      params:
        use_checkpoint: True
        image_size: 32 # unused
        # 8 = 4 latent channels + 4 concat-conditioning channels (hybrid conditioning)
        in_channels: 8
        out_channels: 4
        model_channels: 320
        attention_resolutions: [ 4, 2, 1 ]
        num_res_blocks: 2
        channel_mult: [ 1, 2, 4, 4 ]
        num_heads: 8
        use_spatial_transformer: True
        transformer_depth: 1
        context_dim: 768
        legacy: False

    first_stage_config:
      target: imaginairy.modules.autoencoder.AutoencoderKL
      params:
        embed_dim: 4
        monitor: val/rec_loss
        ddconfig:
          double_z: true
          z_channels: 4
          resolution: 256
          in_channels: 3
          out_ch: 3
          ch: 128
          ch_mult:
            - 1
            - 2
            - 4
            - 4
          num_res_blocks: 2
          attn_resolutions: []
          dropout: 0.0
        lossconfig:
          target: torch.nn.Identity

    cond_stage_config:
      target: imaginairy.modules.clip_embedders.FrozenCLIPEmbedder
"""
Wrapper for instruct pix2pix model.


modified from https://github.com/timothybrooks/instruct-pix2pix/blob/main/edit_cli.py
"""
import torch
from einops import einops
from torch import nn

from imaginairy.samplers.base import mask_blend

class CFGEditingDenoiser(nn.Module):
    """
    Classifier-free-guidance denoiser for instruction-based image editing.

    Runs the wrapped denoiser on three conditionings at once — (text + image),
    (image only), and fully unconditioned — and combines the three predictions
    with independent text and image guidance scales, following the
    instruct-pix2pix dual-guidance scheme.
    """

    def __init__(self, model):
        super().__init__()
        # wrapped k-diffusion denoiser; must expose sigma_to_t() and an
        # inner_model attribute (used by mask_blend below)
        self.inner_model = model

    def forward(
        self,
        z,
        sigma,
        cond,
        uncond,
        cond_scale,
        image_cfg_scale=1.5,
        mask=None,
        mask_noise=None,
        orig_latent=None,
    ):
        # Triplicate the (batch-size-1) latent and sigma so all three
        # conditionings run in a single batched denoiser call.
        cfg_z = einops.repeat(z, "1 ... -> n ...", n=3)
        cfg_sigma = einops.repeat(sigma, "1 ... -> n ...", n=3)
        # Row order: [text+image cond, image-only cond, fully uncond].
        # c_crossattn carries the text embedding; c_concat carries the
        # channel-concatenated source-image latent.
        cfg_cond = {
            "c_crossattn": [
                torch.cat(
                    [
                        cond["c_crossattn"][0],
                        uncond["c_crossattn"][0],
                        uncond["c_crossattn"][0],
                    ]
                )
            ],
            "c_concat": [
                torch.cat(
                    [cond["c_concat"][0], cond["c_concat"][0], uncond["c_concat"][0]]
                )
            ],
        }

        if mask is not None:
            # Masked (inpaint-style) editing: blend the original latent back
            # into the masked-out region at the current noise level.
            assert orig_latent is not None
            t = self.inner_model.sigma_to_t(sigma, quantize=True)
            # NOTE(review): max(sigma, 1) relies on sigma being a 1-element
            # tensor for the comparison to resolve — confirm upstream shape.
            big_sigma = max(sigma, 1)
            cfg_z = mask_blend(
                noisy_latent=cfg_z,
                orig_latent=orig_latent * big_sigma,
                mask=mask,
                mask_noise=mask_noise * big_sigma,
                ts=t,
                model=self.inner_model.inner_model,
            )

        out_cond, out_img_cond, out_uncond = self.inner_model(
            cfg_z, cfg_sigma, cond=cfg_cond
        ).chunk(3)

        # Dual classifier-free guidance: cond_scale steers toward the text
        # instruction, image_cfg_scale steers toward faithfulness to the
        # source image.
        result = (
            out_uncond
            + cond_scale * (out_cond - out_img_cond)
            + image_cfg_scale * (out_img_cond - out_uncond)
        )

        return result