feature: Stable Diffusion 2.1
@@ -230,6 +230,10 @@ docker run -it --gpus all -v $HOME/.cache/huggingface:/root/.cache/huggingface -
 [Example Colab](https://colab.research.google.com/drive/1rOvQNs0Cmn_yU1bKWjCOHzGVDgZkaTtO?usp=sharing)
 
 ## ChangeLog
 
+**7.1.0**
+- feature: 🎉 Stable Diffusion 2.1. Generated people are no longer (completely) distorted.
+  Use with `--model SD-2.1` or `--model SD-2.1-v`
+
 **7.0.0**
 - feature: negative prompting. `--negative-prompt` or `ImaginePrompt(..., negative_prompt="ugly, deformed, extra arms, etc")`
 - feature: a default negative prompt is added to all generations. Images in SD-2.0 don't look bad anymore. Images in 1.5 look improved as well.
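For orientation, a minimal sketch of the Python API those changelog entries refer to. `ImaginePrompt` and `imagine_image_files` are imaginAIry's public entry points; the prompt text and output directory below are made-up examples.

```python
# Minimal sketch of the negative-prompt API from the changelog above.
# ImaginePrompt / imagine_image_files are imaginairy's public entry points;
# the prompt text and outdir are illustrative.
from imaginairy import ImaginePrompt, imagine_image_files

prompt = ImaginePrompt(
    "photo of a woman smiling, natural light",
    # passing negative_prompt overrides the default one added in 7.0.0
    negative_prompt="ugly, deformed, extra arms",
)
imagine_image_files([prompt], outdir="./outputs")
```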
@@ -19,6 +19,7 @@ class ModelConfig:
     config_path: str
     weights_url: str
     default_image_size: int
+    forced_attn_precision: str = "default"
 
 
 MODEL_CONFIGS = [
@@ -52,18 +53,37 @@ MODEL_CONFIGS = [
         weights_url="https://huggingface.co/stabilityai/stable-diffusion-2-inpainting/resolve/main/512-inpainting-ema.ckpt",
         default_image_size=512,
     ),
+    ModelConfig(
+        short_name="SD-2.1",
+        config_path="configs/stable-diffusion-v2-inference.yaml",
+        weights_url="https://huggingface.co/stabilityai/stable-diffusion-2-1-base/resolve/main/v2-1_512-ema-pruned.ckpt",
+        default_image_size=512,
+    ),
+    ModelConfig(
+        short_name="SD-2.1-inpaint",
+        config_path="configs/stable-diffusion-v2-inpainting-inference.yaml",
+        weights_url="https://huggingface.co/stabilityai/stable-diffusion-2-inpainting/resolve/main/512-inpainting-ema.ckpt",
+        default_image_size=512,
+    ),
+    ModelConfig(
+        short_name="SD-2.1-v",
+        config_path="configs/stable-diffusion-v2-inference-v.yaml",
+        weights_url="https://huggingface.co/stabilityai/stable-diffusion-2-1/resolve/main/v2-1_768-ema-pruned.ckpt",
+        default_image_size=768,
+        forced_attn_precision="fp32",
+    ),
     ModelConfig(
         short_name="SD-2.0-v",
         config_path="configs/stable-diffusion-v2-inference-v.yaml",
         weights_url="https://huggingface.co/stabilityai/stable-diffusion-2/resolve/main/768-v-ema.ckpt",
         default_image_size=768,
     ),
-    ModelConfig(
-        short_name="SD-2.0-upscale",
-        config_path="configs/stable-diffusion-v2-upscaling.yaml",
-        weights_url="https://huggingface.co/stabilityai/stable-diffusion-x4-upscaler/resolve/main/x4-upscaler-ema.ckpt",
-        default_image_size=512,
-    ),
+    # ModelConfig(
+    #     short_name="SD-2.0-upscale",
+    #     config_path="configs/stable-diffusion-v2-upscaling.yaml",
+    #     weights_url="https://huggingface.co/stabilityai/stable-diffusion-x4-upscaler/resolve/main/x4-upscaler-ema.ckpt",
+    #     default_image_size=512,
+    # ),
 ]
 
 MODEL_CONFIG_SHORTCUTS = {m.short_name: m for m in MODEL_CONFIGS}
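As a sketch of how these entries are consumed: the `MODEL_CONFIG_SHORTCUTS` dict defined above lets a `--model` value resolve straight to its `ModelConfig`. Reading the fields directly, as below, is just for illustration.

```python
# Sketch: resolving a --model shortcut through the mapping defined above.
# MODEL_CONFIG_SHORTCUTS and the ModelConfig fields come from this diff;
# poking at them directly is only for illustration.
from imaginairy.config import MODEL_CONFIG_SHORTCUTS

cfg = MODEL_CONFIG_SHORTCUTS["SD-2.1-v"]
print(cfg.config_path)            # configs/stable-diffusion-v2-inference-v.yaml
print(cfg.default_image_size)     # 768
print(cfg.forced_attn_precision)  # "fp32": this model needs float32 attention
```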
@@ -11,6 +11,7 @@ from transformers.utils.hub import TRANSFORMERS_CACHE, HfFolder
 from transformers.utils.hub import url_to_filename as tf_url_to_filename
 
 from imaginairy import config as iconfig
+from imaginairy.modules import attention
 from imaginairy.paths import PKG_ROOT
 from imaginairy.utils import get_device, instantiate_from_config
 
@@ -137,6 +138,7 @@ def _get_diffusion_model(
     Weights location may also be shortcut name, e.g. "SD-1.5"
     """
     global MOST_RECENTLY_LOADED_MODEL  # noqa
+    model_config = None
     if weights_location is None:
         weights_location = iconfig.DEFAULT_MODEL
     if (
@@ -155,6 +157,12 @@ def _get_diffusion_model(
             model_config.weights_url,
         )
 
+    # some models need the attention calculated in float32
+    if model_config is not None:
+        attention.ATTENTION_PRECISION_OVERRIDE = model_config.forced_attn_precision
+    else:
+        attention.ATTENTION_PRECISION_OVERRIDE = "default"
+
     key = (config_path, weights_location)
     if key not in LOADED_MODELS:
         MemoryAwareModel(
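The override is deliberately simple module-level state: the loader writes it once per model load, and `CrossAttention.forward` reads it on every call (see the attention.py hunks below). A self-contained sketch of the pattern, with hypothetical stand-in functions:

```python
# Self-contained sketch of the module-level-flag pattern used above.
# load_model / attention_step are hypothetical stand-ins for
# _get_diffusion_model and CrossAttention.forward.
ATTENTION_PRECISION_OVERRIDE = "default"

def load_model(forced_attn_precision="default"):
    # the loader flips the flag based on the model's config
    global ATTENTION_PRECISION_OVERRIDE
    ATTENTION_PRECISION_OVERRIDE = forced_attn_precision

def attention_step():
    # the hot path just reads the flag on every call
    if ATTENTION_PRECISION_OVERRIDE == "fp32":
        return "attention computed in float32"
    return "attention computed at autocast precision"

load_model(forced_attn_precision="fp32")  # what loading SD-2.1-v does
print(attention_step())                   # attention computed in float32
```

A global avoids threading the flag through several constructor layers, at the cost of being per-process rather than per-model-instance state.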
@@ -18,6 +18,9 @@ except ImportError:
     XFORMERS_IS_AVAILBLE = False
 
+
+ATTENTION_PRECISION_OVERRIDE = "default"
+
 
 class GEGLU(nn.Module):
     def __init__(self, dim_in, dim_out):
         super().__init__()
@@ -178,13 +181,20 @@ class CrossAttention(nn.Module):
 
         q = self.to_q(x)
         context = context if context is not None else x
-        k = self.to_k(context)
+        k = self.to_k(context) * self.scale
         v = self.to_v(context)
 
         q, k, v = map(lambda t: rearrange(t, "b n (h d) -> (b h) n d", h=h), (q, k, v))
 
-        sim = einsum("b i d, b j d -> b i j", q, k) * self.scale
+        # force cast to fp32 to avoid overflowing
+        if ATTENTION_PRECISION_OVERRIDE == "fp32":
+            with torch.autocast(enabled=False, device_type=get_device()):
+                q, k = q.float(), k.float()
+                sim = einsum("b i d, b j d -> b i j", q, k)
+        else:
+            sim = einsum("b i d, b j d -> b i j", q, k)
+
         del q, k
         # if mask is not None:
         #     if sim.shape[2] == 320 and False:
         #         mask = [mask] * 2
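Why the two changes above matter: float16 tops out at 65504, and the pre-softmax logits of SD-2.1-v's attention layers can exceed that, which shows up as black images or NaNs. Folding `self.scale` into `k` before the matmul shrinks the intermediate values, and the fp32 path removes the ceiling entirely. A standalone demonstration with synthetic tensors:

```python
# Standalone demo of the fp16 overflow worked around above; the tensor
# values and scale are synthetic, not taken from the real model.
# (Needs fp16 matmul support: run on GPU or a recent PyTorch CPU build.)
import torch

q = torch.full((1, 4, 64), 40.0, dtype=torch.float16)
k = torch.full((1, 4, 64), 40.0, dtype=torch.float16)
scale = 64 ** -0.5  # 1/sqrt(head_dim), as in attention

# scaling after the matmul: each logit is 40*40*64 = 102400 > 65504 -> inf
sim_late = torch.einsum("b i d, b j d -> b i j", q, k) * scale
print(sim_late.isinf().any())   # tensor(True)

# folding the scale into k first keeps the intermediate at 40*5*64 = 12800
sim_early = torch.einsum("b i d, b j d -> b i j", q, k * scale)
print(sim_early.isinf().any())  # tensor(False)
```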
@@ -237,7 +247,14 @@ class CrossAttention(nn.Module):
         slice_size = q.shape[1] // steps if (q.shape[1] % steps) == 0 else q.shape[1]
         for i in range(0, q.shape[1], slice_size):
             end = i + slice_size
-            s1 = einsum("b i d, b j d -> b i j", q[:, i:end], k)
+
+            # force cast to fp32 to avoid overflowing
+            if ATTENTION_PRECISION_OVERRIDE == "fp32":
+                with torch.autocast(enabled=False, device_type=get_device()):
+                    q, k = q.float(), k.float()
+                    s1 = einsum("b i d, b j d -> b i j", q[:, i:end], k)
+            else:
+                s1 = einsum("b i d, b j d -> b i j", q[:, i:end], k)
 
             s2 = s1.softmax(dim=-1, dtype=q.dtype)
             del s1
(Binary changes: 8 expected-output test images modified and 12 added, roughly 1.8 KiB to 1.0 MiB each.)
@@ -39,7 +39,9 @@ compare_prompts = [
 
 
 @pytest.mark.skipif(get_device() != "cuda", reason="Too slow to run on CPU or MPS")
-@pytest.mark.parametrize("model_version", ["SD-1.4", "SD-1.5", "SD-2.0", "SD-2.0-v"])
+@pytest.mark.parametrize(
+    "model_version", ["SD-1.4", "SD-1.5", "SD-2.0", "SD-2.0-v", "SD-2.1", "SD-2.1-v"]
+)
 def test_model_versions(filename_base_for_orig_outputs, model_version):
     """Test that we can switch between model versions"""
     prompts = []