diff --git a/imaginairy/modules/attention.py b/imaginairy/modules/attention.py
index 1144d8e..58f6d3a 100644
--- a/imaginairy/modules/attention.py
+++ b/imaginairy/modules/attention.py
@@ -1,5 +1,4 @@
 import math
-from inspect import isfunction
 
 import torch
 import torch.nn.functional as F
@@ -10,27 +9,6 @@ from imaginairy.modules.diffusion.util import checkpoint
 from imaginairy.utils import get_device
 
 
-def uniq(arr):
-    return {el: True for el in arr}.keys()
-
-
-def default(val, d):
-    if val is not None:
-        return val
-    return d() if isfunction(d) else d
-
-
-def max_neg_value(t):
-    return -torch.finfo(t.dtype).max
-
-
-def init_(tensor):
-    dim = tensor.shape[-1]
-    std = 1 / math.sqrt(dim)
-    tensor.uniform_(-std, std)
-    return tensor
-
-
 # feedforward
 class GEGLU(nn.Module):
     def __init__(self, dim_in, dim_out):
@@ -193,7 +171,7 @@ class CrossAttention(nn.Module):
         h = self.heads
 
         q_in = self.to_q(x)
-        context = default(context, x)
+        context = context if context is not None else x
         k_in = self.to_k(context)
         v_in = self.to_v(context)
         del context, x
@@ -227,7 +205,7 @@ class CrossAttention(nn.Module):
            max_res = math.floor(math.sqrt(math.sqrt(mem_free_total / 2.5)) / 8) * 64
            raise RuntimeError(
                f"Not enough memory, use lower resolution (max approx. {max_res}x{max_res}). "
-               f"Need: {mem_required/64/gb:0.1f}GB free, Have:{mem_free_total/gb:0.1f}GB free"
+               f"Need: {mem_required / 64 / gb:0.1f}GB free, Have:{mem_free_total / gb:0.1f}GB free"
            )
 
        slice_size = q.shape[1] // steps if (q.shape[1] % steps) == 0 else q.shape[1]
diff --git a/imaginairy/modules/find_noise.py b/imaginairy/modules/find_noise.py
deleted file mode 100644
index 7d394d0..0000000
--- a/imaginairy/modules/find_noise.py
+++ /dev/null
@@ -1,80 +0,0 @@
-"""
-I tried it with the DDIM sampler and it didn't work.
-
-Probably need to use the k-diffusion sampler with it
-from https://gist.githubusercontent.com/trygvebw/c71334dd127d537a15e9d59790f7f5e1/raw/a846393251f5be8289d4febc75a19f1f962aabcc/find_noise.py
-
-needs https://github.com/crowsonkb/k-diffusion
-"""
-from contextlib import nullcontext
-
-import torch
-from torch import autocast
-
-from imaginairy.img_utils import pillow_img_to_model_latent
-from imaginairy.utils import get_device
-from imaginairy.vendored import k_diffusion as K
-
-
-def find_noise_for_image(model, pil_img, prompt, steps=50, cond_scale=1.0, half=True):
-    img_latent = pillow_img_to_model_latent(model, pil_img, batch_size=1, half=half)
-    return find_noise_for_latent(
-        model,
-        img_latent,
-        prompt,
-        steps=steps,
-        cond_scale=cond_scale,
-    )
-
-
-def find_noise_for_latent(model, img_latent, prompt, steps=50, cond_scale=1.0):
-    x = img_latent
-
-    _autocast = autocast if get_device() in ("cuda", "cpu") else nullcontext
-    with (torch.no_grad(), _autocast(get_device())):
-        uncond = model.get_learned_conditioning([""])
-        cond = model.get_learned_conditioning([prompt])
-
-    s_in = x.new_ones([x.shape[0]])
-    dnw = K.external.CompVisDenoiser(model)
-    sigmas = dnw.get_sigmas(steps).flip(0)
-
-    with (torch.no_grad(), _autocast(get_device())):
-        for i in range(1, len(sigmas)):
-            x_in = torch.cat([x] * 2)
-            sigma_in = torch.cat([sigmas[i] * s_in] * 2)
-            cond_in = torch.cat([uncond, cond])
-
-            c_out, c_in = [
-                K.utils.append_dims(k, x_in.ndim) for k in dnw.get_scalings(sigma_in)
-            ]
-            t = dnw.sigma_to_t(sigma_in)
-
-            eps = model.apply_model(x_in * c_in, t, cond=cond_in)
-            denoised_uncond, denoised_cond = (x_in + eps * c_out).chunk(2)
-
-            denoised = denoised_uncond + (denoised_cond - denoised_uncond) * cond_scale
-
-            d = (x - denoised) / sigmas[i]
-            dt = sigmas[i] - sigmas[i - 1]
-
-            x = x + d * dt
-
-            # This shouldn't be necessary, but solved some VRAM issues
-            del (
-                x_in,
-                sigma_in,
-                cond_in,
-                c_out,
-                c_in,
-                t,
-            )
-            del eps, denoised_uncond, denoised_cond, denoised, d, dt
-            # collect_and_empty()
-
-    # return (x / x.std())
-    return (x / x.std()) * sigmas[-1]
-
-
-if __name__ == "__main__":
-    pass
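Note on the default(context, x) inlining in attention.py above: the removed helper also supported lazy callable fallbacks via isfunction, which the inline expression drops. That is safe at this call site because the fallback x is always a tensor, never a callable. A minimal sketch checking that the two forms agree here (the default body is copied from the deleted hunk; the surrounding harness is illustrative only):

from inspect import isfunction


def default(val, d):
    # Removed helper, copied verbatim from the deleted hunk above.
    if val is not None:
        return val
    return d() if isfunction(d) else d


# Hypothetical stand-ins for the attention inputs; in the real code
# x is a tensor, so the callable branch of default() never fires.
context = None
x = object()

# Old call site vs. the inlined replacement from the diff:
assert default(context, x) is (context if context is not None else x)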
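For reference, the deleted find_noise.py tried to invert an image latent back to sampler noise: an Euler-style loop over a flipped sigma schedule, applying classifier-free guidance at each step. A minimal sketch of one step of that loop, assuming a hypothetical denoise_fn(x_in, sigma_in, cond_in) that stands in for the CompVisDenoiser scalings plus model.apply_model calls in the deleted code:

import torch


def reverse_euler_step(x, sigmas, i, denoise_fn, uncond, cond, cond_scale=1.0):
    # Run the unconditional and conditional passes as one batch, mirroring
    # torch.cat([x] * 2) / torch.cat([uncond, cond]) in the deleted loop.
    s_in = x.new_ones([x.shape[0]])
    x_in = torch.cat([x] * 2)
    sigma_in = torch.cat([sigmas[i] * s_in] * 2)
    cond_in = torch.cat([uncond, cond])
    denoised_uncond, denoised_cond = denoise_fn(x_in, sigma_in, cond_in).chunk(2)

    # Classifier-free guidance: push the prediction toward the prompt.
    denoised = denoised_uncond + (denoised_cond - denoised_uncond) * cond_scale

    # Euler step over a flipped schedule: dt = sigmas[i] - sigmas[i - 1] is
    # positive here, so each step walks the clean latent toward noise.
    d = (x - denoised) / sigmas[i]
    dt = sigmas[i] - sigmas[i - 1]
    return x + d * dt

The full function iterated this for i in range(1, len(sigmas)) over dnw.get_sigmas(steps).flip(0) and rescaled the result by sigmas[-1] before returning.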