Mirror of https://github.com/brycedrennan/imaginAIry (synced 2024-10-31 03:20:40 +00:00)
feature: update refiners
better handles img2img (partial diffusion runs)
This commit is contained in:
parent
1bf53e47cf
commit
cf8a44b317
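
This update threads a `first_step` through the whole pipeline so img2img can run only the tail of the diffusion schedule instead of slicing `sd.steps` by hand. A minimal sketch of the idea, in Python (illustration only; `init_image_strength` and the step arithmetic are assumptions, not code from this commit):

    # Sketch: start denoising part-way through a 30-step schedule for img2img.
    num_steps = 30
    init_image_strength = 0.35                               # hypothetical caller-supplied value
    first_step = int(num_steps * (1 - init_image_strength))  # assumed mapping from strength to step

    sd.set_inference_steps(num_steps, first_step=first_step)  # new API introduced in this commit
    # The init latent is then noised to the level of `first_step`, and only the remaining
    # steps (sd.steps, i.e. scheduler.inference_steps) are actually denoised.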
Makefile (2 lines changed)

@@ -210,7 +210,7 @@ vendorize_normal_map:

 vendorize_refiners:
-	export REPO=git@github.com:finegrain-ai/refiners.git PKG=refiners COMMIT=ce3035923ba71bcb5044708d2f1c37fd1d6722e9 && \
+	export REPO=git@github.com:finegrain-ai/refiners.git PKG=refiners COMMIT=91aea9b7ff63ddf93f99e2ce6a4452bd658b1948 && \
 	make download_repo REPO=$$REPO PKG=$$PKG COMMIT=$$COMMIT && \
 	mkdir -p ./imaginairy/vendored/$$PKG && \
 	rm -rf ./imaginairy/vendored/$$PKG/* && \
@@ -289,7 +289,7 @@ def generate_single_image(
             msg = f"Unknown solver type: {prompt.solver_type}"
             raise ValueError(msg)
         sd.scheduler.to(device=sd.unet.device, dtype=sd.unet.dtype)
-        sd.set_num_inference_steps(prompt.steps)
+        sd.set_inference_steps(prompt.steps, first_step=first_step)

         if hasattr(sd, "mask_latents") and mask_image is not None:
             sd.set_inpainting_conditions(
@@ -306,11 +306,11 @@ def generate_single_image(

         if init_latent is not None:
             noise_step = noise_step if noise_step is not None else first_step
-            if first_step >= len(sd.steps):
+            if first_step >= len(sd.scheduler.all_steps):
                 noised_latent = init_latent
             else:
                 noised_latent = sd.scheduler.add_noise(
-                    x=init_latent, noise=noise, step=sd.steps[noise_step]
+                    x=init_latent, noise=noise, step=sd.scheduler.all_steps[noise_step]
                 )

         with lc.timing("text-conditioning"):
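
The lookups above switch from `sd.steps` to `sd.scheduler.all_steps` because `sd.steps` now returns only the steps that will actually run. A standalone illustration of the indexing (plain Python, arbitrary numbers):

    # Why the noise level is looked up in all_steps rather than the shortened sd.steps.
    num_inference_steps, first_step = 30, 20
    all_steps = list(range(num_inference_steps))   # scheduler.all_steps: the full schedule
    inference_steps = all_steps[first_step:]       # scheduler.inference_steps == sd.steps
    noise_step = first_step
    assert all_steps[noise_step] == 20             # absolute index into the full schedule
    assert len(inference_steps) == 10              # only these steps get denoised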
@@ -330,7 +330,7 @@ def generate_single_image(

         with lc.timing("unet"):
             for step in tqdm(
-                sd.steps[first_step:], bar_format=" {l_bar}{bar}{r_bar}", leave=False
+                sd.steps, bar_format=" {l_bar}{bar}{r_bar}", leave=False
             ):
                 log_latent(x, "noisy_latent")
                 x = sd(
@@ -193,19 +193,23 @@ def summarize_tensor(tensor: torch.Tensor, /) -> str:
         f"dtype={str(object=tensor.dtype).removeprefix('torch.')}",
         f"device={tensor.device}",
     ]
-    if not tensor.is_complex():
+    if tensor.is_complex():
+        tensor_f = tensor.real.float()
+    else:
+        if tensor.numel() > 0:
             info_list.extend(
                 [
                     f"min={tensor.min():.2f}",  # type: ignore
                     f"max={tensor.max():.2f}",  # type: ignore
                 ]
             )
+        tensor_f = tensor.float()

     info_list.extend(
         [
-            f"mean={tensor.float().mean():.2f}",
-            f"std={tensor.float().std():.2f}",
-            f"norm={norm(x=tensor.float()):.2f}",
+            f"mean={tensor_f.mean():.2f}",
+            f"std={tensor_f.std():.2f}",
+            f"norm={norm(x=tensor_f):.2f}",
             f"grad={tensor.requires_grad}",
         ]
     )
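
For context, the rewritten branches guard the two cases that break naive stats: empty tensors (min/max would raise) and complex tensors (stats are taken on the real part). A small standalone illustration with plain torch, independent of the vendored helpers:

    import torch

    empty = torch.empty(0)
    # empty.min() / empty.max() raise on an empty tensor, hence the numel() > 0 guard.

    complex_t = torch.tensor([1 + 2j, 3 - 1j])
    tensor_f = complex_t.real.float()   # mean/std/norm are computed on the real part
    print(tensor_f.mean(), tensor_f.std())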
@@ -32,21 +32,21 @@ class LatentDiffusionModel(fl.Module, ABC):
         self.clip_text_encoder = clip_text_encoder.to(device=self.device, dtype=self.dtype)
         self.scheduler = scheduler.to(device=self.device, dtype=self.dtype)

-    def set_num_inference_steps(self, num_inference_steps: int) -> None:
+    def set_inference_steps(self, num_steps: int, first_step: int = 0) -> None:
         initial_diffusion_rate = self.scheduler.initial_diffusion_rate
         final_diffusion_rate = self.scheduler.final_diffusion_rate
         device, dtype = self.scheduler.device, self.scheduler.dtype
         self.scheduler = self.scheduler.__class__(
-            num_inference_steps,
+            num_inference_steps=num_steps,
             initial_diffusion_rate=initial_diffusion_rate,
             final_diffusion_rate=final_diffusion_rate,
+            first_inference_step=first_step,
         ).to(device=device, dtype=dtype)

     def init_latents(
         self,
         size: tuple[int, int],
         init_image: Image.Image | None = None,
-        first_step: int = 0,
         noise: Tensor | None = None,
     ) -> Tensor:
         height, width = size

@@ -59,11 +59,15 @@ class LatentDiffusionModel(fl.Module, ABC):
         if init_image is None:
             return noise
         encoded_image = self.lda.encode_image(image=init_image.resize(size=(width, height)))
-        return self.scheduler.add_noise(x=encoded_image, noise=noise, step=self.steps[first_step])
+        return self.scheduler.add_noise(
+            x=encoded_image,
+            noise=noise,
+            step=self.scheduler.first_inference_step,
+        )

     @property
     def steps(self) -> list[int]:
-        return self.scheduler.steps
+        return self.scheduler.inference_steps

     @abstractmethod
     def set_unet_context(self, *, timestep: Tensor, clip_text_embedding: Tensor, **_: Tensor) -> None:
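
A short usage sketch of the renamed method (`sd` stands in for any LatentDiffusionModel subclass; the 30/20 values are arbitrary):

    # Rebuild the scheduler so only steps 20..29 of a 30-step schedule will run.
    sd.set_inference_steps(30, first_step=20)
    assert sd.steps == list(range(20, 30))   # sd.steps now proxies scheduler.inference_steps
    # init_latents() no longer takes first_step; it noises the encoded image to the
    # level of scheduler.first_inference_step instead.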
@@ -24,23 +24,23 @@ def compute_sinusoidal_embedding(
 class RangeEncoder(fl.Chain):
     def __init__(
         self,
-        sinuosidal_embedding_dim: int,
+        sinusoidal_embedding_dim: int,
         embedding_dim: int,
         device: Device | str | None = None,
         dtype: DType | None = None,
     ) -> None:
-        self.sinuosidal_embedding_dim = sinuosidal_embedding_dim
+        self.sinusoidal_embedding_dim = sinusoidal_embedding_dim
         self.embedding_dim = embedding_dim
         super().__init__(
-            fl.Lambda(self.compute_sinuosoidal_embedding),
+            fl.Lambda(self.compute_sinusoidal_embedding),
             fl.Converter(set_device=False, set_dtype=True),
-            fl.Linear(in_features=sinuosidal_embedding_dim, out_features=embedding_dim, device=device, dtype=dtype),
+            fl.Linear(in_features=sinusoidal_embedding_dim, out_features=embedding_dim, device=device, dtype=dtype),
             fl.SiLU(),
             fl.Linear(in_features=embedding_dim, out_features=embedding_dim, device=device, dtype=dtype),
         )

-    def compute_sinuosoidal_embedding(self, x: Int[Tensor, "*batch 1"]) -> Float[Tensor, "*batch 1 embedding_dim"]:
-        return compute_sinusoidal_embedding(x, embedding_dim=self.sinuosidal_embedding_dim)
+    def compute_sinusoidal_embedding(self, x: Int[Tensor, "*batch 1"]) -> Float[Tensor, "*batch 1 embedding_dim"]:
+        return compute_sinusoidal_embedding(x, embedding_dim=self.sinusoidal_embedding_dim)


 class RangeAdapter2d(fl.Sum, Adapter[fl.Conv2d]):
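
For reference, a generic sketch of what a sinusoidal embedding of this shape computes. This is the standard transformer-style sin/cos formulation and only approximates the vendored compute_sinusoidal_embedding; it is not a copy of it:

    import torch

    def sinusoidal_embedding(x: torch.Tensor, embedding_dim: int) -> torch.Tensor:
        # Sin/cos features over log-spaced frequencies; x is a batch of integer timesteps.
        half = embedding_dim // 2
        freqs = torch.exp(-torch.log(torch.tensor(10000.0)) * torch.arange(half) / half)
        angles = x.float().unsqueeze(-1) * freqs
        return torch.cat([torch.cos(angles), torch.sin(angles)], dim=-1)

    emb = sinusoidal_embedding(torch.tensor([0, 250, 999]), embedding_dim=320)
    print(emb.shape)  # torch.Size([3, 320])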
@@ -11,6 +11,7 @@ class DDIM(Scheduler):
         initial_diffusion_rate: float = 8.5e-4,
         final_diffusion_rate: float = 1.2e-2,
         noise_schedule: NoiseSchedule = NoiseSchedule.QUADRATIC,
+        first_inference_step: int = 0,
         device: Device | str = "cpu",
         dtype: Dtype = float32,
     ) -> None:
@@ -20,6 +21,7 @@ class DDIM(Scheduler):
             initial_diffusion_rate=initial_diffusion_rate,
             final_diffusion_rate=final_diffusion_rate,
             noise_schedule=noise_schedule,
+            first_inference_step=first_inference_step,
             device=device,
             dtype=dtype,
         )
@@ -35,6 +37,8 @@ class DDIM(Scheduler):
         return timesteps.flip(0)

     def __call__(self, x: Tensor, noise: Tensor, step: int, generator: Generator | None = None) -> Tensor:
+        assert self.first_inference_step <= step < self.num_inference_steps, "invalid step {step}"
+
         timestep, previous_timestep = (
             self.timesteps[step],
             (
@@ -5,8 +5,9 @@ from imaginairy.vendored.refiners.foundationals.latent_diffusion.schedulers.sche

 class DDPM(Scheduler):
     """
-    The Denoising Diffusion Probabilistic Models (DDPM) is a specific type of diffusion model,
-    which uses a specific strategy to generate the timesteps and applies the diffusion process in a specific way.
+    Denoising Diffusion Probabilistic Model
+
+    Only used for training Latent Diffusion models. Cannot be called.
     """

     def __init__(
@@ -15,6 +16,7 @@ class DDPM(Scheduler):
         num_train_timesteps: int = 1_000,
         initial_diffusion_rate: float = 8.5e-4,
         final_diffusion_rate: float = 1.2e-2,
+        first_inference_step: int = 0,
         device: Device | str = "cpu",
     ) -> None:
         super().__init__(
@@ -22,6 +24,7 @@ class DDPM(Scheduler):
             num_train_timesteps=num_train_timesteps,
             initial_diffusion_rate=initial_diffusion_rate,
             final_diffusion_rate=final_diffusion_rate,
+            first_inference_step=first_inference_step,
             device=device,
         )
@@ -24,6 +24,7 @@ class DPMSolver(Scheduler):
         final_diffusion_rate: float = 1.2e-2,
         last_step_first_order: bool = False,
         noise_schedule: NoiseSchedule = NoiseSchedule.QUADRATIC,
+        first_inference_step: int = 0,
         device: Device | str = "cpu",
         dtype: Dtype = float32,
     ):
@@ -33,12 +34,12 @@ class DPMSolver(Scheduler):
             initial_diffusion_rate=initial_diffusion_rate,
             final_diffusion_rate=final_diffusion_rate,
             noise_schedule=noise_schedule,
+            first_inference_step=first_inference_step,
             device=device,
             dtype=dtype,
         )
         self.estimated_data = deque([tensor([])] * 2, maxlen=2)
         self.last_step_first_order = last_step_first_order
-        self._first_step_has_been_run = False

     def _generate_timesteps(self) -> Tensor:
         # We need to use numpy here because:
@@ -81,6 +82,7 @@ class DPMSolver(Scheduler):
         previous_scale_factor = self.cumulative_scale_factors[previous_timestep]
         previous_noise_std = self.noise_std[previous_timestep]
         current_noise_std = self.noise_std[current_timestep]
+
         estimation_delta = (current_data_estimation - next_data_estimation) / (
             (current_ratio - next_ratio) / (previous_ratio - current_ratio)
         )
@@ -100,13 +102,14 @@ class DPMSolver(Scheduler):
         backward Euler update, which is a numerical method commonly used to solve ordinary differential equations
         (ODEs).
         """
+        assert self.first_inference_step <= step < self.num_inference_steps, "invalid step {step}"
+
         current_timestep = self.timesteps[step]
         scale_factor, noise_ratio = self.cumulative_scale_factors[current_timestep], self.noise_std[current_timestep]
         estimated_denoised_data = (x - noise_ratio * noise) / scale_factor
         self.estimated_data.append(estimated_denoised_data)

-        if step == 0 or (self.last_step_first_order and step == self.num_inference_steps - 1) or not self._first_step_has_been_run:
-            self._first_step_has_been_run = True
+        if step == self.first_inference_step or (self.last_step_first_order and step == self.num_inference_steps - 1):
             return self.dpm_solver_first_order_update(x=x, noise=estimated_denoised_data, step=step)

         return self.multistep_dpm_solver_second_order_update(x=x, step=step)
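
The stateful `_first_step_has_been_run` flag is gone: the second-order multistep update needs two previous data estimates, so the first step that actually executes (for a partial img2img run that is `first_inference_step`, not 0) must fall back to the first-order update. A standalone sketch of that dispatch (stubbed; names mirror the diff):

    # Which update order runs at each executed step of a partial run.
    first_inference_step, num_inference_steps, last_step_first_order = 20, 30, False

    def update_order(step: int) -> int:
        if step == first_inference_step or (last_step_first_order and step == num_inference_steps - 1):
            return 1   # first-order: only one data estimate is available yet
        return 2       # second-order multistep: uses the two most recent estimates

    print([update_order(s) for s in range(first_inference_step, num_inference_steps)])
    # -> [1, 2, 2, 2, 2, 2, 2, 2, 2, 2]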
@@ -13,6 +13,7 @@ class EulerScheduler(Scheduler):
         initial_diffusion_rate: float = 8.5e-4,
         final_diffusion_rate: float = 1.2e-2,
         noise_schedule: NoiseSchedule = NoiseSchedule.QUADRATIC,
+        first_inference_step: int = 0,
         device: Device | str = "cpu",
         dtype: Dtype = float32,
     ):
@@ -24,6 +25,7 @@ class EulerScheduler(Scheduler):
             initial_diffusion_rate=initial_diffusion_rate,
             final_diffusion_rate=final_diffusion_rate,
             noise_schedule=noise_schedule,
+            first_inference_step=first_inference_step,
             device=device,
             dtype=dtype,
         )
@@ -64,6 +66,8 @@ class EulerScheduler(Scheduler):
         s_tmax: float = float("inf"),
         s_noise: float = 1.0,
     ) -> Tensor:
+        assert self.first_inference_step <= step < self.num_inference_steps, "invalid step {step}"
+
         sigma = self.sigmas[step]

         gamma = min(s_churn / (len(self.sigmas) - 1), 2**0.5 - 1) if s_tmin <= sigma <= s_tmax else 0
@@ -33,6 +33,7 @@ class Scheduler(ABC):
         initial_diffusion_rate: float = 8.5e-4,
         final_diffusion_rate: float = 1.2e-2,
         noise_schedule: NoiseSchedule = NoiseSchedule.QUADRATIC,
+        first_inference_step: int = 0,
         device: Device | str = "cpu",
         dtype: DType = float32,
     ):
@@ -43,6 +44,7 @@ class Scheduler(ABC):
         self.initial_diffusion_rate = initial_diffusion_rate
         self.final_diffusion_rate = final_diffusion_rate
         self.noise_schedule = noise_schedule
+        self.first_inference_step = first_inference_step
         self.scale_factors = self.sample_noise_schedule()
         self.cumulative_scale_factors = sqrt(self.scale_factors.cumprod(dim=0))
         self.noise_std = sqrt(1.0 - self.scale_factors.cumprod(dim=0))
@@ -68,9 +70,13 @@ class Scheduler(ABC):
         ...

     @property
-    def steps(self) -> list[int]:
+    def all_steps(self) -> list[int]:
         return list(range(self.num_inference_steps))

+    @property
+    def inference_steps(self) -> list[int]:
+        return self.all_steps[self.first_inference_step :]
+
     def scale_model_input(self, x: Tensor, step: int) -> Tensor:
         """
         For compatibility with schedulers that need to scale the input according to the current timestep.
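
A hedged construction sketch showing what the new property split yields. The DDIM module path follows the vendored layout referenced earlier; constructor keywords other than first_inference_step are assumptions:

    from imaginairy.vendored.refiners.foundationals.latent_diffusion.schedulers.ddim import DDIM

    scheduler = DDIM(num_inference_steps=30, first_inference_step=20)
    assert scheduler.all_steps == list(range(30))              # full schedule
    assert scheduler.inference_steps == list(range(20, 30))    # steps that will actually run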
@@ -28,7 +28,7 @@ class TextTimeEmbedding(fl.Chain):
             fl.Chain(
                 fl.UseContext(context="diffusion", key="time_ids"),
                 fl.Unsqueeze(dim=-1),
-                fl.Lambda(func=self.compute_sinuosoidal_embedding),
+                fl.Lambda(func=self.compute_sinusoidal_embedding),
                 fl.Reshape(-1),
             ),
             dim=1,
@@ -49,7 +49,7 @@ class TextTimeEmbedding(fl.Chain):
             ),
         )

-    def compute_sinuosoidal_embedding(self, x: Tensor) -> Tensor:
+    def compute_sinusoidal_embedding(self, x: Tensor) -> Tensor:
         return compute_sinusoidal_embedding(x=x, embedding_dim=self.time_ids_embedding_dim)
@@ -61,7 +61,7 @@ class TimestepEncoder(fl.Passthrough):
             fl.Chain(
                 fl.UseContext(context="diffusion", key="timestep"),
                 RangeEncoder(
-                    sinuosidal_embedding_dim=320,
+                    sinusoidal_embedding_dim=320,
                     embedding_dim=self.timestep_embedding_dim,
                     device=device,
                     dtype=dtype,
@@ -1 +1 @@
-vendored from git@github.com:finegrain-ai/refiners.git @ ce3035923ba71bcb5044708d2f1c37fd1d6722e9
+vendored from git@github.com:finegrain-ai/refiners.git @ 91aea9b7ff63ddf93f99e2ce6a4452bd658b1948
Binary file not shown. Before: 3.1 MiB. After: 3.0 MiB.