diff --git a/Makefile b/Makefile
index b56d193..cb7fccb 100644
--- a/Makefile
+++ b/Makefile
@@ -210,7 +210,7 @@ vendorize_normal_map:
 
 vendorize_refiners:
-	export REPO=git@github.com:finegrain-ai/refiners.git PKG=refiners COMMIT=ce3035923ba71bcb5044708d2f1c37fd1d6722e9 && \
+	export REPO=git@github.com:finegrain-ai/refiners.git PKG=refiners COMMIT=91aea9b7ff63ddf93f99e2ce6a4452bd658b1948 && \
 	make download_repo REPO=$$REPO PKG=$$PKG COMMIT=$$COMMIT && \
 	mkdir -p ./imaginairy/vendored/$$PKG && \
 	rm -rf ./imaginairy/vendored/$$PKG/* && \
diff --git a/imaginairy/api/generate_refiners.py b/imaginairy/api/generate_refiners.py
index 7fd2645..6f29737 100644
--- a/imaginairy/api/generate_refiners.py
+++ b/imaginairy/api/generate_refiners.py
@@ -289,7 +289,7 @@ def generate_single_image(
             msg = f"Unknown solver type: {prompt.solver_type}"
             raise ValueError(msg)
         sd.scheduler.to(device=sd.unet.device, dtype=sd.unet.dtype)
-        sd.set_num_inference_steps(prompt.steps)
+        sd.set_inference_steps(prompt.steps, first_step=first_step)
 
         if hasattr(sd, "mask_latents") and mask_image is not None:
             sd.set_inpainting_conditions(
@@ -306,11 +306,11 @@ def generate_single_image(
 
         if init_latent is not None:
             noise_step = noise_step if noise_step is not None else first_step
-            if first_step >= len(sd.steps):
+            if first_step >= len(sd.scheduler.all_steps):
                 noised_latent = init_latent
             else:
                 noised_latent = sd.scheduler.add_noise(
-                    x=init_latent, noise=noise, step=sd.steps[noise_step]
+                    x=init_latent, noise=noise, step=sd.scheduler.all_steps[noise_step]
                 )
 
         with lc.timing("text-conditioning"):
@@ -330,7 +330,7 @@ def generate_single_image(
 
         with lc.timing("unet"):
             for step in tqdm(
-                sd.steps[first_step:], bar_format=" {l_bar}{bar}{r_bar}", leave=False
+                sd.steps, bar_format=" {l_bar}{bar}{r_bar}", leave=False
             ):
                 log_latent(x, "noisy_latent")
                 x = sd(
diff --git a/imaginairy/vendored/refiners/fluxion/utils.py b/imaginairy/vendored/refiners/fluxion/utils.py
index 09d9c06..dedf8e1 100644
--- a/imaginairy/vendored/refiners/fluxion/utils.py
+++ b/imaginairy/vendored/refiners/fluxion/utils.py
@@ -193,19 +193,23 @@ def summarize_tensor(tensor: torch.Tensor, /) -> str:
         f"dtype={str(object=tensor.dtype).removeprefix('torch.')}",
         f"device={tensor.device}",
     ]
-    if not tensor.is_complex():
-        info_list.extend(
-            [
-                f"min={tensor.min():.2f}",  # type: ignore
-                f"max={tensor.max():.2f}",  # type: ignore
-            ]
-        )
+    if tensor.is_complex():
+        tensor_f = tensor.real.float()
+    else:
+        if tensor.numel() > 0:
+            info_list.extend(
+                [
+                    f"min={tensor.min():.2f}",  # type: ignore
+                    f"max={tensor.max():.2f}",  # type: ignore
+                ]
+            )
+        tensor_f = tensor.float()
 
     info_list.extend(
         [
-            f"mean={tensor.float().mean():.2f}",
-            f"std={tensor.float().std():.2f}",
-            f"norm={norm(x=tensor.float()):.2f}",
+            f"mean={tensor_f.mean():.2f}",
+            f"std={tensor_f.std():.2f}",
+            f"norm={norm(x=tensor_f):.2f}",
             f"grad={tensor.requires_grad}",
         ]
     )
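The summarize_tensor() change above closes two crash paths: min()/max() raise on zero-element tensors, and complex tensors support neither ordering nor a direct float() cast, so statistics now come from the real part. A rough sketch of the resulting behavior; the example tensors are illustrative:

import torch

from imaginairy.vendored.refiners.fluxion.utils import summarize_tensor

# Empty tensor: min/max are skipped instead of raising.
print(summarize_tensor(torch.tensor([])))

# Complex tensor: mean/std/norm are computed on tensor.real.float().
print(summarize_tensor(torch.randn(4, dtype=torch.complex64)))

# Ordinary tensor: unchanged output with min/max/mean/std/norm/grad.
print(summarize_tensor(torch.randn(2, 3)))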
diff --git a/imaginairy/vendored/refiners/foundationals/latent_diffusion/model.py b/imaginairy/vendored/refiners/foundationals/latent_diffusion/model.py
index 1b80848..6da919c 100644
--- a/imaginairy/vendored/refiners/foundationals/latent_diffusion/model.py
+++ b/imaginairy/vendored/refiners/foundationals/latent_diffusion/model.py
@@ -32,21 +32,21 @@ class LatentDiffusionModel(fl.Module, ABC):
         self.clip_text_encoder = clip_text_encoder.to(device=self.device, dtype=self.dtype)
         self.scheduler = scheduler.to(device=self.device, dtype=self.dtype)
 
-    def set_num_inference_steps(self, num_inference_steps: int) -> None:
+    def set_inference_steps(self, num_steps: int, first_step: int = 0) -> None:
         initial_diffusion_rate = self.scheduler.initial_diffusion_rate
         final_diffusion_rate = self.scheduler.final_diffusion_rate
         device, dtype = self.scheduler.device, self.scheduler.dtype
         self.scheduler = self.scheduler.__class__(
-            num_inference_steps,
+            num_inference_steps=num_steps,
             initial_diffusion_rate=initial_diffusion_rate,
             final_diffusion_rate=final_diffusion_rate,
+            first_inference_step=first_step,
         ).to(device=device, dtype=dtype)
 
     def init_latents(
         self,
         size: tuple[int, int],
         init_image: Image.Image | None = None,
-        first_step: int = 0,
         noise: Tensor | None = None,
     ) -> Tensor:
         height, width = size
@@ -59,11 +59,15 @@ class LatentDiffusionModel(fl.Module, ABC):
         if init_image is None:
             return noise
         encoded_image = self.lda.encode_image(image=init_image.resize(size=(width, height)))
-        return self.scheduler.add_noise(x=encoded_image, noise=noise, step=self.steps[first_step])
+        return self.scheduler.add_noise(
+            x=encoded_image,
+            noise=noise,
+            step=self.scheduler.first_inference_step,
+        )
 
     @property
     def steps(self) -> list[int]:
-        return self.scheduler.steps
+        return self.scheduler.inference_steps
 
     @abstractmethod
     def set_unet_context(self, *, timestep: Tensor, clip_text_embedding: Tensor, **_: Tensor) -> None:
diff --git a/imaginairy/vendored/refiners/foundationals/latent_diffusion/range_adapter.py b/imaginairy/vendored/refiners/foundationals/latent_diffusion/range_adapter.py
index 50febc1..6fd0d4a 100644
--- a/imaginairy/vendored/refiners/foundationals/latent_diffusion/range_adapter.py
+++ b/imaginairy/vendored/refiners/foundationals/latent_diffusion/range_adapter.py
@@ -24,23 +24,23 @@ def compute_sinusoidal_embedding(
 class RangeEncoder(fl.Chain):
     def __init__(
         self,
-        sinuosidal_embedding_dim: int,
+        sinusoidal_embedding_dim: int,
         embedding_dim: int,
         device: Device | str | None = None,
         dtype: DType | None = None,
     ) -> None:
-        self.sinuosidal_embedding_dim = sinuosidal_embedding_dim
+        self.sinusoidal_embedding_dim = sinusoidal_embedding_dim
         self.embedding_dim = embedding_dim
         super().__init__(
-            fl.Lambda(self.compute_sinuosoidal_embedding),
+            fl.Lambda(self.compute_sinusoidal_embedding),
             fl.Converter(set_device=False, set_dtype=True),
-            fl.Linear(in_features=sinuosidal_embedding_dim, out_features=embedding_dim, device=device, dtype=dtype),
+            fl.Linear(in_features=sinusoidal_embedding_dim, out_features=embedding_dim, device=device, dtype=dtype),
             fl.SiLU(),
             fl.Linear(in_features=embedding_dim, out_features=embedding_dim, device=device, dtype=dtype),
         )
 
-    def compute_sinuosoidal_embedding(self, x: Int[Tensor, "*batch 1"]) -> Float[Tensor, "*batch 1 embedding_dim"]:
-        return compute_sinusoidal_embedding(x, embedding_dim=self.sinuosidal_embedding_dim)
+    def compute_sinusoidal_embedding(self, x: Int[Tensor, "*batch 1"]) -> Float[Tensor, "*batch 1 embedding_dim"]:
+        return compute_sinusoidal_embedding(x, embedding_dim=self.sinusoidal_embedding_dim)
 
 
 class RangeAdapter2d(fl.Sum, Adapter[fl.Conv2d]):
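The rename from set_num_inference_steps(n) to set_inference_steps(n, first_step) pushes the skipped-step bookkeeping into the scheduler itself via first_inference_step, so callers stop slicing sd.steps[first_step:] by hand. A rough usage sketch, assuming sd is a LatentDiffusionModel; the strength-to-first_step mapping below is illustrative, not taken from this patch:

num_steps = 30
strength = 0.6  # fraction of the schedule actually denoised
first_step = int(num_steps * (1 - strength))  # skip the first 12 steps

sd.set_inference_steps(num_steps, first_step=first_step)

# sd.steps already excludes the skipped prefix, so the denoising loop
# iterates it directly (cf. the generate_refiners.py hunk above).
for step in sd.steps:
    ...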
diff --git a/imaginairy/vendored/refiners/foundationals/latent_diffusion/schedulers/ddim.py b/imaginairy/vendored/refiners/foundationals/latent_diffusion/schedulers/ddim.py
index 34c5e6b..558778b 100644
--- a/imaginairy/vendored/refiners/foundationals/latent_diffusion/schedulers/ddim.py
+++ b/imaginairy/vendored/refiners/foundationals/latent_diffusion/schedulers/ddim.py
@@ -11,6 +11,7 @@ class DDIM(Scheduler):
         initial_diffusion_rate: float = 8.5e-4,
         final_diffusion_rate: float = 1.2e-2,
         noise_schedule: NoiseSchedule = NoiseSchedule.QUADRATIC,
+        first_inference_step: int = 0,
         device: Device | str = "cpu",
         dtype: Dtype = float32,
     ) -> None:
@@ -20,6 +21,7 @@ class DDIM(Scheduler):
             initial_diffusion_rate=initial_diffusion_rate,
             final_diffusion_rate=final_diffusion_rate,
             noise_schedule=noise_schedule,
+            first_inference_step=first_inference_step,
             device=device,
             dtype=dtype,
         )
@@ -35,6 +37,8 @@ class DDIM(Scheduler):
         return timesteps.flip(0)
 
     def __call__(self, x: Tensor, noise: Tensor, step: int, generator: Generator | None = None) -> Tensor:
+        assert self.first_inference_step <= step < self.num_inference_steps, f"invalid step {step}"
+
         timestep, previous_timestep = (
             self.timesteps[step],
             (
diff --git a/imaginairy/vendored/refiners/foundationals/latent_diffusion/schedulers/ddpm.py b/imaginairy/vendored/refiners/foundationals/latent_diffusion/schedulers/ddpm.py
index 40873b3..764e086 100644
--- a/imaginairy/vendored/refiners/foundationals/latent_diffusion/schedulers/ddpm.py
+++ b/imaginairy/vendored/refiners/foundationals/latent_diffusion/schedulers/ddpm.py
@@ -5,8 +5,9 @@ from imaginairy.vendored.refiners.foundationals.latent_diffusion.schedulers.sche
 
 class DDPM(Scheduler):
     """
-    The Denoising Diffusion Probabilistic Models (DDPM) is a specific type of diffusion model,
-    which uses a specific strategy to generate the timesteps and applies the diffusion process in a specific way.
+    Denoising Diffusion Probabilistic Model
+
+    Only used for training Latent Diffusion models. Cannot be called.
     """
 
     def __init__(
         self,
@@ -15,6 +16,7 @@
         num_train_timesteps: int = 1_000,
         initial_diffusion_rate: float = 8.5e-4,
         final_diffusion_rate: float = 1.2e-2,
+        first_inference_step: int = 0,
         device: Device | str = "cpu",
     ) -> None:
         super().__init__(
@@ -22,6 +24,7 @@
             num_train_timesteps=num_train_timesteps,
             initial_diffusion_rate=initial_diffusion_rate,
             final_diffusion_rate=final_diffusion_rate,
+            first_inference_step=first_inference_step,
             device=device,
         )
diff --git a/imaginairy/vendored/refiners/foundationals/latent_diffusion/schedulers/dpm_solver.py b/imaginairy/vendored/refiners/foundationals/latent_diffusion/schedulers/dpm_solver.py
index 5df3df1..33d432c 100644
--- a/imaginairy/vendored/refiners/foundationals/latent_diffusion/schedulers/dpm_solver.py
+++ b/imaginairy/vendored/refiners/foundationals/latent_diffusion/schedulers/dpm_solver.py
@@ -24,6 +24,7 @@ class DPMSolver(Scheduler):
         final_diffusion_rate: float = 1.2e-2,
         last_step_first_order: bool = False,
         noise_schedule: NoiseSchedule = NoiseSchedule.QUADRATIC,
+        first_inference_step: int = 0,
         device: Device | str = "cpu",
         dtype: Dtype = float32,
     ):
@@ -33,12 +34,12 @@ class DPMSolver(Scheduler):
             initial_diffusion_rate=initial_diffusion_rate,
             final_diffusion_rate=final_diffusion_rate,
             noise_schedule=noise_schedule,
+            first_inference_step=first_inference_step,
             device=device,
             dtype=dtype,
         )
         self.estimated_data = deque([tensor([])] * 2, maxlen=2)
         self.last_step_first_order = last_step_first_order
-        self._first_step_has_been_run = False
 
     def _generate_timesteps(self) -> Tensor:
         # We need to use numpy here because:
@@ -81,6 +82,7 @@ class DPMSolver(Scheduler):
         previous_scale_factor = self.cumulative_scale_factors[previous_timestep]
         previous_noise_std = self.noise_std[previous_timestep]
         current_noise_std = self.noise_std[current_timestep]
+
         estimation_delta = (current_data_estimation - next_data_estimation) / (
             (current_ratio - next_ratio) / (previous_ratio - current_ratio)
         )
@@ -100,13 +102,14 @@ class DPMSolver(Scheduler):
         backward Euler update, which is a numerical method commonly used to solve ordinary differential equations
         (ODEs).
         """
+        assert self.first_inference_step <= step < self.num_inference_steps, f"invalid step {step}"
+
         current_timestep = self.timesteps[step]
         scale_factor, noise_ratio = self.cumulative_scale_factors[current_timestep], self.noise_std[current_timestep]
         estimated_denoised_data = (x - noise_ratio * noise) / scale_factor
         self.estimated_data.append(estimated_denoised_data)
 
-        if step == 0 or (self.last_step_first_order and step == self.num_inference_steps - 1) or not self._first_step_has_been_run:
-            self._first_step_has_been_run = True
+        if step == self.first_inference_step or (self.last_step_first_order and step == self.num_inference_steps - 1):
             return self.dpm_solver_first_order_update(x=x, noise=estimated_denoised_data, step=step)
 
         return self.multistep_dpm_solver_second_order_update(x=x, step=step)
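With first_inference_step stored on the scheduler, the solvers can validate the step range up front, and DPMSolver keys its first-order update off step == self.first_inference_step instead of the removed stateful _first_step_has_been_run flag. A minimal sketch of the new contract, using an illustrative latent shape:

import torch

from imaginairy.vendored.refiners.foundationals.latent_diffusion.schedulers.ddim import DDIM

scheduler = DDIM(num_inference_steps=30, first_inference_step=12)
x = torch.randn(1, 4, 64, 64)      # illustrative latent
noise = torch.randn(1, 4, 64, 64)

scheduler(x, noise=noise, step=12)  # ok: first valid step
scheduler(x, noise=noise, step=5)   # AssertionError: invalid step 5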
diff --git a/imaginairy/vendored/refiners/foundationals/latent_diffusion/schedulers/euler.py b/imaginairy/vendored/refiners/foundationals/latent_diffusion/schedulers/euler.py
index 3cc22d0..8508ae5 100644
--- a/imaginairy/vendored/refiners/foundationals/latent_diffusion/schedulers/euler.py
+++ b/imaginairy/vendored/refiners/foundationals/latent_diffusion/schedulers/euler.py
@@ -13,6 +13,7 @@ class EulerScheduler(Scheduler):
         initial_diffusion_rate: float = 8.5e-4,
         final_diffusion_rate: float = 1.2e-2,
         noise_schedule: NoiseSchedule = NoiseSchedule.QUADRATIC,
+        first_inference_step: int = 0,
         device: Device | str = "cpu",
         dtype: Dtype = float32,
     ):
@@ -24,6 +25,7 @@ class EulerScheduler(Scheduler):
             initial_diffusion_rate=initial_diffusion_rate,
             final_diffusion_rate=final_diffusion_rate,
             noise_schedule=noise_schedule,
+            first_inference_step=first_inference_step,
             device=device,
             dtype=dtype,
         )
@@ -64,6 +66,8 @@ class EulerScheduler(Scheduler):
         s_tmax: float = float("inf"),
         s_noise: float = 1.0,
     ) -> Tensor:
+        assert self.first_inference_step <= step < self.num_inference_steps, f"invalid step {step}"
+
         sigma = self.sigmas[step]
         gamma = min(s_churn / (len(self.sigmas) - 1), 2**0.5 - 1) if s_tmin <= sigma <= s_tmax else 0
diff --git a/imaginairy/vendored/refiners/foundationals/latent_diffusion/schedulers/scheduler.py b/imaginairy/vendored/refiners/foundationals/latent_diffusion/schedulers/scheduler.py
index f64a4cc..37f9beb 100644
--- a/imaginairy/vendored/refiners/foundationals/latent_diffusion/schedulers/scheduler.py
+++ b/imaginairy/vendored/refiners/foundationals/latent_diffusion/schedulers/scheduler.py
@@ -33,6 +33,7 @@ class Scheduler(ABC):
         initial_diffusion_rate: float = 8.5e-4,
         final_diffusion_rate: float = 1.2e-2,
         noise_schedule: NoiseSchedule = NoiseSchedule.QUADRATIC,
+        first_inference_step: int = 0,
         device: Device | str = "cpu",
         dtype: DType = float32,
     ):
@@ -43,6 +44,7 @@ class Scheduler(ABC):
         self.initial_diffusion_rate = initial_diffusion_rate
         self.final_diffusion_rate = final_diffusion_rate
         self.noise_schedule = noise_schedule
+        self.first_inference_step = first_inference_step
         self.scale_factors = self.sample_noise_schedule()
         self.cumulative_scale_factors = sqrt(self.scale_factors.cumprod(dim=0))
         self.noise_std = sqrt(1.0 - self.scale_factors.cumprod(dim=0))
@@ -68,9 +70,13 @@ class Scheduler(ABC):
         ...
 
     @property
-    def steps(self) -> list[int]:
+    def all_steps(self) -> list[int]:
         return list(range(self.num_inference_steps))
 
+    @property
+    def inference_steps(self) -> list[int]:
+        return self.all_steps[self.first_inference_step :]
+
     def scale_model_input(self, x: Tensor, step: int) -> Tensor:
         """
         For compatibility with schedulers that need to scale the input according to the current timestep.
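In the Scheduler base class, the old steps property is split in two: all_steps is the full schedule (what generate_refiners.py uses to pick a noising step for an init image), while inference_steps is the suffix the denoising loop actually runs. A small sketch of the relationship, reusing the illustrative values from above:

from imaginairy.vendored.refiners.foundationals.latent_diffusion.schedulers.ddim import DDIM

scheduler = DDIM(num_inference_steps=30, first_inference_step=12)

assert scheduler.all_steps == list(range(30))            # [0, ..., 29]
assert scheduler.inference_steps == list(range(12, 30))  # [12, ..., 29]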
diff --git a/imaginairy/vendored/refiners/foundationals/latent_diffusion/stable_diffusion_xl/unet.py b/imaginairy/vendored/refiners/foundationals/latent_diffusion/stable_diffusion_xl/unet.py
index 03de722..257eaa4 100644
--- a/imaginairy/vendored/refiners/foundationals/latent_diffusion/stable_diffusion_xl/unet.py
+++ b/imaginairy/vendored/refiners/foundationals/latent_diffusion/stable_diffusion_xl/unet.py
@@ -28,7 +28,7 @@ class TextTimeEmbedding(fl.Chain):
             fl.Chain(
                 fl.UseContext(context="diffusion", key="time_ids"),
                 fl.Unsqueeze(dim=-1),
-                fl.Lambda(func=self.compute_sinuosoidal_embedding),
+                fl.Lambda(func=self.compute_sinusoidal_embedding),
                 fl.Reshape(-1),
             ),
             dim=1,
@@ -49,7 +49,7 @@ class TextTimeEmbedding(fl.Chain):
             ),
         )
 
-    def compute_sinuosoidal_embedding(self, x: Tensor) -> Tensor:
+    def compute_sinusoidal_embedding(self, x: Tensor) -> Tensor:
         return compute_sinusoidal_embedding(x=x, embedding_dim=self.time_ids_embedding_dim)
 
 
@@ -61,7 +61,7 @@ class TimestepEncoder(fl.Passthrough):
         fl.Chain(
             fl.UseContext(context="diffusion", key="timestep"),
             RangeEncoder(
-                sinuosidal_embedding_dim=320,
+                sinusoidal_embedding_dim=320,
                 embedding_dim=self.timestep_embedding_dim,
                 device=device,
                 dtype=dtype,
diff --git a/imaginairy/vendored/refiners/readme.txt b/imaginairy/vendored/refiners/readme.txt
index d6f973e..f5a0a81 100644
--- a/imaginairy/vendored/refiners/readme.txt
+++ b/imaginairy/vendored/refiners/readme.txt
@@ -1 +1 @@
-vendored from git@github.com:finegrain-ai/refiners.git @ ce3035923ba71bcb5044708d2f1c37fd1d6722e9
+vendored from git@github.com:finegrain-ai/refiners.git @ 91aea9b7ff63ddf93f99e2ce6a4452bd658b1948
diff --git a/tests/expected_output/test_large_image_.png b/tests/expected_output/test_large_image_.png
index 517e4a1..d7cac48 100644
Binary files a/tests/expected_output/test_large_image_.png and b/tests/expected_output/test_large_image_.png differ