fix: masking now works properly at strengths 0 and 1

pull/28/head
Bryce 2 years ago committed by Bryce Drennan
parent b69072d382
commit d832f15297

@@ -26,7 +26,7 @@ init: require_pyenv ## Setup a dev environment for local development.
af: autoformat ## Alias for `autoformat`
autoformat: ## Run the autoformatter.
@pycln . --all
@pycln . --all --quiet
@isort --atomic --profile black .
@black .

@@ -187,6 +187,7 @@ docker run -it --gpus all -v $HOME/.cache/huggingface:/root/.cache/huggingface -
- feature: apply mask edits to original files
- feature: auto-rotate images if exif data specifies to do so
- fix: accept mask images in command line
- fix: img2img algorithm was wrong and wouldn't work at values close to 0 or 1
**1.6.2**
- fix: another bfloat16 fix

@@ -98,7 +98,7 @@ def imagine_image_files(
generated_imgs_path = os.path.join(outdir, "generated")
os.makedirs(generated_imgs_path, exist_ok=True)
base_count = len(os.listdir(outdir))
base_count = len(os.listdir(generated_imgs_path))
output_file_extension = output_file_extension.lower()
if output_file_extension not in {"jpg", "png"}:
raise ValueError("Must output a png or jpg")
@@ -210,8 +210,10 @@ def imagine(
mask, mask_image, mask_image_orig = None, None, None
if prompt.init_image:
generation_strength = 1 - prompt.init_image_strength
ddim_steps = int(prompt.steps / generation_strength)
sampler.make_schedule(ddim_num_steps=ddim_steps, ddim_eta=ddim_eta)
t_enc = int(prompt.steps * generation_strength)
sampler.make_schedule(
ddim_num_steps=prompt.steps, ddim_eta=ddim_eta
)
try:
init_image = pillow_fit_image_within(
prompt.init_image,
@@ -265,7 +267,7 @@ def imagine(
# encode (scaled latent)
z_enc = sampler.stochastic_encode(
init_latent,
torch.tensor([prompt.steps]).to(get_device()),
torch.tensor([t_enc - 1]).to(get_device()),
)
log_latent(z_enc, "z_enc")
@@ -273,7 +275,7 @@ def imagine(
samples = sampler.decode(
z_enc,
c,
prompt.steps,
t_enc,
unconditional_guidance_scale=prompt.prompt_strength,
unconditional_conditioning=uc,
img_callback=_img_callback,

@@ -113,7 +113,8 @@ def configure_logging(level="INFO"):
help="What level of logs to show.",
)
@click.option(
"--quiet", "-q",
"--quiet",
"-q",
is_flag=True,
help="Alias of `--log-level ERROR`",
)
@@ -245,7 +246,7 @@ def imagine_cmd(
output_file_extension="png",
print_caption=caption,
precision=precision,
mask_modify_original=mask_modify_original
mask_modify_original=mask_modify_original,
)

@@ -12,10 +12,11 @@ from imaginairy.utils import get_device
def pillow_fit_image_within(image: PIL.Image.Image, max_height=512, max_width=512):
image = image.convert("RGB")
w, h = image.size
resize_ratio = min(max_width / w, max_height / h)
w, h = int(w * resize_ratio), int(h * resize_ratio)
w, h = map(lambda x: x - x % 64, (w, h)) # resize to integer multiple of 64
image = image.resize((w, h), resample=Image.Resampling.LANCZOS)
if w > max_width or h > max_height:
resize_ratio = min(max_width / w, max_height / h)
w, h = int(w * resize_ratio), int(h * resize_ratio)
w, h = map(lambda x: x - x % 64, (w, h)) # resize to integer multiple of 64
image = image.resize((w, h), resample=Image.Resampling.LANCZOS)
return image

@@ -313,7 +313,7 @@ class DDIMSampler:
def stochastic_encode(self, init_latent, t, noise=None):
# fast, but does not allow for exact reconstruction
# t serves as an index to gather the correct alphas
t = t.clamp(0, 1000)
sqrt_alphas_cumprod = torch.sqrt(self.ddim_alphas)
sqrt_one_minus_alphas_cumprod = self.ddim_sqrt_one_minus_alphas

@@ -363,7 +363,7 @@ class PLMSSampler:
def stochastic_encode(self, init_latent, t, noise=None):
# fast, but does not allow for exact reconstruction
# t serves as an index to gather the correct alphas
t = t.clamp(0, 1000)
sqrt_alphas_cumprod = torch.sqrt(self.ddim_alphas)
sqrt_one_minus_alphas_cumprod = self.ddim_sqrt_one_minus_alphas

@@ -48,12 +48,12 @@ def test_imagine(sampler_type, expected_md5):
device_sampler_type_test_cases_img_2_img = {
"mps:0": {
("plms", "54656a7f449cb73b99436e61470172b3"),
("ddim", "87d04423f6d03ddfc065cabc62e3909c"),
("plms", "0d9c40c348cdac7bdc8d5a472f378f42"),
("ddim", "12921ee5a8d276f1b477d196d304fef2"),
},
"cuda": {
("plms", "c95f23a7039cf702e2e448f454e27c46"),
("ddim", "ba054c630d9999ccbcc60c9fb687682d"),
("plms", "28752d4e1d778abc3e9424f4f23d1aaf"),
("ddim", "28752d4e1d778abc3e9424f4f23d1aaf"),
},
"cpu": [],
}

Loading…
Cancel
Save