feature: facilitate references to previous image generation

pull/210/head
Bryce authored 2 years ago, committed by Bryce Drennan
parent 248679d8de
commit 16cef16c3e

@@ -525,7 +525,7 @@ would be uncorrelated to the rest of the surrounding image. It created terrible
- https://github.com/LambdaLabsML/lambda-diffusers
- https://www.reddit.com/r/MachineLearning/comments/x6k5bm/n_stable_diffusion_image_variations_released/
- Image Editing
- outpainting
- https://github.com/parlance-zz/g-diffuser-bot/search?q=noise&type=issues
- lama cleaner
- ✅ inpainting
@@ -598,7 +598,9 @@ would be uncorrelated to the rest of the surrounding image. It created terrible
- https://github.com/vicgalle/stable-diffusion-aesthetic-gradients
- Training
- Finetuning "dreambooth" style
- Textual Inversion
- [Textual Inversion](https://arxiv.org/abs/2208.01618)
- [Fast Textual Inversion](https://github.com/peterwilli/sd-leap-booster)
- [Low-rank Adaptation for Fast Text-to-Image Diffusion Fine-tuning (LORA)](https://github.com/cloneofsimo/lora)
- Performance Improvements
- [ColossalAI](https://github.com/hpcaitech/ColossalAI/tree/main/examples/images/diffusion) - almost got it working, but it's not easy enough to install to merit inclusion in imaginairy. We should check back in on this.
- Xformers

@@ -6,7 +6,7 @@ import numpy as np
import torch
import torch.nn
from einops import rearrange, repeat
from PIL import Image, ImageDraw, ImageFilter, ImageOps
from PIL import Image, ImageDraw, ImageOps
from pytorch_lightning import seed_everything
from imaginairy.enhancers.clip_masking import get_img_mask
@@ -51,6 +51,9 @@ if IMAGINAIRY_SAFETY_MODE in {"disabled", "classify"}:
elif IMAGINAIRY_SAFETY_MODE == "filter":
IMAGINAIRY_SAFETY_MODE = SafetyMode.STRICT
# we put this in the global scope so it can be used in the interactive shell
_most_recent_result = None
def imagine_image_files(
prompts,
@@ -88,6 +91,9 @@ def imagine_image_files(
add_caption=print_caption,
):
prompt = result.prompt
if prompt.is_intermediate:
# we don't save intermediate images
continue
img_str = ""
if prompt.init_image:
img_str = f"_img2img-{prompt.init_image_strength}"
@@ -103,7 +109,7 @@ def imagine_image_files(
subpath, f"{basefilename}_[{image_type}].{output_file_extension}"
)
result.save(filepath, image_type=image_type)
logger.info(f"🖼 [{image_type}] saved to: {filepath}")
logger.info(f"[{image_type}] saved to: {filepath}")
if image_type == return_filename_type:
result_filenames.append(filepath)
if make_comparison_gif and prompt.init_image:
@@ -134,6 +140,7 @@ def imagine(
half_mode=None,
add_caption=False,
):
global _most_recent_result # noqa
latent_channels = 4
downsampling_factor = 8
batch_size = 1
@@ -153,6 +160,18 @@ def imagine(
precision
), fix_torch_nn_layer_norm(), fix_torch_group_norm():
for i, prompt in enumerate(prompts):
# handle prompt pulling in previous values
if isinstance(prompt.init_image, str) and prompt.init_image.startswith(
"*prev"
):
_, img_type = prompt.init_image.strip("*").split(".")
prompt.init_image = _most_recent_result.images[img_type]
if isinstance(prompt.mask_image, str) and prompt.mask_image.startswith(
"*prev"
):
_, img_type = prompt.mask_image.strip("*").split(".")
prompt.mask_image = _most_recent_result.images[img_type]
logger.info(
f"Generating 🖼 {i + 1}/{num_prompts}: {prompt.prompt_description()}"
)
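The two `*prev` branches above follow the same pattern. Here is a minimal sketch of that resolution step written as a standalone helper, assuming a variable holding the last `ImagineResult`; the helper name and the explicit error for a missing previous result are illustrative and not part of this commit:

```python
def resolve_prev_reference(value, most_recent_result):
    """Swap a "*prev.<image_type>" string for the matching image of the last result."""
    if isinstance(value, str) and value.startswith("*prev"):
        _, img_type = value.strip("*").split(".")  # "*prev.generated" -> "generated"
        if most_recent_result is None:
            raise ValueError("no previous generation available to reference")
        return most_recent_result.images[img_type]
    return value  # anything else (path string, PIL image, None) passes through unchanged
```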
@@ -419,9 +438,10 @@ def imagine(
x_sample_8_orig = x_sample.astype(np.uint8)
img = Image.fromarray(x_sample_8_orig)
if mask_image_orig and init_image:
mask_final = mask_image_orig.filter(
ImageFilter.GaussianBlur(radius=3)
)
# mask_final = mask_image_orig.filter(
# ImageFilter.GaussianBlur(radius=3)
# )
mask_final = mask_image_orig.copy()
log_img(mask_final, "reconstituting mask")
mask_final = ImageOps.invert(mask_final)
img = Image.composite(img, init_image, mask_final)
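The compositing above relies on `Image.composite(a, b, mask)` taking pixels from `a` where the mask is 255 and from `b` where it is 0, which is why the mask is inverted first. A small self-contained sketch of that behavior on synthetic images (the sizes and colors are arbitrary stand-ins, not the project's mask convention):

```python
from PIL import Image, ImageOps

generated = Image.new("RGB", (4, 4), "red")   # stand-in for the newly sampled image
original = Image.new("RGB", (4, 4), "blue")   # stand-in for init_image
mask = Image.new("L", (4, 4), 0)
mask.paste(255, (0, 0, 2, 4))                 # left half is "masked"

inverted = ImageOps.invert(mask)
combined = Image.composite(generated, original, inverted)
# Left half (mask=255 -> inverted=0) keeps `original`; right half keeps `generated`.
print(combined.getpixel((0, 0)), combined.getpixel((3, 0)))  # (0, 0, 255) (255, 0, 0)
```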
@@ -471,9 +491,9 @@ def imagine(
starting_image.size,
resample=Image.Resampling.LANCZOS,
)
mask_for_orig_size = mask_for_orig_size.filter(
ImageFilter.GaussianBlur(radius=5)
)
# mask_for_orig_size = mask_for_orig_size.filter(
# ImageFilter.GaussianBlur(radius=5)
# )
log_img(mask_for_orig_size, "mask for original image size")
rebuilt_orig_img = Image.composite(
@@ -495,6 +515,7 @@ def imagine(
depth_image=depth_image_display,
timings=lc.get_timings(),
)
_most_recent_result = result
logger.info(f"Image Generated. Timings: {result.timings_str()}")
yield result
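With the cache refreshed after every generation, a later prompt in the same batch can build on the previous image. A hedged usage sketch, assuming the `generated` key exists in `ImagineResult.images` (other keys such as the mask outputs would be referenced the same way):

```python
from imaginairy import ImaginePrompt, imagine

prompts = [
    ImaginePrompt("a watercolor painting of a lighthouse", seed=1),
    ImaginePrompt(
        "the same lighthouse at night",
        init_image="*prev.generated",  # resolved against the most recent result
        init_image_strength=0.4,
        seed=1,
    ),
]

for i, result in enumerate(imagine(prompts)):
    result.save(f"lighthouse_{i}.jpg")
```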

@@ -115,6 +115,7 @@ class ImaginePrompt:
tile_mode="",
model=config.DEFAULT_MODEL,
model_config_path=None,
is_intermediate=False,
):
self.prompts = self.process_prompt_input(prompt)
@@ -128,10 +129,12 @@ class ImaginePrompt:
assert tile_mode in ("", "x", "y", "xy")
if isinstance(init_image, str):
init_image = LazyLoadingImage(filepath=init_image)
if not init_image.startswith("*prev."):
init_image = LazyLoadingImage(filepath=init_image)
if isinstance(mask_image, str):
mask_image = LazyLoadingImage(filepath=mask_image)
if not mask_image.startswith("*prev."):
mask_image = LazyLoadingImage(filepath=mask_image)
if mask_image is not None and mask_prompt is not None:
raise ValueError("You can only set one of `mask_image` and `mask_prompt`")
@@ -159,6 +162,8 @@ class ImaginePrompt:
self.tile_mode = tile_mode
self.model = model
self.model_config_path = model_config_path
# we don't want to save intermediate images
self.is_intermediate = is_intermediate
if self.height is None or self.width is None or self.steps is None:
SamplerCls = SAMPLER_LOOKUP[self.sampler_type]
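The constructor changes above keep `*prev.` strings intact so `imagine()` can resolve them against the previous result later, while plain path strings are still wrapped lazily. An illustrative helper capturing that guard (the function is not part of the commit, and the top-level `LazyLoadingImage` import is an assumption about the package exports):

```python
from imaginairy import LazyLoadingImage

def coerce_image_arg(value):
    """Wrap plain file paths lazily; leave "*prev." references as strings."""
    if isinstance(value, str) and not value.startswith("*prev."):
        return LazyLoadingImage(filepath=value)
    return value

# coerce_image_arg("photos/dog.jpg")    -> LazyLoadingImage wrapping the path
# coerce_image_arg("*prev.generated")   -> the string, untouched, resolved later
```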
