test: add some tests/logging

add some experiments
2 years ago · 59648dbe61
parent 19d0b563ac
commit 59648dbe61
9 changed files with 205 additions and 15 deletions
--- a/README.md
+++ b/README.md
@ -142,6 +142,7 @@ docker run -it --gpus all -v $HOME/.cache/huggingface:/root/.cache/huggingface -
   - ✅ https://github.com/CompVis/stable-diffusion/compare/main...Doggettx:stable-diffusion:autocast-improvements#
   - ✅ https://www.reddit.com/r/StableDiffusion/comments/xalaws/test_update_for_less_memory_usage_and_higher/
   - https://github.com/neonsecret/stable-diffusion  https://github.com/CompVis/stable-diffusion/pull/177
+   - https://github.com/huggingface/diffusers/pull/532/files
 - ✅ deploy to pypi
 - find similar images https://knn5.laion.ai/?back=https%3A%2F%2Fknn5.laion.ai%2F&index=laion5B&useMclip=false
 - Development Environment
@ -205,4 +206,6 @@ docker run -it --gpus all -v $HOME/.cache/huggingface:/root/.cache/huggingface -

 ## Further Reading
 - Differences between samplers
-   - https://www.reddit.com/r/StableDiffusion/comments/xbeyw3/can_anyone_offer_a_little_guidance_on_the/
+   - https://www.reddit.com/r/StableDiffusion/comments/xbeyw3/can_anyone_offer_a_little_guidance_on_the/
+ - https://www.reddit.com/r/bigsleep/comments/xb5cat/wiskkeys_lists_of_texttoimage_systems_and_related/
+ - https://huggingface.co/blog/annotated-diffusion
--- a/imaginairy/api.py
+++ b/imaginairy/api.py
@ -16,7 +16,7 @@ from transformers import cached_path

 from imaginairy.enhancers.face_restoration_codeformer import enhance_faces
 from imaginairy.enhancers.upscale_realesrgan import upscale_image
-from imaginairy.img_log import LatentLoggingContext, log_latent
+from imaginairy.img_log import ImageLoggingContext, log_conditioning, log_latent
 from imaginairy.safety import is_nsfw
 from imaginairy.samplers.base import get_sampler
 from imaginairy.schema import ImaginePrompt, ImagineResult
@ -115,7 +115,7 @@ def imagine_image_files(
    def _record_step(img, description, step_count, prompt):
        steps_path = os.path.join(outdir, "steps", f"{base_count:08}_S{prompt.seed}")
        os.makedirs(steps_path, exist_ok=True)
-        filename = f"{base_count:08}_S{prompt.seed}_step{step_count:04}.jpg"
+        filename = f"{base_count:08}_S{prompt.seed}_step{step_count:04}_{prompt_normalized(description)[:40]}.jpg"
        destination = os.path.join(steps_path, filename)
        draw = ImageDraw.Draw(img)
        draw.text((10, 10), str(description))
@ -142,6 +142,7 @@ def imagine_image_files(
            result.save_upscaled(bigfilepath)
            logger.info(f"    Upscaled 🖼  saved to: {bigfilepath}")
        base_count += 1
+        del result


 def imagine(
@ -173,8 +174,10 @@ def imagine(
    )
    with torch.no_grad(), precision_scope(get_device()), fix_torch_nn_layer_norm():
        for prompt in prompts:
-            with LatentLoggingContext(
-                prompt=prompt, model=model, img_callback=img_callback
+            with ImageLoggingContext(
+                prompt=prompt,
+                model=model,
+                img_callback=img_callback,
            ):
                logger.info(f"Generating {prompt.prompt_description()}")
                seed_everything(prompt.seed)
@ -182,11 +185,17 @@ def imagine(
                uc = None
                if prompt.prompt_strength != 1.0:
                    uc = model.get_learned_conditioning(1 * [""])
-                total_weight = sum(wp.weight for wp in prompt.prompts)
-                c = sum(
-                    model.get_learned_conditioning(wp.text) * (wp.weight / total_weight)
-                    for wp in prompt.prompts
-                )
+                    log_conditioning(uc, "neutral conditioning")
+                if prompt.conditioning is not None:
+                    c = prompt.conditioning
+                else:
+                    total_weight = sum(wp.weight for wp in prompt.prompts)
+                    c = sum(
+                        model.get_learned_conditioning(wp.text)
+                        * (wp.weight / total_weight)
+                        for wp in prompt.prompts
+                    )
+                log_conditioning(c, "positive conditioning")

                shape = [
                    latent_channels,
--- a/imaginairy/img_log.py
+++ b/imaginairy/img_log.py
@ -1,15 +1,24 @@
 import logging
+import re

 import numpy as np
 import torch
 from einops import rearrange
 from PIL import Image
+from torchvision.transforms import ToPILImage

 _CURRENT_LOGGING_CONTEXT = None

 logger = logging.getLogger(__name__)


+def log_conditioning(conditioning, description):
+    """Hand `conditioning` to the active logging context; no-op when none is set."""
+    active_context = _CURRENT_LOGGING_CONTEXT
+    if active_context is not None:
+        active_context.log_conditioning(conditioning, description)
+
+
 def log_latent(latents, description):
    if _CURRENT_LOGGING_CONTEXT is None:
        return
@ -23,12 +32,13 @@ def log_latent(latents, description):
    _CURRENT_LOGGING_CONTEXT.log_latents(latents, description)


-class LatentLoggingContext:
-    def __init__(self, prompt, model, img_callback=None):
+class ImageLoggingContext:
+    def __init__(self, prompt, model, img_callback=None, img_outdir=None):
        self.prompt = prompt
        self.model = model
        self.step_count = 0
        self.img_callback = img_callback
+        self.img_outdir = img_outdir

    def __enter__(self):
        global _CURRENT_LOGGING_CONTEXT  # noqa
@ -39,6 +49,13 @@ class LatentLoggingContext:
        global _CURRENT_LOGGING_CONTEXT  # noqa
        _CURRENT_LOGGING_CONTEXT = None

+    def log_conditioning(self, conditioning, description):
+        """Convert `conditioning` to an image and pass it to `img_callback`.
+
+        Does nothing when no `img_callback` is set on this context.
+        """
+        if self.img_callback:
+            rendered = conditioning_to_img(conditioning)
+            self.img_callback(rendered, description, self.step_count, self.prompt)
+
    def log_latents(self, latents, description):
        if not self.img_callback:
            return
@ -53,3 +70,20 @@ class LatentLoggingContext:
            latent = 255.0 * rearrange(latent.cpu().numpy(), "c h w -> h w c")
            img = Image.fromarray(latent.astype(np.uint8))
            self.img_callback(img, description, self.step_count, self.prompt)
+
+    # def img_callback(self, img, description, step_count, prompt):
+    #     steps_path = os.path.join(self.img_outdir, "steps", f"{self.file_num:08}_S{prompt.seed}")
+    #     os.makedirs(steps_path, exist_ok=True)
+    #     filename = f"{self.file_num:08}_S{prompt.seed}_step{step_count:04}_{filesafe_text(description)[:40]}.jpg"
+    #     destination = os.path.join(steps_path, filename)
+    #     draw = ImageDraw.Draw(img)
+    #     draw.text((10, 10), str(description))
+    #     img.save(destination)
+
+
+def filesafe_text(t):
+    """Return `t` with disallowed character runs collapsed to "_", cut to 130 chars.
+
+    Allowed characters are ASCII letters, digits, and ``. , [ ] ( )``, space
+    and hyphen; each run of anything else becomes a single underscore.
+    """
+    sanitized = re.sub(r"[^a-zA-Z0-9.,\[\]() -]+", "_", t)
+    return sanitized[:130]
+
+
+def conditioning_to_img(conditioning):
+    """Convert `conditioning` into a PIL image via torchvision's `ToPILImage`."""
+    to_pil = ToPILImage()
+    return to_pil(conditioning)
--- a/imaginairy/samplers/ddim.py
+++ b/imaginairy/samplers/ddim.py
@ -281,7 +281,7 @@ class DDIMSampler:
            noise_pred = noise_pred_uncond + unconditional_guidance_scale * (
                noise_pred - noise_pred_uncond
            )
-
+        log_latent(noise_pred, "noise prediction")
        alphas = self.model.alphas_cumprod if use_original_steps else self.ddim_alphas
        alphas_prev = (
            self.model.alphas_cumprod_prev
--- a/imaginairy/schema.py
+++ b/imaginairy/schema.py
@ -55,7 +55,9 @@ class LazyLoadingImage:
                f"Loaded input 🖼  of size {self._img.size} from {self._lazy_filepath}"
            )
        elif self._lazy_url:
-            self._img = Image.open(requests.get(self._lazy_url, stream=True, timeout=60).raw)
+            self._img = Image.open(
+                requests.get(self._lazy_url, stream=True, timeout=60).raw
+            )
            logger.info(
                f"Loaded input 🖼  of size {self._img.size} from {self._lazy_url}"
            )
@ -89,6 +91,7 @@ class ImaginePrompt:
        upscale=False,
        fix_faces=False,
        sampler_type="PLMS",
+        conditioning=None,
    ):
        prompt = prompt if prompt is not None else "a scenic landscape"
        if isinstance(prompt, str):
@ -108,6 +111,7 @@ class ImaginePrompt:
        self.upscale = upscale
        self.fix_faces = fix_faces
        self.sampler_type = sampler_type
+        self.conditioning = conditioning

    @property
    def prompt_text(self):
--- a/tests/conftest.py
+++ b/tests/conftest.py
@ -4,6 +4,7 @@ import pytest

 from imaginairy import api
 from imaginairy.suppress_logs import suppress_annoying_logs_and_warnings
+from imaginairy.utils import fix_torch_nn_layer_norm

 if "pytest" in str(sys.argv):
    suppress_annoying_logs_and_warnings()
@ -12,3 +13,5 @@ if "pytest" in str(sys.argv):
@pytest.fixture(scope="session", autouse=True)
 def pre_setup():
    api.IMAGINAIRY_SAFETY_MODE = "disabled"
+    with fix_torch_nn_layer_norm():
+        yield
--- a/tests/test_clip_embedder.py
+++ b/tests/test_clip_embedder.py
@ -0,0 +1,16 @@
+import hashlib
+
+from imaginairy.modules.clip_embedders import FrozenCLIPEmbedder
+from imaginairy.utils import get_device
+
+
+def hash_tensor(t):
+    """Return the hex MD5 digest of the raw bytes backing tensor `t`."""
+    raw_bytes = t.cpu().detach().numpy().tobytes()
+    digest = hashlib.md5(raw_bytes)
+    return digest.hexdigest()
+
+
+def test_text_conditioning():
+    """Check that encoding the empty prompt reproduces a pinned MD5 hash."""
+    expected_hash = "263e5ee7d2be087d816e094b80ffc546"
+    clip_embedder = FrozenCLIPEmbedder()
+    clip_embedder.to(get_device())
+    empty_prompt_embedding = clip_embedder.encode([""])
+    assert hash_tensor(empty_prompt_embedding) == expected_hash
--- a/tests/test_experiments.py
+++ b/tests/test_experiments.py
@ -0,0 +1,121 @@
+import os.path
+
+import torch
+from PIL import ImageDraw
+
+from imaginairy import ImaginePrompt, LazyLoadingImage, imagine, imagine_image_files
+from imaginairy.api import load_model
+from imaginairy.img_log import ImageLoggingContext, filesafe_text, log_latent
+from imaginairy.modules.clip_embedders import FrozenCLIPEmbedder
+from imaginairy.samplers.ddim import DDIMSampler
+from imaginairy.utils import get_device, pillow_img_to_torch_image
+from tests import TESTS_FOLDER
+
+
+def experiment_text_conditioning_combos():
+    """
+    Can we do arithmetic on text embeddings and still render images from them?
+
+    Yes, it works, but the results don't look great.
+    """
+    text_encoder = FrozenCLIPEmbedder()
+    text_encoder.to(get_device())
+
+    beach = text_encoder.encode(["a beach"])
+    beach_with_water = text_encoder.encode(["a beach. ocean, waves, water"])
+    # The difference between the two embeddings stands in for "water".
+    water_direction = beach_with_water - beach
+    beach_minus_water = beach - water_direction
+
+    output_dir = f"{TESTS_FOLDER}/test_output"
+    labeled_conditionings = (
+        ("waterless_beach", beach_minus_water),
+        ("waterness", water_direction),
+        ("beach", beach),
+    )
+    for label, embedding in labeled_conditionings:
+        imagine_image_files(
+            [ImaginePrompt(label, conditioning=embedding, seed=1)],
+            outdir=output_dir,
+        )
+
+
+def experiment_step_repeats():
+    """
+    Run the same step over and over on an image without noise
+
+    Removes detail from the image.
+
+    Encodes a test image to a latent, then calls ``sampler.p_sample_ddim``
+    100 times at a fixed index, feeding each ``pred_x0`` back in as the next
+    input. Logged images are written under ``test_output/steps``.
+    """
+    model = load_model()
+    model.to(get_device())
+    model.eval()
+    embedder = FrozenCLIPEmbedder()
+    embedder.to(get_device())
+
+    sampler = DDIMSampler(model)
+    sampler.make_schedule(1000)
+
+    img = LazyLoadingImage(filepath=f"{TESTS_FOLDER}/data/beach_at_sainte_adresse.jpg")
+    init_image, w, h = pillow_img_to_torch_image(
+        img,
+        max_height=512,
+        max_width=512,
+    )
+    init_image = init_image.to(get_device())
+    init_latent = model.get_first_stage_encoding(model.encode_first_stage(init_image))
+    # NOTE(review): this call happens before the ImageLoggingContext below is
+    # entered; log_latent returns early when no context is active, so this
+    # latent is presumably never recorded -- confirm whether that is intended.
+    log_latent(init_latent, "init_latent")
+    # NOTE(review): `noise` is not used anywhere else in this function; the
+    # call still draws from torch's RNG, so removing it may change later
+    # random draws -- verify before deleting.
+    noise = torch.randn_like(init_latent)
+    base_count = 1
+    # Embedding of the empty prompt, used as the conditioning for every step.
+    neutral_embedding = embedder.encode([""])
+    outdir = f"{TESTS_FOLDER}/test_output"
+
+    def _record_step(img, description, step_count, prompt):
+        # Image callback: label the image with `description` and save it as a
+        # PNG under <outdir>/steps/<base_count>_S<seed>/.
+        # `base_count` is read from the enclosing scope at call time, so the
+        # reassignment inside the `with` block below determines the folder name.
+        steps_path = os.path.join(outdir, "steps", f"{base_count:08}_S{prompt.seed}")
+        os.makedirs(steps_path, exist_ok=True)
+        filename = f"{base_count:08}_S{prompt.seed}_step{step_count:04}_{filesafe_text(description)[:40]}.png"
+        destination = os.path.join(steps_path, filename)
+        draw = ImageDraw.Draw(img)
+        draw.text((10, 10), str(description))
+        img.save(destination)
+
+    with ImageLoggingContext(
+        prompt=ImaginePrompt(""),
+        model=model,
+        img_callback=_record_step,
+    ):
+        x_prev = init_latent
+        index = 50
+        base_count = index
+        # NOTE(review): torch.Tensor([...]) builds a float tensor; confirm the
+        # sampler accepts a float timestep here.
+        t = torch.Tensor([index]).to(get_device())
+        # noise_pred = model.apply_model(init_latent, t, neutral_embedding)
+        # log_latent(noise_pred, "noise prediction")
+        for _ in range(100):
+            # The first return value is discarded: the next iteration starts
+            # from pred_x0 rather than from the sampler's x_prev.
+            x_prev, pred_x0 = sampler.p_sample_ddim(x_prev, neutral_embedding, t, index)
+            log_latent(pred_x0, "pred_x0")
+            x_prev = pred_x0
+
+
+def experiment_repeated_img_2_img():
+    """
+    Experiment with putting an image repeatedly through image2image
+
+    It creates screwy images.
+
+    The source image is saved as ``0.png``; each of the 50 passes then saves
+    its output as ``<step_num:04>.png`` in the same folder and feeds that
+    output back in as the next pass's ``init_image``.
+    """
+    outdir = f"{TESTS_FOLDER}/test_output/img2img2img"
+    # Create the output folder before the first save. Previously this ran only
+    # inside the loop, after `0.png` had already been written, so the initial
+    # save failed whenever the folder did not exist yet.
+    os.makedirs(outdir, exist_ok=True)
+    img = LazyLoadingImage(filepath=f"{TESTS_FOLDER}/data/beach_at_sainte_adresse.jpg")
+    img.save(f"{outdir}/0.png")
+    for step_num in range(50):
+        prompt = ImaginePrompt(
+            "Beach at Sainte Adresse. hyperealistic photo. sharp focus, canon 5d",
+            init_image=img,
+            init_image_strength=0.50,
+            width=512,
+            height=512,
+            steps=50,
+            sampler_type="DDIM",
+        )
+
+        result = next(imagine(prompt))
+        # The generated image becomes the input of the next pass.
+        img = result.img
+        img.save(f"{outdir}/{step_num:04}.png")
--- a/tests/test_imagine.py
+++ b/tests/test_imagine.py
@ -69,7 +69,7 @@ def test_img_to_img_from_url():
        init_image_strength=0.5,
        width=512,
        height=512,
-        steps=50,
+        steps=5,
        seed=1,
        sampler_type="DDIM",
    )