Merge pull request #18 from brycedrennan/bugfixes

Bugfixes + per-prompt tile mode
pull/21/head 1.6.0
Bryce Drennan 2 years ago committed by GitHub
commit 08fca72033

@ -0,0 +1,21 @@
__pycache__
*.pyc
*.pyo
*.pyd
.Python
env
pip-log.txt
pip-delete-this-directory.txt
.tox
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.log
.git
.mypy_cache
.pytest_cache
.hypothesis
.DS_Store

@ -51,6 +51,12 @@ deploy: ## Deploy the package to pypi.org
rm -rf dist
@echo "Deploy successful! ✨ 🍰 ✨"
build-dev-image:
docker build -f tests/Dockerfile -t imaginairy-dev .
run-dev: build-dev-image
docker run -it -v $$HOME/.cache/huggingface:/root/.cache/huggingface -v $$HOME/.cache/torch:/root/.cache/torch -v `pwd`/outputs:/outputs imaginairy-dev /bin/bash
requirements: ## Freeze the requirements.txt file
pip-compile setup.py requirements-dev.in --output-file=requirements-dev.txt --upgrade

@ -117,7 +117,7 @@ from imaginairy import imagine, imagine_image_files, ImaginePrompt, WeightedProm
url = "https://upload.wikimedia.org/wikipedia/commons/thumb/6/6c/Thomas_Cole_-_Architect%E2%80%99s_Dream_-_Google_Art_Project.jpg/540px-Thomas_Cole_-_Architect%E2%80%99s_Dream_-_Google_Art_Project.jpg"
prompts = [
ImaginePrompt("a scenic landscape", seed=1),
ImaginePrompt("a scenic landscape", seed=1, upscale=True),
ImaginePrompt("a bowl of fruit"),
ImaginePrompt([
WeightedPrompt("cat", weight=1),
@ -133,7 +133,8 @@ prompts = [
mask_prompt="fruit|stems",
mask_mode="replace",
mask_expansion=3
)
),
ImaginePrompt("strawberries", tile_mode=True),
]
for result in imagine(prompts):
# do something
@ -162,8 +163,16 @@ docker build . -t imaginairy
docker run -it --gpus all -v $HOME/.cache/huggingface:/root/.cache/huggingface -v $HOME/.cache/torch:/root/.cache/torch -v `pwd`/outputs:/outputs imaginairy /bin/bash
```
## Running on Google Colab
[Example Colab](https://colab.research.google.com/drive/1rOvQNs0Cmn_yU1bKWjCOHzGVDgZkaTtO?usp=sharing)
## ChangeLog
**1.6.0**
- fix: *maybe* address #13 (`expected scalar type BFloat16 but found Float`)
  - at minimum, `--precision full` can now be specified, which should work around the issue
- feature: tile mode can now be specified per-prompt
**1.5.3**
- fix: missing config file for describe feature
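
Taken together, the two 1.6.0 items mean tiling and precision are now both controllable from the Python API. A minimal sketch of how they combine, based on the API shown in this diff (the prompt texts and output directory are illustrative):

```python
from imaginairy import ImaginePrompt, imagine_image_files

prompts = [
    # tile_mode is now a per-prompt setting rather than a global model patch
    ImaginePrompt("a flower-patterned wallpaper", tile_mode=True),
    ImaginePrompt("a scenic landscape"),  # tiling stays off for this prompt
]
# precision="full" is the workaround mentioned above for the BFloat16 issue
imagine_image_files(prompts, outdir="./outputs", precision="full")
```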

@ -1,7 +1,6 @@
import logging
import os
import re
from contextlib import nullcontext
from functools import lru_cache
import numpy as np
@ -11,7 +10,6 @@ from einops import rearrange
from omegaconf import OmegaConf
from PIL import Image, ImageDraw, ImageFilter, ImageOps
from pytorch_lightning import seed_everything
from torch import autocast
from transformers import cached_path
from imaginairy.enhancers.clip_masking import get_img_mask
@ -29,11 +27,13 @@ from imaginairy.samplers.base import get_sampler
from imaginairy.schema import ImaginePrompt, ImagineResult
from imaginairy.utils import (
expand_mask,
fix_torch_group_norm,
fix_torch_nn_layer_norm,
get_device,
instantiate_from_config,
pillow_fit_image_within,
pillow_img_to_torch_image,
platform_appropriate_autocast,
)
LIB_PATH = os.path.dirname(__file__)
@ -73,31 +73,11 @@ def load_model_from_config(config):
return model
def patch_conv(**patch):
"""
Patch to enable tiling mode
https://github.com/replicate/cog-stable-diffusion/compare/main...TomMoore515:material_stable_diffusion:main
"""
cls = torch.nn.Conv2d
init = cls.__init__
def __init__(self, *args, **kwargs):
return init(self, *args, **kwargs, **patch)
cls.__init__ = __init__
@lru_cache()
def load_model(tile_mode=False):
if tile_mode:
# generated images are tileable
patch_conv(padding_mode="circular")
def load_model():
config = "configs/stable-diffusion-v1.yaml"
config = OmegaConf.load(f"{LIB_PATH}/{config}")
model = load_model_from_config(config)
model = model.to(get_device())
return model
@ -111,7 +91,6 @@ def imagine_image_files(
ddim_eta=0.0,
record_step_images=False,
output_file_extension="jpg",
tile_mode=False,
print_caption=False,
):
big_path = os.path.join(outdir, "upscaled")
@ -139,7 +118,6 @@ def imagine_image_files(
precision=precision,
ddim_eta=ddim_eta,
img_callback=_record_step if record_step_images else None,
tile_mode=tile_mode,
add_caption=print_caption,
):
prompt = result.prompt
@ -164,11 +142,10 @@ def imagine(
precision="autocast",
ddim_eta=0.0,
img_callback=None,
tile_mode=False,
half_mode=None,
add_caption=False,
):
model = load_model(tile_mode=tile_mode)
model = load_model()
# half-mode is only supported on CUDA; default to it there unless explicitly set
half_mode = half_mode if half_mode is not None else get_device() == "cuda"
@ -179,13 +156,12 @@ def imagine(
prompts = [ImaginePrompt(prompts)] if isinstance(prompts, str) else prompts
prompts = [prompts] if isinstance(prompts, ImaginePrompt) else prompts
_img_callback = None
if get_device() == "cpu":
logger.info("Running in CPU mode. it's gonna be slooooooow.")
precision_scope = (
autocast
if precision == "autocast" and get_device() in ("cuda", "cpu")
else nullcontext
)
with torch.no_grad(), precision_scope(get_device()), fix_torch_nn_layer_norm():
with torch.no_grad(), platform_appropriate_autocast(
precision
), fix_torch_nn_layer_norm(), fix_torch_group_norm():
for prompt in prompts:
with ImageLoggingContext(
prompt=prompt,
@ -194,6 +170,7 @@ def imagine(
):
logger.info(f"Generating {prompt.prompt_description()}")
seed_everything(prompt.seed)
model.tile_mode(prompt.tile_mode)
uc = None
if prompt.prompt_strength != 1.0:

@ -121,7 +121,7 @@ def configure_logging(level="INFO"):
@click.option(
"--tile",
is_flag=True,
help="Any images rendered will be tileable. Unfortunately cannot be controlled at the per-image level yet",
help="Any images rendered will be tileable.",
)
@click.option(
"--mask-image",
@ -149,6 +149,12 @@ def configure_logging(level="INFO"):
is_flag=True,
help="Generate a text description of the generated image",
)
@click.option(
"--precision",
help="evaluate at this precision",
type=click.Choice(["full", "autocast"]),
default="autocast",
)
@click.pass_context
def imagine_cmd(
ctx,
@ -174,6 +180,7 @@ def imagine_cmd(
mask_mode,
mask_expansion,
caption,
precision,
):
"""Have the AI generate images. alias:imagine"""
if ctx.invoked_subcommand is not None:
@ -190,7 +197,7 @@ def imagine_cmd(
init_image = LazyLoadingImage(url=init_image)
prompts = []
load_model(tile_mode=tile)
load_model()
for _ in range(repeats):
for prompt_text in prompt_texts:
prompt = ImaginePrompt(
@ -209,6 +216,7 @@ def imagine_cmd(
mask_mode=mask_mode,
upscale=upscale,
fix_faces=fix_faces,
tile_mode=tile,
)
prompts.append(prompt)
@ -217,9 +225,9 @@ def imagine_cmd(
outdir=outdir,
ddim_eta=ddim_eta,
record_step_images="images" in show_work,
tile_mode=tile,
output_file_extension="png",
print_caption=caption,
precision=precision,
)
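
For reference, the reworked options can be exercised through click's test runner, mirroring the project's CLI tests. A hedged sketch (the prompt text is illustrative, and a full run still requires model weights):

```python
from click.testing import CliRunner

from imaginairy.cmds import imagine_cmd

runner = CliRunner()
# --tile now feeds tile_mode into each ImaginePrompt; --precision is the new option
result = runner.invoke(
    imagine_cmd, ["a repeating brick texture", "--tile", "--precision", "full"]
)
assert result.exit_code == 0
```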

@ -273,6 +273,18 @@ class LatentDiffusion(DDPM):
self.init_from_ckpt(ckpt_path, ignore_keys)
self.restarted_from_ckpt = True
# store initial padding mode so we can switch to 'circular'
# when we want tiled images
for m in self.modules():
if isinstance(m, nn.Conv2d):
m._initial_padding_mode = m.padding_mode
def tile_mode(self, enabled):
"""For creating seamless tiles"""
for m in self.modules():
if isinstance(m, nn.Conv2d):
m.padding_mode = "circular" if enabled else m._initial_padding_mode
def make_cond_schedule(
self,
):
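
The `tile_mode` method swaps every `Conv2d` to circular padding, which is what makes the output wrap seamlessly. A standalone sketch of the effect, independent of the model code:

```python
import torch
from torch import nn

conv = nn.Conv2d(1, 1, kernel_size=3, padding=1, bias=False)
conv.weight.data.fill_(1.0)

x = torch.zeros(1, 1, 4, 4)
x[0, 0, 0, 0] = 1.0  # single activation in the top-left corner

with torch.no_grad():
    conv.padding_mode = "zeros"
    print(conv(x)[0, 0, -1, -1].item())  # 0.0 -> corners are independent
    conv.padding_mode = "circular"
    print(conv(x)[0, 0, -1, -1].item())  # 1.0 -> edges wrap, so tiles line up
```

Because `load_model()` is now cached with `lru_cache` and no longer takes `tile_mode`, toggling the padding mode per prompt like this avoids reloading the model whenever tiling changes.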

@ -24,23 +24,24 @@ class DiagonalGaussianDistribution:
def kl(self, other=None):
if self.deterministic:
return torch.Tensor([0.0])
else:
if other is None:
return 0.5 * torch.sum(
torch.pow(self.mean, 2) + self.var - 1.0 - self.logvar,
dim=[1, 2, 3],
)
else:
return 0.5 * torch.sum(
torch.pow(self.mean - other.mean, 2) / other.var
+ self.var / other.var
- 1.0
- self.logvar
+ other.logvar,
dim=[1, 2, 3],
)
def nll(self, sample, dims=[1, 2, 3]):
if other is None:
return 0.5 * torch.sum(
torch.pow(self.mean, 2) + self.var - 1.0 - self.logvar,
dim=[1, 2, 3],
)
return 0.5 * torch.sum(
torch.pow(self.mean - other.mean, 2) / other.var
+ self.var / other.var
- 1.0
- self.logvar
+ other.logvar,
dim=[1, 2, 3],
)
def nll(self, sample, dims=None):
dims = [1, 2, 3] if dims is None else dims
if self.deterministic:
return torch.Tensor([0.0])
logtwopi = np.log(2.0 * np.pi)
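
The de-nesting above doesn't change the math: `kl()` is still the closed-form KL divergence between a diagonal Gaussian and either the standard normal or another diagonal Gaussian. A quick sanity sketch (not part of the commit) checking the `other is None` branch against `torch.distributions`:

```python
import torch
from torch.distributions import Normal, kl_divergence

mean, logvar = torch.randn(1, 4, 8, 8), torch.randn(1, 4, 8, 8)
var = logvar.exp()

# the `other is None` branch above, written out directly
manual = 0.5 * torch.sum(mean.pow(2) + var - 1.0 - logvar, dim=[1, 2, 3])
reference = kl_divergence(Normal(mean, var.sqrt()), Normal(0.0, 1.0)).sum(dim=[1, 2, 3])
assert torch.allclose(manual, reference, atol=1e-4)
```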

@ -103,6 +103,7 @@ class ImaginePrompt:
fix_faces=False,
sampler_type="PLMS",
conditioning=None,
tile_mode=False,
):
prompt = prompt if prompt is not None else "a scenic landscape"
if isinstance(prompt, str):
@ -131,6 +132,7 @@ class ImaginePrompt:
self.mask_image = mask_image
self.mask_mode = mask_mode
self.mask_expansion = mask_expansion
self.tile_mode = tile_mode
@property
def prompt_text(self):

@ -2,7 +2,7 @@ import importlib
import logging
import os.path
import platform
from contextlib import contextmanager
from contextlib import contextmanager, nullcontext
from functools import lru_cache
from typing import List, Optional
@ -10,7 +10,7 @@ import numpy as np
import requests
import torch
from PIL import Image, ImageFilter
from torch import Tensor
from torch import Tensor, autocast
from torch.nn import functional
from torch.overrides import handle_torch_function, has_torch_function_variadic
from transformers import cached_path
@ -61,6 +61,18 @@ def get_obj_from_str(string, reload=False):
return getattr(importlib.import_module(module, package=None), cls)
@contextmanager
def platform_appropriate_autocast(precision="autocast"):
"""
Allow calculations to run in mixed precision, which can be faster.
"""
precision_scope = nullcontext
if precision == "autocast" and get_device() in ("cuda", "cpu"):
precision_scope = autocast
with precision_scope(get_device()):
yield
def _fixed_layer_norm(
input: Tensor, # noqa
normalized_shape: List[int],
@ -104,6 +116,43 @@ def fix_torch_nn_layer_norm():
functional.layer_norm = orig_function
@contextmanager
def fix_torch_group_norm():
"""
Patch group_norm to cast the weights to the same type as the inputs.
From what I can understand, all the other repos just switch to full precision instead
of addressing this. I think that would make things slower, but I'm not sure.
https://github.com/pytorch/pytorch/pull/81852
"""
orig_group_norm = functional.group_norm
def _group_norm_wrapper(
input: Tensor, # noqa
num_groups: int,
weight: Optional[Tensor] = None,
bias: Optional[Tensor] = None,
eps: float = 1e-5,
) -> Tensor:
if weight is not None and weight.dtype != input.dtype:
weight = weight.to(input.dtype)
if bias is not None and bias.dtype != input.dtype:
bias = bias.to(input.dtype)
return orig_group_norm(
input=input, num_groups=num_groups, weight=weight, bias=bias, eps=eps
)
functional.group_norm = _group_norm_wrapper
try:
yield
finally:
functional.group_norm = orig_group_norm
def expand_mask(mask_image, size):
if size < 0:
threshold = 0.95
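
The two new context managers are designed to wrap the whole generation loop, as `imagine()` now does. A rough sketch (not from the commit; exact dtype support depends on the torch version) of the group_norm failure mode from #13 and the cast that fixes it:

```python
import torch
from torch.nn import functional

from imaginairy.utils import fix_torch_group_norm

# under CPU autocast, activations become bfloat16 while module weights stay float32
x = torch.randn(1, 8, 4, 4, dtype=torch.bfloat16)
weight = torch.ones(8)  # float32, as nn.GroupNorm stores it

# without the patch this raises `expected scalar type BFloat16 but found Float`;
# the wrapper casts weight/bias to the input dtype before calling the original op
with fix_torch_group_norm():
    out = functional.group_norm(x, num_groups=2, weight=weight)
assert out.dtype == torch.bfloat16
```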

@ -6,3 +6,4 @@ pydocstyle
pylama
pylint
pytest
pytest-randomly

@ -10,7 +10,7 @@ absl-py==1.2.0
# tensorboard
addict==2.4.0
# via basicsr
aiohttp==3.8.1
aiohttp==3.8.3
# via fsspec
aiosignal==1.2.0
# via aiohttp
@ -68,7 +68,7 @@ filelock==3.8.0
# transformers
filterpy==1.4.5
# via facexlib
fonttools==4.37.2
fonttools==4.37.3
# via matplotlib
frozenlist==1.3.1
# via
@ -86,7 +86,7 @@ gfpgan==1.3.8
# via
# imaginAIry (setup.py)
# realesrgan
google-auth==2.11.0
google-auth==2.11.1
# via
# google-auth-oauthlib
# tb-nightly
@ -95,7 +95,7 @@ google-auth-oauthlib==0.4.6
# via
# tb-nightly
# tensorboard
grpcio==1.49.0
grpcio==1.48.1
# via
# tb-nightly
# tensorboard
@ -212,6 +212,7 @@ pillow==9.2.0
# diffusers
# facexlib
# imageio
# imaginAIry (setup.py)
# matplotlib
# realesrgan
# scikit-image
@ -249,13 +250,17 @@ pyflakes==2.5.0
# via pylama
pylama==8.4.1
# via -r requirements-dev.in
pylint==2.15.2
pylint==2.15.3
# via -r requirements-dev.in
pyparsing==3.0.9
# via
# matplotlib
# packaging
pytest==7.1.3
# via
# -r requirements-dev.in
# pytest-randomly
pytest-randomly==3.12.0
# via -r requirements-dev.in
python-dateutil==2.8.2
# via matplotlib
@ -273,7 +278,7 @@ pyyaml==6.0
# pycln
# pytorch-lightning
# transformers
realesrgan==0.2.8
realesrgan==0.3.0
# via imaginAIry (setup.py)
regex==2022.9.13
# via
@ -311,7 +316,7 @@ six==1.16.0
# python-dateutil
snowballstemmer==2.2.0
# via pydocstyle
tb-nightly==2.11.0a20220918
tb-nightly==2.11.0a20220921
# via
# basicsr
# gfpgan

@ -7,7 +7,7 @@ setup(
name="imaginAIry",
author="Bryce Drennan",
# author_email="b r y p y d o t io",
version="1.5.4",
version="1.6.0",
description="AI imagined images. Pythonic generation of stable diffusion images.",
long_description=readme,
long_description_content_type="text/markdown",

@ -0,0 +1,36 @@
FROM python:3.10.6-slim as base
RUN apt-get update && apt-get install -y libgl1 libglib2.0-0 make
ENV PIP_DISABLE_PIP_VERSION_CHECK=1 \
PIP_ROOT_USER_ACTION=ignore
FROM base as build_wheel
RUN pip install wheel
WORKDIR /app
COPY imaginairy ./imaginairy
COPY setup.py README.md ./
RUN python setup.py bdist_wheel
FROM base as install_wheel
WORKDIR /app
COPY requirements-dev.in ./
RUN pip install -r requirements-dev.in
COPY --from=build_wheel /app/dist/* ./
RUN pip install *.whl
RUN imagine --help
COPY Makefile ./
COPY tests ./tests

@ -4,7 +4,11 @@ import pytest
from imaginairy import api
from imaginairy.suppress_logs import suppress_annoying_logs_and_warnings
from imaginairy.utils import fix_torch_nn_layer_norm
from imaginairy.utils import (
fix_torch_group_norm,
fix_torch_nn_layer_norm,
platform_appropriate_autocast,
)
if "pytest" in str(sys.argv):
suppress_annoying_logs_and_warnings()
@ -13,5 +17,6 @@ if "pytest" in str(sys.argv):
@pytest.fixture(scope="session", autouse=True)
def pre_setup():
api.IMAGINAIRY_SAFETY_MODE = "disabled"
with fix_torch_nn_layer_norm():
suppress_annoying_logs_and_warnings()
with fix_torch_nn_layer_norm(), fix_torch_group_norm(), platform_appropriate_autocast():
yield

@ -17,4 +17,4 @@ def test_text_conditioning():
if "mps" in get_device():
assert hashed == "263e5ee7d2be087d816e094b80ffc546"
elif "cuda" in get_device():
assert hashed == "3d7867d5b2ebf15102a9ca9476d63ebc"
assert hashed == "41818051d7c469fc57d0a940c9d24d82"

@ -1,9 +1,12 @@
import pytest
from click.testing import CliRunner
from imaginairy.cmds import imagine_cmd
from imaginairy.utils import get_device
from tests import TESTS_FOLDER
@pytest.mark.skipif(get_device() == "cpu", reason="Too slow to run on CPU")
def test_imagine_cmd():
runner = CliRunner()
result = runner.invoke(

@ -20,7 +20,7 @@ def test_fix_faces():
if "mps" in get_device():
assert img_hash(img) == "a75991307eda675a26eeb7073f828e93"
else:
assert img_hash(img) == "5aa847a1464de75b158658a35800b6bf"
assert img_hash(img) == "e56c1205bbc8f251be05773f2ba7fa24"
def img_hash(img):

@ -8,7 +8,7 @@ from imaginairy.utils import get_device
from . import TESTS_FOLDER
device_sampler_type_test_cases = {
"mps:0": {
"mps:0": [
("plms", "b4b434ed45919f3505ac2be162791c71"),
("ddim", "b369032a025915c0a7ccced165a609b3"),
("k_lms", "b87325c189799d646ccd07b331564eb6"),
@ -17,8 +17,8 @@ device_sampler_type_test_cases = {
("k_euler", "d126da5ca8b08099cde8b5037464e788"),
("k_euler_a", "cac5ca2e26c31a544b76a9442eb2ea37"),
("k_heun", "0382ef71d9967fefd15676410289ebab"),
},
"cuda": {
],
"cuda": [
("plms", "62e78287e7848e48d45a1b207fb84102"),
("ddim", "164c2a008b100e5fa07d3db2018605bd"),
("k_lms", "450fea507ccfb44b677d30fae9f40a52"),
@ -27,7 +27,8 @@ device_sampler_type_test_cases = {
("k_euler", "06df9c19d472bfa6530db98be4ea10e8"),
("k_euler_a", "79552628ff77914c8b6870703fe116b5"),
("k_heun", "8ced3578ae25d34da9f4e4b1a20bf416"),
},
],
"cpu": [],
}
sampler_type_test_cases = device_sampler_type_test_cases[get_device()]
@ -54,12 +55,14 @@ device_sampler_type_test_cases_img_2_img = {
("plms", "efba8b836b51d262dbf72284844869f8"),
("ddim", "a62878000ad3b581a11dd3fb329dc7d2"),
},
"cpu": [],
}
sampler_type_test_cases_img_2_img = device_sampler_type_test_cases_img_2_img[
get_device()
]
@pytest.mark.skipif(get_device() == "cpu", reason="Too slow to run on CPU")
@pytest.mark.parametrize("sampler_type,expected_md5", sampler_type_test_cases_img_2_img)
def test_img_to_img(sampler_type, expected_md5):
prompt = ImaginePrompt(
@ -79,6 +82,7 @@ def test_img_to_img(sampler_type, expected_md5):
assert result.md5() == expected_md5
@pytest.mark.skipif(get_device() == "cpu", reason="Too slow to run on CPU")
def test_img_to_img_from_url():
prompt = ImaginePrompt(
"dogs lying on a hot pink couch",
@ -96,6 +100,7 @@ def test_img_to_img_from_url():
imagine_image_files(prompt, outdir=out_folder)
@pytest.mark.skipif(get_device() == "cpu", reason="Too slow to run on CPU")
def test_img_to_file():
prompt = ImaginePrompt(
"an old growth forest, diffuse light poking through the canopy. high-resolution, nature photography, nat geo photo",
@ -110,6 +115,7 @@ def test_img_to_file():
imagine_image_files(prompt, outdir=out_folder)
@pytest.mark.skipif(get_device() == "cpu", reason="Too slow to run on CPU")
def test_inpainting():
prompt = ImaginePrompt(
"a basketball on a bench",
@ -126,6 +132,7 @@ def test_inpainting():
imagine_image_files(prompt, outdir=out_folder)
@pytest.mark.skipif(get_device() == "cpu", reason="Too slow to run on CPU")
def test_cliptext_inpainting():
prompts = [
ImaginePrompt(

@ -18,6 +18,7 @@ def test_is_nsfw():
def _pil_to_latent(img):
model = load_model()
model.tile_mode(False)
img = pillow_img_to_torch_image(img)
img = img.to(get_device())
latent = model.get_first_stage_encoding(model.encode_first_stage(img))
