Merge pull request #18 from brycedrennan/bugfixes

Bugfixes + per-prompt tile mode
pull/21/head 1.6.0
Bryce Drennan 2 years ago committed by GitHub
commit 08fca72033

@ -0,0 +1,21 @@
__pycache__
*.pyc
*.pyo
*.pyd
.Python
env
pip-log.txt
pip-delete-this-directory.txt
.tox
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.log
.git
.mypy_cache
.pytest_cache
.hypothesis
.DS_Store

@ -51,6 +51,12 @@ deploy: ## Deploy the package to pypi.org
rm -rf dist
@echo "Deploy successful! ✨ 🍰 ✨"
build-dev-image:
docker build -f tests/Dockerfile -t imaginairy-dev .
run-dev: build-dev-image
docker run -it -v $$HOME/.cache/huggingface:/root/.cache/huggingface -v $$HOME/.cache/torch:/root/.cache/torch -v `pwd`/outputs:/outputs imaginairy-dev /bin/bash
requirements: ## Freeze the requirements.txt file
pip-compile setup.py requirements-dev.in --output-file=requirements-dev.txt --upgrade

@ -117,7 +117,7 @@ from imaginairy import imagine, imagine_image_files, ImaginePrompt, WeightedProm
url = "https://upload.wikimedia.org/wikipedia/commons/thumb/6/6c/Thomas_Cole_-_Architect%E2%80%99s_Dream_-_Google_Art_Project.jpg/540px-Thomas_Cole_-_Architect%E2%80%99s_Dream_-_Google_Art_Project.jpg"
prompts = [
ImaginePrompt("a scenic landscape", seed=1),
ImaginePrompt("a scenic landscape", seed=1, upscale=True),
ImaginePrompt("a bowl of fruit"),
ImaginePrompt([
WeightedPrompt("cat", weight=1),
@ -133,7 +133,8 @@ prompts = [
mask_prompt="fruit|stems",
mask_mode="replace",
mask_expansion=3
)
),
ImaginePrompt("strawberries", tile_mode=True),
]
for result in imagine(prompts):
# do something
@ -162,8 +163,16 @@ docker build . -t imaginairy
docker run -it --gpus all -v $HOME/.cache/huggingface:/root/.cache/huggingface -v $HOME/.cache/torch:/root/.cache/torch -v `pwd`/outputs:/outputs imaginairy /bin/bash
```
## Running on Google Colab
[Example Colab](https://colab.research.google.com/drive/1rOvQNs0Cmn_yU1bKWjCOHzGVDgZkaTtO?usp=sharing)
## ChangeLog
**1.6.0**
- fix: *maybe* address #13 (`expected scalar type BFloat16 but found Float`)
  - at minimum, `--precision full` can now be specified, which should work around the issue
- feature: tile mode can now be specified per-prompt
**1.5.3**
- fix: missing config file for describe feature
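
Taken together, the two 1.6.0 items mean tiling and precision are now both controllable from the Python API. A minimal sketch of how they combine, based on the API shown in this diff (the prompt texts and output directory are illustrative):

```python
from imaginairy import ImaginePrompt, imagine_image_files

prompts = [
    # tile_mode is now a per-prompt setting rather than a global model patch
    ImaginePrompt("a flower-patterned wallpaper", tile_mode=True),
    ImaginePrompt("a scenic landscape"),  # tiling stays off for this prompt
]
# precision="full" is the workaround mentioned above for the BFloat16 issue
imagine_image_files(prompts, outdir="./outputs", precision="full")
```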

@ -1,7 +1,6 @@
import logging
import os
import re
from contextlib import nullcontext
from functools import lru_cache
import numpy as np
@ -11,7 +10,6 @@ from einops import rearrange
from omegaconf import OmegaConf
from PIL import Image, ImageDraw, ImageFilter, ImageOps
from pytorch_lightning import seed_everything
from torch import autocast
from transformers import cached_path
from imaginairy.enhancers.clip_masking import get_img_mask
@ -29,11 +27,13 @@ from imaginairy.samplers.base import get_sampler
from imaginairy.schema import ImaginePrompt, ImagineResult
from imaginairy.utils import (
expand_mask,
fix_torch_group_norm,
fix_torch_nn_layer_norm,
get_device,
instantiate_from_config,
pillow_fit_image_within,
pillow_img_to_torch_image,
platform_appropriate_autocast,
)
LIB_PATH = os.path.dirname(__file__)
@ -73,31 +73,11 @@ def load_model_from_config(config):
return model
def patch_conv(**patch):
"""
Patch to enable tiling mode
https://github.com/replicate/cog-stable-diffusion/compare/main...TomMoore515:material_stable_diffusion:main
"""
cls = torch.nn.Conv2d
init = cls.__init__
def __init__(self, *args, **kwargs):
return init(self, *args, **kwargs, **patch)
cls.__init__ = __init__
@lru_cache()
def load_model(tile_mode=False):
if tile_mode:
# generated images are tileable
patch_conv(padding_mode="circular")
def load_model():
config = "configs/stable-diffusion-v1.yaml"
config = OmegaConf.load(f"{LIB_PATH}/{config}")
model = load_model_from_config(config)
model = model.to(get_device())
return model
@ -111,7 +91,6 @@ def imagine_image_files(
ddim_eta=0.0,
record_step_images=False,
output_file_extension="jpg",
tile_mode=False,
print_caption=False,
):
big_path = os.path.join(outdir, "upscaled")
@ -139,7 +118,6 @@ def imagine_image_files(
precision=precision,
ddim_eta=ddim_eta,
img_callback=_record_step if record_step_images else None,
tile_mode=tile_mode,
add_caption=print_caption,
):
prompt = result.prompt
@ -164,11 +142,10 @@ def imagine(
precision="autocast",
ddim_eta=0.0,
img_callback=None,
tile_mode=False,
half_mode=None,
add_caption=False,
):
model = load_model(tile_mode=tile_mode)
model = load_model()
# half-mode is only supported on CUDA; default to it there unless explicitly set
half_mode = half_mode if half_mode is not None else get_device() == "cuda"
@ -179,13 +156,12 @@ def imagine(
prompts = [ImaginePrompt(prompts)] if isinstance(prompts, str) else prompts
prompts = [prompts] if isinstance(prompts, ImaginePrompt) else prompts
_img_callback = None
if get_device() == "cpu":
logger.info("Running in CPU mode. it's gonna be slooooooow.")
precision_scope = (
autocast
if precision == "autocast" and get_device() in ("cuda", "cpu")
else nullcontext
)
with torch.no_grad(), precision_scope(get_device()), fix_torch_nn_layer_norm():
with torch.no_grad(), platform_appropriate_autocast(
precision
), fix_torch_nn_layer_norm(), fix_torch_group_norm():
for prompt in prompts:
with ImageLoggingContext(
prompt=prompt,
@ -194,6 +170,7 @@ def imagine(
):
logger.info(f"Generating {prompt.prompt_description()}")
seed_everything(prompt.seed)
model.tile_mode(prompt.tile_mode)
uc = None
if prompt.prompt_strength != 1.0:

@ -121,7 +121,7 @@ def configure_logging(level="INFO"):
@click.option(
"--tile",
is_flag=True,
help="Any images rendered will be tileable. Unfortunately cannot be controlled at the per-image level yet",
help="Any images rendered will be tileable.",
)
@click.option(
"--mask-image",
@ -149,6 +149,12 @@ def configure_logging(level="INFO"):
is_flag=True,
help="Generate a text description of the generated image",
)
@click.option(
"--precision",
help="evaluate at this precision",
type=click.Choice(["full", "autocast"]),
default="autocast",
)
@click.pass_context
def imagine_cmd(
ctx,
@ -174,6 +180,7 @@ def imagine_cmd(
mask_mode,
mask_expansion,
caption,
precision,
):
"""Have the AI generate images. alias:imagine"""
if ctx.invoked_subcommand is not None:
@ -190,7 +197,7 @@ def imagine_cmd(
init_image = LazyLoadingImage(url=init_image)
prompts = []
load_model(tile_mode=tile)
load_model()
for _ in range(repeats):
for prompt_text in prompt_texts:
prompt = ImaginePrompt(
@ -209,6 +216,7 @@ def imagine_cmd(
mask_mode=mask_mode,
upscale=upscale,
fix_faces=fix_faces,
tile_mode=tile,
)
prompts.append(prompt)
@ -217,9 +225,9 @@ def imagine_cmd(
outdir=outdir,
ddim_eta=ddim_eta,
record_step_images="images" in show_work,
tile_mode=tile,
output_file_extension="png",
print_caption=caption,
precision=precision,
)
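
For reference, the reworked options can be exercised through click's test runner, mirroring the project's CLI tests. A hedged sketch (the prompt text is illustrative, and a full run still requires model weights):

```python
from click.testing import CliRunner

from imaginairy.cmds import imagine_cmd

runner = CliRunner()
# --tile now feeds tile_mode into each ImaginePrompt; --precision is the new option
result = runner.invoke(
    imagine_cmd, ["a repeating brick texture", "--tile", "--precision", "full"]
)
assert result.exit_code == 0
```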

@ -273,6 +273,18 @@ class LatentDiffusion(DDPM):
self.init_from_ckpt(ckpt_path, ignore_keys)
self.restarted_from_ckpt = True
# store initial padding mode so we can switch to 'circular'
# when we want tiled images
for m in self.modules():
if isinstance(m, nn.Conv2d):
m._initial_padding_mode = m.padding_mode
def tile_mode(self, enabled):
"""For creating seamless tiles"""
for m in self.modules():
if isinstance(m, nn.Conv2d):
m.padding_mode = "circular" if enabled else m._initial_padding_mode
def make_cond_schedule(
self,
):
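
The `tile_mode` method swaps every `Conv2d` to circular padding, which is what makes the output wrap seamlessly. A standalone sketch of the effect, independent of the model code:

```python
import torch
from torch import nn

conv = nn.Conv2d(1, 1, kernel_size=3, padding=1, bias=False)
conv.weight.data.fill_(1.0)

x = torch.zeros(1, 1, 4, 4)
x[0, 0, 0, 0] = 1.0  # single activation in the top-left corner

with torch.no_grad():
    conv.padding_mode = "zeros"
    print(conv(x)[0, 0, -1, -1].item())  # 0.0 -> corners are independent
    conv.padding_mode = "circular"
    print(conv(x)[0, 0, -1, -1].item())  # 1.0 -> edges wrap, so tiles line up
```

Because `load_model()` is now cached with `lru_cache` and no longer takes `tile_mode`, toggling the padding mode per prompt like this avoids reloading the model whenever tiling changes.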

@ -24,23 +24,24 @@ class DiagonalGaussianDistribution:
def kl(self, other=None):
if self.deterministic:
return torch.Tensor([0.0])
else:
if other is None:
return 0.5 * torch.sum(
torch.pow(self.mean, 2) + self.var - 1.0 - self.logvar,
dim=[1, 2, 3],
)
else:
return 0.5 * torch.sum(
torch.pow(self.mean - other.mean, 2) / other.var
+ self.var / other.var
- 1.0
- self.logvar
+ other.logvar,
dim=[1, 2, 3],
)
def nll(self, sample, dims=[1, 2, 3]):
if other is None:
return 0.5 * torch.sum(
torch.pow(self.mean, 2) + self.var - 1.0 - self.logvar,
dim=[1, 2, 3],
)
return 0.5 * torch.sum(
torch.pow(self.mean - other.mean, 2) / other.var
+ self.var / other.var
- 1.0
- self.logvar
+ other.logvar,
dim=[1, 2, 3],
)
def nll(self, sample, dims=None):
dims = [1, 2, 3] if dims is None else dims
if self.deterministic:
return torch.Tensor([0.0])
logtwopi = np.log(2.0 * np.pi)
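
The de-nesting above doesn't change the math: `kl()` is still the closed-form KL divergence between a diagonal Gaussian and either the standard normal or another diagonal Gaussian. A quick sanity sketch (not part of the commit) checking the `other is None` branch against `torch.distributions`:

```python
import torch
from torch.distributions import Normal, kl_divergence

mean, logvar = torch.randn(1, 4, 8, 8), torch.randn(1, 4, 8, 8)
var = logvar.exp()

# the `other is None` branch above, written out directly
manual = 0.5 * torch.sum(mean.pow(2) + var - 1.0 - logvar, dim=[1, 2, 3])
reference = kl_divergence(Normal(mean, var.sqrt()), Normal(0.0, 1.0)).sum(dim=[1, 2, 3])
assert torch.allclose(manual, reference, atol=1e-4)
```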

@ -103,6 +103,7 @@ class ImaginePrompt:
fix_faces=False,
sampler_type="PLMS",
conditioning=None,
tile_mode=False,
):
prompt = prompt if prompt is not None else "a scenic landscape"
if isinstance(prompt, str):
@ -131,6 +132,7 @@ class ImaginePrompt:
self.mask_image = mask_image
self.mask_mode = mask_mode
self.mask_expansion = mask_expansion
self.tile_mode = tile_mode
@property
def prompt_text(self):

@ -2,7 +2,7 @@ import importlib
import logging
import os.path
import platform
from contextlib import contextmanager
from contextlib import contextmanager, nullcontext
from functools import lru_cache
from typing import List, Optional
@ -10,7 +10,7 @@ import numpy as np
import requests
import torch
from PIL import Image, ImageFilter
from torch import Tensor
from torch import Tensor, autocast
from torch.nn import functional
from torch.overrides import handle_torch_function, has_torch_function_variadic
from transformers import cached_path
@ -61,6 +61,18 @@ def get_obj_from_str(string, reload=False):
return getattr(importlib.import_module(module, package=None), cls)
@contextmanager
def platform_appropriate_autocast(precision="autocast"):
"""
Allow calculations to run in mixed precision, which can be faster.
"""
precision_scope = nullcontext
if precision == "autocast" and get_device() in ("cuda", "cpu"):
precision_scope = autocast
with precision_scope(get_device()):
yield
def _fixed_layer_norm(
input: Tensor, # noqa
normalized_shape: List[int],
@ -104,6 +116,43 @@ def fix_torch_nn_layer_norm():
functional.layer_norm = orig_function
@contextmanager
def fix_torch_group_norm():
"""
Patch group_norm to cast the weights to the same type as the inputs.
From what I can understand, all the other repos just switch to full precision instead
of addressing this. I think that would make things slower, but I'm not sure.
https://github.com/pytorch/pytorch/pull/81852
"""
orig_group_norm = functional.group_norm
def _group_norm_wrapper(
input: Tensor, # noqa
num_groups: int,
weight: Optional[Tensor] = None,
bias: Optional[Tensor] = None,
eps: float = 1e-5,
) -> Tensor:
if weight is not None and weight.dtype != input.dtype:
weight = weight.to(input.dtype)
if bias is not None and bias.dtype != input.dtype:
bias = bias.to(input.dtype)
return orig_group_norm(
input=input, num_groups=num_groups, weight=weight, bias=bias, eps=eps
)
functional.group_norm = _group_norm_wrapper
try:
yield
finally:
functional.group_norm = orig_group_norm
def expand_mask(mask_image, size):
if size < 0:
threshold = 0.95
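
The two new context managers are designed to wrap the whole generation loop, as `imagine()` now does. A rough sketch (not from the commit; exact dtype support depends on the torch version) of the group_norm failure mode from #13 and the cast that fixes it:

```python
import torch
from torch.nn import functional

from imaginairy.utils import fix_torch_group_norm

# under CPU autocast, activations become bfloat16 while module weights stay float32
x = torch.randn(1, 8, 4, 4, dtype=torch.bfloat16)
weight = torch.ones(8)  # float32, as nn.GroupNorm stores it

# without the patch this raises `expected scalar type BFloat16 but found Float`;
# the wrapper casts weight/bias to the input dtype before calling the original op
with fix_torch_group_norm():
    out = functional.group_norm(x, num_groups=2, weight=weight)
assert out.dtype == torch.bfloat16
```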

@ -6,3 +6,4 @@ pydocstyle
pylama
pylint
pytest
pytest-randomly

@ -10,7 +10,7 @@ absl-py==1.2.0
# tensorboard
addict==2.4.0
# via basicsr
aiohttp==3.8.1
aiohttp==3.8.3
# via fsspec
aiosignal==1.2.0
# via aiohttp
@ -68,7 +68,7 @@ filelock==3.8.0
# transformers
filterpy==1.4.5
# via facexlib
fonttools==4.37.2
fonttools==4.37.3
# via matplotlib
frozenlist==1.3.1
# via
@ -86,7 +86,7 @@ gfpgan==1.3.8
# via
# imaginAIry (setup.py)
# realesrgan
google-auth==2.11.0
google-auth==2.11.1
# via
# google-auth-oauthlib
# tb-nightly
@ -95,7 +95,7 @@ google-auth-oauthlib==0.4.6
# via
# tb-nightly
# tensorboard
grpcio==1.49.0
grpcio==1.48.1
# via
# tb-nightly
# tensorboard
@ -212,6 +212,7 @@ pillow==9.2.0
# diffusers
# facexlib
# imageio
# imaginAIry (setup.py)
# matplotlib
# realesrgan
# scikit-image
@ -249,13 +250,17 @@ pyflakes==2.5.0
# via pylama
pylama==8.4.1
# via -r requirements-dev.in
pylint==2.15.2
pylint==2.15.3
# via -r requirements-dev.in
pyparsing==3.0.9
# via
# matplotlib
# packaging
pytest==7.1.3
# via
# -r requirements-dev.in
# pytest-randomly
pytest-randomly==3.12.0
# via -r requirements-dev.in
python-dateutil==2.8.2
# via matplotlib
@ -273,7 +278,7 @@ pyyaml==6.0
# pycln
# pytorch-lightning
# transformers
realesrgan==0.2.8
realesrgan==0.3.0
# via imaginAIry (setup.py)
regex==2022.9.13
# via
@ -311,7 +316,7 @@ six==1.16.0
# python-dateutil
snowballstemmer==2.2.0
# via pydocstyle
tb-nightly==2.11.0a20220918
tb-nightly==2.11.0a20220921
# via
# basicsr
# gfpgan

@ -7,7 +7,7 @@ setup(
name="imaginAIry",
author="Bryce Drennan",
# author_email="b r y p y d o t io",
version="1.5.4",
version="1.6.0",
description="AI imagined images. Pythonic generation of stable diffusion images.",
long_description=readme,
long_description_content_type="text/markdown",

@ -0,0 +1,36 @@
FROM python:3.10.6-slim as base
RUN apt-get update && apt-get install -y libgl1 libglib2.0-0 make
ENV PIP_DISABLE_PIP_VERSION_CHECK=1 \
PIP_ROOT_USER_ACTION=ignore
FROM base as build_wheel
RUN pip install wheel
WORKDIR /app
COPY imaginairy ./imaginairy
COPY setup.py README.md ./
RUN python setup.py bdist_wheel
FROM base as install_wheel
WORKDIR /app
COPY requirements-dev.in ./
RUN pip install -r requirements-dev.in
COPY --from=build_wheel /app/dist/* ./
RUN pip install *.whl
RUN imagine --help
COPY Makefile ./
COPY tests ./tests

@ -4,7 +4,11 @@ import pytest
from imaginairy import api
from imaginairy.suppress_logs import suppress_annoying_logs_and_warnings
from imaginairy.utils import fix_torch_nn_layer_norm
from imaginairy.utils import (
fix_torch_group_norm,
fix_torch_nn_layer_norm,
platform_appropriate_autocast,
)
if "pytest" in str(sys.argv):
suppress_annoying_logs_and_warnings()
@ -13,5 +17,6 @@ if "pytest" in str(sys.argv):
@pytest.fixture(scope="session", autouse=True)
def pre_setup():
api.IMAGINAIRY_SAFETY_MODE = "disabled"
with fix_torch_nn_layer_norm():
suppress_annoying_logs_and_warnings()
with fix_torch_nn_layer_norm(), fix_torch_group_norm(), platform_appropriate_autocast():
yield

@ -17,4 +17,4 @@ def test_text_conditioning():
if "mps" in get_device():
assert hashed == "263e5ee7d2be087d816e094b80ffc546"
elif "cuda" in get_device():
assert hashed == "3d7867d5b2ebf15102a9ca9476d63ebc"
assert hashed == "41818051d7c469fc57d0a940c9d24d82"

@ -1,9 +1,12 @@
import pytest
from click.testing import CliRunner
from imaginairy.cmds import imagine_cmd
from imaginairy.utils import get_device
from tests import TESTS_FOLDER
@pytest.mark.skipif(get_device() == "cpu", reason="Too slow to run on CPU")
def test_imagine_cmd():
runner = CliRunner()
result = runner.invoke(

@ -20,7 +20,7 @@ def test_fix_faces():
if "mps" in get_device():
assert img_hash(img) == "a75991307eda675a26eeb7073f828e93"
else:
assert img_hash(img) == "5aa847a1464de75b158658a35800b6bf"
assert img_hash(img) == "e56c1205bbc8f251be05773f2ba7fa24"
def img_hash(img):

@ -8,7 +8,7 @@ from imaginairy.utils import get_device
from . import TESTS_FOLDER
device_sampler_type_test_cases = {
"mps:0": {
"mps:0": [
("plms", "b4b434ed45919f3505ac2be162791c71"),
("ddim", "b369032a025915c0a7ccced165a609b3"),
("k_lms", "b87325c189799d646ccd07b331564eb6"),
@ -17,8 +17,8 @@ device_sampler_type_test_cases = {
("k_euler", "d126da5ca8b08099cde8b5037464e788"),
("k_euler_a", "cac5ca2e26c31a544b76a9442eb2ea37"),
("k_heun", "0382ef71d9967fefd15676410289ebab"),
},
"cuda": {
],
"cuda": [
("plms", "62e78287e7848e48d45a1b207fb84102"),
("ddim", "164c2a008b100e5fa07d3db2018605bd"),
("k_lms", "450fea507ccfb44b677d30fae9f40a52"),
@ -27,7 +27,8 @@ device_sampler_type_test_cases = {
("k_euler", "06df9c19d472bfa6530db98be4ea10e8"),
("k_euler_a", "79552628ff77914c8b6870703fe116b5"),
("k_heun", "8ced3578ae25d34da9f4e4b1a20bf416"),
},
],
"cpu": [],
}
sampler_type_test_cases = device_sampler_type_test_cases[get_device()]
@ -54,12 +55,14 @@ device_sampler_type_test_cases_img_2_img = {
("plms", "efba8b836b51d262dbf72284844869f8"),
("ddim", "a62878000ad3b581a11dd3fb329dc7d2"),
},
"cpu": [],
}
sampler_type_test_cases_img_2_img = device_sampler_type_test_cases_img_2_img[
get_device()
]
@pytest.mark.skipif(get_device() == "cpu", reason="Too slow to run on CPU")
@pytest.mark.parametrize("sampler_type,expected_md5", sampler_type_test_cases_img_2_img)
def test_img_to_img(sampler_type, expected_md5):
prompt = ImaginePrompt(
@ -79,6 +82,7 @@ def test_img_to_img(sampler_type, expected_md5):
assert result.md5() == expected_md5
@pytest.mark.skipif(get_device() == "cpu", reason="Too slow to run on CPU")
def test_img_to_img_from_url():
prompt = ImaginePrompt(
"dogs lying on a hot pink couch",
@ -96,6 +100,7 @@ def test_img_to_img_from_url():
imagine_image_files(prompt, outdir=out_folder)
@pytest.mark.skipif(get_device() == "cpu", reason="Too slow to run on CPU")
def test_img_to_file():
prompt = ImaginePrompt(
"an old growth forest, diffuse light poking through the canopy. high-resolution, nature photography, nat geo photo",
@ -110,6 +115,7 @@ def test_img_to_file():
imagine_image_files(prompt, outdir=out_folder)
@pytest.mark.skipif(get_device() == "cpu", reason="Too slow to run on CPU")
def test_inpainting():
prompt = ImaginePrompt(
"a basketball on a bench",
@ -126,6 +132,7 @@ def test_inpainting():
imagine_image_files(prompt, outdir=out_folder)
@pytest.mark.skipif(get_device() == "cpu", reason="Too slow to run on CPU")
def test_cliptext_inpainting():
prompts = [
ImaginePrompt(

@ -18,6 +18,7 @@ def test_is_nsfw():
def _pil_to_latent(img):
model = load_model()
model.tile_mode(False)
img = pillow_img_to_torch_image(img)
img = img.to(get_device())
latent = model.get_first_stage_encoding(model.encode_first_stage(img))
