From 248679d8de9e242b5963542893de792640ca885d Mon Sep 17 00:00:00 2001 From: Bryce Date: Mon, 23 Jan 2023 22:25:56 -0800 Subject: [PATCH] feature: image sizes can now be multiples of 8 instead of 64 from https://github.com/CompVis/stable-diffusion/issues/60#issuecomment-1240294667 --- README.md | 12 ++++++++---- imaginairy/cmds.py | 12 ++++++------ imaginairy/img_utils.py | 8 ++++---- imaginairy/modules/diffusion/openaimodel.py | 4 ++++ imaginairy/outpaint.py | 4 ++-- 5 files changed, 24 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index ad928aa..27c2db8 100644 --- a/README.md +++ b/README.md @@ -282,6 +282,10 @@ docker run -it --gpus all -v $HOME/.cache/huggingface:/root/.cache/huggingface - [Example Colab](https://colab.research.google.com/drive/1rOvQNs0Cmn_yU1bKWjCOHzGVDgZkaTtO?usp=sharing) ## ChangeLog + +**8.1.0** +- feature: image sizes can now be multiples of 8 instead of 64. Inputs will be silently rounded down. + **8.0.5** - fix: bypass huggingface cache retrieval bug @@ -483,14 +487,14 @@ would be uncorrelated to the rest of the surrounding image. It created terrible ## Todo - Performance Optimizations - - ✅ https://github.com/huggingface/diffusers/blob/main/docs/source/optimization/fp16.mdx - - ✅ https://github.com/CompVis/stable-diffusion/compare/main...Doggettx:stable-diffusion:autocast-improvements# - - ✅ https://www.reddit.com/r/StableDiffusion/comments/xalaws/test_update_for_less_memory_usage_and_higher/ + - ✅ fp16 + - ✅ [Doggettx Sliced attention](https://github.com/CompVis/stable-diffusion/compare/main...Doggettx:stable-diffusion:autocast-improvements#) + - ✅ xformers support https://www.photoroom.com/tech/stable-diffusion-100-percent-faster-with-memory-efficient-attention/ - https://github.com/neonsecret/stable-diffusion - https://github.com/CompVis/stable-diffusion/pull/177 - https://github.com/huggingface/diffusers/pull/532/files - https://github.com/HazyResearch/flash-attention - - ✅ xformers improvements https://www.photoroom.com/tech/stable-diffusion-100-percent-faster-with-memory-efficient-attention/ + - Development Environment - ✅ add tests - ✅ set up ci (test/lint/format) diff --git a/imaginairy/cmds.py b/imaginairy/cmds.py index c39ea83..beea45c 100644 --- a/imaginairy/cmds.py +++ b/imaginairy/cmds.py @@ -69,7 +69,7 @@ logger = logging.getLogger(__name__) default=None, show_default=True, type=int, - help="Image height. Should be multiple of 64.", + help="Image height. Should be multiple of 8.", ) @click.option( "-w", @@ -77,7 +77,7 @@ logger = logging.getLogger(__name__) default=None, show_default=True, type=int, - help="Image width. Should be multiple of 64.", + help="Image width. Should be multiple of 8.", ) @click.option( "--steps", @@ -174,7 +174,7 @@ logger = logging.getLogger(__name__) @click.option( "--outpaint", help=( - "Specify in what directions to expand the image. Values will be snapped such that output image size is multiples of 64. Examples\n" + "Specify in what directions to expand the image. Values will be snapped such that output image size is multiples of 8. Examples\n" " `--outpaint up10,down300,left50,right50`\n" " `--outpaint u10,d300,l50,r50`\n" " `--outpaint all200`\n" @@ -341,7 +341,7 @@ def imagine_cmd( default=None, show_default=True, type=int, - help="Image height. Should be multiple of 64.", + help="Image height. Should be multiple of 8.", ) @click.option( "-w", @@ -349,7 +349,7 @@ def imagine_cmd( default=None, show_default=True, type=int, - help="Image width. Should be multiple of 64.", + help="Image width. Should be multiple of 8.", ) @click.option( "--steps", @@ -446,7 +446,7 @@ def imagine_cmd( @click.option( "--outpaint", help=( - "Specify in what directions to expand the image. Values will be snapped such that output image size is multiples of 64. Examples\n" + "Specify in what directions to expand the image. Values will be snapped such that output image size is multiples of 8. Examples\n" " `--outpaint up10,down300,left50,right50`\n" " `--outpaint u10,d300,l50,r50`\n" " `--outpaint all200`\n" diff --git a/imaginairy/img_utils.py b/imaginairy/img_utils.py index 9cb9bcb..9f1ae5d 100644 --- a/imaginairy/img_utils.py +++ b/imaginairy/img_utils.py @@ -10,7 +10,7 @@ from imaginairy.utils import get_device def pillow_fit_image_within( - image: PIL.Image.Image, max_height=512, max_width=512, convert="RGB" + image: PIL.Image.Image, max_height=512, max_width=512, convert="RGB", snap_size=8 ): image = image.convert(convert) w, h = image.size @@ -23,9 +23,9 @@ def pillow_fit_image_within( if resize_ratio != 1: w, h = int(w * resize_ratio), int(h * resize_ratio) - # resize to integer multiple of 64 - w -= w % 64 - h -= h % 64 + # resize to integer multiple of snap_size + w -= w % snap_size + h -= h % snap_size if (w, h) != image.size: image = image.resize((w, h), resample=Image.Resampling.LANCZOS) diff --git a/imaginairy/modules/diffusion/openaimodel.py b/imaginairy/modules/diffusion/openaimodel.py index 5e9ec7a..0eafb26 100644 --- a/imaginairy/modules/diffusion/openaimodel.py +++ b/imaginairy/modules/diffusion/openaimodel.py @@ -836,6 +836,10 @@ class UNetModel(nn.Module): hs.append(h) h = self.middle_block(h, emb, context) for module in self.output_blocks: + # allows us to work with multiples of 8 instead of 64 for image sizes + # https://github.com/CompVis/stable-diffusion/issues/60#issuecomment-1240294667 + if h.shape[-2:] != hs[-1].shape[-2:]: + h = F.interpolate(h, hs[-1].shape[-2:], mode="nearest") h = th.cat([h, hs.pop()], dim=1) h = module(h, emb, context) h = h.type(x.dtype) diff --git a/imaginairy/outpaint.py b/imaginairy/outpaint.py index 5c1a236..30deab2 100644 --- a/imaginairy/outpaint.py +++ b/imaginairy/outpaint.py @@ -4,7 +4,7 @@ from PIL import Image, ImageDraw def prepare_image_for_outpaint( - img, mask=None, up=None, down=None, left=None, right=None, _all=0, snap_multiple=64 + img, mask=None, up=None, down=None, left=None, right=None, _all=0, snap_multiple=8 ): up = up if up is not None else _all down = down if down is not None else _all @@ -31,7 +31,7 @@ def prepare_image_for_outpaint( expanded_image.paste(img, (left, up)) # extend border pixels outward, this helps prevents lines at the boundary because masks getting reduced to - # 64x64 latent space can cause som inaccuracies + # 64x64 latent space can cause some inaccuracies if up > 0: expanded_image.paste(