From 248679d8de9e242b5963542893de792640ca885d Mon Sep 17 00:00:00 2001
From: Bryce <github20210803@accounts.brycedrennan.com>
Date: Mon, 23 Jan 2023 22:25:56 -0800
Subject: [PATCH] feature: image sizes can now be multiples of 8 instead of 64

Based on the approach described in https://github.com/CompVis/stable-diffusion/issues/60#issuecomment-1240294667
---
 README.md                                   | 12 ++++++++----
 imaginairy/cmds.py                          | 12 ++++++------
 imaginairy/img_utils.py                     |  8 ++++----
 imaginairy/modules/diffusion/openaimodel.py |  4 ++++
 imaginairy/outpaint.py                      |  4 ++--
 5 files changed, 24 insertions(+), 16 deletions(-)

diff --git a/README.md b/README.md
index ad928aa..27c2db8 100644
--- a/README.md
+++ b/README.md
@@ -282,6 +282,10 @@ docker run -it --gpus all -v $HOME/.cache/huggingface:/root/.cache/huggingface -
 [Example Colab](https://colab.research.google.com/drive/1rOvQNs0Cmn_yU1bKWjCOHzGVDgZkaTtO?usp=sharing)
 
 ## ChangeLog
+
+**8.1.0**
+- feature: image sizes can now be multiples of 8 instead of 64. Sizes that are not a multiple of 8 will be silently rounded down to the nearest multiple of 8.
+
 **8.0.5**
 - fix: bypass huggingface cache retrieval bug
 
@@ -483,14 +487,14 @@ would be uncorrelated to the rest of the surrounding image.  It created terrible
 ## Todo
 
  - Performance Optimizations
-   - ✅ https://github.com/huggingface/diffusers/blob/main/docs/source/optimization/fp16.mdx
-   - ✅ https://github.com/CompVis/stable-diffusion/compare/main...Doggettx:stable-diffusion:autocast-improvements#
-   - ✅ https://www.reddit.com/r/StableDiffusion/comments/xalaws/test_update_for_less_memory_usage_and_higher/
+   - ✅ fp16
+   - ✅ [Doggettx Sliced attention](https://github.com/CompVis/stable-diffusion/compare/main...Doggettx:stable-diffusion:autocast-improvements#)
+   - ✅ [xformers support](https://www.photoroom.com/tech/stable-diffusion-100-percent-faster-with-memory-efficient-attention/)
    - https://github.com/neonsecret/stable-diffusion  
    - https://github.com/CompVis/stable-diffusion/pull/177
    - https://github.com/huggingface/diffusers/pull/532/files
    - https://github.com/HazyResearch/flash-attention
-   - ✅ xformers improvements https://www.photoroom.com/tech/stable-diffusion-100-percent-faster-with-memory-efficient-attention/
+   
  - Development Environment
    - ✅ add tests
    - ✅ set up ci (test/lint/format)
diff --git a/imaginairy/cmds.py b/imaginairy/cmds.py
index c39ea83..beea45c 100644
--- a/imaginairy/cmds.py
+++ b/imaginairy/cmds.py
@@ -69,7 +69,7 @@ logger = logging.getLogger(__name__)
     default=None,
     show_default=True,
     type=int,
-    help="Image height. Should be multiple of 64.",
+    help="Image height. Should be multiple of 8.",
 )
 @click.option(
     "-w",
@@ -77,7 +77,7 @@ logger = logging.getLogger(__name__)
     default=None,
     show_default=True,
     type=int,
-    help="Image width. Should be multiple of 64.",
+    help="Image width. Should be multiple of 8.",
 )
 @click.option(
     "--steps",
@@ -174,7 +174,7 @@ logger = logging.getLogger(__name__)
 @click.option(
     "--outpaint",
     help=(
-        "Specify in what directions to expand the image. Values will be snapped such that output image size is multiples of 64. Examples\n"
+        "Specify in what directions to expand the image. Values will be snapped such that output image size is multiples of 8. Examples\n"
         "  `--outpaint up10,down300,left50,right50`\n"
         "  `--outpaint u10,d300,l50,r50`\n"
         "  `--outpaint all200`\n"
@@ -341,7 +341,7 @@ def imagine_cmd(
     default=None,
     show_default=True,
     type=int,
-    help="Image height. Should be multiple of 64.",
+    help="Image height. Should be multiple of 8.",
 )
 @click.option(
     "-w",
@@ -349,7 +349,7 @@ def imagine_cmd(
     default=None,
     show_default=True,
     type=int,
-    help="Image width. Should be multiple of 64.",
+    help="Image width. Should be multiple of 8.",
 )
 @click.option(
     "--steps",
@@ -446,7 +446,7 @@ def imagine_cmd(
 @click.option(
     "--outpaint",
     help=(
-        "Specify in what directions to expand the image. Values will be snapped such that output image size is multiples of 64. Examples\n"
+        "Specify in what directions to expand the image. Values will be snapped such that output image size is multiples of 8. Examples\n"
         "  `--outpaint up10,down300,left50,right50`\n"
         "  `--outpaint u10,d300,l50,r50`\n"
         "  `--outpaint all200`\n"
diff --git a/imaginairy/img_utils.py b/imaginairy/img_utils.py
index 9cb9bcb..9f1ae5d 100644
--- a/imaginairy/img_utils.py
+++ b/imaginairy/img_utils.py
@@ -10,7 +10,7 @@ from imaginairy.utils import get_device
 
 
 def pillow_fit_image_within(
-    image: PIL.Image.Image, max_height=512, max_width=512, convert="RGB"
+    image: PIL.Image.Image, max_height=512, max_width=512, convert="RGB", snap_size=8
 ):
     image = image.convert(convert)
     w, h = image.size
@@ -23,9 +23,9 @@ def pillow_fit_image_within(
 
     if resize_ratio != 1:
         w, h = int(w * resize_ratio), int(h * resize_ratio)
-    # resize to integer multiple of 64
-    w -= w % 64
-    h -= h % 64
+    # resize to integer multiple of snap_size
+    w -= w % snap_size
+    h -= h % snap_size
 
     if (w, h) != image.size:
         image = image.resize((w, h), resample=Image.Resampling.LANCZOS)
diff --git a/imaginairy/modules/diffusion/openaimodel.py b/imaginairy/modules/diffusion/openaimodel.py
index 5e9ec7a..0eafb26 100644
--- a/imaginairy/modules/diffusion/openaimodel.py
+++ b/imaginairy/modules/diffusion/openaimodel.py
@@ -836,6 +836,10 @@ class UNetModel(nn.Module):
             hs.append(h)
         h = self.middle_block(h, emb, context)
         for module in self.output_blocks:
+            # allows us to work with multiples of 8 instead of 64 for image sizes
+            # https://github.com/CompVis/stable-diffusion/issues/60#issuecomment-1240294667
+            if h.shape[-2:] != hs[-1].shape[-2:]:
+                h = F.interpolate(h, hs[-1].shape[-2:], mode="nearest")
             h = th.cat([h, hs.pop()], dim=1)
             h = module(h, emb, context)
         h = h.type(x.dtype)
diff --git a/imaginairy/outpaint.py b/imaginairy/outpaint.py
index 5c1a236..30deab2 100644
--- a/imaginairy/outpaint.py
+++ b/imaginairy/outpaint.py
@@ -4,7 +4,7 @@ from PIL import Image, ImageDraw
 
 
 def prepare_image_for_outpaint(
-    img, mask=None, up=None, down=None, left=None, right=None, _all=0, snap_multiple=64
+    img, mask=None, up=None, down=None, left=None, right=None, _all=0, snap_multiple=8
 ):
     up = up if up is not None else _all
     down = down if down is not None else _all
@@ -31,7 +31,7 @@ def prepare_image_for_outpaint(
     expanded_image.paste(img, (left, up))
 
     # extend border pixels outward, this helps prevents lines at the boundary because masks getting reduced to
-    # 64x64 latent space can cause som inaccuracies
+    # 64x64 latent space can cause some inaccuracies
 
     if up > 0:
         expanded_image.paste(