feature: image sizes can now be multiples of 8 instead of 64

from https://github.com/CompVis/stable-diffusion/issues/60#issuecomment-1240294667
pull/205/head
Authored by Bryce 2 years ago, committed by Bryce Drennan
parent 9bef5a151a
commit 248679d8de

@@ -282,6 +282,10 @@ docker run -it --gpus all -v $HOME/.cache/huggingface:/root/.cache/huggingface -
[Example Colab](https://colab.research.google.com/drive/1rOvQNs0Cmn_yU1bKWjCOHzGVDgZkaTtO?usp=sharing)
## ChangeLog
**8.1.0**
- feature: image sizes can now be multiples of 8 instead of 64. Inputs will be silently rounded down.
**8.0.5**
- fix: bypass huggingface cache retrieval bug
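
The changelog entry above means requested dimensions that are not multiples of 8 get snapped down rather than rejected. A minimal sketch of that rounding, using an illustrative helper name rather than anything from the imaginairy API:

```python
# "Silently rounded down" behavior; `snap_down` is an illustrative name,
# not part of the imaginairy API.
def snap_down(value: int, multiple: int = 8) -> int:
    """Round value down to the nearest multiple of `multiple`."""
    return value - (value % multiple)

print(snap_down(517))  # -> 512
print(snap_down(700))  # -> 696 (previously sizes had to be multiples of 64)
```
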
@@ -483,14 +487,14 @@ would be uncorrelated to the rest of the surrounding image. It created terrible
## Todo
- Performance Optimizations
- ✅ https://github.com/huggingface/diffusers/blob/main/docs/source/optimization/fp16.mdx
- ✅ https://github.com/CompVis/stable-diffusion/compare/main...Doggettx:stable-diffusion:autocast-improvements#
- ✅ https://www.reddit.com/r/StableDiffusion/comments/xalaws/test_update_for_less_memory_usage_and_higher/
- ✅ fp16
- ✅ [Doggettx Sliced attention](https://github.com/CompVis/stable-diffusion/compare/main...Doggettx:stable-diffusion:autocast-improvements#)
- ✅ xformers support https://www.photoroom.com/tech/stable-diffusion-100-percent-faster-with-memory-efficient-attention/
- https://github.com/neonsecret/stable-diffusion
- https://github.com/CompVis/stable-diffusion/pull/177
- https://github.com/huggingface/diffusers/pull/532/files
- https://github.com/HazyResearch/flash-attention
- ✅ xformers improvements https://www.photoroom.com/tech/stable-diffusion-100-percent-faster-with-memory-efficient-attention/
- Development Environment
- ✅ add tests
- ✅ set up ci (test/lint/format)

@@ -69,7 +69,7 @@ logger = logging.getLogger(__name__)
default=None,
show_default=True,
type=int,
help="Image height. Should be multiple of 64.",
help="Image height. Should be multiple of 8.",
)
@click.option(
"-w",
@@ -77,7 +77,7 @@ logger = logging.getLogger(__name__)
default=None,
show_default=True,
type=int,
help="Image width. Should be multiple of 64.",
help="Image width. Should be multiple of 8.",
)
@click.option(
"--steps",
@@ -174,7 +174,7 @@ logger = logging.getLogger(__name__)
@click.option(
"--outpaint",
help=(
"Specify in what directions to expand the image. Values will be snapped such that output image size is multiples of 64. Examples\n"
"Specify in what directions to expand the image. Values will be snapped such that output image size is multiples of 8. Examples\n"
" `--outpaint up10,down300,left50,right50`\n"
" `--outpaint u10,d300,l50,r50`\n"
" `--outpaint all200`\n"
@@ -341,7 +341,7 @@ def imagine_cmd(
default=None,
show_default=True,
type=int,
help="Image height. Should be multiple of 64.",
help="Image height. Should be multiple of 8.",
)
@click.option(
"-w",
@@ -349,7 +349,7 @@ def imagine_cmd(
default=None,
show_default=True,
type=int,
help="Image width. Should be multiple of 64.",
help="Image width. Should be multiple of 8.",
)
@click.option(
"--steps",
@@ -446,7 +446,7 @@ def imagine_cmd(
@click.option(
"--outpaint",
help=(
"Specify in what directions to expand the image. Values will be snapped such that output image size is multiples of 64. Examples\n"
"Specify in what directions to expand the image. Values will be snapped such that output image size is multiples of 8. Examples\n"
" `--outpaint up10,down300,left50,right50`\n"
" `--outpaint u10,d300,l50,r50`\n"
" `--outpaint all200`\n"

@@ -10,7 +10,7 @@ from imaginairy.utils import get_device
def pillow_fit_image_within(
image: PIL.Image.Image, max_height=512, max_width=512, convert="RGB"
image: PIL.Image.Image, max_height=512, max_width=512, convert="RGB", snap_size=8
):
image = image.convert(convert)
w, h = image.size
@@ -23,9 +23,9 @@ def pillow_fit_image_within(
if resize_ratio != 1:
w, h = int(w * resize_ratio), int(h * resize_ratio)
# resize to integer multiple of 64
w -= w % 64
h -= h % 64
# resize to integer multiple of snap_size
w -= w % snap_size
h -= h % snap_size
if (w, h) != image.size:
image = image.resize((w, h), resample=Image.Resampling.LANCZOS)
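
For reference, a self-contained version of the fit-then-snap logic in this hunk; the resize-ratio computation is inferred from the surrounding context and may not match the library's exact code:

```python
# Hedged sketch of the resize-and-snap behavior shown above.
from PIL import Image

def fit_and_snap(image: Image.Image, max_height=512, max_width=512, snap_size=8):
    w, h = image.size
    # shrink (never enlarge) so the image fits inside max_width x max_height
    resize_ratio = min(max_width / w, max_height / h, 1)
    if resize_ratio != 1:
        w, h = int(w * resize_ratio), int(h * resize_ratio)
    # snap each side down to a multiple of snap_size (8 now, 64 before this commit)
    w -= w % snap_size
    h -= h % snap_size
    if (w, h) != image.size:
        image = image.resize((w, h), resample=Image.Resampling.LANCZOS)
    return image
```

For example, a 1000x700 input shrinks to 512x358 to fit within 512x512, then snaps down to 512x352.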

@@ -836,6 +836,10 @@ class UNetModel(nn.Module):
hs.append(h)
h = self.middle_block(h, emb, context)
for module in self.output_blocks:
# allows us to work with multiples of 8 instead of 64 for image sizes
# https://github.com/CompVis/stable-diffusion/issues/60#issuecomment-1240294667
if h.shape[-2:] != hs[-1].shape[-2:]:
h = F.interpolate(h, hs[-1].shape[-2:], mode="nearest")
h = th.cat([h, hs.pop()], dim=1)
h = module(h, emb, context)
h = h.type(x.dtype)
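
The two added lines above are the core of the change. The VAE shrinks images by 8x, and the UNet then halves the latent repeatedly; when a latent side is odd at some level, the decoder's 2x upsampling comes back one pixel larger than the stored skip connection, so the tensor is interpolated to the skip's spatial size before concatenation. An illustration with made-up tensor shapes (the channel count is arbitrary):

```python
# Shape-mismatch illustration; spatial sizes follow a hypothetical 520x392 image.
import torch
import torch.nn.functional as F

# 520x392 pixels -> 65x49 latent. Strided downsampling gives 33x25, then 17x13;
# upsampling 17x13 by 2x yields 34x26, which no longer matches the 33x25 skip.
h = torch.randn(1, 320, 34, 26)     # upsampled decoder features
skip = torch.randn(1, 320, 33, 25)  # stored encoder features at this level

if h.shape[-2:] != skip.shape[-2:]:
    h = F.interpolate(h, skip.shape[-2:], mode="nearest")
out = torch.cat([h, skip], dim=1)   # channel-wise concat now succeeds
```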

@@ -4,7 +4,7 @@ from PIL import Image, ImageDraw
def prepare_image_for_outpaint(
img, mask=None, up=None, down=None, left=None, right=None, _all=0, snap_multiple=64
img, mask=None, up=None, down=None, left=None, right=None, _all=0, snap_multiple=8
):
up = up if up is not None else _all
down = down if down is not None else _all
@@ -31,7 +31,7 @@ def prepare_image_for_outpaint(
expanded_image.paste(img, (left, up))
# extend border pixels outward, this helps prevents lines at the boundary because masks getting reduced to
# 64x64 latent space can cause som inaccuracies
# 64x64 latent space can cause some inaccuracies
if up > 0:
expanded_image.paste(
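
The outpaint helper's `snap_multiple` default drops from 64 to 8 here; the snapping itself is outside this hunk. One plausible way the requested borders could be trimmed so the final canvas lands on multiples of 8 (illustrative only, not the library's actual adjustment):

```python
# Hypothetical sketch: trim the requested expansion so each output dimension
# is a multiple of snap_multiple. The real code may distribute the correction
# across the borders differently.
def snap_expansion(width, height, left, right, up, down, snap_multiple=8):
    right -= (width + left + right) % snap_multiple
    down -= (height + up + down) % snap_multiple
    return left, right, up, down

# For a 512x512 image with `--outpaint u10,d300,l50,r50`, the borders become
# (50, 46, 10, 294), giving a 608x816 canvas (both multiples of 8).
print(snap_expansion(512, 512, 50, 50, 10, 300))
```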
