feature: tile in a single dimension ("x" or "y")

pull/143/head
Authored by Bryce, committed by Bryce Drennan
parent 218bb23e3f
commit ad0b9e8ab8

@@ -114,6 +114,11 @@ When writing strength modifiers keep in mind that pixel values are between 0 and
<br>
<img src="https://raw.githubusercontent.com/brycedrennan/imaginAIry/master/assets/000075_961095192_PLMS40_PS7.5_piles_of_old_books.jpg" height="128"><img src="https://raw.githubusercontent.com/brycedrennan/imaginAIry/master/assets/000075_961095192_PLMS40_PS7.5_piles_of_old_books.jpg" height="128"><img src="https://raw.githubusercontent.com/brycedrennan/imaginAIry/master/assets/000075_961095192_PLMS40_PS7.5_piles_of_old_books.jpg" height="128">
<img src="https://raw.githubusercontent.com/brycedrennan/imaginAIry/master/assets/000040_527733581_PLMS40_PS7.5_leaves.jpg" height="128"><img src="https://raw.githubusercontent.com/brycedrennan/imaginAIry/master/assets/000040_527733581_PLMS40_PS7.5_leaves.jpg" height="128"><img src="https://raw.githubusercontent.com/brycedrennan/imaginAIry/master/assets/000040_527733581_PLMS40_PS7.5_leaves.jpg" height="128">
#### 360 degree images
```bash
imagine --tile-x -w 1024 -h 512 "360 degree equirectangular panorama photograph of the desert" --upscale
```
<img src="assets/desert_360.jpg" height="128">
### Image-to-Image
```bash
@@ -230,6 +235,10 @@ docker run -it --gpus all -v $HOME/.cache/huggingface:/root/.cache/huggingface -
[Example Colab](https://colab.research.google.com/drive/1rOvQNs0Cmn_yU1bKWjCOHzGVDgZkaTtO?usp=sharing)
## ChangeLog
**7.2.0**
- feature: 🎉 tile in a single dimension ("x" or "y"). This enables, with a bit of luck, generation of 360 VR images.
  For example, try: `imagine --tile-x -w 1024 -h 512 "360 degree equirectangular panorama photograph of the mountains" --upscale`
**7.1.1**
- fix: memory/speed regression introduced in 6.1.0
- fix: model switching now clears memory better, thus avoiding out of memory errors

Binary file not shown (new image, 1.3 MiB).

@@ -145,6 +145,7 @@ def imagine(
progress_img_interval_min_s=progress_img_interval_min_s,
) as lc:
seed_everything(prompt.seed)
model.tile_mode(prompt.tile_mode)
with lc.timing("conditioning"):
# need to expand if doing batches

@@ -122,7 +122,17 @@ logger = logging.getLogger(__name__)
@click.option(
"--tile",
is_flag=True,
help="Any images rendered will be tileable.",
help="Any images rendered will be tileable in both X and Y directions.",
)
@click.option(
"--tile-x",
is_flag=True,
help="Any images rendered will be tileable in the X direction.",
)
@click.option(
"--tile-y",
is_flag=True,
help="Any images rendered will be tileable in the Y direction.",
)
@click.option(
"--mask-image",
@@ -202,6 +212,8 @@ def imagine_cmd(
quiet,
show_work,
tile,
tile_x,
tile_y,
mask_image,
mask_prompt,
mask_mode,
@@ -241,6 +253,15 @@ def imagine_cmd(
prompt_library_paths=prompt_library_path,
)
prompt_iterator = prompt_expanding_iterators[prompt_text]
if tile:
_tile_mode = "xy"
elif tile_x:
_tile_mode = "x"
elif tile_y:
_tile_mode = "y"
else:
_tile_mode = ""
prompt = ImaginePrompt(
next(prompt_iterator),
negative_prompt=negative_prompt,
@@ -259,7 +280,7 @@
upscale=upscale,
fix_faces=fix_faces,
fix_faces_fidelity=fix_faces_fidelity,
tile_mode=tile,
tile_mode=_tile_mode,
model=model_weights_path,
)
prompts.append(prompt)
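
The three flags collapse into a single `tile_mode` string ("xy", "x", "y", or "") that is handed to `ImaginePrompt`. A quick CLI sketch using the flags added in this hunk (the prompts themselves are only illustrative):

```bash
imagine --tile "geometric moroccan tile pattern"        # seamless in both X and Y
imagine --tile-x -w 1024 -h 512 "360 degree equirectangular panorama photograph of a forest"
imagine --tile-y "waterfall texture"                    # wraps top-to-bottom only
```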

@@ -15,6 +15,8 @@ import pytorch_lightning as pl
import torch
from einops import rearrange, repeat
from torch import nn
from torch.nn import functional as F
from torch.nn.modules.utils import _pair
from torchvision.utils import make_grid
from tqdm import tqdm
@@ -650,6 +652,17 @@ class DDPM(pl.LightningModule):
return opt
def _TileModeConv2DConvForward(
self, input: torch.Tensor, weight: torch.Tensor, bias: torch.Tensor # noqa
):
working = F.pad(input, self.paddingX, mode=self.padding_modeX)
working = F.pad(working, self.paddingY, mode=self.padding_modeY)
return F.conv2d(
working, weight, bias, self.stride, _pair(0), self.dilation, self.groups
)
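
`_TileModeConv2DConvForward` pads width and height separately before a zero-padding convolution, which is what lets "circular" wrap apply to one axis only. A minimal standalone sketch of that padding idea (the tensor and the averaging kernel are made up for illustration):

```python
import torch
import torch.nn.functional as F

x = torch.arange(16.0).reshape(1, 1, 4, 4)

# Wrap the width (X) circularly, pad the height (Y) with zeros...
padded = F.pad(x, (1, 1, 0, 0), mode="circular")     # pad order: (left, right, top, bottom)
padded = F.pad(padded, (0, 0, 1, 1), mode="constant")

# ...then convolve with padding=0 so only the hand-applied padding is used.
kernel = torch.ones(1, 1, 3, 3) / 9.0
out = F.conv2d(padded, kernel, padding=0)
print(out.shape)  # torch.Size([1, 1, 4, 4]); the left and right edges now "see" each other
```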
class LatentDiffusion(DDPM):
"""main class"""
@@ -706,16 +719,34 @@ class LatentDiffusion(DDPM):
# store initial padding mode so we can switch to 'circular'
# when we want tiled images
# replace conv_forward with function that can do tiling in one direction
for m in self.modules():
if isinstance(m, nn.Conv2d):
m._initial_padding_mode = m.padding_mode
m._conv_forward = _TileModeConv2DConvForward.__get__( # noqa
m, nn.Conv2d
)
def tile_mode(self, enabled):
def tile_mode(self, tile_mode):
"""For creating seamless tiles"""
tile_mode = tile_mode or ""
tile_x = "x" in tile_mode
tile_y = "y" in tile_mode
for m in self.modules():
if isinstance(m, nn.Conv2d):
m.padding_mode = (
"circular" if enabled else m._initial_padding_mode # noqa
m.padding_modeX = "circular" if tile_x else "constant"
m.padding_modeY = "circular" if tile_y else "constant"
m.paddingX = (
m._reversed_padding_repeated_twice[0], # noqa
m._reversed_padding_repeated_twice[1], # noqa
0,
0,
)
m.paddingY = (
0,
0,
m._reversed_padding_repeated_twice[2], # noqa
m._reversed_padding_repeated_twice[3], # noqa
)
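
Outside the diffusion model, the same mechanism can be exercised on a bare `nn.Conv2d`: `__get__` binds the replacement `_conv_forward` to the instance, and `tile_mode` then only has to flip the per-axis padding attributes. A standalone sketch under those assumptions (the layer and input sizes are placeholders, not imaginAIry code):

```python
import torch
from torch import nn
from torch.nn import functional as F
from torch.nn.modules.utils import _pair

def _tiled_conv_forward(self, input, weight, bias):
    # Pad X and Y with independently chosen modes, then convolve with no padding.
    working = F.pad(input, self.paddingX, mode=self.padding_modeX)
    working = F.pad(working, self.paddingY, mode=self.padding_modeY)
    return F.conv2d(working, weight, bias, self.stride, _pair(0), self.dilation, self.groups)

conv = nn.Conv2d(3, 8, kernel_size=3, padding=1)
conv._conv_forward = _tiled_conv_forward.__get__(conv, nn.Conv2d)  # bind to this instance

# Equivalent of tile_mode("x"): circular left/right, zeros top/bottom.
pads = conv._reversed_padding_repeated_twice
conv.padding_modeX, conv.padding_modeY = "circular", "constant"
conv.paddingX = (pads[0], pads[1], 0, 0)
conv.paddingY = (0, 0, pads[2], pads[3])

print(conv(torch.randn(1, 3, 64, 64)).shape)  # torch.Size([1, 8, 64, 64])
```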
def make_cond_schedule(

@@ -111,13 +111,20 @@ class ImaginePrompt:
fix_faces_fidelity=DEFAULT_FACE_FIDELITY,
sampler_type=config.DEFAULT_SAMPLER,
conditioning=None,
tile_mode=False,
tile_mode="",
model=config.DEFAULT_MODEL,
):
self.prompts = self.process_prompt_input(prompt)
self.negative_prompt = self.process_prompt_input(negative_prompt)
self.prompt_strength = prompt_strength
if tile_mode is True:
tile_mode = "xy"
elif tile_mode is False:
tile_mode = ""
else:
tile_mode = tile_mode.lower()
assert tile_mode in ("", "x", "y", "xy")
if isinstance(init_image, str):
init_image = LazyLoadingImage(filepath=init_image)
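
For reference, the normalization above keeps the old boolean API working while accepting the new strings; anything else trips the assertion. A small sketch (the top-level import path is assumed):

```python
from imaginairy import ImaginePrompt

ImaginePrompt("colorful smoke", tile_mode=True)   # legacy boolean -> "xy"
ImaginePrompt("colorful smoke", tile_mode=False)  # legacy boolean -> ""
ImaginePrompt("colorful smoke", tile_mode="X")    # lowercased to "x"
ImaginePrompt("colorful smoke", tile_mode="z")    # AssertionError
```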
