perf: add xformers dependency

pull/256/head
Authored by Bryce 2 years ago, committed by Bryce Drennan
parent 003a512dc8
commit 68e7fd73c5

@@ -298,7 +298,9 @@ docker run -it --gpus all -v $HOME/.cache/huggingface:/root/.cache/huggingface -
 ## ChangeLog
 
-- feature: sliced latent decoding - now possible to make much bigger images. 8 MP (3840x2160) on 11 GB GPU.
+- perf: `xformers` added as a dependency for linux and windows. Gives a nice speed boost.
+- perf: sliced attention now runs on MacOS. A typo prevented that from happening previously.
+- perf: sliced latent decoding - now possible to make much bigger images. 3310x3310 on 11 GB GPU.
 
 **9.0.2**
 - fix: edit interface was broken

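For context on the sliced latent decoding entry above: the latent is decoded through the VAE in tiles, so peak activation memory depends on the tile size rather than the final image size. A minimal sketch of the idea, with a hypothetical `decode_fn` and tile size and no overlap/blending between tiles (a real implementation needs that to hide seams):

```python
import torch

@torch.no_grad()
def decode_latent_sliced(decode_fn, latent, tile=64):
    # decode_fn: (1, C, h, w) latent -> (1, 3, 8*h, 8*w) image; the Stable
    # Diffusion VAE upsamples latents 8x, so a GPU that cannot decode the
    # full latent at once can still decode it tile by tile.
    _, _, height, width = latent.shape
    rows = []
    for y in range(0, height, tile):
        cols = [
            decode_fn(latent[:, :, y : y + tile, x : x + tile])
            for x in range(0, width, tile)
        ]
        rows.append(torch.cat(cols, dim=3))  # stitch tiles along width
    return torch.cat(rows, dim=2)  # stack tile rows along height
```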
@@ -10,13 +10,16 @@ from torch import einsum, nn
 from imaginairy.modules.diffusion.util import checkpoint as checkpoint_eval
 from imaginairy.utils import get_device
 
+XFORMERS_IS_AVAILABLE = False
 try:
-    import xformers  # noqa
-    import xformers.ops  # noqa
+    if get_device() == "cuda":
+        import xformers  # noqa
+        import xformers.ops  # noqa
 
-    XFORMERS_IS_AVAILBLE = True
+        XFORMERS_IS_AVAILABLE = True
 except ImportError:
-    XFORMERS_IS_AVAILBLE = False
+    pass
 
 ALLOW_SPLITMEM = True
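The guard above defaults the flag to False at module scope and only attempts the import on CUDA, so a missing or CPU-only xformers build can never flip it. A standalone version of the same pattern, using `torch.cuda.is_available()` as a stand-in for `get_device() == "cuda"` and a hypothetical flag name:

```python
import torch

HAS_XFORMERS = False  # hypothetical name; mirrors XFORMERS_IS_AVAILABLE above
try:
    if torch.cuda.is_available():
        import xformers.ops  # noqa

        HAS_XFORMERS = True
except ImportError:
    pass  # silently fall back to the plain PyTorch attention path
```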
@@ -181,7 +184,7 @@ class CrossAttention(nn.Module):
         # mask = _global_mask_hack.to(torch.bool)
 
         if get_device() == "cuda" or "mps" in get_device():
-            if not XFORMERS_IS_AVAILBLE and ALLOW_SPLITMEM:
+            if not XFORMERS_IS_AVAILABLE and ALLOW_SPLITMEM:
                 return self.forward_splitmem(x, context=context, mask=mask)
 
         h = self.heads
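`forward_splitmem` is the fallback taken when xformers is unavailable: it computes attention over query chunks so the full (n_queries, n_keys) score matrix never materializes at once. A rough sketch of the technique; the chunk size, shapes, and function name are illustrative rather than the repo's code:

```python
import torch

def attention_splitmem(q, k, v, chunk=1024):
    # q, k, v: (batch * heads, seq_len, head_dim). Only a (chunk, seq_len)
    # slice of the attention matrix exists at any one time.
    scale = q.shape[-1] ** -0.5
    out = torch.empty_like(q)
    for i in range(0, q.shape[1], chunk):
        scores = torch.einsum("bid,bjd->bij", q[:, i : i + chunk] * scale, k)
        out[:, i : i + chunk] = torch.einsum("bij,bjd->bid", scores.softmax(dim=-1), v)
    return out
```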
@@ -368,7 +371,7 @@ class BasicTransformerBlock(nn.Module):
         disable_self_attn=False,
     ):
         super().__init__()
-        attn_mode = "softmax-xformers" if XFORMERS_IS_AVAILBLE else "softmax"
+        attn_mode = "softmax-xformers" if XFORMERS_IS_AVAILABLE else "softmax"
         assert attn_mode in self.ATTENTION_MODES
         attn_cls = self.ATTENTION_MODES[attn_mode]
         self.disable_self_attn = disable_self_attn
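When the flag is set, the `softmax-xformers` mode routes attention through xformers' fused memory-efficient kernel instead of the plain softmax path. A minimal usage example of that call; the shapes, sizes, and dtype are illustrative, and it requires a CUDA device with xformers installed:

```python
import torch
import xformers.ops

# xformers accepts (batch, seq_len, heads, head_dim) inputs
q = torch.randn(2, 4096, 8, 64, device="cuda", dtype=torch.float16)
k, v = torch.randn_like(q), torch.randn_like(q)

out = xformers.ops.memory_efficient_attention(q, k, v)  # same shape as q
```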

@@ -11,14 +11,16 @@ from torch import nn
 from imaginairy.modules.attention import MemoryEfficientCrossAttention
 from imaginairy.utils import get_device
 
+XFORMERS_IS_AVAILABLE = False
 try:
-    import xformers  # noqa
-    import xformers.ops  # noqa
+    if get_device() == "cuda":
+        import xformers  # noqa
+        import xformers.ops  # noqa
 
-    XFORMERS_IS_AVAILABLE = True
+        XFORMERS_IS_AVAILABLE = True
 except ImportError:
-    XFORMERS_IS_AVAILABLE = False
-    # print("No module 'xformers'. Proceeding without it.")
+    pass
 
 
 def get_timestep_embedding(timesteps, embedding_dim):
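For reference, the `get_timestep_embedding` the hunk ends at is the standard sinusoidal timestep embedding used throughout diffusion codebases. A common formulation looks roughly like this (a sketch; the repo's exact frequency scaling may differ slightly):

```python
import math
import torch

def get_timestep_embedding(timesteps, embedding_dim):
    # Pairs of (sin, cos) at geometrically spaced frequencies,
    # as in "Attention Is All You Need" and DDPM.
    half = embedding_dim // 2
    freqs = torch.exp(
        -math.log(10000) * torch.arange(half, dtype=torch.float32) / half
    ).to(timesteps.device)
    args = timesteps.float()[:, None] * freqs[None, :]
    emb = torch.cat([torch.sin(args), torch.cos(args)], dim=1)
    if embedding_dim % 2 == 1:  # zero-pad odd widths
        emb = torch.nn.functional.pad(emb, (0, 1))
    return emb
```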

@@ -76,5 +76,6 @@ setup(
         "torchmetrics>=0.6.0",
         "torchvision>=0.13.1",
         "kornia>=0.6",
+        "xformers>=0.0.16; sys_platform!='darwin'",
     ],
 )
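The `sys_platform!='darwin'` suffix is a PEP 508 environment marker: pip evaluates it at install time and skips the requirement on macOS, where prebuilt xformers wheels were not available. The marker is roughly equivalent to this check (illustrative; pip performs the evaluation itself):

```python
import sys

# "xformers>=0.0.16" is installed only where this is True,
# i.e. everywhere except macOS ("darwin").
print(sys.platform != "darwin")
```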
