diff --git a/README.md b/README.md
index 5a700ad..1045841 100644
--- a/README.md
+++ b/README.md
@@ -298,7 +298,9 @@ docker run -it --gpus all -v $HOME/.cache/huggingface:/root/.cache/huggingface -
 
 ## ChangeLog
 
-- feature: sliced latent decoding - now possible to make much bigger images. 8 MP (3840x2160) on 11 GB GPU.
+- perf: `xformers` added as a dependency for Linux and Windows. Gives a nice speed boost.
+- perf: sliced attention now runs on macOS. A typo prevented that from happening previously.
+- perf: sliced latent decoding - now possible to make much bigger images. 3310x3310 on 11 GB GPU.
 
 **9.0.2**
 - fix: edit interface was broken
diff --git a/imaginairy/modules/attention.py b/imaginairy/modules/attention.py
index dbe465c..b9c4c58 100644
--- a/imaginairy/modules/attention.py
+++ b/imaginairy/modules/attention.py
@@ -10,13 +10,16 @@ from torch import einsum, nn
 from imaginairy.modules.diffusion.util import checkpoint as checkpoint_eval
 from imaginairy.utils import get_device
 
+XFORMERS_IS_AVAILABLE = False
+
 try:
-    import xformers  # noqa
-    import xformers.ops  # noqa
+    if get_device() == "cuda":
+        import xformers  # noqa
+        import xformers.ops  # noqa
 
-    XFORMERS_IS_AVAILBLE = True
+        XFORMERS_IS_AVAILABLE = True
 except ImportError:
-    XFORMERS_IS_AVAILBLE = False
+    pass
 
 ALLOW_SPLITMEM = True
 
@@ -181,7 +184,7 @@ class CrossAttention(nn.Module):
         # mask = _global_mask_hack.to(torch.bool)
 
         if get_device() == "cuda" or "mps" in get_device():
-            if not XFORMERS_IS_AVAILBLE and ALLOW_SPLITMEM:
+            if not XFORMERS_IS_AVAILABLE and ALLOW_SPLITMEM:
                 return self.forward_splitmem(x, context=context, mask=mask)
 
         h = self.heads
@@ -368,7 +371,7 @@ class BasicTransformerBlock(nn.Module):
         disable_self_attn=False,
     ):
         super().__init__()
-        attn_mode = "softmax-xformers" if XFORMERS_IS_AVAILBLE else "softmax"
+        attn_mode = "softmax-xformers" if XFORMERS_IS_AVAILABLE else "softmax"
         assert attn_mode in self.ATTENTION_MODES
         attn_cls = self.ATTENTION_MODES[attn_mode]
         self.disable_self_attn = disable_self_attn
diff --git a/imaginairy/modules/diffusion/model.py b/imaginairy/modules/diffusion/model.py
index 95c7b7e..81653d2 100644
--- a/imaginairy/modules/diffusion/model.py
+++ b/imaginairy/modules/diffusion/model.py
@@ -11,14 +11,16 @@ from torch import nn
 from imaginairy.modules.attention import MemoryEfficientCrossAttention
 from imaginairy.utils import get_device
 
+XFORMERS_IS_AVAILABLE = False
+
 try:
-    import xformers  # noqa
-    import xformers.ops  # noqa
+    if get_device() == "cuda":
+        import xformers  # noqa
+        import xformers.ops  # noqa
 
-    XFORMERS_IS_AVAILABLE = True
+        XFORMERS_IS_AVAILABLE = True
 except ImportError:
-    XFORMERS_IS_AVAILABLE = False
-    # print("No module 'xformers'. Proceeding without it.")
+    pass
 
 
 def get_timestep_embedding(timesteps, embedding_dim):
diff --git a/setup.py b/setup.py
index 0a0832f..548ebaf 100644
--- a/setup.py
+++ b/setup.py
@@ -76,5 +76,6 @@ setup(
         "torchmetrics>=0.6.0",
         "torchvision>=0.13.1",
         "kornia>=0.6",
+        "xformers>=0.0.16; sys_platform!='darwin'",
     ],
 )
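
Note on the conditional dependency: the `; sys_platform!='darwin'` suffix on the new setup.py requirement is a standard PEP 508 environment marker, so pip installs `xformers` on Linux and Windows but skips it entirely on macOS. Below is a minimal sketch of how such a marker evaluates, using the `packaging` library (the same parser pip uses internally) rather than anything from this repo; the script itself is hypothetical and assumes `packaging` is installed:

    # Hypothetical check script, not part of the imaginairy codebase.
    # Evaluates the setup.py environment marker against the current machine.
    from packaging.markers import Marker

    marker = Marker("sys_platform != 'darwin'")
    print(marker.evaluate())  # True on Linux/Windows, False on macOS

On a Mac this prints False, which lines up with the guarded `if get_device() == "cuda":` import above: on macOS the `import xformers` never runs, `XFORMERS_IS_AVAILABLE` stays False, and CrossAttention falls back to the sliced-attention `forward_splitmem` path.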