diff --git a/imaginairy/api.py b/imaginairy/api.py
index ad03122..7f5873c 100755
--- a/imaginairy/api.py
+++ b/imaginairy/api.py
@@ -213,7 +213,7 @@ def imagine(
                 ddim_steps = int(prompt.steps / generation_strength)
                 sampler.make_schedule(ddim_num_steps=ddim_steps, ddim_eta=ddim_eta)
                 try:
-                    init_image, _, h = pillow_fit_image_within(
+                    init_image = pillow_fit_image_within(
                         prompt.init_image,
                         max_height=prompt.height,
                         max_width=prompt.width,
diff --git a/imaginairy/enhancers/face_restoration_codeformer.py b/imaginairy/enhancers/face_restoration_codeformer.py
index 97eb49f..b1c7e8c 100644
--- a/imaginairy/enhancers/face_restoration_codeformer.py
+++ b/imaginairy/enhancers/face_restoration_codeformer.py
@@ -68,7 +68,7 @@ def enhance_faces(img, fidelity=0):

         try:
             with torch.no_grad():
-                output = net(cropped_face_t, w=fidelity, adain=True)[0]
+                output = net(cropped_face_t, w=fidelity, adain=True)[0]  # noqa
                 restored_face = tensor2img(output, rgb2bgr=True, min_max=(-1, 1))
             del output
             torch.cuda.empty_cache()
diff --git a/imaginairy/img_utils.py b/imaginairy/img_utils.py
index b2cc817..e0f79cd 100644
--- a/imaginairy/img_utils.py
+++ b/imaginairy/img_utils.py
@@ -16,7 +16,7 @@ def pillow_fit_image_within(image: PIL.Image.Image, max_height=512, max_width=51
     w, h = int(w * resize_ratio), int(h * resize_ratio)
     w, h = map(lambda x: x - x % 64, (w, h))  # resize to integer multiple of 64
     image = image.resize((w, h), resample=Image.Resampling.NEAREST)
-    return image, w, h
+    return image


 def pillow_img_to_torch_image(img: PIL.Image.Image):
diff --git a/imaginairy/modules/attention.py b/imaginairy/modules/attention.py
index 4d9f45a..1144d8e 100644
--- a/imaginairy/modules/attention.py
+++ b/imaginairy/modules/attention.py
@@ -85,7 +85,7 @@ class LinearAttention(nn.Module):
         self.to_out = nn.Conv2d(hidden_dim, dim, 1)

     def forward(self, x):
-        b, c, h, w = x.shape
+        b, c, h, w = x.shape  # noqa
         qkv = self.to_qkv(x)
         q, k, v = rearrange(
             qkv, "b (qkv heads c) h w -> qkv b heads c (h w)", heads=self.heads, qkv=3
@@ -126,7 +126,7 @@ class SpatialSelfAttention(nn.Module):
         v = self.v(h_)

         # compute attention
-        b, c, h, w = q.shape
+        b, c, h, w = q.shape  # noqa
         q = rearrange(q, "b c h w -> b (h w) c")
         k = rearrange(k, "b c h w -> b c (h w)")
         w_ = torch.einsum("bij,bjk->bik", q, k)
@@ -178,9 +178,9 @@ class CrossAttention(nn.Module):

         if mask is not None:
             mask = rearrange(mask, "b ... -> b (...)")
-            max_neg_value = -torch.finfo(sim.dtype).max
+            _max_neg_value = -torch.finfo(sim.dtype).max
             mask = repeat(mask, "b j -> (b h) () j", h=h)
-            sim.masked_fill_(~mask, max_neg_value)
+            sim.masked_fill_(~mask, _max_neg_value)

         # attention, what we cannot get enough of
         attn = sim.softmax(dim=-1)
@@ -189,7 +189,7 @@ class CrossAttention(nn.Module):
         out = rearrange(out, "(b h) n d -> b n (h d)", h=h)
         return self.to_out(out)

-    def forward_cuda(self, x, context=None, mask=None):
+    def forward_cuda(self, x, context=None, mask=None):  # noqa
         h = self.heads

         q_in = self.to_q(x)
@@ -258,7 +258,7 @@ class BasicTransformerBlock(nn.Module):
         dropout=0.0,
         context_dim=None,
         gated_ff=True,
-        checkpoint=True,
+        checkpoint=True,  # noqa
     ):
         super().__init__()
         self.attn1 = CrossAttention(
@@ -326,7 +326,7 @@ class SpatialTransformer(nn.Module):

     def forward(self, x, context=None):
         # note: if no context is given, cross-attention defaults to self-attention
-        b, c, h, w = x.shape
+        b, c, h, w = x.shape  # noqa
         x_in = x
         x = self.norm(x)
         x = self.proj_in(x)
diff --git a/imaginairy/modules/autoencoder.py b/imaginairy/modules/autoencoder.py
index a0e123e..6d03e82 100644
--- a/imaginairy/modules/autoencoder.py
+++ b/imaginairy/modules/autoencoder.py
@@ -17,12 +17,13 @@ class AutoencoderKL(pl.LightningModule):
         lossconfig,
         embed_dim,
         ckpt_path=None,
-        ignore_keys=[],
+        ignore_keys=None,
         image_key="image",
         colorize_nlabels=None,
         monitor=None,
     ):
         super().__init__()
+        ignore_keys = [] if ignore_keys is None else ignore_keys
         self.image_key = image_key
         self.encoder = Encoder(**ddconfig)
         self.decoder = Decoder(**ddconfig)
@@ -32,20 +33,21 @@ class AutoencoderKL(pl.LightningModule):
         self.post_quant_conv = torch.nn.Conv2d(embed_dim, ddconfig["z_channels"], 1)
         self.embed_dim = embed_dim
         if colorize_nlabels is not None:
-            assert type(colorize_nlabels) == int
+            assert isinstance(colorize_nlabels, int)
             self.register_buffer("colorize", torch.randn(3, colorize_nlabels, 1, 1))
         if monitor is not None:
             self.monitor = monitor
         if ckpt_path is not None:
             self.init_from_ckpt(ckpt_path, ignore_keys=ignore_keys)

-    def init_from_ckpt(self, path, ignore_keys=list()):
+    def init_from_ckpt(self, path, ignore_keys=None):
+        ignore_keys = [] if ignore_keys is None else ignore_keys
         sd = torch.load(path, map_location="cpu")["state_dict"]
         keys = list(sd.keys())
         for k in keys:
             for ik in ignore_keys:
                 if k.startswith(ik):
-                    logger.info("Deleting key {} from state_dict.".format(k))
+                    logger.info(f"Deleting key {k} from state_dict.")
                     del sd[k]
         self.load_state_dict(sd, strict=False)
         logger.info(f"Restored from {path}")
@@ -61,7 +63,7 @@ class AutoencoderKL(pl.LightningModule):
         dec = self.decoder(z)
         return dec

-    def forward(self, input, sample_posterior=True):
+    def forward(self, input, sample_posterior=True):  # noqa
         posterior = self.encode(input)
         if sample_posterior:
             z = posterior.sample()
diff --git a/imaginairy/modules/clip_embedders.py b/imaginairy/modules/clip_embedders.py
index 402f5a6..88e5ec0 100644
--- a/imaginairy/modules/clip_embedders.py
+++ b/imaginairy/modules/clip_embedders.py
@@ -1,7 +1,7 @@
 import kornia
 import torch
-import torch.nn as nn
 from einops import repeat
+from torch import nn
 from transformers import CLIPTextModel, CLIPTokenizer

 from imaginairy.utils import get_device
@@ -102,7 +102,9 @@ class FrozenClipImageEmbedder(nn.Module):
         antialias=False,
     ):
         super().__init__()
-        self.model, preprocess = clip.load(name=model_name, device=device, jit=jit)
+        self.model, preprocess = clip.load(  # noqa
+            name=model_name, device=device, jit=jit
+        )

         self.antialias = antialias

diff --git a/imaginairy/modules/diffusion/ddpm.py b/imaginairy/modules/diffusion/ddpm.py
index 7736632..1638108 100644
--- a/imaginairy/modules/diffusion/ddpm.py
+++ b/imaginairy/modules/diffusion/ddpm.py
@@ -54,7 +54,7 @@ class DDPM(pl.LightningModule):
         beta_schedule="linear",
         loss_type="l2",
         ckpt_path=None,
-        ignore_keys=[],
+        ignore_keys=None,
         load_only_unet=False,
         monitor="val/loss",
         first_stage_key="image",
@@ -77,6 +77,8 @@
         logvar_init=0.0,
     ):
         super().__init__()
+        ignore_keys = [] if ignore_keys is None else ignore_keys
+
         assert parameterization in [
             "eps",
             "x0",
@@ -236,7 +238,6 @@ class LatentDiffusion(DDPM):
         conditioning_key=None,
         scale_factor=1.0,
         scale_by_std=False,
-        *args,
         **kwargs,
     ):
         self.num_timesteps_cond = (
@@ -251,7 +252,7 @@ class LatentDiffusion(DDPM):
             conditioning_key = None
         ckpt_path = kwargs.pop("ckpt_path", None)
         ignore_keys = kwargs.pop("ignore_keys", [])
-        super().__init__(conditioning_key=conditioning_key, *args, **kwargs)
+        super().__init__(conditioning_key=conditioning_key, **kwargs)
         self.concat_mode = concat_mode
         self.cond_stage_trainable = cond_stage_trainable
         self.cond_stage_key = cond_stage_key
@@ -286,7 +287,9 @@ class LatentDiffusion(DDPM):
         """For creating seamless tiles"""
         for m in self.modules():
             if isinstance(m, nn.Conv2d):
-                m.padding_mode = "circular" if enabled else m._initial_padding_mode
+                m.padding_mode = (
+                    "circular" if enabled else m._initial_padding_mode  # noqa
+                )

     def make_cond_schedule(
         self,
@@ -436,7 +439,7 @@ class LatentDiffusion(DDPM):
         :param x: img of size (bs, c, h, w)
         :return: n img crops of size (n, bs, c, kernel_size[0], kernel_size[1])
         """
-        bs, nc, h, w = x.shape
+        bs, nc, h, w = x.shape  # noqa

         # number of crops in image
         Ly = (h - kernel_size[0]) // stride[0] + 1
@@ -595,7 +598,7 @@ class LatentDiffusion(DDPM):
             stride = self.split_input_params["stride"]  # eg. (64, 64)
             df = self.split_input_params["vqf"]
             self.split_input_params["original_image_size"] = x.shape[-2:]
-            bs, nc, h, w = x.shape
+            bs, nc, h, w = x.shape  # noqa
             if ks[0] > h or ks[1] > w:
                 ks = (min(ks[0], h), min(ks[1], w))
                 logger.info("reducing Kernel")
diff --git a/imaginairy/modules/diffusion/model.py b/imaginairy/modules/diffusion/model.py
index abc688d..7cfd5c0 100644
--- a/imaginairy/modules/diffusion/model.py
+++ b/imaginairy/modules/diffusion/model.py
@@ -1,3 +1,4 @@
+# pylama:ignore=W0613,W0612
 # pytorch_diffusion + derived encoder decoder
 import gc
 import logging
@@ -5,8 +6,8 @@ import math

 import numpy as np
 import torch
-import torch.nn as nn
 from einops import rearrange
+from torch import nn

 from imaginairy.modules.attention import LinearAttention
 from imaginairy.modules.distributions import DiagonalGaussianDistribution
@@ -17,7 +18,7 @@ logger = logging.getLogger(__name__)

 def get_timestep_embedding(timesteps, embedding_dim):
     """
-    This matches the implementation in Denoising Diffusion Probabilistic Models:
+    Matches the implementation in Denoising Diffusion Probabilistic Models:
     From Fairseq.
     Build sinusoidal embeddings.
     This matches the implementation in tensor2tensor, but differs slightly
@@ -286,10 +287,10 @@ def make_attn(in_channels, attn_type="vanilla"):
     )
     if attn_type == "vanilla":
         return AttnBlock(in_channels)
-    elif attn_type == "none":
+    if attn_type == "none":
         return nn.Identity(in_channels)
-    else:
-        return LinAttnBlock(in_channels)
+
+    return LinAttnBlock(in_channels)


 class Encoder(nn.Module):
@@ -502,6 +503,7 @@ class Decoder(nn.Module):
         self.conv_out = torch.nn.Conv2d(
             block_in, out_ch, kernel_size=3, stride=1, padding=1
         )
+        self.last_z_shape = None

     def forward(self, z):
         # assert z.shape[1:] == self.z_shape[1:]
@@ -656,22 +658,22 @@ class Resize(nn.Module):
         self.mode = mode
         if self.with_conv:
             logger.info(
-                f"Note: {self.__class__.__name} uses learned downsampling and will ignore the fixed {mode} mode"
+                f"Note: {self.__class__.__name} uses learned downsampling and will ignore the fixed {mode} mode"  # noqa
             )
             raise NotImplementedError()
-            assert in_channels is not None
-            # no asymmetric padding in torch conv, must do it ourselves
-            self.conv = torch.nn.Conv2d(
-                in_channels, in_channels, kernel_size=4, stride=2, padding=1
-            )
+            # assert in_channels is not None
+            # # no asymmetric padding in torch conv, must do it ourselves
+            # self.conv = torch.nn.Conv2d(
+            #     in_channels, in_channels, kernel_size=4, stride=2, padding=1
+            # )

     def forward(self, x, scale_factor=1.0):
         if scale_factor == 1.0:
             return x
-        else:
-            x = torch.nn.functional.interpolate(
-                x, mode=self.mode, align_corners=False, scale_factor=scale_factor
-            )
+
+        x = torch.nn.functional.interpolate(
+            x, mode=self.mode, align_corners=False, scale_factor=scale_factor
+        )
         return x


diff --git a/imaginairy/modules/diffusion/openaimodel.py b/imaginairy/modules/diffusion/openaimodel.py
index f867780..bc41840 100644
--- a/imaginairy/modules/diffusion/openaimodel.py
+++ b/imaginairy/modules/diffusion/openaimodel.py
@@ -4,6 +4,7 @@ from abc import abstractmethod
 import numpy as np
 import torch as th
 import torch.nn.functional as F
+from omegaconf.listconfig import ListConfig
 from torch import nn

 from imaginairy.modules.attention import SpatialTransformer
@@ -488,7 +489,6 @@ class UNetModel(nn.Module):
             assert (
                 use_spatial_transformer
            ), "Fool!! You forgot to use the spatial transformer for your cross-attention conditioning..."
-            from omegaconf.listconfig import ListConfig

             if isinstance(context_dim, ListConfig):
                 context_dim = list(context_dim)
@@ -753,7 +753,7 @@ class UNetModel(nn.Module):
         self.middle_block.apply(convert_module_to_f32)
         self.output_blocks.apply(convert_module_to_f32)

-    def forward(self, x, timesteps=None, context=None, y=None, **kwargs):
+    def forward(self, x, timesteps=None, context=None, y=None, **kwargs):  # noqa
         """
         Apply the model to an input batch.
         :param x: an [N x C x ...] Tensor of inputs.
diff --git a/imaginairy/modules/diffusion/util.py b/imaginairy/modules/diffusion/util.py
index 3963646..ef061f4 100644
--- a/imaginairy/modules/diffusion/util.py
+++ b/imaginairy/modules/diffusion/util.py
@@ -13,8 +13,8 @@ import math

 import numpy as np
 import torch
-import torch.nn as nn
 from einops import repeat as e_repeat
+from torch import nn

 from imaginairy.utils import instantiate_from_config

@@ -57,7 +57,7 @@ def make_beta_schedule(


 def frange(start, stop, step):
-    """range but handles floats"""
+    """Range but handles floats"""
     x = start
     while True:
         if x >= stop:
@@ -148,11 +148,11 @@ def checkpoint(func, inputs, params, flag):
     if flag:
         args = tuple(inputs) + tuple(params)
         return CheckpointFunction.apply(func, len(inputs), *args)
-    else:
-        return func(*inputs)
+
+    return func(*inputs)


-class CheckpointFunction(torch.autograd.Function):
+class CheckpointFunction(torch.autograd.Function):  # noqa
     @staticmethod
     def forward(ctx, run_function, length, *args):
         ctx.run_function = run_function
@@ -252,7 +252,7 @@ class SiLU(nn.Module):


 class GroupNorm32(nn.GroupNorm):
-    def forward(self, x):
+    def forward(self, x):  # noqa
         return super().forward(x.float()).type(x.dtype)


diff --git a/imaginairy/modules/find_noise.py b/imaginairy/modules/find_noise.py
index 93991dd..7d394d0 100644
--- a/imaginairy/modules/find_noise.py
+++ b/imaginairy/modules/find_noise.py
@@ -17,16 +17,13 @@ from imaginairy.vendored import k_diffusion as K


 def find_noise_for_image(model, pil_img, prompt, steps=50, cond_scale=1.0, half=True):
-    img_latent = pillow_img_to_model_latent(
-        model, pil_img, batch_size=1, device="cuda", half=half
-    )
+    img_latent = pillow_img_to_model_latent(model, pil_img, batch_size=1, half=half)
     return find_noise_for_latent(
         model,
         img_latent,
         prompt,
         steps=steps,
         cond_scale=cond_scale,
-        half=half,
     )


diff --git a/imaginairy/samplers/base.py b/imaginairy/samplers/base.py
index 2a73bae..45064da 100644
--- a/imaginairy/samplers/base.py
+++ b/imaginairy/samplers/base.py
@@ -1,3 +1,4 @@
+# pylama:ignore=W0613
 import torch
 from torch import nn

@@ -25,9 +26,9 @@ _k_sampler_type_lookup = {


 def get_sampler(sampler_type, model):
-    from imaginairy.samplers.ddim import DDIMSampler
-    from imaginairy.samplers.kdiff import KDiffusionSampler
-    from imaginairy.samplers.plms import PLMSSampler
+    from imaginairy.samplers.ddim import DDIMSampler  # noqa
+    from imaginairy.samplers.kdiff import KDiffusionSampler  # noqa
+    from imaginairy.samplers.plms import PLMSSampler  # noqa

     sampler_type = sampler_type.lower()
     if sampler_type == "plms":
diff --git a/imaginairy/samplers/kdiff.py b/imaginairy/samplers/kdiff.py
index a1a1cb9..dae309a 100644
--- a/imaginairy/samplers/kdiff.py
+++ b/imaginairy/samplers/kdiff.py
@@ -1,3 +1,4 @@
+# pylama:ignore=W0613
 import torch

 from imaginairy.img_log import log_latent
diff --git a/imaginairy/samplers/plms.py b/imaginairy/samplers/plms.py
index 978e9ff..eadad4d 100644
--- a/imaginairy/samplers/plms.py
+++ b/imaginairy/samplers/plms.py
@@ -1,3 +1,4 @@
+# pylama:ignore=W0613
 """SAMPLING ONLY."""
 import logging
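
Note on the recurring `ignore_keys=[]` to `ignore_keys=None` change in autoencoder.py and ddpm.py above: a mutable default argument is evaluated once at function definition time, so every call that relies on the default shares the same list object. The sketch below is a minimal, hypothetical illustration of that failure mode and of the `None`-then-normalize pattern this diff adopts; the `load_weights_*` functions are made up for the example and are not part of imaginairy.

def load_weights_buggy(path, ignore_keys=[]):
    # The default list is created once and reused on every call,
    # so keys appended here leak into later calls.
    ignore_keys.append("loss.")
    return ignore_keys


def load_weights_fixed(path, ignore_keys=None):
    # Pattern used throughout the diff: default to None, then build
    # a fresh list per call.
    ignore_keys = [] if ignore_keys is None else ignore_keys
    ignore_keys.append("loss.")
    return ignore_keys


print(load_weights_buggy("a.ckpt"))  # ['loss.']
print(load_weights_buggy("b.ckpt"))  # ['loss.', 'loss.']  <- shared state across calls
print(load_weights_fixed("a.ckpt"))  # ['loss.']
print(load_weights_fixed("b.ckpt"))  # ['loss.']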