From 1381c7fed4b6140969d7cd17aa41e8476ca95556 Mon Sep 17 00:00:00 2001 From: Bryce Date: Sun, 1 Jan 2023 20:14:22 -0800 Subject: [PATCH] lint: new ruff linter --- Makefile | 3 +- imaginairy/cmds.py | 4 +- imaginairy/enhancers/clip_masking.py | 2 +- imaginairy/enhancers/describe_image_blip.py | 4 +- imaginairy/enhancers/describe_image_clip.py | 4 +- .../enhancers/face_restoration_codeformer.py | 6 +- imaginairy/enhancers/prompt_expansion.py | 12 +-- imaginairy/enhancers/upscale_realesrgan.py | 2 +- imaginairy/img_utils.py | 5 +- imaginairy/model_manager.py | 8 +- imaginairy/modules/attention.py | 16 ++-- imaginairy/modules/autoencoder.py | 4 +- imaginairy/modules/clip_embedders.py | 2 +- imaginairy/modules/diffusion/ddpm.py | 69 +++++++------- imaginairy/modules/diffusion/model.py | 17 ++-- imaginairy/modules/diffusion/openaimodel.py | 15 ++-- imaginairy/modules/diffusion/util.py | 2 +- imaginairy/modules/encoders.py | 6 +- imaginairy/modules/midas/midas/midas_net.py | 2 +- .../modules/midas/midas/midas_net_custom.py | 2 +- imaginairy/modules/midas/midas/vit.py | 2 +- imaginairy/modules/midas/utils.py | 6 +- imaginairy/safety.py | 6 +- imaginairy/samplers/ddim.py | 2 +- imaginairy/samplers/kdiff.py | 2 +- imaginairy/samplers/plms.py | 2 +- imaginairy/schema.py | 2 +- imaginairy/utils.py | 22 ++--- imaginairy/vendored/blip/blip.py | 6 +- imaginairy/vendored/blip/blip_itm.py | 2 +- imaginairy/vendored/blip/blip_nlvr.py | 2 +- imaginairy/vendored/blip/blip_pretrain.py | 15 ++-- imaginairy/vendored/blip/blip_retrieval.py | 2 +- imaginairy/vendored/blip/blip_vqa.py | 2 +- imaginairy/vendored/blip/med.py | 8 +- imaginairy/vendored/blip/nlvr_encoder.py | 4 +- imaginairy/vendored/blip/vit.py | 10 +-- imaginairy/vendored/clip/clip.py | 6 +- imaginairy/vendored/clip/model.py | 12 ++- imaginairy/vendored/clip/simple_tokenizer.py | 6 +- imaginairy/vendored/clipseg/__init__.py | 12 +-- .../vendored/codeformer/codeformer_arch.py | 6 +- imaginairy/vendored/codeformer/vqgan_arch.py | 6 +- imaginairy/vendored/k_diffusion/__init__.py | 12 --- .../vendored/k_diffusion/models/__init__.py | 1 - .../vendored/k_diffusion/models/image_v1.py | 28 +++--- imaginairy/vendored/k_diffusion/sampling.py | 25 ++++-- requirements-dev.in | 2 +- requirements-dev.txt | 90 ++++++++----------- scripts/prep_vocab_lists.py | 4 +- setup.py | 2 +- tests/test_api.py | 28 +++--- tox.ini | 2 +- 53 files changed, 256 insertions(+), 266 deletions(-) diff --git a/Makefile b/Makefile index c16db61..7c4c289 100644 --- a/Makefile +++ b/Makefile @@ -27,7 +27,8 @@ init: require_pyenv ## Setup a dev environment for local development. af: autoformat ## Alias for `autoformat` autoformat: ## Run the autoformatter. @pycln . --all --quiet --extend-exclude __init__\.py - @isort --atomic --profile black . + @# ERA,T201 + @-ruff --extend-ignore ANN,ARG001,C90,DTZ,D100,D101,D102,D103,D202,D203,D212,D415,E501,RET504,S101,UP006,UP007 --extend-select C,D400,I,UP,W --unfixable T,ERA --fix-only . @black . test: ## Run the tests. diff --git a/imaginairy/cmds.py b/imaginairy/cmds.py index 5ea9f63..47b49eb 100644 --- a/imaginairy/cmds.py +++ b/imaginairy/cmds.py @@ -223,7 +223,7 @@ def imagine_cmd( model_weights_path, prompt_library_path, ): - """Have the AI generate images. alias:imagine""" + """Have the AI generate images. alias:imagine.""" if ctx.invoked_subcommand is not None: return @@ -303,7 +303,7 @@ def aimg(): @click.argument("image_filepaths", nargs=-1) @aimg.command() def describe(image_filepaths): - """Generate text descriptions of images""" + """Generate text descriptions of images.""" imgs = [] for p in image_filepaths: if p.startswith("http"): diff --git a/imaginairy/enhancers/clip_masking.py b/imaginairy/enhancers/clip_masking.py index 2649348..ed590f5 100644 --- a/imaginairy/enhancers/clip_masking.py +++ b/imaginairy/enhancers/clip_masking.py @@ -14,7 +14,7 @@ from imaginairy.vendored.clipseg import CLIPDensePredT weights_url = "https://github.com/timojl/clipseg/raw/master/weights/rd64-uni.pth" -@lru_cache() +@lru_cache def clip_mask_model(): from imaginairy.paths import PKG_ROOT # noqa diff --git a/imaginairy/enhancers/describe_image_blip.py b/imaginairy/enhancers/describe_image_blip.py index 06ca891..07fffe6 100644 --- a/imaginairy/enhancers/describe_image_blip.py +++ b/imaginairy/enhancers/describe_image_blip.py @@ -17,7 +17,7 @@ if "mps" in device: BLIP_EVAL_SIZE = 384 -@lru_cache() +@lru_cache def blip_model(): from imaginairy.paths import PKG_ROOT # noqa @@ -35,7 +35,7 @@ def blip_model(): def generate_caption(image, min_length=30): - """Given an image, return a caption""" + """Given an image, return a caption.""" gpu_image = ( transforms.Compose( [ diff --git a/imaginairy/enhancers/describe_image_clip.py b/imaginairy/enhancers/describe_image_clip.py index f9aedb1..42cec54 100644 --- a/imaginairy/enhancers/describe_image_clip.py +++ b/imaginairy/enhancers/describe_image_clip.py @@ -10,7 +10,7 @@ from imaginairy.vendored import clip device = "cuda" if torch.cuda.is_available() else "cpu" -@lru_cache() +@lru_cache def get_model(): model_name = "ViT-L/14" model, preprocess = clip.load(model_name, device=device) @@ -18,7 +18,7 @@ def get_model(): def find_img_text_similarity(image: Image.Image, phrases: Sequence): - """Find the likelihood of a list of textual concepts existing in the image""" + """Find the likelihood of a list of textual concepts existing in the image.""" model, preprocess = get_model() image = preprocess(image).unsqueeze(0).to(device) diff --git a/imaginairy/enhancers/face_restoration_codeformer.py b/imaginairy/enhancers/face_restoration_codeformer.py index b3eff12..5b84609 100644 --- a/imaginairy/enhancers/face_restoration_codeformer.py +++ b/imaginairy/enhancers/face_restoration_codeformer.py @@ -14,7 +14,7 @@ from imaginairy.vendored.codeformer.codeformer_arch import CodeFormer logger = logging.getLogger(__name__) -@lru_cache() +@lru_cache def codeformer_model(): model = CodeFormer( dim_embd=512, @@ -31,10 +31,10 @@ def codeformer_model(): return model -@lru_cache() +@lru_cache def face_restore_helper(): """ - Provide a singleton of FaceRestoreHelper + Provide a singleton of FaceRestoreHelper. FaceRestoreHelper loads a model internally so we need to cache it or we end up with a memory leak diff --git a/imaginairy/enhancers/prompt_expansion.py b/imaginairy/enhancers/prompt_expansion.py index c96df04..a3972d2 100644 --- a/imaginairy/enhancers/prompt_expansion.py +++ b/imaginairy/enhancers/prompt_expansion.py @@ -15,9 +15,9 @@ formatter = Formatter() PROMPT_EXPANSION_PATTERN = re.compile(r"[|a-z0-9_ -]+") -@lru_cache() +@lru_cache def prompt_library_filepaths(prompt_library_paths=None): - """Return all available category/filepath pairs""" + """Return all available category/filepath pairs.""" prompt_library_paths = [] if not prompt_library_paths else prompt_library_paths combined_prompt_library_filepaths = {} for prompt_path in DEFAULT_PROMPT_LIBRARY_PATHS + list(prompt_library_paths): @@ -27,15 +27,15 @@ def prompt_library_filepaths(prompt_library_paths=None): return combined_prompt_library_filepaths -@lru_cache() +@lru_cache def category_list(prompt_library_paths=None): - """Return the names of available phrase-lists""" + """Return the names of available phrase-lists.""" categories = list(prompt_library_filepaths(prompt_library_paths).keys()) categories.sort() return categories -@lru_cache() +@lru_cache def prompt_library_filepath(library_path): lookup = {} @@ -70,7 +70,7 @@ def get_phrases(category_name, prompt_library_paths=None): def expand_prompts(prompt_text, n=1, prompt_library_paths=None): """ - Replaces {vars} with random samples of corresponding phraselists + Replaces {vars} with random samples of corresponding phraselists. Example: p = "a happy {animal}" diff --git a/imaginairy/enhancers/upscale_realesrgan.py b/imaginairy/enhancers/upscale_realesrgan.py index 9f75fb9..d35375a 100644 --- a/imaginairy/enhancers/upscale_realesrgan.py +++ b/imaginairy/enhancers/upscale_realesrgan.py @@ -10,7 +10,7 @@ from imaginairy.model_manager import get_cached_url_path from imaginairy.utils import get_device -@lru_cache() +@lru_cache def realesrgan_upsampler(): model = RRDBNet( num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4 diff --git a/imaginairy/img_utils.py b/imaginairy/img_utils.py index 4f8f616..a7b4531 100644 --- a/imaginairy/img_utils.py +++ b/imaginairy/img_utils.py @@ -21,7 +21,10 @@ def pillow_fit_image_within(image: PIL.Image.Image, max_height=512, max_width=51 if resize_ratio != 1: w, h = int(w * resize_ratio), int(h * resize_ratio) - w, h = map(lambda x: x - x % 64, (w, h)) # resize to integer multiple of 64 + # resize to integer multiple of 64 + w -= w % 64 + h -= h % 64 + if (w, h) != image.size: image = image.resize((w, h), resample=Image.Resampling.LANCZOS) return image diff --git a/imaginairy/model_manager.py b/imaginairy/model_manager.py index 0029115..024820a 100644 --- a/imaginairy/model_manager.py +++ b/imaginairy/model_manager.py @@ -29,7 +29,7 @@ class HuggingFaceAuthorizationError(RuntimeError): class MemoryAwareModel: - """Wraps a model to allow dynamic loading/unloading as needed""" + """Wraps a model to allow dynamic loading/unloading as needed.""" def __init__(self, config_path, weights_path, half_mode=None): self._config_path = config_path @@ -122,7 +122,7 @@ def get_diffusion_model( for_inpainting=False, ): """ - Load a diffusion model + Load a diffusion model. Weights location may also be shortcut name, e.g. "SD-1.5" """ @@ -148,7 +148,7 @@ def _get_diffusion_model( for_inpainting=False, ): """ - Load a diffusion model + Load a diffusion model. Weights location may also be shortcut name, e.g. "SD-1.5" """ @@ -217,7 +217,7 @@ def get_cache_dir(): def get_cached_url_path(url): """ - Gets the contents of a url, but caches the response indefinitely + Gets the contents of a url, but caches the response indefinitely. While we attempt to use the cached_path from huggingface transformers, we fall back to our own implementation if the url does not provide an etag header, which `cached_path` diff --git a/imaginairy/modules/attention.py b/imaginairy/modules/attention.py index cf7f3af..2bd36ba 100644 --- a/imaginairy/modules/attention.py +++ b/imaginairy/modules/attention.py @@ -184,7 +184,7 @@ class CrossAttention(nn.Module): k = self.to_k(context) * self.scale v = self.to_v(context) - q, k, v = map(lambda t: rearrange(t, "b n (h d) -> (b h) n d", h=h), (q, k, v)) + q, k, v = (rearrange(t, "b n (h d) -> (b h) n d", h=h) for t in (q, k, v)) # force cast to fp32 to avoid overflowing if ATTENTION_PRECISION_OVERRIDE == "fp32": @@ -219,8 +219,8 @@ class CrossAttention(nn.Module): v_in = self.to_v(context) del context, x - q, k, v = map( - lambda t: rearrange(t, "b n (h d) -> (b h) n d", h=h), (q_in, k_in, v_in) + q, k, v = ( + rearrange(t, "b n (h d) -> (b h) n d", h=h) for t in (q_in, k_in, v_in) ) del q_in, k_in, v_in @@ -300,13 +300,13 @@ class MemoryEfficientCrossAttention(nn.Module): v = self.to_v(context) b, _, _ = q.shape - q, k, v = map( - lambda t: t.unsqueeze(3) + q, k, v = ( + t.unsqueeze(3) .reshape(b, t.shape[1], self.heads, self.dim_head) .permute(0, 2, 1, 3) .reshape(b * self.heads, t.shape[1], self.dim_head) - .contiguous(), - (q, k, v), + .contiguous() + for t in (q, k, v) ) # actually compute the attention, what we cannot get enough of @@ -392,7 +392,7 @@ class SpatialTransformer(nn.Module): and reshape to b, t, d. Then apply standard transformer action. Finally, reshape to image - NEW: use_linear for more efficiency instead of the 1x1 convs + NEW: use_linear for more efficiency instead of the 1x1 convs. """ def __init__( diff --git a/imaginairy/modules/autoencoder.py b/imaginairy/modules/autoencoder.py index 506952c..e7946d5 100644 --- a/imaginairy/modules/autoencoder.py +++ b/imaginairy/modules/autoencoder.py @@ -168,7 +168,9 @@ class AutoencoderKL(pl.LightningModule): def validation_step(self, batch, batch_idx): log_dict = self._validation_step(batch, batch_idx) with self.ema_scope(): - log_dict_ema = self._validation_step(batch, batch_idx, postfix="_ema") + log_dict_ema = self._validation_step( # noqa + batch, batch_idx, postfix="_ema" + ) return log_dict def _validation_step(self, batch, batch_idx, postfix=""): diff --git a/imaginairy/modules/clip_embedders.py b/imaginairy/modules/clip_embedders.py index 88e5ec0..b713ee9 100644 --- a/imaginairy/modules/clip_embedders.py +++ b/imaginairy/modules/clip_embedders.py @@ -9,7 +9,7 @@ from imaginairy.vendored import clip class FrozenCLIPEmbedder(nn.Module): - """Uses the CLIP transformer encoder for text (from Hugging Face)""" + """Uses the CLIP transformer encoder for text (from Hugging Face).""" def __init__( self, diff --git a/imaginairy/modules/diffusion/ddpm.py b/imaginairy/modules/diffusion/ddpm.py index bce88fe..82975a4 100644 --- a/imaginairy/modules/diffusion/ddpm.py +++ b/imaginairy/modules/diffusion/ddpm.py @@ -3,7 +3,7 @@ wild mixture of https://github.com/lucidrains/denoising-diffusion-pytorch/blob/7706bdfc6f527f58d33f84b7b522e61e6e3164b3/denoising_diffusion_pytorch/denoising_diffusion_pytorch.py https://github.com/openai/improved-diffusion/blob/e94489283bb876ac1477d5dd7709bbbd2d9902ce/improved_diffusion/gaussian_diffusion.py https://github.com/CompVis/taming-transformers --- merci +-- merci. """ import itertools import logging @@ -66,7 +66,7 @@ class DDPM(pl.LightningModule): beta_schedule="linear", loss_type="l2", ckpt_path=None, - ignore_keys=tuple(), + ignore_keys=(), load_only_unet=False, monitor="val/loss", use_ema=True, @@ -286,7 +286,7 @@ class DDPM(pl.LightningModule): print(f"{context}: Restored training weights") @torch.no_grad() - def init_from_ckpt(self, path, ignore_keys=tuple(), only_model=False): + def init_from_ckpt(self, path, ignore_keys=(), only_model=False): sd = torch.load(path, map_location="cpu") if "state_dict" in list(sd.keys()): sd = sd["state_dict"] @@ -664,7 +664,7 @@ def _TileModeConv2DConvForward( class LatentDiffusion(DDPM): - """main class""" + """main class.""" def __init__( self, @@ -728,7 +728,7 @@ class LatentDiffusion(DDPM): ) def tile_mode(self, tile_mode): - """For creating seamless tiles""" + """For creating seamless tiles.""" tile_mode = tile_mode or "" tile_x = "x" in tile_mode tile_y = "y" in tile_mode @@ -904,9 +904,12 @@ class LatentDiffusion(DDPM): Lx = (w - kernel_size[1]) // stride[1] + 1 if uf == 1 and df == 1: - fold_params = dict( - kernel_size=kernel_size, dilation=1, padding=0, stride=stride - ) + fold_params = { + "kernel_size": kernel_size, + "dilation": 1, + "padding": 0, + "stride": stride, + } unfold = torch.nn.Unfold(**fold_params) fold = torch.nn.Fold(output_size=x.shape[2:], **fold_params) @@ -918,17 +921,20 @@ class LatentDiffusion(DDPM): weighting = weighting.view((1, 1, kernel_size[0], kernel_size[1], Ly * Lx)) elif uf > 1 and df == 1: - fold_params = dict( - kernel_size=kernel_size, dilation=1, padding=0, stride=stride - ) + fold_params = { + "kernel_size": kernel_size, + "dilation": 1, + "padding": 0, + "stride": stride, + } unfold = torch.nn.Unfold(**fold_params) - fold_params2 = dict( - kernel_size=(kernel_size[0] * uf, kernel_size[0] * uf), - dilation=1, - padding=0, - stride=(stride[0] * uf, stride[1] * uf), - ) + fold_params2 = { + "kernel_size": (kernel_size[0] * uf, kernel_size[0] * uf), + "dilation": 1, + "padding": 0, + "stride": (stride[0] * uf, stride[1] * uf), + } fold = torch.nn.Fold( output_size=(x.shape[2] * uf, x.shape[3] * uf), **fold_params2 ) @@ -944,17 +950,20 @@ class LatentDiffusion(DDPM): ) elif df > 1 and uf == 1: - fold_params = dict( - kernel_size=kernel_size, dilation=1, padding=0, stride=stride - ) + fold_params = { + "kernel_size": kernel_size, + "dilation": 1, + "padding": 0, + "stride": stride, + } unfold = torch.nn.Unfold(**fold_params) - fold_params2 = dict( - kernel_size=(kernel_size[0] // df, kernel_size[0] // df), - dilation=1, - padding=0, - stride=(stride[0] // df, stride[1] // df), - ) + fold_params2 = { + "kernel_size": (kernel_size[0] // df, kernel_size[0] // df), + "dilation": 1, + "padding": 0, + "stride": (stride[0] // df, stride[1] // df), + } fold = torch.nn.Fold( output_size=(x.shape[2] // df, x.shape[3] // df), **fold_params2 ) @@ -1370,7 +1379,7 @@ class DiffusionWrapper(pl.LightningModule): class LatentFinetuneDiffusion(LatentDiffusion): """ Basis for different finetunas, such as inpainting or depth2image - To disable finetuning mode, set finetune_keys to None + To disable finetuning mode, set finetune_keys to None. """ def __init__( @@ -1399,7 +1408,7 @@ class LatentFinetuneDiffusion(LatentDiffusion): if ckpt_path is not None: self.init_from_ckpt(ckpt_path, ignore_keys) - def init_from_ckpt(self, path, ignore_keys=tuple(), only_model=False): + def init_from_ckpt(self, path, ignore_keys=(), only_model=False): sd = torch.load(path, map_location="cpu") if "state_dict" in list(sd.keys()): sd = sd["state_dict"] @@ -1606,7 +1615,7 @@ class LatentInpaintDiffusion(LatentDiffusion): class LatentDepth2ImageDiffusion(LatentFinetuneDiffusion): """ - condition on monocular depth estimation + condition on monocular depth estimation. """ def __init__(self, depth_stage_config, concat_keys=("midas_in",), **kwargs): @@ -1671,7 +1680,7 @@ class LatentDepth2ImageDiffusion(LatentFinetuneDiffusion): class LatentUpscaleFinetuneDiffusion(LatentFinetuneDiffusion): """ - condition on low-res image (and optionally on some spatial noise augmentation) + condition on low-res image (and optionally on some spatial noise augmentation). """ def __init__( diff --git a/imaginairy/modules/diffusion/model.py b/imaginairy/modules/diffusion/model.py index 2ec5653..95c7b7e 100644 --- a/imaginairy/modules/diffusion/model.py +++ b/imaginairy/modules/diffusion/model.py @@ -288,7 +288,7 @@ class MemoryEfficientAttnBlock(nn.Module): """ Uses xformers efficient implementation, see https://github.com/MatthieuTPHR/diffusers/blob/d80b531ff8060ec1ea982b65a1b8df70f73aa67c/src/diffusers/models/attention.py#L223 - Note: this is a single-head self-attention operation + Note: this is a single-head self-attention operation. """ # @@ -320,16 +320,16 @@ class MemoryEfficientAttnBlock(nn.Module): # compute attention B, C, H, W = q.shape - q, k, v = map(lambda x: rearrange(x, "b c h w -> b (h w) c"), (q, k, v)) - - q, k, v = map( - lambda t: t.unsqueeze(3) + q, k, v = (rearrange(x, "b c h w -> b (h w) c") for x in (q, k, v)) + q, k, v = ( + t.unsqueeze(3) .reshape(B, t.shape[1], 1, C) .permute(0, 2, 1, 3) .reshape(B * 1, t.shape[1], C) - .contiguous(), - (q, k, v), + .contiguous() + for t in (q, k, v) ) + out = xformers.ops.memory_efficient_attention( q, k, v, attn_bias=None, op=self.attention_op ) @@ -704,8 +704,7 @@ class Decoder(nn.Module): self.give_pre_end = give_pre_end self.tanh_out = tanh_out - # compute in_ch_mult, block_in and curr_res at lowest res - in_ch_mult = (1,) + tuple(ch_mult) + # compute block_in and curr_res at lowest res block_in = ch * ch_mult[self.num_resolutions - 1] curr_res = resolution // 2 ** (self.num_resolutions - 1) self.z_shape = (1, z_channels, curr_res, curr_res) diff --git a/imaginairy/modules/diffusion/openaimodel.py b/imaginairy/modules/diffusion/openaimodel.py index 5f5fffd..5e9ec7a 100644 --- a/imaginairy/modules/diffusion/openaimodel.py +++ b/imaginairy/modules/diffusion/openaimodel.py @@ -30,7 +30,7 @@ def convert_module_to_f32(_): class AttentionPool2d(nn.Module): """ - Adapted from CLIP: https://github.com/openai/CLIP/blob/main/clip/model.py + Adapted from CLIP: https://github.com/openai/CLIP/blob/main/clip/model.py. """ def __init__( @@ -123,7 +123,7 @@ class Upsample(nn.Module): class TransposedUpsample(nn.Module): - """Learned 2x upsampling without padding""" + """Learned 2x upsampling without padding.""" def __init__(self, channels, out_channels=None, ks=5): super().__init__() @@ -346,7 +346,7 @@ def count_flops_attn(model, _x, y): model, inputs=(inputs, timestamps), custom_ops={QKVAttention: QKVAttention.count_flops}, - ) + ). """ b, c, *spatial = y[0].shape num_spatial = int(np.prod(spatial)) @@ -359,7 +359,7 @@ def count_flops_attn(model, _x, y): class QKVAttentionLegacy(nn.Module): """ - A module which performs QKV attention. Matches legacy QKVAttention + input/output heads shaping + A module which performs QKV attention. Matches legacy QKVAttention + input/output heads shaping. """ def __init__(self, n_heads): @@ -530,11 +530,10 @@ class UNetModel(nn.Module): if num_attention_blocks is not None: assert len(num_attention_blocks) == len(self.num_res_blocks) assert all( - map( - lambda i: self.num_res_blocks[i] >= num_attention_blocks[i], - range(len(num_attention_blocks)), - ) + self.num_res_blocks[i] >= num_attention_blocks[i] + for i in range(len(num_attention_blocks)) ) + print( f"Constructor of UNetModel received num_attention_blocks={num_attention_blocks}. " f"This option has LESS priority than attention_resolutions {attention_resolutions}, " diff --git a/imaginairy/modules/diffusion/util.py b/imaginairy/modules/diffusion/util.py index 880ebb7..a973540 100644 --- a/imaginairy/modules/diffusion/util.py +++ b/imaginairy/modules/diffusion/util.py @@ -57,7 +57,7 @@ def make_beta_schedule( def frange(start, stop, step): - """Range but handles floats""" + """Range but handles floats.""" x = start while True: if x >= stop: diff --git a/imaginairy/modules/encoders.py b/imaginairy/modules/encoders.py index 8bf59f6..238dfac 100644 --- a/imaginairy/modules/encoders.py +++ b/imaginairy/modules/encoders.py @@ -54,7 +54,7 @@ def disabled_train(self, mode=True): # noqa class FrozenT5Embedder(AbstractEncoder): - """Uses the T5 transformer encoder for text""" + """Uses the T5 transformer encoder for text.""" def __init__( self, version="google/t5-v1_1-large", device="cuda", max_length=77, freeze=True @@ -94,7 +94,7 @@ class FrozenT5Embedder(AbstractEncoder): class FrozenCLIPEmbedder(AbstractEncoder): - """Uses the CLIP transformer encoder for text (from huggingface)""" + """Uses the CLIP transformer encoder for text (from huggingface).""" LAYERS = ["last", "pooled", "hidden"] @@ -155,7 +155,7 @@ class FrozenCLIPEmbedder(AbstractEncoder): class FrozenOpenCLIPEmbedder(AbstractEncoder): """ - Uses the OpenCLIP transformer encoder for text + Uses the OpenCLIP transformer encoder for text. """ LAYERS = [ diff --git a/imaginairy/modules/midas/midas/midas_net.py b/imaginairy/modules/midas/midas/midas_net.py index e13a57e..3dae9b8 100644 --- a/imaginairy/modules/midas/midas/midas_net.py +++ b/imaginairy/modules/midas/midas/midas_net.py @@ -1,7 +1,7 @@ """ MidashNet: Network for monocular depth estimation trained by mixing several datasets. This file contains code that is adapted from -https://github.com/thomasjpfan/pytorch_refinenet/blob/master/pytorch_refinenet/refinenet/refinenet_4cascade.py +https://github.com/thomasjpfan/pytorch_refinenet/blob/master/pytorch_refinenet/refinenet/refinenet_4cascade.py. """ import torch from torch import nn diff --git a/imaginairy/modules/midas/midas/midas_net_custom.py b/imaginairy/modules/midas/midas/midas_net_custom.py index 4186371..1030b30 100644 --- a/imaginairy/modules/midas/midas/midas_net_custom.py +++ b/imaginairy/modules/midas/midas/midas_net_custom.py @@ -1,7 +1,7 @@ """ MidashNet: Network for monocular depth estimation trained by mixing several datasets. This file contains code that is adapted from -https://github.com/thomasjpfan/pytorch_refinenet/blob/master/pytorch_refinenet/refinenet/refinenet_4cascade.py +https://github.com/thomasjpfan/pytorch_refinenet/blob/master/pytorch_refinenet/refinenet/refinenet_4cascade.py. """ import torch from torch import nn diff --git a/imaginairy/modules/midas/midas/vit.py b/imaginairy/modules/midas/midas/vit.py index 90ef785..ae519f7 100644 --- a/imaginairy/modules/midas/midas/vit.py +++ b/imaginairy/modules/midas/midas/vit.py @@ -57,7 +57,7 @@ class Transpose(nn.Module): def forward_vit(pretrained, x): b, c, h, w = x.shape - glob = pretrained.model.forward_flex(x) + pretrained.model.forward_flex(x) layer_1 = pretrained.activations["1"] layer_2 = pretrained.activations["2"] diff --git a/imaginairy/modules/midas/utils.py b/imaginairy/modules/midas/utils.py index 25a7f03..1b4b16b 100644 --- a/imaginairy/modules/midas/utils.py +++ b/imaginairy/modules/midas/utils.py @@ -105,15 +105,15 @@ def write_pfm(path, image, scale=1): else: raise Exception("Image must have H x W x 3, H x W x 1 or H x W dimensions.") - file.write("PF\n" if color else "Pf\n".encode()) - file.write("%d %d\n".encode() % (image.shape[1], image.shape[0])) + file.write("PF\n" if color else b"Pf\n") + file.write(b"%d %d\n" % (image.shape[1], image.shape[0])) endian = image.dtype.byteorder if endian == "<" or endian == "=" and sys.byteorder == "little": scale = -scale - file.write("%f\n".encode() % scale) + file.write(b"%f\n" % scale) image.tofile(file) diff --git a/imaginairy/safety.py b/imaginairy/safety.py index 9c3270f..14a4c0e 100644 --- a/imaginairy/safety.py +++ b/imaginairy/safety.py @@ -115,7 +115,7 @@ class EnhancedStableDiffusionSafetyChecker( return safety_results -@lru_cache() +@lru_cache def safety_models(): safety_model_id = "CompVis/stable-diffusion-safety-checker" monkeypatch_safety_cosine_distance() @@ -126,14 +126,14 @@ def safety_models(): return safety_feature_extractor, safety_checker -@lru_cache() +@lru_cache def monkeypatch_safety_cosine_distance(): orig_cosine_distance = safety_checker_mod.cosine_distance def cosine_distance_float32(image_embeds, text_embeds): """ In some environments we need to distance to be in float32 - but it was coming as BFloat16 + but it was coming as BFloat16. """ return orig_cosine_distance(image_embeds, text_embeds).to(torch.float32) diff --git a/imaginairy/samplers/ddim.py b/imaginairy/samplers/ddim.py index d642c95..2b42d5b 100644 --- a/imaginairy/samplers/ddim.py +++ b/imaginairy/samplers/ddim.py @@ -21,7 +21,7 @@ logger = logging.getLogger(__name__) class DDIMSampler(ImageSampler): """ - Denoising Diffusion Implicit Models + Denoising Diffusion Implicit Models. https://arxiv.org/abs/2010.02502 """ diff --git a/imaginairy/samplers/kdiff.py b/imaginairy/samplers/kdiff.py index 822a775..41c399f 100644 --- a/imaginairy/samplers/kdiff.py +++ b/imaginairy/samplers/kdiff.py @@ -208,7 +208,7 @@ class LMSSampler(KDiffusionSampler): class CFGDenoiser(nn.Module): """ - Conditional forward guidance wrapper + Conditional forward guidance wrapper. """ def __init__(self, model): diff --git a/imaginairy/samplers/plms.py b/imaginairy/samplers/plms.py index 315ec46..3f52bf2 100644 --- a/imaginairy/samplers/plms.py +++ b/imaginairy/samplers/plms.py @@ -21,7 +21,7 @@ logger = logging.getLogger(__name__) class PLMSSampler(ImageSampler): """ - probabilistic least-mean-squares + probabilistic least-mean-squares. Provenance: https://github.com/CompVis/latent-diffusion/commit/f0c4e092c156986e125f48c61a0edd38ba8ad059 diff --git a/imaginairy/schema.py b/imaginairy/schema.py index b142129..c70b837 100644 --- a/imaginairy/schema.py +++ b/imaginairy/schema.py @@ -216,7 +216,7 @@ class ImaginePrompt: class ExifCodes: - """https://www.awaresystems.be/imaging/tiff/tifftags/baseline.html""" + """https://www.awaresystems.be/imaging/tiff/tifftags/baseline.html.""" ImageDescription = 0x010E Software = 0x0131 diff --git a/imaginairy/utils.py b/imaginairy/utils.py index 3657558..bcc169a 100644 --- a/imaginairy/utils.py +++ b/imaginairy/utils.py @@ -13,9 +13,9 @@ from torch.overrides import handle_torch_function, has_torch_function_variadic logger = logging.getLogger(__name__) -@lru_cache() +@lru_cache def get_device() -> str: - """Return the best torch backend available""" + """Return the best torch backend available.""" if torch.cuda.is_available(): return "cuda" @@ -25,9 +25,9 @@ def get_device() -> str: return "cpu" -@lru_cache() +@lru_cache def get_hardware_description(device_type: str) -> str: - """Description of the hardware being used""" + """Description of the hardware being used.""" desc = platform.platform() if device_type == "cuda": desc += "-" + torch.cuda.get_device_name(0) @@ -37,7 +37,7 @@ def get_hardware_description(device_type: str) -> str: def get_obj_from_str(import_path: str, reload=False) -> Any: """ - Gets a python object from a string reference if it's location + Gets a python object from a string reference if it's location. Example: "functools.lru_cache" """ @@ -50,7 +50,7 @@ def get_obj_from_str(import_path: str, reload=False) -> Any: def instantiate_from_config(config: Union[dict, str]) -> Any: - """Instantiate an object from a config dict""" + """Instantiate an object from a config dict.""" if "target" not in config: if config == "__is_first_stage__": return None @@ -65,7 +65,7 @@ def instantiate_from_config(config: Union[dict, str]) -> Any: @contextmanager def platform_appropriate_autocast(precision="autocast"): """ - Allow calculations to run in mixed precision, which can be faster + Allow calculations to run in mixed precision, which can be faster. """ precision_scope = nullcontext # autocast not supported on CPU @@ -111,7 +111,7 @@ def _fixed_layer_norm( @contextmanager def fix_torch_nn_layer_norm(): - """https://github.com/CompVis/stable-diffusion/issues/25#issuecomment-1221416526""" + """https://github.com/CompVis/stable-diffusion/issues/25#issuecomment-1221416526.""" orig_function = functional.layer_norm functional.layer_norm = _fixed_layer_norm try: @@ -123,7 +123,7 @@ def fix_torch_nn_layer_norm(): @contextmanager def fix_torch_group_norm(): """ - Patch group_norm to cast the weights to the same type as the inputs + Patch group_norm to cast the weights to the same type as the inputs. From what I can understand all the other repos just switch to full precision instead of addressing this. I think this would make things slower but I'm not sure. @@ -158,7 +158,7 @@ def fix_torch_group_norm(): def randn_seeded(seed: int, size: List[int]) -> Tensor: - """Generate a random tensor with a given seed""" + """Generate a random tensor with a given seed.""" g_cpu = torch.Generator() g_cpu.manual_seed(seed) noise = torch.randn( @@ -170,7 +170,7 @@ def randn_seeded(seed: int, size: List[int]) -> Tensor: def check_torch_working(): - """Check that torch is working""" + """Check that torch is working.""" try: torch.randn(1, device=get_device()) except RuntimeError as e: diff --git a/imaginairy/vendored/blip/blip.py b/imaginairy/vendored/blip/blip.py index 418ed51..bf2ec26 100644 --- a/imaginairy/vendored/blip/blip.py +++ b/imaginairy/vendored/blip/blip.py @@ -3,7 +3,7 @@ * All rights reserved. * SPDX-License-Identifier: BSD-3-Clause * For full license text, see LICENSE.txt file in the repo root or https://opensource.org/licenses/BSD-3-Clause - * By Junnan Li + * By Junnan Li. """ import warnings @@ -34,7 +34,7 @@ class BLIP_Base(nn.Module): Args: med_config (str): path for the mixture of encoder-decoder model's configuration file image_size (int): input image size - vit (str): model size of vision transformer + vit (str): model size of vision transformer. """ super().__init__() @@ -102,7 +102,7 @@ class BLIP_Decoder(nn.Module): Args: med_config (str): path for the mixture of encoder-decoder model's configuration file image_size (int): input image size - vit (str): model size of vision transformer + vit (str): model size of vision transformer. """ super().__init__() diff --git a/imaginairy/vendored/blip/blip_itm.py b/imaginairy/vendored/blip/blip_itm.py index 16f980c..791634e 100644 --- a/imaginairy/vendored/blip/blip_itm.py +++ b/imaginairy/vendored/blip/blip_itm.py @@ -19,7 +19,7 @@ class BLIP_ITM(nn.Module): Args: med_config (str): path for the mixture of encoder-decoder model's configuration file image_size (int): input image size - vit (str): model size of vision transformer + vit (str): model size of vision transformer. """ super().__init__() diff --git a/imaginairy/vendored/blip/blip_nlvr.py b/imaginairy/vendored/blip/blip_nlvr.py index 1e7aa93..0cd62d9 100644 --- a/imaginairy/vendored/blip/blip_nlvr.py +++ b/imaginairy/vendored/blip/blip_nlvr.py @@ -21,7 +21,7 @@ class BLIP_NLVR(nn.Module): Args: med_config (str): path for the mixture of encoder-decoder model's configuration file image_size (int): input image size - vit (str): model size of vision transformer + vit (str): model size of vision transformer. """ super().__init__() diff --git a/imaginairy/vendored/blip/blip_pretrain.py b/imaginairy/vendored/blip/blip_pretrain.py index dab6052..814e3f1 100644 --- a/imaginairy/vendored/blip/blip_pretrain.py +++ b/imaginairy/vendored/blip/blip_pretrain.py @@ -3,8 +3,10 @@ * All rights reserved. * SPDX-License-Identifier: BSD-3-Clause * For full license text, see LICENSE.txt file in the repo root or https://opensource.org/licenses/BSD-3-Clause - * By Junnan Li + * By Junnan Li. """ +from typing import List + import transformers from models.med import BertConfig, BertLMHeadModel, BertModel @@ -32,7 +34,7 @@ class BLIP_Pretrain(nn.Module): Args: med_config (str): path for the mixture of encoder-decoder model's configuration file image_size (int): input image size - vit (str): model size of vision transformer + vit (str): model size of vision transformer. """ super().__init__() @@ -329,9 +331,6 @@ def concat_all_gather(tensor): return output -from typing import List - - def tie_encoder_decoder_weights( encoder: nn.Module, decoder: nn.Module, base_model_prefix: str, skip_key: str ): @@ -368,9 +367,9 @@ def tie_encoder_decoder_weights( len(encoder_modules) > 0 ), f"Encoder module {encoder_pointer} does not match decoder module {decoder_pointer}" - all_encoder_weights = set( - [module_name + "/" + sub_name for sub_name in encoder_modules.keys()] - ) + all_encoder_weights = { + module_name + "/" + sub_name for sub_name in encoder_modules.keys() + } encoder_layer_pos = 0 for name, module in decoder_modules.items(): if name.isdigit(): diff --git a/imaginairy/vendored/blip/blip_retrieval.py b/imaginairy/vendored/blip/blip_retrieval.py index c3086b0..0b6da3f 100644 --- a/imaginairy/vendored/blip/blip_retrieval.py +++ b/imaginairy/vendored/blip/blip_retrieval.py @@ -22,7 +22,7 @@ class BLIP_Retrieval(nn.Module): Args: med_config (str): path for the mixture of encoder-decoder model's configuration file image_size (int): input image size - vit (str): model size of vision transformer + vit (str): model size of vision transformer. """ super().__init__() diff --git a/imaginairy/vendored/blip/blip_vqa.py b/imaginairy/vendored/blip/blip_vqa.py index c18bc05..d6e6489 100644 --- a/imaginairy/vendored/blip/blip_vqa.py +++ b/imaginairy/vendored/blip/blip_vqa.py @@ -19,7 +19,7 @@ class BLIP_VQA(nn.Module): Args: med_config (str): path for the mixture of encoder-decoder model's configuration file image_size (int): input image size - vit (str): model size of vision transformer + vit (str): model size of vision transformer. """ super().__init__() diff --git a/imaginairy/vendored/blip/med.py b/imaginairy/vendored/blip/med.py index b757256..d86456c 100644 --- a/imaginairy/vendored/blip/med.py +++ b/imaginairy/vendored/blip/med.py @@ -5,7 +5,7 @@ * For full license text, see LICENSE.txt file in the repo root or https://opensource.org/licenses/BSD-3-Clause * By Junnan Li * Based on huggingface code base - * https://github.com/huggingface/transformers/blob/v4.15.0/src/transformers/models/bert + * https://github.com/huggingface/transformers/blob/v4.15.0/src/transformers/models/bert. """ import math @@ -611,7 +611,7 @@ class BertPreTrainedModel(PreTrainedModel): _keys_to_ignore_on_load_missing = [r"position_ids"] def _init_weights(self, module): - """Initialize the weights""" + """Initialize the weights.""" if isinstance(module, (nn.Linear, nn.Embedding)): # Slightly different from the TF version which uses truncated_normal for initialization # cf https://github.com/pytorch/pytorch/pull/5617 @@ -654,7 +654,7 @@ class BertModel(BertPreTrainedModel): def _prune_heads(self, heads_to_prune): """ Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base - class PreTrainedModel + class PreTrainedModel. """ for layer, heads in heads_to_prune.items(): self.encoder.layer[layer].attention.prune_heads(heads) @@ -977,7 +977,7 @@ class BertLMHeadModel(BertPreTrainedModel): >>> model = BertLMHeadModel.from_pretrained('bert-base-cased', config=config) >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt") >>> outputs = model(**inputs) - >>> prediction_logits = outputs.logits + >>> prediction_logits = outputs.logits. """ return_dict = ( return_dict if return_dict is not None else self.config.use_return_dict diff --git a/imaginairy/vendored/blip/nlvr_encoder.py b/imaginairy/vendored/blip/nlvr_encoder.py index fc5dc87..98dcf3a 100644 --- a/imaginairy/vendored/blip/nlvr_encoder.py +++ b/imaginairy/vendored/blip/nlvr_encoder.py @@ -656,7 +656,7 @@ class BertPreTrainedModel(PreTrainedModel): _keys_to_ignore_on_load_missing = [r"position_ids"] def _init_weights(self, module): - """Initialize the weights""" + """Initialize the weights.""" if isinstance(module, (nn.Linear, nn.Embedding)): # Slightly different from the TF version which uses truncated_normal for initialization # cf https://github.com/pytorch/pytorch/pull/5617 @@ -699,7 +699,7 @@ class BertModel(BertPreTrainedModel): def _prune_heads(self, heads_to_prune): """ Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base - class PreTrainedModel + class PreTrainedModel. """ for layer, heads in heads_to_prune.items(): self.encoder.layer[layer].attention.prune_heads(heads) diff --git a/imaginairy/vendored/blip/vit.py b/imaginairy/vendored/blip/vit.py index f1903ae..8aaa34a 100644 --- a/imaginairy/vendored/blip/vit.py +++ b/imaginairy/vendored/blip/vit.py @@ -5,7 +5,7 @@ * For full license text, see LICENSE.txt file in the repo root or https://opensource.org/licenses/BSD-3-Clause * By Junnan Li * Based on timm code base - * https://github.com/rwightman/pytorch-image-models/tree/master/timm + * https://github.com/rwightman/pytorch-image-models/tree/master/timm. """ from functools import partial @@ -19,7 +19,7 @@ from timm.models.vision_transformer import PatchEmbed class Mlp(nn.Module): - """MLP as used in Vision Transformer, MLP-Mixer and related networks""" + """MLP as used in Vision Transformer, MLP-Mixer and related networks.""" def __init__( self, @@ -156,7 +156,7 @@ class Block(nn.Module): class VisionTransformer(nn.Module): """Vision Transformer A PyTorch impl of : `An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale` - - https://arxiv.org/abs/2010.11929 + https://arxiv.org/abs/2010.11929. """ def __init__( @@ -195,7 +195,7 @@ class VisionTransformer(nn.Module): drop_rate (float): dropout rate attn_drop_rate (float): attention dropout rate drop_path_rate (float): stochastic depth rate - norm_layer: (nn.Module): normalization layer + norm_layer: (nn.Module): normalization layer. """ super().__init__() self.num_features = ( @@ -282,7 +282,7 @@ class VisionTransformer(nn.Module): @torch.no_grad() def _load_weights(model: VisionTransformer, checkpoint_path: str, prefix: str = ""): - """Load weights from .npz checkpoints for official Google Brain Flax implementation""" + """Load weights from .npz checkpoints for official Google Brain Flax implementation.""" import numpy as np def _n2p(w, t=True): diff --git a/imaginairy/vendored/clip/clip.py b/imaginairy/vendored/clip/clip.py index c533889..f1f1a31 100644 --- a/imaginairy/vendored/clip/clip.py +++ b/imaginairy/vendored/clip/clip.py @@ -109,7 +109,7 @@ def _transform(n_px): def available_models() -> List[str]: - """Returns the names of available CLIP models""" + """Returns the names of available CLIP models.""" return list(_MODELS.keys()) @@ -119,7 +119,7 @@ def load( jit: bool = False, download_root: str = None, ): - """Load a CLIP model + """Load a CLIP model. Parameters ---------- @@ -246,7 +246,7 @@ def tokenize( texts: Union[str, List[str]], context_length: int = 77, truncate: bool = False ) -> Union[torch.IntTensor, torch.LongTensor]: """ - Returns the tokenized representation of given input string(s) + Returns the tokenized representation of given input string(s). Parameters ---------- diff --git a/imaginairy/vendored/clip/model.py b/imaginairy/vendored/clip/model.py index c3b060b..dbf3e1f 100644 --- a/imaginairy/vendored/clip/model.py +++ b/imaginairy/vendored/clip/model.py @@ -117,7 +117,7 @@ class ModifiedResNet(nn.Module): A ResNet class that is similar to torchvision's but contains the following changes: - There are now 3 "stem" convolutions as opposed to 1, with an average pool instead of a max pool. - Performs anti-aliasing strided convolutions, where an avgpool is prepended to convolutions with stride > 1 - - The final pooling layer is a QKV attention instead of an average pool + - The final pooling layer is a QKV attention instead of an average pool. """ def __init__(self, layers, output_dim, heads, input_resolution=224, width=64): @@ -447,7 +447,7 @@ class CLIP(nn.Module): def convert_weights(model: nn.Module): - """Convert applicable model parameters to fp16""" + """Convert applicable model parameters to fp16.""" def _convert_weights_to_fp16(l): if isinstance(l, (nn.Conv1d, nn.Conv2d, nn.Linear)): @@ -495,11 +495,11 @@ def build_model(state_dict: dict): else: counts: list = [ len( - set( + { k.split(".")[2] for k in state_dict if k.startswith(f"visual.layer{b}") - ) + } ) for b in [1, 2, 3, 4] ] @@ -521,9 +521,7 @@ def build_model(state_dict: dict): transformer_width = state_dict["ln_final.weight"].shape[0] transformer_heads = transformer_width // 64 transformer_layers = len( - set( - k.split(".")[2] for k in state_dict if k.startswith("transformer.resblocks") - ) + {k.split(".")[2] for k in state_dict if k.startswith("transformer.resblocks")} ) model = CLIP( diff --git a/imaginairy/vendored/clip/simple_tokenizer.py b/imaginairy/vendored/clip/simple_tokenizer.py index 7487823..d637943 100644 --- a/imaginairy/vendored/clip/simple_tokenizer.py +++ b/imaginairy/vendored/clip/simple_tokenizer.py @@ -7,14 +7,14 @@ import ftfy import regex as re -@lru_cache() +@lru_cache def default_bpe(): return os.path.join( os.path.dirname(os.path.abspath(__file__)), "bpe_simple_vocab_16e6.txt.gz" ) -@lru_cache() +@lru_cache def bytes_to_unicode(): """ Returns list of utf-8 byte and a corresponding list of unicode strings. @@ -65,7 +65,7 @@ def whitespace_clean(text): return text -class SimpleTokenizer(object): +class SimpleTokenizer: def __init__(self, bpe_path: str = default_bpe()): self.byte_encoder = bytes_to_unicode() self.byte_decoder = {v: k for k, v in self.byte_encoder.items()} diff --git a/imaginairy/vendored/clipseg/__init__.py b/imaginairy/vendored/clipseg/__init__.py index a9a67a3..5a69ce7 100755 --- a/imaginairy/vendored/clipseg/__init__.py +++ b/imaginairy/vendored/clipseg/__init__.py @@ -4,7 +4,6 @@ from os.path import basename, dirname, isfile, join import torch from torch import nn from torch.nn import functional as nnf -from torch.nn.modules.activation import ReLU def precompute_clip_vectors(): @@ -182,7 +181,7 @@ class CLIPDenseBase(nn.Module): k: torch.from_numpy(v) for k, v in precomp.items() } else: - self.precomputed_prompts = dict() + self.precomputed_prompts = {} def rescaled_pos_emb(self, new_size): assert len(new_size) == 2 @@ -383,11 +382,7 @@ def clip_load_untrained(version): transformer_width = state_dict["ln_final.weight"].shape[0] transformer_heads = transformer_width // 64 transformer_layers = len( - set( - k.split(".")[2] - for k in state_dict - if k.startswith(f"transformer.resblocks") - ) + {k.split(".")[2] for k in state_dict if k.startswith(f"transformer.resblocks")} ) return CLIP( @@ -717,12 +712,11 @@ class CLIPSegMultiLabel(nn.Module): def __init__(self, model) -> None: super().__init__() - from third_party.JoEm.data_loader import VOC, get_seen_idx, get_unseen_idx + from third_party.JoEm.data_loader import VOC self.pascal_classes = VOC from general_utils import load_model - from models.clipseg import CLIPDensePredT # self.clipseg = load_model('rd64-vit16-neg0.2-phrasecut', strict=False) self.clipseg = load_model(model, strict=False) diff --git a/imaginairy/vendored/codeformer/codeformer_arch.py b/imaginairy/vendored/codeformer/codeformer_arch.py index a179c6a..748d23b 100644 --- a/imaginairy/vendored/codeformer/codeformer_arch.py +++ b/imaginairy/vendored/codeformer/codeformer_arch.py @@ -95,7 +95,7 @@ class PositionEmbeddingSine(nn.Module): def _get_activation_fn(activation): - """Return an activation function given a string""" + """Return an activation function given a string.""" if activation == "relu": return F.relu if activation == "gelu": @@ -186,9 +186,7 @@ class CodeFormer(VQAutoEncoder): connect_list=["32", "64", "128", "256"], fix_modules=["quantize", "generator"], ): - super(CodeFormer, self).__init__( - 512, 64, [1, 2, 2, 4, 4, 8], "nearest", 2, [16], codebook_size - ) + super().__init__(512, 64, [1, 2, 2, 4, 4, 8], "nearest", 2, [16], codebook_size) if fix_modules is not None: for module in fix_modules: diff --git a/imaginairy/vendored/codeformer/vqgan_arch.py b/imaginairy/vendored/codeformer/vqgan_arch.py index b3e9827..beb4540 100644 --- a/imaginairy/vendored/codeformer/vqgan_arch.py +++ b/imaginairy/vendored/codeformer/vqgan_arch.py @@ -1,6 +1,6 @@ """ VQGAN code, adapted from the original created by the Unleashing Transformers authors: -https://github.com/samb-t/unleashing-transformers/blob/master/models/vqgan.py +https://github.com/samb-t/unleashing-transformers/blob/master/models/vqgan.py. """ @@ -25,7 +25,7 @@ def swish(x): # Define VQVAE classes class VectorQuantizer(nn.Module): def __init__(self, codebook_size, emb_dim, beta): - super(VectorQuantizer, self).__init__() + super().__init__() self.codebook_size = codebook_size # number of embeddings self.emb_dim = emb_dim # dimension of embedding self.beta = beta # commitment cost used in loss term, beta * ||z_e(x)-sg[e]||^2 @@ -173,7 +173,7 @@ class Upsample(nn.Module): class ResBlock(nn.Module): def __init__(self, in_channels, out_channels=None): - super(ResBlock, self).__init__() + super().__init__() self.in_channels = in_channels self.out_channels = in_channels if out_channels is None else out_channels self.norm1 = normalize(in_channels) diff --git a/imaginairy/vendored/k_diffusion/__init__.py b/imaginairy/vendored/k_diffusion/__init__.py index 48d0f41..e69de29 100644 --- a/imaginairy/vendored/k_diffusion/__init__.py +++ b/imaginairy/vendored/k_diffusion/__init__.py @@ -1,12 +0,0 @@ -from . import ( - augmentation, - config, - evaluation, - external, - gns, - layers, - models, - sampling, - utils, -) -from .layers import Denoiser diff --git a/imaginairy/vendored/k_diffusion/models/__init__.py b/imaginairy/vendored/k_diffusion/models/__init__.py index 82608ff..e69de29 100644 --- a/imaginairy/vendored/k_diffusion/models/__init__.py +++ b/imaginairy/vendored/k_diffusion/models/__init__.py @@ -1 +0,0 @@ -from .image_v1 import ImageDenoiserModelV1 diff --git a/imaginairy/vendored/k_diffusion/models/image_v1.py b/imaginairy/vendored/k_diffusion/models/image_v1.py index a257346..a8fc678 100644 --- a/imaginairy/vendored/k_diffusion/models/image_v1.py +++ b/imaginairy/vendored/k_diffusion/models/image_v1.py @@ -56,18 +56,20 @@ class DBlock(layers.ConditionedSequential): ) ) if self_attn: - norm = lambda c_in: layers.AdaGN( - feats_in, c_in, max(1, my_c_out // group_size) - ) + + def norm(c_in): + return layers.AdaGN(feats_in, c_in, max(1, my_c_out // group_size)) + modules.append( layers.SelfAttention2d( my_c_out, max(1, my_c_out // head_size), norm, dropout_rate ) ) if cross_attn: - norm = lambda c_in: layers.AdaGN( - feats_in, c_in, max(1, my_c_out // group_size) - ) + + def norm(c_in): + return layers.AdaGN(feats_in, c_in, max(1, my_c_out // group_size)) + modules.append( layers.CrossAttention2d( my_c_out, @@ -111,18 +113,20 @@ class UBlock(layers.ConditionedSequential): ) ) if self_attn: - norm = lambda c_in: layers.AdaGN( - feats_in, c_in, max(1, my_c_out // group_size) - ) + + def norm(c_in): + return layers.AdaGN(feats_in, c_in, max(1, my_c_out // group_size)) + modules.append( layers.SelfAttention2d( my_c_out, max(1, my_c_out // head_size), norm, dropout_rate ) ) if cross_attn: - norm = lambda c_in: layers.AdaGN( - feats_in, c_in, max(1, my_c_out // group_size) - ) + + def norm(c_in): + return layers.AdaGN(feats_in, c_in, max(1, my_c_out // group_size)) + modules.append( layers.CrossAttention2d( my_c_out, diff --git a/imaginairy/vendored/k_diffusion/sampling.py b/imaginairy/vendored/k_diffusion/sampling.py index a527926..e17a890 100644 --- a/imaginairy/vendored/k_diffusion/sampling.py +++ b/imaginairy/vendored/k_diffusion/sampling.py @@ -798,8 +798,12 @@ def sample_dpmpp_2s_ancestral( extra_args = {} if extra_args is None else extra_args noise_sampler = default_noise_sampler(x) if noise_sampler is None else noise_sampler s_in = x.new_ones([x.shape[0]]) - sigma_fn = lambda t: t.neg().exp() - t_fn = lambda sigma: sigma.to("cpu").log().neg().to(x.device) + + def sigma_fn(t): + return t.neg().exp() + + def t_fn(sigma): + return sigma.to("cpu").log().neg().to(x.device) for i in trange(len(sigmas) - 1, disable=disable): denoised = model(x, sigmas[i] * s_in, **extra_args) @@ -856,8 +860,12 @@ def sample_dpmpp_sde( ) extra_args = {} if extra_args is None else extra_args s_in = x.new_ones([x.shape[0]]) - sigma_fn = lambda t: t.neg().exp() - t_fn = lambda sigma: sigma.to("cpu").log().neg().to(x.device) + + def sigma_fn(t): + return t.neg().exp() + + def t_fn(sigma): + return sigma.to("cpu").log().neg().to(x.device) for i in trange(len(sigmas) - 1, disable=disable): denoised = model(x, sigmas[i] * s_in, **extra_args) @@ -906,8 +914,13 @@ def sample_dpmpp_2m(model, x, sigmas, extra_args=None, callback=None, disable=No """DPM-Solver++(2M).""" extra_args = {} if extra_args is None else extra_args s_in = x.new_ones([x.shape[0]]) - sigma_fn = lambda t: t.neg().exp() - t_fn = lambda sigma: sigma.to("cpu").log().neg().to(x.device) + + def sigma_fn(t): + return t.neg().exp() + + def t_fn(sigma): + return sigma.to("cpu").log().neg().to(x.device) + old_denoised = None for i in trange(len(sigmas) - 1, disable=disable): diff --git a/requirements-dev.in b/requirements-dev.in index ed07de8..6c418ae 100644 --- a/requirements-dev.in +++ b/requirements-dev.in @@ -1,7 +1,7 @@ black coverage isort -pycln +ruff pydocstyle pylama pylint diff --git a/requirements-dev.txt b/requirements-dev.txt index 60d69e5..941da06 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -20,7 +20,7 @@ astroid==2.12.13 # via pylint async-timeout==4.0.2 # via aiohttp -attrs==22.1.0 +attrs==22.2.0 # via # aiohttp # pytest @@ -28,11 +28,11 @@ basicsr==1.4.2 # via # gfpgan # realesrgan -black==22.10.0 +black==22.12.0 # via -r requirements-dev.in cachetools==5.2.0 # via google-auth -certifi==2022.9.24 +certifi==2022.12.7 # via requests charset-normalizer==2.1.1 # via @@ -43,30 +43,29 @@ click==8.1.3 # black # click-shell # imaginAIry (setup.py) - # typer click-shell==2.1 # via imaginAIry (setup.py) contourpy==1.0.6 # via matplotlib -coverage==6.5.0 +coverage==7.0.1 # via -r requirements-dev.in cycler==0.11.0 # via matplotlib -diffusers==0.8.1 +diffusers==0.11.1 # via imaginAIry (setup.py) dill==0.3.6 # via pylint einops==0.3.0 # via imaginAIry (setup.py) -exceptiongroup==1.0.4 +exceptiongroup==1.1.0 # via pytest facexlib==0.2.5 # via # gfpgan # realesrgan -fairscale==0.4.12 +fairscale==0.4.13 # via imaginAIry (setup.py) -filelock==3.8.0 +filelock==3.9.0 # via # diffusers # huggingface-hub @@ -93,7 +92,7 @@ gfpgan==1.3.8 # via # imaginAIry (setup.py) # realesrgan -google-auth==2.14.1 +google-auth==2.15.0 # via # google-auth-oauthlib # tb-nightly @@ -102,11 +101,11 @@ google-auth-oauthlib==0.4.6 # via # tb-nightly # tensorboard -grpcio==1.50.0 +grpcio==1.51.1 # via # tb-nightly # tensorboard -huggingface-hub==0.11.0 +huggingface-hub==0.11.1 # via # diffusers # open-clip-torch @@ -120,11 +119,11 @@ imageio==2.9.0 # via # imaginAIry (setup.py) # scikit-image -importlib-metadata==5.1.0 +importlib-metadata==6.0.0 # via diffusers iniconfig==1.1.1 # via pytest -isort==5.10.1 +isort==5.11.4 # via # -r requirements-dev.in # pylint @@ -134,11 +133,9 @@ kornia==0.6 # via imaginAIry (setup.py) lazy-object-proxy==1.8.0 # via astroid -libcst==0.4.9 - # via pycln llvmlite==0.39.1 # via numba -lmdb==1.3.0 +lmdb==1.4.0 # via # basicsr # gfpgan @@ -154,14 +151,12 @@ mccabe==0.7.0 # via # pylama # pylint -multidict==6.0.2 +multidict==6.0.4 # via # aiohttp # yarl mypy-extensions==0.4.3 - # via - # black - # typing-inspect + # via black networkx==2.8.8 # via scikit-image numba==0.56.4 @@ -195,15 +190,15 @@ oauthlib==3.2.2 # via requests-oauthlib omegaconf==2.1.1 # via imaginAIry (setup.py) -open-clip-torch==2.7.0 +open-clip-torch==2.9.1 # via imaginAIry (setup.py) -opencv-python==4.6.0.66 +opencv-python==4.7.0.68 # via # basicsr # facexlib # gfpgan # realesrgan -packaging==21.3 +packaging==22.0 # via # huggingface-hub # kornia @@ -214,11 +209,9 @@ packaging==21.3 # scikit-image # torchmetrics # transformers -pathspec==0.9.0 - # via - # black - # pycln -pillow==9.3.0 +pathspec==0.10.3 + # via black +pillow==9.4.0 # via # basicsr # diffusers @@ -229,7 +222,7 @@ pillow==9.3.0 # realesrgan # scikit-image # torchvision -platformdirs==2.5.4 +platformdirs==2.6.2 # via # black # pylint @@ -238,6 +231,7 @@ pluggy==1.0.0 protobuf==3.20.3 # via # imaginAIry (setup.py) + # open-clip-torch # tb-nightly # tensorboard psutil==5.9.4 @@ -248,8 +242,6 @@ pyasn1==0.4.8 # rsa pyasn1-modules==0.2.8 # via google-auth -pycln==2.1.2 - # via -r requirements-dev.in pycodestyle==2.10.0 # via pylama pydeprecate==0.3.1 @@ -262,12 +254,10 @@ pyflakes==3.0.1 # via pylama pylama==8.4.1 # via -r requirements-dev.in -pylint==2.15.6 +pylint==2.15.9 # via -r requirements-dev.in pyparsing==3.0.9 - # via - # matplotlib - # packaging + # via matplotlib pytest==7.2.0 # via # -r requirements-dev.in @@ -288,9 +278,7 @@ pyyaml==6.0 # basicsr # gfpgan # huggingface-hub - # libcst # omegaconf - # pycln # pytorch-lightning # timm # transformers @@ -307,6 +295,7 @@ requests==2.28.1 # diffusers # fsspec # huggingface-hub + # imaginAIry (setup.py) # requests-oauthlib # responses # tb-nightly @@ -319,6 +308,8 @@ responses==0.22.0 # via -r requirements-dev.in rsa==4.9 # via google-auth +ruff==0.0.206 + # via -r requirements-dev.in scikit-image==0.19.3 # via basicsr scipy==1.9.3 @@ -329,14 +320,15 @@ scipy==1.9.3 # gfpgan # scikit-image # torchdiffeq +sentencepiece==0.1.97 + # via open-clip-torch six==1.16.0 # via # google-auth - # grpcio # python-dateutil snowballstemmer==2.2.0 # via pydocstyle -tb-nightly==2.12.0a20221125 +tb-nightly==2.12.0a20230101 # via # basicsr # gfpgan @@ -366,10 +358,8 @@ tomli==2.0.1 # pylint # pytest tomlkit==0.11.6 - # via - # pycln - # pylint -torch==1.13.0 + # via pylint +torch==1.13.1 # via # basicsr # facexlib @@ -390,7 +380,7 @@ torchmetrics==0.6.0 # via # imaginAIry (setup.py) # pytorch-lightning -torchvision==0.14.0 +torchvision==0.14.1 # via # basicsr # facexlib @@ -412,20 +402,14 @@ tqdm==4.64.1 # transformers transformers==4.19.2 # via imaginAIry (setup.py) -typer==0.7.0 - # via pycln types-toml==0.10.8.1 # via responses typing-extensions==4.4.0 # via # huggingface-hub - # libcst # pytorch-lightning # torch # torchvision - # typing-inspect -typing-inspect==0.8.0 - # via libcst urllib3==1.26.13 # via # requests @@ -446,9 +430,9 @@ yapf==0.32.0 # via # basicsr # gfpgan -yarl==1.8.1 +yarl==1.8.2 # via aiohttp -zipp==3.10.0 +zipp==3.11.0 # via importlib-metadata # The following packages are considered to be unsafe in a requirements file: diff --git a/scripts/prep_vocab_lists.py b/scripts/prep_vocab_lists.py index af9463d..f6809f8 100644 --- a/scripts/prep_vocab_lists.py +++ b/scripts/prep_vocab_lists.py @@ -51,7 +51,7 @@ def timed(description): def make_txts(): src_json = f"{CURDIR}/../downloads/noodle-soup-prompts/nsp_pantry.json" dst_folder = f"{CURDIR}/../imaginairy/vendored/noodle_soup_prompts" - with open(src_json, "r", encoding="utf-8") as f: + with open(src_json, encoding="utf-8") as f: prompts = json.load(f) categories = [] for c in prompts.keys(): @@ -65,7 +65,7 @@ def make_txts(): renamed_c = category_renames.get(c, c) with gzip.open(f"{dst_folder}/{renamed_c}.txt.gz", "wb") as f: for p in filtered_phrases: - f.write(f"{p}\n".encode("utf-8")) + f.write(f"{p}\n".encode()) if __name__ == "__main__": diff --git a/setup.py b/setup.py index cbcd66a..aaf4b71 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,6 @@ from setuptools import find_packages, setup -with open("README.md", "r", encoding="utf-8") as f: +with open("README.md", encoding="utf-8") as f: readme = f.read() setup( diff --git a/tests/test_api.py b/tests/test_api.py index 5b30804..7f0dbba 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -43,7 +43,7 @@ compare_prompts = [ "model_version", ["SD-1.4", "SD-1.5", "SD-2.0", "SD-2.0-v", "SD-2.1", "SD-2.1-v"] ) def test_model_versions(filename_base_for_orig_outputs, model_version): - """Test that we can switch between model versions""" + """Test that we can switch between model versions.""" prompts = [] for prompt_text in compare_prompts: prompts.append( @@ -172,19 +172,19 @@ def test_img_to_img_fruit_2_gold_repeat(): img = LazyLoadingImage(filepath=f"{TESTS_FOLDER}/data/bowl_of_fruit.jpg") run_count = 1 - kwargs = dict( - prompt="a white bowl filled with gold coins. sharp focus", - prompt_strength=12, - init_image=img, - init_image_strength=0.2, - mask_prompt="(fruit OR stem{*5} OR fruit stem)", - mask_mode="replace", - steps=20, - seed=946188797, - sampler_type="plms", - fix_faces=True, - upscale=True, - ) + kwargs = { + "prompt": "a white bowl filled with gold coins. sharp focus", + "prompt_strength": 12, + "init_image": img, + "init_image_strength": 0.2, + "mask_prompt": "(fruit OR stem{*5} OR fruit stem)", + "mask_mode": "replace", + "steps": 20, + "seed": 946188797, + "sampler_type": "plms", + "fix_faces": True, + "upscale": True, + } prompts = [ ImaginePrompt(**kwargs), ImaginePrompt(**kwargs), diff --git a/tox.ini b/tox.ini index 3d75c61..6d73d07 100644 --- a/tox.ini +++ b/tox.ini @@ -13,7 +13,7 @@ linters = pylint,pycodestyle,pydocstyle,pyflakes,mypy ignore = Z999,C0103,C0301,C0302,C0114,C0115,C0116, Z999,D100,D101,D102,D103,D105,D106,D107,D200,D202,D203,D205,D212,D400,D401,D406,D407,D413,D415,D417, - Z999,E203,E501,E1101, + Z999,E203,E501,E1101,E1131, Z999,R0901,R0902,R0903,R0904,R0193,R0912,R0913,R0914,R0915,R1702, Z999,W0221,W0511,W0612,W0613,W1203