style: fix all the mypy typing issues

...or ignore them.
pull/411/head^2
Bryce authored 6 months ago, committed by Bryce Drennan
parent 5a636e45c5
commit 012cc648d3

@ -1,12 +1,12 @@
import logging
import os
import re
from typing import TYPE_CHECKING, Callable
from typing import TYPE_CHECKING, Any, Callable
from imaginairy.utils.named_resolutions import normalize_image_size
if TYPE_CHECKING:
from imaginairy.schema import ImaginePrompt
from imaginairy.schema import ImaginePrompt, LazyLoadingImage
logger = logging.getLogger(__name__)
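Note: the import change above is the standard `TYPE_CHECKING` idiom: names needed only in annotations (`ImaginePrompt`, `LazyLoadingImage`) are imported in a block that mypy evaluates but the interpreter skips, so runtime imports (and any potential cycles) stay unchanged. A minimal, self-contained sketch of the pattern:

```python
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Seen by mypy only; never executed at runtime, so this cannot
    # introduce an import cycle or slow module start-up.
    from imaginairy.schema import ImaginePrompt


def describe_prompt(prompt: "ImaginePrompt") -> str:
    # The quoted annotation defers name resolution to type-checking time.
    return f"prompt with seed {prompt.seed}"
```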
@ -335,20 +335,24 @@ def _generate_single_image_compvis(
]
SolverCls = SOLVER_LOOKUP[prompt.solver_type.lower()]
solver = SolverCls(model)
mask_latent = mask_image = mask_image_orig = mask_grayscale = None
t_enc = init_latent = control_image = None
mask_image: Image.Image | LazyLoadingImage | None = None
mask_latent = mask_image_orig = mask_grayscale = None
init_latent: torch.Tensor | None = None
t_enc = None
starting_image = None
denoiser_cls = None
c_cat = []
c_cat_neutral = None
result_images = {}
result_images: dict[str, torch.Tensor | Image.Image | None] = {}
assert prompt.seed is not None
seed_everything(prompt.seed)
noise = randn_seeded(seed=prompt.seed, size=shape).to(get_device())
control_strengths = []
if prompt.init_image:
starting_image = prompt.init_image
assert prompt.init_image_strength is not None
generation_strength = 1 - prompt.init_image_strength
if model.cond_stage_key == "edit" or generation_strength >= 1:
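The chained `mask_latent = mask_image = ... = None` assignment is split so each variable gets its own explicit Optional annotation, and the added `assert prompt.init_image_strength is not None` narrows the Optional before it is used in arithmetic. A reduced sketch of both moves; `FakePrompt` is an invented stand-in for the real prompt object:

```python
from dataclasses import dataclass

import torch


@dataclass
class FakePrompt:
    # Invented stand-in; only the Optional fields matter for the example.
    init_image_strength: float | None = None
    seed: int | None = None


def generation_strength(prompt: FakePrompt) -> float:
    # Without the assert, mypy flags the subtraction below because the
    # operand is `float | None`.
    assert prompt.init_image_strength is not None
    return 1 - prompt.init_image_strength


# Annotating up front lets the variable later hold a tensor without mypy
# inferring its type as plain None from the first assignment.
init_latent: torch.Tensor | None = None
init_latent = torch.zeros(1, 4, 64, 64)

print(generation_strength(FakePrompt(init_image_strength=0.4)))  # 0.6
```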
@ -367,18 +371,18 @@ def _generate_single_image_compvis(
starting_image, mask_image = prepare_image_for_outpaint(
starting_image, mask_image, **outpaint_kwargs
)
assert starting_image is not None
init_image = pillow_fit_image_within(
starting_image,
max_height=prompt.height,
max_width=prompt.width,
)
init_image_t = pillow_img_to_torch_image(init_image)
init_image_t = init_image_t.to(get_device())
init_image_t = pillow_img_to_torch_image(init_image).to(get_device())
init_latent = model.get_first_stage_encoding(
model.encode_first_stage(init_image_t)
)
shape = init_latent.shape
assert init_latent is not None
shape = list(init_latent.shape)
log_latent(init_latent, "init_latent")
@ -405,9 +409,9 @@ def _generate_single_image_compvis(
control_inputs.append(
ControlInput(mode="inpaint", image=mask_image)
)
assert prompt.seed is not None
seed_everything(prompt.seed)
noise = randn_seeded(seed=prompt.seed, size=init_latent.shape).to(
noise = randn_seeded(seed=prompt.seed, size=list(init_latent.shape)).to(
get_device()
)
# noise = noise[:, :, : init_latent.shape[2], : init_latent.shape[3]]
@ -451,8 +455,13 @@ def _generate_single_image_compvis(
control_image = control_input.image_raw
elif control_input.image is not None:
control_image = control_input.image
else:
raise RuntimeError("Control image must be provided")
assert control_image is not None
control_image = control_image.convert("RGB")
log_img(control_image, "control_image_input")
assert control_image is not None
control_image_input = pillow_fit_image_within(
control_image,
max_height=prompt.height,
@ -464,11 +473,11 @@ def _generate_single_image_compvis(
if control_input.image_raw is None:
control_prep_function = CONTROL_MODES[control_input.mode]
if control_input.mode == "inpaint":
control_image_t = control_prep_function(
control_image_t = control_prep_function( # type: ignore
control_image_input_t, init_image_t
)
else:
control_image_t = control_prep_function(control_image_input_t)
control_image_t = control_prep_function(control_image_input_t) # type: ignore
else:
control_image_t = (control_image_input_t + 1) / 2
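The `# type: ignore` comments on the `control_prep_function(...)` calls exist because `CONTROL_MODES` (typed later in this diff as `Dict[str, FunctionType]`, where `FunctionType` is a union of one- and two-argument callables) does not tell mypy which arity a given entry has. A minimal sketch of the same situation; `inpaint_prep` here is a trivial stand-in, not the project's implementation:

```python
from typing import Callable, Dict, Union

import torch
from torch import Tensor


def noop(img: Tensor) -> Tensor:
    return (img + 1.0) / 2.0


def inpaint_prep(mask_image_t: Tensor, target_image_t: Tensor) -> Tensor:
    # Stand-in behaviour: zero out everything outside the mask.
    return target_image_t * (mask_image_t > 0.5)


FunctionType = Union[Callable[[Tensor, Tensor], Tensor], Callable[[Tensor], Tensor]]

CONTROL_MODES: Dict[str, FunctionType] = {
    "noop": noop,
    "inpaint": inpaint_prep,
}

img = torch.zeros(1, 3, 8, 8)
fn = CONTROL_MODES["noop"]
# mypy cannot prove the one-argument call is valid for every union member,
# so the real call sites silence the check; at runtime this is fine.
out = fn(img)  # type: ignore
print(out.shape)
```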
@ -499,6 +508,8 @@ def _generate_single_image_compvis(
elif hasattr(model, "masked_image_key"):
# inpainting model
assert mask_image_orig is not None
assert mask_latent is not None
mask_t = pillow_img_to_torch_image(ImageOps.invert(mask_image_orig)).to(
get_device()
)
@ -519,6 +530,7 @@ def _generate_single_image_compvis(
elif model.cond_stage_key == "edit":
# pix2pix model
c_cat = [model.encode_first_stage(init_image_t)]
assert init_latent is not None
c_cat_neutral = [torch.zeros_like(init_latent)]
denoiser_cls = CFGEditingDenoiser
if c_cat:
@ -527,18 +539,24 @@ def _generate_single_image_compvis(
if c_cat_neutral is None:
c_cat_neutral = c_cat
positive_conditioning = {
positive_conditioning_d: dict[str, Any] = {
"c_concat": c_cat,
"c_crossattn": [positive_conditioning],
}
neutral_conditioning = {
neutral_conditioning_d: dict[str, Any] = {
"c_concat": c_cat_neutral,
"c_crossattn": [neutral_conditioning],
}
del neutral_conditioning
del positive_conditioning
if control_strengths and is_controlnet_model:
positive_conditioning["control_strengths"] = torch.Tensor(control_strengths)
neutral_conditioning["control_strengths"] = torch.Tensor(control_strengths)
positive_conditioning_d["control_strengths"] = torch.Tensor(
control_strengths
)
neutral_conditioning_d["control_strengths"] = torch.Tensor(
control_strengths
)
if (
prompt.allow_compose_phase
@ -575,8 +593,8 @@ def _generate_single_image_compvis(
with lc.timing("sampling"):
samples = solver.sample(
num_steps=prompt.steps,
positive_conditioning=positive_conditioning,
neutral_conditioning=neutral_conditioning,
positive_conditioning=positive_conditioning_d,
neutral_conditioning=neutral_conditioning_d,
guidance_scale=prompt.prompt_strength,
t_start=t_enc,
mask=mask_latent,
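Re-binding `positive_conditioning` from a tensor to a dict would give one variable two incompatible types, so the diff builds new, explicitly typed names (`positive_conditioning_d: dict[str, Any]`) and deletes the originals. A cut-down sketch of the pattern (tensor shapes are placeholders):

```python
from typing import Any

import torch

positive_conditioning = torch.randn(1, 77, 768)  # placeholder text embedding
c_cat: list[torch.Tensor] = []

# New name with an explicit type instead of re-binding the old variable,
# which would change its inferred type mid-function.
positive_conditioning_d: dict[str, Any] = {
    "c_concat": c_cat,
    "c_crossattn": [positive_conditioning],
}
del positive_conditioning  # any later use of the old name now fails fast

positive_conditioning_d["control_strengths"] = torch.Tensor([1.0])
print(sorted(positive_conditioning_d.keys()))
```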

@ -19,7 +19,7 @@ def _generate_single_image(
half_mode=None,
):
import torch.nn
from PIL import ImageOps
from PIL import Image, ImageOps
from pytorch_lightning import seed_everything
from refiners.foundationals.latent_diffusion.schedulers import DDIM, DPMSolver
from tqdm import tqdm
@ -51,7 +51,7 @@ def _generate_single_image(
from imaginairy.outpaint import outpaint_arg_str_parse, prepare_image_for_outpaint
from imaginairy.safety import create_safety_score
from imaginairy.samplers import SolverName
from imaginairy.schema import ImaginePrompt, ImagineResult
from imaginairy.schema import ImagineResult
from imaginairy.utils import get_device, randn_seeded
if dtype is None:
@ -75,7 +75,6 @@ def _generate_single_image(
mask_image = None
mask_image_orig = None
prompt: ImaginePrompt = prompt.make_concrete_copy()
def latent_logger(latents):
progress_latents.append(latents)
@ -101,8 +100,8 @@ def _generate_single_image(
)
clip_text_embedding = clip_text_embedding.to(device=sd.device, dtype=sd.dtype)
result_images = {}
progress_latents = []
result_images: dict[str, torch.Tensor | None | Image.Image] = {}
progress_latents: list[torch.Tensor] = []
first_step = 0
mask_grayscale = None
@ -125,7 +124,8 @@ def _generate_single_image(
if prompt.init_image:
starting_image = prompt.init_image
first_step = int((prompt.steps) * prompt.init_image_strength)
assert prompt.init_image_strength is not None
first_step = int(prompt.steps * prompt.init_image_strength)
# noise_step = int((prompt.steps - 1) * prompt.init_image_strength)
if prompt.mask_prompt:
@ -150,7 +150,7 @@ def _generate_single_image(
init_image_t = init_image_t.to(device=sd.device, dtype=sd.dtype)
init_latent = sd.lda.encode(init_image_t)
shape = init_latent.shape
shape = list(init_latent.shape)
log_latent(init_latent, "init_latent")
@ -179,6 +179,7 @@ def _generate_single_image(
)
seed_everything(prompt.seed)
assert prompt.seed is not None
noise = randn_seeded(seed=prompt.seed, size=shape).to(
get_device(), dtype=sd.dtype
@ -210,11 +211,11 @@ def _generate_single_image(
if control_input.image_raw is None:
control_prep_function = CONTROL_MODES[control_input.mode]
if control_input.mode == "inpaint":
control_image_t = control_prep_function(
control_image_t = control_prep_function( # type: ignore
control_image_input_t, init_image_t
)
else:
control_image_t = control_prep_function(control_image_input_t)
control_image_t = control_prep_function(control_image_input_t) # type: ignore
else:
control_image_t = (control_image_input_t + 1) / 2
@ -246,9 +247,9 @@ def _generate_single_image(
raise ValueError(msg)
from refiners.foundationals.latent_diffusion import SD1ControlnetAdapter
controlnet = SD1ControlnetAdapter(
controlnet = SD1ControlnetAdapter( # type: ignore
name=control_input.mode,
target=sd.unet,
target=sd.unet, # type: ignore
weights_location=control_config.weights_location,
)
controlnet.set_scale(control_input.strength)

@ -9,6 +9,7 @@ from torchvision import transforms
from imaginairy.img_utils import pillow_fit_image_within
from imaginairy.log_utils import log_img
from imaginairy.schema import LazyLoadingImage
from imaginairy.vendored.clipseg import CLIPDensePredT
weights_url = "https://github.com/timojl/clipseg/raw/master/weights/rd64-uni.pth"
@ -32,7 +33,7 @@ def clip_mask_model():
def get_img_mask(
img: PIL.Image.Image,
img: PIL.Image.Image | LazyLoadingImage,
mask_description_statement: str,
threshold: Optional[float] = None,
):

@ -1,7 +1,12 @@
"""Functions to create hint images for controlnet."""
from typing import TYPE_CHECKING, Callable, Dict, Union
if TYPE_CHECKING:
import numpy as np
from torch import Tensor # noqa
def create_canny_edges(img):
def create_canny_edges(img: "Tensor") -> "Tensor":
import cv2
import numpy as np
import torch
@ -33,7 +38,7 @@ def create_canny_edges(img):
return canny_image
def create_depth_map(img):
def create_depth_map(img: "Tensor") -> "Tensor":
import torch
orig_size = img.shape[2:]
@ -56,7 +61,7 @@ def create_depth_map(img):
return depth_pt
def _create_depth_map_raw(img):
def _create_depth_map_raw(img: "Tensor") -> "Tensor":
import torch
from imaginairy.modules.midas.api import MiDaSInference, midas_device
@ -83,7 +88,7 @@ def _create_depth_map_raw(img):
return depth_pt
def create_normal_map(img):
def create_normal_map(img: "Tensor") -> "Tensor":
import torch
from imaginairy.vendored.imaginairy_normal_map.model import (
@ -97,7 +102,7 @@ def create_normal_map(img):
return normal_img_t
def create_hed_edges(img_t):
def create_hed_edges(img_t: "Tensor") -> "Tensor":
import torch
from imaginairy.img_processors.hed_boundary import create_hed_map
@ -120,7 +125,7 @@ def create_hed_edges(img_t):
return hint_t
def create_pose_map(img_t):
def create_pose_map(img_t: "Tensor"):
from imaginairy.img_processors.openpose import create_body_pose_img
from imaginairy.utils import get_device
@ -130,7 +135,7 @@ def create_pose_map(img_t):
return pose_t
def make_noise_disk(H, W, C, F):
def make_noise_disk(H: int, W: int, C: int, F: int) -> "np.ndarray":
import cv2
import numpy as np
@ -144,7 +149,7 @@ def make_noise_disk(H, W, C, F):
return noise
def shuffle_map_np(img, h=None, w=None, f=256):
def shuffle_map_np(img: "np.ndarray", h=None, w=None, f=256) -> "np.ndarray":
import cv2
import numpy as np
@ -160,7 +165,7 @@ def shuffle_map_np(img, h=None, w=None, f=256):
return cv2.remap(img, flow, None, cv2.INTER_LINEAR)
def shuffle_map_torch(tensor, h=None, w=None, f=256):
def shuffle_map_torch(tensor: "Tensor", h=None, w=None, f=256) -> "Tensor":
import torch
# Assuming the input tensor is in shape (B, C, H, W)
@ -187,7 +192,7 @@ def shuffle_map_torch(tensor, h=None, w=None, f=256):
return shuffled_tensor.to(device)
def inpaint_prep(mask_image_t, target_image_t):
def inpaint_prep(mask_image_t: "Tensor", target_image_t: "Tensor") -> "Tensor":
"""
Combines the masked image and target image into a single tensor.
@ -207,7 +212,7 @@ def inpaint_prep(mask_image_t, target_image_t):
return output_image_t
def to_grayscale(img):
def to_grayscale(img: "Tensor") -> "Tensor":
# The dimensions of input should be (batch_size, channels, height, width)
if img.dim() != 4:
raise ValueError("Input should be a 4d tensor")
@ -228,11 +233,13 @@ def to_grayscale(img):
return (gray_3_channels + 1.0) / 2.0
def noop(img):
def noop(img: "Tensor") -> "Tensor":
return (img + 1.0) / 2.0
CONTROL_MODES = {
FunctionType = Union["Callable[[Tensor, Tensor], Tensor]", "Callable[[Tensor], Tensor]"]
CONTROL_MODES: Dict[str, FunctionType] = {
"canny": create_canny_edges,
"depth": create_depth_map,
"normal": create_normal_map,

@ -23,8 +23,12 @@ from imaginairy.utils import get_device
def pillow_fit_image_within(
image: PIL.Image.Image, max_height=512, max_width=512, convert="RGB", snap_size=8
):
image: PIL.Image.Image | LazyLoadingImage,
max_height=512,
max_width=512,
convert="RGB",
snap_size=8,
) -> PIL.Image.Image:
image = image.convert(convert)
w, h = image.size
resize_ratio = 1
@ -45,7 +49,9 @@ def pillow_fit_image_within(
return image
def pillow_img_to_torch_image(img: PIL.Image.Image, convert="RGB"):
def pillow_img_to_torch_image(
img: PIL.Image.Image | LazyLoadingImage, convert="RGB"
) -> torch.Tensor:
if convert:
img = img.convert(convert)
img_np = np.array(img).astype(np.float32) / 255.0
@ -55,7 +61,9 @@ def pillow_img_to_torch_image(img: PIL.Image.Image, convert="RGB"):
return 2.0 * img_t - 1.0
def pillow_mask_to_latent_mask(mask_img: PIL.Image.Image, downsampling_factor):
def pillow_mask_to_latent_mask(
mask_img: PIL.Image.Image | LazyLoadingImage, downsampling_factor
) -> torch.Tensor:
mask_img = mask_img.resize(
(
mask_img.width // downsampling_factor,
@ -66,11 +74,11 @@ def pillow_mask_to_latent_mask(mask_img: PIL.Image.Image, downsampling_factor):
mask = np.array(mask_img).astype(np.float32) / 255.0
mask = mask[None, None]
mask = torch.from_numpy(mask)
return mask
mask_t = torch.from_numpy(mask)
return mask_t
def pillow_img_to_opencv_img(img: PIL.Image.Image):
def pillow_img_to_opencv_img(img: PIL.Image.Image | LazyLoadingImage):
open_cv_image = np.array(img)
# Convert RGB to BGR
open_cv_image = open_cv_image[:, :, ::-1].copy()
@ -90,7 +98,7 @@ def torch_image_to_openvcv_img(img: torch.Tensor) -> np.ndarray:
return img_np
def torch_img_to_pillow_img(img_t: torch.Tensor):
def torch_img_to_pillow_img(img_t: torch.Tensor) -> PIL.Image.Image:
img_t = img_t.to(torch.float32).detach().cpu()
if len(img_t.shape) == 3:
img_t = img_t.unsqueeze(0)
@ -129,7 +137,9 @@ def model_latents_to_pillow_imgs(latents: torch.Tensor) -> Sequence[PIL.Image.Im
return [model_latent_to_pillow_img(latent) for latent in latents]
def pillow_img_to_model_latent(model, img, batch_size=1, half=True):
def pillow_img_to_model_latent(
model, img: PIL.Image.Image | LazyLoadingImage, batch_size=1, half=True
):
init_image = pillow_img_to_torch_image(img).to(get_device())
init_image = repeat(init_image, "1 ... -> b ...", b=batch_size)
if half:
@ -152,14 +162,18 @@ def imgpaths_to_imgs(imgpaths):
def add_caption_to_image(
img, caption, font_size=16, font_path=f"{PKG_ROOT}/data/DejaVuSans.ttf"
img: PIL.Image.Image | LazyLoadingImage,
caption,
font_size=16,
font_path=f"{PKG_ROOT}/data/DejaVuSans.ttf",
):
draw = ImageDraw.Draw(img)
img_pil = img.as_pillow() if isinstance(img, LazyLoadingImage) else img
draw = ImageDraw.Draw(img_pil)
font = ImageFont.truetype(font_path, font_size)
x = 15
y = img.height - 15 - font_size
y = img_pil.height - 15 - font_size
draw.text(
(x, y),
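`add_caption_to_image` now accepts `PIL.Image.Image | LazyLoadingImage` and uses an `isinstance` check to obtain a concrete Pillow image before drawing. A self-contained sketch of that narrowing; `FakeLazyImage` is an invented stand-in for `LazyLoadingImage`:

```python
import PIL.Image
from PIL import Image, ImageDraw


class FakeLazyImage:
    """Invented stand-in: exposes as_pillow() like the real lazy image."""

    def __init__(self, size: tuple[int, int] = (64, 64)) -> None:
        self._size = size

    def as_pillow(self) -> PIL.Image.Image:
        return Image.new("RGB", self._size)


def add_caption(img: PIL.Image.Image | FakeLazyImage, caption: str) -> PIL.Image.Image:
    # isinstance() narrows the union, so mypy knows img_pil is a real
    # PIL image and ImageDraw.Draw() type-checks.
    img_pil = img.as_pillow() if isinstance(img, FakeLazyImage) else img
    ImageDraw.Draw(img_pil).text((5, 5), caption)
    return img_pil


add_caption(FakeLazyImage(), "hello")
add_caption(Image.new("RGB", (64, 64)), "hello")
```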

@ -266,6 +266,7 @@ def _get_diffusion_model_refiners(
text_encoder_weights,
) = load_stable_diffusion_compvis_weights(weights_location)
StableDiffusionCls: type[LatentDiffusionModel]
if for_inpainting:
unet = SD1UNet(in_channels=9)
StableDiffusionCls = StableDiffusion_1_Inpainting
@ -390,9 +391,9 @@ def resolve_model_weights_config(
return model_weights_config
def get_model_default_image_size(model_architecture: str | ModelArchitecture):
def get_model_default_image_size(model_architecture: str | ModelArchitecture | None):
if isinstance(model_architecture, str):
model_architecture = iconfig.MODEL_WEIGHT_CONFIG_LOOKUP.get(
model_architecture = iconfig.MODEL_ARCHITECTURE_LOOKUP.get(
model_architecture, None
)
default_size = None
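The bare `StableDiffusionCls: type[LatentDiffusionModel]` declaration gives the variable one type up front, so the if/else below it can assign either concrete class without mypy inferring a different type per branch. A generic sketch with invented placeholder classes:

```python
class Base:
    pass


class VariantA(Base):
    pass


class VariantB(Base):
    pass


def pick(for_inpainting: bool) -> Base:
    # Declare once; both branches must satisfy `type[Base]`, and mypy
    # treats the variable as definitely assigned after the if/else.
    cls: type[Base]
    if for_inpainting:
        cls = VariantA
    else:
        cls = VariantB
    return cls()


print(type(pick(True)).__name__)  # VariantA
```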

@ -2,7 +2,7 @@ import torch
from torch import nn
from imaginairy.modules.attention import SpatialTransformer
from imaginairy.modules.diffusion.ddpm import LatentDiffusion
from imaginairy.modules.diffusion.ddpm import LatentDiffusion # type: ignore
from imaginairy.modules.diffusion.openaimodel import (
AttentionBlock,
Downsample,

@ -890,7 +890,7 @@ class LatentDiffusion(DDPM):
denoise_grid = make_grid(denoise_grid, nrow=n_imgs_per_row)
return denoise_grid
def get_first_stage_encoding(self, encoder_posterior):
def get_first_stage_encoding(self, encoder_posterior) -> torch.Tensor:
if isinstance(encoder_posterior, DiagonalGaussianDistribution):
z = encoder_posterior.mode()
elif isinstance(encoder_posterior, torch.Tensor):

@ -1,7 +1,6 @@
# pytorch_diffusion + derived encoder decoder
import gc
import math
from typing import Any, Optional
import numpy as np
import torch
@ -300,7 +299,7 @@ class MemoryEfficientAttnBlock(nn.Module):
self.proj_out = torch.nn.Conv2d(
in_channels, in_channels, kernel_size=1, stride=1, padding=0
)
self.attention_op: Optional[Any] = None
self.attention_op = None
def forward(self, x):
h_ = x

@ -62,13 +62,13 @@ class TileModeMixin(nn.Module):
if isinstance(m, nn.Conv2d):
if not hasattr(m, "_orig_conv_forward"):
# patch with a function that can handle tiling in a single direction
m._initial_padding_mode = m.padding_mode
m._orig_conv_forward = m._conv_forward
m._conv_forward = _tile_mode_conv2d_conv_forward.__get__(
m._initial_padding_mode = m.padding_mode # type: ignore
m._orig_conv_forward = m._conv_forward # type: ignore
m._conv_forward = _tile_mode_conv2d_conv_forward.__get__( # type: ignore
m, nn.Conv2d
)
m.padding_modeX = "circular" if tile_x else "constant"
m.padding_modeY = "circular" if tile_y else "constant"
m.padding_modeX = "circular" if tile_x else "constant" # type: ignore
m.padding_modeY = "circular" if tile_y else "constant" # type: ignore
if m.padding_modeY == m.padding_modeX:
m.padding_mode = m.padding_modeX
m.paddingX = (
@ -76,13 +76,13 @@ class TileModeMixin(nn.Module):
m._reversed_padding_repeated_twice[1],
0,
0,
)
) # type: ignore
m.paddingY = (
0,
0,
m._reversed_padding_repeated_twice[2],
m._reversed_padding_repeated_twice[3],
)
) # type: ignore
class StableDiffusion_1(TileModeMixin, RefinerStableDiffusion_1):
@ -291,7 +291,9 @@ def monkeypatch_sd1controlnetadapter():
dtype=target.dtype,
)
self._controlnet: list[Controlnet] = [controlnet] # not registered by PyTorch
self._controlnet: list[Controlnet] = [ # type: ignore
controlnet
] # not registered by PyTorch
with self.setup_adapter(target):
super(SD1ControlnetAdapter, self).__init__(target)
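`TileModeMixin` attaches extra state (`_orig_conv_forward`, `padding_modeX`, `padding_modeY`, ...) directly onto `nn.Conv2d` instances at runtime; mypy only knows the attributes declared on the class, so each assignment gets a `# type: ignore`. A minimal illustration of why the suppressions are needed:

```python
from torch import nn

conv = nn.Conv2d(3, 3, kernel_size=3, padding=1)

# Valid Python (nn.Module.__setattr__ stores arbitrary attributes), but
# mypy reports '"Conv2d" has no attribute "padding_modeX"' without the ignore.
conv.padding_modeX = "circular"  # type: ignore
conv.padding_modeY = "constant"  # type: ignore

print(conv.padding_modeX, conv.padding_modeY)  # type: ignore
```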

@ -21,7 +21,7 @@ class SafetyResult:
self.special_care_scores = {}
self.is_filtered = False
def add_special_care_score(self, concept_idx, abs_score, threshold):
def add_special_care_score(self, concept_idx: int, abs_score, threshold):
adjustment = self._default_adjustment
adjusted_score = round(abs_score - threshold + adjustment, 3)
try:
@ -138,8 +138,8 @@ def monkeypatch_safety_cosine_distance():
safety_checker_mod.cosine_distance = cosine_distance_float32
_CONCEPT_DESCRIPTIONS = []
_SPECIAL_CARE_DESCRIPTIONS = []
_CONCEPT_DESCRIPTIONS: list[str] = []
_SPECIAL_CARE_DESCRIPTIONS: list[str] = []
def create_safety_score(img, safety_mode=SafetyMode.STRICT):
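The module-level `_CONCEPT_DESCRIPTIONS = []` lists previously gave mypy nothing to infer an element type from, which typically produces a "Need type annotation" error; the annotation pins them to `list[str]`. A tiny sketch:

```python
# Empty containers need an explicit element type; nothing at the
# definition site constrains what they will later hold.
_CONCEPT_DESCRIPTIONS: list[str] = []
_SPECIAL_CARE_DESCRIPTIONS: list[str] = []

_CONCEPT_DESCRIPTIONS.append("example concept description")
print(len(_CONCEPT_DESCRIPTIONS))
```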

@ -1,5 +1,6 @@
# pylama:ignore=W0613
from abc import ABC
from typing import Callable
import torch
from torch import nn
@ -58,7 +59,7 @@ def sample_dpm_fast(model, x, sigmas, extra_args=None, disable=False, callback=N
class KDiffusionSolver(ImageSolver, ABC):
sampler_func: callable
sampler_func: Callable
def __init__(self, model):
super().__init__(model)
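`sampler_func: callable` annotated the attribute with the builtin `callable()` function rather than a type, which mypy rejects; `typing.Callable` (optionally with a full signature) is the valid spelling. Sketch:

```python
from typing import Callable

import torch


class Solver:
    # `callable` is a builtin function, not a type; `Callable` (or e.g.
    # `Callable[[torch.Tensor], torch.Tensor]`) is what mypy expects here.
    sampler_func: Callable

    def __init__(self, fn: Callable[[torch.Tensor], torch.Tensor]) -> None:
        self.sampler_func = fn


s = Solver(lambda x: x * 2)
print(s.sampler_func(torch.ones(2)))  # tensor([2., 2.])
```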

@ -279,7 +279,7 @@ class ImaginePrompt(BaseModel, protected_namespaces=()):
mask_mode: MaskMode = MaskMode.REPLACE
mask_modify_original: bool = True
outpaint: str | None = ""
model_weights: config.ModelWeightsConfig = Field(
model_weights: config.ModelWeightsConfig = Field( # type: ignore
default=config.DEFAULT_MODEL_WEIGHTS, validate_default=True
)
solver_type: str = Field(default=config.DEFAULT_SOLVER, validate_default=True)
@ -504,7 +504,7 @@ class ImaginePrompt(BaseModel, protected_namespaces=()):
model_weights = config.DEFAULT_MODEL_WEIGHTS
from imaginairy.model_manager import resolve_model_weights_config
should_use_inpainting = (
should_use_inpainting = bool(
data.get("mask_image") or data.get("mask_prompt") or data.get("outpaint")
)
should_use_inpainting_weights = (
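`mask_image or mask_prompt or outpaint` evaluates to whichever operand is truthy, so its static type is a union of the operand types rather than `bool`; wrapping the expression in `bool(...)` makes both the intent and the inferred type an actual boolean. A sketch under that assumption (the signature here is invented):

```python
def wants_inpainting(
    mask_image: object | None,
    mask_prompt: str | None,
    outpaint: str | None,
) -> bool:
    # Without bool(), the expression's type is the union of the operands
    # (whatever `or` short-circuits to), which fails a `-> bool` annotation.
    return bool(mask_image or mask_prompt or outpaint)


print(wants_inpainting(None, "a face", None))  # True
print(wants_inpainting(None, None, ""))        # False
```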

@ -19,7 +19,7 @@ try:
except ImportError:
# let's not break all of imaginairy just because a training import doesn't exist in an older version of PL
# Use >= 1.6.0 to make this work
DDPStrategy = None
DDPStrategy = None # type: ignore
import contextlib
from pytorch_lightning.trainer import Trainer
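The `DDPStrategy = None  # type: ignore` fallback keeps older pytorch-lightning versions importable: mypy sees a class imported in the `try` block and objects to re-binding that name to `None`, so the assignment is suppressed. A sketch of the general optional-import pattern (the exact import path varies between pytorch-lightning versions):

```python
try:
    from pytorch_lightning.strategies import DDPStrategy  # path differs by PL version
except ImportError:
    # mypy flags "Incompatible types in assignment" when a name bound to a
    # class is reassigned to None, hence the ignore in the diff above.
    DDPStrategy = None  # type: ignore


def ddp_available() -> bool:
    return DDPStrategy is not None


print(ddp_available())
```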

@ -6,7 +6,7 @@ import re
import time
from glob import glob
from pathlib import Path
from typing import Optional
from typing import Any, Optional
import cv2
import numpy as np
@ -31,9 +31,10 @@ logger = logging.getLogger(__name__)
def generate_video(
input_path: str = "other/images/sound-music.jpg", # Can either be image file or folder with image files
num_frames: Optional[int] = None,
num_steps: Optional[int] = None,
input_path: str, # Can either be image file or folder with image files
output_folder: str | None = None,
num_frames: int = 6,
num_steps: int = 30,
model_name: str = "svd_xt",
fps_id: int = 6,
output_fps: int = 6,
@ -42,7 +43,6 @@ def generate_video(
seed: Optional[int] = None,
decoding_t: int = 1, # Number of frames decoded at a time! This eats most VRAM. Reduce if necessary.
device: Optional[str] = None,
output_folder: Optional[str] = None,
repetitions=1,
):
"""
@ -77,7 +77,8 @@ def generate_video(
num_frames = default(num_frames, video_model_config.defaults.get("frames", 12))
num_steps = default(num_steps, video_model_config.defaults.get("steps", 30))
output_folder = default(output_folder, "outputs/video/")
output_folder_str = default(output_folder, "outputs/video/")
del output_folder
video_config_path = f"{PKG_ROOT}/{video_model_config.architecture.config_path}"
logger.info(
@ -119,11 +120,10 @@ def generate_video(
for _ in range(repetitions):
for input_path in all_img_paths:
if input_path.startswith("http"):
image = LazyLoadingImage(url=input_path)
image = LazyLoadingImage(url=input_path).as_pillow()
else:
image = LazyLoadingImage(filepath=input_path)
image = LazyLoadingImage(filepath=input_path).as_pillow()
crop_coords = None
image = image.as_pillow()
if image.mode == "RGBA":
image = image.convert("RGB")
if image.size != expected_size:
@ -180,7 +180,7 @@ def generate_video(
"Large fps value! This may lead to suboptimal performance."
)
value_dict = {}
value_dict: dict[str, Any] = {}
value_dict["motion_bucket_id"] = motion_bucket_id
value_dict["fps_id"] = fps_id
value_dict["cond_aug"] = cond_aug
@ -250,14 +250,14 @@ def generate_video(
left, upper, right, lower = crop_coords
samples = samples[:, :, upper:lower, left:right]
os.makedirs(output_folder, exist_ok=True)
base_count = len(glob(os.path.join(output_folder, "*.mp4"))) + 1
os.makedirs(output_folder_str, exist_ok=True)
base_count = len(glob(os.path.join(output_folder_str, "*.mp4"))) + 1
source_slug = make_safe_filename(input_path)
video_filename = f"{base_count:06d}_{model_name}_{seed}_{fps_id}fps_{source_slug}.mp4"
video_path = os.path.join(output_folder, video_filename)
video_path = os.path.join(output_folder_str, video_filename)
writer = cv2.VideoWriter(
video_path,
cv2.VideoWriter_fourcc(*"MP4V"),
cv2.VideoWriter_fourcc(*"MP4V"), # type: ignore
output_fps,
(samples.shape[-1], samples.shape[-2]),
)
@ -332,7 +332,7 @@ def load_model(
):
oconfig = OmegaConf.load(config)
ckpt_path = get_cached_url_path(weights_url)
oconfig["model"]["params"]["ckpt_path"] = ckpt_path
oconfig["model"]["params"]["ckpt_path"] = ckpt_path # type: ignore
if device == "cuda":
oconfig.model.params.conditioner_config.params.emb_models[
0
@ -407,13 +407,3 @@ def make_safe_filename(input_string):
safe_name = re.sub(r"[^a-zA-Z0-9\-]", "", name_without_extension)
return safe_name
if __name__ == "__main__":
# configure logging
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s %(levelname)s %(name)s: %(message)s",
datefmt="%Y-%m-%d %H:%M:%S",
)
generate_video()
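The `cv2.VideoWriter_fourcc(*"MP4V")  # type: ignore` call above is suppressed rather than fixed because OpenCV's Python bindings ship with incomplete type information, so mypy may not know the symbol or its signature; the OmegaConf indexing is silenced for a similar reason. A short illustration (requires opencv-python):

```python
import cv2

# Real OpenCV API; the ignore is only there to quiet missing or partial
# stubs, not because the call is wrong.
fourcc = cv2.VideoWriter_fourcc(*"MP4V")  # type: ignore
print(fourcc)
```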

@ -6,7 +6,7 @@ from typing import TYPE_CHECKING
from imaginairy.weight_management import utils
if TYPE_CHECKING:
from torch import Tensor
from torch import Tensor # noqa
@dataclass
@ -69,8 +69,8 @@ class WeightMap:
source_keys = set(source_weights.keys())
return source_keys.issubset(self.all_valid_prefixes)
def cast_weights(self, source_weights):
converted_state_dict: dict[str, Tensor] = {}
def cast_weights(self, source_weights) -> dict[str, "Tensor"]:
converted_state_dict: dict[str, "Tensor"] = {}
for source_key in source_weights:
source_prefix, suffix = source_key.rsplit(sep=".", maxsplit=1)
# handle aliases
@ -96,7 +96,7 @@ def load_state_dict_conversion_maps() -> dict[str, dict]:
from importlib.resources import files
for file in files("imaginairy").joinpath("weight_conversion/maps").iterdir():
if file.is_file() and file.suffix == ".json":
if file.is_file() and file.suffix == ".json": # type: ignore
conversion_maps[file.name] = json.loads(file.read_text())
return conversion_maps

@ -8,7 +8,7 @@ is_for_windows = len(sys.argv) >= 3 and sys.argv[2].startswith("--plat-name=win"
if is_for_windows:
scripts = None
entry_points = {
entry_points: dict | None = {
"console_scripts": [
"imagine=imaginairy.cli.main:imagine_cmd",
"aimg=imaginairy.cli.main:aimg",
