feature: add city96 latent upscaler

bd/refactor-upscalers
Bryce 5 months ago
parent 1bf53e47cf
commit 9ff49bc74e

@ -30,7 +30,7 @@ def _generate_single_image(
from imaginairy.enhancers.clip_masking import get_img_mask
from imaginairy.enhancers.describe_image_blip import generate_caption
from imaginairy.enhancers.face_restoration_codeformer import enhance_faces
from imaginairy.enhancers.upscale_realesrgan import upscale_image
from imaginairy.enhancers.upscalers.realesrgan import upscale_image
from imaginairy.modules.midas.api import torch_image_to_depth_map
from imaginairy.samplers import SOLVER_LOOKUP
from imaginairy.samplers.editing import CFGEditingDenoiser
@ -534,7 +534,7 @@ def _generate_composition_image(
result = _generate_single_image(composition_prompt, dtype=dtype)
img = result.images["generated"]
while img.width < target_width:
from imaginairy.enhancers.upscale_realesrgan import upscale_image
from imaginairy.enhancers.upscalers.realesrgan import upscale_image
img = upscale_image(img)

@ -35,7 +35,7 @@ def generate_single_image(
from imaginairy.enhancers.clip_masking import get_img_mask
from imaginairy.enhancers.describe_image_blip import generate_caption
from imaginairy.enhancers.face_restoration_codeformer import enhance_faces
from imaginairy.enhancers.upscale_realesrgan import upscale_image
from imaginairy.enhancers.upscalers.realesrgan import upscale_image
from imaginairy.samplers import SolverName
from imaginairy.schema import ImagineResult
from imaginairy.utils import get_device, randn_seeded
@ -587,7 +587,7 @@ def _generate_composition_image(
)
img = result.images["generated"]
while img.width < target_width:
from imaginairy.enhancers.upscale_realesrgan import upscale_image
from imaginairy.enhancers.upscalers.realesrgan import upscale_image
if prompt.fix_faces:
from imaginairy.enhancers.face_restoration_codeformer import enhance_faces

@ -32,7 +32,7 @@ def upscale_cmd(image_filepaths, outdir, fix_faces, fix_faces_fidelity):
from tqdm import tqdm
from imaginairy.enhancers.face_restoration_codeformer import enhance_faces
from imaginairy.enhancers.upscale_realesrgan import upscale_image
from imaginairy.enhancers.upscalers.realesrgan import upscale_image
from imaginairy.schema import LazyLoadingImage
from imaginairy.utils import glob_expand_paths

@ -0,0 +1,81 @@
from typing import Literal
import torch
import torch.nn as nn
from safetensors.torch import load_file
from imaginairy.utils.downloads import get_cached_url_path
# Latent variants accepted by `upscale_latent`; the chosen value is interpolated
# into the weights filename as "SD{latent_ver}".
LatentVerType = Literal["v1", "xl"]
# Scale factors accepted by `upscale_latent`, spelled as strings; the chosen
# value is interpolated verbatim into the weights filename ("-x{scale_factor}")
# and is also handed to `Upscaler` as its `fac` argument.
ScaleFactorType = Literal["1.25", "1.5", "2.0"]
class Upscaler(nn.Module):
    """
    Plain convolutional network that resizes a 4-channel tensor by `fac`.

    Basic NN layout, ported from:
    https://github.com/city96/SD-Latent-Upscaler/blob/main/upscaler.py

    The network is a single `nn.Sequential` made of three stages:
    head (conv -> ReLU -> nearest-neighbour upsample -> ReLU),
    core (`depth` repetitions of conv -> ReLU) and tail (one conv back to the
    input channel count).
    """

    version = 2.1  # network revision

    def __init__(self, fac, depth=16):
        """
        Build the layer stack.

        Args:
            fac: scale factor forwarded to `nn.Upsample`.
            depth: number of conv/ReLU pairs in the core stage.
        """
        super().__init__()
        self.size = 64  # Conv2d size
        self.chan = 4  # in/out channels
        self.depth = depth  # no. of layers
        self.fac = fac  # scale factor
        self.krn = 3  # kernel size
        self.pad = 1  # padding
        # Stages are instantiated head -> core -> tail; the position of each
        # layer inside `sequential` determines its state-dict key.
        stages = [*self.head(), *self.core(), *self.tail()]
        self.sequential = nn.Sequential(*stages)

    def _conv(self, channels_in, channels_out):
        """Return a Conv2d using the kernel size and padding of this network."""
        return nn.Conv2d(
            channels_in, channels_out, kernel_size=self.krn, padding=self.pad
        )

    def head(self):
        """Return the input stage: widen channels, then upsample by `fac`."""
        widen = self._conv(self.chan, self.size)
        enlarge = nn.Upsample(scale_factor=self.fac, mode="nearest")
        return [widen, nn.ReLU(), enlarge, nn.ReLU()]

    def core(self):
        """Return `depth` conv/ReLU pairs operating at the hidden width."""
        stack = []
        for _ in range(self.depth):
            stack.append(self._conv(self.size, self.size))
            stack.append(nn.ReLU())
        return stack

    def tail(self):
        """Return the output stage: project back to the input channel count."""
        return [self._conv(self.size, self.chan)]

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run `x` through the full layer stack."""
        out = self.sequential(x)
        return out
def upscale_latent(
    latent: torch.Tensor, latent_ver: LatentVerType, scale_factor: ScaleFactorType
) -> torch.Tensor:
    """
    Enlarge a latent tensor with the city96 SD-Latent-Upscaler network.

    The weights matching `latent_ver` and `scale_factor` are fetched through
    `get_cached_url_path`, loaded into a fresh `Upscaler`, and the network is
    run on the CPU in float32. The result is converted back to the dtype and
    device of the input.

    Args:
        latent: latent tensor to enlarge.
        latent_ver: which set of weights to use ("v1" or "xl").
        scale_factor: enlargement factor, given as a string.

    Returns:
        The enlarged latent, on the same device and with the same dtype as
        `latent`. The tensor is detached from autograd.
    """
    model = Upscaler(scale_factor)
    orig_dtype, orig_device = latent.dtype, latent.device

    filename = (
        f"latent-upscaler-v{model.version}_SD{latent_ver}-x{scale_factor}.safetensors"
    )
    # The URL is pinned to a specific revision hash of the weights repository.
    weights_url = f"https://huggingface.co/city96/SD-Latent-Upscaler/resolve/99c65021fa947dfe3d71ec4e24793fe7533a3322/{filename}"
    weights_path = get_cached_url_path(weights_url)
    model.load_state_dict(load_file(weights_path), assign=True)

    # Inference only: without no_grad the output would carry an autograd graph
    # that keeps the weights and every intermediate activation alive for as
    # long as the returned tensor exists, defeating the `del model` below.
    with torch.no_grad():
        latent = latent.to(dtype=torch.float32, device="cpu")
        # NOTE(review): 0.13025 appears to be the SDXL VAE scaling factor, yet
        # it is applied for latent_ver="v1" as well - confirm this is intended.
        latent = latent / 0.13025
        big_latent = model(latent)
        big_latent = big_latent.to(dtype=orig_dtype, device=orig_device)
        del model
        # Undo the division above so the output is on the same scale as the input.
        big_latent = big_latent * 0.13025
    return big_latent

@ -144,12 +144,12 @@ def test_edit_demo(monkeypatch):
def test_upscale(monkeypatch):
from imaginairy.enhancers import upscale_realesrgan
from imaginairy.enhancers.upscalers import realesrgan
def mock_upscale_image(*args, **kwargs):
return LazyLoadingImage(filepath=f"{TESTS_FOLDER}/data/dog.jpg")
monkeypatch.setattr(upscale_realesrgan, "upscale_image", mock_upscale_image)
monkeypatch.setattr(realesrgan, "upscale_image", mock_upscale_image)
runner = CliRunner()
result = runner.invoke(
upscale_cmd,

@ -1,6 +1,6 @@
from PIL import Image
from imaginairy.enhancers.upscale_realesrgan import upscale_image
from imaginairy.enhancers.upscalers.realesrgan import upscale_image
from tests import TESTS_FOLDER
from tests.utils import assert_image_similar_to_expectation

@ -3,7 +3,7 @@ import pytest
from PIL import Image
from torch.nn.functional import interpolate
from imaginairy.enhancers.upscale_riverwing import upscale_latent
from imaginairy.enhancers.upscalers.riverwing import upscale_latent
from imaginairy.schema import LazyLoadingImage
from imaginairy.utils import get_device
from imaginairy.utils.img_utils import (

Loading…
Cancel
Save