test: add some autoencoder tests
The fold-unfold encoding/decoding looks like it's slower and has worse seams than the sliced feathering approach.
pull/264/head
parent
1ceb17c083
commit
1563e0b871
Binary file not shown.
After Width: | Height: | Size: 132 KiB |
Binary file not shown.
After Width: | Height: | Size: 23 KiB |
@ -0,0 +1,146 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
from PIL import Image
|
||||
from torch.nn.functional import interpolate
|
||||
|
||||
from imaginairy import LazyLoadingImage
|
||||
from imaginairy.enhancers.upscale_riverwing import upscale_latent
|
||||
from imaginairy.img_utils import (
|
||||
pillow_fit_image_within,
|
||||
pillow_img_to_torch_image,
|
||||
torch_img_to_pillow_img,
|
||||
)
|
||||
from imaginairy.model_manager import get_diffusion_model
|
||||
from imaginairy.utils import get_device
|
||||
from tests import TESTS_FOLDER
|
||||
|
||||
# (encode strategy, decode strategy) pairs used to parametrize
# test_encode_decode, grouped by encode strategy.
strat_combos = [
    *(("sliced", decode) for decode in ("sliced", "all_at_once")),
    *(("folds", decode) for decode in ("folds", "all_at_once")),
    *(("all_at_once", decode) for decode in ("all_at_once", "sliced", "folds")),
]
|
||||
|
||||
|
||||
@pytest.mark.skipif(get_device() == "cpu", reason="Too slow to run on CPU")
@pytest.mark.parametrize("encode_strat,decode_strat", strat_combos)
def test_encode_decode(filename_base_for_outputs, encode_strat, decode_strat):
    """Test that encoding and decoding works.

    Round-trips a test image through the model's first-stage autoencoder using
    the given encode/decode strategy pair and writes three images next to
    ``filename_base_for_outputs``: the original (``_orig.png``), the decoded
    result (``.png``) and their per-pixel absolute difference (``_diff.png``).

    Args:
        filename_base_for_outputs: Path prefix used for the saved images.
        encode_strat: ``"all_at_once"`` or ``"folds"``; any other value
            selects the sliced encoder.
        decode_strat: ``"all_at_once"`` or ``"folds"``; any other value
            selects the sliced decoder.
    """
    model = get_diffusion_model()
    autoencoder = model.first_stage_model

    img = LazyLoadingImage(filepath=f"{TESTS_FOLDER}/data/beach_at_sainte_adresse.jpg")
    img = pillow_fit_image_within(img, max_height=img.height, max_width=img.width)
    img.save(f"{filename_base_for_outputs}_orig.png")
    img_t = pillow_img_to_torch_image(img).to(get_device())

    if encode_strat == "all_at_once":
        latent = autoencoder.encode_all_at_once(img_t) * model.scale_factor
    elif encode_strat == "folds":
        latent = autoencoder.encode_with_folds(img_t) * model.scale_factor
    else:
        latent = autoencoder.encode_sliced(img_t) * model.scale_factor

    if decode_strat == "all_at_once":
        decoded_img_t = autoencoder.decode_all_at_once(latent / model.scale_factor)
    elif decode_strat == "folds":
        decoded_img_t = autoencoder.decode_with_folds(latent / model.scale_factor)
    else:
        decoded_img_t = autoencoder.decode_sliced(latent / model.scale_factor)

    # Resize the decoded tensor to the input's spatial size so the two images
    # can be compared pixel by pixel.
    decoded_img_t = interpolate(decoded_img_t, img_t.shape[-2:])
    decoded_img = torch_img_to_pillow_img(decoded_img_t)
    decoded_img.save(f"{filename_base_for_outputs}.png")

    # Widen to a signed type before subtracting: plain uint8 subtraction wraps
    # around (e.g. 3 - 5 == 254), which would show small negative errors as
    # near-white pixels in the diff image.
    # NOTE(review): assumes both images convert to uint8 arrays of the same
    # shape -- confirm.
    orig_arr = np.asarray(img).astype(np.int16)
    decoded_arr = np.asarray(decoded_img).astype(np.int16)
    abs_diff = np.abs(orig_arr - decoded_arr).astype(np.uint8)
    diff_img = Image.fromarray(abs_diff)
    diff_img.save(f"{filename_base_for_outputs}_diff.png")
|
||||
|
||||
|
||||
@pytest.mark.skip()
def test_encode_decode_naive_scale(filename_base_for_outputs):
    """Encode an image, upsample its latent by 2x, decode it and save the result.

    Writes the source image to ``<base>_orig.png`` and the decoded, naively
    upscaled image to ``<base>.png``.
    """
    model = get_diffusion_model()

    source_img = LazyLoadingImage(filepath=f"{TESTS_FOLDER}/data/dog.jpg")
    source_img = pillow_fit_image_within(
        source_img, max_height=source_img.height, max_width=source_img.width
    )
    source_img.save(f"{filename_base_for_outputs}_orig.png")

    source_t = pillow_img_to_torch_image(source_img).to(get_device())
    encoded = model.first_stage_model.encode_sliced(source_t) * model.scale_factor

    # Naive upscale: interpolate the latent itself (default interpolation mode)
    # rather than using a learned upscaler.
    scaled_latent = interpolate(encoded, scale_factor=2)

    decoded_t = model.first_stage_model.decode_sliced(
        scaled_latent / model.scale_factor
    )
    result_img = torch_img_to_pillow_img(decoded_t)
    result_img.save(f"{filename_base_for_outputs}.png")
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="experimental")
def test_upscale_methods(filename_base_for_outputs, steps):
    """Compare upscale methods.

    Crops a region of a test image, upscales it 4x with each Pillow resampling
    filter, and also runs it twice through ``upscale_latent`` with a "sharp"
    and a "blurry" prompt. Every result is saved as
    ``<filename_base_for_outputs>_<method>.jpg`` for visual comparison.

    Args:
        filename_base_for_outputs: Path prefix used for the saved images.
        steps: Currently ignored; see the note in the body.
    """
    # NOTE(review): the ``steps`` argument is immediately overridden, so the
    # value supplied by pytest is never used. Kept as-is to preserve behavior.
    steps = 25
    model = get_diffusion_model()
    roi_pcts = (0.7, 0.1, 0.9, 0.3)

    def crop_pct(image, pcts):
        """Crop ``image`` to a (left, top, right, bottom) box given as fractions."""
        w, h = image.size
        roi = (
            int(w * pcts[0]),
            int(h * pcts[1]),
            int(w * pcts[2]),
            int(h * pcts[3]),
        )
        return image.crop(roi)

    def decode(latent):
        """Decode a scaled latent back into a Pillow image."""
        t = model.first_stage_model.decode_sliced(latent / model.scale_factor)
        return torch_img_to_pillow_img(t)

    img = LazyLoadingImage(
        filepath=f"{TESTS_FOLDER}/data/010853_1_kdpmpp2m30_PS7.5_portrait_photo_of_a_freckled_woman_[generated].jpg"
    )
    img = pillow_fit_image_within(img, max_height=img.height, max_width=img.width)
    img = crop_pct(img, roi_pcts)

    sampling_methods = [
        ("nearest", Image.Resampling.NEAREST),
        ("bilinear", Image.Resampling.BILINEAR),
        ("bicubic", Image.Resampling.BICUBIC),
        ("lanczos", Image.Resampling.LANCZOS),
    ]
    # Baselines: classic 4x resampling of the cropped image, as (image, name).
    upscaled = [
        (
            img.resize((img.width * 4, img.height * 4), resample=sample_method),
            method_name,
        )
        for method_name, sample_method in sampling_methods
    ]

    img_t = pillow_img_to_torch_image(img).to(get_device())
    latent = model.first_stage_model.encode_sliced(img_t) * model.scale_factor

    # The latent upscaler is applied twice per prompt.
    # NOTE(review): presumably each pass is 2x, giving 4x to match the
    # baselines -- confirm.
    sharp_latent = upscale_latent(
        latent, steps=steps, upscale_prompt="high detail, sharp focus, 4k"
    )
    sharp_latent = upscale_latent(
        sharp_latent, steps=steps, upscale_prompt="high detail, sharp focus, 4k"
    )
    upscaled.append((decode(sharp_latent), "riverwing-upscaler-sharp"))

    blurry_latent = upscale_latent(
        latent, steps=steps, upscale_prompt="blurry, low detail, 360p"
    )
    blurry_latent = upscale_latent(
        blurry_latent, steps=steps, upscale_prompt="blurry, low detail, 360p"
    )
    upscaled.append((decode(blurry_latent), "riverwing-upscaler-blurry"))

    # upscaled.append((decode(latent).resize(), "original"))

    # A distinct loop name keeps the source ``img`` intact, and the resampling
    # constant now matches the ``Image.Resampling`` enum used above.
    for upscaled_img, name in upscaled:
        upscaled_img.resize(
            (upscaled_img.width, upscaled_img.height),
            resample=Image.Resampling.NEAREST,
        ).save(f"{filename_base_for_outputs}_{name}.jpg")
|
Loading…
Reference in New Issue