diff --git a/imaginairy/api/generate.py b/imaginairy/api/generate.py index 2f260a6..d938557 100755 --- a/imaginairy/api/generate.py +++ b/imaginairy/api/generate.py @@ -2,9 +2,10 @@ import logging import os -from typing import Callable +from typing import TYPE_CHECKING, Callable -from imaginairy.utils import prompt_normalized +if TYPE_CHECKING: + from imaginairy.schema import ImaginePrompt logger = logging.getLogger(__name__) @@ -36,7 +37,7 @@ def imagine_image_files( from PIL import ImageDraw from imaginairy.api.video_sample import generate_video - from imaginairy.utils import get_next_filenumber + from imaginairy.utils import get_next_filenumber, prompt_normalized from imaginairy.utils.animations import make_bounce_animation from imaginairy.utils.img_utils import pillow_fit_image_within @@ -78,6 +79,7 @@ def imagine_image_files( img_str = "" if prompt.init_image: img_str = f"_img2img-{prompt.init_image_strength}" + basefilename = ( f"{base_count:06}_{prompt.seed}_{prompt.solver_type.replace('_', '')}{prompt.steps}_" f"PS{prompt.prompt_strength}{img_str}_{prompt_normalized(prompt.prompt_text)}" diff --git a/imaginairy/api/generate_compvis.py b/imaginairy/api/generate_compvis.py index 70a03d4..06de995 100644 --- a/imaginairy/api/generate_compvis.py +++ b/imaginairy/api/generate_compvis.py @@ -1,14 +1,15 @@ -from typing import Any - -from imaginairy.api.generate import ( - IMAGINAIRY_SAFETY_MODE, - logger, -) -from imaginairy.api.generate_refiners import _generate_composition_image -from imaginairy.schema import ImaginePrompt, LazyLoadingImage +import logging +from typing import TYPE_CHECKING, Any + +from imaginairy.api.generate import IMAGINAIRY_SAFETY_MODE from imaginairy.utils.img_utils import calc_scale_to_fit_within, combine_image from imaginairy.utils.named_resolutions import normalize_image_size +if TYPE_CHECKING: + from imaginairy.schema import ImaginePrompt + +logger = logging.getLogger(__name__) + def _generate_single_image_compvis( prompt: "ImaginePrompt", @@ -33,7 +34,12 @@ def _generate_single_image_compvis( from imaginairy.modules.midas.api import torch_image_to_depth_map from imaginairy.samplers import SOLVER_LOOKUP from imaginairy.samplers.editing import CFGEditingDenoiser - from imaginairy.schema import ControlInput, ImagineResult, MaskMode + from imaginairy.schema import ( + ControlInput, + ImagineResult, + LazyLoadingImage, + MaskMode, + ) from imaginairy.utils import get_device, randn_seeded from imaginairy.utils.img_utils import ( add_caption_to_image, @@ -61,7 +67,7 @@ def _generate_single_image_compvis( latent_channels = 4 downsampling_factor = 8 batch_size = 1 - global _most_recent_result + # global _most_recent_result # handle prompt pulling in previous values # if isinstance(prompt.init_image, str) and prompt.init_image.startswith("*prev"): # _, img_type = prompt.init_image.strip("*").split(".") @@ -473,7 +479,7 @@ def _generate_single_image_compvis( progress_latents=progress_latents.copy(), ) - _most_recent_result = result + # _most_recent_result = result logger.info(f"Image Generated. Timings: {result.timings_str()}") return result diff --git a/imaginairy/api/generate_refiners.py b/imaginairy/api/generate_refiners.py index e1e1949..2924872 100644 --- a/imaginairy/api/generate_refiners.py +++ b/imaginairy/api/generate_refiners.py @@ -5,8 +5,6 @@ from typing import List, Optional from imaginairy.config import CONTROL_CONFIG_SHORTCUTS from imaginairy.schema import ControlInput, ImaginePrompt, MaskMode, WeightedPrompt -from imaginairy.utils.img_utils import calc_scale_to_fit_within -from imaginairy.utils.named_resolutions import normalize_image_size logger = logging.getLogger(__name__) @@ -226,6 +224,7 @@ def generate_single_image( comp_image, comp_img_orig = _generate_composition_image(**compose_kwargs) if comp_image is not None: + prompt.fix_faces = False # done in composition result_images["composition"] = comp_img_orig result_images["composition-upscaled"] = comp_image composition_strength = prompt.composition_strength @@ -535,8 +534,9 @@ def _generate_composition_image( ): from PIL import Image - from imaginairy.api.generate_refiners import generate_single_image from imaginairy.utils import default, get_default_dtype + from imaginairy.utils.img_utils import calc_scale_to_fit_within + from imaginairy.utils.named_resolutions import normalize_image_size cutoff = normalize_image_size(cutoff) if prompt.width <= cutoff[0] and prompt.height <= cutoff[1]: @@ -571,13 +571,12 @@ def _generate_composition_image( while img.width < target_width: from imaginairy.enhancers.upscale_realesrgan import upscale_image - img = upscale_image(img) + if prompt.fix_faces: + from imaginairy.enhancers.face_restoration_codeformer import enhance_faces - # samples = generate_single_image(composition_prompt, return_latent=True) - # while samples.shape[-1] * 8 < target_width: - # samples = upscale_latent(samples) - # - # img = model_latent_to_pillow_img(samples) + img = enhance_faces(img, fidelity=prompt.fix_faces_fidelity) + + img = upscale_image(img, ultrasharp=True) img = img.resize( (target_width, target_height), diff --git a/tests/data/cuda-tests.csv b/tests/data/cuda-tests.csv index 94254cd..66262db 100644 --- a/tests/data/cuda-tests.csv +++ b/tests/data/cuda-tests.csv @@ -48,6 +48,11 @@ tests/test_enhancers.py::test_clip_masking tests/test_enhancers.py::test_clip_text_comparison tests/test_enhancers.py::test_describe_picture tests/test_enhancers.py::test_fix_faces +tests/test_enhancers/test_clip_masking.py::test_clip_masking +tests/test_enhancers/test_describe_image_blip.py::test_describe_picture +tests/test_enhancers/test_describe_image_clip.py::test_clip_text_comparison +tests/test_enhancers/test_face_restoration_codeformer.py::test_fix_faces +tests/test_enhancers/test_upscale_realesrgan.py::test_upscale_textured_image tests/test_outpaint.py::test_outpainting_outpaint tests/test_safety.py::test_is_nsfw tests/test_utils/test_model_cache.py::test_cache_ordering diff --git a/tests/test_enhancers.py b/tests/test_enhancers.py deleted file mode 100644 index 2642661..0000000 --- a/tests/test_enhancers.py +++ /dev/null @@ -1,165 +0,0 @@ -import pytest -from PIL import Image -from pytorch_lightning import seed_everything - -from imaginairy.api import imagine -from imaginairy.enhancers.bool_masker import MASK_PROMPT -from imaginairy.enhancers.clip_masking import get_img_mask -from imaginairy.enhancers.describe_image_blip import generate_caption -from imaginairy.enhancers.describe_image_clip import find_img_text_similarity -from imaginairy.enhancers.face_restoration_codeformer import enhance_faces -from imaginairy.schema import ImaginePrompt -from imaginairy.utils import get_device -from tests import TESTS_FOLDER -from tests.utils import assert_image_similar_to_expectation - - -@pytest.mark.skipif( - get_device() == "cpu", reason="TypeError: Got unsupported ScalarType BFloat16" -) -def test_fix_faces(filename_base_for_orig_outputs, filename_base_for_outputs): - distorted_img = Image.open(f"{TESTS_FOLDER}/data/distorted_face.png") - seed_everything(1) - img = enhance_faces(distorted_img) - - distorted_img.save(f"{filename_base_for_orig_outputs}__orig.jpg") - img_path = f"{filename_base_for_outputs}.png" - assert_image_similar_to_expectation(img, img_path=img_path, threshold=2800) - - -@pytest.mark.skipif(get_device() == "cpu", reason="Too slow to run on CPU") -def test_clip_masking(filename_base_for_outputs): - img = Image.open(f"{TESTS_FOLDER}/data/girl_with_a_pearl_earring_large.jpg") - - for mask_modifier in ["*0.5", "*6", "+1", "+11", "+101", "-25"]: - pred_bin, pred_grayscale = get_img_mask( - img, - f"face AND NOT (bandana OR hair OR blue fabric){{{mask_modifier}}}", - threshold=0.5, - ) - - mask_modifier = mask_modifier.replace("*", "x") - img_path = f"{filename_base_for_outputs}_mask{mask_modifier}_g.png" - - assert_image_similar_to_expectation( - pred_grayscale, img_path=img_path, threshold=300 - ) - - img_path = f"{filename_base_for_outputs}_mask{mask_modifier}_bin.png" - assert_image_similar_to_expectation(pred_bin, img_path=img_path, threshold=10) - - prompt = ImaginePrompt( - "woman in sparkly gold jacket", - init_image=img, - init_image_strength=0.5, - # lower steps for faster tests - steps=40, - mask_prompt="(head OR face){*5}", - mask_mode="keep", - upscale=False, - fix_faces=True, - seed=42, - # solver_type="plms", - ) - - result = next(imagine(prompt)) - img_path = f"{filename_base_for_outputs}.png" - assert_image_similar_to_expectation(result.img, img_path=img_path, threshold=7000) - - -boolean_mask_test_cases = [ - ( - "fruit bowl", - "'fruit bowl'", - ), - ( - "((((fruit bowl))))", - "'fruit bowl'", - ), - ( - "fruit OR bowl", - "('fruit' OR 'bowl')", - ), - ( - "fruit|bowl", - "('fruit' OR 'bowl')", - ), - ( - "fruit | bowl", - "('fruit' OR 'bowl')", - ), - ( - "fruit OR bowl OR pear", - "('fruit' OR 'bowl' OR 'pear')", - ), - ( - "fruit AND bowl", - "('fruit' AND 'bowl')", - ), - ( - "fruit & bowl", - "('fruit' AND 'bowl')", - ), - ( - "fruit AND NOT green", - "('fruit' AND NOT 'green')", - ), - ( - "fruit bowl{+0.5}", - "'fruit bowl'+0.5", - ), - ( - "fruit bowl{+0.5} OR fruit", - "('fruit bowl'+0.5 OR 'fruit')", - ), - ( - "NOT pizza", - "NOT 'pizza'", - ), - ( - "car AND (wheels OR trunk OR engine OR windows) AND NOT (truck OR headlights{*10})", - "('car' AND ('wheels' OR 'trunk' OR 'engine' OR 'windows') AND NOT ('truck' OR 'headlights'*10))", - ), - ( - "car AND (wheels OR trunk OR engine OR windows OR headlights) AND NOT (truck OR headlights){*10}", - "('car' AND ('wheels' OR 'trunk' OR 'engine' OR 'windows' OR 'headlights') AND NOT ('truck' OR 'headlights')*10)", - ), -] - - -@pytest.mark.parametrize(("mask_text", "expected"), boolean_mask_test_cases) -def test_clip_mask_parser(mask_text, expected): - parsed = MASK_PROMPT.parseString(mask_text)[0][0] - assert str(parsed) == expected - - -@pytest.mark.skipif(get_device() == "cpu", reason="Too slow to run on CPU") -def test_describe_picture(): - seed_everything(1) - img = Image.open(f"{TESTS_FOLDER}/data/girl_with_a_pearl_earring.jpg") - caption = generate_caption(img) - assert caption in { - "a painting of a girl with a pearl earring wearing a yellow dress and a pearl earring in her ear and a black background", - "a painting of a girl with a pearl ear wearing a yellow dress and a pearl earring on her left ear and a black background", - "a painting of a woman with a pearl ear wearing an ornament pearl earring and wearing an orange, white, blue and yellow dress", - "a painting of a woman with a pearl earring looking to her left, in profile with her right eye partially closed, standing upright", - } - - -@pytest.mark.skipif(get_device() == "cpu", reason="Too slow to run on CPU") -def test_clip_text_comparison(): - img = Image.open(f"{TESTS_FOLDER}/data/girl_with_a_pearl_earring.jpg") - phrases = [ - "Johannes Vermeer painting", - "a painting of a girl with a pearl earring", - "a bulldozer", - "photo", - ] - probs = find_img_text_similarity(img, phrases) - assert probs[:2] == [ - ( - "a painting of a girl with a pearl earring", - pytest.approx(0.2857227921485901, abs=0.01), - ), - ("Johannes Vermeer painting", pytest.approx(0.25186583399772644, abs=0.01)), - ] diff --git a/tests/enhancers/__init__.py b/tests/test_enhancers/__init__.py similarity index 100% rename from tests/enhancers/__init__.py rename to tests/test_enhancers/__init__.py diff --git a/tests/enhancers/test_blur_detect.py b/tests/test_enhancers/test_blur_detect.py similarity index 100% rename from tests/enhancers/test_blur_detect.py rename to tests/test_enhancers/test_blur_detect.py diff --git a/tests/test_enhancers/test_bool_masker.py b/tests/test_enhancers/test_bool_masker.py new file mode 100644 index 0000000..2a5e529 --- /dev/null +++ b/tests/test_enhancers/test_bool_masker.py @@ -0,0 +1,68 @@ +import pytest + +from imaginairy.enhancers.bool_masker import MASK_PROMPT + +boolean_mask_test_cases = [ + ( + "fruit bowl", + "'fruit bowl'", + ), + ( + "((((fruit bowl))))", + "'fruit bowl'", + ), + ( + "fruit OR bowl", + "('fruit' OR 'bowl')", + ), + ( + "fruit|bowl", + "('fruit' OR 'bowl')", + ), + ( + "fruit | bowl", + "('fruit' OR 'bowl')", + ), + ( + "fruit OR bowl OR pear", + "('fruit' OR 'bowl' OR 'pear')", + ), + ( + "fruit AND bowl", + "('fruit' AND 'bowl')", + ), + ( + "fruit & bowl", + "('fruit' AND 'bowl')", + ), + ( + "fruit AND NOT green", + "('fruit' AND NOT 'green')", + ), + ( + "fruit bowl{+0.5}", + "'fruit bowl'+0.5", + ), + ( + "fruit bowl{+0.5} OR fruit", + "('fruit bowl'+0.5 OR 'fruit')", + ), + ( + "NOT pizza", + "NOT 'pizza'", + ), + ( + "car AND (wheels OR trunk OR engine OR windows) AND NOT (truck OR headlights{*10})", + "('car' AND ('wheels' OR 'trunk' OR 'engine' OR 'windows') AND NOT ('truck' OR 'headlights'*10))", + ), + ( + "car AND (wheels OR trunk OR engine OR windows OR headlights) AND NOT (truck OR headlights){*10}", + "('car' AND ('wheels' OR 'trunk' OR 'engine' OR 'windows' OR 'headlights') AND NOT ('truck' OR 'headlights')*10)", + ), +] + + +@pytest.mark.parametrize(("mask_text", "expected"), boolean_mask_test_cases) +def test_clip_mask_parser(mask_text, expected): + parsed = MASK_PROMPT.parseString(mask_text)[0][0] + assert str(parsed) == expected diff --git a/tests/test_enhancers/test_clip_masking.py b/tests/test_enhancers/test_clip_masking.py new file mode 100644 index 0000000..2f215c9 --- /dev/null +++ b/tests/test_enhancers/test_clip_masking.py @@ -0,0 +1,49 @@ +import pytest +from PIL import Image + +from imaginairy.api import imagine +from imaginairy.enhancers.clip_masking import get_img_mask +from imaginairy.schema import ImaginePrompt +from imaginairy.utils import get_device +from tests import TESTS_FOLDER +from tests.utils import assert_image_similar_to_expectation + + +@pytest.mark.skipif(get_device() == "cpu", reason="Too slow to run on CPU") +def test_clip_masking(filename_base_for_outputs): + img = Image.open(f"{TESTS_FOLDER}/data/girl_with_a_pearl_earring_large.jpg") + + for mask_modifier in ["*0.5", "*6", "+1", "+11", "+101", "-25"]: + pred_bin, pred_grayscale = get_img_mask( + img, + f"face AND NOT (bandana OR hair OR blue fabric){{{mask_modifier}}}", + threshold=0.5, + ) + + mask_modifier = mask_modifier.replace("*", "x") + img_path = f"{filename_base_for_outputs}_mask{mask_modifier}_g.png" + + assert_image_similar_to_expectation( + pred_grayscale, img_path=img_path, threshold=300 + ) + + img_path = f"{filename_base_for_outputs}_mask{mask_modifier}_bin.png" + assert_image_similar_to_expectation(pred_bin, img_path=img_path, threshold=10) + + prompt = ImaginePrompt( + "woman in sparkly gold jacket", + init_image=img, + init_image_strength=0.5, + # lower steps for faster tests + steps=40, + mask_prompt="(head OR face){*5}", + mask_mode="keep", + upscale=False, + fix_faces=True, + seed=42, + # solver_type="plms", + ) + + result = next(imagine(prompt)) + img_path = f"{filename_base_for_outputs}.png" + assert_image_similar_to_expectation(result.img, img_path=img_path, threshold=7000) diff --git a/tests/test_enhancers/test_describe_image_blip.py b/tests/test_enhancers/test_describe_image_blip.py new file mode 100644 index 0000000..89b55ea --- /dev/null +++ b/tests/test_enhancers/test_describe_image_blip.py @@ -0,0 +1,20 @@ +import pytest +from lightning_fabric import seed_everything +from PIL import Image + +from imaginairy.enhancers.describe_image_blip import generate_caption +from imaginairy.utils import get_device +from tests import TESTS_FOLDER + + +@pytest.mark.skipif(get_device() == "cpu", reason="Too slow to run on CPU") +def test_describe_picture(): + seed_everything(1) + img = Image.open(f"{TESTS_FOLDER}/data/girl_with_a_pearl_earring.jpg") + caption = generate_caption(img) + assert caption in { + "a painting of a girl with a pearl earring wearing a yellow dress and a pearl earring in her ear and a black background", + "a painting of a girl with a pearl ear wearing a yellow dress and a pearl earring on her left ear and a black background", + "a painting of a woman with a pearl ear wearing an ornament pearl earring and wearing an orange, white, blue and yellow dress", + "a painting of a woman with a pearl earring looking to her left, in profile with her right eye partially closed, standing upright", + } diff --git a/tests/test_enhancers/test_describe_image_clip.py b/tests/test_enhancers/test_describe_image_clip.py new file mode 100644 index 0000000..2cdf6e5 --- /dev/null +++ b/tests/test_enhancers/test_describe_image_clip.py @@ -0,0 +1,25 @@ +import pytest +from PIL import Image + +from imaginairy.enhancers.describe_image_clip import find_img_text_similarity +from imaginairy.utils import get_device +from tests import TESTS_FOLDER + + +@pytest.mark.skipif(get_device() == "cpu", reason="Too slow to run on CPU") +def test_clip_text_comparison(): + img = Image.open(f"{TESTS_FOLDER}/data/girl_with_a_pearl_earring.jpg") + phrases = [ + "Johannes Vermeer painting", + "a painting of a girl with a pearl earring", + "a bulldozer", + "photo", + ] + probs = find_img_text_similarity(img, phrases) + assert probs[:2] == [ + ( + "a painting of a girl with a pearl earring", + pytest.approx(0.2857227921485901, abs=0.01), + ), + ("Johannes Vermeer painting", pytest.approx(0.25186583399772644, abs=0.01)), + ] diff --git a/tests/enhancers/test_facecrop.py b/tests/test_enhancers/test_facecrop.py similarity index 100% rename from tests/enhancers/test_facecrop.py rename to tests/test_enhancers/test_facecrop.py diff --git a/tests/enhancers/test_prompt_expansion.py b/tests/test_enhancers/test_prompt_expansion.py similarity index 100% rename from tests/enhancers/test_prompt_expansion.py rename to tests/test_enhancers/test_prompt_expansion.py diff --git a/tests/test_schema/test_imagineprompt.py b/tests/test_schema/test_imagineprompt.py index 5cca2c9..cdbef30 100644 --- a/tests/test_schema/test_imagineprompt.py +++ b/tests/test_schema/test_imagineprompt.py @@ -173,7 +173,7 @@ def test_imagine_prompt_default_negative(): def test_imagine_prompt_fix_faces_fidelity(): - assert ImaginePrompt("fruit", fix_faces_fidelity=None).fix_faces_fidelity == 0.2 + assert ImaginePrompt("fruit", fix_faces_fidelity=None).fix_faces_fidelity == 0.5 def test_imagine_prompt_init_strength_zero():