feature: allow multiple additive targets for text masking

2 years ago · f040e3bffe
parent a0087c1304
commit f040e3bffe
6 changed files with 43 additions and 19 deletions
--- a/README.md
+++ b/README.md
@ -38,19 +38,19 @@ Generating 🖼  : "portrait photo of a freckled woman" 512x512px seed:500686645
 ```bash
 >> imagine --init-image pearl_earring.jpg --mask-prompt face --mask-mode keep --init-image-strength .4 "a female doctor" "an elegant woman"
 ```
-<img src="https://raw.githubusercontent.com/brycedrennan/imaginAIry/master/assets/mask_examples/pearl000.jpg" height="256">➡️ 
-<img src="https://raw.githubusercontent.com/brycedrennan/imaginAIry/master/assets/mask_examples/pearl002.jpg" height="256">
-<img src="https://raw.githubusercontent.com/brycedrennan/imaginAIry/master/assets/mask_examples/pearl004.jpg" height="256">
-<img src="https://raw.githubusercontent.com/brycedrennan/imaginAIry/master/assets/mask_examples/pearl001.jpg" height="256">
-<img src="https://raw.githubusercontent.com/brycedrennan/imaginAIry/master/assets/mask_examples/pearl003.jpg" height="256">
+<img src="https://raw.githubusercontent.com/brycedrennan/imaginAIry/master/assets/mask_examples/pearl000.jpg" height="200">➡️ 
+<img src="https://raw.githubusercontent.com/brycedrennan/imaginAIry/master/assets/mask_examples/pearl002.jpg" height="200">
+<img src="https://raw.githubusercontent.com/brycedrennan/imaginAIry/master/assets/mask_examples/pearl004.jpg" height="200">
+<img src="https://raw.githubusercontent.com/brycedrennan/imaginAIry/master/assets/mask_examples/pearl001.jpg" height="200">
+<img src="https://raw.githubusercontent.com/brycedrennan/imaginAIry/master/assets/mask_examples/pearl003.jpg" height="200">
 ```bash
 >> imagine --init-image fruit-bowl.jpg --mask-prompt fruit --mask-mode replace --init-image-strength .1 "a bowl of pears" "a bowl of gold" "a bowl of popcorn" "a bowl of spaghetti"
 ```
-<img src="https://raw.githubusercontent.com/brycedrennan/imaginAIry/master/assets/000056_293284644_PLMS40_PS7.5_photo_of_a_bowl_of_fruit.jpg" height="256">➡️ 
-<img src="https://raw.githubusercontent.com/brycedrennan/imaginAIry/master/assets/mask_examples/bowl004.jpg" height="256">
-<img src="https://raw.githubusercontent.com/brycedrennan/imaginAIry/master/assets/mask_examples/bowl001.jpg" height="256">
-<img src="https://raw.githubusercontent.com/brycedrennan/imaginAIry/master/assets/mask_examples/bowl002.jpg" height="256">
-<img src="https://raw.githubusercontent.com/brycedrennan/imaginAIry/master/assets/mask_examples/bowl003.jpg" height="256">
+<img src="https://raw.githubusercontent.com/brycedrennan/imaginAIry/master/assets/000056_293284644_PLMS40_PS7.5_photo_of_a_bowl_of_fruit.jpg" height="200">➡️ 
+<img src="https://raw.githubusercontent.com/brycedrennan/imaginAIry/master/assets/mask_examples/bowl004.jpg" height="200">
+<img src="https://raw.githubusercontent.com/brycedrennan/imaginAIry/master/assets/mask_examples/bowl001.jpg" height="200">
+<img src="https://raw.githubusercontent.com/brycedrennan/imaginAIry/master/assets/mask_examples/bowl002.jpg" height="200">
+<img src="https://raw.githubusercontent.com/brycedrennan/imaginAIry/master/assets/mask_examples/bowl003.jpg" height="200">


 ### Face Enhancement [by CodeFormer](https://github.com/sczhou/CodeFormer)
@ -115,7 +115,14 @@ prompts = [
    ]),
    ImaginePrompt(
        "a spacious building", 
-        init_image=LazyLoadingImage(url)
+        init_image=LazyLoadingImage(url=url)
+    ),
+    ImaginePrompt(
+        "a bowl of strawberries", 
+        init_image=LazyLoadingImage(filepath="mypath/to/bowl_of_fruit.jpg"),
+        mask_prompt="fruit|stems",
+        mask_mode="replace",
+        mask_expansion=3
    )
 ]
 for result in imagine(prompts):
--- a/imaginairy/api.py
+++ b/imaginairy/api.py
@ -237,9 +237,10 @@ def imagine(
                        log_img(mask_image, "init mask")
                        # mask_image = mask_image.filter(ImageFilter.GaussianBlur(8))
                        mask_image = expand_mask(mask_image, prompt.mask_expansion)
+                        log_img(mask_image, "init mask expanded")
                        if prompt.mask_mode == ImaginePrompt.MaskMode.REPLACE:
                            mask_image = ImageOps.invert(mask_image)
-                        log_img(mask_image, "init mask expanded")
+
                        log_img(
                            Image.composite(init_image, mask_image, mask_image),
                            "mask overlay",
@ -310,9 +311,12 @@ def imagine(
                    x_sample_8_orig = x_sample.astype(np.uint8)
                    img = Image.fromarray(x_sample_8_orig)
                    if mask_image_orig and init_image:
+
+                        mask_image_orig = expand_mask(mask_image_orig, -3)
                        mask_image_orig = mask_image_orig.filter(
                            ImageFilter.GaussianBlur(radius=3)
                        )
+                        log_img(mask_image_orig, "reconstituting mask")
                        mask_image_orig = ImageOps.invert(mask_image_orig)
                        img = Image.composite(img, init_image, mask_image_orig)
                        log_img(img, "reconstituted image")
@ -331,6 +335,9 @@ def imagine(
                    if prompt.upscale:
                        logger.info("    Upscaling 🖼  using real-ESRGAN...")
                        upscaled_img = upscale_image(img)
+                        if prompt.fix_faces:
+                            logger.info("    Fixing 😊 's in big 🖼  using CodeFormer...")
+                            upscaled_img = enhance_faces(upscaled_img, fidelity=0.8)

                    yield ImagineResult(
                        img=img,
--- a/imaginairy/cmds.py
+++ b/imaginairy/cmds.py
@ -139,7 +139,7 @@ def configure_logging(level="INFO"):
 )
@click.option(
    "--mask-expansion",
-    default="8",
+    default="2",
    type=int,
    help="How much to grow (or shrink) the mask area",
 )
--- a/imaginairy/enhancers/clip_masking.py
+++ b/imaginairy/enhancers/clip_masking.py
@ -27,10 +27,11 @@ def clip_mask_model():


 def get_img_mask(img, mask_description):
-    return get_img_masks(img, [mask_description])[0]
+    descriptions = mask_description.split("|")
+    return get_img_masks(img, descriptions, combine=True)[0]


-def get_img_masks(img, mask_descriptions):
+def get_img_masks(img, mask_descriptions, combine=False):
    a, b = img.size
    orig_size = b, a
    log_img(img, "image for masking")
@ -49,8 +50,17 @@ def get_img_masks(img, mask_descriptions):
            img.repeat(len(mask_descriptions), 1, 1, 1), mask_descriptions
        )[0]
    preds = transforms.Resize(orig_size)(preds)
+    preds = transforms.GaussianBlur(kernel_size=9)(preds)

    preds = [torch.sigmoid(p[0]) for p in preds]
+
+    if combine:
+        f_pred = preds[0]
+        for description, pred in zip(mask_descriptions, preds):
+            log_img(pred, f"mask search: {description}")
+            f_pred = torch.maximum(f_pred, pred)
+        preds = [f_pred]
+
    bw_preds = []
    for p in preds:
        log_img(p, f"clip mask for {mask_descriptions}")
@ -58,7 +68,7 @@ def get_img_masks(img, mask_descriptions):
        _min = p.min()
        _max = p.max()
        _range = _max - _min
-        p = (p > (_min + (_range * 0.5))).float()
+        p = (p > (_min + (_range * 0.25))).float()
        bw_preds.append(transforms.ToPILImage()(p))

    return bw_preds
--- a/imaginairy/schema.py
+++ b/imaginairy/schema.py
@ -94,7 +94,7 @@ class ImaginePrompt:
        mask_prompt=None,
        mask_image=None,
        mask_mode=MaskMode.REPLACE,
-        mask_expansion=8,
+        mask_expansion=2,
        seed=None,
        steps=50,
        height=512,
--- a/imaginairy/utils.py
+++ b/imaginairy/utils.py
@ -106,9 +106,9 @@ def fix_torch_nn_layer_norm():

 def expand_mask(mask_image, size):
    if size < 0:
-        threshold = 0.9
+        threshold = 0.95
    else:
-        threshold = 0.1
+        threshold = 0.05
    mask_image = mask_image.convert("L")
    mask_image = mask_image.filter(ImageFilter.GaussianBlur(size))
    log_img(mask_image, "init mask blurred")