feature: add ability to dynamically make word images (#417)

6 months ago · 2144f26fa7
parent 3bd3dfdeaf
commit 2144f26fa7
10 changed files with 417 additions and 7 deletions
--- a/imaginairy/cli/imagine.py
+++ b/imaginairy/cli/imagine.py
@ -59,7 +59,6 @@ from imaginairy.cli.shared import (
            "shuffle",
            "edit",
            "inpaint",
-            "details",
            "colorize",
            "qrcode",
        ]
@ -129,6 +128,8 @@ def imagine_cmd(
    Can be invoked via either `aimg imagine` or just `imagine`.
    """
    from imaginairy.schema import ControlInput, LazyLoadingImage
+    from imaginairy.utils import named_resolutions
+    from imaginairy.utils.text_image import image_from_textimg_str

    # hacky method of getting order of control images (mixing raw and normal images)
    control_images = [
@ -143,6 +144,7 @@ def imagine_cmd(
    ]

    control_inputs = []
+    resolved_width, resolved_height = named_resolutions.normalize_image_size(size)
    if control_mode:
        for i, cm in enumerate(control_mode):
            option = index_default(control_images, i, None)
@ -154,8 +156,13 @@ def imagine_cmd(
            elif option[0].name == "control_image":
                control_image = option[1]
                control_image_raw = None
-                if control_image and control_image.startswith("http"):
-                    control_image = LazyLoadingImage(url=control_image)
+                if control_image:
+                    if control_image.startswith("http"):
+                        control_image = LazyLoadingImage(url=control_image)
+                    elif control_image.startswith("textimg="):
+                        control_image = image_from_textimg_str(
+                            control_image, resolved_width, resolved_height
+                        )
            else:
                control_image = None
                control_image_raw = option[1]
--- a/imaginairy/cli/shared.py
+++ b/imaginairy/cli/shared.py
@ -119,6 +119,16 @@ def _imagine_cmd(
    for _init_image in init_images:
        if _init_image and _init_image.startswith("http"):
            _init_image = LazyLoadingImage(url=_init_image)
+        elif _init_image.startswith("textimg="):
+            from imaginairy.utils import named_resolutions
+            from imaginairy.utils.text_image import image_from_textimg_str
+
+            resolved_width, resolved_height = named_resolutions.normalize_image_size(
+                size
+            )
+            _init_image = image_from_textimg_str(
+                _init_image, resolved_width, resolved_height
+            )
        else:
            _init_image = LazyLoadingImage(filepath=_init_image)
        new_init_images.append(_init_image)
--- a/imaginairy/img_processors/control_modes.py
+++ b/imaginairy/img_processors/control_modes.py
@ -253,9 +253,6 @@ def adaptive_threshold_binarize(img: "Tensor") -> "Tensor":

    from imaginairy.utils import get_device

-    # img = img.to("cpu")
-    # img = img.to(get_device())
-
    if img.dim() != 4:
        raise ValueError("Input should be a 4d tensor")
    if img.size(1) != 3:
@ -276,7 +273,7 @@ def adaptive_threshold_binarize(img: "Tensor") -> "Tensor":
    grayscale = to_grayscale(img)
    grayscale = grayscale[:, 0:1, :, :]

-    grayscale_np = grayscale.squeeze(1).numpy()
+    grayscale_np = grayscale.squeeze(1).to("cpu").numpy()

    blockSize = 129
    C = 2
--- a/imaginairy/utils/img_utils.py
+++ b/imaginairy/utils/img_utils.py
@ -183,3 +183,41 @@ def add_caption_to_image(
        stroke_width=3,
        stroke_fill=(0, 0, 0),
    )
+
+
+def create_halo_effect(
+    bw_image: PIL.Image.Image, background_color: tuple
+) -> PIL.Image.Image:
+    """Layer an image's non-white content over a blurred copy of itself.
+
+    Pixels whose red, green and blue channels are all above 200 are treated
+    as background and made fully transparent. The remaining content is
+    blurred (Gaussian, radius 25) and pasted onto a fresh canvas filled with
+    ``background_color``; the un-blurred content is then pasted on top.
+
+    :param bw_image: Source image; it is converted to RGBA before processing.
+    :param background_color: Fill color for the new canvas.
+    :return: A new RGBA image with the same size as ``bw_image``.
+    """
+    from PIL import Image, ImageFilter
+
+    clear_white = (255, 255, 255, 0)
+
+    # Knock out the (near-)white background so only the content stays opaque.
+    cutout = bw_image.convert("RGBA")
+    pixels = [
+        clear_white if (px[0] > 200 and px[1] > 200 and px[2] > 200) else px
+        for px in cutout.getdata()
+    ]
+    cutout.putdata(pixels)  # type: ignore
+
+    # The glow layer is the same cutout, blurred. `filter` returns a new
+    # image, so the sharp cutout is left untouched.
+    glow = cutout.filter(ImageFilter.GaussianBlur(radius=25))
+
+    # Stack the layers bottom-up: background fill, glow, sharp content.
+    # Each layer is also used as its own paste mask.
+    canvas = Image.new("RGBA", bw_image.size, color=background_color)
+    for layer in (glow, cutout):
+        canvas.paste(layer, (0, 0), layer)
+
+    return canvas
--- a/imaginairy/utils/spaced_kv_parser.py
+++ b/imaginairy/utils/spaced_kv_parser.py
@ -0,0 +1,115 @@
+from functools import lru_cache
+
+from pyparsing import (
+    CharsNotIn,
+    Group,
+    OneOrMore,
+    Optional,
+    Word,
+    alphanums,
+    alphas,
+    quotedString,
+    removeQuotes,
+)
+
+
+@lru_cache
+def _make_attribute_parser():
+    """Build the pyparsing grammar for space-separated ``key=value`` pairs.
+
+    The grammar is constructed on first use and cached by ``lru_cache``.
+
+    A key is a letter followed by letters, digits or underscores. A value is
+    either a quoted string (returned without its surrounding quotes) or a run
+    of characters containing no space, ``=`` or quote character; a missing
+    value parses as ``""``. Each pair is grouped, so a parse result row is
+    ``[key, "=", value]``.
+
+    :return: A parser element matching one or more key-value pairs.
+    """
+    key_parser = Word(alphas, alphanums + "_")
+    # `setParseAction` modifies the expression it is called on, and
+    # `quotedString` is a module-level object shared by every user of
+    # pyparsing in the process. Attach the action to a private copy so other
+    # code using `quotedString` keeps seeing quoted results.
+    quoted_value_parser = quotedString.copy().setParseAction(removeQuotes)
+    unquoted_value_parser = CharsNotIn(" =\"'")
+    key_value_pair_parser = (
+        key_parser
+        + "="
+        + Optional(quoted_value_parser | unquoted_value_parser, default="")
+    )
+    multiple_pairs_parser = OneOrMore(Group(key_value_pair_parser))
+    return multiple_pairs_parser
+
+
+def parse_spaced_key_value_pairs(text: str) -> dict[str, str]:
+    """
+    Parse a string of space-separated ``key=value`` pairs into a dict.
+
+    :param text: String of key-value pairs separated by spaces.
+    :return: Mapping of each key to its value; empty when ``text`` is empty.
+    """
+    pairs: dict[str, str] = {}
+    if text:
+        parsed = _make_attribute_parser().parseString(text, parseAll=True)
+        # Every parsed row is ``[key, "=", value]``.
+        for row in parsed:
+            pairs[row[0]] = row[2]
+    return pairs
+
+
+def parse_spaced_key_value_pairs_html(text: str):
+    """Parse ``key=value`` pairs by treating them as HTML tag attributes.
+
+    The text is wrapped in a throwaway ``<foo ...>`` tag and handed to
+    ``parse_html_tag``; only the resulting attribute mapping is returned.
+    """
+    return parse_html_tag(f"<foo {text}>")["attributes"]
+
+
+def parse_html_tag(html_tag):
+    """
+    Parse a single HTML tag into its tag name and attributes.
+
+    Args:
+    html_tag (str): A string representing the HTML tag to be parsed.
+
+    Returns:
+    dict: A dictionary with 'tagname' (a string, empty if no start tag was
+    seen) and 'attributes' (a dictionary built from the tag's attributes).
+    """
+
+    from html.parser import HTMLParser
+
+    found = {"tagname": "", "attributes": {}}
+
+    class _StartTagRecorder(HTMLParser):
+        def handle_starttag(self, tag, attrs):
+            # Overwrites on every start tag, so the last one fed wins.
+            found["tagname"] = tag
+            found["attributes"] = dict(attrs)
+
+    _StartTagRecorder().feed(html_tag)
+    return found
+
+
+def parse_spaced_key_value_pairs_re(text: str) -> dict[str, str]:
+    """
+    Parse a string of space-separated ``key=value`` pairs with a regular expression.
+
+    A value is either bare (no quotes or whitespace) or wrapped in single or
+    double quotes. Quoted values are returned without their quotes and with
+    backslash escape sequences decoded.
+
+    :param text: String of key-value pairs separated by spaces.
+    :return: Mapping of each key to its value; empty when ``text`` is empty.
+    :raises ValueError: If ``text`` is non-empty but contains no key-value pair.
+    """
+    if not text:
+        return {}
+    import re
+
+    # Building regex parts for readability
+    key_pattern = r"(?P<key>\w+)"
+    quoted_value_pattern = r'(?:"[^"\\]*(?:\\.[^"\\]*)*"|\'[^\'\\]*(?:\\.[^\'\\]*)*\')'
+    unquoted_value_pattern = r'[^\'"\s]*'
+    value_pattern = f"(?P<value>{quoted_value_pattern}|{unquoted_value_pattern})"
+
+    # Complete pattern with named groups
+    pattern = rf"{key_pattern}={value_pattern}"
+
+    # Two capturing groups, so findall yields (key, value) tuples.
+    matches = re.findall(pattern, text)
+
+    # Validate the query string format (text is known to be non-empty here).
+    if not matches:
+        raise ValueError("Invalid format")
+
+    parsed_query = {}
+    for key, value in matches:
+        if (value.startswith('"') and value.endswith('"')) or (
+            value.startswith("'") and value.endswith("'")
+        ):
+            # Remove quotes and handle escape sequences. The `unicode_escape`
+            # codec reads bytes as Latin-1, so encoding as UTF-8 first would
+            # garble every non-ASCII character. Encoding as Latin-1 with
+            # `backslashreplace` keeps Latin-1 characters as single bytes and
+            # turns everything else into \uXXXX escapes that decode cleanly.
+            value = (
+                value[1:-1]
+                .encode("latin-1", errors="backslashreplace")
+                .decode("unicode_escape")
+            )
+        parsed_query[key] = value
+
+    return parsed_query
--- a/imaginairy/utils/text_image.py
+++ b/imaginairy/utils/text_image.py
@ -0,0 +1,141 @@
+from typing import Literal
+
+import pyparsing
+from PIL import Image, ImageDraw, ImageFont
+from PIL.ImageColor import getrgb
+
+from imaginairy.utils.img_utils import create_halo_effect
+from imaginairy.utils.paths import PKG_ROOT
+from imaginairy.utils.spaced_kv_parser import parse_spaced_key_value_pairs
+
+
+def determine_max_font_size(
+    text: str,
+    draw: ImageDraw.ImageDraw,
+    font_path: str,
+    width: int,
+    height: int,
+    margin_pct: float,
+    line_spacing: int = 4,
+) -> int:
+    """
+    Determine the maximum font size that allows the text to fit within the given image dimensions and margin constraints.
+    Uses multiline_textbbox (Pillow 10.1.0) to measure the text.
+
+    Sizes are tried one at a time starting from 1 until the measured text no
+    longer fits; the last size that fit is returned. If the text does not
+    fit even at size 1, the result is 0.
+
+    :param text: Text to be drawn. Must not be empty.
+    :param draw: ImageDraw object to measure text size.
+    :param font_path: Path to the font file.
+    :param width: Width of the image.
+    :param height: Height of the image.
+    :param margin_pct: Margin percentage, applied on each side of both axes.
+    :param line_spacing: Spacing between lines used when measuring the text.
+    :return: Maximum font size.
+    :raises ValueError: If ``text`` is empty.
+    """
+    if not text:
+        # Empty text is expected to measure as a zero-size box at every font
+        # size, so the search below would never find a size that is too big.
+        msg = "Cannot determine a font size for empty text"
+        raise ValueError(msg)
+
+    max_width = width - 2 * (width * margin_pct)
+    max_height = height - 2 * (height * margin_pct)
+
+    font_size = 1
+    font = ImageFont.truetype(font_path, font_size)
+
+    while True:
+        # Use multiline_textbbox to get the bounding box of the text
+        bbox = draw.multiline_textbbox((0, 0), text, font=font, spacing=line_spacing)
+        text_width = bbox[2] - bbox[0]  # right - left
+        text_height = bbox[3] - bbox[1]  # bottom - top
+
+        if text_width > max_width or text_height > max_height:
+            break
+        font_size += 1
+        font = ImageFont.truetype(font_path, font_size)
+
+    # Subtract 1 because the loop exits after the size becomes too large
+    return font_size - 1
+
+
+def generate_word_image(
+    text: str,
+    width: int,
+    height: int,
+    margin_pct: float = 0.1,
+    line_spacing: int = 4,
+    text_align: Literal["left", "center", "right"] = "center",
+    font_path: str = f"{PKG_ROOT}/data/DejaVuSans.ttf",
+    font_color: str = "black",
+    background_color: str = "white",
+) -> Image.Image:
+    """
+    Render text centered on a solid background at the largest font size that fits.
+
+    :param text: Text to draw; may contain newlines.
+    :param width: Width of the image.
+    :param height: Height of the image.
+    :param margin_pct: Margin percentage used when sizing the text.
+    :param line_spacing: Spacing between lines of text.
+    :param text_align: Alignment of lines relative to each other.
+    :param font_path: Path to the font file.
+    :param font_color: Fill color of the text.
+    :param background_color: Fill color of the image.
+    :return: A new RGB image of size ``(width, height)``.
+    """
+    image = Image.new("RGB", (width, height), color=background_color)
+    draw = ImageDraw.Draw(image)
+
+    max_font_size = determine_max_font_size(
+        text, draw, font_path, width, height, margin_pct, line_spacing=line_spacing
+    )
+
+    font = ImageFont.truetype(font_path, max_font_size)
+
+    # Measure with the same spacing and alignment that are used for drawing
+    # below; otherwise the centering is off whenever they are not the
+    # defaults.
+    bbox = draw.multiline_textbbox(
+        (0, 0), text, font=font, spacing=line_spacing, align=text_align
+    )
+
+    # Calculate text position. The bbox is measured from a (0, 0) origin and
+    # its left/top edges are generally not zero, so both offsets are removed
+    # to center the visible text rather than the drawing origin.
+    text_width = bbox[2] - bbox[0]
+    text_height = bbox[3] - bbox[1]
+    x = (width - text_width) / 2 - bbox[0]
+    y = (height - text_height) / 2 - bbox[1]
+
+    draw.multiline_text(
+        (x, y), text, fill=font_color, font=font, align=text_align, spacing=line_spacing
+    )
+
+    return image
+
+
+def image_from_textimg_str(text: str, width: int, height: int) -> Image.Image:
+    """
+    Create an image from a textimg string.
+
+    The string is a list of space-separated ``key=value`` attributes whose
+    first key must be ``textimg`` (the text to render). A literal
+    backslash-n sequence in the text is turned into a line break. Optional
+    attributes: ``font``, ``font_color``, ``background_color``,
+    ``text_align``, ``line_spacing``, ``margin_pct`` and ``halo``
+    (enabled by ``true``, ``1`` or ``yes``).
+
+    :param text: The textimg specification string.
+    :param width: Width of the generated image.
+    :param height: Height of the generated image.
+    :return: The rendered image.
+    :raises ValueError: If the string cannot be parsed, does not start with
+        ``textimg``, or contains an unknown attribute or invalid alignment.
+    """
+    try:
+        data = parse_spaced_key_value_pairs(text)
+    except pyparsing.ParseException as err:
+        raise ValueError("Invalid format for textimg") from err
+
+    # Default of None so that an empty mapping (empty input string) is
+    # reported as a format error instead of leaking StopIteration.
+    first_key = next(iter(data), None)
+
+    if first_key != "textimg":
+        raise ValueError("Invalid format for textimg")
+
+    allowed_keys = {
+        "textimg",
+        "font",
+        "font_color",
+        "background_color",
+        "text_align",
+        "line_spacing",
+        "margin_pct",
+        "halo",
+    }
+    submitted_keys = set(data.keys())
+    invalid_keys = submitted_keys - allowed_keys
+    if invalid_keys:
+        msg = f"Invalid attributes for textimg: '{invalid_keys}'. Valid attributes are '{allowed_keys}'"
+        raise ValueError(msg)
+
+    text_align = data.get("text_align", "center")
+    valid_alignments = {"left", "center", "right"}
+    if text_align not in valid_alignments:
+        msg = f"Invalid text_align '{text_align}'. Valid options are 'left', 'center' and 'right'"
+        raise ValueError(msg)
+
+    background_color: str = data.get("background_color", "white")
+    img = generate_word_image(
+        text=data["textimg"].replace("\\n", "\n"),
+        width=width,
+        height=height,
+        margin_pct=float(data.get("margin_pct", 0.1)),
+        line_spacing=int(data.get("line_spacing", 4)),
+        text_align=text_align,  # type: ignore
+        font_path=data.get("font", f"{PKG_ROOT}/data/DejaVuSans.ttf"),
+        font_color=data.get("font_color", "black"),
+        background_color=background_color,
+    )
+    if data.get("halo", "0").lower() in ("true", "1", "yes"):
+        # The RGB tuple is only needed by the halo effect, so resolve it here.
+        img = create_halo_effect(img, background_color=getrgb(background_color))
+
+    return img
--- a/tests/data/obey-halo.png
+++ b/tests/data/obey-halo.png
--- a/tests/data/obey.png
+++ b/tests/data/obey.png
--- a/tests/test_utils/test_img_utils.py
+++ b/tests/test_utils/test_img_utils.py
@ -0,0 +1,14 @@
+from imaginairy.utils.img_utils import create_halo_effect
+from imaginairy.utils.text_image import generate_word_image
+from tests import TESTS_FOLDER
+
+
+def test_create_halo_effect():
+    """Test if the image has the correct dimensions"""
+    width, height = 1920, 1080
+    bg_shade = 245
+    img = generate_word_image("OBEY", width, height, font_color="black")
+    assert img.size == (width, height)
+    img.save(f"{TESTS_FOLDER}/data/obey.png")
+
+    img = create_halo_effect(img, (bg_shade, bg_shade, bg_shade))
+    # The halo canvas is created as RGBA at the source image's size.
+    assert img.size == (width, height)
+    assert img.mode == "RGBA"
+    img.save(f"{TESTS_FOLDER}/data/obey-halo.png")
--- a/tests/test_utils/test_spaced_kv_parser.py
+++ b/tests/test_utils/test_spaced_kv_parser.py
@ -0,0 +1,88 @@
+import pyparsing as pp
+import pytest
+from pyparsing import ParseException
+
+from imaginairy.utils.spaced_kv_parser import parse_spaced_key_value_pairs
+
+
+def test_basic_parsing():
+    """Quoted and unquoted values can be mixed in a single string."""
+    raw = "text='Hello World' font='Arial' size=12 color='#FF0000'"
+
+    try:
+        parsed = parse_spaced_key_value_pairs(raw)
+    except pp.ParseException as exc:
+        # Show pyparsing's detailed diagnosis before letting the test fail.
+        print(exc.explain())
+        raise
+
+    assert parsed == {
+        "text": "Hello World",
+        "font": "Arial",
+        "size": "12",
+        "color": "#FF0000",
+    }
+
+
+def test_unquoted_values():
+    """Bare values are returned as strings, unchanged."""
+    parsed = parse_spaced_key_value_pairs("width=800 height=600 bg_color=#FFFFFF")
+    assert parsed == {"width": "800", "height": "600", "bg_color": "#FFFFFF"}
+
+
+def test_mixed_quoted_unquoted():
+    """A quoted value may be followed by a bare one."""
+    parsed = parse_spaced_key_value_pairs("title='My Title' resolution=1080p")
+    assert parsed == {"title": "My Title", "resolution": "1080p"}
+
+
+def test_empty_string():
+    """An empty input yields an empty mapping."""
+    assert parse_spaced_key_value_pairs("") == {}
+
+
+def test_invalid_format():
+    """Text without any ``key=value`` pair is rejected by the parser."""
+    not_key_value_text = "This is not a valid format"
+    with pytest.raises(ParseException):  # noqa
+        parse_spaced_key_value_pairs(not_key_value_text)
+
+
+def test_only_keys():
+    """Keys with nothing after ``=`` map to the empty string."""
+    parsed = parse_spaced_key_value_pairs("key1= key2=")
+    assert parsed == {"key1": "", "key2": ""}
+
+
+# (query, expected) pairs fed to `test_challenging_queries` below.
+# They cover quotes nested inside the other quote style, backslash escapes
+# (which the expected values keep verbatim, backslashes included), non-ASCII
+# characters, significant whitespace inside quotes, empty values and the
+# empty string.
+challenging_test_queries = [
+    ("foo=\"bar'baz\" bar='foo\"bar'", {"foo": "bar'baz", "bar": 'foo"bar'}),
+    ("foo=\"'bar'\" bar='\"baz\"'", {"foo": "'bar'", "bar": '"baz"'}),
+    ("foo=\"bar\\\"baz\" bar='foo\\'bar'", {"foo": 'bar\\"baz', "bar": "foo\\'bar"}),
+    (
+        'special=👍 emoji="😀 😃" text="This is a test\\nwith newline"',
+        {"special": "👍", "emoji": "😀 😃", "text": "This is a test\\nwith newline"},
+    ),
+    ('name=" John  Doe " age=" 30 "', {"name": " John  Doe ", "age": " 30 "}),
+    ("special=@@!!", {"special": "@@!!"}),
+    (
+        'text="This is a test\\\\nwith incomplete escape\\\\"',
+        {"text": "This is a test\\\\nwith incomplete escape\\\\"},
+    ),
+    ("foo= bar=", {"foo": "", "bar": ""}),
+    ("foo= bar=30.4 zab=-1.2", {"foo": "", "bar": "30.4", "zab": "-1.2"}),
+    ("", {}),
+    (
+        'foo="bar" baz=\'qux\' specialChars="@@!!" empty= complex="\'This is a \\"complex\\" string\'"',
+        {
+            "foo": "bar",
+            "baz": "qux",
+            "specialChars": "@@!!",
+            "empty": "",
+            "complex": "'This is a \\\"complex\\\" string'",
+        },
+    ),
+]
+
+
+@pytest.mark.parametrize(("query", "expected"), challenging_test_queries)
+def test_challenging_queries(query, expected):
+    """Each tricky query string must parse to exactly its expected mapping."""
+    assert parse_spaced_key_value_pairs(query) == expected