feature: add ability to dynamically make word images (#417)

pull/418/head
Bryce Drennan 6 months ago committed by GitHub
parent 3bd3dfdeaf
commit 2144f26fa7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -59,7 +59,6 @@ from imaginairy.cli.shared import (
"shuffle",
"edit",
"inpaint",
"details",
"colorize",
"qrcode",
]
@ -129,6 +128,8 @@ def imagine_cmd(
Can be invoked via either `aimg imagine` or just `imagine`.
"""
from imaginairy.schema import ControlInput, LazyLoadingImage
from imaginairy.utils import named_resolutions
from imaginairy.utils.text_image import image_from_textimg_str
# hacky method of getting order of control images (mixing raw and normal images)
control_images = [
@ -143,6 +144,7 @@ def imagine_cmd(
]
control_inputs = []
resolved_width, resolved_height = named_resolutions.normalize_image_size(size)
if control_mode:
for i, cm in enumerate(control_mode):
option = index_default(control_images, i, None)
@ -154,8 +156,13 @@ def imagine_cmd(
elif option[0].name == "control_image":
control_image = option[1]
control_image_raw = None
if control_image and control_image.startswith("http"):
control_image = LazyLoadingImage(url=control_image)
if control_image:
if control_image.startswith("http"):
control_image = LazyLoadingImage(url=control_image)
elif control_image.startswith("textimg="):
control_image = image_from_textimg_str(
control_image, resolved_width, resolved_height
)
else:
control_image = None
control_image_raw = option[1]

@ -119,6 +119,16 @@ def _imagine_cmd(
for _init_image in init_images:
if _init_image and _init_image.startswith("http"):
_init_image = LazyLoadingImage(url=_init_image)
elif _init_image.startswith("textimg="):
from imaginairy.utils import named_resolutions
from imaginairy.utils.text_image import image_from_textimg_str
resolved_width, resolved_height = named_resolutions.normalize_image_size(
size
)
_init_image = image_from_textimg_str(
_init_image, resolved_width, resolved_height
)
else:
_init_image = LazyLoadingImage(filepath=_init_image)
new_init_images.append(_init_image)

@ -253,9 +253,6 @@ def adaptive_threshold_binarize(img: "Tensor") -> "Tensor":
from imaginairy.utils import get_device
# img = img.to("cpu")
# img = img.to(get_device())
if img.dim() != 4:
raise ValueError("Input should be a 4d tensor")
if img.size(1) != 3:
@ -276,7 +273,7 @@ def adaptive_threshold_binarize(img: "Tensor") -> "Tensor":
grayscale = to_grayscale(img)
grayscale = grayscale[:, 0:1, :, :]
grayscale_np = grayscale.squeeze(1).numpy()
grayscale_np = grayscale.squeeze(1).to("cpu").numpy()
blockSize = 129
C = 2

@ -183,3 +183,41 @@ def add_caption_to_image(
stroke_width=3,
stroke_fill=(0, 0, 0),
)
def create_halo_effect(
    bw_image: PIL.Image.Image, background_color: tuple
) -> PIL.Image.Image:
    """
    Composite an image's non-white content over a blurred copy of itself.

    Pixels whose red, green and blue channels are all above 200 are made fully
    transparent. A Gaussian-blurred copy of the result (the "halo") is pasted
    onto a fresh canvas filled with ``background_color``, and the sharp,
    unblurred cut-out is pasted on top of it.

    :param bw_image: Source image; it is converted to RGBA internally.
    :param background_color: Fill color handed to ``Image.new`` for the canvas.
    :return: A new RGBA image with the same size as ``bw_image``.
    """
    from PIL import Image, ImageFilter

    fully_transparent = (255, 255, 255, 0)

    # Knock out the near-white background so only the darker content is opaque.
    cutout = bw_image.convert("RGBA")
    cutout.putdata(
        [
            fully_transparent if min(pixel[:3]) > 200 else pixel
            for pixel in cutout.getdata()
        ]  # type: ignore
    )

    # The halo is a heavily blurred duplicate of the cut-out.
    halo = cutout.copy().filter(ImageFilter.GaussianBlur(radius=25))

    # Stack halo first, then the sharp cut-out; each layer is its own alpha mask.
    canvas = Image.new("RGBA", bw_image.size, color=background_color)
    for layer in (halo, cutout):
        canvas.paste(layer, (0, 0), layer)
    return canvas

@ -0,0 +1,115 @@
from functools import lru_cache
from pyparsing import (
CharsNotIn,
Group,
OneOrMore,
Optional,
Word,
alphanums,
alphas,
quotedString,
removeQuotes,
)
@lru_cache
def _make_attribute_parser():
    """
    Build the pyparsing grammar for one or more ``key=value`` pairs.

    The grammar is cached, so it is only constructed on the first call.
    Each pair is returned as a group of ``[key, "=", value]``; a missing value
    yields the empty string.

    :return: A parser element matching one or more grouped key/value pairs.
    """
    # Keys start with a letter and continue with letters, digits or "_".
    key = Word(alphas, alphanums + "_")

    # ``setParseAction`` modifies the element it is called on. ``quotedString``
    # is a module-level object shared by every user of pyparsing in the
    # process, so attach the quote-stripping action to a private copy instead
    # of silently changing the global.
    quoted_value = quotedString.copy().setParseAction(removeQuotes)

    # Bare values run until the next space, "=" or quote character.
    unquoted_value = CharsNotIn(" =\"'")

    pair = key + "=" + Optional(quoted_value | unquoted_value, default="")
    return OneOrMore(Group(pair))
def parse_spaced_key_value_pairs(text: str) -> dict[str, str]:
    """
    Turn a string of space-separated ``key=value`` pairs into a dictionary.

    :param text: String of key-value pairs separated by spaces.
    :return: Dictionary mapping each key to its value; empty when ``text`` is empty.
    """
    if text:
        groups = _make_attribute_parser().parseString(text, parseAll=True)
        # Every group has the shape [key, "=", value].
        return {group[0]: group[2] for group in groups}
    return {}
def parse_spaced_key_value_pairs_html(text: str):
    """
    Parse key/value pairs by treating them as the attributes of an HTML tag.

    The text is wrapped in a throwaway ``<foo ...>`` tag and handed to
    ``parse_html_tag``; only the resulting attribute dictionary is returned.
    """
    return parse_html_tag(f"<foo {text}>")["attributes"]
def parse_html_tag(html_tag):
    """
    Extract the tag name and attributes from a string holding an HTML tag.

    If the input contains several start tags, the values of the last one are
    returned. If it contains none, the tag name is empty and there are no
    attributes.

    Args:
        html_tag (str): A string representing the HTML tag to be parsed.

    Returns:
        dict: ``{"tagname": str, "attributes": dict}``.
    """
    from html.parser import HTMLParser

    captured = {"tagname": "", "attributes": {}}

    class _StartTagRecorder(HTMLParser):
        """Remember the most recent start tag seen while feeding input."""

        def handle_starttag(self, tag, attrs):
            captured["tagname"] = tag
            captured["attributes"] = dict(attrs)

    _StartTagRecorder().feed(html_tag)
    return captured
def parse_spaced_key_value_pairs_re(text: str) -> dict[str, str]:
    """
    Parse a string of space-separated ``key=value`` pairs with regular expressions.

    A value is either bare (no whitespace or quote characters) or wrapped in
    single or double quotes. For quoted values the surrounding quotes are
    removed and Python-style backslash escape sequences are decoded. All other
    characters, including non-ASCII ones, are kept unchanged.

    :param text: String of key-value pairs separated by spaces.
    :return: Dictionary mapping each key to its value. A repeated key keeps
        the value of its last occurrence.
    :raises ValueError: If ``text`` is non-empty but contains no ``key=value``
        pair, or if a quoted value contains a malformed escape sequence.
    """
    if not text:
        return {}

    import re

    # Building regex parts for readability
    key_pattern = r"(?P<key>\w+)"
    quoted_value_pattern = r'(?:"[^"\\]*(?:\\.[^"\\]*)*"|\'[^\'\\]*(?:\\.[^\'\\]*)*\')'
    unquoted_value_pattern = r'[^\'"\s]*'
    value_pattern = f"(?P<value>{quoted_value_pattern}|{unquoted_value_pattern})"
    pattern = rf"{key_pattern}={value_pattern}"

    # One backslash escape sequence made only of ASCII characters. Decoding is
    # applied to these sequences alone: pushing the whole value through the
    # "unicode_escape" codec would reinterpret its UTF-8 bytes as Latin-1 and
    # corrupt every non-ASCII character (emoji, accented letters, ...).
    escape_pattern = re.compile(
        r"\\(?:[0-7]{1,3}|x[0-9A-Fa-f]{2}|u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8}"
        r"|N\{[A-Za-z0-9 \-]*\}|[\x00-\x7f])"
    )

    def _decode_escape(match):
        return match.group(0).encode("ascii").decode("unicode_escape")

    # With two capturing groups, findall yields (key, value) tuples.
    matches = re.findall(pattern, text)
    if not matches:
        raise ValueError("Invalid format")

    parsed_query = {}
    for key, value in matches:
        is_quoted = len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'"
        if is_quoted:
            # Remove quotes and handle escape sequences
            value = escape_pattern.sub(_decode_escape, value[1:-1])
        parsed_query[key] = value
    return parsed_query

@ -0,0 +1,141 @@
from typing import Literal
import pyparsing
from PIL import Image, ImageDraw, ImageFont
from PIL.ImageColor import getrgb
from imaginairy.utils.img_utils import create_halo_effect
from imaginairy.utils.paths import PKG_ROOT
from imaginairy.utils.spaced_kv_parser import parse_spaced_key_value_pairs
def determine_max_font_size(
    text: str,
    draw: ImageDraw.ImageDraw,
    font_path: str,
    width: int,
    height: int,
    margin_pct: float,
    line_spacing: int = 4,
) -> int:
    """
    Determine the maximum font size that allows the text to fit within the given image dimensions and margin constraints.

    The size is found by growing the font one point at a time and measuring
    the text with ``multiline_textbbox`` until it no longer fits.

    :param text: Text to be drawn.
    :param draw: ImageDraw object to measure text size.
    :param font_path: Path to the font file.
    :param width: Width of the image.
    :param height: Height of the image.
    :param margin_pct: Margin percentage (fraction of each dimension kept free
        on every side).
    :param line_spacing: Spacing between lines, passed to ``multiline_textbbox``.
    :return: Maximum font size, never smaller than 1. When the text is empty,
        or does not fit even at size 1, the result is 1.
    """
    min_font_size = 1

    if not text:
        # An empty string has no extent at any font size, so the growth loop
        # below would never see an overflow and would run forever.
        return min_font_size

    max_width = width - 2 * (width * margin_pct)
    max_height = height - 2 * (height * margin_pct)

    font_size = min_font_size
    while True:
        font = ImageFont.truetype(font_path, font_size)
        # Use multiline_textbbox to get the bounding box of the text
        left, top, right, bottom = draw.multiline_textbbox(
            (0, 0), text, font=font, spacing=line_spacing
        )
        if right - left > max_width or bottom - top > max_height:
            break
        font_size += 1

    # Step back one size because the loop exits at the first size that is too
    # large. Clamp to the minimum so callers never receive 0, which is not a
    # usable size for loading a font.
    return max(font_size - 1, min_font_size)
def generate_word_image(
    text: str,
    width: int,
    height: int,
    margin_pct: float = 0.1,
    line_spacing: int = 4,
    text_align: Literal["left", "center", "right"] = "center",
    font_path: str = f"{PKG_ROOT}/data/DejaVuSans.ttf",
    font_color: str = "black",
    background_color: str = "white",
) -> Image.Image:
    """
    Render ``text`` as large as possible, centered on a solid-color image.

    :param text: Text to draw; may contain newlines for multiple lines.
    :param width: Width of the generated image.
    :param height: Height of the generated image.
    :param margin_pct: Margin percentage used when sizing the text.
    :param line_spacing: Spacing between lines of text.
    :param text_align: Alignment of the lines relative to each other.
    :param font_path: Path to the font file.
    :param font_color: Fill color of the text.
    :param background_color: Color of the image background.
    :return: A new RGB image containing the rendered text.
    """
    image = Image.new("RGB", (width, height), color=background_color)
    draw = ImageDraw.Draw(image)

    max_font_size = determine_max_font_size(
        text, draw, font_path, width, height, margin_pct, line_spacing=line_spacing
    )
    font = ImageFont.truetype(font_path, max_font_size)

    # Measure with the same spacing and alignment used for drawing below;
    # otherwise the centering is computed for a differently laid-out text
    # whenever line_spacing is not Pillow's default.
    left, top, right, bottom = draw.multiline_textbbox(
        (0, 0), text, font=font, spacing=line_spacing, align=text_align
    )

    # Center the measured box. The box does not necessarily start at the
    # drawing origin, so its left/top offsets are subtracted on both axes.
    x = (width - (right - left)) / 2 - left
    y = (height - (bottom - top)) / 2 - top

    draw.multiline_text(
        (x, y), text, fill=font_color, font=font, align=text_align, spacing=line_spacing
    )
    return image
def image_from_textimg_str(text: str, width: int, height: int) -> Image.Image:
    """
    Create an image from a textimg string.

    The string is a list of space-separated ``key=value`` attributes whose
    first key must be ``textimg`` (the text to render). A literal ``\\n`` in
    the text is turned into a line break. Optional attributes: ``font``,
    ``font_color``, ``background_color``, ``text_align``, ``line_spacing``,
    ``margin_pct`` and ``halo``.

    :param text: The textimg attribute string.
    :param width: Width of the generated image.
    :param height: Height of the generated image.
    :return: The rendered image, with a halo effect applied when ``halo`` is
        one of ``true``, ``1`` or ``yes`` (case-insensitive).
    :raises ValueError: If the string cannot be parsed, is empty, does not
        start with ``textimg``, contains unknown attributes, or has an
        invalid ``text_align``.
    """
    try:
        data = parse_spaced_key_value_pairs(text)
    except pyparsing.ParseException as err:
        # Keep the parser error attached as the cause for easier debugging.
        raise ValueError("Invalid format for textimg") from err

    # An empty string parses to an empty dict; the default avoids leaking a
    # StopIteration and reports it as the same format error instead.
    first_key = next(iter(data), None)
    if first_key != "textimg":
        raise ValueError("Invalid format for textimg")

    allowed_keys = {
        "textimg",
        "font",
        "font_color",
        "background_color",
        "text_align",
        "line_spacing",
        "margin_pct",
        "halo",
    }
    invalid_keys = set(data) - allowed_keys
    if invalid_keys:
        msg = f"Invalid attributes for textimg: '{invalid_keys}'. Valid attributes are '{allowed_keys}'"
        raise ValueError(msg)

    text_align = data.get("text_align", "center")
    if text_align not in {"left", "center", "right"}:
        msg = f"Invalid text_align '{text_align}'. Valid options are 'left', 'center' and 'right'"
        raise ValueError(msg)

    background_color: str = data.get("background_color", "white")

    img = generate_word_image(
        text=data["textimg"].replace("\\n", "\n"),
        width=width,
        height=height,
        margin_pct=float(data.get("margin_pct", 0.1)),
        line_spacing=int(data.get("line_spacing", 4)),
        text_align=text_align,  # type: ignore
        font_path=data.get("font", f"{PKG_ROOT}/data/DejaVuSans.ttf"),
        font_color=data.get("font_color", "black"),
        background_color=background_color,
    )

    if data.get("halo", "0").lower() in ("true", "1", "yes"):
        # The halo helper takes the background as an RGB tuple, not a name.
        img = create_halo_effect(img, background_color=getrgb(background_color))
    return img

Binary file not shown.

After

Width:  |  Height:  |  Size: 205 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 40 KiB

@ -0,0 +1,14 @@
from imaginairy.utils.img_utils import create_halo_effect
from imaginairy.utils.text_image import generate_word_image
from tests import TESTS_FOLDER
def test_create_halo_effect():
    """Test if the image has the correct dimensions"""
    width, height = 1920, 1080
    bg_shade = 245

    img = generate_word_image("OBEY", width, height, font_color="black")
    assert img.size == (width, height)
    img.save(f"{TESTS_FOLDER}/data/obey.png")

    img = create_halo_effect(img, (bg_shade, bg_shade, bg_shade))
    # The halo is composited on an RGBA canvas of the source image's size.
    assert img.size == (width, height)
    assert img.mode == "RGBA"
    img.save(f"{TESTS_FOLDER}/data/obey-halo.png")

@ -0,0 +1,88 @@
import pyparsing as pp
import pytest
from pyparsing import ParseException
from imaginairy.utils.spaced_kv_parser import parse_spaced_key_value_pairs
def test_basic_parsing():
    """Quoted and unquoted values in one string parse into the expected dict."""
    expected = {
        "text": "Hello World",
        "font": "Arial",
        "size": "12",
        "color": "#FF0000",
    }
    try:
        parsed = parse_spaced_key_value_pairs(
            "text='Hello World' font='Arial' size=12 color='#FF0000'"
        )
    except pp.ParseException as exc:
        # Print pyparsing's explanation before failing the test.
        print(exc.explain())
        raise
    assert parsed == expected
def test_unquoted_values():
    """Bare values, including one starting with '#', are returned as strings."""
    parsed = parse_spaced_key_value_pairs("width=800 height=600 bg_color=#FFFFFF")
    assert parsed == {"width": "800", "height": "600", "bg_color": "#FFFFFF"}
def test_mixed_quoted_unquoted():
    """A quoted value followed by a bare value parses correctly."""
    parsed = parse_spaced_key_value_pairs("title='My Title' resolution=1080p")
    assert parsed == {"title": "My Title", "resolution": "1080p"}
def test_empty_string():
    """An empty input string yields an empty dictionary."""
    assert parse_spaced_key_value_pairs("") == {}
def test_invalid_format():
    """Text without any key=value pair raises a ParseException."""
    not_key_value_text = "This is not a valid format"
    with pytest.raises(ParseException):  # noqa
        parse_spaced_key_value_pairs(not_key_value_text)
def test_only_keys():
    """Keys followed by '=' and no value map to empty strings."""
    parsed = parse_spaced_key_value_pairs("key1= key2=")
    assert parsed == {"key1": "", "key2": ""}
# (input string, expected parsed dict) pairs fed to test_challenging_queries.
challenging_test_queries = [
    # Each quoted value contains the other kind of quote character.
    ("foo=\"bar'baz\" bar='foo\"bar'", {"foo": "bar'baz", "bar": 'foo"bar'}),
    # Values consisting of text wrapped in the other kind of quotes.
    ("foo=\"'bar'\" bar='\"baz\"'", {"foo": "'bar'", "bar": '"baz"'}),
    # Backslash-escaped quotes inside quoted values; the expected values keep
    # the backslash.
    ("foo=\"bar\\\"baz\" bar='foo\\'bar'", {"foo": 'bar\\"baz', "bar": "foo\\'bar"}),
    # Emoji (bare and quoted) and a literal backslash-n that stays two characters.
    (
        'special=👍 emoji="😀 😃" text="This is a test\\nwith newline"',
        {"special": "👍", "emoji": "😀 😃", "text": "This is a test\\nwith newline"},
    ),
    # Leading and trailing spaces inside quotes are preserved.
    ('name=" John Doe " age=" 30 "', {"name": " John Doe ", "age": " 30 "}),
    # Bare value made of punctuation characters.
    ("special=@@!!", {"special": "@@!!"}),
    # Doubled backslashes are expected to come back unchanged.
    (
        'text="This is a test\\\\nwith incomplete escape\\\\"',
        {"text": "This is a test\\\\nwith incomplete escape\\\\"},
    ),
    # Keys with empty values.
    ("foo= bar=", {"foo": "", "bar": ""}),
    # Empty value followed by decimal and negative numbers (kept as strings).
    ("foo= bar=30.4 zab=-1.2", {"foo": "", "bar": "30.4", "zab": "-1.2"}),
    # Empty input.
    ("", {}),
    # Combination of the cases above in a single string.
    (
        'foo="bar" baz=\'qux\' specialChars="@@!!" empty= complex="\'This is a \\"complex\\" string\'"',
        {
            "foo": "bar",
            "baz": "qux",
            "specialChars": "@@!!",
            "empty": "",
            "complex": "'This is a \\\"complex\\\" string'",
        },
    ),
]
@pytest.mark.parametrize(("query", "expected"), challenging_test_queries)
def test_challenging_queries(query, expected):
    """Each tricky input string parses to exactly its expected mapping."""
    assert parse_spaced_key_value_pairs(query) == expected
Loading…
Cancel
Save