feature: added --size parameter to allow using named sizes

2024-10-31 03:20:40 +00:00 · 2023-12-04 08:31:51 -08:00 · 2023-12-04 08:31:51 -08:00 · 71d4992dca
commit 71d4992dca
parent 14ecf93c6a
7 changed files with 135 additions and 3 deletions
--- a/README.md
+++ b/README.md
@ -75,12 +75,13 @@ Options:
 ### Whats New
 **14.0.0**
 - 🎉 video generation using [Stable Video Diffusion](https://github.com/Stability-AI/generative-models)
- - add `--videogen` to any image generation to create a short video from the generated image
- - or use `aimg videogen` to generate a video from an image
+  - add `--videogen` to any image generation to create a short video from the generated image
+  - or use `aimg videogen` to generate a video from an image
 - 🎉 imaginairy is now backed by the [refiners library](https://github.com/finegrain-ai/refiners)
  - This was a huge rewrite which is why some features are not yet supported.  On the plus side, refiners supports
 cutting edge features (SDXL, image prompts, etc) which will be added to imaginairy soon.
  - [self-attention guidance](https://github.com/SusungHong/Self-Attention-Guidance) which makes details of images more accurate
+- feature: added `--size` parameter for more intuitive sizing (e.g. 512, 256x256, 4k, uhd, FHD, VGA, etc)
 - feature: better logging output: color, error handling
 - feature: support for pytorch 2.0
 - deprecated: support for python 3.8, 3.9
--- a/docs/todo.md
+++ b/docs/todo.md
@ -8,11 +8,13 @@
 - add --size parameter that accepts strings (e.g. 256x256, 4k, uhd, 8k, etc)
 - detect if cuda torch missing and give better error message
 - add method to install correct torch version
- - add composition cutoff parameter
 - allow selection of output video format
 - chain multiple operations together imggen => videogen
 - make sure terminal output on windows doesn't suck
 - add karras schedule to refiners
+ - add method to show cache size
+ - add method to clear model cache
+ - add method to clear cached items not recently used (does diffusers have one?)

 ### Old Todo

--- a/imaginairy/cli/edit.py
+++ b/imaginairy/cli/edit.py
@ -55,6 +55,7 @@ def edit_cmd(
    repeats,
    height,
    width,
+    size,
    steps,
    seed,
    upscale,
@ -117,6 +118,7 @@ def edit_cmd(
        repeats,
        height,
        width,
+        size,
        steps,
        seed,
        upscale,
--- a/imaginairy/cli/imagine.py
+++ b/imaginairy/cli/imagine.py
@ -74,6 +74,7 @@ def imagine_cmd(
    repeats,
    height,
    width,
+    size,
    steps,
    seed,
    upscale,
@ -161,6 +162,7 @@ def imagine_cmd(
        repeats,
        height,
        width,
+        size,
        steps,
        seed,
        upscale,
--- a/imaginairy/cli/shared.py
+++ b/imaginairy/cli/shared.py
@ -36,6 +36,7 @@ def _imagine_cmd(
    repeats,
    height,
    width,
+    size,
    steps,
    seed,
    upscale,
@ -95,6 +96,15 @@ def _imagine_cmd(

    configure_logging(log_level)

+    if (height is not None or width is not None) and size is not None:
+        msg = "You cannot specify both --size and --height/--width. Please choose one."
+        raise ValueError(msg)
+
+    if size is not None:
+        from imaginairy.utils.named_resolutions import get_named_resolution
+
+        width, height = get_named_resolution(size)
+
    init_images = [init_image] if isinstance(init_image, str) else init_image

    from imaginairy.utils import glob_expand_paths
@ -324,6 +334,13 @@ common_options = [
        type=int,
        help="Image width. Should be multiple of 8.",
    ),
+    click.option(
+        "--size",
+        default=None,
+        show_default=True,
+        type=str,
+        help="Image size as a string. Can be a named size or WIDTHxHEIGHT format. Should be multiple of 8. Examples: 512x512, 4k, UHD, 8k, ",
+    ),
    click.option(
        "--steps",
        default=None,
--- a/imaginairy/utils/named_resolutions.py
+++ b/imaginairy/utils/named_resolutions.py
@ -0,0 +1,69 @@
+import contextlib
+
+_NAMED_RESOLUTIONS = {
+    "HD": (1280, 720),
+    "FHD": (1920, 1080),
+    "2K": (2048, 1080),
+    "4K": (3840, 2160),
+    "UHD": (3840, 2160),
+    "8K": (7680, 4320),
+    "360p": (640, 360),
+    "VGA": (640, 480),
+    "SVGA": (800, 600),
+    "XGA": (1024, 768),
+    "SXGA": (1280, 1024),
+    "WXGA+": (1440, 900),
+    "HD+": (1600, 900),
+    "UXGA": (1600, 1200),
+    "WSXGA+": (1680, 1050),
+    "WUXGA": (1920, 1200),
+    "QWXGA": (2048, 1152),
+    "QXGA": (2048, 1536),
+    "UWFHD": (2560, 1080),
+    "QHD": (2560, 1440),
+    "WQXGA": (2560, 1600),
+    "UWQHD": (3440, 1440),
+    "240p": (426, 240),
+    "480p": (854, 480),
+    "720p": (1280, 720),
+    "1080p": (1920, 1080),
+    "1440p": (2560, 1440),
+    "2160p": (3840, 2160),
+    "NTSC": (720, 480),
+    "PAL": (720, 576),
+    "QVGA": (320, 240),
+    "WVGA": (800, 480),
+    "FWVGA": (854, 480),
+    "WSVGA": (1024, 600),
+    "HDV": (1440, 1080),
+    "WQHD": (2560, 1440),
+    "UW-UXGA": (2560, 1080),
+    "UHD+": (5120, 2880),
+    "8K UHD": (7680, 4320),
+    "SVD": (1024, 576),  # stable video diffusion
+}
+
+
+def get_named_resolution(resolution: str):
+    resolution = resolution.upper()
+
+    size = _NAMED_RESOLUTIONS.get(resolution)
+
+    if size is None:
+        # is it WIDTHxHEIGHT format?
+        try:
+            width, height = resolution.split("X")
+            size = (int(width), int(height))
+        except ValueError:
+            pass
+
+    if size is None:
+        # is it just a single number?
+        with contextlib.suppress(ValueError):
+            size = (int(resolution), int(resolution))
+
+    if size is None:
+        msg = f"Unknown resolution: {resolution}"
+        raise ValueError(msg)
+
+    return size
--- a/tests/test_utils/test_named_resolutions.py
+++ b/tests/test_utils/test_named_resolutions.py
@ -0,0 +1,39 @@
+import pytest
+
+from imaginairy.utils.named_resolutions import get_named_resolution
+
+valid_cases = [
+    ("HD", (1280, 720)),
+    ("FHD", (1920, 1080)),
+    ("hd", (1280, 720)),
+    ("fhd", (1920, 1080)),
+    ("Hd", (1280, 720)),
+    ("FhD", (1920, 1080)),
+    ("1920x1080", (1920, 1080)),
+    ("1280x720", (1280, 720)),
+    ("1024x768", (1024, 768)),
+    ("800", (800, 800)),
+    ("1024", (1024, 1024)),
+]
+invalid_cases = [
+    "abc",
+    "1920xABC",
+    "1920x1080x1234",
+    "x1920",
+    "123.1",
+    "12x",
+    "x12",
+    "x",
+    "12x12x12x12",
+]
+
+
+@pytest.mark.parametrize(("named_resolution", "expected"), valid_cases)
+def test_named_resolutions(named_resolution, expected):
+    assert get_named_resolution(named_resolution) == expected
+
+
+@pytest.mark.parametrize("named_resolution", invalid_cases)
+def test_invalid_inputs(named_resolution):
+    with pytest.raises(ValueError, match="Unknown resolution"):
+        get_named_resolution(named_resolution)