diff --git a/README.md b/README.md
index f21341d..bb4f177 100644
--- a/README.md
+++ b/README.md
@@ -18,8 +18,6 @@ AI imagined images. Pythonic generation of stable diffusion images **and videos*
 # be sure to use Python 3.10, Python 3.11 is not supported at the moment
 >> pip install imaginairy
 >> imagine "a scenic landscape" "a photo of a dog" "photo of a fruit bowl" "portrait photo of a freckled woman" "a bluejay"
-# Make an animation showing the generation process
->> imagine --gif "a flower"
 # Make an AI video
 >> aimg videogen --start-image rocket.png
 ```
@@ -84,15 +82,27 @@ Options:
 - This was a huge rewrite which is why some features are not yet supported. On the plus side, refiners supports cutting edge features (SDXL, image prompts, etc) which will be added to imaginairy soon.
 - [self-attention guidance](https://github.com/SusungHong/Self-Attention-Guidance) which makes details of images more accurate
+- 🎉 feature: larger image generations now work MUCH better and stay faithful to the composition of the same image at a smaller size.
+For example `--size 720p --seed 1` and `--size 1080p --seed 1` will produce the same image for SD15
+- 🎉 feature: loading diffusers-based models is now supported. Example: `--model https://huggingface.co/ainz/diseny-pixar --model-architecture sd15`
+- 🎉 feature: qrcode controlnet!
+- feature: generate word images automatically. Great for use with the qrcode controlnet: `imagine "flowers" --gif --size hd --control-mode qrcode --control-image "textimg='JOY' font_color=white background_color=gray" -r 10`
+- feature: OpenDalle 1.1 added. Use `--model opendalle` to select it
 - feature: added `--size` parameter for more intuitive sizing (e.g. 512, 256x256, 4k, uhd, FHD, VGA, etc)
 - feature: detect if wrong torch version is installed and provide instructions on how to install proper version
 - feature: better logging output: color, error handling
 - feature: support for pytorch 2.0
+- feature: command line output significantly cleaned up and easier to read
+- feature: adds `--composition-strength` parameter to the CLI (#416)
+- performance: lower memory usage for upscaling
+- performance: lower memory usage at startup
+- performance: add sliced attention to several models (lowers memory use)
+- fix: simpler memory management that avoids some of the previous bugs
 - deprecated: support for python 3.8, 3.9
 - deprecated: support for torch 1.13
 - deprecated: support for Stable Diffusion versions 1.4, 2.0, and 2.1
 - deprecated: image training
-- broken: most samplers, tile/details controlnet, and model memory management
+- broken: samplers other than ddim
 
 ### Run API server and StableStudio web interface (alpha)
 
 Generate images via API or web interface. Much smaller featureset compared to the command line tool.
diff --git a/imaginairy/api/generate_refiners.py b/imaginairy/api/generate_refiners.py
index 254d9cf..14fc0f3 100644
--- a/imaginairy/api/generate_refiners.py
+++ b/imaginairy/api/generate_refiners.py
@@ -336,6 +336,7 @@ def generate_single_image(
                 condition_scale=prompt.prompt_strength,
                 **text_conditioning_kwargs,
             )
+            lc.progress_latent_callback(x)
             # trying to clear memory. not sure if this helps
             sd.unet.set_context(context="self_attention_map", value={})
             sd.unet._reset_context()
@@ -423,7 +424,7 @@ def generate_single_image(
             safety_score=safety_score,
             result_images=result_images,
             performance_stats=lc.get_performance_stats(),
-            progress_latents=[],  # todo
+            progress_latents=progress_latents,
         )
 
         _most_recent_result = result
diff --git a/imaginairy/api/video_sample.py b/imaginairy/api/video_sample.py
index f500660..128a36f 100644
--- a/imaginairy/api/video_sample.py
+++ b/imaginairy/api/video_sample.py
@@ -37,7 +37,7 @@ def generate_video(
     output_folder: str | None = None,
     num_frames: int = 6,
     num_steps: int = 30,
-    model_name: str = "svd_xt",
+    model_name: str = "svd-xt",
     fps_id: int = 6,
     output_fps: int = 6,
     motion_bucket_id: int = 127,
diff --git a/imaginairy/utils/animations.py b/imaginairy/utils/animations.py
index f4fefc2..a6b1aeb 100644
--- a/imaginairy/utils/animations.py
+++ b/imaginairy/utils/animations.py
@@ -38,11 +38,19 @@ def make_bounce_animation(
 
     # convert from latents
     converted_frames = []
+
     for frame in frames:
         if isinstance(frame, torch.Tensor):
             frame = model_latents_to_pillow_imgs(frame)[0]
         converted_frames.append(frame)
     frames = converted_frames
+    max_size = max([frame.size for frame in frames])
+    converted_frames = []
+    for frame in frames:
+        if frame.size != max_size:
+            frame = frame.resize(max_size)
+        converted_frames.append(frame)
+    frames = converted_frames
 
     durations = (
         [start_pause_duration_ms]
diff --git a/setup.py b/setup.py
index 4d655fa..82af3e5 100644
--- a/setup.py
+++ b/setup.py
@@ -44,7 +44,7 @@ setup(
     name="imaginAIry",
     author="Bryce Drennan",
     # author_email="b r y p y d o t io",
-    version="14.0.0b9",
+    version="14.0.0",
     description="AI imagined images. Pythonic generation of images.",
     long_description=readme,
     long_description_content_type="text/markdown",
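
Not part of the patch: a hypothetical sketch of the progress-latent flow the generate_refiners.py hunks wire up. A per-step callback stores intermediate latents so the finished result can expose them as `progress_latents`. `LatentCollector` and the loop below are illustrative stand-ins, not imaginairy's actual classes.

```python
import torch


class LatentCollector:
    """Stand-in for the logging context `lc` used in generate_single_image."""

    def __init__(self) -> None:
        self.progress_latents: list[torch.Tensor] = []

    def progress_latent_callback(self, latent: torch.Tensor) -> None:
        # Clone so later denoising steps cannot mutate the stored tensor.
        self.progress_latents.append(latent.detach().clone())


lc = LatentCollector()
for _ in range(3):  # stand-in for the diffusion loop invoking the callback
    lc.progress_latent_callback(torch.randn(1, 4, 64, 64))
assert len(lc.progress_latents) == 3  # one latent captured per step
```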
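Also not part of the patch: a minimal sketch of the size normalization added to `make_bounce_animation`, using synthetic PIL frames. `max()` over `(width, height)` tuples compares lexicographically, so every frame is resized to match the largest one before the animation is assembled.

```python
from PIL import Image

# Synthetic frames of mixed sizes, standing in for decoded progress latents.
frames = [Image.new("RGB", (256, 256)), Image.new("RGB", (512, 512))]

# Same logic as the animations.py hunk: pick the largest (width, height)
# tuple and resize every non-matching frame to it.
max_size = max(frame.size for frame in frames)
frames = [f if f.size == max_size else f.resize(max_size) for f in frames]
assert all(f.size == max_size for f in frames)
```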