wip: integration of ip-adapter

4 months ago · 16f58e1f8e
parent 9c48b749d8
commit 16f58e1f8e
5 changed files with 66 additions and 0 deletions
--- a/imaginairy/cli/imagine.py
+++ b/imaginairy/cli/imagine.py
@ -83,6 +83,8 @@ def imagine_cmd(
    prompt_strength,
    init_image,
    init_image_strength,
+    image_prompt,
+    image_prompt_strength,
    outdir,
    output_file_extension,
    repeats,
@ -191,6 +193,8 @@ def imagine_cmd(
        prompt_strength=prompt_strength,
        init_image=init_image,
        init_image_strength=init_image_strength,
+        image_prompt=image_prompt,
+        image_prompt_strength=image_prompt_strength,
        outdir=outdir,
        output_file_extension=output_file_extension,
        repeats=repeats,
--- a/imaginairy/cli/shared.py
+++ b/imaginairy/cli/shared.py
@ -35,6 +35,8 @@ def _imagine_cmd(
    prompt_strength,
    init_image,
    init_image_strength,
+    image_prompt,
+    image_prompt_strength,
    outdir,
    output_file_extension,
    repeats,
@ -186,6 +188,8 @@ def _imagine_cmd(
                    prompt_strength=prompt_strength,
                    init_image=_init_image,
                    init_image_strength=init_image_strength,
+                    image_prompt=image_prompt,
+                    image_prompt_strength=image_prompt_strength,
                    control_inputs=control_inputs,
                    seed=seed,
                    solver_type=solver,
@ -312,6 +316,19 @@ common_options = [
        type=float,
        help="Starting image strength. Between 0 and 1.",
    ),
+    click.option(
+        "--image-prompt",
+        metavar="PATH|URL",
+        help="Starting image.",
+        multiple=True,
+    ),
+    click.option(
+        "--image-prompt-strength",
+        default=None,
+        show_default=False,
+        type=float,
+        help="Starting image strength. Between 0 and 1.",
+    ),
    click.option(
        "--outdir",
        default="./outputs",
--- a/imaginairy/image_prompts.py
+++ b/imaginairy/image_prompts.py
--- a/imaginairy/schema.py
+++ b/imaginairy/schema.py
@ -333,6 +333,8 @@ class ImaginePrompt(BaseModel, protected_namespaces=()):
            prompt_strength (float, optional): Strength of the influence of the prompt on the output.
            init_image (LazyLoadingImage, optional): Initial image to base the generation on.
            init_image_strength (float, optional): Strength of the influence of the initial image.
+            image_prompt (LazyLoadingImage, optional): Image to be used as part of the prompt using IP-Adapter.
+            image_prompt_strength (float, optional): Strength of the influence of the image_prompt.
            control_inputs (List[ControlInput], optional): Additional control inputs for image generation.
            mask_prompt (str, optional): Mask prompt for selective area generation.
            mask_image (LazyLoadingImage, optional): Image used for masking.
@ -370,6 +372,8 @@ class ImaginePrompt(BaseModel, protected_namespaces=()):
    init_image_strength: float | None = Field(
        ge=0, le=1, default=None, validate_default=True
    )
+    image_prompt: LazyLoadingImage | None = Field(None, validate_default=True)
+    image_prompt_strength: float | None = Field(ge=0, le=1, default=0.0)
    control_inputs: List[ControlInput] = Field(
        default_factory=list, validate_default=True
    )
@ -411,6 +415,8 @@ class ImaginePrompt(BaseModel, protected_namespaces=()):
        prompt_strength: float | None = 7.5,
        init_image: LazyLoadingImage | None = None,
        init_image_strength: float | None = None,
+        image_prompt: LazyLoadingImage | None = None,
+        image_prompt_strength: float | None = None,
        control_inputs: List[ControlInput] | None = None,
        mask_prompt: str | None = None,
        mask_image: LazyLoadingImage | None = None,
@ -440,6 +446,8 @@ class ImaginePrompt(BaseModel, protected_namespaces=()):
            prompt_strength=prompt_strength,
            init_image=init_image,
            init_image_strength=init_image_strength,
+            image_prompt=image_prompt,
+            image_prompt_strength=image_prompt_strength,
            control_inputs=control_inputs,
            mask_prompt=mask_prompt,
            mask_image=mask_image,
--- a/imaginairy/utils/model_manager.py
+++ b/imaginairy/utils/model_manager.py
@ -204,6 +204,42 @@ def get_diffusion_model_refiners(
    # ensures a "fresh" copy that doesn't have additional injected parts
    sd = sd.structural_copy()

+    # WIP: inject IP-Adapter into the UNet so an image acts as a prompt; image path, weight paths and scale are hard-coded below
+    from PIL import Image
+
+    from imaginairy.vendored.refiners.fluxion.utils import (
+        load_from_safetensors,
+        no_grad,
+    )
+    from imaginairy.vendored.refiners.foundationals.latent_diffusion import (
+        SDXLIPAdapter,
+    )
+
+    image_prompt = Image.open(
+        "/imaginAIry/docs/assets/000032_337692011_PLMS40_PS7.5_a_photo_of_a_dog.jpg"
+    )
+
+    ip_adapter = SDXLIPAdapter(
+        target=sd.unet,
+        weights=load_from_safetensors(
+            "/imaginAIry/imaginairy/utils/ip-adapter_sdxl_vit-h.safetensors"
+        ),
+    )
+    ip_adapter.clip_image_encoder.load_from_safetensors(
+        "/imaginAIry/imaginairy/utils/clip_image.safetensors"
+    )
+    ip_adapter.inject()
+
+    scale = 0.4
+    ip_adapter.set_scale(scale)
+    print(f"SCALE: {scale}")
+
+    with no_grad():
+        clip_image_embedding = ip_adapter.compute_clip_image_embedding(
+            ip_adapter.preprocess_image(image_prompt)
+        )
+        ip_adapter.set_clip_image_embedding(clip_image_embedding)
+
    sd.set_self_attention_guidance(enable=True)

    return sd
@ -222,6 +258,7 @@ def _get_diffusion_model_refiners(

    Weights location may also be shortcut name, e.g. "SD-1.5"
    """
+
    global MOST_RECENTLY_LOADED_MODEL
    _get_diffusion_model_refiners.cache_clear()
    clear_gpu_cache()