feature: prompt expansion (#51)

You can use `{}` to randomly pull values from lists. A list of values separated by `|` and enclosed in `{ }` will be randomly drawn from in a non-repeating fashion. Values that are surrounded by `_ _` will pull from a phrase list of the same name. Folders containing .txt phraselist files may be specified via `--prompt-library-path`. The option may be specified multiple times. Built-in categories: 3d-term, adj-architecture, adj-beauty, adj-detailed, adj-emotion, adj-general, adj-horror, animal, art-movement, art-site, artist, artist-botanical, artist-surreal, aspect-ratio, bird, body-of-water, body-pose, camera-brand, camera-model, color, cosmic-galaxy, cosmic-nebula, cosmic-star, cosmic-term, dinosaur, eyecolor, f-stop, fantasy-creature, fantasy-setting, fish, flower, focal-length, food, fruit, gen-modifier, hair, hd, iso-stop, landscape-type, national-park, nationality, neg-weight, noun-beauty, noun-fantasy, noun-general, noun-horror, occupation, photo-term, pop-culture, pop-location, punk-style, quantity, rpg-item, scenario-desc, skin-color, spaceship, style, tree-species, trippy, video-game, world-heritage-site. Examples: `imagine "a {red|black} dog" -r 2 --seed 0` will generate both "a red dog" and "a black dog". `imagine "a {_color_} dog" -r 4 --seed 0` will generate four different colored dogs. The colors will be pulled from an included phraselist of colors. `imagine "a {_spaceship_|_fruit_|hot air balloon}. low-poly" -r 4 --seed 0` will generate images of spaceships or fruits or a hot air balloon. Credit to [noodle-soup-prompts](https://github.com/WASasquatch/noodle-soup-prompts/) where most, but not all, of the wordlists originate.
2 years ago · 31c2160e21
parent 9ba302a5f4
commit 31c2160e21
77 changed files with 395 additions and 7 deletions
--- a/9
+++ b/9
@ -124,6 +124,15 @@ vendorize_kdiffusion:
 	sed -i '' -e 's#x = x + torch.randn_like(x) \* sigma_up#x = x + torch.randn_like(x, device="cpu").to(x.device) \* sigma_up#g' imaginairy/vendored/k_diffusion/sampling.py
 	make af

+# Refresh the vendored noodle-soup-prompts phraselists: download the pinned
+# commit, clear the vendored folder, copy the LICENSE and regenerate the
+# .txt.gz lists with scripts/prep_vocab_lists.py.
+# `rm -f` keeps the recipe going when the folder is empty (e.g. on the first
+# run, right after `mkdir -p`), where a plain `rm` on the unmatched glob fails.
+vendorize_noodle_soup:
+	make download_repo REPO=git@github.com:WASasquatch/noodle-soup-prompts.git PKG=noodle-soup-prompts COMMIT=5642feb4d0e1340b9d145f5ff64f2b57eab1ae71
+	mkdir -p ./imaginairy/vendored/noodle_soup_prompts
+	rm -f ./imaginairy/vendored/noodle_soup_prompts/*
+	mv ./downloads/noodle-soup-prompts/LICENSE ./imaginairy/vendored/noodle_soup_prompts/
+	python scripts/prep_vocab_lists.py
+	make af
+
+
 vendorize:  ## vendorize a github repo.  `make vendorize REPO=git@github.com:openai/CLIP.git PKG=clip`
 	mkdir -p ./downloads
 	-cd ./downloads && git clone $(REPO) $(PKG)
--- a/README.md
+++ b/README.md
@ -105,7 +105,37 @@ operators also work.  When writing strength modifies know that pixel values are
 <img src="https://raw.githubusercontent.com/brycedrennan/imaginAIry/master/tests/data/girl_with_a_pearl_earring.jpg" height="256"> ➡️ 
 <img src="https://raw.githubusercontent.com/brycedrennan/imaginAIry/master/assets/000105_33084057_DDIM40_PS7.5_portrait_of_a_smiling_lady._oil_painting._.jpg" height="256"> 

-### Generate image captions
+### Prompt Expansion
+You can use `{}` to randomly pull values from lists.  A list of values separated by `|` 
+ and enclosed in `{ }` will be randomly drawn from in a non-repeating fashion. Values that are surrounded by `_ _` will 
+ pull from a phrase list of the same name.   Folders containing .txt phraselist files may be specified via
+`--prompt-library-path`. The option may be specified multiple times.  Built-in categories:
+    
+      3d-term, adj-architecture, adj-beauty, adj-detailed, adj-emotion, adj-general, adj-horror, animal, art-movement, 
+      art-site, artist, artist-botanical, artist-surreal, aspect-ratio, bird, body-of-water, body-pose, camera-brand,
+      camera-model, color, cosmic-galaxy, cosmic-nebula, cosmic-star, cosmic-term, dinosaur, eyecolor, f-stop, 
+      fantasy-creature, fantasy-setting, fish, flower, focal-length, food, fruit, gen-modifier, hair, hd,
+      iso-stop, landscape-type, national-park, nationality, neg-weight, noun-beauty, noun-fantasy, noun-general, 
+      noun-horror, occupation, photo-term, pop-culture, pop-location, punk-style, quantity, rpg-item, scenario-desc, 
+      skin-color, spaceship, style, tree-species, trippy, video-game, world-heritage-site
+
+   Examples:
+
+   `imagine "a {lime|blue|silver|aqua} colored dog" -r 4 --seed 0` will generate four images, one each for "a lime colored dog", "a blue colored dog", "a silver colored dog" and "a aqua colored dog"
+
+<img src="https://raw.githubusercontent.com/brycedrennan/imaginAIry/master/assets/000184_0_plms40_PS7.5_a_silver_colored_dog_[generated].jpg" height="256">
+<img src="https://raw.githubusercontent.com/brycedrennan/imaginAIry/master/assets/000186_0_plms40_PS7.5_a_aqua_colored_dog_[generated].jpg" height="256"> 
+<img src="https://raw.githubusercontent.com/brycedrennan/imaginAIry/master/assets/000210_0_plms40_PS7.5_a_lime_colored_dog_[generated].jpg" height="256"> 
+<img src="https://raw.githubusercontent.com/brycedrennan/imaginAIry/master/assets/000211_0_plms40_PS7.5_a_blue_colored_dog_[generated].jpg" height="256"> 
+
+   `imagine "a {_color_} dog" -r 4 --seed 0` will generate four different colored dogs. The colors will be pulled from an included 
+   phraselist of colors.
+    
+   `imagine "a {_spaceship_|_fruit_|hot air balloon}. low-poly" -r 4 --seed 0` will generate images of spaceships or fruits or a hot air balloon
+
+   Credit to [noodle-soup-prompts](https://github.com/WASasquatch/noodle-soup-prompts/) where most, but not all, of the wordlists originate.
+
+### Generate image captions (via [BLIP](https://github.com/salesforce/BLIP))
 ```bash
 >> aimg describe assets/mask_examples/bowl001.jpg
 a bowl full of gold bars sitting on a table
@ -185,6 +215,10 @@ docker run -it --gpus all -v $HOME/.cache/huggingface:/root/.cache/huggingface -

 ## ChangeLog

+**2.4.0**
+- 🎉 feature: prompt expansion
+- feature: make (blip) photo captions more descriptive
+
 **2.3.1**
 - fix: face fidelity default was broken

@ -258,6 +292,7 @@ would be uncorrelated to the rest of the surrounding image.  It created terrible
 ## Not Supported
 - a GUI. this is a python library
 - training
+ - exploratory features that don't work well

 ## Todo

@ -279,7 +314,7 @@ would be uncorrelated to the rest of the surrounding image.  It created terrible
   - delete more unused code
 - Interface improvements
   - ✅ init-image at command line
-   - prompt expansion
+   - ✅ prompt expansion
   - ✅ interactive cli
 - Image Generation Features
   - ✅ add k-diffusion sampling methods
--- a/assets/000184_0_plms40_PS7.5_a_silver_colored_dog_[generated].jpg
+++ b/assets/000184_0_plms40_PS7.5_a_silver_colored_dog_[generated].jpg
--- a/assets/000186_0_plms40_PS7.5_a_aqua_colored_dog_[generated].jpg
+++ b/assets/000186_0_plms40_PS7.5_a_aqua_colored_dog_[generated].jpg
--- a/assets/000210_0_plms40_PS7.5_a_lime_colored_dog_[generated].jpg
+++ b/assets/000210_0_plms40_PS7.5_a_lime_colored_dog_[generated].jpg
--- a/assets/000211_0_plms40_PS7.5_a_blue_colored_dog_[generated].jpg
+++ b/assets/000211_0_plms40_PS7.5_a_blue_colored_dog_[generated].jpg
--- a/imaginairy/cmds.py
+++ b/imaginairy/cmds.py
@ -1,10 +1,12 @@
 import logging.config
+import math

 import click
 from click_shell import shell

 from imaginairy import LazyLoadingImage, generate_caption
 from imaginairy.api import imagine_image_files
+from imaginairy.enhancers.prompt_expansion import expand_prompts
 from imaginairy.samplers.base import SAMPLER_TYPE_OPTIONS
 from imaginairy.schema import ImaginePrompt
 from imaginairy.suppress_logs import suppress_annoying_logs_and_warnings
@ -179,6 +181,13 @@ def configure_logging(level="INFO"):
    type=click.Path(exists=True),
    default=None,
 )
+@click.option(
+    "--prompt-library-path",
+    help="path to folder containing phaselists in txt files. use txt filename in prompt: {_filename_}",
+    type=click.Path(exists=True),
+    default=None,
+    multiple=True,
+)
@click.pass_context
 def imagine_cmd(
    ctx,
@ -208,6 +217,7 @@ def imagine_cmd(
    caption,
    precision,
    model_weights_path,
+    prompt_library_path,
 ):
    """Have the AI generate images. alias:imagine"""
    if ctx.invoked_subcommand is not None:
@ -230,10 +240,18 @@ def imagine_cmd(
    if fix_faces_fidelity is not None:
        fix_faces_fidelity = float(fix_faces_fidelity)
    prompts = []
+    prompt_expanding_iterators = {}
    for _ in range(repeats):
        for prompt_text in prompt_texts:
+            if prompt_text not in prompt_expanding_iterators:
+                prompt_expanding_iterators[prompt_text] = expand_prompts(
+                    n=math.inf,
+                    prompt_text=prompt_text,
+                    prompt_library_paths=prompt_library_path,
+                )
+            prompt_iterator = prompt_expanding_iterators[prompt_text]
            prompt = ImaginePrompt(
-                prompt_text,
+                next(prompt_iterator),
                prompt_strength=prompt_strength,
                init_image=init_image,
                init_image_strength=init_image_strength,
--- a/imaginairy/enhancers/describe_image_blip.py
+++ b/imaginairy/enhancers/describe_image_blip.py
@ -33,7 +33,8 @@ def blip_model():
    return model


-def generate_caption(image):
+def generate_caption(image, min_length=30):
+    """Given an image, return a caption"""
    gpu_image = (
        transforms.Compose(
            [
@ -54,6 +55,6 @@ def generate_caption(image):

    with torch.no_grad():
        caption = blip_model().generate(
-            gpu_image, sample=False, num_beams=3, max_length=20, min_length=5
+            gpu_image, sample=False, num_beams=3, max_length=80, min_length=min_length
        )
    return caption[0]
--- a/imaginairy/enhancers/describe_image_clip.py
+++ b/imaginairy/enhancers/describe_image_clip.py
@ -36,8 +36,8 @@ def find_embed_text_similarity(embed_features, phrases):
    with torch.no_grad():
        text_features = model.encode_text(text)

-    probs = cosine_distance(embed_features, text_features)
-    probs = [float(p) for p in probs.squeeze()]
+    probs = cosine_distance(text_features, embed_features)
+    probs = [float(p) for p in probs.squeeze(dim=0)]
    phrase_probs = list(zip(phrases, probs))
    phrase_probs.sort(key=lambda r: r[1], reverse=True)

--- a/imaginairy/enhancers/phraselists/init.py
+++ b/imaginairy/enhancers/phraselists/init.py
--- a/imaginairy/enhancers/phraselists/color.txt
+++ b/imaginairy/enhancers/phraselists/color.txt
@ -0,0 +1,17 @@
+aqua
+black
+blue
+fuchsia
+gray
+green
+lime
+maroon
+navy
+olive
+purple
+red
+silver
+teal
+white
+yellow
+hot pink
--- a/imaginairy/enhancers/phraselists/spaceship.txt
+++ b/imaginairy/enhancers/phraselists/spaceship.txt
@ -0,0 +1,16 @@
+space shuttle
+x-wing
+tie fighter
+uss enterprise
+death star
+star destroyer
+apollo landing craft
+protoss carrier
+covenant banshee
+covenant phantom
+unsc pelican
+the tardis
+millennium falcon
+Battlestar Galactica
+UFO
+spacex starship
--- a/imaginairy/enhancers/prompt_expansion.py
+++ b/imaginairy/enhancers/prompt_expansion.py
@ -0,0 +1,153 @@
+import gzip
+import os.path
+import random
+import re
+from functools import lru_cache
+from string import Formatter
+
+from imaginairy import PKG_ROOT
+
+# Folders that are always searched for phraselist files.  They are scanned in
+# this order, so a category in the package's own "phraselists" folder replaces
+# a vendored noodle-soup category with the same name.
+DEFAULT_PROMPT_LIBRARY_PATHS = [
+    os.path.join(PKG_ROOT, "vendored", "noodle_soup_prompts"),
+    os.path.join(PKG_ROOT, "enhancers", "phraselists"),
+]
+# Shared Formatter; its parse() method splits a prompt into literal text and
+# {field} parts.
+formatter = Formatter()
+# Characters accepted inside a {...} expansion (applied to the lowercased field).
+PROMPT_EXPANSION_PATTERN = re.compile(r"[|a-z0-9_ -]+")
+
+
+@lru_cache()
+def prompt_library_filepaths(prompt_library_paths=None):
+    """
+    Return a dict mapping every available category name to its phraselist file.
+
+    The default library folders are scanned first, followed by the folders in
+    `prompt_library_paths`; a category found in a later folder replaces one of
+    the same name from an earlier folder.
+
+    `prompt_library_paths` may be None, a single path string, or a hashable
+    iterable of folder paths (e.g. the tuple produced by a repeatable
+    command-line option).  Because results are memoized with `lru_cache`, an
+    unhashable argument such as a list raises TypeError, and the returned dict
+    is shared between callers and should not be mutated.
+    """
+    if not prompt_library_paths:
+        extra_paths = []
+    elif isinstance(prompt_library_paths, str):
+        # a lone string is one folder, not an iterable of characters
+        extra_paths = [prompt_library_paths]
+    else:
+        # normalize: `list + tuple` raises TypeError, so never concatenate directly
+        extra_paths = list(prompt_library_paths)
+
+    combined_prompt_library_filepaths = {}
+    for prompt_path in [*DEFAULT_PROMPT_LIBRARY_PATHS, *extra_paths]:
+        library_prompts = prompt_library_filepath(prompt_path)
+        combined_prompt_library_filepaths.update(library_prompts)
+
+    return combined_prompt_library_filepaths
+
+
+@lru_cache()
+def category_list(prompt_library_paths=None):
+    """Return the sorted names of all available phrase-lists"""
+    return sorted(prompt_library_filepaths(prompt_library_paths))
+
+
+@lru_cache()
+def prompt_library_filepath(library_path):
+    """
+    Map category names to the phraselist files found directly in `library_path`.
+
+    A file counts as a phraselist when everything after the first "." in its
+    name is "txt" or "txt.gz".  The category name is the part before that dot,
+    lowercased.  The result is memoized per folder.
+    """
+    category_files = {}
+
+    for entry in os.listdir(library_path):
+        stem, dot, extension = entry.partition(".")
+        if not dot:
+            # no extension at all, so not a phraselist
+            continue
+        if extension == "txt" or extension == "txt.gz":
+            category_files[stem.lower()] = os.path.join(library_path, entry)
+    return category_files
+
+
+@lru_cache(maxsize=100)
+def get_phrases(category_name, prompt_library_paths=None):
+    """
+    Return the phrases of the phraselist named `category_name`.
+
+    The category name is matched case-insensitively.  Each line of the file
+    (plain or gzip-compressed, UTF-8) is one phrase; surrounding whitespace is
+    stripped and blank lines are skipped so they never become empty phrases.
+
+    The returned list is memoized and shared between callers; do not mutate it.
+
+    Raises:
+        LookupError: no phraselist file exists for `category_name`.
+    """
+    category_name = category_name.lower()
+    lookup = prompt_library_filepaths(prompt_library_paths)
+    try:
+        filepath = lookup[category_name]
+    except KeyError as e:
+        raise LookupError(
+            f"'{category_name}' is not a valid prompt expansion category. Could not find the txt file."
+        ) from e
+    _open = gzip.open if filepath.endswith(".gz") else open
+
+    with _open(filepath, "rb") as f:
+        stripped_lines = (line.decode("utf-8").strip() for line in f)
+        # a blank (e.g. trailing) line would otherwise expand to an empty phrase
+        phrases = [phrase for phrase in stripped_lines if phrase]
+    return phrases
+
+
+def expand_prompts(prompt_text, n=1, prompt_library_paths=None):
+    """
+    Yield prompts with every {...} expansion replaced by a randomly chosen phrase.
+
+    Inside the braces, alternatives are separated by `|`.  An alternative
+    wrapped in underscores (e.g. `_color_`) stands for every phrase of the
+    phraselist with that name; any other alternative is used literally.
+    Expansions are lowercased before use.  Combinations are drawn without
+    repetition until all of them have been used.
+
+    This is a generator, so validation errors surface when it is first iterated.
+
+    Args:
+        prompt_text: prompt that may contain {...} expansions.
+        n: number of prompts to yield (may be `math.inf` for an endless stream).
+        prompt_library_paths: extra phraselist folders, forwarded to `get_phrases`.
+
+    Raises:
+        ValueError: an expansion contains a character other than a-z, 0-9,
+            `_`, `|`, `-` or space.
+        LookupError: an `_name_` alternative has no matching phraselist.
+
+    Example:
+        prompts = list(expand_prompts("a happy {dog|cat}", n=2))
+        assert sorted(prompts) == ["a happy cat", "a happy dog"]
+
+    """
+    prompt_parts = list(formatter.parse(prompt_text))
+    field_names = []
+    for literal_text, field_name, format_spec, conversion in prompt_parts:  # noqa
+        if field_name:
+            field_name = field_name.lower()
+            # fullmatch: `match` only anchors at the start and would accept
+            # "red!" because its first character is valid
+            if not PROMPT_EXPANSION_PATTERN.fullmatch(field_name):
+                raise ValueError(
+                    "Invalid prompt expansion. Only a-z0-9_|- characters permitted. "
+                )
+            field_names.append(field_name)
+
+    # one list of candidate phrases per expansion, in prompt order
+    phrases = []
+    for field_name in field_names:
+        field_phrases = []
+        for token in field_name.split("|"):
+            token = token.strip()
+            if token.startswith("_") and token.endswith("_"):
+                category_name = token.strip("_")
+                category_phrases = get_phrases(
+                    category_name, prompt_library_paths=prompt_library_paths
+                )
+                field_phrases.extend(category_phrases)
+            else:
+                field_phrases.append(token)
+        phrases.append(field_phrases)
+
+    for values in get_random_non_repeating_combination(n, *phrases):
+        field_count = 0
+        output_parts = []
+        for literal_text, field_name, format_spec, conversion in prompt_parts:  # noqa
+            output_parts.append(literal_text)
+            if field_name:
+                # values are ordered like the expansions in the prompt
+                output_parts.append(values[field_count])
+                field_count += 1
+        yield "".join(output_parts)
+
+
+def get_random_non_repeating_combination(  # noqa
+    n=1, *sequences, allow_oversampling=True
+):
+    """
+    Efficiently yield a non-repeating random sample of the product of `sequences`.
+
+    Each yielded item is a list holding one element from every sequence.
+    Nothing is yielded when any sequence is empty, since no combination exists.
+
+    Will repeat if n > num_total_possible combinations and allow_oversampling=True
+
+    Will also potentially repeat after 1_000_000 combinations.
+
+    Args:
+        n: number of combinations to yield; may be `math.inf`.
+        *sequences: indexable sequences to combine.
+        allow_oversampling: when False, asking for more combinations than exist
+            makes `random.sample` raise ValueError.
+    """
+    n_combinations = 1
+    for sequence in sequences:
+        n_combinations *= len(sequence)
+
+    # With an empty sequence every pass below would sample zero items and never
+    # reduce n, looping forever.
+    if n_combinations == 0:
+        return
+
+    while n > 0:
+        sub_n = n
+        if n > n_combinations and allow_oversampling:
+            sub_n = n_combinations
+        sub_n = min(1_000_000, sub_n)
+
+        indices = random.sample(range(n_combinations), sub_n)
+
+        for idx in indices:
+            # decode the flat index as a mixed-radix number, one digit per sequence
+            values = []
+            for sequence in sequences:
+                seq_idx = idx % len(sequence)
+                values.append(sequence[seq_idx])
+                idx = idx // len(sequence)
+            yield values
+        n -= sub_n
--- a/imaginairy/vendored/noodle_soup_prompts/3d-term.txt.gz
+++ b/imaginairy/vendored/noodle_soup_prompts/3d-term.txt.gz
--- a/imaginairy/vendored/noodle_soup_prompts/LICENSE
+++ b/imaginairy/vendored/noodle_soup_prompts/LICENSE
@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2022 Jordan Thompson
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
--- a/imaginairy/vendored/noodle_soup_prompts/adj-architecture.txt.gz
+++ b/imaginairy/vendored/noodle_soup_prompts/adj-architecture.txt.gz
--- a/imaginairy/vendored/noodle_soup_prompts/adj-beauty.txt.gz
+++ b/imaginairy/vendored/noodle_soup_prompts/adj-beauty.txt.gz
--- a/imaginairy/vendored/noodle_soup_prompts/adj-detailed.txt.gz
+++ b/imaginairy/vendored/noodle_soup_prompts/adj-detailed.txt.gz
--- a/imaginairy/vendored/noodle_soup_prompts/adj-emotion.txt.gz
+++ b/imaginairy/vendored/noodle_soup_prompts/adj-emotion.txt.gz
--- a/imaginairy/vendored/noodle_soup_prompts/adj-general.txt.gz
+++ b/imaginairy/vendored/noodle_soup_prompts/adj-general.txt.gz
--- a/imaginairy/vendored/noodle_soup_prompts/adj-horror.txt.gz
+++ b/imaginairy/vendored/noodle_soup_prompts/adj-horror.txt.gz
--- a/imaginairy/vendored/noodle_soup_prompts/animal.txt.gz
+++ b/imaginairy/vendored/noodle_soup_prompts/animal.txt.gz
--- a/imaginairy/vendored/noodle_soup_prompts/art-movement.txt.gz
+++ b/imaginairy/vendored/noodle_soup_prompts/art-movement.txt.gz
--- a/imaginairy/vendored/noodle_soup_prompts/art-site.txt.gz
+++ b/imaginairy/vendored/noodle_soup_prompts/art-site.txt.gz
--- a/imaginairy/vendored/noodle_soup_prompts/artist-botanical.txt.gz
+++ b/imaginairy/vendored/noodle_soup_prompts/artist-botanical.txt.gz
--- a/imaginairy/vendored/noodle_soup_prompts/artist-surreal.txt.gz
+++ b/imaginairy/vendored/noodle_soup_prompts/artist-surreal.txt.gz
--- a/imaginairy/vendored/noodle_soup_prompts/artist.txt.gz
+++ b/imaginairy/vendored/noodle_soup_prompts/artist.txt.gz
--- a/imaginairy/vendored/noodle_soup_prompts/aspect-ratio.txt.gz
+++ b/imaginairy/vendored/noodle_soup_prompts/aspect-ratio.txt.gz
--- a/imaginairy/vendored/noodle_soup_prompts/bird.txt.gz
+++ b/imaginairy/vendored/noodle_soup_prompts/bird.txt.gz
--- a/imaginairy/vendored/noodle_soup_prompts/body-of-water.txt.gz
+++ b/imaginairy/vendored/noodle_soup_prompts/body-of-water.txt.gz
--- a/imaginairy/vendored/noodle_soup_prompts/body-pose.txt.gz
+++ b/imaginairy/vendored/noodle_soup_prompts/body-pose.txt.gz
--- a/imaginairy/vendored/noodle_soup_prompts/camera-brand.txt.gz
+++ b/imaginairy/vendored/noodle_soup_prompts/camera-brand.txt.gz
--- a/imaginairy/vendored/noodle_soup_prompts/camera-model.txt.gz
+++ b/imaginairy/vendored/noodle_soup_prompts/camera-model.txt.gz
--- a/imaginairy/vendored/noodle_soup_prompts/cosmic-galaxy.txt.gz
+++ b/imaginairy/vendored/noodle_soup_prompts/cosmic-galaxy.txt.gz
--- a/imaginairy/vendored/noodle_soup_prompts/cosmic-nebula.txt.gz
+++ b/imaginairy/vendored/noodle_soup_prompts/cosmic-nebula.txt.gz
--- a/imaginairy/vendored/noodle_soup_prompts/cosmic-star.txt.gz
+++ b/imaginairy/vendored/noodle_soup_prompts/cosmic-star.txt.gz
--- a/imaginairy/vendored/noodle_soup_prompts/cosmic-term.txt.gz
+++ b/imaginairy/vendored/noodle_soup_prompts/cosmic-term.txt.gz
--- a/imaginairy/vendored/noodle_soup_prompts/dinosaur.txt.gz
+++ b/imaginairy/vendored/noodle_soup_prompts/dinosaur.txt.gz
--- a/imaginairy/vendored/noodle_soup_prompts/eyecolor.txt.gz
+++ b/imaginairy/vendored/noodle_soup_prompts/eyecolor.txt.gz
--- a/imaginairy/vendored/noodle_soup_prompts/f-stop.txt.gz
+++ b/imaginairy/vendored/noodle_soup_prompts/f-stop.txt.gz
--- a/imaginairy/vendored/noodle_soup_prompts/fantasy-creature.txt.gz
+++ b/imaginairy/vendored/noodle_soup_prompts/fantasy-creature.txt.gz
--- a/imaginairy/vendored/noodle_soup_prompts/fantasy-setting.txt.gz
+++ b/imaginairy/vendored/noodle_soup_prompts/fantasy-setting.txt.gz
--- a/imaginairy/vendored/noodle_soup_prompts/fish.txt.gz
+++ b/imaginairy/vendored/noodle_soup_prompts/fish.txt.gz
--- a/imaginairy/vendored/noodle_soup_prompts/flower.txt.gz
+++ b/imaginairy/vendored/noodle_soup_prompts/flower.txt.gz
--- a/imaginairy/vendored/noodle_soup_prompts/focal-length.txt.gz
+++ b/imaginairy/vendored/noodle_soup_prompts/focal-length.txt.gz
--- a/imaginairy/vendored/noodle_soup_prompts/food.txt.gz
+++ b/imaginairy/vendored/noodle_soup_prompts/food.txt.gz
--- a/imaginairy/vendored/noodle_soup_prompts/fruit.txt.gz
+++ b/imaginairy/vendored/noodle_soup_prompts/fruit.txt.gz
--- a/imaginairy/vendored/noodle_soup_prompts/gen-modifier.txt.gz
+++ b/imaginairy/vendored/noodle_soup_prompts/gen-modifier.txt.gz
--- a/imaginairy/vendored/noodle_soup_prompts/hair.txt.gz
+++ b/imaginairy/vendored/noodle_soup_prompts/hair.txt.gz
--- a/imaginairy/vendored/noodle_soup_prompts/hd.txt.gz
+++ b/imaginairy/vendored/noodle_soup_prompts/hd.txt.gz
--- a/imaginairy/vendored/noodle_soup_prompts/iso-stop.txt.gz
+++ b/imaginairy/vendored/noodle_soup_prompts/iso-stop.txt.gz
--- a/imaginairy/vendored/noodle_soup_prompts/landscape-type.txt.gz
+++ b/imaginairy/vendored/noodle_soup_prompts/landscape-type.txt.gz
--- a/imaginairy/vendored/noodle_soup_prompts/national-park.txt.gz
+++ b/imaginairy/vendored/noodle_soup_prompts/national-park.txt.gz
--- a/imaginairy/vendored/noodle_soup_prompts/nationality.txt.gz
+++ b/imaginairy/vendored/noodle_soup_prompts/nationality.txt.gz
--- a/imaginairy/vendored/noodle_soup_prompts/neg-weight.txt.gz
+++ b/imaginairy/vendored/noodle_soup_prompts/neg-weight.txt.gz
--- a/imaginairy/vendored/noodle_soup_prompts/noun-beauty.txt.gz
+++ b/imaginairy/vendored/noodle_soup_prompts/noun-beauty.txt.gz
--- a/imaginairy/vendored/noodle_soup_prompts/noun-fantasy.txt.gz
+++ b/imaginairy/vendored/noodle_soup_prompts/noun-fantasy.txt.gz
--- a/imaginairy/vendored/noodle_soup_prompts/noun-general.txt.gz
+++ b/imaginairy/vendored/noodle_soup_prompts/noun-general.txt.gz
--- a/imaginairy/vendored/noodle_soup_prompts/noun-horror.txt.gz
+++ b/imaginairy/vendored/noodle_soup_prompts/noun-horror.txt.gz
--- a/imaginairy/vendored/noodle_soup_prompts/occupation.txt.gz
+++ b/imaginairy/vendored/noodle_soup_prompts/occupation.txt.gz
--- a/imaginairy/vendored/noodle_soup_prompts/photo-term.txt.gz
+++ b/imaginairy/vendored/noodle_soup_prompts/photo-term.txt.gz
--- a/imaginairy/vendored/noodle_soup_prompts/pop-culture.txt.gz
+++ b/imaginairy/vendored/noodle_soup_prompts/pop-culture.txt.gz
--- a/imaginairy/vendored/noodle_soup_prompts/pop-location.txt.gz
+++ b/imaginairy/vendored/noodle_soup_prompts/pop-location.txt.gz
--- a/imaginairy/vendored/noodle_soup_prompts/punk-style.txt.gz
+++ b/imaginairy/vendored/noodle_soup_prompts/punk-style.txt.gz
--- a/imaginairy/vendored/noodle_soup_prompts/quantity.txt.gz
+++ b/imaginairy/vendored/noodle_soup_prompts/quantity.txt.gz
--- a/imaginairy/vendored/noodle_soup_prompts/rpg-Item.txt.gz
+++ b/imaginairy/vendored/noodle_soup_prompts/rpg-Item.txt.gz
--- a/imaginairy/vendored/noodle_soup_prompts/scenario-desc.txt.gz
+++ b/imaginairy/vendored/noodle_soup_prompts/scenario-desc.txt.gz
--- a/imaginairy/vendored/noodle_soup_prompts/skin-color.txt.gz
+++ b/imaginairy/vendored/noodle_soup_prompts/skin-color.txt.gz
--- a/imaginairy/vendored/noodle_soup_prompts/style.txt.gz
+++ b/imaginairy/vendored/noodle_soup_prompts/style.txt.gz
--- a/imaginairy/vendored/noodle_soup_prompts/tree-species.txt.gz
+++ b/imaginairy/vendored/noodle_soup_prompts/tree-species.txt.gz
--- a/imaginairy/vendored/noodle_soup_prompts/trippy.txt.gz
+++ b/imaginairy/vendored/noodle_soup_prompts/trippy.txt.gz
--- a/imaginairy/vendored/noodle_soup_prompts/video-game.txt.gz
+++ b/imaginairy/vendored/noodle_soup_prompts/video-game.txt.gz
--- a/imaginairy/vendored/noodle_soup_prompts/world-heritage-site.txt.gz
+++ b/imaginairy/vendored/noodle_soup_prompts/world-heritage-site.txt.gz
--- a/scripts/prep_vocab_lists.py
+++ b/scripts/prep_vocab_lists.py
@ -0,0 +1,72 @@
+import gzip
+import json
+import os.path
+import time
+from contextlib import contextmanager
+
+CURDIR = os.path.dirname(__file__)
+
+# Source categories whose name starts with one of these prefixes are not exported.
+excluded_prefixes = ["identity", "gender", "body", "celeb", "color"]
+# Phrases that are dropped from every exported list.
+excluded_words = {
+    "sex",
+    "sexy",
+    "sex appeal",
+    "sex symbol",
+    "young",
+    "youth",
+    "youthful",
+    "child",
+    "baby",
+}
+# Source category name -> name used for the exported file.  Categories that
+# are not listed keep their original name.
+category_renames = {
+    "3d-terms": "3d-term",
+    "animals": "animal",
+    "camera": "camera-model",
+    "camera-manu": "camera-brand",
+    "cosmic-terms": "cosmic-term",
+    "details": "adj-detailed",
+    "foods": "food",
+    "games": "video-game",
+    "movement": "art-movement",
+    "noun-emote": "adj-emotion",
+    "natl-park": "national-park",
+    "portrait-type": "body-pose",
+    "punk": "punk-style",
+    "site": "art-site",
+    "tree": "tree-species",
+    "water": "body-of-water",
+    "wh-site": "world-heritage-site",
+}
+
+
+@contextmanager
+def timed(description):
+    """
+    Context manager that prints how long the wrapped block took.
+
+    Prints `description` followed by the elapsed seconds with two decimals.
+    The line is printed even if the block raises; the exception still propagates.
+    """
+    start = time.perf_counter()
+    try:
+        yield
+    finally:
+        duration = time.perf_counter() - start
+        # ".2f" is two decimals; the former "2f" was a minimum width of 2
+        print(f"{description} {duration:.2f}")
+
+
+def make_txts():
+    """
+    Convert the downloaded noodle-soup-prompts JSON into gzip phraselist files.
+
+    Reads `nsp_pantry.json` from the downloads folder, skips categories whose
+    name starts with an excluded prefix, lowercases every phrase, drops the
+    excluded words and writes one `<category>.txt.gz` per remaining category
+    (after applying `category_renames`) with one phrase per line.
+    """
+    src_json = f"{CURDIR}/../downloads/noodle-soup-prompts/nsp_pantry.json"
+    dst_folder = f"{CURDIR}/../imaginairy/vendored/noodle_soup_prompts"
+    with open(src_json, "r", encoding="utf-8") as f:
+        prompts = json.load(f)
+    categories = sorted(
+        c for c in prompts if not c.startswith(tuple(excluded_prefixes))
+    )
+    for c in categories:
+        print((c, len(prompts[c])))
+        # lowercase first so the exclusion also catches capitalised variants
+        lowered_phrases = (p.lower() for p in prompts[c])
+        filtered_phrases = [p for p in lowered_phrases if p not in excluded_words]
+        renamed_c = category_renames.get(c, c)
+        with gzip.open(f"{dst_folder}/{renamed_c}.txt.gz", "wb") as f:
+            for p in filtered_phrases:
+                f.write(f"{p}\n".encode("utf-8"))
+
+
+if __name__ == "__main__":
+    make_txts()
--- a/setup.py
+++ b/setup.py
@ -25,9 +25,11 @@ setup(
    package_data={
        "imaginairy": [
            "configs/*.yaml",
+            # built-in phraselists live in imaginairy/enhancers/phraselists
+            "enhancers/phraselists/*.txt",
            "vendored/clip/*.txt.gz",
            "vendored/clipseg/*.pth",
            "vendored/blip/configs/*.*",
+            "vendored/noodle_soup_prompts/*.*",
        ]
    },
    install_requires=[
--- a/tests/enhancers/init.py
+++ b/tests/enhancers/init.py
--- a/tests/enhancers/test_prompt_expansion.py
+++ b/tests/enhancers/test_prompt_expansion.py
@ -0,0 +1,44 @@
+from imaginairy.enhancers.prompt_expansion import category_list, expand_prompts
+
+
+def test_prompt_expander_basic():
+    """Inline alternatives: requesting as many prompts as options yields each once."""
+    expanded = sorted(expand_prompts("a {red|blue|hot pink} dog", n=3))
+    # should output each possibility exactly once
+    assert expanded == sorted(["a blue dog", "a hot pink dog", "a red dog"])
+
+
+def test_prompt_expander_from_wordlist():
+    """A phraselist reference combined with an inline alternative yields every option once."""
+    options = [
+        "aqua",
+        "black",
+        "blue",
+        "fuchsia",
+        "golden",
+        "gray",
+        "green",
+        "hot pink",
+        "lime",
+        "maroon",
+        "navy",
+        "olive",
+        "purple",
+        "red",
+        "silver",
+        "teal",
+        "white",
+        "yellow",
+    ]
+    expected = sorted(f"a {option} dog" for option in options)
+    # should output each possibility exactly once
+    expanded = sorted(expand_prompts("a {_color_|golden} dog", n=18))
+    assert expanded == expected
+
+
+def test_get_phraselist_names():
+    """The category list is sorted and includes the phraselists shipped with the package."""
+    categories = category_list()
+    # printed so the list can be copied into the README
+    print(", ".join(categories))
+    assert categories == sorted(categories)
+    assert "color" in categories
+    assert "spaceship" in categories