fix: try to address #13 BFloat16 issue

Seems to be caused by incompatible dtypes in group_norm when we use autocast.

Patch group_norm to cast its weights to the same dtype as its inputs.

From what I can tell, all the other repos just switch to full precision instead
of addressing this, which presumably makes things slower (I haven't measured it).
So the patching solution here may be the better trade-off.

https://github.com/pytorch/pytorch/pull/81852
Bryce — commit 09bc1c70e6 (parent e23e363bf5)
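
For context, a minimal sketch of the failure mode and of the cast-based workaround this commit applies (shapes, dtypes, and values below are illustrative, not taken from the repo):

    import torch
    from torch.nn import functional as F

    x = torch.randn(1, 8, 4, 4).to(torch.bfloat16)  # activations in bfloat16
    weight = torch.randn(8)  # affine parameters still in float32
    bias = torch.randn(8)

    # With mismatched dtypes, group_norm raises a RuntimeError on some builds:
    # F.group_norm(x, num_groups=2, weight=weight, bias=bias)

    # The workaround: cast the parameters to the input's dtype first.
    out = F.group_norm(x, num_groups=2, weight=weight.to(x.dtype), bias=bias.to(x.dtype))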

@@ -51,6 +51,12 @@ deploy: ## Deploy the package to pypi.org
 	rm -rf dist
 	@echo "Deploy successful! ✨ 🍰 ✨"
 
+build-dev-image:
+	docker build -f tests/Dockerfile -t imaginairy-dev .
+
+run-dev: build-dev-image
+	docker run -it -v $$HOME/.cache/huggingface:/root/.cache/huggingface -v $$HOME/.cache/torch:/root/.cache/torch -v `pwd`/outputs:/outputs imaginairy-dev /bin/bash
+
 requirements: ## Freeze the requirements.txt file
 	pip-compile setup.py requirements-dev.in --output-file=requirements-dev.txt --upgrade

@@ -29,6 +29,7 @@ from imaginairy.samplers.base import get_sampler
 from imaginairy.schema import ImaginePrompt, ImagineResult
 from imaginairy.utils import (
     expand_mask,
+    fix_torch_group_norm,
     fix_torch_nn_layer_norm,
     get_device,
     instantiate_from_config,
@@ -156,13 +157,16 @@ def imagine(
     prompts = [ImaginePrompt(prompts)] if isinstance(prompts, str) else prompts
     prompts = [prompts] if isinstance(prompts, ImaginePrompt) else prompts
     _img_callback = None
+    if get_device() == "cpu":
+        logger.info("Running in CPU mode. it's gonna be slooooooow.")
     precision_scope = (
         autocast
         if precision == "autocast" and get_device() in ("cuda", "cpu")
         else nullcontext
     )
-    with torch.no_grad(), precision_scope(get_device()), fix_torch_nn_layer_norm():
+    with torch.no_grad(), precision_scope(
+        get_device()
+    ), fix_torch_nn_layer_norm(), fix_torch_group_norm():
         for prompt in prompts:
             with ImageLoggingContext(
                 prompt=prompt,

@@ -149,6 +149,12 @@ def configure_logging(level="INFO"):
     is_flag=True,
     help="Generate a text description of the generated image",
 )
+@click.option(
+    "--precision",
+    help="evaluate at this precision",
+    type=click.Choice(["full", "autocast"]),
+    default="autocast",
+)
 @click.pass_context
 def imagine_cmd(
     ctx,
@@ -174,6 +180,7 @@ def imagine_cmd(
     mask_mode,
     mask_expansion,
     caption,
+    precision,
 ):
     """Have the AI generate images. alias:imagine"""
     if ctx.invoked_subcommand is not None:
@@ -220,6 +227,7 @@ def imagine_cmd(
         record_step_images="images" in show_work,
         output_file_extension="png",
         print_caption=caption,
+        precision=precision,
     )
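
With the new option, full precision can be requested from the command line instead of the default autocast; a hypothetical invocation (prompt text is illustrative):

    imagine --precision full "portrait of a colorful parrot"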

@@ -2,7 +2,7 @@ import importlib
 import logging
 import os.path
 import platform
-from contextlib import contextmanager
+from contextlib import contextmanager, nullcontext
 from functools import lru_cache
 from typing import List, Optional

@@ -10,7 +10,7 @@ import numpy as np
 import requests
 import torch
 from PIL import Image, ImageFilter
-from torch import Tensor
+from torch import Tensor, autocast
 from torch.nn import functional
 from torch.overrides import handle_torch_function, has_torch_function_variadic
 from transformers import cached_path
@@ -104,6 +104,43 @@ def fix_torch_nn_layer_norm():
         functional.layer_norm = orig_function


+@contextmanager
+def fix_torch_group_norm():
+    """
+    Patch group_norm to cast the weights to the same type as the inputs.
+
+    From what I can understand, all the other repos just switch to full precision
+    instead of addressing this. I think this would make things slower, but I'm not sure.
+
+    https://github.com/pytorch/pytorch/pull/81852
+    """
+    orig_group_norm = functional.group_norm
+
+    def _group_norm_wrapper(
+        input: Tensor,
+        num_groups: int,
+        weight: Optional[Tensor] = None,
+        bias: Optional[Tensor] = None,
+        eps: float = 1e-5,
+    ) -> Tensor:
+        if weight is not None and weight.dtype != input.dtype:
+            weight = weight.to(input.dtype)
+        if bias is not None and bias.dtype != input.dtype:
+            bias = bias.to(input.dtype)
+
+        return orig_group_norm(
+            input=input, num_groups=num_groups, weight=weight, bias=bias, eps=eps
+        )
+
+    functional.group_norm = _group_norm_wrapper
+    try:
+        yield
+    finally:
+        functional.group_norm = orig_group_norm
+
+
 def expand_mask(mask_image, size):
     if size < 0:
         threshold = 0.95
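
For reference, a minimal sketch of how the new context manager is meant to be used, mirroring the imagine() change above (the device string and the no-op body are illustrative):

    import torch
    from torch import autocast
    from imaginairy.utils import fix_torch_group_norm, fix_torch_nn_layer_norm

    # Both monkeypatches are active only inside the with-block; the original
    # torch functions are restored on exit, even if an exception is raised.
    with torch.no_grad(), autocast("cuda"), fix_torch_nn_layer_norm(), fix_torch_group_norm():
        ...  # run the sampling loop here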
