fix: fix model downloads that were broken by [library change in transformers 4.27.0](8f3b4a1d5b)
Bryce 2023-03-17 09:40:05 -07:00 committed by Bryce Drennan
parent eb26d5a7c5
commit 37d6642c83
27 changed files with 54 additions and 101 deletions

View File

@ -33,7 +33,7 @@ jobs:
python-version: 3.9
- name: Install dependencies
run: |
python -m pip install --disable-pip-version-check black==22.12.0 isort==5.11.4
python -m pip install --disable-pip-version-check black==23.1.0 isort==5.12.0
- name: Autoformatter
run: |
black --diff .

View File

@ -1,5 +1,5 @@
SHELL := /bin/bash
python_version = 3.10.6
python_version = 3.10.10
venv_prefix = imaginairy
venv_name = $(venv_prefix)-$(python_version)
pyenv_instructions=https://github.com/pyenv/pyenv#installation
@ -32,6 +32,7 @@ autoformat: ## Run the autoformatter.
@# ERA,T201
@-ruff --extend-ignore ANN,ARG001,C90,DTZ,D100,D101,D102,D103,D202,D203,D212,D415,E501,RET504,S101,UP006,UP007 --extend-select C,D400,I,W --unfixable T,ERA --fix-only .
@black .
@isort --atomic --profile black .
test: ## Run the tests.
@pytest

View File

@ -399,10 +399,13 @@ docker run -it --gpus all -v $HOME/.cache/huggingface:/root/.cache/huggingface -
## ChangeLog
**11.1.0**
- docs: add some example use cases
- feature: add art-scene, desktop-background, interior-style, painting-style phraselists
- fix: compilation animations create normal slideshows instead of "bounces"
- fix: file globbing works in the interactive shell
- fix: fix model downloads that were broken by [library change in transformers 4.27.0](https://github.com/huggingface/transformers/commit/8f3b4a1d5bd97045541c43179efe8cd9c58adb76)
**11.0.0**
- all these changes together mean same seed/sampler will not be guaranteed to produce same image (thus the version bump)

View File

@ -12,7 +12,6 @@ def describe_cmd(image_filepaths):
imgs = []
for p in image_filepaths:
if p.startswith("http"):
img = LazyLoadingImage(url=p)
elif os.path.isdir(p):

View File

@ -224,7 +224,6 @@ def add_options(options):
def replace_option(options, option_name, new_option):
for i, option in enumerate(options):
if option.name == option_name:
options[i] = new_option
return

View File

@ -85,7 +85,6 @@ def enhance_faces(img, fidelity=0):
try:
with torch.no_grad():
output = net(cropped_face_t, w=fidelity, adain=True)[0] # noqa
restored_face = tensor2img(output, rgb2bgr=True, min_max=(-1, 1))
del output

View File

@ -113,7 +113,6 @@ def expand_prompts(prompt_text, n=1, prompt_library_paths=None):
field_count = 0
output_prompt = ""
for literal_text, field_name, format_spec, conversion in prompt_parts:
output_prompt += literal_text
if field_name:
output_prompt += values[field_count]

View File

@ -115,7 +115,10 @@ def add_tiles(tiles, base_img, tile_coords, tile_size, overlap):
f_ovlp[1] = 0
t = 0
column, row, = (
(
column,
row,
) = (
0,
0,
)
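
The hunk above is a formatting-only change, presumably from the black 23.1.0 bump elsewhere in this commit: the multi-target tuple assignment gains parentheses around its left-hand side. With the indentation the diff viewer dropped, the before/after reads roughly as follows (a reconstruction, not the surrounding module code):

```python
# before
column, row, = (
    0,
    0,
)

# after: the assignment targets are parenthesized as well
(
    column,
    row,
) = (
    0,
    0,
)
```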

View File

@ -386,7 +386,6 @@ class bodypose_model(nn.Module):
self.model6_2 = blocks["block6_2"]
def forward(self, x):
out1 = self.model0(x)
out1_1 = self.model1_1(out1)

View File

@ -8,11 +8,11 @@ from functools import wraps
import requests
import torch
from huggingface_hub import HfFolder
from huggingface_hub import hf_hub_download as _hf_hub_download
from huggingface_hub import try_to_load_from_cache
from omegaconf import OmegaConf
from safetensors.torch import load_file
from transformers.utils.hub import HfFolder
from imaginairy import config as iconfig
from imaginairy.config import MODEL_SHORT_NAMES
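
For context on the change above: the `HfFolder` import moves from `transformers.utils.hub` (a re-export affected by the linked transformers 4.27.0 change) to `huggingface_hub` itself. A minimal sketch of the compatible pattern, assuming huggingface_hub >= 0.13 is installed; the helper name is hypothetical, not from this repo:

```python
from huggingface_hub import HfFolder  # replaces: from transformers.utils.hub import HfFolder


def get_cached_hf_token():
    """Hypothetical helper: return the locally cached Hugging Face token, or None."""
    return HfFolder.get_token()
```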

View File

@ -652,7 +652,6 @@ def merge_tensors(tensor_list, num_rows, num_cols):
print(f"final size {final_tensor.size()}")
for row_idx in range(num_rows):
for col_idx in range(num_cols):
list_idx = row_idx * num_cols + col_idx
chunk = tensor_list[list_idx]
print(f"chunk size: {chunk.size()}")

View File

@ -144,9 +144,9 @@ class ControlNet(nn.Module):
if num_attention_blocks is not None:
assert len(num_attention_blocks) == len(self.num_res_blocks)
assert all(
map(
lambda i: self.num_res_blocks[i] >= num_attention_blocks[i],
range(len(num_attention_blocks)),
(
self.num_res_blocks[i] >= num_attention_blocks[i]
for i in range(len(num_attention_blocks))
)
)
print(
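
The change above replaces a `map`/`lambda` pair with an equivalent generator expression, likely an autofix from the comprehension rules enabled in the Makefile's ruff invocation. As plain before/after code:

```python
# before: lambda mapped over the index range
assert all(
    map(
        lambda i: self.num_res_blocks[i] >= num_attention_blocks[i],
        range(len(num_attention_blocks)),
    )
)

# after: equivalent generator expression, no lambda needed
assert all(
    self.num_res_blocks[i] >= num_attention_blocks[i]
    for i in range(len(num_attention_blocks))
)
```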

View File

@ -1150,7 +1150,6 @@ class LatentDiffusion(DDPM):
return self.p_losses(x, c, t, *args, **kwargs)
def apply_model(self, x_noisy, t, cond, return_ids=False):
if isinstance(cond, dict):
# hybrid case, cond is expected to be a dict
pass

View File

@ -27,7 +27,6 @@ class DPT(BaseModel):
channels_last=False,
use_bn=False,
):
super().__init__()
self.channels_last = channels_last

View File

@ -524,7 +524,6 @@ def train_diffusion_model(
signal.signal(signal.SIGUSR1, melk)
try:
try:
trainer.fit(model, data)
except Exception:

View File

@ -207,5 +207,8 @@ def glob_expand_paths(paths):
expanded_paths = []
for p in paths:
expanded_paths.extend(glob.glob(p))
if p.startswith("http"):
expanded_paths.append(p)
else:
expanded_paths.extend(glob.glob(p))
return expanded_paths
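
This is likely the change behind the changelog entry "fix: file globbing works in the interactive shell": URLs are now passed through untouched instead of being handed to `glob.glob`, which returns an empty list for them and so silently dropped every URL argument. A self-contained sketch of the resulting behavior (standalone, not the exact module source):

```python
import glob


def glob_expand_paths(paths):
    """Expand filesystem globs; pass URLs through unchanged."""
    expanded_paths = []
    for p in paths:
        if p.startswith("http"):
            # glob.glob("https://...") returns [], which used to drop URLs silently
            expanded_paths.append(p)
        else:
            expanded_paths.extend(glob.glob(p))
    return expanded_paths


# usage sketch:
#   glob_expand_paths(["*.jpg", "https://example.com/cat.png"])
#   -> every local .jpg plus the URL, order preserved
```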

View File

@ -47,7 +47,6 @@ class BLIP_Base(nn.Module):
self.text_encoder = BertModel(config=med_config, add_pooling_layer=False)
def forward(self, image, caption, mode):
assert mode in [
"image",
"text",
@ -118,7 +117,6 @@ class BLIP_Decoder(nn.Module):
self.prompt_length = len(self.tokenizer(self.prompt).input_ids) - 1
def forward(self, image, caption):
image_embeds = self.visual_encoder(image)
image_atts = torch.ones(image_embeds.size()[:-1], dtype=torch.long).to(
image.device
@ -242,7 +240,6 @@ def init_tokenizer():
def create_vit(
vit, image_size, use_grad_checkpointing=False, ckpt_layer=0, drop_path_rate=0
):
assert vit in ["base", "large"], "vit parameter must be base or large"
if vit == "base":
vision_width = 768

View File

@ -39,7 +39,6 @@ class BLIP_ITM(nn.Module):
self.itm_head = nn.Linear(text_width, 2)
def forward(self, image, caption, match_head="itm"):
image_embeds = self.visual_encoder(image)
image_atts = torch.ones(image_embeds.size()[:-1], dtype=torch.long).to(
image.device

View File

@ -43,7 +43,6 @@ class BLIP_NLVR(nn.Module):
)
def forward(self, image, text, targets, train=True):
image_embeds = self.visual_encoder(image)
image_atts = torch.ones(image_embeds.size()[:-1], dtype=torch.long).to(
image.device

View File

@ -46,7 +46,6 @@ class BLIP_VQA(nn.Module):
inference="rank",
k_test=128,
):
image_embeds = self.visual_encoder(image)
image_atts = torch.ones(image_embeds.size()[:-1], dtype=torch.long).to(
image.device
@ -160,7 +159,6 @@ class BLIP_VQA(nn.Module):
return max_ids
def rank_answer(self, question_states, question_atts, answer_ids, answer_atts, k):
num_ques = question_states.size(0)
start_ids = answer_ids[0, 0].repeat(num_ques, 1) # bos token

View File

@ -474,7 +474,6 @@ class BertEncoder(nn.Module):
past_key_value = past_key_values[i] if past_key_values is not None else None
if self.gradient_checkpointing and self.training:
if use_cache:
logger.warn(
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
@ -909,7 +908,6 @@ class BertModel(BertPreTrainedModel):
class BertLMHeadModel(BertPreTrainedModel):
_keys_to_ignore_on_load_unexpected = [r"pooler"]
_keys_to_ignore_on_load_missing = [r"position_ids", r"predictions.decoder.bias"]

View File

@ -519,7 +519,6 @@ class BertEncoder(nn.Module):
past_key_value = past_key_values[i] if past_key_values is not None else None
if self.gradient_checkpointing and self.training:
if use_cache:
logger.warn(
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."

View File

@ -7,7 +7,6 @@ from torch.nn import functional as nnf
def precompute_clip_vectors():
from trails.initialization import init_dataset
lvis = init_dataset(
@ -104,7 +103,6 @@ def forward_multihead_attention(x, b, with_aff=False, attn_mask=None):
q, k.transpose(1, 2)
) # n_heads * batch_size, tokens^2, tokens^2
if attn_mask is not None:
attn_mask_type, attn_mask = attn_mask
n_heads = attn_output_weights.size(0) // attn_mask.size(0)
attn_mask = attn_mask.repeat(n_heads, 1)
@ -196,9 +194,7 @@ class CLIPDenseBase(nn.Module):
return torch.cat([self.model.positional_embedding[:1], b])
def visual_forward(self, x_inp, extract_layers=(), skip=False, mask=None):
with torch.no_grad():
x_inp.shape[2:]
if self.n_tokens is not None:
@ -252,7 +248,6 @@ class CLIPDenseBase(nn.Module):
activations, affinities = [], []
for i, res_block in enumerate(self.model.transformer.resblocks):
if mask is not None:
mask_layer, mask_type, mask_tensor = mask
if mask_layer == i or mask_layer == "all":
@ -296,7 +291,6 @@ class CLIPDenseBase(nn.Module):
return x, activations, affinities
def sample_prompts(self, words, prompt_list=None):
prompt_list = prompt_list if prompt_list is not None else self.prompt_list
prompt_indices = torch.multinomial(
@ -420,7 +414,6 @@ class CLIPDensePredT(CLIPDenseBase):
n_tokens=None,
complex_trans_conv=False,
):
super().__init__(version, reduce_cond, reduce_dim, prompt, n_tokens)
# device = 'cpu'
@ -515,7 +508,6 @@ class CLIPDensePredT(CLIPDenseBase):
self.prompt_list = get_prompt_list(prompt)
def forward(self, inp_image, conditional=None, return_features=False, mask=None):
assert type(return_features) == bool
inp_image = inp_image.to(self.model.positional_embedding.device)
@ -543,7 +535,6 @@ class CLIPDensePredT(CLIPDenseBase):
for i, (activation, block, reduce) in enumerate(
zip(_activations, self.blocks, self.reduces)
):
if a is not None:
a = reduce(activation) + a
else:
@ -600,7 +591,6 @@ class CLIPDensePredTMasked(CLIPDensePredT):
add_calibration=False,
n_tokens=None,
):
super().__init__(
version=version,
extract_layers=extract_layers,
@ -622,7 +612,6 @@ class CLIPDensePredTMasked(CLIPDensePredT):
return super().visual_forward(img_s, mask=("all", "cls_token", seg_s))
def forward(self, img_q, cond_or_img_s, seg_s=None, return_features=False):
if seg_s is None:
cond = cond_or_img_s
else:
@ -647,7 +636,6 @@ class CLIPDenseBaseline(CLIPDenseBase):
limit_to_clip_only=False,
n_tokens=None,
):
super().__init__(version, reduce_cond, reduce_dim, prompt, n_tokens)
# self.cond_layer = cond_layer
@ -671,7 +659,6 @@ class CLIPDenseBaseline(CLIPDenseBase):
)
def forward(self, inp_image, conditional=None, return_features=False):
inp_image = inp_image.to(self.model.positional_embedding.device)
# x_inp = normalize(inp_image)
@ -723,12 +710,10 @@ class CLIPSegMultiLabel(nn.Module):
self.clipseg.eval()
def forward(self, x):
bs = x.shape[0]
out = torch.ones(21, bs, 352, 352).to(x.device) * -10
for class_id, class_name in enumerate(self.pascal_classes):
fac = 3 if class_name == "background" else 1
with torch.no_grad():

View File

@ -10,7 +10,7 @@ aiosignal==1.3.1
# via aiohttp
antlr4-python3-runtime==4.9.3
# via omegaconf
astroid==2.14.2
astroid==2.15.0
# via pylint
async-timeout==4.0.2
# via aiohttp
@ -22,7 +22,7 @@ black==23.1.0
# via -r requirements-dev.in
certifi==2022.12.7
# via requests
charset-normalizer==3.0.1
charset-normalizer==3.1.0
# via
# aiohttp
# requests
@ -39,42 +39,42 @@ click-shell==2.1
# via imaginAIry (setup.py)
contourpy==1.0.7
# via matplotlib
coverage==7.1.0
coverage==7.2.2
# via -r requirements-dev.in
cycler==0.11.0
# via matplotlib
diffusers==0.13.1
diffusers==0.14.0
# via imaginAIry (setup.py)
dill==0.3.6
# via pylint
einops==0.6.0
# via imaginAIry (setup.py)
exceptiongroup==1.1.0
exceptiongroup==1.1.1
# via pytest
facexlib==0.2.5
# via imaginAIry (setup.py)
fairscale==0.4.13
# via imaginAIry (setup.py)
filelock==3.9.0
filelock==3.10.0
# via
# diffusers
# huggingface-hub
# transformers
filterpy==1.4.5
# via facexlib
fonttools==4.38.0
fonttools==4.39.2
# via matplotlib
frozenlist==1.3.3
# via
# aiohttp
# aiosignal
fsspec[http]==2023.1.0
fsspec[http]==2023.3.0
# via pytorch-lightning
ftfy==6.1.1
# via
# imaginAIry (setup.py)
# open-clip-torch
huggingface-hub==0.12.1
huggingface-hub==0.13.2
# via
# diffusers
# open-clip-torch
@ -84,7 +84,7 @@ idna==3.4
# via
# requests
# yarl
imageio==2.25.1
imageio==2.26.0
# via imaginAIry (setup.py)
importlib-metadata==6.0.0
# via diffusers
@ -102,11 +102,11 @@ lazy-object-proxy==1.9.0
# via astroid
libcst==0.4.9
# via pycln
lightning-utilities==0.7.0
lightning-utilities==0.8.0
# via pytorch-lightning
llvmlite==0.39.1
# via numba
matplotlib==3.7.0
matplotlib==3.7.1
# via filterpy
mccabe==0.7.0
# via
@ -139,22 +139,11 @@ numpy==1.23.5
# torchmetrics
# torchvision
# transformers
# xformers
nvidia-cublas-cu11==11.10.3.66
# via
# nvidia-cudnn-cu11
# torch
nvidia-cuda-nvrtc-cu11==11.7.99
# via torch
nvidia-cuda-runtime-cu11==11.7.99
# via torch
nvidia-cudnn-cu11==8.5.0.96
# via torch
omegaconf==2.3.0
# via imaginAIry (setup.py)
open-clip-torch==2.14.0
open-clip-torch==2.16.0
# via imaginAIry (setup.py)
opencv-python==4.7.0.68
opencv-python==4.7.0.72
# via
# facexlib
# imaginAIry (setup.py)
@ -182,7 +171,7 @@ pillow==9.4.0
# imaginAIry (setup.py)
# matplotlib
# torchvision
platformdirs==3.0.0
platformdirs==3.1.1
# via
# black
# pylint
@ -204,13 +193,11 @@ pyflakes==3.0.1
# via pylama
pylama==8.4.1
# via -r requirements-dev.in
pylint==2.16.2
pylint==2.17.0
# via -r requirements-dev.in
pyparsing==3.0.9
# via matplotlib
pyre-extensions==0.0.23
# via xformers
pytest==7.2.1
pytest==7.2.2
# via
# -r requirements-dev.in
# pytest-randomly
@ -221,7 +208,7 @@ pytest-sugar==0.9.6
# via -r requirements-dev.in
python-dateutil==2.8.2
# via matplotlib
pytorch-lightning==1.9.2
pytorch-lightning==1.9.4
# via imaginAIry (setup.py)
pyyaml==6.0
# via
@ -230,6 +217,7 @@ pyyaml==6.0
# omegaconf
# pycln
# pytorch-lightning
# responses
# timm
# transformers
regex==2022.10.31
@ -246,11 +234,11 @@ requests==2.28.2
# responses
# torchvision
# transformers
responses==0.22.0
responses==0.23.1
# via -r requirements-dev.in
ruff==0.0.249
ruff==0.0.256
# via -r requirements-dev.in
safetensors==0.2.8
safetensors==0.3.0
# via imaginAIry (setup.py)
scipy==1.10.1
# via
@ -271,8 +259,6 @@ timm==0.6.12
# open-clip-torch
tokenizers==0.13.2
# via transformers
toml==0.10.2
# via responses
tomli==2.0.1
# via
# black
@ -294,10 +280,9 @@ torch==1.13.1
# torchdiffeq
# torchmetrics
# torchvision
# xformers
torchdiffeq==0.2.3
# via imaginAIry (setup.py)
torchmetrics==0.11.1
torchmetrics==0.11.4
# via
# imaginAIry (setup.py)
# pytorch-lightning
@ -307,7 +292,7 @@ torchvision==0.14.1
# imaginAIry (setup.py)
# open-clip-torch
# timm
tqdm==4.64.1
tqdm==4.65.0
# via
# facexlib
# huggingface-hub
@ -315,11 +300,11 @@ tqdm==4.64.1
# open-clip-torch
# pytorch-lightning
# transformers
transformers==4.26.1
transformers==4.27.1
# via imaginAIry (setup.py)
typer==0.7.0
# via pycln
types-toml==0.10.8.5
types-pyyaml==6.0.12.8
# via responses
typing-extensions==4.5.0
# via
@ -327,33 +312,25 @@ typing-extensions==4.5.0
# huggingface-hub
# libcst
# lightning-utilities
# pyre-extensions
# pytorch-lightning
# torch
# torchvision
# typing-inspect
typing-inspect==0.8.0
# via
# libcst
# pyre-extensions
urllib3==1.26.14
# via libcst
urllib3==1.26.15
# via
# requests
# responses
wcwidth==0.2.6
# via ftfy
wheel==0.38.4
# via
# -r requirements-dev.in
# nvidia-cublas-cu11
# nvidia-cuda-runtime-cu11
wrapt==1.14.1
wheel==0.40.0
# via -r requirements-dev.in
wrapt==1.15.0
# via astroid
xformers==0.0.16 ; sys_platform != "darwin"
# via imaginAIry (setup.py)
yarl==1.8.2
# via aiohttp
zipp==3.14.0
zipp==3.15.0
# via importlib-metadata
# The following packages are considered to be unsafe in a requirements file:

View File

@ -92,7 +92,6 @@ def extract_controlnet_essence(control_type, controlnet_url, dest_folder):
final_state_dict = {}
skip_prefixes = ("first_stage_model", "cond_stage_model")
for key in controlnet_state_dict:
if key.startswith(skip_prefixes):
continue

View File

@ -69,14 +69,16 @@ setup(
"facexlib",
"fairscale>=0.4.4", # for vendored blip
"ftfy", # for vendored clip
"torch>=1.13.1",
# 2.0.0 produced garbage images on MacOS
"torch>=1.13.1,<2.0.0",
"numpy",
"tqdm",
"diffusers",
"imageio>=2.9.0",
"Pillow>=8.0.0",
"psutil",
"pytorch-lightning>=1.4.2",
# 2.0.0 need to fix `ImportError: cannot import name 'rank_zero_only' from 'pytorch_lightning.utilities.distributed' `
"pytorch-lightning>=1.4.2,<2.0.0",
"omegaconf>=2.1.1",
"open-clip-torch",
"opencv-python",

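The two new upper bounds in the hunk above keep imaginAIry off the 2.x releases: per the inline comments, torch 2.0.0 produced garbage images on macOS, and pytorch-lightning 2.0.0 breaks the quoted `rank_zero_only` import. A hedged sketch of the compatibility shim the second pin avoids having to write (not part of this commit; the 2.x module path is based on pytorch-lightning's public layout, so treat it as an assumption):

```python
# hypothetical shim, unnecessary while pytorch-lightning is pinned to <2.0.0
try:
    # import path used by pytorch-lightning < 2.0.0 (and quoted in the error above)
    from pytorch_lightning.utilities.distributed import rank_zero_only
except ImportError:
    # pytorch-lightning 2.x keeps rank_zero_only in the rank_zero module
    from pytorch_lightning.utilities.rank_zero import rank_zero_only
```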
View File

@ -90,7 +90,6 @@ def filename_base_for_orig_outputs(request):
@pytest.fixture(params=SAMPLERS_FOR_TESTING)
def sampler_type(request):
return request.param