From 26acdebafacb303bc6785307fa9c1d4648c5c763 Mon Sep 17 00:00:00 2001
From: Jared Van Bortel
Date: Mon, 22 Jan 2024 12:14:55 -0500
Subject: [PATCH] convert: replace GPTJConfig with AutoConfig (#1866)

Signed-off-by: Jared Van Bortel
---
 .../scripts/convert_gptj_to_gguf.py         |   4 +-
 .../scripts/convert_mpt_hf_to_gguf.py       | 168 ------------------
 .../scripts/convert_replit_v1_hf_to_gguf.py | 145 ---------------
 3 files changed, 2 insertions(+), 315 deletions(-)
 delete mode 100755 gpt4all-backend/scripts/convert_mpt_hf_to_gguf.py
 delete mode 100755 gpt4all-backend/scripts/convert_replit_v1_hf_to_gguf.py

diff --git a/gpt4all-backend/scripts/convert_gptj_to_gguf.py b/gpt4all-backend/scripts/convert_gptj_to_gguf.py
index 9b8f3a62..ed3ee576 100755
--- a/gpt4all-backend/scripts/convert_gptj_to_gguf.py
+++ b/gpt4all-backend/scripts/convert_gptj_to_gguf.py
@@ -27,7 +27,7 @@ from pathlib import Path

 import gguf
 import numpy as np
-from transformers import AutoTokenizer, GPTJConfig, GPTJForCausalLM
+from transformers import AutoConfig, AutoTokenizer, GPTJForCausalLM
 from transformers.models.gpt2 import tokenization_gpt2


@@ -63,7 +63,7 @@ gguf_writer = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[ARCH])

 print("gguf: get model metadata")

-config = GPTJConfig(dir_model)
+config = AutoConfig.from_pretrained(dir_model)
 block_count = config.n_layer

 gguf_writer.add_name("GPT-J")
diff --git a/gpt4all-backend/scripts/convert_mpt_hf_to_gguf.py b/gpt4all-backend/scripts/convert_mpt_hf_to_gguf.py
deleted file mode 100755
index b2688e56..00000000
--- a/gpt4all-backend/scripts/convert_mpt_hf_to_gguf.py
+++ /dev/null
@@ -1,168 +0,0 @@
-#!/usr/bin/env python3
-# Convert Hugging Face fine-tuned bloom-like models to ggml format
-#
-# Usage:
-#
-# python3 models/convert-h5-to-ggml.py
-#
-# This script is similar to "convert-pt-to-ggml.py"
-#
-
-from __future__ import annotations
-
-import json
-import struct
-import sys
-from pathlib import Path
-
-import gguf
-import numpy as np
-import torch
-from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig, MptConfig
-from transformers.models.gpt2 import tokenization_gpt2
-
-
-if not 3 <= len(sys.argv) < 5:
-    print("Usage: {} model-name dir-output [ftype]".format(Path(__file__).name))
-    print(" model-name: name of the model to convert. Example: 'bigscience/bloomz-560m'")
-    print(" dir-output: directory where the output file will be written")
-    print(" ftype == 0 -> float32")
-    print(" ftype == 1 -> float16")
-    sys.exit(1)
-
-dir_model = Path(sys.argv[1])
-dir_out = Path(sys.argv[2])
-
-# make sure the output directory exists
-dir_out.mkdir(exist_ok=True)
-
-# possible data types
-# ftype == 0 -> float32
-# ftype == 1 -> float16
-#
-# map from ftype to string
-ftype_str = ["f32", "f16"]
-
-ftype = 1
-if len(sys.argv) > 3:
-    ftype = int(sys.argv[3])
-    if ftype < 0 or ftype > 1:
-        print("Invalid ftype: " + str(ftype))
-        sys.exit(1)
-
-fname_out = dir_out / f"ggml-model-{dir_model.name}-{ftype_str[ftype]}.gguf"
-
-
-ARCH = gguf.MODEL_ARCH.MPT
-gguf_writer = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[ARCH])
-
-print("gguf: get model metadata")
-
-config = AutoConfig.from_pretrained(dir_model, trust_remote_code=True)
-
-block_count = config.n_layers
-gguf_writer.add_name("MPT")
-gguf_writer.add_context_length(config.max_seq_len)
-gguf_writer.add_embedding_length(config.d_model)
-gguf_writer.add_block_count(block_count)
-gguf_writer.add_feed_forward_length(4 * config.d_model)
-gguf_writer.add_head_count(config.n_heads)
-if kv_n_heads := config.attn_config.get('kv_n_heads'):
-    gguf_writer.add_head_count_kv(kv_n_heads)
-gguf_writer.add_max_alibi_bias(config.attn_config['alibi_bias_max'])
-gguf_writer.add_layer_norm_eps(MptConfig().layer_norm_epsilon) # use default from upstream transformers
-gguf_writer.add_file_type(ftype)
-
-clip_qkv = config.attn_config['clip_qkv']
-if clip_qkv is not None:
-    gguf_writer.add_clamp_kqv(clip_qkv)
-
-print("gguf: get gpt2 tokenizer vocab")
-
-tokenizer = AutoTokenizer.from_pretrained(dir_model)
-
-special_ids = tokenizer.all_special_ids
-
-reverse_vocab = {id: encoded_tok for encoded_tok, id in tokenizer.vocab.items()}
-added_tokens = tokenizer.get_added_vocab().values()
-byte_encoder = tokenization_gpt2.bytes_to_unicode()
-byte_decoder = {v: k for k, v in byte_encoder.items()}
-
-tokens: list[bytearray] = []
-toktypes: list[gguf.TokenType] = []
-
-# The number of tokens in tokenizer.json can differ from the expected vocab size.
-# This causes downstream issues with mismatched tensor sizes when running the inference
-for i in range(config.vocab_size):
-    if i not in reverse_vocab:
-        print(f"Key {i} not in tokenizer vocabulary. Padding with an arbitrary token.")
-        pad_token = f"[PAD{i}]".encode("utf8")
-        text = bytearray(pad_token)
-    elif i in added_tokens:
-        # these tokens are not encoded, for some reason
-        text = bytearray(reverse_vocab[i].encode('utf-8'))
-    else:
-        text = bytearray([byte_decoder[c] for c in reverse_vocab[i]])
-
-    tokens.append(text)
-
-    # TODO(cebtenzzre): is there a better way to do this?
-    toktypes.append(gguf.TokenType.CONTROL if i in special_ids else gguf.TokenType.NORMAL)
-
-gguf_writer.add_tokenizer_model("gpt2")
-gguf_writer.add_token_list(tokens)
-gguf_writer.add_token_types(toktypes)
-
-special_vocab = gguf.SpecialVocab(dir_model, load_merges=True)
-special_vocab.add_to_gguf(gguf_writer)
-
-print("gguf: get tensor metadata")
-
-print("Loading model:", dir_model)
-model = AutoModelForCausalLM.from_pretrained(
-    dir_model, config=config, torch_dtype=torch.float16 if ftype == 1 else torch.float32,
-    low_cpu_mem_usage=True, trust_remote_code=True,
-)
-print("Model loaded:", dir_model)
-
-tensor_map = gguf.get_tensor_name_map(ARCH, block_count)
-
-list_vars = model.state_dict()
-for name in list_vars.keys():
-    data = list_vars[name].squeeze().numpy()
-    print("Processing variable:", name, "with shape:", data.shape)
-
-    n_dims = len(data.shape)
-
-    # ftype == 0 -> float32, ftype == 1 -> float16
-    ftype_cur = 0
-    # Keep token embeddings in fp32
-    if ftype == 1 and name[-7:] == ".weight" and n_dims == 2 and ".wte" not in name:
-        print(" Converting to float16")
-        data = data.astype(np.float16)
-        ftype_cur = 1
-    elif ftype == 1 or data.dtype != np.float32:
-        print(" Converting to float32")
-        data = data.astype(np.float32)
-        ftype_cur = 0
-
-    # map tensor names
-    new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias"))
-    if new_name is None:
-        print("Can not map tensor '" + name + "'")
-        sys.exit()
-
-    gguf_writer.add_tensor(new_name, data)
-
-
-print("gguf: write header")
-gguf_writer.write_header_to_file()
-print("gguf: write metadata")
-gguf_writer.write_kv_data_to_file()
-print("gguf: write tensors")
-gguf_writer.write_tensors_to_file()
-
-gguf_writer.close()
-
-print(f"gguf: model successfully exported to '{fname_out}'")
-print()
diff --git a/gpt4all-backend/scripts/convert_replit_v1_hf_to_gguf.py b/gpt4all-backend/scripts/convert_replit_v1_hf_to_gguf.py
deleted file mode 100755
index 43cb4871..00000000
--- a/gpt4all-backend/scripts/convert_replit_v1_hf_to_gguf.py
+++ /dev/null
@@ -1,145 +0,0 @@
-#!/usr/bin/env python3
-from __future__ import annotations
-
-import json
-import struct
-import sys
-from pathlib import Path
-
-import gguf
-import numpy as np
-from sentencepiece import SentencePieceProcessor
-from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer
-
-
-if not 2 <= len(sys.argv) < 4:
-    print("Usage: {} dir-model [ftype]\n".format(Path(__file__).name))
-    print(" ftype == 0 -> float32")
-    print(" ftype == 1 -> float16")
-    sys.exit(1)
-
-# output in the same directory as the model
-dir_model = Path(sys.argv[1])
-
-# possible data types
-# ftype == 0 -> float32
-# ftype == 1 -> float16
-#
-# map from ftype to string
-ftype_str = ["f32", "f16"]
-
-ftype = 1
-if len(sys.argv) > 2:
-    ftype = int(sys.argv[2])
-    if ftype < 0 or ftype > 1:
-        print("Invalid ftype: " + str(ftype))
-        sys.exit(1)
-
-fname_out = dir_model / ("ggml-replit-code-v1-3b-" + ftype_str[ftype] + ".gguf")
-
-
-ARCH = gguf.MODEL_ARCH.MPT
-gguf_writer = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[ARCH])
-
-print("gguf: get model metadata")
-
-config = AutoConfig.from_pretrained(dir_model)
-
-block_count = config.n_layers
-gguf_writer.add_name("Replit")
-gguf_writer.add_context_length(config.max_seq_len)
-gguf_writer.add_embedding_length(config.d_model)
-gguf_writer.add_block_count(block_count)
-gguf_writer.add_feed_forward_length(4 * config.d_model)
-gguf_writer.add_head_count(config.n_heads)
-gguf_writer.add_max_alibi_bias(config.attn_config.alibi_bias_max)
-gguf_writer.add_layer_norm_eps(config.layer_norm_epsilon)
-gguf_writer.add_file_type(ftype)
-
-clip_qkv = config.attn_config.clip_qkv
-if clip_qkv is not None:
-    gguf_writer.add_clamp_kqv(clip_qkv)
-
-print("gguf: get sentencepiece tokenizer vocab")
-
-tokenizer = SentencePieceProcessor(str(dir_model / "spiece.model"))
-#print(tokenizer.encode('I believe the meaning of life is'))
-
-tokens: list[bytearray] = []
-scores: list[float] = []
-toktypes: list[int] = []
-
-for i in range(tokenizer.vocab_size()):
-    tokens.append(tokenizer.id_to_piece(i).encode('utf-8'))
-    scores.append(tokenizer.get_score(i))
-
-    toktype = gguf.TokenType.NORMAL
-    if tokenizer.is_unknown(i):
-        toktype = gguf.TokenType.UNKNOWN
-    elif tokenizer.is_control(i):
-        toktype = gguf.TokenType.CONTROL
-    elif tokenizer.is_unused(i):
-        toktype = gguf.TokenType.UNUSED
-    elif tokenizer.is_byte(i):
-        toktype = gguf.TokenType.BYTE
-
-    toktypes.append(toktype)
-
-gguf_writer.add_tokenizer_model("llama") # sentencepiece
-gguf_writer.add_token_list(tokens)
-gguf_writer.add_token_scores(scores)
-gguf_writer.add_token_types(toktypes)
-
-special_vocab = gguf.SpecialVocab(dir_model, load_merges=True)
-special_vocab.add_to_gguf(gguf_writer)
-
-print("gguf: get tensor metadata")
-
-model = AutoModelForCausalLM.from_pretrained(dir_model, config=config, low_cpu_mem_usage=True)
-#print(model)
-
-tensor_map = gguf.get_tensor_name_map(ARCH, block_count)
-
-list_vars = model.state_dict()
-for name in list_vars.keys():
-    print(name, list_vars[name].shape, list_vars[name].dtype)
-
-print(config)
-
-for name in list_vars.keys():
-    data = list_vars[name].squeeze().numpy()
-    print("Processing variable:", name, "with shape:", data.shape)
-
-    n_dims = len(data.shape)
-
-    # ftype == 0 -> float32, ftype == 1 -> float16
-    ftype_cur = 0
-    if ftype == 1 and name[-7:] == ".weight" and n_dims == 2:
-        print(" Converting to float16")
-        data = data.astype(np.float16)
-        ftype_cur = 1
-    elif ftype == 1 or data.dtype != np.float32:
-        print(" Converting to float32")
-        data = data.astype(np.float32)
-        ftype_cur = 0
-
-    # map tensor names
-    new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias"))
-    if new_name is None:
-        print("Can not map tensor '" + name + "'")
-        sys.exit()
-
-    gguf_writer.add_tensor(new_name, data)
-
-
-print("gguf: write header")
-gguf_writer.write_header_to_file()
-print("gguf: write metadata")
-gguf_writer.write_kv_data_to_file()
-print("gguf: write tensors")
-gguf_writer.write_tensors_to_file()
-
-gguf_writer.close()
-
-print(f"gguf: model successfully exported to '{fname_out}'")
-print()
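
Note on the convert_gptj_to_gguf.py hunk above: GPTJConfig(dir_model) passed the model directory as the first positional parameter of GPTJConfig (vocab_size) and left every other field at its default, rather than reading the checkpoint's config.json; AutoConfig.from_pretrained() loads config.json from the directory and returns the matching config class, so n_layer and friends reflect the actual model. A minimal sketch of the new loading path (the checkpoint path below is a placeholder, not taken from the patch):

    from transformers import AutoConfig

    # Placeholder path: any local directory (or hub id) containing a GPT-J config.json.
    config = AutoConfig.from_pretrained("path/to/gptj-model")
    assert config.model_type == "gptj"  # AutoConfig resolved GPTJConfig from config.json
    print(config.n_layer, config.n_embd, config.n_head)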