diff --git a/gpt4all-backend/scripts/convert_bert_hf_to_gguf.py b/gpt4all-backend/scripts/convert_bert_hf_to_gguf.py
index 2b637fe3..f3fa8a2d 100644
--- a/gpt4all-backend/scripts/convert_bert_hf_to_gguf.py
+++ b/gpt4all-backend/scripts/convert_bert_hf_to_gguf.py
@@ -7,7 +7,7 @@ from pathlib import Path
 
 import gguf
 import numpy as np
-from transformers import AutoModel, AutoTokenizer
+from transformers import AutoConfig, AutoModel, AutoTokenizer
 
 
 if not 2 <= len(sys.argv) < 4:
@@ -44,17 +44,15 @@ gguf_writer = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[ARCH])
 print("gguf: get model metadata")
 
-model = AutoModel.from_pretrained(dir_model, low_cpu_mem_usage=True)
-hparams = model.config
-print(model)
+config = AutoConfig.from_pretrained(dir_model)
 
-block_count = hparams.num_hidden_layers
+block_count = config.num_hidden_layers
 
 gguf_writer.add_name("BERT")
-gguf_writer.add_context_length(hparams.max_position_embeddings)
-gguf_writer.add_embedding_length(hparams.hidden_size)
-gguf_writer.add_feed_forward_length(hparams.intermediate_size)
+gguf_writer.add_context_length(config.max_position_embeddings)
+gguf_writer.add_embedding_length(config.hidden_size)
+gguf_writer.add_feed_forward_length(config.intermediate_size)
 gguf_writer.add_block_count(block_count)
-gguf_writer.add_head_count(hparams.num_attention_heads)
+gguf_writer.add_head_count(config.num_attention_heads)
 gguf_writer.add_file_type(ftype)
 
 print("gguf: get tokenizer metadata")
@@ -76,7 +74,7 @@ reverse_vocab = {id: encoded_tok for encoded_tok, id in tokenizer.vocab.items()}
 
 # The number of tokens in tokenizer.json can differ from the expected vocab size.
 # This causes downstream issues with mismatched tensor sizes when running the inference
-for i in range(hparams.vocab_size):
+for i in range(config.vocab_size):
     try:
         text = reverse_vocab[i]
     except KeyError:
@@ -94,6 +92,9 @@ special_vocab.add_to_gguf(gguf_writer)
 
 print("gguf: get tensor metadata")
 
+model = AutoModel.from_pretrained(dir_model, config=config, low_cpu_mem_usage=True)
+print(model)
+
 tensor_map = gguf.get_tensor_name_map(ARCH, block_count)
 
 list_vars = model.state_dict()
diff --git a/gpt4all-backend/scripts/convert_mpt_hf_to_gguf.py b/gpt4all-backend/scripts/convert_mpt_hf_to_gguf.py
index df917dd6..591a2921 100644
--- a/gpt4all-backend/scripts/convert_mpt_hf_to_gguf.py
+++ b/gpt4all-backend/scripts/convert_mpt_hf_to_gguf.py
@@ -80,12 +80,6 @@ gguf_writer = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[ARCH])
 print("gguf: get model metadata")
 
 config = AutoConfig.from_pretrained(model_name)
-print("Loading model:", model_name)
-model = AutoModelForCausalLM.from_pretrained(
-    model_name, config=config, torch_dtype=torch.float16 if ftype == 1 else torch.float32, low_cpu_mem_usage=True,
-)
-config = model.config
-print("Model loaded:", model_name)
 
 block_count = config.n_layers
 gguf_writer.add_name("MPT")
@@ -129,6 +123,12 @@ gguf_writer.add_token_types(toktypes)
 
 print("gguf: get tensor metadata")
 
+print("Loading model:", model_name)
+model = AutoModelForCausalLM.from_pretrained(
+    model_name, config=config, torch_dtype=torch.float16 if ftype == 1 else torch.float32, low_cpu_mem_usage=True,
+)
+print("Model loaded:", model_name)
+
 tensor_map = gguf.get_tensor_name_map(ARCH, block_count)
 
 list_vars = model.state_dict()
diff --git a/gpt4all-backend/scripts/convert_replit_hf_to_gguf.py b/gpt4all-backend/scripts/convert_replit_hf_to_gguf.py
index 2a5f373b..e436e8ac 100644
--- a/gpt4all-backend/scripts/convert_replit_hf_to_gguf.py
+++ b/gpt4all-backend/scripts/convert_replit_hf_to_gguf.py
@@ -8,7 +8,7 @@ from pathlib import Path
 import gguf
 import numpy as np
 from sentencepiece import SentencePieceProcessor
-from transformers import AutoModelForCausalLM, AutoTokenizer
+from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer
 
 
 if not 2 <= len(sys.argv) < 4:
@@ -42,9 +42,7 @@ gguf_writer = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[ARCH])
 
 print("gguf: get model metadata")
 
-model = AutoModelForCausalLM.from_pretrained(dir_model, low_cpu_mem_usage=True)
-config = model.config
-#print(model)
+config = AutoConfig.from_pretrained(dir_model)
 
 block_count = config.n_layers
 gguf_writer.add_name("Replit")
@@ -95,6 +93,9 @@ special_vocab.add_to_gguf(gguf_writer)
 
 print("gguf: get tensor metadata")
 
+model = AutoModelForCausalLM.from_pretrained(dir_model, config=config, low_cpu_mem_usage=True)
+#print(model)
+
 tensor_map = gguf.get_tensor_name_map(ARCH, block_count)
 
 list_vars = model.state_dict()
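All three diffs apply the same reordering: read the hyperparameters with AutoConfig.from_pretrained() (which only parses config.json), write every piece of GGUF metadata and vocabulary from the config and tokenizer, and only then materialize the weights with from_pretrained(..., config=config, low_cpu_mem_usage=True). A minimal standalone sketch of the pattern, assuming a local BERT-style Hugging Face checkpoint directory passed as the first argument; the script and its prints are illustrative, not one of the actual convert scripts:

#!/usr/bin/env python3
# Sketch: config-first loading, with the weight load deferred to the tensor pass.
import sys

from transformers import AutoConfig, AutoModel

dir_model = sys.argv[1]  # path to a local BERT-style HF checkpoint

# Step 1: AutoConfig.from_pretrained() parses config.json only, so all the
# metadata is available without paying for a full weight load. (Calling
# AutoConfig(dir_model) directly would raise; AutoConfig must be built via
# from_pretrained().)
config = AutoConfig.from_pretrained(dir_model)
print("layers:", config.num_hidden_layers)
print("hidden size:", config.hidden_size)
print("context length:", config.max_position_embeddings)

# ... metadata and vocab would be written to the GGUF file here ...

# Step 2: load the weights last, reusing the config already in hand, so a bad
# config or vocab fails fast before the expensive part.
model = AutoModel.from_pretrained(dir_model, config=config, low_cpu_mem_usage=True)
for name, tensor in model.state_dict().items():
    print(name, tuple(tensor.shape))

Deferring the AutoModel/AutoModelForCausalLM load this way means config or vocabulary errors surface immediately, and peak memory stays low until state_dict() is actually needed.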