feat: build works + tokenizer

commit 2f6ecbe798
parent 525b703984
@@ -15,11 +15,11 @@
 #include <sstream>
 #include <thread>
 #include <unordered_set>
+#include <regex>
 
 static const size_t MB = 1024*1024;
 
 struct mpt_hparams {
-    // FIXME: for mpt
     int32_t n_vocab = 50432;
     int32_t n_ctx = 2048;
     int32_t n_embd = 4096;
@@ -150,6 +150,9 @@ struct mpt_vocab {
 
     std::map<token, id> token_to_id;
     std::map<id, token> id_to_token;
+    std::vector<std::string> special_tokens;
+
+    void add_special_token(const std::string &token);
 };
 
 // load the model's weights from a stream
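The hunk above declares add_special_token, but its definition lies outside the diff context. A minimal sketch of what it presumably does — record the token so mpt_tokenize (further down) can fold it into its split pattern; the body is an assumption, not shown in this commit:

void mpt_vocab::add_special_token(const std::string & token) {
    special_tokens.push_back(token); // assumed: just collect; mpt_tokenize consumes the list
}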
@@ -316,6 +319,9 @@ bool mpt_model_load(const std::string &fname, std::istream &fin, mpt_model & mod
         layer.norm_1_g = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd);
         layer.norm_1_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd);
 
+        layer.norm_2_g = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd);
+        layer.norm_2_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd);
+
         layer.c_attn_q_proj_w = ggml_new_tensor_2d(ctx, wtype, n_embd, n_embd);
         layer.c_attn_k_proj_w = ggml_new_tensor_2d(ctx, wtype, n_embd, n_embd);
         layer.c_attn_v_proj_w = ggml_new_tensor_2d(ctx, wtype, n_embd, n_embd);
@@ -338,11 +344,14 @@ bool mpt_model_load(const std::string &fname, std::istream &fin, mpt_model & mod
 
         model.tensors["transformer.block." + std::to_string(i) + ".attn.out_proj.weight"] = layer.c_attn_proj_w;
 
-        model.tensors["transformer.block." + std::to_string(i) + ".mlp.fc_in.weight"] = layer.c_mlp_fc_w;
-        model.tensors["transformer.block." + std::to_string(i) + ".mlp.fc_in.bias"] = layer.c_mlp_fc_b;
+        model.tensors["transformer.block." + std::to_string(i) + ".mlp.up_proj.weight"] = layer.up_proj_w;
+        model.tensors["transformer.block." + std::to_string(i) + ".mlp.up_proj.bias"] = layer.up_proj_b;
 
-        model.tensors["transformer.block." + std::to_string(i) + ".mlp.fc_out.weight"] = layer.c_mlp_proj_w;
-        model.tensors["transformer.block." + std::to_string(i) + ".mlp.fc_out.bias"] = layer.c_mlp_proj_b;
+        model.tensors["transformer.block." + std::to_string(i) + ".mlp.down_proj.weight"] = layer.down_proj_w;
+        model.tensors["transformer.block." + std::to_string(i) + ".mlp.down_proj.bias"] = layer.down_proj_b;
+
+        model.tensors["transformer.block." + std::to_string(i) + ".norm_2.weight"] = layer.norm_2_g;
+        model.tensors["transformer.block." + std::to_string(i) + ".norm_2.bias"] = layer.norm_2_b;
     }
 
     // key + value memory
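For context on why these map keys matter: later in mpt_model_load (not shown in this diff) each tensor name read from the model file is looked up in model.tensors, so the strings here must match exactly what the conversion script wrote. A sketch in the style of the ggml example loaders; the `name_len` header field and read layout are assumptions, not part of this commit:

int32_t name_len;  // assumed header field: length of the tensor name
fin.read(reinterpret_cast<char *>(&name_len), sizeof(name_len));
std::string name(name_len, 0);
fin.read(&name[0], name_len);

if (model.tensors.find(name) == model.tensors.end()) {
    fprintf(stderr, "%s: unknown tensor '%s' in model file\n", __func__, name.c_str());
    return false;
}

struct ggml_tensor * tensor = model.tensors[name];  // e.g. "transformer.block.0.norm_2.weight"
fin.read(reinterpret_cast<char *>(tensor->data), ggml_nbytes(tensor));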
@@ -552,7 +561,6 @@ bool mpt_eval(
            ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Kcur, k));
            ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Vcur, v));
        }
-       // we need to replace rope with alibi
 
        // Q = Qcur.contiguous().view(n_embd/n_head, n_head, N).permute(0, 2, 1, 3)
        struct ggml_tensor * Q =
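The deleted TODO refers to MPT using ALiBi position biases instead of RoPE: rather than rotating Q/K, a fixed per-head linear bias is added to the attention scores (ggml exposes this as the ggml_alibi op). For reference, a sketch of the standard slope schedule for a power-of-two head count, m_h = 2^(-8h / n_head); illustrative only, not code from this diff:

#include <cmath>
#include <vector>

// Per-head ALiBi slopes, assuming n_head is a power of two (true for MPT-7B's 32 heads).
std::vector<float> alibi_slopes(int n_head) {
    std::vector<float> slopes(n_head);
    for (int h = 0; h < n_head; ++h) {
        // head h attends with bias slope 2^(-8 * (h+1) / n_head)
        slopes[h] = std::pow(2.0f, -8.0f * (h + 1) / n_head);
    }
    return slopes;
}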
@@ -710,15 +718,77 @@ bool mpt_eval(
 }
 
 std::vector<int> mpt_tokenize(const mpt_vocab & vocab, const std::string & text) {
-    // FIXME
-    return std::vector<int>();
-}
-
-const std::string mpt_token_to_str(const mpt_vocab & vocab, int token) {
-    // FIXME
-    return std::string();
-}
+    // taken from the stablelm example in ggml
+    // they both use the gpt-neox tokenizer
+    // not sure if this is entirely right?
+    std::vector<std::string> words;
+
+    // first split the text into words
+    {
+        std::string str = text;
+        std::string pat = R"('s|'t|'re|'ve|'m|'ll|'d| ?[[:alpha:]]+| ?[[:digit:]]+| ?[^\s[:alpha:][:digit:]]+|\s+(?!\S)|\s+)";
+
+        // Generate the subpattern from the special_tokens vector if it's not empty
+        if (!vocab.special_tokens.empty()) {
+            std::string special_tokens_subpattern;
+            for (const auto &token : vocab.special_tokens) {
+                if (!special_tokens_subpattern.empty()) {
+                    special_tokens_subpattern += "|";
+                }
+                special_tokens_subpattern += token;
+            }
+
+            // Modify the regex pattern with the generated special tokens subpattern
+            pat = special_tokens_subpattern + "|" + pat;
+        }
+
+        std::regex re(pat);
+        std::smatch m;
+
+        while (std::regex_search(str, m, re)) {
+            for (auto x : m) {
+                words.push_back(x);
+            }
+            str = m.suffix();
+        }
+    }
+
+    // find the longest tokens that form the words:
+    std::vector<mpt_vocab::id> tokens;
+    for (const auto & word : words) {
+        if (word.size() == 0) continue;
+
+        int i = 0;
+        int n = word.size();
+        while (i < n) {
+            int j = n;
+            while (j > i) {
+                auto it = vocab.token_to_id.find(word.substr(i, j-i));
+                if (it != vocab.token_to_id.end()) {
+                    tokens.push_back(it->second);
+                    i = j;
+                    break;
+                }
+                --j;
+            }
+            if (i == n) {
+                break;
+            }
+            if (j == i) {
+                auto sub = word.substr(i, 1);
+                if (vocab.token_to_id.find(sub) != vocab.token_to_id.end()) {
+                    tokens.push_back(vocab.token_to_id.at(sub));
+                } else {
+                    fprintf(stderr, "%s: unknown token '%s'\n", __func__, sub.data());
+                }
+                ++i;
+            }
+        }
+    }
+
+    return tokens;
+}
 
 #define MPT_MAX_RNG_STATE 64*1024
 
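Two observations on the new tokenizer. First, special tokens are spliced into the regex unescaped, so any token containing metacharacters (e.g. `<|endoftext|>`, where `|` is alternation) would corrupt the pattern; escaping them before concatenation seems advisable. Second, after a successful longest match the `j == i` branch is re-entered for the next position (since `i` was set to `j`) and emits a single-character token there without trying longer matches first, which can split the remainder suboptimally. A self-contained sketch of the intended greedy longest-match behavior on a toy vocabulary (hypothetical tokens, not MPT's):

#include <cstdio>
#include <map>
#include <string>
#include <vector>

int main() {
    // hypothetical toy vocabulary
    std::map<std::string, int> token_to_id = {
        {"he", 0}, {"hel", 1}, {"hello", 2}, {"l", 3}, {"lo", 4}, {"o", 5},
    };

    std::string word = "hellolo";
    std::vector<int> tokens;

    int i = 0;
    const int n = (int) word.size();
    while (i < n) {
        bool matched = false;
        // try the longest candidate first, shrinking until the vocab has it
        for (int j = n; j > i; --j) {
            auto it = token_to_id.find(word.substr(i, j - i));
            if (it != token_to_id.end()) {
                tokens.push_back(it->second);
                i = j;
                matched = true;
                break;
            }
        }
        if (!matched) ++i; // no token starts here: skip one character
    }

    for (int t : tokens) printf("%d ", t); // prints: 2 4  ("hello" + "lo")
    return 0;
}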
@@ -1127,10 +1197,10 @@ void MPT::prompt(const std::string &prompt,
         // display text
         ++totalPredictions;
 
-        if (id == 50256 /*end of text*/)
+        if (id == 0 /*end of text*/)
             goto stop_generating;
 
-        const std::string str = mpt_token_to_str(d_ptr->vocab, id);
+        const std::string str = d_ptr->vocab.id_to_token[id];
 
         // Check if the provided str is part of our reverse prompts
         bool foundPartialReversePrompt = false;
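Context for the end-of-text change: MPT reuses the EleutherAI gpt-neox-20b tokenizer, in which `<|endoftext|>` is token id 0; 50256 is the `<|endoftext|>` id of the GPT-2 vocabulary this file was presumably adapted from.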
@@ -1160,7 +1230,7 @@ void MPT::prompt(const std::string &prompt,
             if (promptCtx.tokens.size() == promptCtx.n_ctx)
                 promptCtx.tokens.erase(promptCtx.tokens.begin());
             promptCtx.tokens.push_back(t);
-            if (!responseCallback(t, mpt_token_to_str(d_ptr->vocab, t)))
+            if (!responseCallback(t, d_ptr->vocab.id_to_token[t]))
                 goto stop_generating;
         }
         cachedTokens.clear();
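One caveat with replacing mpt_token_to_str by a direct subscript: std::map::operator[] default-constructs and inserts an empty string when the id is missing, silently mutating the vocab. A defensive variant (a sketch, not what the commit does):

auto it = d_ptr->vocab.id_to_token.find(t);
const std::string str = (it != d_ptr->vocab.id_to_token.end()) ? it->second : "";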