llamamodel: add 12 new architectures for CPU inference (#1914)

Baichuan, BLOOM, CodeShell, GPT-2, Orion, Persimmon, Phi and Phi-2,
Plamo, Qwen, Qwen2, Refact, StableLM

Signed-off-by: Jared Van Bortel <jared@nomic.ai>

@@ -508,7 +508,14 @@ DLL_EXPORT bool magic_match(const char *fname) {
     auto * ctx = load_gguf(fname, arch);
     bool valid = true;
-    if (!(arch == "llama" || arch == "starcoder" || arch == "falcon" || arch == "mpt")) {
+    static const std::vector<const char *> known_arches {
+        "baichuan", "bloom", "codeshell", "falcon", "gpt2", "llama", "mpt", "orion", "persimmon", "phi2", "plamo",
+        "qwen", "qwen2", "refact", "stablelm", "starcoder"
+    };
+    if (std::find(known_arches.begin(), known_arches.end(), arch) == known_arches.end()) {
+        // not supported by this version of llama.cpp
         if (!(arch == "gptj" || arch == "bert")) { // we support these via other modules
             std::cerr << __func__ << ": unsupported model architecture: " << arch << "\n";
         }
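For context, here is a minimal self-contained sketch of the same allowlist pattern. The arch_supported helper and the sample architecture strings are illustrative only; in the real llamamodel.cpp, arch is read from the model file's GGUF metadata by load_gguf, which is not reproduced here.

    // Hypothetical standalone sketch of the allowlist check above.
    #include <algorithm>
    #include <iostream>
    #include <string>
    #include <vector>

    static bool arch_supported(const std::string &arch) {
        static const std::vector<const char *> known_arches {
            "baichuan", "bloom", "codeshell", "falcon", "gpt2", "llama", "mpt",
            "orion", "persimmon", "phi2", "plamo", "qwen", "qwen2", "refact",
            "stablelm", "starcoder"
        };
        // std::find compares each const char * element against the
        // std::string via operator==, so no manual strcmp is needed.
        return std::find(known_arches.begin(), known_arches.end(), arch)
            != known_arches.end();
    }

    int main() {
        for (const std::string arch : {"qwen2", "gptj", "gpt-neox"}) {
            if (arch_supported(arch)) {
                std::cout << arch << ": supported by this module\n";
            } else if (arch == "gptj" || arch == "bert") {
                // mirrors the carve-out in the diff above
                std::cout << arch << ": handled by a different module\n";
            } else {
                std::cerr << "unsupported model architecture: " << arch << "\n";
            }
        }
        return 0;
    }

A flat vector with a linear std::find is a reasonable choice here: the list has only 16 entries and the check runs once per model load, so a hash set would buy nothing.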
