llamamodel: update model whitelist

- Remove dbrx (never tested), mamba (not supported by CUDA), persimmon
  (CUDA generates garbage), and plamo (fails an assertion)
- Add internlm2 and starcoder2

Signed-off-by: Jared Van Bortel <jared@nomic.ai>

@@ -36,14 +36,44 @@ static constexpr int GGUF_VER_MAX = 3;
 static const char * const modelType_ = "LLaMA";
 // note: same order as LLM_ARCH_NAMES in llama.cpp
 static const std::vector<const char *> KNOWN_ARCHES {
-    "baichuan", "bert", "bloom", "codeshell", "command-r", "dbrx", "falcon", "gemma", "gpt2", "llama", "mamba", "mpt",
-    "nomic-bert", "olmo", "orion", "persimmon", "phi2", "phi3", "plamo", "qwen2moe", "qwen2", "qwen", "refact",
-    "stablelm", "starcoder", "xverse",
"llama",
"falcon",
// "grok", -- 314B parameters
"gpt2",
// "gptj", -- no inference code
// "gptneox", -- no inference code
"mpt",
"baichuan",
"starcoder",
// "persimmon", -- CUDA generates garbage
"refact",
"bert",
"nomic-bert",
"bloom",
"stablelm",
"qwen",
"qwen2",
"qwen2moe",
"phi2",
"phi3",
// "plamo", -- https://github.com/ggerganov/llama.cpp/issues/5669
"codeshell",
"orion",
"internlm2",
// "minicpm", -- CUDA generates garbage
"gemma",
"starcoder2",
// "mamba", -- CUDA missing SSM_CONV
"xverse",
"command-r",
// "dbrx", -- 16x12B parameters
"olmo",
 };
 static const std::vector<const char *> EMBEDDING_ARCHES {
-    "bert", "nomic-bert"
+    "bert", "nomic-bert",
 };
 static bool is_embedding_arch(const std::string &arch) {
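
For context, is_embedding_arch (cut off above) and the loader's whitelist check are plain membership tests over these vectors: a model whose architecture string (the GGUF "general.architecture" value) is not listed gets rejected. A minimal sketch of that pattern follows; the is_known_arch name and the body of is_embedding_arch are assumptions for illustration, and the actual code in llamamodel.cpp may be structured differently.

#include <algorithm>
#include <string>
#include <vector>

// Hypothetical helper: true if the architecture string read from the model's
// GGUF metadata appears in the whitelist above. Architectures not listed
// (e.g. the commented-out dbrx, mamba, persimmon, plamo) would fail this
// check and be refused at load time.
static bool is_known_arch(const std::string &arch) {
    return std::find(KNOWN_ARCHES.begin(), KNOWN_ARCHES.end(), arch) != KNOWN_ARCHES.end();
}

// Plausible body for the truncated is_embedding_arch: the same membership
// test, but against EMBEDDING_ARCHES.
static bool is_embedding_arch(const std::string &arch) {
    return std::find(EMBEDDING_ARCHES.begin(), EMBEDDING_ARCHES.end(), arch) != EMBEDDING_ARCHES.end();
}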
