Try and fix build on mac.

pull/913/head
Adam Treat 1 year ago
parent 6be64e6b85
commit 1b755b6cba

@ -76,7 +76,7 @@ foreach(BUILD_VARIANT IN LISTS BUILD_VARIANTS)
# Add each individual implementations
add_library(llamamodel-mainline-${BUILD_VARIANT} SHARED
llamamodel.cpp)
llamamodel.cpp llmodel_shared.cpp)
target_compile_definitions(llamamodel-mainline-${BUILD_VARIANT} PRIVATE
LLAMA_VERSIONS=>=3 LLAMA_DATE=999999)
prepare_target(llamamodel-mainline llama-mainline)
@ -94,11 +94,11 @@ foreach(BUILD_VARIANT IN LISTS BUILD_VARIANTS)
prepare_target(llamamodel-230511 llama-230511)
add_library(gptj-${BUILD_VARIANT} SHARED
gptj.cpp utils.h utils.cpp)
gptj.cpp utils.h utils.cpp llmodel_shared.cpp)
prepare_target(gptj ggml-230511)
add_library(mpt-${BUILD_VARIANT} SHARED
mpt.cpp utils.h utils.cpp)
mpt.cpp utils.h utils.cpp llmodel_shared.cpp)
prepare_target(mpt ggml-230511)
endforeach()

@ -96,28 +96,6 @@ const LLModel::Implementation* LLModel::implementation(std::ifstream& f, const s
return nullptr;
}
// Rebuilds the evaluated context from scratch: n_past is reset to zero and the
// tokens stored in promptCtx are fed back through evalTokens in chunks of at
// most n_batch tokens. After each chunk the callback is invoked with true and
// may return false to stop early; it is invoked once more with false before
// the function returns, whether the replay finished, was stopped or failed.
void LLModel::recalculateContext(PromptContext &promptCtx, std::function<bool(bool)> recalculate) {
    promptCtx.n_past = 0;

    size_t cursor = 0;
    while (cursor < promptCtx.tokens.size()) {
        // Upper bound of this chunk, clipped to the number of stored tokens.
        size_t next = std::min(cursor + promptCtx.n_batch, promptCtx.tokens.size());
        auto first = promptCtx.tokens.begin() + cursor;
        auto last = promptCtx.tokens.begin() + next;
        std::vector<int32_t> chunk(first, last);

        // The chunk must fit in the context window.
        assert(promptCtx.n_past + int32_t(chunk.size()) <= promptCtx.n_ctx);

        if (!evalTokens(promptCtx, chunk)) {
            std::cerr << "LLModel ERROR: Failed to process prompt\n";
            recalculate(false);
            return;
        }

        promptCtx.n_past += chunk.size();

        // The caller asked to stop: send the final notification and leave.
        if (!recalculate(true)) {
            recalculate(false);
            return;
        }

        cursor = next;
    }

    // Only reached when every chunk was evaluated.
    assert(promptCtx.n_past == int32_t(promptCtx.tokens.size()));
    recalculate(false);
}
LLModel *LLModel::construct(const std::string &modelPath, std::string buildVariant) {
//TODO: Auto-detect CUDA/OpenCL
if (buildVariant == "auto") {

@ -0,0 +1,26 @@
#include "llmodel.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <functional>
#include <iostream>
#include <vector>
/// Re-evaluates every token stored in promptCtx.tokens, batch by batch,
/// starting from an empty context (n_past is reset to 0).
///
/// @param promptCtx   Context whose tokens are replayed. n_past is reset and
///                    then advanced by the size of each batch that evalTokens
///                    accepts.
/// @param recalculate Progress callback. Called with true after each evaluated
///                    batch; returning false stops the replay. Called once with
///                    false before this function returns, on every path.
void LLModel::recalculateContext(PromptContext &promptCtx, std::function<bool(bool)> recalculate) {
    // A batch size of zero would never advance the cursor, so the loop below
    // could spin forever. Non-positive sizes fall back to one token per batch.
    const size_t step = promptCtx.n_batch > 0 ? static_cast<size_t>(promptCtx.n_batch) : 1;

    promptCtx.n_past = 0;
    bool completed = true;

    for (size_t i = 0; i < promptCtx.tokens.size(); ) {
        const size_t batch_end = std::min(i + step, promptCtx.tokens.size());
        std::vector<int32_t> batch(promptCtx.tokens.begin() + i, promptCtx.tokens.begin() + batch_end);

        // The batch must fit in the context window.
        assert(promptCtx.n_past + int32_t(batch.size()) <= promptCtx.n_ctx);

        if (!evalTokens(promptCtx, batch)) {
            std::cerr << "LLModel ERROR: Failed to process prompt\n";
            completed = false;
            break;
        }

        // The narrowing is intentional: the assert above bounds the sum by n_ctx.
        promptCtx.n_past += static_cast<int32_t>(batch.size());

        // The callback may cancel the replay between batches.
        if (!recalculate(true)) {
            completed = false;
            break;
        }

        i = batch_end;
    }

    // The invariant holds only when the whole token list was replayed.
    if (completed) {
        assert(promptCtx.n_past == int32_t(promptCtx.tokens.size()));
    }

    recalculate(false);
}
Loading…
Cancel
Save