gpt4all/gpt4all-backend/llmodel_c.cpp

#include "llmodel_c.h"
#include "llmodel.h"

#include <cstring>
#include <cerrno>
#include <utility>

// Backing object for an opaque llmodel_model handle.
//
// Owns the LLModel it points at (deleted in the destructor) and keeps the
// prompt context that llmodel_prompt reuses between calls.
struct LLModelWrapper {
    LLModel *llModel = nullptr;
    LLModel::PromptContext promptContext;

    LLModelWrapper() = default;

    // The destructor deletes llModel, so a copy would delete the same model
    // twice. Forbid copying (which also suppresses the implicit moves).
    LLModelWrapper(const LLModelWrapper &) = delete;
    LLModelWrapper &operator=(const LLModelWrapper &) = delete;

    ~LLModelWrapper() { delete llModel; }
};

// Per-thread storage for the most recent model-creation error text.
// Within this file it is assigned only by llmodel_model_create2, which also
// hands callers a pointer into this string via its `error` out-parameter; that
// pointer therefore refers to this buffer rather than to caller-owned memory.
thread_local static std::string last_error_message;

// Convenience constructor: creates a model with the "auto" build variant and
// prints the failure reason to stderr instead of returning it to the caller.
// Returns whatever llmodel_model_create2 returned (null on failure).
llmodel_model llmodel_model_create(const char *model_path) {
    // llmodel_model_create2 fills this in whenever it returns null and the
    // out-parameter is non-null, so it is only read on the failure path.
    const char *reason;
    llmodel_model handle = llmodel_model_create2(model_path, "auto", &reason);
    if (handle)
        return handle;

    fprintf(stderr, "Unable to instantiate model: %s\n", reason);
    return handle;
}

// Creates a model handle for `model_path` using the requested build variant.
//
// model_path    - forwarded unchanged to LLModel::Implementation::construct.
// build_variant - forwarded unchanged to LLModel::Implementation::construct.
// error         - optional out-parameter (may be null). On failure it receives a
//                 pointer into the thread-local last_error_message buffer, so the
//                 text is only valid until that buffer is next overwritten.
//
// Returns an opaque handle to be released with llmodel_model_destroy, or null
// when no implementation could be constructed.
llmodel_model llmodel_model_create2(const char *model_path, const char *build_variant, const char **error) {
    auto wrapper = new LLModelWrapper;

    try {
        wrapper->llModel = LLModel::Implementation::construct(model_path, build_variant);
        if (!wrapper->llModel) {
            last_error_message = "Model format not supported (no matching implementation found)";
        }
    } catch (const std::exception& e) {
        last_error_message = e.what();
    } catch (...) {
        // Anything not derived from std::exception would otherwise leak the
        // wrapper and propagate out of this function; report it like any other
        // construction failure instead.
        last_error_message = "Unknown error while constructing the model";
    }

    if (!wrapper->llModel) {
        // delete + null the local in one step so the return below yields null.
        delete std::exchange(wrapper, nullptr);
        if (error) {
            *error = last_error_message.c_str();
        }
    }
    // Cast to the handle type itself, not to a pointer-to-handle.
    return reinterpret_cast<llmodel_model>(wrapper);
}

// Releases a handle obtained from llmodel_model_create/llmodel_model_create2.
// Deleting the wrapper runs its destructor, which deletes the wrapped LLModel.
// A null handle is harmless because deleting a null pointer does nothing.
void llmodel_model_destroy(llmodel_model model) {
    auto *wrapper = reinterpret_cast<LLModelWrapper*>(model);
    delete wrapper;
}

// Forwards to LLModel::requiredMem for the given model file and context length
// and returns its result. The handle is dereferenced without a null check.
size_t llmodel_required_mem(llmodel_model model, const char *model_path, int n_ctx)
{
    auto *self = reinterpret_cast<LLModelWrapper*>(model);
    LLModel *impl = self->llModel;
    return impl->requiredMem(model_path, n_ctx);
}

// Forwards to LLModel::loadModel with the given path and context length and
// returns its result. The handle is dereferenced without a null check.
bool llmodel_loadModel(llmodel_model model, const char *model_path, int n_ctx)
{
    auto *self = reinterpret_cast<LLModelWrapper*>(model);
    LLModel *impl = self->llModel;
    return impl->loadModel(model_path, n_ctx);
}

// Returns the result of LLModel::isModelLoaded for the wrapped model.
// The handle is dereferenced without a null check.
bool llmodel_isModelLoaded(llmodel_model model)
{
    auto *self = reinterpret_cast<LLModelWrapper*>(model);
    LLModel *impl = self->llModel;
    return impl->isModelLoaded();
}

// Returns the result of LLModel::stateSize for the wrapped model.
// The handle is dereferenced without a null check.
uint64_t llmodel_get_state_size(llmodel_model model)
{
    auto *self = reinterpret_cast<LLModelWrapper*>(model);
    LLModel *impl = self->llModel;
    return impl->stateSize();
}

// Forwards `dest` to LLModel::saveState and returns its result.
// NOTE(review): `dest` presumably must hold at least llmodel_get_state_size()
// bytes — confirm against the LLModel implementation.
uint64_t llmodel_save_state_data(llmodel_model model, uint8_t *dest)
{
    auto *self = reinterpret_cast<LLModelWrapper*>(model);
    LLModel *impl = self->llModel;
    return impl->saveState(dest);
}

// Forwards `src` to LLModel::restoreState and returns its result.
// The handle is dereferenced without a null check.
uint64_t llmodel_restore_state_data(llmodel_model model, const uint8_t *src)
{
    auto *self = reinterpret_cast<LLModelWrapper*>(model);
    LLModel *impl = self->llModel;
    return impl->restoreState(src);
}

// Wrapper functions for the C callbacks
bool prompt_wrapper(int32_t token_id, void *user_data) {
    llmodel_prompt_callback callback = reinterpret_cast<llmodel_prompt_callback>(user_data);
    return callback(token_id);
}

bool response_wrapper(int32_t token_id, const std::string &response, void *user_data) {
    llmodel_response_callback callback = reinterpret_cast<llmodel_response_callback>(user_data);
    return callback(token_id, response.c_str());
}

bool recalculate_wrapper(bool is_recalculating, void *user_data) {
    llmodel_recalculate_callback callback = reinterpret_cast<llmodel_recalculate_callback>(user_data);
    return callback(is_recalculating);
}

// Runs LLModel::prompt for `prompt`, bridging the C callbacks and the C prompt
// context to their C++ counterparts.
//
// Before the call the scalar settings in `ctx` are copied into the wrapper's
// persistent prompt context; afterwards they are copied back, and `ctx` is
// given the logits/tokens buffers of that context (pointers plus sizes).
// Neither `model` nor `ctx` is checked for null.
void llmodel_prompt(llmodel_model model, const char *prompt,
                    llmodel_prompt_callback prompt_callback,
                    llmodel_response_callback response_callback,
                    llmodel_recalculate_callback recalculate_callback,
                    llmodel_prompt_context *ctx)
{
    auto *self = reinterpret_cast<LLModelWrapper*>(model);
    LLModel::PromptContext &state = self->promptContext;

    // Adapt the C function pointers to the std::function signatures that
    // LLModel::prompt is called with.
    std::function<bool(int32_t)> on_prompt =
        [prompt_callback](int32_t token_id) { return prompt_callback(token_id); };
    std::function<bool(int32_t, const std::string&)> on_response =
        [response_callback](int32_t token_id, const std::string &response) {
            return response_callback(token_id, response.c_str());
        };
    std::function<bool(bool)> on_recalculate =
        [recalculate_callback](bool is_recalculating) { return recalculate_callback(is_recalculating); };

    // If the caller's n_past is smaller than the stored token history, drop
    // the stored tokens beyond it.
    const size_t past = size_t(ctx->n_past);
    if (past < state.tokens.size())
        state.tokens.resize(past);

    // C context -> C++ context
    state.n_past = ctx->n_past;
    state.n_ctx = ctx->n_ctx;
    state.n_predict = ctx->n_predict;
    state.top_k = ctx->top_k;
    state.top_p = ctx->top_p;
    state.temp = ctx->temp;
    state.n_batch = ctx->n_batch;
    state.repeat_penalty = ctx->repeat_penalty;
    state.repeat_last_n = ctx->repeat_last_n;
    state.contextErase = ctx->context_erase;

    self->llModel->prompt(prompt, on_prompt, on_response, on_recalculate, state);

    // Expose the vectors' storage directly (no copy). These pointers refer to
    // the wrapper's own vectors, not to memory owned by the caller.
    ctx->logits = state.logits.data();
    ctx->logits_size = state.logits.size();
    ctx->tokens = state.tokens.data();
    ctx->tokens_size = state.tokens.size();

    // C++ context -> C context
    ctx->n_past = state.n_past;
    ctx->n_ctx = state.n_ctx;
    ctx->n_predict = state.n_predict;
    ctx->top_k = state.top_k;
    ctx->top_p = state.top_p;
    ctx->temp = state.temp;
    ctx->n_batch = state.n_batch;
    ctx->repeat_penalty = state.repeat_penalty;
    ctx->repeat_last_n = state.repeat_last_n;
    ctx->context_erase = state.contextErase;
}

// Computes an embedding for `text` via LLModel::embedding and returns it in a
// malloc'ed float array; the element count is written to *embedding_size.
//
// Returns null and sets *embedding_size to 0 when `model` or `text` is null,
// when `text` is empty, or when the allocation fails. The returned buffer is
// released with llmodel_free_embedding. `embedding_size` itself is not
// checked for null.
float *llmodel_embedding(llmodel_model model, const char *text, size_t *embedding_size)
{
    const bool unusable = !model || !text || text[0] == '\0';
    if (unusable) {
        *embedding_size = 0;
        return nullptr;
    }

    auto *self = reinterpret_cast<LLModelWrapper*>(model);
    const std::vector<float> values = self->llModel->embedding(text);
    const size_t count = values.size();

    auto *buffer = static_cast<float *>(malloc(count * sizeof(float)));
    if (!buffer) {
        *embedding_size = 0;
        return nullptr;
    }

    std::copy(values.begin(), values.end(), buffer);
    *embedding_size = count;
    return buffer;
}

// Releases a buffer with free(); llmodel_embedding allocates its result with
// malloc, so this is the matching deallocator.
void llmodel_free_embedding(float *ptr)
{
    // free() accepts a null pointer, so no guard is needed.
    void *buffer = ptr;
    free(buffer);
}

// Forwards `n_threads` to LLModel::setThreadCount on the wrapped model.
// The handle is dereferenced without a null check.
void llmodel_setThreadCount(llmodel_model model, int32_t n_threads)
{
    auto *self = reinterpret_cast<LLModelWrapper*>(model);
    LLModel *impl = self->llModel;
    impl->setThreadCount(n_threads);
}

// Returns the result of LLModel::threadCount for the wrapped model.
// The handle is dereferenced without a null check.
int32_t llmodel_threadCount(llmodel_model model)
{
    auto *self = reinterpret_cast<LLModelWrapper*>(model);
    LLModel *impl = self->llModel;
    return impl->threadCount();
}

void llmodel_set_implementation_search_path(const char *path)
{
    LLModel::Implementation::setImplementationsSearchPath(path);
}

// Returns the C-string view of LLModel::Implementation::implementationsSearchPath().
// NOTE(review): the returned pointer is only safe to use if
// implementationsSearchPath() returns a reference to long-lived storage rather
// than a temporary — confirm against its declaration.
const char *llmodel_get_implementation_search_path()
{
    const auto &searchPath = LLModel::Implementation::implementationsSearchPath();
    return searchPath.c_str();
}

// Queries LLModel::availableGPUDevices(memoryRequired) and returns the result
// as a malloc'ed array of llmodel_gpu_device, writing the element count to
// *num_devices.
//
// Returns null with *num_devices == 0 when no devices are reported or when the
// array cannot be allocated. Each entry's `name` and `vendor` are separate
// strdup'ed copies of the device's strings.
// NOTE: strdup can itself return null on allocation failure; that is not
// checked here, so `name`/`vendor` may be null in that case.
struct llmodel_gpu_device* llmodel_available_gpu_devices(llmodel_model model, size_t memoryRequired, int* num_devices)
{
    LLModelWrapper *wrapper = reinterpret_cast<LLModelWrapper*>(model);
    std::vector<LLModel::GPUDevice> devices = wrapper->llModel->availableGPUDevices(memoryRequired);

    const size_t count = devices.size();
    *num_devices = static_cast<int>(count);

    if (count == 0) return nullptr;  // Return nullptr if no devices are found

    // Allocate memory for the output array
    auto *output = static_cast<struct llmodel_gpu_device*>(malloc(count * sizeof(struct llmodel_gpu_device)));
    if (output == nullptr) {
        // Do not report devices the caller cannot read; mirror the
        // "null result, zero size" convention used by llmodel_embedding.
        *num_devices = 0;
        return nullptr;
    }

    for (size_t i = 0; i < count; i++) {
        output[i].index = devices[i].index;
        output[i].type = devices[i].type;
        output[i].heapSize = devices[i].heapSize;
        output[i].name = strdup(devices[i].name.c_str());  // Convert std::string to char* and allocate memory
        output[i].vendor = strdup(devices[i].vendor.c_str());  // Convert std::string to char* and allocate memory
    }

    return output;
}

// Forwards to LLModel::initializeGPUDevice(memoryRequired, name), with `device`
// converted to a std::string. Neither `model` nor `device` is checked for null.
bool llmodel_gpu_init_gpu_device_by_string(llmodel_model model, size_t memoryRequired, const char *device)
{
    auto *self = reinterpret_cast<LLModelWrapper*>(model);
    LLModel *impl = self->llModel;
    return impl->initializeGPUDevice(memoryRequired, std::string(device));
}

// Copies the fields of the C device descriptor into an LLModel::GPUDevice and
// forwards it to LLModel::initializeGPUDevice, returning its result.
// Neither `model` nor `device` is checked for null.
bool llmodel_gpu_init_gpu_device_by_struct(llmodel_model model, const llmodel_gpu_device *device)
{
    auto *self = reinterpret_cast<LLModelWrapper*>(model);

    LLModel::GPUDevice target;
    target.index = device->index;
    target.type = device->type;
    target.heapSize = device->heapSize;
    target.name = device->name;
    target.vendor = device->vendor;

    return self->llModel->initializeGPUDevice(target);
}

// Forwards the integer `device` to LLModel::initializeGPUDevice and returns
// its result. The handle is dereferenced without a null check.
bool llmodel_gpu_init_gpu_device_by_int(llmodel_model model, int device)
{
    auto *self = reinterpret_cast<LLModelWrapper*>(model);
    LLModel *impl = self->llModel;
    return impl->initializeGPUDevice(device);
}

// Returns the result of LLModel::hasGPUDevice for the wrapped model.
// The handle is dereferenced without a null check.
bool llmodel_has_gpu_device(llmodel_model model)
{
    auto *self = reinterpret_cast<LLModelWrapper*>(model);
    LLModel *impl = self->llModel;
    return impl->hasGPUDevice();
}
Add this and unbreak the build. 2023-04-27 02:44:52 +00:00			`#include "llmodel_c.h"`
Dlopen backend 5 (#779) Major change to the backend that allows for pluggable versions of llama.cpp/ggml. This was squashed merged from dlopen_backend_5 where the history is preserved. 2023-05-31 21:04:01 +00:00			`#include "llmodel.h"`

			`#include <cstring>`
			`#include <cerrno>`
			`#include <utility>`
Add this and unbreak the build. 2023-04-27 02:44:52 +00:00
Provide an initial impl. of the C interface. NOTE: has not been tested. 2023-04-27 13:43:24 +00:00			`struct LLModelWrapper {`
			`LLModel *llModel = nullptr;`
			`LLModel::PromptContext promptContext;`
llmodel: add model wrapper destructor, fix mem leak in golang bindings (#862) Signed-off-by: Juuso Alasuutari <juuso.alasuutari@gmail.com> 2023-06-12 16:41:22 +00:00			`~LLModelWrapper() { delete llModel; }`
Provide an initial impl. of the C interface. NOTE: has not been tested. 2023-04-27 13:43:24 +00:00			`};`

Dlopen backend 5 (#779) Major change to the backend that allows for pluggable versions of llama.cpp/ggml. This was squashed merged from dlopen_backend_5 where the history is preserved. 2023-05-31 21:04:01 +00:00			`thread_local static std::string last_error_message;`
feat: load model 2023-05-07 10:03:04 +00:00
gpt4all-backend: Add llmodel create and destroy functions (#554) * Add llmodel create and destroy functions * Fix capitalization * Fix capitalization * Fix capitalization * Update CMakeLists.txt --------- Co-authored-by: kuvaus <kuvaus@users.noreply.github.com> 2023-05-16 15:36:46 +00:00			`llmodel_model llmodel_model_create(const char *model_path) {`
llmodel_c: improve quality of error messages (#1625) 2023-11-07 16:20:14 +00:00			`const char *error;`
			`auto fres = llmodel_model_create2(model_path, "auto", &error);`
Dlopen backend 5 (#779) Major change to the backend that allows for pluggable versions of llama.cpp/ggml. This was squashed merged from dlopen_backend_5 where the history is preserved. 2023-05-31 21:04:01 +00:00			`if (!fres) {`
llmodel_c: improve quality of error messages (#1625) 2023-11-07 16:20:14 +00:00			`fprintf(stderr, "Unable to instantiate model: %s\n", error);`
Dlopen backend 5 (#779) Major change to the backend that allows for pluggable versions of llama.cpp/ggml. This was squashed merged from dlopen_backend_5 where the history is preserved. 2023-05-31 21:04:01 +00:00			`}`
			`return fres;`
			`}`

llmodel_c: improve quality of error messages (#1625) 2023-11-07 16:20:14 +00:00			`llmodel_model llmodel_model_create2(const char model_path, const char build_variant, const char **error) {`
Dlopen backend 5 (#779) Major change to the backend that allows for pluggable versions of llama.cpp/ggml. This was squashed merged from dlopen_backend_5 where the history is preserved. 2023-05-31 21:04:01 +00:00			`auto wrapper = new LLModelWrapper;`

			`try {`
Move it back as internal class. 2023-07-09 15:00:20 +00:00			`wrapper->llModel = LLModel::Implementation::construct(model_path, build_variant);`
llmodel_c: improve quality of error messages (#1625) 2023-11-07 16:20:14 +00:00			`if (!wrapper->llModel) {`
			`last_error_message = "Model format not supported (no matching implementation found)";`
			`}`
Dlopen backend 5 (#779) Major change to the backend that allows for pluggable versions of llama.cpp/ggml. This was squashed merged from dlopen_backend_5 where the history is preserved. 2023-05-31 21:04:01 +00:00			`} catch (const std::exception& e) {`
			`last_error_message = e.what();`
			`}`

			`if (!wrapper->llModel) {`
			`delete std::exchange(wrapper, nullptr);`
llmodel: add model wrapper destructor, fix mem leak in golang bindings (#862) Signed-off-by: Juuso Alasuutari <juuso.alasuutari@gmail.com> 2023-06-12 16:41:22 +00:00			`if (error) {`
llmodel_c: improve quality of error messages (#1625) 2023-11-07 16:20:14 +00:00			`*error = last_error_message.c_str();`
llmodel: add model wrapper destructor, fix mem leak in golang bindings (#862) Signed-off-by: Juuso Alasuutari <juuso.alasuutari@gmail.com> 2023-06-12 16:41:22 +00:00			`}`
Dlopen backend 5 (#779) Major change to the backend that allows for pluggable versions of llama.cpp/ggml. This was squashed merged from dlopen_backend_5 where the history is preserved. 2023-05-31 21:04:01 +00:00			`}`
			`return reinterpret_cast<llmodel_model*>(wrapper);`
gpt4all-backend: Add llmodel create and destroy functions (#554) * Add llmodel create and destroy functions * Fix capitalization * Fix capitalization * Fix capitalization * Update CMakeLists.txt --------- Co-authored-by: kuvaus <kuvaus@users.noreply.github.com> 2023-05-16 15:36:46 +00:00			`}`

			`void llmodel_model_destroy(llmodel_model model) {`
llmodel: add model wrapper destructor, fix mem leak in golang bindings (#862) Signed-off-by: Juuso Alasuutari <juuso.alasuutari@gmail.com> 2023-06-12 16:41:22 +00:00			`delete reinterpret_cast<LLModelWrapper*>(model);`
gpt4all-backend: Add llmodel create and destroy functions (#554) * Add llmodel create and destroy functions * Fix capitalization * Fix capitalization * Fix capitalization * Update CMakeLists.txt --------- Co-authored-by: kuvaus <kuvaus@users.noreply.github.com> 2023-05-16 15:36:46 +00:00			`}`

Implement configurable context length (#1749) 2023-12-16 22:58:15 +00:00			`size_t llmodel_required_mem(llmodel_model model, const char *model_path, int n_ctx)`
add requiredMem method to llmodel impls most of these can just shortcut out of the model loading logic llama is a bit worse to deal with because we submodule it so I have to at least parse the hparams, and then I just use the size on disk as an estimate for the mem size (which seems reasonable since we mmap() the llama files anyway) 2023-06-26 19:17:34 +00:00			`{`
			`LLModelWrapper wrapper = reinterpret_cast<LLModelWrapper>(model);`
Implement configurable context length (#1749) 2023-12-16 22:58:15 +00:00			`return wrapper->llModel->requiredMem(model_path, n_ctx);`
add requiredMem method to llmodel impls most of these can just shortcut out of the model loading logic llama is a bit worse to deal with because we submodule it so I have to at least parse the hparams, and then I just use the size on disk as an estimate for the mem size (which seems reasonable since we mmap() the llama files anyway) 2023-06-26 19:17:34 +00:00			`}`

Implement configurable context length (#1749) 2023-12-16 22:58:15 +00:00			`bool llmodel_loadModel(llmodel_model model, const char *model_path, int n_ctx)`
Provide an initial impl. of the C interface. NOTE: has not been tested. 2023-04-27 13:43:24 +00:00			`{`
			`LLModelWrapper wrapper = reinterpret_cast<LLModelWrapper>(model);`
Implement configurable context length (#1749) 2023-12-16 22:58:15 +00:00			`return wrapper->llModel->loadModel(model_path, n_ctx);`
Provide an initial impl. of the C interface. NOTE: has not been tested. 2023-04-27 13:43:24 +00:00			`}`

			`bool llmodel_isModelLoaded(llmodel_model model)`
			`{`
Dlopen backend 5 (#779) Major change to the backend that allows for pluggable versions of llama.cpp/ggml. This was squashed merged from dlopen_backend_5 where the history is preserved. 2023-05-31 21:04:01 +00:00			`LLModelWrapper wrapper = reinterpret_cast<LLModelWrapper>(model);`
			`return wrapper->llModel->isModelLoaded();`
Provide an initial impl. of the C interface. NOTE: has not been tested. 2023-04-27 13:43:24 +00:00			`}`

First attempt at providing a persistent chat list experience. Limitations: 1) Context is not restored for gpt-j models 2) When you switch between different model types in an existing chat the context and all the conversation is lost 3) The settings are not chat or conversation specific 4) The sizes of the chat persisted files are very large due to how much data the llama.cpp backend tries to persist. Need to investigate how we can shrink this. 2023-05-04 19:31:41 +00:00			`uint64_t llmodel_get_state_size(llmodel_model model)`
			`{`
Dlopen backend 5 (#779) Major change to the backend that allows for pluggable versions of llama.cpp/ggml. This was squashed merged from dlopen_backend_5 where the history is preserved. 2023-05-31 21:04:01 +00:00			`LLModelWrapper wrapper = reinterpret_cast<LLModelWrapper>(model);`
			`return wrapper->llModel->stateSize();`
First attempt at providing a persistent chat list experience. Limitations: 1) Context is not restored for gpt-j models 2) When you switch between different model types in an existing chat the context and all the conversation is lost 3) The settings are not chat or conversation specific 4) The sizes of the chat persisted files are very large due to how much data the llama.cpp backend tries to persist. Need to investigate how we can shrink this. 2023-05-04 19:31:41 +00:00			`}`

			`uint64_t llmodel_save_state_data(llmodel_model model, uint8_t *dest)`
			`{`
Dlopen backend 5 (#779) Major change to the backend that allows for pluggable versions of llama.cpp/ggml. This was squashed merged from dlopen_backend_5 where the history is preserved. 2023-05-31 21:04:01 +00:00			`LLModelWrapper wrapper = reinterpret_cast<LLModelWrapper>(model);`
			`return wrapper->llModel->saveState(dest);`
First attempt at providing a persistent chat list experience. Limitations: 1) Context is not restored for gpt-j models 2) When you switch between different model types in an existing chat the context and all the conversation is lost 3) The settings are not chat or conversation specific 4) The sizes of the chat persisted files are very large due to how much data the llama.cpp backend tries to persist. Need to investigate how we can shrink this. 2023-05-04 19:31:41 +00:00			`}`

			`uint64_t llmodel_restore_state_data(llmodel_model model, const uint8_t *src)`
			`{`
			`LLModelWrapper wrapper = reinterpret_cast<LLModelWrapper>(model);`
			`return wrapper->llModel->restoreState(src);`
			`}`

Provide an initial impl. of the C interface. NOTE: has not been tested. 2023-04-27 13:43:24 +00:00			`// Wrapper functions for the C callbacks`
Move the promptCallback to own function. 2023-04-27 15:08:15 +00:00			`bool prompt_wrapper(int32_t token_id, void *user_data) {`
			`llmodel_prompt_callback callback = reinterpret_cast<llmodel_prompt_callback>(user_data);`
			`return callback(token_id);`
			`}`

Provide an initial impl. of the C interface. NOTE: has not been tested. 2023-04-27 13:43:24 +00:00			`bool response_wrapper(int32_t token_id, const std::string &response, void *user_data) {`
			`llmodel_response_callback callback = reinterpret_cast<llmodel_response_callback>(user_data);`
			`return callback(token_id, response.c_str());`
			`}`

			`bool recalculate_wrapper(bool is_recalculating, void *user_data) {`
			`llmodel_recalculate_callback callback = reinterpret_cast<llmodel_recalculate_callback>(user_data);`
			`return callback(is_recalculating);`
			`}`

			`void llmodel_prompt(llmodel_model model, const char *prompt,`
llmodel: fix wrong and/or missing prompt callback type Fix occurrences of the prompt callback being incorrectly specified, or the response callback's prototype being incorrectly used in its place. Signed-off-by: Juuso Alasuutari <juuso.alasuutari@gmail.com> 2023-05-21 19:43:45 +00:00			`llmodel_prompt_callback prompt_callback,`
Move the promptCallback to own function. 2023-04-27 15:08:15 +00:00			`llmodel_response_callback response_callback,`
			`llmodel_recalculate_callback recalculate_callback,`
Provide an initial impl. of the C interface. NOTE: has not been tested. 2023-04-27 13:43:24 +00:00			`llmodel_prompt_context *ctx)`
			`{`
			`LLModelWrapper wrapper = reinterpret_cast<LLModelWrapper>(model);`

			`// Create std::function wrappers that call the C function pointers`
Move the promptCallback to own function. 2023-04-27 15:08:15 +00:00			`std::function<bool(int32_t)> prompt_func =`
			`std::bind(&prompt_wrapper, std::placeholders::_1, reinterpret_cast<void*>(prompt_callback));`
Provide an initial impl. of the C interface. NOTE: has not been tested. 2023-04-27 13:43:24 +00:00			`std::function<bool(int32_t, const std::string&)> response_func =`
Move the promptCallback to own function. 2023-04-27 15:08:15 +00:00			`std::bind(&response_wrapper, std::placeholders::_1, std::placeholders::_2, reinterpret_cast<void*>(response_callback));`
Provide an initial impl. of the C interface. NOTE: has not been tested. 2023-04-27 13:43:24 +00:00			`std::function<bool(bool)> recalc_func =`
Move the promptCallback to own function. 2023-04-27 15:08:15 +00:00			`std::bind(&recalculate_wrapper, std::placeholders::_1, reinterpret_cast<void*>(recalculate_callback));`
Provide an initial impl. of the C interface. NOTE: has not been tested. 2023-04-27 13:43:24 +00:00
Python Bindings: Improved unit tests, documentation and unification of API (#1090) * Makefiles, black, isort * Black and isort * unit tests and generation method * chat context provider * context does not reset * Current state * Fixup * Python bindings with unit tests * GPT4All Python Bindings: chat contexts, tests * New python bindings and backend fixes * Black and Isort * Documentation error * preserved n_predict for backwords compat with langchain --------- Co-authored-by: Adam Treat <treat.adam@gmail.com> 2023-06-30 20:02:02 +00:00			`if (size_t(ctx->n_past) < wrapper->promptContext.tokens.size())`
			`wrapper->promptContext.tokens.resize(ctx->n_past);`

Provide an initial impl. of the C interface. NOTE: has not been tested. 2023-04-27 13:43:24 +00:00			`// Copy the C prompt context`
			`wrapper->promptContext.n_past = ctx->n_past;`
			`wrapper->promptContext.n_ctx = ctx->n_ctx;`
			`wrapper->promptContext.n_predict = ctx->n_predict;`
			`wrapper->promptContext.top_k = ctx->top_k;`
			`wrapper->promptContext.top_p = ctx->top_p;`
			`wrapper->promptContext.temp = ctx->temp;`
			`wrapper->promptContext.n_batch = ctx->n_batch;`
			`wrapper->promptContext.repeat_penalty = ctx->repeat_penalty;`
			`wrapper->promptContext.repeat_last_n = ctx->repeat_last_n;`
			`wrapper->promptContext.contextErase = ctx->context_erase;`

			`// Call the C++ prompt method`
Move the promptCallback to own function. 2023-04-27 15:08:15 +00:00			`wrapper->llModel->prompt(prompt, prompt_func, response_func, recalc_func, wrapper->promptContext);`
Provide an initial impl. of the C interface. NOTE: has not been tested. 2023-04-27 13:43:24 +00:00
			`// Update the C context by giving access to the wrappers raw pointers to std::vector data`
			`// which involves no copies`
			`ctx->logits = wrapper->promptContext.logits.data();`
			`ctx->logits_size = wrapper->promptContext.logits.size();`
			`ctx->tokens = wrapper->promptContext.tokens.data();`
			`ctx->tokens_size = wrapper->promptContext.tokens.size();`

			`// Update the rest of the C prompt context`
			`ctx->n_past = wrapper->promptContext.n_past;`
			`ctx->n_ctx = wrapper->promptContext.n_ctx;`
			`ctx->n_predict = wrapper->promptContext.n_predict;`
			`ctx->top_k = wrapper->promptContext.top_k;`
			`ctx->top_p = wrapper->promptContext.top_p;`
			`ctx->temp = wrapper->promptContext.temp;`
			`ctx->n_batch = wrapper->promptContext.n_batch;`
			`ctx->repeat_penalty = wrapper->promptContext.repeat_penalty;`
			`ctx->repeat_last_n = wrapper->promptContext.repeat_last_n;`
			`ctx->context_erase = wrapper->promptContext.contextErase;`
			`}`

Bert 2023-07-09 15:32:51 +00:00			`float llmodel_embedding(llmodel_model model, const char text, size_t *embedding_size)`
			`{`
Handle edge cases when generating embeddings (#1215) * Handle edge cases when generating embeddings * Improve Python handling & add llmodel_c.h note - In the Python bindings fail fast with a ValueError when text is empty - Advice other bindings authors to do likewise in llmodel_c.h 2023-07-17 20:21:03 +00:00			`if (model == nullptr \|\| text == nullptr \|\| !strlen(text)) {`
			`*embedding_size = 0;`
			`return nullptr;`
			`}`
Bert 2023-07-09 15:32:51 +00:00			`LLModelWrapper wrapper = reinterpret_cast<LLModelWrapper>(model);`
			`std::vector<float> embeddingVector = wrapper->llModel->embedding(text);`
			`float embedding = (float )malloc(embeddingVector.size() * sizeof(float));`
Handle edge cases when generating embeddings (#1215) * Handle edge cases when generating embeddings * Improve Python handling & add llmodel_c.h note - In the Python bindings fail fast with a ValueError when text is empty - Advice other bindings authors to do likewise in llmodel_c.h 2023-07-17 20:21:03 +00:00			`if (embedding == nullptr) {`
Bert 2023-07-09 15:32:51 +00:00			`*embedding_size = 0;`
			`return nullptr;`
			`}`
			`std::copy(embeddingVector.begin(), embeddingVector.end(), embedding);`
			`*embedding_size = embeddingVector.size();`
			`return embedding;`
			`}`

			`void llmodel_free_embedding(float *ptr)`
			`{`
			`free(ptr);`
			`}`

Provide an initial impl. of the C interface. NOTE: has not been tested. 2023-04-27 13:43:24 +00:00			`void llmodel_setThreadCount(llmodel_model model, int32_t n_threads)`
			`{`
			`LLModelWrapper wrapper = reinterpret_cast<LLModelWrapper>(model);`
			`wrapper->llModel->setThreadCount(n_threads);`
			`}`

			`int32_t llmodel_threadCount(llmodel_model model)`
			`{`
Dlopen backend 5 (#779) Major change to the backend that allows for pluggable versions of llama.cpp/ggml. This was squashed merged from dlopen_backend_5 where the history is preserved. 2023-05-31 21:04:01 +00:00			`LLModelWrapper wrapper = reinterpret_cast<LLModelWrapper>(model);`
			`return wrapper->llModel->threadCount();`
Provide an initial impl. of the C interface. NOTE: has not been tested. 2023-04-27 13:43:24 +00:00			`}`
fixed finding model libs 2023-06-02 14:57:21 +00:00
			`void llmodel_set_implementation_search_path(const char *path)`
			`{`
Move it back as internal class. 2023-07-09 15:00:20 +00:00			`LLModel::Implementation::setImplementationsSearchPath(path);`
fixed finding model libs 2023-06-02 14:57:21 +00:00			`}`

			`const char *llmodel_get_implementation_search_path()`
			`{`
Move it back as internal class. 2023-07-09 15:00:20 +00:00			`return LLModel::Implementation::implementationsSearchPath().c_str();`
fixed finding model libs 2023-06-02 14:57:21 +00:00			`}`
Nomic vulkan backend licensed under the Software for Open Models License (SOM), version 1.0. 2023-08-30 13:43:56 +00:00
			`struct llmodel_gpu_device* llmodel_available_gpu_devices(llmodel_model model, size_t memoryRequired, int* num_devices)`
			`{`
			`LLModelWrapper wrapper = reinterpret_cast<LLModelWrapper>(model);`
			`std::vector<LLModel::GPUDevice> devices = wrapper->llModel->availableGPUDevices(memoryRequired);`

			`// Set the num_devices`
			`*num_devices = devices.size();`

			`if (*num_devices == 0) return nullptr; // Return nullptr if no devices are found`

			`// Allocate memory for the output array`
			`struct llmodel_gpu_device* output = (struct llmodel_gpu_device) malloc(num_devices * sizeof(struct llmodel_gpu_device));`

			`for (int i = 0; i < *num_devices; i++) {`
			`output[i].index = devices[i].index;`
			`output[i].type = devices[i].type;`
			`output[i].heapSize = devices[i].heapSize;`
			`output[i].name = strdup(devices[i].name.c_str()); // Convert std::string to char* and allocate memory`
			`output[i].vendor = strdup(devices[i].vendor.c_str()); // Convert std::string to char* and allocate memory`
			`}`

			`return output;`
			`}`

			`bool llmodel_gpu_init_gpu_device_by_string(llmodel_model model, size_t memoryRequired, const char *device)`
			`{`
			`LLModelWrapper wrapper = reinterpret_cast<LLModelWrapper>(model);`
			`return wrapper->llModel->initializeGPUDevice(memoryRequired, std::string(device));`
			`}`

			`bool llmodel_gpu_init_gpu_device_by_struct(llmodel_model model, const llmodel_gpu_device *device)`
			`{`
			`LLModel::GPUDevice d;`
			`d.index = device->index;`
			`d.type = device->type;`
			`d.heapSize = device->heapSize;`
			`d.name = device->name;`
			`d.vendor = device->vendor;`
			`LLModelWrapper wrapper = reinterpret_cast<LLModelWrapper>(model);`
			`return wrapper->llModel->initializeGPUDevice(d);`
			`}`

			`bool llmodel_gpu_init_gpu_device_by_int(llmodel_model model, int device)`
			`{`
			`LLModelWrapper wrapper = reinterpret_cast<LLModelWrapper>(model);`
			`return wrapper->llModel->initializeGPUDevice(device);`
			`}`

			`bool llmodel_has_gpu_device(llmodel_model model)`
			`{`
			`LLModelWrapper wrapper = reinterpret_cast<LLModelWrapper>(model);`
			`return wrapper->llModel->hasGPUDevice();`
			`}`