gpt4all/gpt4all-backend/llmodel_shared.h

#pragma once
#include <cstdint>
#include <cstddef>
#include <ggml.h>

// Owning wrapper around a heap-allocated byte array.
//
// addr and size are public and may be read directly. The array is allocated
// with new[] in resize() and released with delete[] in the destructor.
struct llm_buffer {
    uint8_t * addr = NULL; // start of the owned array; NULL when nothing is allocated
    size_t size = 0;       // byte count passed to the last successful resize()

    llm_buffer() = default;

    // Copying is disabled: two objects holding the same addr would both
    // call delete[] on it when destroyed.
    llm_buffer(const llm_buffer &) = delete;
    llm_buffer & operator=(const llm_buffer &) = delete;

    // Moving hands the array over and leaves the source empty.
    llm_buffer(llm_buffer && other) noexcept : addr(other.addr), size(other.size) {
        other.addr = NULL;
        other.size = 0;
    }

    llm_buffer & operator=(llm_buffer && other) noexcept {
        if (this != &other) {
            delete[] addr;
            addr = other.addr;
            size = other.size;
            other.addr = NULL;
            other.size = 0;
        }
        return *this;
    }

    // Replaces the current array with a new, uninitialized one of new_size
    // bytes. Previous contents are discarded, not copied.
    void resize(size_t new_size) {
        delete[] addr;
        // Clear the members before allocating: if new[] throws, the
        // destructor must not delete the already-freed array again.
        addr = NULL;
        size = 0;

        addr = new uint8_t[new_size];
        size = new_size;
    }

    ~llm_buffer() {
        delete[] addr;
    }
};

// Key/value cache state: the k and v tensors, a ggml context that is freed
// on destruction, a byte buffer, and the current token count.
struct llm_kv_cache {
    // NOTE(review): presumably k and v live inside ctx, which in turn is
    // backed by buf - confirm against the code that initializes the cache.
    struct ggml_tensor * k = NULL;
    struct ggml_tensor * v = NULL;

    struct ggml_context * ctx = NULL; // released with ggml_free() in the destructor

    llm_buffer buf;

    int n = 0; // number of tokens currently in the cache

    llm_kv_cache() = default;

    // Copying is disabled: a copy would hold the same ctx as the original
    // and both destructors would call ggml_free() on it.
    llm_kv_cache(const llm_kv_cache &) = delete;
    llm_kv_cache & operator=(const llm_kv_cache &) = delete;

    ~llm_kv_cache() {
        if (ctx) {
            ggml_free(ctx);
        }
    }
};
backend: factor out common elements in model code (#1089): * backend: factor out common structs in model code (prepping to hack on these by hopefully making there be fewer places to fix the same bug; rename) * use common buffer wrapper instead of manual malloc * fix replit compile warnings — 2023-06-29 00:35:07 +00:00			`#pragma once`
			`#include <cstdint>`
			`#include <cstddef>`
			`#include <ggml.h>`

			`struct llm_buffer {`
			`uint8_t * addr = NULL;`
			`size_t size = 0;`

			`void resize(size_t size) {`
			`delete[] addr;`
			`addr = new uint8_t[size];`
			`this->size = size;`
			`}`

			`~llm_buffer() {`
			`delete[] addr;`
			`}`
			`};`

			`struct llm_kv_cache {`
			`struct ggml_tensor * k;`
			`struct ggml_tensor * v;`

			`struct ggml_context * ctx = NULL;`

			`llm_buffer buf;`

			`int n; // number of tokens currently in the cache`

			`~llm_kv_cache() {`
			`if (ctx) {`
			`ggml_free(ctx);`
			`}`
			`}`
			`};`