mirror of
https://github.com/nomic-ai/gpt4all
synced 2024-11-06 09:20:33 +00:00
37 lines
596 B
C
37 lines
596 B
C
|
#pragma once
|
||
|
#include <cstdint>
|
||
|
#include <cstddef>
|
||
|
#include <ggml.h>
|
||
|
|
||
|
// Owning, heap-allocated byte buffer.
//
// `addr` is either NULL (empty buffer) or points at `size` bytes obtained from
// `new uint8_t[]`. The destructor releases the storage.
struct llm_buffer {
    uint8_t * addr = NULL;
    size_t size = 0;

    llm_buffer() = default;

    // The buffer owns `addr`, so a member-wise copy would leave two objects
    // calling delete[] on the same block. Declaring the copy operations also
    // suppresses the implicit move operations, which makes the type neither
    // copyable nor movable.
    llm_buffer(const llm_buffer &) = delete;
    llm_buffer & operator=(const llm_buffer &) = delete;

    // Replace the storage with a fresh block of `size` bytes.
    //
    // The previous contents are discarded (not copied) and the new bytes are
    // uninitialized. If the allocation throws, the buffer is left empty
    // (addr == NULL, size == 0) and the exception propagates to the caller.
    void resize(size_t size) {
        // Free first so the old and new blocks are never alive at the same
        // time, then clear the members before allocating: should `new` throw,
        // the destructor must not delete[] the already-released block again.
        delete[] addr;
        addr = NULL;
        this->size = 0;

        addr = new uint8_t[size];
        this->size = size;
    }

    ~llm_buffer() {
        delete[] addr; // delete[] on NULL is a no-op
    }
};
|
||
|
|
||
|
struct llm_kv_cache {
|
||
|
struct ggml_tensor * k;
|
||
|
struct ggml_tensor * v;
|
||
|
|
||
|
struct ggml_context * ctx = NULL;
|
||
|
|
||
|
llm_buffer buf;
|
||
|
|
||
|
int n; // number of tokens currently in the cache
|
||
|
|
||
|
~llm_kv_cache() {
|
||
|
if (ctx) {
|
||
|
ggml_free(ctx);
|
||
|
}
|
||
|
}
|
||
|
};
|