Mirror of https://github.com/nomic-ai/gpt4all (synced 2024-11-08 07:10:32 +00:00)
Commit 797891c995:

* Initial Library Loader
* Load library as part of Model factory
* Dynamically search and find the dlls
* Update tests to use locally built runtimes
* Fix dylib loading, add macos runtime support for sample/tests
* Bypass automatic loading by default.
* Only set CMAKE_OSX_ARCHITECTURES if not already set, allow cross-compile
* Switch Loading again
* Update build scripts for mac/linux
* Update bindings to support newest breaking changes
* Fix build
* Use llmodel for Windows
* Actually, it does need to be libllmodel
* Name
* Remove TFMs, bypass loading by default
* Fix script
* Delete mac script

Co-authored-by: Tim Miller <innerlogic4321@ghmail.com>
139 lines
3.3 KiB
C#
namespace Gpt4All.Bindings;

/// <summary>
/// Wrapper around the llmodel_prompt_context structure for holding the prompt context.
/// </summary>
/// <remarks>
/// The implementation takes care of all the memory handling of the raw logits pointer and the
/// raw tokens pointer. Attempting to resize or modify them in any way can lead to undefined behavior.
/// </remarks>
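/// <example>
/// A minimal, illustrative configuration. The values below are assumptions
/// chosen for demonstration, not defaults taken from the library:
/// <code>
/// var ctx = new LLModelPromptContext
/// {
///     TopK = 40,
///     TopP = 0.9f,
///     Temperature = 0.7f,
///     TokensToPredict = 128,
///     RepeatPenalty = 1.1f,
///     RepeatLastN = 64,
///     ContextSize = 2048,
///     ContextErase = 0.5f
/// };
/// </code>
/// </example>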
public unsafe class LLModelPromptContext
{
    private llmodel_prompt_context _ctx;

    internal ref llmodel_prompt_context UnderlyingContext => ref _ctx;

    public LLModelPromptContext()
    {
        _ctx = new();
    }

    /// <summary>
    /// logits of current context
    /// </summary>
    public Span<float> Logits => new(_ctx.logits, (int)_ctx.logits_size);

    /// <summary>
    /// the size of the raw logits vector
    /// </summary>
    public nuint LogitsSize
    {
        get => _ctx.logits_size;
        set => _ctx.logits_size = value;
    }

    /// <summary>
    /// current tokens in the context window
    /// </summary>
    public Span<int> Tokens => new(_ctx.tokens, (int)_ctx.tokens_size);

    /// <summary>
    /// the size of the raw tokens vector
    /// </summary>
    public nuint TokensSize
    {
        get => _ctx.tokens_size;
        set => _ctx.tokens_size = value;
    }

    /// <summary>
    /// top k logits to sample from
    /// </summary>
    public int TopK
    {
        get => _ctx.top_k;
        set => _ctx.top_k = value;
    }

    /// <summary>
    /// nucleus sampling probability threshold
    /// </summary>
    public float TopP
    {
        get => _ctx.top_p;
        set => _ctx.top_p = value;
    }

    /// <summary>
    /// temperature to adjust model's output distribution
    /// </summary>
    public float Temperature
    {
        get => _ctx.temp;
        set => _ctx.temp = value;
    }

    /// <summary>
    /// number of tokens in past conversation
    /// </summary>
    public int PastNum
    {
        get => _ctx.n_past;
        set => _ctx.n_past = value;
    }

    /// <summary>
    /// number of predictions to generate in parallel
    /// </summary>
    public int Batches
    {
        get => _ctx.n_batch;
        set => _ctx.n_batch = value;
    }

    /// <summary>
    /// number of tokens to predict
    /// </summary>
    public int TokensToPredict
    {
        get => _ctx.n_predict;
        set => _ctx.n_predict = value;
    }

    /// <summary>
    /// penalty factor for repeated tokens
    /// </summary>
    public float RepeatPenalty
    {
        get => _ctx.repeat_penalty;
        set => _ctx.repeat_penalty = value;
    }

    /// <summary>
    /// last n tokens to penalize
    /// </summary>
    public int RepeatLastN
    {
        get => _ctx.repeat_last_n;
        set => _ctx.repeat_last_n = value;
    }

    /// <summary>
    /// number of tokens possible in context window
    /// </summary>
    public int ContextSize
    {
        get => _ctx.n_ctx;
        set => _ctx.n_ctx = value;
    }

    /// <summary>
    /// percent of context to erase if we exceed the context window
    /// </summary>
    public float ContextErase
    {
        get => _ctx.context_erase;
        set => _ctx.context_erase = value;
    }
}
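// Illustrative sketch, not part of the original bindings: the Logits and Tokens
// spans wrap native memory owned by the context, so callers should copy out of
// them rather than resize them (see the remarks on LLModelPromptContext).
internal static class LLModelPromptContextExtensions
{
    /// <summary>Copies the current token window into a managed array.</summary>
    internal static int[] SnapshotTokens(this LLModelPromptContext ctx)
        => ctx.Tokens.ToArray();

    /// <summary>Copies the current logits into a managed array.</summary>
    internal static float[] SnapshotLogits(this LLModelPromptContext ctx)
        => ctx.Logits.ToArray();
}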