<?xml version="1.0" encoding="utf-8"?>
<Version Condition=" '$(VersionSuffix)' != '' ">$(Version)$(VersionSuffix)</Version>
<Using Include="System"/>
<PackageReference Include="Roslynator.Analyzers" Version="4.2.0">
<IncludeAssets>runtime; build; native; contentfiles; analyzers</IncludeAssets>
<PackageReference Include="Roslynator.CodeAnalysis.Analyzers" Version="4.2.0">
<IncludeAssets>runtime; build; native; contentfiles; analyzers</IncludeAssets>
<PackageReference Include="Roslynator.Formatting.Analyzers" Version="4.2.0">
<IncludeAssets>runtime; build; native; contentfiles; analyzers</IncludeAssets>

<Project Sdk="Microsoft.NET.Sdk">
<ProjectReference Include="..\Gpt4All\Gpt4All.csproj" />
<Folder Include="Properties\" />

using Gpt4All;
// Sample console program: loads a model and streams a prediction for a prompt.
var modelFactory = new Gpt4AllModelFactory();
// The model path is the first command-line argument; args[0] is read without a length check.
var modelPath = args[0];
// LoadModel is the factory entry point that takes only a path (no explicit model type).
using var model = modelFactory.LoadModel(modelPath);
// The prompt is the optional second argument; a fixed default prompt is used when it is absent.
var input = args.Length > 1 ? args[1] : "Name 3 colors.";
// Request a streaming prediction, then enumerate the tokens exposed by the result.
// NOTE(review): the call arguments and the loop body are not visible in this view.
var result = await model.GetStreamingPredictionAsync(
await foreach (var token in result.GetPredictionStreamingAsync())

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 17
VisualStudioVersion = 17.5.33516.290
MinimumVisualStudioVersion = 10.0.40219.1
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Gpt4All.Samples", "Gpt4All.Samples\Gpt4All.Samples.csproj", "{59864AE8-E45D-42F7-A7C0-1308EF185F39}"
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{DA396C11-CEAD-4368-8234-FB12255A30D2}"
ProjectSection(SolutionItems) = preProject
.gitignore = .gitignore
build_linux.sh = build_linux.sh
build_win-mingw.ps1 = build_win-mingw.ps1
build_win-msvc.ps1 = build_win-msvc.ps1
docs\gpt4all_csharp.md = docs\gpt4all_csharp.md
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Gpt4All", "Gpt4All\Gpt4All.csproj", "{6015C62B-2008-426B-A334-740D6F1FE38B}"
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Release|Any CPU = Release|Any CPU
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{59864AE8-E45D-42F7-A7C0-1308EF185F39}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{59864AE8-E45D-42F7-A7C0-1308EF185F39}.Debug|Any CPU.Build.0 = Debug|Any CPU
{59864AE8-E45D-42F7-A7C0-1308EF185F39}.Release|Any CPU.ActiveCfg = Release|Any CPU
{59864AE8-E45D-42F7-A7C0-1308EF185F39}.Release|Any CPU.Build.0 = Release|Any CPU
{6015C62B-2008-426B-A334-740D6F1FE38B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{6015C62B-2008-426B-A334-740D6F1FE38B}.Debug|Any CPU.Build.0 = Debug|Any CPU
{6015C62B-2008-426B-A334-740D6F1FE38B}.Release|Any CPU.ActiveCfg = Release|Any CPU
{6015C62B-2008-426B-A334-740D6F1FE38B}.Release|Any CPU.Build.0 = Release|Any CPU
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {17632027-F4C2-4903-B88F-310CE3DE386B}

namespace Gpt4All.Bindings;
/// <summary>
/// Represents the interface exposed by the universal wrapper for GPT4All language models built around llmodel C-API.
/// </summary>
public interface ILLModel : IDisposable
/// <summary>The type of the wrapped model.</summary>
ModelType ModelType { get; }
/// <summary>Gets the size, in bytes, of the internal state of the model.</summary>
ulong GetStateSizeBytes();
/// <summary>Gets the number of threads used by the model.</summary>
int GetThreadCount();
/// <summary>Sets the number of threads to be used by the model.</summary>
/// <param name="threadCount">The new thread count</param>
void SetThreadCount(int threadCount);
/// <summary>Checks whether the model is loaded.</summary>
bool IsLoaded();
/// <summary>Loads the model from a file.</summary>
/// <param name="modelPath">The path to the model file.</param>
/// <returns>true if the model was loaded successfully, false otherwise.</returns>
bool Load(string modelPath);
/// <summary>Generates a response for a prompt, reporting progress through the optional callbacks.</summary>
/// <param name="text">The input prompt</param>
/// <param name="context">The prompt context</param>
/// <param name="promptCallback">Optional callback for prompt processing events</param>
/// <param name="responseCallback">Optional callback for generated response events</param>
/// <param name="recalculateCallback">Optional callback for recalculation events</param>
/// <param name="cancellationToken">Token used to request cancellation</param>
void Prompt(
string text,
LLModelPromptContext context,
Func<ModelPromptEventArgs, bool>? promptCallback = null,
Func<ModelResponseEventArgs, bool>? responseCallback = null,
Func<ModelRecalculatingEventArgs, bool>? recalculateCallback = null,
CancellationToken cancellationToken = default);
/// <summary>Restores the internal state of the model using data from the given address.</summary>
/// <param name="destination">
/// Pointer handed to the restore operation.
/// NOTE(review): the native restore function names this argument <c>src</c>, so the name here looks swapped - confirm.
/// </param>
unsafe ulong RestoreStateData(byte* destination);
/// <summary>Saves the internal state of the model to the given address.</summary>
/// <param name="source">
/// Pointer handed to the save operation.
/// NOTE(review): the native save function names this argument <c>dest</c>, so the name here looks swapped - confirm.
/// </param>
unsafe ulong SaveStateData(byte* source);

namespace Gpt4All.Bindings;
/// <summary>
/// Arguments for the response processing callback
/// </summary>
/// <param name="TokenId">The token id of the response</param>
/// <param name="Response"> The response string. NOTE: a token_id of -1 indicates the string is an error string</param>
/// <remarks>
/// The callback receiving these arguments returns a bool indicating whether the model should keep generating.
/// </remarks>
public record ModelResponseEventArgs(int TokenId, string Response)
/// <summary>True when <see cref="TokenId"/> is -1, i.e. <see cref="Response"/> carries an error string.</summary>
public bool IsError => TokenId == -1;
/// <summary>
/// Arguments for the prompt processing callback
/// </summary>
/// <param name="TokenId">The token id of the prompt</param>
/// <remarks>
/// The callback receiving these arguments returns a bool indicating whether the model should keep processing.
/// </remarks>
public record ModelPromptEventArgs(int TokenId)
/// <summary>
/// Arguments for the recalculating callback
/// </summary>
/// <param name="IsRecalculating"> whether the model is recalculating the context.</param>
/// <remarks>
/// The callback receiving these arguments returns a bool indicating whether the model should keep generating.
/// </remarks>
public record ModelRecalculatingEventArgs(bool IsRecalculating);
/// <summary>
/// Base class and universal wrapper for GPT4All language models built around llmodel C-API.
/// </summary>
public class LLModel : ILLModel
// Native model handle; passed as the first argument to the NativeMethods calls made by this class.
protected readonly IntPtr _handle;
// Model type supplied at construction; Dispose(bool) switches on it.
private readonly ModelType _modelType;
// Set at the end of Dispose(bool) so that later calls return early.
private bool _disposed;
/// <summary>The model type supplied when this wrapper was created.</summary>
public ModelType ModelType => _modelType;
/// <summary>Wraps an existing native model handle. No validation of the handle is performed here.</summary>
/// <param name="handle">Pointer to underlying model</param>
/// <param name="modelType">The model type</param>
internal LLModel(IntPtr handle, ModelType modelType)
_handle = handle;
_modelType = modelType;
/// <summary>
/// Create a new model from a pointer
/// </summary>
/// <param name="handle">Pointer to underlying model</param>
/// <param name="modelType">The model type</param>
/// <returns>A new <see cref="LLModel"/> wrapping <paramref name="handle"/></returns>
public static LLModel Create(IntPtr handle, ModelType modelType)
return new LLModel(handle, modelType);
/// <summary>
/// Generate a response using the model
/// </summary>
/// <param name="text">The input prompt</param>
/// <param name="context">The context</param>
/// <param name="promptCallback">A callback function for handling the processing of prompt</param>
/// <param name="responseCallback">A callback function for handling the generated response</param>
/// <param name="recalculateCallback">A callback function for handling recalculation requests</param>
/// <param name="cancellationToken">
/// Checked at the start of every callback invocation; once cancellation is requested each callback returns false.
/// </param>
public void Prompt(
string text,
LLModelPromptContext context,
Func<ModelPromptEventArgs, bool>? promptCallback = null,
Func<ModelResponseEventArgs, bool>? responseCallback = null,
Func<ModelRecalculatingEventArgs, bool>? recalculateCallback = null,
CancellationToken cancellationToken = default)
// NOTE(review): the statement that receives the three lambdas below is not visible in this view;
// presumably they are the callback arguments of the native prompt call - confirm.
// Prompt callback: false on cancellation, true when no user callback is set, otherwise the user callback's result.
(tokenId) =>
if (cancellationToken.IsCancellationRequested) return false;
if (promptCallback == null) return true;
var args = new ModelPromptEventArgs(tokenId);
return promptCallback(args);
// Response callback: same rules, wrapping the token id and response text.
(tokenId, response) =>
if (cancellationToken.IsCancellationRequested) return false;
if (responseCallback == null) return true;
var args = new ModelResponseEventArgs(tokenId, response);
return responseCallback(args);
// Recalculation callback: same rules, wrapping the recalculating flag.
(isRecalculating) =>
if (cancellationToken.IsCancellationRequested) return false;
if (recalculateCallback == null) return true;
var args = new ModelRecalculatingEventArgs(isRecalculating);
return recalculateCallback(args);
// The wrapped native context struct is passed by reference.
ref context.UnderlyingContext
/// <summary>
/// Changes how many threads the model uses by forwarding the value to the native library.
/// </summary>
/// <param name="threadCount">The new thread count</param>
public void SetThreadCount(int threadCount)
    => NativeMethods.llmodel_setThreadCount(_handle, threadCount);
/// <summary>
/// Queries the native library for the number of threads the model uses.
/// </summary>
/// <returns>the number of threads used by the model</returns>
public int GetThreadCount()
    => NativeMethods.llmodel_threadCount(_handle);
/// <summary>
/// Queries the native library for the size of the model's internal state.
/// </summary>
/// <remarks>
/// This state data is specific to the type of model you have created.
/// </remarks>
/// <returns>the size in bytes of the internal state of the model</returns>
public ulong GetStateSizeBytes()
    => NativeMethods.llmodel_get_state_size(_handle);
/// <summary>
/// Saves the internal state of the model by forwarding the given address to the native save function.
/// </summary>
/// <param name="source">
/// Pointer passed to the native save call, where the matching native parameter is the destination buffer.
/// </param>
/// <returns>The number of bytes copied</returns>
public unsafe ulong SaveStateData(byte* source)
    => NativeMethods.llmodel_save_state_data(_handle, source);
/// <summary>
/// Restores the internal state of the model by forwarding the given address to the native restore function.
/// </summary>
/// <param name="destination">
/// Pointer passed to the native restore call, where the matching native parameter is the source buffer.
/// </param>
/// <returns>the number of bytes read</returns>
public unsafe ulong RestoreStateData(byte* destination)
    => NativeMethods.llmodel_restore_state_data(_handle, destination);
/// <summary>
/// Asks the native library whether the model behind this wrapper is loaded.
/// </summary>
/// <returns>true if the model was loaded successfully, false otherwise.</returns>
public bool IsLoaded()
    => NativeMethods.llmodel_isModelLoaded(_handle);
/// <summary>
/// Loads a model file into the native model behind this wrapper.
/// </summary>
/// <param name="modelPath">The path to the model file.</param>
/// <returns>true if the model was loaded successfully, false otherwise.</returns>
public bool Load(string modelPath)
    => NativeMethods.llmodel_loadModel(_handle, modelPath);
// Native teardown helpers.
// NOTE(review): their bodies are not visible in this view; presumably each forwards _handle to the
// matching native destroy function - confirm.
protected void Destroy()
protected void DestroyLLama()
protected void DestroyGptj()
// NOTE(review): the name reads "Mtp" while the model type is spelled MPT - looks like a typo;
// renaming would change the protected API, so confirm before touching it.
protected void DestroyMtp()
/// <summary>Releases the model once; later calls return immediately because of the _disposed flag.</summary>
/// <param name="disposing">true when called from <see cref="Dispose()"/></param>
protected virtual void Dispose(bool disposing)
if (_disposed) return;
if (disposing)
// dispose managed state
// Teardown is selected by model type.
// NOTE(review): the statements inside each case are not visible in this view.
switch (_modelType)
case ModelType.LLAMA:
case ModelType.GPTJ:
case ModelType.MPT:
_disposed = true;
/// <summary>Disposes the model by calling <see cref="Dispose(bool)"/> with disposing set to true.</summary>
public void Dispose()
Dispose(disposing: true);

@ -0,0 +1,140 @@
using System.Reflection;
namespace Gpt4All.Bindings;
/// <summary>
/// Wrapper around the llmodel_prompt_context structure for holding the prompt context.
/// </summary>
/// <remarks>
/// The implementation takes care of all the memory handling of the raw logits pointer and the
/// raw tokens pointer. Attempting to resize them or modify them in any way can lead to undefined behavior.
/// </remarks>
public unsafe class LLModelPromptContext
private llmodel_prompt_context _ctx;
internal ref llmodel_prompt_context UnderlyingContext => ref _ctx;
public LLModelPromptContext()
_ctx = new();
/// <summary>
/// logits of current context
/// </summary>
public Span<float> Logits => new(_ctx.logits, (int)_ctx.logits_size);
/// <summary>
/// the size of the raw logits vector
/// </summary>
public nuint LogitsSize
get => _ctx.logits_size;
set => _ctx.logits_size = value;
/// <summary>
/// current tokens in the context window
/// </summary>
public Span<int> Tokens => new(_ctx.tokens, (int)_ctx.tokens_size);
/// <summary>
/// the size of the raw tokens vector
/// </summary>
public nuint TokensSize
get => _ctx.tokens_size;
set => _ctx.tokens_size = value;
/// <summary>
/// top k logits to sample from
/// </summary>
public int TopK
get => _ctx.top_k;
set => _ctx.top_k = value;
/// <summary>
/// nucleus sampling probability threshold
/// </summary>
public float TopP
get => _ctx.top_p;
set => _ctx.top_p = value;
/// <summary>
/// temperature to adjust model's output distribution
/// </summary>
public float Temperature
get => _ctx.temp;
set => _ctx.temp = value;
/// <summary>
/// number of tokens in past conversation
/// </summary>
public int PastNum
get => _ctx.n_past;
set => _ctx.n_past = value;
/// <summary>
/// number of predictions to generate in parallel
/// </summary>
public int Batches
get => _ctx.n_batch;
set => _ctx.n_batch = value;
/// <summary>
/// number of tokens to predict
/// </summary>
public int TokensToPredict
get => _ctx.n_predict;
set => _ctx.n_predict = value;
/// <summary>
/// penalty factor for repeated tokens
/// </summary>
public float RepeatPenalty
get => _ctx.repeat_penalty;
set => _ctx.repeat_penalty = value;
/// <summary>
/// last n tokens to penalize
/// </summary>
public int RepeatLastN
get => _ctx.repeat_last_n;
set => _ctx.repeat_last_n = value;
/// <summary>
/// number of tokens possible in context window
/// </summary>
public int ContextSize
get => _ctx.n_ctx;
set => _ctx.n_ctx = value;
/// <summary>
/// percent of context to erase if we exceed the context window
/// </summary>
public float ContextErase
get => _ctx.context_erase;
set => _ctx.context_erase = value;

@ -0,0 +1,126 @@
using System.Runtime.InteropServices;
namespace Gpt4All.Bindings;
// Managed mirror of the native llmodel_prompt_context structure; it is passed by reference to the native prompt call.
// NOTE(review): field order and types presumably have to match the native layout exactly - do not reorder.
public unsafe partial struct llmodel_prompt_context
// logits of current context
public float* logits;
// the size of the raw logits vector
public nuint logits_size;
// current tokens in the context window
[NativeTypeName("int32_t *")]
public int* tokens;
// the size of the raw tokens vector
public nuint tokens_size;
// number of tokens in past conversation
public int n_past;
// number of tokens possible in context window
public int n_ctx;
// number of tokens to predict
public int n_predict;
// top k logits to sample from
public int top_k;
// nucleus sampling probability threshold
public float top_p;
// temperature to adjust model's output distribution
public float temp;
// number of predictions to generate in parallel
public int n_batch;
// penalty factor for repeated tokens
public float repeat_penalty;
// last n tokens to penalize
public int repeat_last_n;
// percent of context to erase if we exceed the context window
public float context_erase;
internal static unsafe partial class NativeMethods
// Managed signatures of the callbacks passed to llmodel_prompt.
// They are declared Cdecl to match the calling convention used by every DllImport in this class.
// Each returns a 1-byte native bool (UnmanagedType.I1).

/// <summary>Response callback: receives a token id and its UTF-8 text.</summary>
[UnmanagedFunctionPointer(CallingConvention.Cdecl)]
[return: MarshalAs(UnmanagedType.I1)]
public delegate bool LlmodelResponseCallback(int token_id, [MarshalAs(UnmanagedType.LPUTF8Str)] string response);

/// <summary>Prompt callback: receives the id of a processed prompt token.</summary>
[UnmanagedFunctionPointer(CallingConvention.Cdecl)]
[return: MarshalAs(UnmanagedType.I1)]
public delegate bool LlmodelPromptCallback(int token_id);

/// <summary>Recalculation callback: receives whether the model is recalculating the context.</summary>
/// <remarks>
/// The parameter is marshalled as a 1-byte bool. Without the explicit attribute the default
/// marshalling is a 4-byte BOOL, which reads bytes the native caller never set.
/// </remarks>
[UnmanagedFunctionPointer(CallingConvention.Cdecl)]
[return: MarshalAs(UnmanagedType.I1)]
public delegate bool LlmodelRecalculateCallback([MarshalAs(UnmanagedType.I1)] bool isRecalculating);
[DllImport("libllmodel", CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
[return: NativeTypeName("llmodel_model")]
public static extern IntPtr llmodel_gptj_create();
[DllImport("libllmodel", CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
public static extern void llmodel_gptj_destroy([NativeTypeName("llmodel_model")] IntPtr gptj);
[DllImport("libllmodel", CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
[return: NativeTypeName("llmodel_model")]
public static extern IntPtr llmodel_mpt_create();
[DllImport("libllmodel", CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
public static extern void llmodel_mpt_destroy([NativeTypeName("llmodel_model")] IntPtr mpt);
[DllImport("libllmodel", CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
[return: NativeTypeName("llmodel_model")]
public static extern IntPtr llmodel_llama_create();
[DllImport("libllmodel", CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
public static extern void llmodel_llama_destroy([NativeTypeName("llmodel_model")] IntPtr llama);
[DllImport("libllmodel", CallingConvention = CallingConvention.Cdecl, ExactSpelling = true, BestFitMapping = false, ThrowOnUnmappableChar = true)]
[return: NativeTypeName("llmodel_model")]
public static extern IntPtr llmodel_model_create(
[NativeTypeName("const char *")][MarshalAs(UnmanagedType.LPUTF8Str)] string model_path);
[DllImport("libllmodel", CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
public static extern void llmodel_model_destroy([NativeTypeName("llmodel_model")] IntPtr model);
[DllImport("libllmodel", CallingConvention = CallingConvention.Cdecl, ExactSpelling = true, BestFitMapping = false, ThrowOnUnmappableChar = true)]
[return: MarshalAs(UnmanagedType.I1)]
public static extern bool llmodel_loadModel(
[NativeTypeName("llmodel_model")] IntPtr model,
[NativeTypeName("const char *")][MarshalAs(UnmanagedType.LPUTF8Str)] string model_path);
[DllImport("libllmodel", CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
[return: MarshalAs(UnmanagedType.I1)]
public static extern bool llmodel_isModelLoaded([NativeTypeName("llmodel_model")] IntPtr model);
[DllImport("libllmodel", CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
[return: NativeTypeName("uint64_t")]
public static extern ulong llmodel_get_state_size([NativeTypeName("llmodel_model")] IntPtr model);
[DllImport("libllmodel", CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
[return: NativeTypeName("uint64_t")]
public static extern ulong llmodel_save_state_data([NativeTypeName("llmodel_model")] IntPtr model, [NativeTypeName("uint8_t *")] byte* dest);
[DllImport("libllmodel", CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
[return: NativeTypeName("uint64_t")]
public static extern ulong llmodel_restore_state_data([NativeTypeName("llmodel_model")] IntPtr model, [NativeTypeName("const uint8_t *")] byte* src);
[DllImport("libllmodel", CallingConvention = CallingConvention.Cdecl, ExactSpelling = true, BestFitMapping = false, ThrowOnUnmappableChar = true)]
public static extern void llmodel_prompt(
[NativeTypeName("llmodel_model")] IntPtr model,
[NativeTypeName("const char *")][MarshalAs(UnmanagedType.LPUTF8Str)] string prompt,
LlmodelPromptCallback prompt_callback,
LlmodelResponseCallback response_callback,
LlmodelRecalculateCallback recalculate_callback,
ref llmodel_prompt_context ctx);
[DllImport("libllmodel", CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
public static extern void llmodel_setThreadCount([NativeTypeName("llmodel_model")] IntPtr model, [NativeTypeName("int32_t")] int n_threads);
[DllImport("libllmodel", CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]
[return: NativeTypeName("int32_t")]
public static extern int llmodel_threadCount([NativeTypeName("llmodel_model")] IntPtr model);

using System.Diagnostics;
namespace Gpt4All.Bindings;
/// <summary>Annotates a member with the type name it had in the native signature.</summary>
[AttributeUsage(AttributeTargets.Struct | AttributeTargets.Enum | AttributeTargets.Property | AttributeTargets.Field | AttributeTargets.Parameter | AttributeTargets.ReturnValue, AllowMultiple = false, Inherited = true)]
internal sealed partial class NativeTypeNameAttribute : Attribute
{
    // Native type name captured at construction.
    private readonly string _name;

    /// <summary>Creates the attribute for the given native type name.</summary>
    /// <param name="name">The name of the type that was used in the native signature.</param>
    public NativeTypeNameAttribute(string name) => _name = name;

    /// <summary>The native type name this attribute was created with.</summary>
    public string Name
    {
        get { return _name; }
    }
}

using Gpt4All.Bindings;
namespace Gpt4All.Extensions;
/// <summary>Conversion helpers for <see cref="PredictRequestOptions"/>.</summary>
public static class PredictRequestOptionsExtensions
{
    /// <summary>
    /// Builds a new <see cref="LLModelPromptContext"/> and copies every option onto it.
    /// </summary>
    /// <param name="opts">The prediction options to copy from</param>
    /// <returns>A prompt context populated from <paramref name="opts"/></returns>
    public static LLModelPromptContext ToPromptContext(this PredictRequestOptions opts)
    {
        // Assignments run in the same order as the original initializer.
        var promptContext = new LLModelPromptContext();
        promptContext.LogitsSize = opts.LogitsSize;
        promptContext.TokensSize = opts.TokensSize;
        promptContext.TopK = opts.TopK;
        promptContext.TopP = opts.TopP;
        promptContext.PastNum = opts.PastConversationTokensNum;
        promptContext.RepeatPenalty = opts.RepeatPenalty;
        promptContext.Temperature = opts.Temperature;
        promptContext.RepeatLastN = opts.RepeatLastN;
        promptContext.Batches = opts.Batches;
        promptContext.ContextErase = opts.ContextErase;
        promptContext.ContextSize = opts.ContextSize;
        promptContext.TokensToPredict = opts.TokensToPredict;
        return promptContext;
    }
}

using Gpt4All.Bindings;
using Gpt4All.Extensions;
namespace Gpt4All;
/// <summary>
/// <see cref="IGpt4AllModel"/> implementation that runs predictions through an <see cref="ILLModel"/>.
/// </summary>
public class Gpt4All : IGpt4AllModel
// Underlying model that every prediction is forwarded to.
private readonly ILLModel _model;
/// <summary>Wraps the given underlying model.</summary>
internal Gpt4All(ILLModel model)
_model = model;
/// <summary>
/// Runs the prompt on a thread-pool task and returns the result object once the prompt call has returned.
/// </summary>
/// <param name="text">The text to complete</param>
/// <param name="opts">The prediction settings, converted to a prompt context</param>
/// <param name="cancellationToken">Forwarded to the model's Prompt call; the Task.Run itself is started with CancellationToken.None</param>
public Task<ITextPredictionResult> GetPredictionAsync(string text, PredictRequestOptions opts, CancellationToken cancellationToken = default)
return Task.Run(() =>
var result = new TextPredictionResult();
var context = opts.ToPromptContext();
_model.Prompt(text, context, responseCallback: e =>
// An error response is recorded on the result and stops generation by returning false.
if (e.IsError)
result.Success = false;
result.ErrorMessage = e.Response;
return false;
// NOTE(review): no statement that appends e.Response to the result is visible in this view - confirm it exists.
return true;
}, cancellationToken: cancellationToken);
return (ITextPredictionResult)result;
}, CancellationToken.None);
/// <summary>
/// Starts the prompt on a background task that is not awaited and immediately returns the streaming result object.
/// </summary>
/// <param name="text">The text to complete</param>
/// <param name="opts">The prediction settings, converted to a prompt context</param>
/// <param name="cancellationToken">Forwarded to the model's Prompt call; the Task.Run itself is started with CancellationToken.None</param>
public Task<ITextPredictionStreamingResult> GetStreamingPredictionAsync(string text, PredictRequestOptions opts, CancellationToken cancellationToken = default)
var result = new TextPredictionStreamingResult();
// The task is discarded on purpose; callers observe progress through the returned result.
_ = Task.Run(() =>
var context = opts.ToPromptContext();
_model.Prompt(text, context, responseCallback: e =>
// An error response is recorded on the result and stops generation by returning false.
if (e.IsError)
result.Success = false;
result.ErrorMessage = e.Response;
return false;
// NOTE(review): no statement that appends the token or completes the stream is visible in this view - confirm.
return true;
}, cancellationToken: cancellationToken);
}, CancellationToken.None);
return Task.FromResult((ITextPredictionStreamingResult)result);
// NOTE(review): the bodies of both Dispose members are not visible in this view.
protected virtual void Dispose(bool disposing)
if (disposing)
public void Dispose()

<Project Sdk="Microsoft.NET.Sdk">
<!-- Windows -->
<None Include="..\runtimes\win-x64\native\*.dll" Pack="true" PackagePath="runtimes\win-x64\native\%(Filename)%(Extension)" />
<!-- Linux -->
<None Include="..\runtimes\linux-x64\native\*.so" Pack="true" PackagePath="runtimes\linux-x64\native\%(Filename)%(Extension)" />
<!-- Windows -->
<None Condition="$([MSBuild]::IsOSPlatform('Windows'))" Include="..\runtimes\win-x64\native\*.dll" Visible="False" CopyToOutputDirectory="PreserveNewest" />
<!-- Linux -->
<None Condition="$([MSBuild]::IsOSPlatform('Linux'))" Include="..\runtimes\linux-x64\native\*.so" Visible="False" CopyToOutputDirectory="PreserveNewest" />

using Gpt4All.Bindings;
using System.Diagnostics;
namespace Gpt4All;
/// <summary>
/// Default <see cref="IGpt4AllModelFactory"/>: creates a native model, loads the model file into it
/// and wraps the result in a <see cref="Gpt4All"/> instance.
/// </summary>
public class Gpt4AllModelFactory : IGpt4AllModelFactory
{
    /// <summary>Creates and loads a model.</summary>
    /// <param name="modelPath">Path to the model file</param>
    /// <param name="modelType">Explicit model type, or null to detect it from the file header</param>
    /// <returns>The loaded model</returns>
    /// <exception cref="Exception">The native model could not be created or the file failed to load.</exception>
    private static IGpt4AllModel CreateModel(string modelPath, ModelType? modelType = null)
    {
        var modelType_ = modelType ?? ModelFileUtils.GetModelTypeFromModelFileHeader(modelPath);

        var handle = modelType_ switch
        {
            ModelType.LLAMA => NativeMethods.llmodel_llama_create(),
            ModelType.GPTJ => NativeMethods.llmodel_gptj_create(),
            ModelType.MPT => NativeMethods.llmodel_mpt_create(),
            _ => NativeMethods.llmodel_model_create(modelPath),
        };

        // Never hand a null handle to the native load call.
        if (handle == IntPtr.Zero)
        {
            throw new Exception($"Failed to load model: '{modelPath}'");
        }

        var loadedSuccesfully = NativeMethods.llmodel_loadModel(handle, modelPath);

        if (loadedSuccesfully == false)
        {
            // No wrapper will ever own this handle, so release it here instead of leaking it.
            DestroyHandle(handle, modelType_);
            throw new Exception($"Failed to load model: '{modelPath}'");
        }

        var underlyingModel = LLModel.Create(handle, modelType_);
        return new Gpt4All(underlyingModel);
    }

    // Releases a native handle with the destroy function that pairs with the create function used above.
    private static void DestroyHandle(IntPtr handle, ModelType modelType)
    {
        switch (modelType)
        {
            case ModelType.LLAMA:
                NativeMethods.llmodel_llama_destroy(handle);
                break;
            case ModelType.GPTJ:
                NativeMethods.llmodel_gptj_destroy(handle);
                break;
            case ModelType.MPT:
                NativeMethods.llmodel_mpt_destroy(handle);
                break;
            default:
                NativeMethods.llmodel_model_destroy(handle);
                break;
        }
    }

    /// <summary>Loads a model, detecting its type from the file header.</summary>
    public IGpt4AllModel LoadModel(string modelPath) => CreateModel(modelPath, modelType: null);

    /// <summary>Loads a model as MPT.</summary>
    public IGpt4AllModel LoadMptModel(string modelPath) => CreateModel(modelPath, ModelType.MPT);

    /// <summary>Loads a model as GPT-J.</summary>
    public IGpt4AllModel LoadGptjModel(string modelPath) => CreateModel(modelPath, ModelType.GPTJ);

    /// <summary>Loads a model as LLaMA.</summary>
    public IGpt4AllModel LoadLlamaModel(string modelPath) => CreateModel(modelPath, ModelType.LLAMA);
}

namespace Gpt4All;
public interface IGpt4AllModel : ITextPrediction, IDisposable

namespace Gpt4All;
/// <summary>Creates <see cref="IGpt4AllModel"/> instances from model files.</summary>
public interface IGpt4AllModelFactory
/// <summary>Loads the model at <paramref name="modelPath"/> as a GPT-J model.</summary>
IGpt4AllModel LoadGptjModel(string modelPath);
/// <summary>Loads the model at <paramref name="modelPath"/> as a LLaMA model.</summary>
IGpt4AllModel LoadLlamaModel(string modelPath);
/// <summary>Loads the model at <paramref name="modelPath"/> without an explicit model type.</summary>
IGpt4AllModel LoadModel(string modelPath);
/// <summary>Loads the model at <paramref name="modelPath"/> as an MPT model.</summary>
IGpt4AllModel LoadMptModel(string modelPath);

namespace Gpt4All;
/// <summary>Helpers for inspecting model files.</summary>
public static class ModelFileUtils
{
    // Magic numbers read from the first four bytes of a model file.
    private const uint GPTJ_MAGIC = 0x67676d6c;
    private const uint LLAMA_MAGIC = 0x67676a74;
    private const uint MPT_MAGIC = 0x67676d6d;

    /// <summary>
    /// Determines the model type from the 32-bit magic number at the start of the file.
    /// </summary>
    /// <param name="modelPath">Path to the model file</param>
    /// <returns>The model type matching the file's magic number</returns>
    /// <exception cref="ArgumentOutOfRangeException">The magic number matches no known model type.</exception>
    public static ModelType GetModelTypeFromModelFileHeader(string modelPath)
    {
        // Open read-only and shareable: the header is only inspected, so read-only model files
        // and files opened by another reader must still work.
        using var fileStream = new FileStream(modelPath, FileMode.Open, FileAccess.Read, FileShare.Read);
        using var binReader = new BinaryReader(fileStream);

        var magic = binReader.ReadUInt32();

        return magic switch
        {
            GPTJ_MAGIC => ModelType.GPTJ,
            LLAMA_MAGIC => ModelType.LLAMA,
            MPT_MAGIC => ModelType.MPT,
            // The two-argument overload is required: the single-string overload treats its argument as the parameter name.
            _ => throw new ArgumentOutOfRangeException(nameof(modelPath), $"Invalid model file. magic=0x{magic:X8}"),
        };
    }
}

namespace Gpt4All;
/// <summary>Options describing a model.</summary>
// NOTE(review): no consumer of this record is visible in this view - confirm where it is used.
public record ModelOptions
/// <summary>Thread count; defaults to 4.</summary>
public int Threads { get; init; } = 4;
/// <summary>Model type; defaults to <see cref="ModelType.GPTJ"/>.</summary>
public ModelType ModelType { get; init; } = ModelType.GPTJ;

namespace Gpt4All;
/// <summary>
/// The supported model types
/// </summary>
public enum ModelType
LLAMA = 0,

namespace Gpt4All;
/// <summary>
/// Interface for text prediction services
/// </summary>
public interface ITextPrediction
/// <summary>
/// Get prediction results for the prompt and provided options.
/// </summary>
/// <param name="text">The text to complete</param>
/// <param name="opts">The prediction settings</param>
/// <param name="cancellation">The <see cref="CancellationToken"/> for cancellation requests. The default is <see cref="CancellationToken.None"/>.</param>
/// <returns>The prediction result generated by the model</returns>
// NOTE(review): this parameter is named "cancellation" while the streaming overload uses
// "cancellationToken"; renaming it would change the public signature, so only the doc tag was aligned.
Task<ITextPredictionResult> GetPredictionAsync(
string text,
PredictRequestOptions opts,
CancellationToken cancellation = default);
/// <summary>
/// Get streaming prediction results for the prompt and provided options.
/// </summary>
/// <param name="text">The text to complete</param>
/// <param name="opts">The prediction settings</param>
/// <param name="cancellationToken">The <see cref="CancellationToken"/> for cancellation requests. The default is <see cref="CancellationToken.None"/>.</param>
/// <returns>The streaming prediction result generated by the model</returns>
Task<ITextPredictionStreamingResult> GetStreamingPredictionAsync(
string text,
PredictRequestOptions opts,
CancellationToken cancellationToken = default);

namespace Gpt4All;
/// <summary>Result of a text prediction request.</summary>
public interface ITextPredictionResult
/// <summary>Whether the prediction succeeded.</summary>
bool Success { get; }
/// <summary>The error message when the prediction failed; may be null.</summary>
string? ErrorMessage { get; }
/// <summary>Gets the predicted text.</summary>
/// <param name="cancellationToken">Token used to request cancellation</param>
Task<string> GetPredictionAsync(CancellationToken cancellationToken = default);

namespace Gpt4All;
/// <summary>A prediction result whose text can also be consumed as an asynchronous stream of strings.</summary>
public interface ITextPredictionStreamingResult : ITextPredictionResult
/// <summary>Gets the prediction as an asynchronous stream.</summary>
/// <param name="cancellationToken">Token used to request cancellation of the enumeration</param>
IAsyncEnumerable<string> GetPredictionStreamingAsync(CancellationToken cancellationToken = default);

namespace Gpt4All;
/// <summary>Settings for a prediction request; each value is copied onto the prompt context handed to the model.</summary>
public record PredictRequestOptions
/// <summary>The size of the raw logits vector.</summary>
public nuint LogitsSize { get; init; } = 0;
/// <summary>The size of the raw tokens vector.</summary>
public nuint TokensSize { get; init; } = 0;
/// <summary>Number of tokens in past conversation.</summary>
public int PastConversationTokensNum { get; init; } = 0;
/// <summary>Number of tokens possible in the context window.</summary>
public int ContextSize { get; init; } = 1024;
/// <summary>Number of tokens to predict.</summary>
public int TokensToPredict { get; init; } = 128;
/// <summary>Top k logits to sample from.</summary>
public int TopK { get; init; } = 40;
/// <summary>Nucleus sampling probability threshold.</summary>
public float TopP { get; init; } = 0.9f;
/// <summary>Temperature to adjust the model's output distribution.</summary>
public float Temperature { get; init; } = 0.1f;
/// <summary>Number of predictions to generate in parallel.</summary>
public int Batches { get; init; } = 8;
/// <summary>Penalty factor for repeated tokens.</summary>
public float RepeatPenalty { get; init; } = 1.2f;
/// <summary>Last n tokens to penalize.</summary>
public int RepeatLastN { get; init; } = 10;
/// <summary>Percent of context to erase if the context window is exceeded.</summary>
public float ContextErase { get; init; } = 0.5f;
/// <summary>Shared instance carrying the default value of every option.</summary>
public static readonly PredictRequestOptions Defaults = new();

using System.Text;
namespace Gpt4All;
/// <summary>Non-streaming prediction result backed by a <see cref="StringBuilder"/>.</summary>
public record TextPredictionResult : ITextPredictionResult
// Buffer whose content GetPredictionAsync returns.
private readonly StringBuilder _result;
/// <summary>Whether the prediction succeeded; starts as true.</summary>
public bool Success { get; internal set; } = true;
/// <summary>The error message, when one was recorded.</summary>
public string? ErrorMessage { get; internal set; }
internal TextPredictionResult()
_result = new StringBuilder();
// NOTE(review): the body of Append is not visible in this view; presumably it appends the token to _result - confirm.
internal void Append(string token)
/// <summary>Returns the text currently held in the buffer as an already-completed task.</summary>
/// <param name="cancellationToken">Not used by the visible implementation</param>
public Task<string> GetPredictionAsync(CancellationToken cancellationToken = default)
return Task.FromResult(_result.ToString());

using System.Text;
using System.Threading.Channels;
namespace Gpt4All;
/// <summary>Streaming prediction result backed by an unbounded <see cref="Channel{T}"/> of strings.</summary>
public record TextPredictionStreamingResult : ITextPredictionStreamingResult
// Channel carrying the tokens from the producer to the reader.
private readonly Channel<string> _channel;
/// <summary>Whether the prediction succeeded; starts as true.</summary>
public bool Success { get; internal set; } = true;
/// <summary>The error message, when one was recorded.</summary>
public string? ErrorMessage { get; internal set; }
/// <summary>The completion task of the channel reader.</summary>
public Task Completion => _channel.Reader.Completion;
internal TextPredictionStreamingResult()
_channel = Channel.CreateUnbounded<string>();
/// <summary>Tries to write a token to the channel.</summary>
/// <returns>The result of the channel writer's TryWrite call</returns>
internal bool Append(string token)
return _channel.Writer.TryWrite(token);
// NOTE(review): the body of Complete is not visible in this view; presumably it completes the channel writer - confirm.
internal void Complete()
/// <summary>Consumes the token stream and returns the content of a <see cref="StringBuilder"/>.</summary>
/// <param name="cancellationToken">Forwarded to the streaming enumeration</param>
public async Task<string> GetPredictionAsync(CancellationToken cancellationToken = default)
var sb = new StringBuilder();
var tokens = GetPredictionStreamingAsync(cancellationToken).ConfigureAwait(false);
// NOTE(review): the loop body is not visible in this view; presumably each token is appended to sb - confirm.
await foreach (var token in tokens)
return sb.ToString();
/// <summary>Exposes the channel content as an asynchronous stream.</summary>
/// <param name="cancellationToken">Token used to cancel the enumeration</param>
public IAsyncEnumerable<string> GetPredictionStreamingAsync(CancellationToken cancellationToken = default)
return _channel.Reader.ReadAllAsync(cancellationToken);

ClangSharpPInvokeGenerator @(Get-Content .\GenLLModelBindings.rsp)

# C# GPT4All
This package contains a set of C# bindings around the `llmodel` C-API.
## Documentation
## Installation
## Project Structure
└── csharp
   ├── Gpt4All // .NET Bindings
   ├── Gpt4All.Samples // Sample project
├── build_win-msvc.ps1 // Native build scripts
├── build_win-mingw.ps1
├── build_linux.sh
└── runtimes // [POST-BUILD] Platform-specific native libraries
├── win-x64
├── ...
└── linux-x64
## Local Build Instructions
> **Note**
> Tested On:
> - Windows 11 22H + VS2022 (CE) x64
> - Linux Ubuntu x64
> - Linux Ubuntu (WSL2) x64
1. Setup the repository
2. Build the native libraries for the platform of choice (see below)
3. Build the C# Bindings (NET6+ SDK is required)
git clone --recurse-submodules https://github.com/nomic-ai/gpt4all
cd gpt4all/gpt4all-bindings/csharp
### Linux
1. Setup build environment and install NET6+ SDK with the appropriate procedure for your distribution
sudo apt-get update
sudo apt-get install -y cmake build-essential
chmod +x ./build_linux.sh
2. `./build_linux.sh`
3. The native libraries should be present at `./runtimes/linux-x64/native`
### Windows - MinGW64
#### Additional requirements
- [MinGW64](https://www.mingw-w64.org/)
1. Setup
choco install mingw
$env:Path += ";C:\ProgramData\chocolatey\lib\mingw\tools\install\mingw64\bin"
choco install -y cmake --installargs 'ADD_CMAKE_TO_PATH=System'
2. Run the `./build_win-mingw.ps1` build script
3. The native libraries should be present at `.\runtimes\win-x64\native`
### Windows - MSVC
#### Additional requirements
- Visual Studio 2022
1. Open a terminal using the `x64 Native Tools Command Prompt for VS 2022` (`vcvars64.bat`)
2. Run the `./build_win-msvc.ps1` build script
3. `libllmodel.dll` and `libllama.dll` should be present at `.\native\win-x64`
> **Warning**
> If the build fails with: '**error C7555: use of designated initializers requires at least '/std:c++20'**'
> Modify `gpt4all/gpt4all-backend/CMakeLists.txt`, adding `CXX_STANDARD 20` to the `llmodel` target properties.
> ```cmake
> set_target_properties(llmodel PROPERTIES
> CXX_STANDARD 20 # <---- ADD THIS -----------------------
> ```
## C# Bindings Build Instructions
Build the `Gpt4All` (or `Gpt4All.Samples`) projects from within Visual Studio.
### Try the bindings
using Gpt4All;
// load the model
var modelFactory = new Gpt4AllModelFactory();
using var model = modelFactory.LoadModel("./path/to/ggml-gpt4all-j-v1.3-groovy.bin");
var input = "Name 3 Colors";
// request a prediction
var result = await model.GetStreamingPredictionAsync(
// asynchronously print the tokens as soon as they are produced by the model
await foreach(var token in result.GetPredictionStreamingAsync())
gptj_model_load: loading model from 'ggml-gpt4all-j-v1.3-groovy.bin' - please wait ...
gptj_model_load: n_vocab = 50400
gptj_model_load: ggml ctx size = 5401.45 MB
gptj_model_load: kv self size = 896.00 MB
gptj_model_load: ................................... done
gptj_model_load: model size = 3609.38 MB / num tensors = 285
Black, Blue and White

# Builds the native backend for linux-x64 and stages the shared libraries
# under runtimes/linux-x64/native.

# Stop at the first failing command so a failed configure/build never falls
# through to the copy steps.
set -e

# Start from a clean linux-x64 tree.
mkdir -p runtimes
rm -rf runtimes/linux-x64
mkdir -p runtimes/linux-x64/native
mkdir -p runtimes/linux-x64/build

# Configure and build the backend in Release mode.
cmake -S ../../gpt4all-backend -B runtimes/linux-x64/build
cmake --build runtimes/linux-x64/build --parallel --config Release

# Stage the built libraries.
cp runtimes/linux-x64/build/libllmodel.so runtimes/linux-x64/native/libllmodel.so
cp runtimes/linux-x64/build/llama.cpp/libllama.so runtimes/linux-x64/native/libllama.so

# Builds the native backend with MinGW and stages the DLLs under runtimes\win-x64\native.
$ROOT_DIR = '.\runtimes\win-x64'
$BUILD_DIR = '.\runtimes\win-x64\build\mingw'
$LIBS_DIR = '.\runtimes\win-x64\native'
# cleanup env: remove the whole win-x64 tree (errors ignored when it does not exist), then recreate the folders
Remove-Item -Force -Recurse $ROOT_DIR -ErrorAction SilentlyContinue | Out-Null
mkdir $BUILD_DIR | Out-Null
mkdir $LIBS_DIR | Out-Null
# build: configure with the MinGW Makefiles generator, then build in Release mode
cmake -G "MinGW Makefiles" -S ..\..\gpt4all-backend -B $BUILD_DIR
cmake --build $BUILD_DIR --parallel --config Release
# copy native dlls: the MinGW DLLs from the Chocolatey install directory, then the two built libraries
# NOTE(review): the MinGW path is hard-coded to the Chocolatey install location - confirm for other setups
cp "C:\ProgramData\chocolatey\lib\mingw\tools\install\mingw64\bin\*dll" $LIBS_DIR
cp "$BUILD_DIR\libllmodel.dll" $LIBS_DIR
cp "$BUILD_DIR\bin\libllama.dll" $LIBS_DIR

# Builds the native backend with MSVC and stages the DLLs under runtimes\win-x64\native,
# the folder the C# project packs and copies native libraries from.
$BUILD_DIR = '.\runtimes\win-x64\msvc\build'
$LIBS_DIR = '.\runtimes\win-x64\native'

# cleanup env: only the MSVC build tree is removed
Remove-Item -Force -Recurse .\runtimes\win-x64\msvc -ErrorAction SilentlyContinue
mkdir $BUILD_DIR | Out-Null
# -Force makes this a no-op when the folder already exists
New-Item -ItemType Directory -Force $LIBS_DIR | Out-Null

# build: the Visual Studio generators name the 64-bit platform "x64" ("Win64" is not a valid -A value)
cmake -G "Visual Studio 17 2022" -A x64 -S ..\..\gpt4all-backend -B $BUILD_DIR
cmake --build $BUILD_DIR --parallel --config Release

# copy native dlls, renamed with the lib prefix
cp "$BUILD_DIR\Release\llmodel.dll" "$LIBS_DIR\libllmodel.dll"
cp "$BUILD_DIR\bin\Release\llama.dll" "$LIBS_DIR\libllama.dll"