diff --git a/.circleci/continue_config.yml b/.circleci/continue_config.yml index 28edab15..f4428df9 100644 --- a/.circleci/continue_config.yml +++ b/.circleci/continue_config.yml @@ -854,6 +854,7 @@ jobs: install-yarn: true node-version: "18.16" - run: node --version + - run: corepack enable - node/install-packages: app-dir: gpt4all-bindings/typescript pkg-manager: yarn @@ -884,6 +885,7 @@ jobs: install-yarn: true node-version: "18.16" - run: node --version + - run: corepack enable - node/install-packages: app-dir: gpt4all-bindings/typescript pkg-manager: yarn @@ -896,14 +898,14 @@ jobs: name: "Persisting all necessary things to workspace" command: | mkdir -p gpt4all-backend/prebuilds/darwin-x64 - mkdir -p gpt4all-backend/runtimes/darwin-x64 - cp /tmp/gpt4all-backend/runtimes/osx-x64/*-*.* gpt4all-backend/runtimes/darwin-x64 + mkdir -p gpt4all-backend/runtimes/darwin + cp /tmp/gpt4all-backend/runtimes/osx-x64/*-*.* gpt4all-backend/runtimes/darwin cp gpt4all-bindings/typescript/prebuilds/darwin-x64/*.node gpt4all-backend/prebuilds/darwin-x64 - persist_to_workspace: root: gpt4all-backend paths: - prebuilds/darwin-x64/*.node - - runtimes/darwin-x64/*-*.* + - runtimes/darwin/*-*.* build-nodejs-windows: executor: @@ -925,6 +927,7 @@ jobs: nvm install 18.16.0 nvm use 18.16.0 - run: node --version + - run: corepack enable - run: command: | npm install -g yarn @@ -958,6 +961,7 @@ jobs: install-yarn: true node-version: "18.16" - run: node --version + - run: corepack enable - run: command: | cd gpt4all-bindings/typescript @@ -972,9 +976,12 @@ jobs: cp /tmp/gpt4all-backend/runtimes/linux-x64/*-*.so runtimes/linux-x64/native/ cp /tmp/gpt4all-backend/prebuilds/linux-x64/*.node prebuilds/linux-x64/ - mkdir -p runtimes/darwin-x64/native + # darwin has universal runtime libraries + mkdir -p runtimes/darwin/native mkdir -p prebuilds/darwin-x64/ - cp /tmp/gpt4all-backend/runtimes/darwin-x64/*-*.* runtimes/darwin-x64/native/ + + cp /tmp/gpt4all-backend/runtimes/darwin/*-*.* runtimes/darwin/native/ + cp /tmp/gpt4all-backend/prebuilds/darwin-x64/*.node prebuilds/darwin-x64/ # Fallback build if user is not on above prebuilds diff --git a/gpt4all-bindings/python/docs/gpt4all_typescript.md b/gpt4all-bindings/python/docs/gpt4all_nodejs.md similarity index 87% rename from gpt4all-bindings/python/docs/gpt4all_typescript.md rename to gpt4all-bindings/python/docs/gpt4all_nodejs.md index 87876ef3..798a3bd8 100644 --- a/gpt4all-bindings/python/docs/gpt4all_typescript.md +++ b/gpt4all-bindings/python/docs/gpt4all_nodejs.md @@ -1,11 +1,14 @@ # GPT4All Node.js API +Native Node.js LLM bindings for all. + ```sh -yarn add gpt4all@alpha +yarn add gpt4all@latest + +npm install gpt4all@latest -npm install gpt4all@alpha +pnpm install gpt4all@latest -pnpm install gpt4all@alpha ``` The original [GPT4All typescript bindings](https://github.com/nomic-ai/gpt4all-ts) are now out of date. @@ -15,12 +18,12 @@ The original [GPT4All typescript bindings](https://github.com/nomic-ai/gpt4all-t * Everything should work out the box. * See [API Reference](#api-reference) -### Chat Completion (alpha) +### Chat Completion ```js import { createCompletion, loadModel } from '../src/gpt4all.js' -const model = await loadModel('ggml-vicuna-7b-1.1-q4_2', { verbose: true }); +const model = await loadModel('mistral-7b-openorca.Q4_0.gguf', { verbose: true }); const response = await createCompletion(model, [ { role : 'system', content: 'You are meant to be annoying and unhelpful.'
}, @@ -29,7 +32,7 @@ const response = await createCompletion(model, [ ``` -### Embedding (alpha) +### Embedding ```js import { createEmbedding, loadModel } from '../src/gpt4all.js' @@ -82,8 +85,6 @@ yarn git submodule update --init --depth 1 --recursive ``` -**AS OF NEW BACKEND** to build the backend, - ```sh yarn build:backend ``` @@ -152,13 +153,16 @@ This package is in active development, and breaking changes may happen until the ##### Table of Contents -* [ModelType](#modeltype) * [ModelFile](#modelfile) * [gptj](#gptj) * [llama](#llama) * [mpt](#mpt) * [replit](#replit) * [type](#type) +* [InferenceModel](#inferencemodel) + * [dispose](#dispose) +* [EmbeddingModel](#embeddingmodel) + * [dispose](#dispose-1) * [LLModel](#llmodel) * [constructor](#constructor) * [Parameters](#parameters) @@ -176,12 +180,20 @@ This package is in active development, and breaking changes may happen until the * [setLibraryPath](#setlibrarypath) * [Parameters](#parameters-4) * [getLibraryPath](#getlibrarypath) + * [initGpuByString](#initgpubystring) + * [Parameters](#parameters-5) + * [hasGpuDevice](#hasgpudevice) + * [listGpu](#listgpu) + * [dispose](#dispose-2) +* [GpuDevice](#gpudevice) + * [type](#type-2) +* [LoadModelOptions](#loadmodeloptions) * [loadModel](#loadmodel) - * [Parameters](#parameters-5) -* [createCompletion](#createcompletion) * [Parameters](#parameters-6) -* [createEmbedding](#createembedding) +* [createCompletion](#createcompletion) * [Parameters](#parameters-7) +* [createEmbedding](#createembedding) + * [Parameters](#parameters-8) * [CompletionOptions](#completionoptions) * [verbose](#verbose) * [systemPromptTemplate](#systemprompttemplate) @@ -214,14 +226,14 @@ This package is in active development, and breaking changes may happen until the * [repeatLastN](#repeatlastn) * [contextErase](#contexterase) * [createTokenStream](#createtokenstream) - * [Parameters](#parameters-8) + * [Parameters](#parameters-9) * [DEFAULT\_DIRECTORY](#default_directory) * [DEFAULT\_LIBRARIES\_DIRECTORY](#default_libraries_directory) * [DEFAULT\_MODEL\_CONFIG](#default_model_config) -* [DEFAULT\_PROMT\_CONTEXT](#default_promt_context) +* [DEFAULT\_PROMPT\_CONTEXT](#default_prompt_context) * [DEFAULT\_MODEL\_LIST\_URL](#default_model_list_url) * [downloadModel](#downloadmodel) - * [Parameters](#parameters-9) + * [Parameters](#parameters-10) * [Examples](#examples) * [DownloadModelOptions](#downloadmodeloptions) * [modelPath](#modelpath) @@ -232,16 +244,10 @@ This package is in active development, and breaking changes may happen until the * [cancel](#cancel) * [promise](#promise) -#### ModelType - -Type of the model - -Type: (`"gptj"` | `"llama"` | `"mpt"` | `"replit"`) - #### ModelFile Full list of models available -@deprecated These model names are outdated and this type will not be maintained, please use a string literal instead +DEPRECATED!! These model names are outdated and this type will not be maintained, please use a string literal instead ##### gptj @@ -271,7 +277,27 @@ Type: `"ggml-replit-code-v1-3b.bin"` Model architecture. This argument currently does not have any functionality and is just used as descriptive identifier for user. -Type: [ModelType](#modeltype) +Type: ModelType + +#### InferenceModel + +InferenceModel represents an LLM which can make chat predictions, similar to GPT transformers. 
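As a rough usage sketch (not itself part of the diff): an InferenceModel is normally obtained through `loadModel` and driven with `createCompletion` rather than constructed directly. The model file name follows this document's own examples; the prompt is illustrative.

```js
import { loadModel, createCompletion } from 'gpt4all'

// loadModel resolves to an InferenceModel for chat-capable models
const model = await loadModel('mistral-7b-openorca.Q4_0.gguf', { verbose: true });

const response = await createCompletion(model, [
    { role: 'user', content: 'What is 1 + 1?' },
]);
console.log(response.choices[0].message);

// release the native model when finished
model.dispose();
```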
+ +##### dispose + +delete and clean up the native model + +Returns **void** + +#### EmbeddingModel + +EmbeddingModel represents an LLM which can create embeddings, which are float arrays + +##### dispose + +delete and clean up the native model + +Returns **void** #### LLModel @@ -294,7 +320,7 @@ Initialize a new LLModel. either 'gpt', mpt', or 'llama' or undefined -Returns **([ModelType](#modeltype) | [undefined](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/undefined))** +Returns **(ModelType | [undefined](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/undefined))** ##### name The name of the model. @@ -376,6 +402,52 @@ Where to get the pluggable backend libraries Returns **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** +##### initGpuByString + +Initiate a GPU by a string identifier. + +###### Parameters + +* `memory_required` **[number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)** Should be in the range size\_t or will throw +* `device_name` **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** 'amd' | 'nvidia' | 'intel' | 'gpu' | gpu name. + read LoadModelOptions.device for more information + +Returns **[boolean](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Boolean)** + +##### hasGpuDevice + +From the C documentation + +Returns **[boolean](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Boolean)** True if a GPU device is successfully initialized, false otherwise. + +##### listGpu + +GPUs that are usable for this LLModel + +* Throws **any** if hasGpuDevice returns false (unverified) + +Returns **[Array](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Array)<[GpuDevice](#gpudevice)>** + +##### dispose + +delete and clean up the native model + +Returns **void** + +#### GpuDevice + +an object that contains gpu data on this machine. + +##### type + +same as VkPhysicalDeviceType + +Type: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number) + +#### LoadModelOptions + +Options that configure a model's behavior. + #### loadModel Loads a machine learning model with the specified name. The defacto way to create a model. By default this will download a model from the official GPT4ALL website, if a model is not present at given path. ##### Parameters * `modelName` **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** The name of the model to load. -* `options` **(LoadModelOptions | [undefined](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/undefined))?** (Optional) Additional options for loading the model. +* `options` **([LoadModelOptions](#loadmodeloptions) | [undefined](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/undefined))?** (Optional) Additional options for loading the model. -Returns **[Promise](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Promise)<(InferenceModel | EmbeddingModel)>** A promise that resolves to an instance of the loaded LLModel. +Returns **[Promise](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Promise)<([InferenceModel](#inferencemodel) | [EmbeddingModel](#embeddingmodel))>** A promise that resolves to an instance of the loaded LLModel.
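To connect LoadModelOptions with the GPU methods above, a hedged sketch of loading a model onto a GPU (the `modelPath` value is illustrative; per initGpuByString, `device` accepts `'amd'`, `'nvidia'`, `'intel'`, `'gpu'`, or a specific gpu name):

```js
import { loadModel } from 'gpt4all'

const model = await loadModel('mistral-7b-openorca.Q4_0.gguf', {
    modelPath: './models', // where to find (or download) the weights; assumed to exist
    device: 'gpu',         // ask the backend for any usable GPU; it warns if init fails
    verbose: true,
});
```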
#### createCompletion @@ -394,7 +466,7 @@ The nodejs equivalent to python binding's chat\_completion ##### Parameters -* `model` **InferenceModel** The language model object. +* `model` **[InferenceModel](#inferencemodel)** The language model object. * `messages` **[Array](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Array)<[PromptMessage](#promptmessage)>** The array of messages for the conversation. * `options` **[CompletionOptions](#completionoptions)** The options for creating the completion. @@ -407,7 +479,7 @@ meow ##### Parameters -* `model` **EmbeddingModel** The language model object. +* `model` **[EmbeddingModel](#embeddingmodel)** The language model object. * `text` **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** text to embed Returns **[Float32Array](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Float32Array)** The completion result. @@ -652,7 +724,7 @@ Default model configuration. Type: ModelConfig -#### DEFAULT\_PROMT\_CONTEXT +#### DEFAULT\_PROMPT\_CONTEXT Default prompt context. diff --git a/gpt4all-bindings/typescript/README.md b/gpt4all-bindings/typescript/README.md index 90275b16..7e20fd4e 100644 --- a/gpt4all-bindings/typescript/README.md +++ b/gpt4all-bindings/typescript/README.md @@ -1,11 +1,14 @@ # GPT4All Node.js API +Native Node.js LLM bindings for all. + ```sh yarn add gpt4all@latest npm install gpt4all@latest pnpm install gpt4all@latest + ``` The original [GPT4All typescript bindings](https://github.com/nomic-ai/gpt4all-ts) are now out of date. @@ -20,7 +23,7 @@ The original [GPT4All typescript bindings](https://github.com/nomic-ai/gpt4all-t ```js import { createCompletion, loadModel } from '../src/gpt4all.js' -const model = await loadModel('ggml-vicuna-7b-1.1-q4_2', { verbose: true }); +const model = await loadModel('mistral-7b-openorca.Q4_0.gguf', { verbose: true }); const response = await createCompletion(model, [ { role : 'system', content: 'You are meant to be annoying and unhelpful.' 
}, @@ -144,587 +147,3 @@ This package is in active development, and breaking changes may happen until the * \[ ] createChatSession ( the python equivalent to create\_chat\_session ) ### API Reference - - - -##### Table of Contents - -* [ModelType](#modeltype) -* [ModelFile](#modelfile) - * [gptj](#gptj) - * [llama](#llama) - * [mpt](#mpt) - * [replit](#replit) -* [type](#type) -* [LLModel](#llmodel) - * [constructor](#constructor) - * [Parameters](#parameters) - * [type](#type-1) - * [name](#name) - * [stateSize](#statesize) - * [threadCount](#threadcount) - * [setThreadCount](#setthreadcount) - * [Parameters](#parameters-1) - * [raw\_prompt](#raw_prompt) - * [Parameters](#parameters-2) - * [embed](#embed) - * [Parameters](#parameters-3) - * [isModelLoaded](#ismodelloaded) - * [setLibraryPath](#setlibrarypath) - * [Parameters](#parameters-4) - * [getLibraryPath](#getlibrarypath) -* [loadModel](#loadmodel) - * [Parameters](#parameters-5) -* [createCompletion](#createcompletion) - * [Parameters](#parameters-6) -* [createEmbedding](#createembedding) - * [Parameters](#parameters-7) -* [CompletionOptions](#completionoptions) - * [verbose](#verbose) - * [systemPromptTemplate](#systemprompttemplate) - * [promptTemplate](#prompttemplate) - * [promptHeader](#promptheader) - * [promptFooter](#promptfooter) -* [PromptMessage](#promptmessage) - * [role](#role) - * [content](#content) -* [prompt\_tokens](#prompt_tokens) -* [completion\_tokens](#completion_tokens) -* [total\_tokens](#total_tokens) -* [CompletionReturn](#completionreturn) - * [model](#model) - * [usage](#usage) - * [choices](#choices) -* [CompletionChoice](#completionchoice) - * [message](#message) -* [LLModelPromptContext](#llmodelpromptcontext) - * [logitsSize](#logitssize) - * [tokensSize](#tokenssize) - * [nPast](#npast) - * [nCtx](#nctx) - * [nPredict](#npredict) - * [topK](#topk) - * [topP](#topp) - * [temp](#temp) - * [nBatch](#nbatch) - * [repeatPenalty](#repeatpenalty) - * [repeatLastN](#repeatlastn) - * [contextErase](#contexterase) -* [createTokenStream](#createtokenstream) - * [Parameters](#parameters-8) -* [DEFAULT\_DIRECTORY](#default_directory) -* [DEFAULT\_LIBRARIES\_DIRECTORY](#default_libraries_directory) -* [DEFAULT\_MODEL\_CONFIG](#default_model_config) -* [DEFAULT\_PROMT\_CONTEXT](#default_promt_context) -* [DEFAULT\_MODEL\_LIST\_URL](#default_model_list_url) -* [downloadModel](#downloadmodel) - * [Parameters](#parameters-9) - * [Examples](#examples) -* [DownloadModelOptions](#downloadmodeloptions) - * [modelPath](#modelpath) - * [verbose](#verbose-1) - * [url](#url) - * [md5sum](#md5sum) -* [DownloadController](#downloadcontroller) - * [cancel](#cancel) - * [promise](#promise) - -#### ModelType - -Type of the model - -Type: (`"gptj"` | `"llama"` | `"mpt"` | `"replit"`) - -#### ModelFile - -Full list of models available -@deprecated These model names are outdated and this type will not be maintained, please use a string literal instead - -##### gptj - -List of GPT-J Models - -Type: (`"ggml-gpt4all-j-v1.3-groovy.bin"` | `"ggml-gpt4all-j-v1.2-jazzy.bin"` | `"ggml-gpt4all-j-v1.1-breezy.bin"` | `"ggml-gpt4all-j.bin"`) - -##### llama - -List Llama Models - -Type: (`"ggml-gpt4all-l13b-snoozy.bin"` | `"ggml-vicuna-7b-1.1-q4_2.bin"` | `"ggml-vicuna-13b-1.1-q4_2.bin"` | `"ggml-wizardLM-7B.q4_2.bin"` | `"ggml-stable-vicuna-13B.q4_2.bin"` | `"ggml-nous-gpt4-vicuna-13b.bin"` | `"ggml-v3-13b-hermes-q5_1.bin"`) - -##### mpt - -List of MPT Models - -Type: (`"ggml-mpt-7b-base.bin"` | `"ggml-mpt-7b-chat.bin"` | 
`"ggml-mpt-7b-instruct.bin"`) - -##### replit - -List of Replit Models - -Type: `"ggml-replit-code-v1-3b.bin"` - -#### type - -Model architecture. This argument currently does not have any functionality and is just used as descriptive identifier for user. - -Type: [ModelType](#modeltype) - -#### LLModel - -LLModel class representing a language model. -This is a base class that provides common functionality for different types of language models. - -##### constructor - -Initialize a new LLModel. - -###### Parameters - -* `path` **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** Absolute path to the model file. - - - -* Throws **[Error](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Error)** If the model file does not exist. - -##### type - -either 'gpt', mpt', or 'llama' or undefined - -Returns **([ModelType](#modeltype) | [undefined](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/undefined))** - -##### name - -The name of the model. - -Returns **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** - -##### stateSize - -Get the size of the internal state of the model. -NOTE: This state data is specific to the type of model you have created. - -Returns **[number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)** the size in bytes of the internal state of the model - -##### threadCount - -Get the number of threads used for model inference. -The default is the number of physical cores your computer has. - -Returns **[number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)** The number of threads used for model inference. - -##### setThreadCount - -Set the number of threads used for model inference. - -###### Parameters - -* `newNumber` **[number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)** The new number of threads. - -Returns **void** - -##### raw\_prompt - -Prompt the model with a given input and optional parameters. -This is the raw output from model. -Use the prompt function exported for a value - -###### Parameters - -* `q` **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** The prompt input. -* `params` **Partial<[LLModelPromptContext](#llmodelpromptcontext)>** Optional parameters for the prompt context. -* `callback` **function (res: [string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)): void** - -Returns **void** The result of the model prompt. - -##### embed - -Embed text with the model. Keep in mind that -not all models can embed text, (only bert can embed as of 07/16/2023 (mm/dd/yyyy)) -Use the prompt function exported for a value - -###### Parameters - -* `text` **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** -* `q` The prompt input. -* `params` Optional parameters for the prompt context. - -Returns **[Float32Array](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Float32Array)** The result of the model prompt. - -##### isModelLoaded - -Whether the model is loaded or not. 
- -Returns **[boolean](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Boolean)** - -##### setLibraryPath - -Where to search for the pluggable backend libraries - -###### Parameters - -* `s` **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** - -Returns **void** - -##### getLibraryPath - -Where to get the pluggable backend libraries - -Returns **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** - -#### loadModel - -Loads a machine learning model with the specified name. The defacto way to create a model. -By default this will download a model from the official GPT4ALL website, if a model is not present at given path. - -##### Parameters - -* `modelName` **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** The name of the model to load. -* `options` **(LoadModelOptions | [undefined](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/undefined))?** (Optional) Additional options for loading the model. - -Returns **[Promise](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Promise)<(InferenceModel | EmbeddingModel)>** A promise that resolves to an instance of the loaded LLModel. - -#### createCompletion - -The nodejs equivalent to python binding's chat\_completion - -##### Parameters - -* `model` **InferenceModel** The language model object. -* `messages` **[Array](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Array)<[PromptMessage](#promptmessage)>** The array of messages for the conversation. -* `options` **[CompletionOptions](#completionoptions)** The options for creating the completion. - -Returns **[CompletionReturn](#completionreturn)** The completion result. - -#### createEmbedding - -The nodejs moral equivalent to python binding's Embed4All().embed() -meow - -##### Parameters - -* `model` **EmbeddingModel** The language model object. -* `text` **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** text to embed - -Returns **[Float32Array](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Float32Array)** The completion result. - -#### CompletionOptions - -**Extends Partial\<LLModelPromptContext>** - -The options for creating the completion. - -##### verbose - -Indicates if verbose logging is enabled. - -Type: [boolean](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Boolean) - -##### systemPromptTemplate - -Template for the system message. Will be put before the conversation with %1 being replaced by all system messages. -Note that if this is not defined, system messages will not be included in the prompt. - -Type: [string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String) - -##### promptTemplate - -Template for user messages, with %1 being replaced by the message. - -Type: [boolean](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Boolean) - -##### promptHeader - -The initial instruction for the model, on top of the prompt - -Type: [string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String) - -##### promptFooter - -The last instruction for the model, appended to the end of the prompt. - -Type: [string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String) - -#### PromptMessage - -A message in the conversation, identical to OpenAI's chat message.
- -##### role - -The role of the message. - -Type: (`"system"` | `"assistant"` | `"user"`) - -##### content - -The message content. - -Type: [string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String) - -#### prompt\_tokens - -The number of tokens used in the prompt. - -Type: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number) - -#### completion\_tokens - -The number of tokens used in the completion. - -Type: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number) - -#### total\_tokens - -The total number of tokens used. - -Type: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number) - -#### CompletionReturn - -The result of the completion, similar to OpenAI's format. - -##### model - -The model used for the completion. - -Type: [string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String) - -##### usage - -Token usage report. - -Type: {prompt\_tokens: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number), completion\_tokens: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number), total\_tokens: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)} - -##### choices - -The generated completions. - -Type: [Array](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Array)<[CompletionChoice](#completionchoice)> - -#### CompletionChoice - -A completion choice, similar to OpenAI's format. - -##### message - -Response message - -Type: [PromptMessage](#promptmessage) - -#### LLModelPromptContext - -Model inference arguments for generating completions. - -##### logitsSize - -The size of the raw logits vector. - -Type: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number) - -##### tokensSize - -The size of the raw tokens vector. - -Type: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number) - -##### nPast - -The number of tokens in the past conversation. - -Type: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number) - -##### nCtx - -The number of tokens possible in the context window. - -Type: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number) - -##### nPredict - -The number of tokens to predict. - -Type: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number) - -##### topK - -The top-k logits to sample from. -Top-K sampling selects the next token only from the top K most likely tokens predicted by the model. -It helps reduce the risk of generating low-probability or nonsensical tokens, but it may also limit -the diversity of the output. A higher value for top-K (eg., 100) will consider more tokens and lead -to more diverse text, while a lower value (eg., 10) will focus on the most probable tokens and generate -more conservative text. 30 - 60 is a good range for most tasks. - -Type: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number) - -##### topP - -The nucleus sampling probability threshold. -Top-P limits the selection of the next token to a subset of tokens with a cumulative probability -above a threshold P. 
This method, also known as nucleus sampling, finds a balance between diversity -and quality by considering both token probabilities and the number of tokens available for sampling. -When using a higher value for top-P (eg., 0.95), the generated text becomes more diverse. -On the other hand, a lower value (eg., 0.1) produces more focused and conservative text. -The default value is 0.4, which is aimed to be the middle ground between focus and diversity, but -for more creative tasks a higher top-p value will be beneficial, about 0.5-0.9 is a good range for that. - -Type: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number) - -##### temp - -The temperature to adjust the model's output distribution. -Temperature is like a knob that adjusts how creative or focused the output becomes. Higher temperatures -(eg., 1.2) increase randomness, resulting in more imaginative and diverse text. Lower temperatures (eg., 0.5) -make the output more focused, predictable, and conservative. When the temperature is set to 0, the output -becomes completely deterministic, always selecting the most probable next token and producing identical results -each time. A safe range would be around 0.6 - 0.85, but you are free to search what value fits best for you. - -Type: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number) - -##### nBatch - -The number of predictions to generate in parallel. -By splitting the prompt every N tokens, prompt-batch-size reduces RAM usage during processing. However, -this can increase the processing time as a trade-off. If the N value is set too low (e.g., 10), long prompts -with 500+ tokens will be most affected, requiring numerous processing runs to complete the prompt processing. -To ensure optimal performance, setting the prompt-batch-size to 2048 allows processing of all tokens in a single run. - -Type: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number) - -##### repeatPenalty - -The penalty factor for repeated tokens. -Repeat-penalty can help penalize tokens based on how frequently they occur in the text, including the input prompt. -A token that has already appeared five times is penalized more heavily than a token that has appeared only one time. -A value of 1 means that there is no penalty and values larger than 1 discourage repeated tokens. - -Type: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number) - -##### repeatLastN - -The number of last tokens to penalize. -The repeat-penalty-tokens N option controls the number of tokens in the history to consider for penalizing repetition. -A larger value will look further back in the generated text to prevent repetitions, while a smaller value will only -consider recent tokens. - -Type: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number) - -##### contextErase - -The percentage of context to erase if the context window is exceeded. 
- -Type: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number) - -#### createTokenStream - -TODO: Help wanted to implement this - -##### Parameters - -* `llmodel` **[LLModel](#llmodel)** -* `messages` **[Array](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Array)<[PromptMessage](#promptmessage)>** -* `options` **[CompletionOptions](#completionoptions)** - -Returns **function (ll: [LLModel](#llmodel)): AsyncGenerator<[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)>** - -#### DEFAULT\_DIRECTORY - -From python api: -models will be stored in (homedir)/.cache/gpt4all/\` - -Type: [string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String) - -#### DEFAULT\_LIBRARIES\_DIRECTORY - -From python api: -The default path for dynamic libraries to be stored. -You may separate paths by a semicolon to search in multiple areas. -This searches DEFAULT\_DIRECTORY/libraries, cwd/libraries, and finally cwd. - -Type: [string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String) - -#### DEFAULT\_MODEL\_CONFIG - -Default model configuration. - -Type: ModelConfig - -#### DEFAULT\_PROMT\_CONTEXT - -Default prompt context. - -Type: [LLModelPromptContext](#llmodelpromptcontext) - -#### DEFAULT\_MODEL\_LIST\_URL - -Default model list url. - -Type: [string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String) - -#### downloadModel - -Initiates the download of a model file. -By default this downloads without waiting. use the controller returned to alter this behavior. - -##### Parameters - -* `modelName` **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** The model to be downloaded. -* `options` **DownloadOptions** to pass into the downloader. Default is { location: (cwd), verbose: false }. - -##### Examples - -```javascript -const download = downloadModel('ggml-gpt4all-j-v1.3-groovy.bin') -download.promise.then(() => console.log('Downloaded!')) -``` - -* Throws **[Error](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Error)** If the model already exists in the specified location. -* Throws **[Error](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Error)** If the model cannot be found at the specified url. - -Returns **[DownloadController](#downloadcontroller)** object that allows controlling the download process. - -#### DownloadModelOptions - -Options for the model download process. - -##### modelPath - -location to download the model. -Default is process.cwd(), or the current working directory - -Type: [string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String) - -##### verbose - -Debug mode -- check how long it took to download in seconds - -Type: [boolean](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Boolean) - -##### url - -Remote download url. Defaults to `https://gpt4all.io/models/gguf/` - -Type: [string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String) - -##### md5sum - -MD5 sum of the model file. If this is provided, the downloaded file will be checked against this sum. -If the sums do not match, an error will be thrown and the file will be deleted. - -Type: [string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String) - -#### DownloadController - -Model download controller. 
- -##### cancel - -Cancel the request to download if this is called. - -Type: function (): void - -##### promise - -A promise resolving to the downloaded models config once the download is done - -Type: [Promise](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Promise)\<ModelConfig> diff --git a/gpt4all-bindings/typescript/index.cc b/gpt4all-bindings/typescript/index.cc index 8a479236..bcca2c19 100644 --- a/gpt4all-bindings/typescript/index.cc +++ b/gpt4all-bindings/typescript/index.cc @@ -81,7 +81,7 @@ Napi::Value NodeModelWrapper::GetRequiredMemory(const Napi::CallbackInfo& info) Napi::Value NodeModelWrapper::InitGpuByString(const Napi::CallbackInfo& info) { auto env = info.Env(); - uint32_t memory_required = info[0].As<Napi::Number>(); + size_t memory_required = static_cast<size_t>(info[0].As<Napi::Number>().Uint32Value()); std::string gpu_device_identifier = info[1].As<Napi::String>(); @@ -149,16 +149,14 @@ Napi::Value NodeModelWrapper::GetRequiredMemory(const Napi::CallbackInfo& info) } if(device != "cpu") { size_t mem = llmodel_required_mem(GetInference(), full_weight_path.c_str()); - if(mem == 0) { - std::cout << "WARNING: no memory needed. does this model support gpu?\n"; - } std::cout << "Initiating GPU\n"; - std::cout << "Memory required estimation: " << mem << "\n"; auto success = llmodel_gpu_init_gpu_device_by_string(GetInference(), mem, device.c_str()); if(success) { std::cout << "GPU init successfully\n"; } else { + // https://github.com/nomic-ai/gpt4all/blob/3acbef14b7c2436fe033cae9036e695d77461a16/gpt4all-bindings/python/gpt4all/pyllmodel.py#L215 + // Haven't implemented this yet, but it is still open to contribution std::cout << "WARNING: Failed to init GPU\n"; } } diff --git a/gpt4all-bindings/typescript/package.json b/gpt4all-bindings/typescript/package.json index 9bfb2351..bc91cdfe 100644 --- a/gpt4all-bindings/typescript/package.json +++ b/gpt4all-bindings/typescript/package.json @@ -1,6 +1,6 @@ { "name": "gpt4all", - "version": "3.0.0", + "version": "3.1.0", "packageManager": "yarn@3.6.1", "main": "src/gpt4all.js", "repository": "nomic-ai/gpt4all", @@ -9,9 +9,7 @@ "test": "jest", "build:backend": "node scripts/build.js", "build": "node-gyp-build", - "predocs:build": "node scripts/docs.js", - "docs:build": "documentation readme ./src/gpt4all.d.ts --parse-extension js d.ts --format md --section \"API Reference\" --readme-file ../python/docs/gpt4all_typescript.md", - "postdocs:build": "documentation readme ./src/gpt4all.d.ts --parse-extension js d.ts --format md --section \"API Reference\" --readme-file README.md" + "docs:build": "node scripts/docs.js && documentation readme ./src/gpt4all.d.ts --parse-extension js d.ts --format md --section \"API Reference\" --readme-file ../python/docs/gpt4all_nodejs.md" }, "files": [ "src/**/*", @@ -47,7 +45,7 @@ }, "jest": { "verbose": true - }, + }, "publishConfig": { "registry": "https://registry.npmjs.org/", "access": "public", diff --git a/gpt4all-bindings/typescript/scripts/docs.js b/gpt4all-bindings/typescript/scripts/docs.js index d723ac06..e68495ef 100644 --- a/gpt4all-bindings/typescript/scripts/docs.js +++ b/gpt4all-bindings/typescript/scripts/docs.js @@ -2,7 +2,11 @@ const fs = require('fs'); -const newPath = '../python/docs/gpt4all_typescript.md'; -const filepath = 'README.md'; -const data = fs.readFileSync(filepath); -fs.writeFileSync(newPath, data); +const newPath = '../python/docs/gpt4all_nodejs.md'; +const filepath = './README.md'; +const intro = fs.readFileSync(filepath); + +fs.writeFileSync( + newPath, intro +); + diff --git
a/gpt4all-bindings/typescript/src/config.js b/gpt4all-bindings/typescript/src/config.js index e097ebb7..d4900d47 100644 --- a/gpt4all-bindings/typescript/src/config.js +++ b/gpt4all-bindings/typescript/src/config.js @@ -9,7 +9,13 @@ const librarySearchPaths = [ path.resolve( __dirname, "..", - `runtimes/${process.platform}-${process.arch}/native` + `runtimes/${process.platform}-${process.arch}/native`, + ), + // for darwin. This is hardcoded for now, but it should work + path.resolve( + __dirname, + "..", + `runtimes/${process.platform}/native`, ), process.cwd(), ]; diff --git a/gpt4all-bindings/typescript/src/gpt4all.d.ts b/gpt4all-bindings/typescript/src/gpt4all.d.ts index aff45624..f3e557a9 100644 --- a/gpt4all-bindings/typescript/src/gpt4all.d.ts +++ b/gpt4all-bindings/typescript/src/gpt4all.d.ts @@ -1,13 +1,12 @@ /// <reference types="node" /> declare module "gpt4all"; -/** Type of the model */ type ModelType = "gptj" | "llama" | "mpt" | "replit"; // NOTE: "deprecated" tag in below comment breaks the doc generator https://github.com/documentationjs/documentation/issues/1596 /** * Full list of models available - * @deprecated These model names are outdated and this type will not be maintained, please use a string literal instead + * DEPRECATED!! These model names are outdated and this type will not be maintained, please use a string literal instead */ interface ModelFile { /** List of GPT-J Models */ @@ -34,7 +33,6 @@ interface ModelFile { replit: "ggml-replit-code-v1-3b.bin"; } -//mirrors py options interface LLModelOptions { /** * Model architecture. This argument currently does not have any functionality and is just used as descriptive identifier for user. */ @@ -51,7 +49,11 @@ interface ModelConfig { path: string; url?: string; } - +/** + * + * InferenceModel represents an LLM which can make chat predictions, similar to GPT transformers. + * + */ declare class InferenceModel { constructor(llm: LLModel, config: ModelConfig); llm: LLModel; @@ -68,6 +70,9 @@ declare class InferenceModel { dispose(): void } +/** + * EmbeddingModel represents an LLM which can create embeddings, which are float arrays + */ declare class EmbeddingModel { constructor(llm: LLModel, config: ModelConfig); llm: LLModel; @@ -171,6 +176,7 @@ declare class LLModel { hasGpuDevice(): boolean /** * GPUs that are usable for this LLModel + * @throws if hasGpuDevice returns false (unverified) * @returns */ listGpu() : GpuDevice[] @@ -181,8 +187,8 @@ declare class LLModel { dispose(): void } /** - * an object that contains gpu data on this machine. - */ + * an object that contains gpu data on this machine. + */ interface GpuDevice { index: number; /** * same as VkPhysicalDeviceType */ type: number; @@ -194,6 +200,9 @@ interface GpuDevice { vendor: string; } +/** + * Options that configure a model's behavior. + */ interface LoadModelOptions { modelPath?: string; librariesPath?: string; diff --git a/gpt4all-bindings/typescript/src/gpt4all.js b/gpt4all-bindings/typescript/src/gpt4all.js index b56bb19a..e854463f 100644 --- a/gpt4all-bindings/typescript/src/gpt4all.js +++ b/gpt4all-bindings/typescript/src/gpt4all.js @@ -18,6 +18,7 @@ const { DEFAULT_MODEL_LIST_URL, } = require("./config.js"); const { InferenceModel, EmbeddingModel } = require("./models.js"); +const assert = require("assert"); /** * Loads a machine learning model with the specified name. The defacto way to create a model.
@@ -45,23 +46,17 @@ async function loadModel(modelName, options = {}) { verbose: loadOptions.verbose, }); - const libSearchPaths = loadOptions.librariesPath.split(";"); + assert.ok(typeof loadOptions.librariesPath === 'string'); + const existingPaths = loadOptions.librariesPath + .split(";") + .filter(existsSync) + .join(';'); + console.log("Passing these paths into runtime library search:", existingPaths); - let libPath = null; - - for (const searchPath of libSearchPaths) { - if (existsSync(searchPath)) { - libPath = searchPath; - break; - } - } - if (!libPath) { - throw Error("Could not find a valid path from " + libSearchPaths); - } const llmOptions = { model_name: appendBinSuffixIfMissing(modelName), model_path: loadOptions.modelPath, - library_path: libPath, + library_path: existingPaths, device: loadOptions.device, }; diff --git a/gpt4all-bindings/typescript/test/gpt4all.test.js b/gpt4all-bindings/typescript/test/gpt4all.test.js index ea2c828d..f60efdb4 100644 --- a/gpt4all-bindings/typescript/test/gpt4all.test.js +++ b/gpt4all-bindings/typescript/test/gpt4all.test.js @@ -35,6 +35,11 @@ describe("config", () => { "..", `runtimes/${process.platform}-${process.arch}/native` ), + path.resolve( + __dirname, + "..", + `runtimes/${process.platform}/native`, + ), process.cwd(), ]; expect(typeof DEFAULT_LIBRARIES_DIRECTORY).toBe("string");
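For reference, the library-path behavior that loadModel gains in this diff can be sketched in isolation: a `;`-separated `librariesPath` string is filtered down to the entries that actually exist on disk before being passed to the native layer as `library_path`. The example paths below are illustrative.

```js
const { existsSync } = require("node:fs");

// Mirrors the loadModel change: keep only the search paths that exist on disk.
function filterExistingPaths(librariesPath) {
    return librariesPath
        .split(";")
        .filter(existsSync)
        .join(";");
}

// e.g. against a list like DEFAULT_LIBRARIES_DIRECTORY
// (DEFAULT_DIRECTORY/libraries;cwd/libraries;cwd):
console.log(filterExistingPaths("./runtimes/linux-x64/native;./does-not-exist;."));
```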