diff --git a/docs/docs/integrations/llms/llamacpp.ipynb b/docs/docs/integrations/llms/llamacpp.ipynb
index 853787fc19..30e144f20f 100644
--- a/docs/docs/integrations/llms/llamacpp.ipynb
+++ b/docs/docs/integrations/llms/llamacpp.ipynb
@@ -186,7 +186,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 1,
    "metadata": {
     "tags": []
    },
@@ -223,7 +223,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 3,
    "metadata": {
     "tags": []
    },
@@ -487,7 +487,7 @@
     "\n",
     "Two of the most important GPU parameters are:\n",
     "\n",
-    "- `n_gpu_layers` - determines how many layers of the model are offloaded to your Metal GPU, in the most case, set it to `1` is enough for Metal\n",
+    "- `n_gpu_layers` - determines how many layers of the model are offloaded to your Metal GPU.\n",
     "- `n_batch` - how many tokens are processed in parallel, default is 8, set to bigger number.\n",
     "- `f16_kv` - for some reason, Metal only support `True`, otherwise you will get error such as `Asserting on type 0\n",
     "GGML_ASSERT: .../ggml-metal.m:706: false && \"not implemented\"`\n",
@@ -501,7 +501,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "n_gpu_layers = 1  # Metal set to 1 is enough.\n",
+    "n_gpu_layers = 1  # Change this value based on your model and your GPU VRAM pool.\n",
     "n_batch = 512  # Should be between 1 and n_ctx, consider the amount of RAM of your Apple Silicon Chip.\n",
     "# Make sure the model path is correct for your system!\n",
     "llm = LlamaCpp(\n",
@@ -680,7 +680,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3.10.12 ('langchain_venv': venv)",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -694,7 +694,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.12"
+   "version": "3.11.6"
   },
   "vscode": {
    "interpreter": {
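
Note on the @@ -501,7 +501,7 @@ hunk: after this change, the Metal example cell reads roughly as the Python sketch below. Only the two commented assignments and the `llm = LlamaCpp(` opener appear in the hunks above; the import path, the model path, and the remaining keyword arguments are assumptions filled in for illustration, not part of this diff.

    from langchain_community.llms import LlamaCpp  # import path assumed; not shown in this diff

    n_gpu_layers = 1  # Change this value based on your model and your GPU VRAM pool.
    n_batch = 512  # Should be between 1 and n_ctx, consider the amount of RAM of your Apple Silicon Chip.

    # Make sure the model path is correct for your system!
    llm = LlamaCpp(
        model_path="/path/to/your-model.gguf",  # hypothetical path; point at your local model file
        n_gpu_layers=n_gpu_layers,
        n_batch=n_batch,
        f16_kv=True,  # Metal only supports True; otherwise the GGML_ASSERT cited in the @@ -487 hunk fires
        verbose=True,
    )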