|
|
@ -186,7 +186,7 @@
|
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 3,
|
|
|
|
"execution_count": 1,
|
|
|
|
"metadata": {
|
|
|
|
"metadata": {
|
|
|
|
"tags": []
|
|
|
|
"tags": []
|
|
|
|
},
|
|
|
|
},
|
|
|
@ -223,7 +223,7 @@
|
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 4,
|
|
|
|
"execution_count": 3,
|
|
|
|
"metadata": {
|
|
|
|
"metadata": {
|
|
|
|
"tags": []
|
|
|
|
"tags": []
|
|
|
|
},
|
|
|
|
},
|
|
|
@ -487,7 +487,7 @@
|
|
|
|
"\n",
|
|
|
|
"\n",
|
|
|
|
"Two of the most important GPU parameters are:\n",
|
|
|
|
"Two of the most important GPU parameters are:\n",
|
|
|
|
"\n",
|
|
|
|
"\n",
|
|
|
|
"- `n_gpu_layers` - determines how many layers of the model are offloaded to your Metal GPU; in most cases, setting it to `1` is enough for Metal.\n",
|
|
|
|
"- `n_gpu_layers` - determines how many layers of the model are offloaded to your Metal GPU.\n",
|
|
|
|
"- `n_batch` - how many tokens are processed in parallel; the default is 8, so set it to a bigger number.\n",
|
|
|
|
"- `n_batch` - how many tokens are processed in parallel; the default is 8, so set it to a bigger number.\n",
|
|
|
|
"- `f16_kv` - for some reason, Metal only supports `True`; otherwise you will get an error such as `Asserting on type 0\n",
|
|
|
|
"- `f16_kv` - for some reason, Metal only supports `True`; otherwise you will get an error such as `Asserting on type 0\n",
|
|
|
|
"GGML_ASSERT: .../ggml-metal.m:706: false && \"not implemented\"`\n",
|
|
|
|
"GGML_ASSERT: .../ggml-metal.m:706: false && \"not implemented\"`\n",
|
|
|
@ -501,7 +501,7 @@
|
|
|
|
"metadata": {},
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"source": [
|
|
|
|
"n_gpu_layers = 1 # Metal set to 1 is enough.\n",
|
|
|
|
"n_gpu_layers = 1 # Change this value based on your model and your GPU VRAM pool.\n",
|
|
|
|
"n_batch = 512 # Should be between 1 and n_ctx, consider the amount of RAM of your Apple Silicon Chip.\n",
|
|
|
|
"n_batch = 512 # Should be between 1 and n_ctx, consider the amount of RAM of your Apple Silicon Chip.\n",
|
|
|
|
"# Make sure the model path is correct for your system!\n",
|
|
|
|
"# Make sure the model path is correct for your system!\n",
|
|
|
|
"llm = LlamaCpp(\n",
|
|
|
|
"llm = LlamaCpp(\n",
|
|
|
@ -680,7 +680,7 @@
|
|
|
|
],
|
|
|
|
],
|
|
|
|
"metadata": {
|
|
|
|
"metadata": {
|
|
|
|
"kernelspec": {
|
|
|
|
"kernelspec": {
|
|
|
|
"display_name": "Python 3.10.12 ('langchain_venv': venv)",
|
|
|
|
"display_name": "Python 3 (ipykernel)",
|
|
|
|
"language": "python",
|
|
|
|
"language": "python",
|
|
|
|
"name": "python3"
|
|
|
|
"name": "python3"
|
|
|
|
},
|
|
|
|
},
|
|
|
@ -694,7 +694,7 @@
|
|
|
|
"name": "python",
|
|
|
|
"name": "python",
|
|
|
|
"nbconvert_exporter": "python",
|
|
|
|
"nbconvert_exporter": "python",
|
|
|
|
"pygments_lexer": "ipython3",
|
|
|
|
"pygments_lexer": "ipython3",
|
|
|
|
"version": "3.10.12"
|
|
|
|
"version": "3.11.6"
|
|
|
|
},
|
|
|
|
},
|
|
|
|
"vscode": {
|
|
|
|
"vscode": {
|
|
|
|
"interpreter": {
|
|
|
|
"interpreter": {
|
|
|
|