Update docs w/ prompt hub (#10197)

Small updates to docs
Author: Lance Martin
parent a94dc6ee44
commit 8998060d85
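The change below swaps inline prompt definitions for prompts pulled from the LangChain prompt hub. As a minimal sketch of the core pattern (package name, prompt handle, and calls are taken from the new notebook cells below; a configured `llm` is assumed):

    # Core pattern these doc updates introduce.
    # Prerequisite (from the new cells below): pip install langchainhub
    from langchain import hub
    from langchain.chains.question_answering import load_qa_chain

    # Pull the shared RAG prompt from https://smith.langchain.com/hub/rlm/rag-prompt
    rag_prompt = hub.pull("rlm/rag-prompt")

    # `llm` is assumed to be configured earlier in the notebook.
    chain = load_qa_chain(llm, chain_type="stuff", prompt=rag_prompt)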

@@ -42,7 +42,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 1,
"id": "f8cf5765",
"metadata": {},
"outputs": [],
@@ -68,7 +68,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 2,
"id": "fdce8923",
"metadata": {},
"outputs": [
@@ -83,7 +83,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"objc[31511]: Class GGMLMetalClass is implemented in both /Users/rlm/miniforge3/envs/llama2/lib/python3.9/site-packages/gpt4all/llmodel_DO_NOT_MODIFY/build/libreplit-mainline-metal.dylib (0x14f4e8208) and /Users/rlm/miniforge3/envs/llama2/lib/python3.9/site-packages/gpt4all/llmodel_DO_NOT_MODIFY/build/libllamamodel-mainline-metal.dylib (0x14f5fc208). One of the two will be used. Which one is undefined.\n"
"objc[49534]: Class GGMLMetalClass is implemented in both /Users/rlm/miniforge3/envs/llama2/lib/python3.9/site-packages/gpt4all/llmodel_DO_NOT_MODIFY/build/libreplit-mainline-metal.dylib (0x131614208) and /Users/rlm/miniforge3/envs/llama2/lib/python3.9/site-packages/gpt4all/llmodel_DO_NOT_MODIFY/build/libllamamodel-mainline-metal.dylib (0x131988208). One of the two will be used. Which one is undefined.\n"
]
}
],
@@ -104,7 +104,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 3,
"id": "b0c55e98",
"metadata": {},
"outputs": [
@@ -114,7 +114,7 @@
"4"
]
},
"execution_count": 6,
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
@@ -204,7 +204,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 4,
"id": "cd7164e3",
"metadata": {},
"outputs": [],
@@ -225,7 +225,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "56158f83-6490-49b8-9f04-2e2e6ec3524b",
"id": "af1176bb-d52a-4cf0-b983-8b7433d45b4f",
"metadata": {},
"outputs": [],
"source": [
@@ -459,12 +459,11 @@
{
"cell_type": "code",
"execution_count": null,
"id": "4ae37573-63a7-4564-90e1-196a8ea9b526",
"id": "cc638992-0924-41c0-8dae-8cf683e72b16",
"metadata": {},
"outputs": [],
"source": [
"from langchain import hub\n",
"rag_prompt = hub.pull(\"rlm/rag-prompt-default\")"
"pip install langchainhub"
]
},
{
@@ -512,6 +511,9 @@
}
],
"source": [
"# Prompt \n",
"from langchain import hub\n",
"rag_prompt = hub.pull(\"rlm/rag-prompt\")\n",
"from langchain.chains.question_answering import load_qa_chain\n",
"# Chain\n",
"chain = load_qa_chain(llm, chain_type=\"stuff\", prompt=rag_prompt)\n",
@@ -529,7 +531,7 @@
},
{
"cell_type": "code",
"execution_count": 31,
"execution_count": 3,
"id": "78f6862d-b7a6-4e03-84e4-45667185bf9b",
"metadata": {},
"outputs": [
@@ -539,12 +541,13 @@
"ChatPromptTemplate(input_variables=['question', 'context'], output_parser=None, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['question', 'context'], output_parser=None, partial_variables={}, template=\"[INST]<<SYS>> You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.<</SYS>> \\nQuestion: {question} \\nContext: {context} \\nAnswer: [/INST]\", template_format='f-string', validate_template=True), additional_kwargs={})])"
]
},
"execution_count": 31,
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Prompt\n",
"rag_prompt_llama = hub.pull(\"rlm/rag-prompt-llama\")\n",
"rag_prompt_llama"
]
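The hunks that follow appear to belong to a second notebook making the same switch. For reference, a pulled hub prompt is an ordinary `ChatPromptTemplate` (as the output above shows, with `question` and `context` input variables), so it can be formatted like any other prompt; a small sketch, assuming `langchainhub` is installed and the hypothetical inputs shown:

    from langchain import hub

    # Llama-formatted variant of the RAG prompt, as pulled above.
    rag_prompt_llama = hub.pull("rlm/rag-prompt-llama")

    # A hub prompt is a regular ChatPromptTemplate; format it like any other.
    messages = rag_prompt_llama.format_messages(
        question="What is task decomposition?",
        context="(retrieved documents would go here)",
    )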

@@ -52,7 +52,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 1,
"id": "046cefc0",
"metadata": {},
"outputs": [],
@@ -269,28 +269,10 @@
},
{
"cell_type": "code",
"execution_count": 9,
"id": "c690f01a",
"execution_count": null,
"id": "9cfe3270-4e89-4c60-a2e5-9026b021bf76",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:langchain.retrievers.multi_query:Generated queries: ['1. How can Task Decomposition be approached?', '2. What are the different methods for Task Decomposition?', '3. What are the various approaches to decomposing tasks?']\n"
]
},
{
"data": {
"text/plain": [
"4"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"import logging\n",
"\n",
@@ -318,7 +300,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 9,
"id": "99fa1aec",
"metadata": {},
"outputs": [
@@ -326,10 +308,10 @@
"data": {
"text/plain": [
"{'query': 'What are the approaches to Task Decomposition?',\n",
" 'result': 'There are three approaches to task decomposition:\\n\\n1. Using Language Model with simple prompting: This approach involves using a Language Model (LLM) with simple prompts like \"Steps for XYZ\" or \"What are the subgoals for achieving XYZ?\" to guide the task decomposition process.\\n\\n2. Using task-specific instructions: In this approach, task-specific instructions are provided to guide the task decomposition. For example, for the task of writing a novel, an instruction like \"Write a story outline\" can be given to help decompose the task into smaller subtasks.\\n\\n3. Human inputs: Task decomposition can also be done with the help of human inputs. This involves getting input and guidance from humans to break down a complex task into smaller, more manageable subtasks.'}"
" 'result': 'The approaches to task decomposition include:\\n\\n1. Simple prompting: This approach involves using simple prompts or questions to guide the agent in breaking down a task into smaller subgoals. For example, the agent can be prompted with \"Steps for XYZ\" or \"What are the subgoals for achieving XYZ?\" to facilitate task decomposition.\\n\\n2. Task-specific instructions: In this approach, task-specific instructions are provided to the agent to guide the decomposition process. For example, if the task is to write a novel, the agent can be instructed to \"Write a story outline\" as a step in the task decomposition.\\n\\n3. Human inputs: This approach involves incorporating human inputs in the task decomposition process. Humans can provide guidance, feedback, and assistance to the agent in breaking down complex tasks into manageable subgoals.\\n\\nThese approaches aim to enable efficient handling of complex tasks by breaking them down into smaller, more manageable subgoals.'}"
]
},
"execution_count": 10,
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
@@ -355,97 +337,7 @@
"#### Choosing LLMs\n",
"- Browse the > 55 LLM and chat model integrations [here](https://integrations.langchain.com/).\n",
"- See further documentation on LLMs and chat models [here](/docs/modules/model_io/models/).\n",
"- Use local LLMS: The popularity of [PrivateGPT](https://github.com/imartinez/privateGPT) and [GPT4All](https://github.com/nomic-ai/gpt4all) underscore the importance of running LLMs locally.\n",
"Using `GPT4All` is as simple as [downloading the binary]((/docs/integrations/llms/gpt4all)) and then:"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "02d6c9dc",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Found model file at /Users/rlm/Desktop/Code/gpt4all/models/nous-hermes-13b.ggmlv3.q4_0.bin\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"objc[61331]: Class GGMLMetalClass is implemented in both /Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/gpt4all/llmodel_DO_NOT_MODIFY/build/libreplit-mainline-metal.dylib (0x2e3384208) and /Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/gpt4all/llmodel_DO_NOT_MODIFY/build/libllamamodel-mainline-metal.dylib (0x2e37b0208). One of the two will be used. Which one is undefined.\n",
"llama.cpp: using Metal\n",
"llama.cpp: loading model from /Users/rlm/Desktop/Code/gpt4all/models/nous-hermes-13b.ggmlv3.q4_0.bin\n",
"llama_model_load_internal: format = ggjt v3 (latest)\n",
"llama_model_load_internal: n_vocab = 32001\n",
"llama_model_load_internal: n_ctx = 2048\n",
"llama_model_load_internal: n_embd = 5120\n",
"llama_model_load_internal: n_mult = 256\n",
"llama_model_load_internal: n_head = 40\n",
"llama_model_load_internal: n_layer = 40\n",
"llama_model_load_internal: n_rot = 128\n",
"llama_model_load_internal: ftype = 2 (mostly Q4_0)\n",
"llama_model_load_internal: n_ff = 13824\n",
"llama_model_load_internal: n_parts = 1\n",
"llama_model_load_internal: model size = 13B\n",
"llama_model_load_internal: ggml ctx size = 0.09 MB\n",
"llama_model_load_internal: mem required = 9031.71 MB (+ 1608.00 MB per state)\n",
"llama_new_context_with_model: kv self size = 1600.00 MB\n",
"ggml_metal_init: allocating\n",
"ggml_metal_init: using MPS\n",
"ggml_metal_init: loading '/Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/gpt4all/llmodel_DO_NOT_MODIFY/build/ggml-metal.metal'\n",
"ggml_metal_init: loaded kernel_add 0x2bbbbc2f0\n",
"ggml_metal_init: loaded kernel_mul 0x2bbbba840\n",
"ggml_metal_init: loaded kernel_mul_row 0x2bb917dd0\n",
"ggml_metal_init: loaded kernel_scale 0x2bb918150\n",
"ggml_metal_init: loaded kernel_silu 0x2bb9184d0\n",
"ggml_metal_init: loaded kernel_relu 0x2bb918850\n",
"ggml_metal_init: loaded kernel_gelu 0x2bbbc3f10\n",
"ggml_metal_init: loaded kernel_soft_max 0x2bbbc5840\n",
"ggml_metal_init: loaded kernel_diag_mask_inf 0x2bbbc4c70\n",
"ggml_metal_init: loaded kernel_get_rows_f16 0x2bbbc5fc0\n",
"ggml_metal_init: loaded kernel_get_rows_q4_0 0x2bbbc6720\n",
"ggml_metal_init: loaded kernel_get_rows_q4_1 0x2bb918c10\n",
"ggml_metal_init: loaded kernel_get_rows_q2_k 0x2bbbc51b0\n",
"ggml_metal_init: loaded kernel_get_rows_q3_k 0x2bbbc7630\n",
"ggml_metal_init: loaded kernel_get_rows_q4_k 0x2d4394e30\n",
"ggml_metal_init: loaded kernel_get_rows_q5_k 0x2bbbc7890\n",
"ggml_metal_init: loaded kernel_get_rows_q6_k 0x2d4395210\n",
"ggml_metal_init: loaded kernel_rms_norm 0x2bbbc8740\n",
"ggml_metal_init: loaded kernel_norm 0x2bbbc8b30\n",
"ggml_metal_init: loaded kernel_mul_mat_f16_f32 0x2d4395470\n",
"ggml_metal_init: loaded kernel_mul_mat_q4_0_f32 0x2d4395a70\n",
"ggml_metal_init: loaded kernel_mul_mat_q4_1_f32 0x1242b1a00\n",
"ggml_metal_init: loaded kernel_mul_mat_q2_k_f32 0x29f17d1c0\n",
"ggml_metal_init: loaded kernel_mul_mat_q3_k_f32 0x2d4396050\n",
"ggml_metal_init: loaded kernel_mul_mat_q4_k_f32 0x2bbbc98a0\n",
"ggml_metal_init: loaded kernel_mul_mat_q5_k_f32 0x2bbbca4a0\n",
"ggml_metal_init: loaded kernel_mul_mat_q6_k_f32 0x2bbbcae90\n",
"ggml_metal_init: loaded kernel_rope 0x2bbbca700\n",
"ggml_metal_init: loaded kernel_alibi_f32 0x2bbbcc6e0\n",
"ggml_metal_init: loaded kernel_cpy_f32_f16 0x2bbbccf90\n",
"ggml_metal_init: loaded kernel_cpy_f32_f32 0x2bbbcd900\n",
"ggml_metal_init: loaded kernel_cpy_f16_f16 0x2bbbce1f0\n",
"ggml_metal_init: recommendedMaxWorkingSetSize = 21845.34 MB\n",
"ggml_metal_init: hasUnifiedMemory = true\n",
"ggml_metal_init: maxTransferRate = built-in GPU\n",
"ggml_metal_add_buffer: allocated 'data ' buffer, size = 6984.06 MB, ( 6984.45 / 21845.34)\n",
"ggml_metal_add_buffer: allocated 'eval ' buffer, size = 1024.00 MB, ( 8008.45 / 21845.34)\n",
"ggml_metal_add_buffer: allocated 'kv ' buffer, size = 1602.00 MB, ( 9610.45 / 21845.34)\n",
"ggml_metal_add_buffer: allocated 'scr0 ' buffer, size = 512.00 MB, (10122.45 / 21845.34)\n",
"ggml_metal_add_buffer: allocated 'scr1 ' buffer, size = 512.00 MB, (10634.45 / 21845.34)\n"
]
}
],
"source": [
"from langchain.llms import GPT4All\n",
"from langchain.chains import RetrievalQA\n",
"\n",
"llm = GPT4All(model=\"/Users/rlm/Desktop/Code/gpt4all/models/nous-hermes-13b.ggmlv3.q4_0.bin\",max_tokens=2048)\n",
"qa_chain = RetrievalQA.from_chain_type(llm, retriever=vectorstore.as_retriever())"
"- See a guide on local LLMS [here](/docs/modules/use_cases/question_answering/how_to/local_retrieval_qa)."
]
},
{
@@ -460,24 +352,17 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 10,
"id": "e4fee704",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"ggml_metal_free: deallocating\n"
]
},
{
"data": {
"text/plain": [
"'The approaches to task decomposition include using LLM with simple prompting, task-specific instructions, or human inputs. Thanks for asking!'"
"'The approaches to Task Decomposition are (1) using simple prompting by LLM, (2) using task-specific instructions, and (3) incorporating human inputs. Thanks for asking!'"
]
},
"execution_count": 13,
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
@@ -507,8 +392,65 @@
},
{
"cell_type": "markdown",
"id": "ff40e8db",
"id": "c825e9bf-6a56-46e4-8bbb-05441f76cb96",
"metadata": {},
"source": [
"We can also store and fetch prompts from the LangChain prompt hub.\n",
"\n",
"This will work with your [LangSmith API key](https://docs.smith.langchain.com/).\n",
"\n",
"For example, see [here](https://smith.langchain.com/hub/rlm/rag-prompt) is a common prompt for RAG.\n",
"\n",
"We can load this."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a896060f-ebc4-4236-a4ad-32960601c6e8",
"metadata": {},
"outputs": [],
"source": [
"pip install langchainhub"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "aef8e734-ba54-48ae-b959-1898618f2d90",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'The approaches to task decomposition include using LLM with simple prompting, task-specific instructions, and human inputs.'"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# RAG prompt\n",
"from langchain import hub\n",
"QA_CHAIN_PROMPT_HUB = hub.pull(\"rlm/rag-prompt\")\n",
"\n",
"qa_chain = RetrievalQA.from_chain_type(\n",
" llm,\n",
" retriever=vectorstore.as_retriever(),\n",
" chain_type_kwargs={\"prompt\": QA_CHAIN_PROMPT_HUB}\n",
")\n",
"result = qa_chain({\"query\": question})\n",
"result[\"result\"]"
]
},
{
"cell_type": "markdown",
"id": "ff40e8db",
"metadata": {
"jp-MarkdownHeadingCollapsed": true
},
"source": [
"#### Return source documents\n",
"\n",

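Taken together, the second notebook's new cells wire the hub prompt into `RetrievalQA`. Condensed from the diff above, with `llm`, `vectorstore`, and `question` assumed to be defined in earlier cells of that notebook:

    # Condensed from the cells added above; `llm`, `vectorstore`, and
    # `question` are assumed from earlier in the notebook.
    from langchain import hub
    from langchain.chains import RetrievalQA

    QA_CHAIN_PROMPT_HUB = hub.pull("rlm/rag-prompt")
    qa_chain = RetrievalQA.from_chain_type(
        llm,
        retriever=vectorstore.as_retriever(),
        chain_type_kwargs={"prompt": QA_CHAIN_PROMPT_HUB},
    )
    result = qa_chain({"query": question})
    print(result["result"])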