From 29064848f9cf3fbaa66195d562d2e13f61a54f83 Mon Sep 17 00:00:00 2001
From: Ethan Yang
Date: Thu, 6 Jun 2024 06:38:54 +0800
Subject: [PATCH] [Community] add option to delete the prompt from HF output (#22225)

This will help solve the pattern-mismatch issue when parsing the output in an Agent.
https://github.com/langchain-ai/langchain/issues/21912
---
 .../llms/huggingface_pipelines.ipynb          | 22 +++++++++++++++
 docs/docs/integrations/llms/openvino.ipynb    | 27 +++++++++++++++++--
 .../llms/huggingface_pipeline.py              |  4 ++-
 .../llms/huggingface_pipeline.py              |  4 ++-
 4 files changed, 53 insertions(+), 4 deletions(-)

diff --git a/docs/docs/integrations/llms/huggingface_pipelines.ipynb b/docs/docs/integrations/llms/huggingface_pipelines.ipynb
index 1c0100a421..be07404727 100644
--- a/docs/docs/integrations/llms/huggingface_pipelines.ipynb
+++ b/docs/docs/integrations/llms/huggingface_pipelines.ipynb
@@ -121,6 +121,28 @@
     "print(chain.invoke({\"question\": question}))"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "b4a31db5",
+   "metadata": {},
+   "source": [
+    "To get the response without the prompt, you can bind `skip_prompt=True` to the LLM."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5e4aaad2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "chain = prompt | hf.bind(skip_prompt=True)\n",
+    "\n",
+    "question = \"What is electroencephalography?\"\n",
+    "\n",
+    "print(chain.invoke({\"question\": question}))"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "dbbc3a37",
diff --git a/docs/docs/integrations/llms/openvino.ipynb b/docs/docs/integrations/llms/openvino.ipynb
index 36a079a778..4ed3855a0f 100644
--- a/docs/docs/integrations/llms/openvino.ipynb
+++ b/docs/docs/integrations/llms/openvino.ipynb
@@ -31,7 +31,7 @@
    },
    "outputs": [],
    "source": [
-    "%pip install --upgrade-strategy eager \"optimum[openvino,nncf]\" --quiet"
+    "%pip install --upgrade-strategy eager \"optimum[openvino,nncf]\" langchain-huggingface --quiet"
    ]
   },
   {
@@ -130,6 +130,28 @@
     "print(chain.invoke({\"question\": question}))"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "446a01e0",
+   "metadata": {},
+   "source": [
+    "To get the response without the prompt, you can bind `skip_prompt=True` to the LLM."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e3baeab2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "chain = prompt | ov_llm.bind(skip_prompt=True)\n",
+    "\n",
+    "question = \"What is electroencephalography?\"\n",
+    "\n",
+    "print(chain.invoke({\"question\": question}))"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "12524837-e9ab-455a-86be-66b95f4f893a",
@@ -243,7 +265,8 @@
     "    skip_prompt=True,\n",
     "    skip_special_tokens=True,\n",
     ")\n",
-    "ov_llm.pipeline._forward_params = {\"streamer\": streamer, \"max_new_tokens\": 100}\n",
+    "pipeline_kwargs = {\"pipeline_kwargs\": {\"streamer\": streamer, \"max_new_tokens\": 100}}\n",
+    "chain = prompt | ov_llm.bind(**pipeline_kwargs)\n",
     "\n",
     "t1 = Thread(target=chain.invoke, args=({\"question\": question},))\n",
     "t1.start()\n",
diff --git a/libs/community/langchain_community/llms/huggingface_pipeline.py b/libs/community/langchain_community/llms/huggingface_pipeline.py
index 5eebe0283e..2bb820ba52 100644
--- a/libs/community/langchain_community/llms/huggingface_pipeline.py
+++ b/libs/community/langchain_community/llms/huggingface_pipeline.py
@@ -265,6 +265,7 @@ class HuggingFacePipeline(BaseLLM):
         # List to hold all results
         text_generations: List[str] = []
         pipeline_kwargs = kwargs.get("pipeline_kwargs", {})
+        skip_prompt = kwargs.get("skip_prompt", False)
 
         for i in range(0, len(prompts), self.batch_size):
             batch_prompts = prompts[i : i + self.batch_size]
@@ -294,7 +295,8 @@ class HuggingFacePipeline(BaseLLM):
                         f"Got invalid task {self.pipeline.task}, "
                         f"currently only {VALID_TASKS} are supported"
                     )
-
+                if skip_prompt:
+                    text = text[len(batch_prompts[j]) :]
                 # Append the processed text to results
                 text_generations.append(text)
 
diff --git a/libs/partners/huggingface/langchain_huggingface/llms/huggingface_pipeline.py b/libs/partners/huggingface/langchain_huggingface/llms/huggingface_pipeline.py
index 070f141328..8ead766d78 100644
--- a/libs/partners/huggingface/langchain_huggingface/llms/huggingface_pipeline.py
+++ b/libs/partners/huggingface/langchain_huggingface/llms/huggingface_pipeline.py
@@ -261,6 +261,7 @@ class HuggingFacePipeline(BaseLLM):
         # List to hold all results
         text_generations: List[str] = []
         pipeline_kwargs = kwargs.get("pipeline_kwargs", {})
+        skip_prompt = kwargs.get("skip_prompt", False)
 
         for i in range(0, len(prompts), self.batch_size):
             batch_prompts = prompts[i : i + self.batch_size]
@@ -290,7 +291,8 @@ class HuggingFacePipeline(BaseLLM):
                         f"Got invalid task {self.pipeline.task}, "
                         f"currently only {VALID_TASKS} are supported"
                     )
-
+                if skip_prompt:
+                    text = text[len(batch_prompts[j]) :]
                 # Append the processed text to results
                 text_generations.append(text)
 
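
Usage sketch (not part of the patch): a minimal, self-contained example of how the new `skip_prompt` kwarg added by this change is meant to be used end to end. The model id "gpt2", the prompt template text, and the max_new_tokens value are placeholders chosen only for illustration; the from_model_id and .bind() calls are the existing LangChain APIs also shown in the notebook cells above.

    from langchain_core.prompts import PromptTemplate
    from langchain_huggingface import HuggingFacePipeline

    # Build a small text-generation pipeline; "gpt2" is only a stand-in model id.
    hf = HuggingFacePipeline.from_model_id(
        model_id="gpt2",
        task="text-generation",
        pipeline_kwargs={"max_new_tokens": 64},
    )

    prompt = PromptTemplate.from_template("Question: {question}\nAnswer:")

    # Without skip_prompt the generated text echoes the prompt; binding
    # skip_prompt=True makes _generate strip the prompt prefix from each output,
    # which is what avoids the pattern-mismatch problem when an Agent parses it.
    chain = prompt | hf.bind(skip_prompt=True)

    print(chain.invoke({"question": "What is electroencephalography?"}))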