From dacf96895a7504db1bcacd2c1f4c823f529c52bc Mon Sep 17 00:00:00 2001
From: Tomaz Bratanic <bratanic.tomaz@gmail.com>
Date: Thu, 24 Aug 2023 20:50:38 +0200
Subject: [PATCH] Add the option to use separate LLMs for GraphCypherQA chain
 (#9689)

The Graph Chains differ from the RetrievalQA chains in that they use two
LLMChains instead of one. Therefore, you sometimes want to use one LLM
to generate the database query and a different LLM to generate the
final answer.

This feature would make it more convenient to use different LLMs in the
same chain.

I have also renamed the Graph DB QA Chain to Neo4j DB QA Chain, in the
documentation only, as it is used only for Neo4j. The naming was
ambiguous because it was the first graph QA chain added, and I wasn't
sure how you wanted to spin it.
---
 docs/api_reference/guide_imports.json         |   6 +-
 .../more/graph/graph_cypher_qa.ipynb          | 167 ++++++++++++++++--
 .../langchain/chains/graph_qa/cypher.py       |  19 +-
 3 files changed, 174 insertions(+), 18 deletions(-)

diff --git a/docs/api_reference/guide_imports.json b/docs/api_reference/guide_imports.json
index 832c03f996..176c686e52 100644
--- a/docs/api_reference/guide_imports.json
+++ b/docs/api_reference/guide_imports.json
@@ -341,7 +341,7 @@
         "HugeGraph QA Chain": "https://python.langchain.com/docs/use_cases/more/graph/graph_hugegraph_qa",
         "GraphSparqlQAChain": "https://python.langchain.com/docs/use_cases/more/graph/graph_sparql_qa",
         "ArangoDB QA chain": "https://python.langchain.com/docs/use_cases/more/graph/graph_arangodb_qa",
-        "Graph DB QA chain": "https://python.langchain.com/docs/use_cases/more/graph/graph_cypher_qa",
+        "Neo4j DB QA chain": "https://python.langchain.com/docs/use_cases/more/graph/graph_cypher_qa",
         "How to use a SmartLLMChain": "https://python.langchain.com/docs/use_cases/more/self_check/smart_llm",
         "Multi-Agent Simulated Environment: Petting Zoo": "https://python.langchain.com/docs/use_cases/agent_simulations/petting_zoo",
         "Multi-agent decentralized speaker selection": "https://python.langchain.com/docs/use_cases/agent_simulations/multiagent_bidding",
@@ -3202,10 +3202,10 @@
         "Graph QA": "https://python.langchain.com/docs/use_cases/more/graph/graph_qa"
     },
     "GraphCypherQAChain": {
-        "Graph DB QA chain": "https://python.langchain.com/docs/use_cases/more/graph/graph_cypher_qa"
+        "Neo4j DB QA chain": "https://python.langchain.com/docs/use_cases/more/graph/graph_cypher_qa"
     },
     "Neo4jGraph": {
-        "Graph DB QA chain": "https://python.langchain.com/docs/use_cases/more/graph/graph_cypher_qa"
+        "Neo4j DB QA chain": "https://python.langchain.com/docs/use_cases/more/graph/graph_cypher_qa"
     },
     "LLMBashChain": {
         "Bash chain": "https://python.langchain.com/docs/use_cases/more/code_writing/llm_bash"
diff --git a/docs/extras/use_cases/more/graph/graph_cypher_qa.ipynb b/docs/extras/use_cases/more/graph/graph_cypher_qa.ipynb
index f6f9ca8182..84adde72b1 100644
--- a/docs/extras/use_cases/more/graph/graph_cypher_qa.ipynb
+++ b/docs/extras/use_cases/more/graph/graph_cypher_qa.ipynb
@@ -5,7 +5,7 @@
    "id": "c94240f5",
    "metadata": {},
    "source": [
-    "# Graph DB QA chain\n",
+    "# Neo4j DB QA chain\n",
     "\n",
     "This notebook shows how to use LLMs to provide a natural language interface to a graph database you can query with the Cypher query language."
    ]
@@ -177,7 +177,7 @@
       "\u001b[32;1m\u001b[1;3mMATCH (a:Actor)-[:ACTED_IN]->(m:Movie {name: 'Top Gun'})\n",
       "RETURN a.name\u001b[0m\n",
       "Full Context:\n",
-      "\u001b[32;1m\u001b[1;3m[{'a.name': 'Val Kilmer'}, {'a.name': 'Anthony Edwards'}, {'a.name': 'Meg Ryan'}, {'a.name': 'Tom Cruise'}]\u001b[0m\n",
+      "\u001b[32;1m\u001b[1;3m[{'a.name': 'Tom Cruise'}, {'a.name': 'Val Kilmer'}, {'a.name': 'Anthony Edwards'}, {'a.name': 'Meg Ryan'}]\u001b[0m\n",
       "\n",
       "\u001b[1m> Finished chain.\u001b[0m\n"
      ]
@@ -185,7 +185,7 @@
     {
      "data": {
       "text/plain": [
-       "'Val Kilmer, Anthony Edwards, Meg Ryan, and Tom Cruise played in Top Gun.'"
+       "'Tom Cruise, Val Kilmer, Anthony Edwards, and Meg Ryan played in Top Gun.'"
       ]
      },
      "execution_count": 7,
@@ -236,7 +236,7 @@
       "\u001b[32;1m\u001b[1;3mMATCH (a:Actor)-[:ACTED_IN]->(m:Movie {name: 'Top Gun'})\n",
       "RETURN a.name\u001b[0m\n",
       "Full Context:\n",
-      "\u001b[32;1m\u001b[1;3m[{'a.name': 'Val Kilmer'}, {'a.name': 'Anthony Edwards'}]\u001b[0m\n",
+      "\u001b[32;1m\u001b[1;3m[{'a.name': 'Tom Cruise'}, {'a.name': 'Val Kilmer'}]\u001b[0m\n",
       "\n",
       "\u001b[1m> Finished chain.\u001b[0m\n"
      ]
@@ -244,7 +244,7 @@
     {
      "data": {
       "text/plain": [
-       "'Val Kilmer and Anthony Edwards played in Top Gun.'"
+       "'Tom Cruise and Val Kilmer played in Top Gun.'"
       ]
      },
      "execution_count": 9,
@@ -294,11 +294,11 @@
       "\u001b[32;1m\u001b[1;3mMATCH (a:Actor)-[:ACTED_IN]->(m:Movie {name: 'Top Gun'})\n",
       "RETURN a.name\u001b[0m\n",
       "Full Context:\n",
-      "\u001b[32;1m\u001b[1;3m[{'a.name': 'Val Kilmer'}, {'a.name': 'Anthony Edwards'}, {'a.name': 'Meg Ryan'}, {'a.name': 'Tom Cruise'}]\u001b[0m\n",
+      "\u001b[32;1m\u001b[1;3m[{'a.name': 'Tom Cruise'}, {'a.name': 'Val Kilmer'}, {'a.name': 'Anthony Edwards'}, {'a.name': 'Meg Ryan'}]\u001b[0m\n",
       "\n",
       "\u001b[1m> Finished chain.\u001b[0m\n",
-      "Intermediate steps: [{'query': \"MATCH (a:Actor)-[:ACTED_IN]->(m:Movie {name: 'Top Gun'})\\nRETURN a.name\"}, {'context': [{'a.name': 'Val Kilmer'}, {'a.name': 'Anthony Edwards'}, {'a.name': 'Meg Ryan'}, {'a.name': 'Tom Cruise'}]}]\n",
-      "Final answer: Val Kilmer, Anthony Edwards, Meg Ryan, and Tom Cruise played in Top Gun.\n"
+      "Intermediate steps: [{'query': \"MATCH (a:Actor)-[:ACTED_IN]->(m:Movie {name: 'Top Gun'})\\nRETURN a.name\"}, {'context': [{'a.name': 'Tom Cruise'}, {'a.name': 'Val Kilmer'}, {'a.name': 'Anthony Edwards'}, {'a.name': 'Meg Ryan'}]}]\n",
+      "Final answer: Tom Cruise, Val Kilmer, Anthony Edwards, and Meg Ryan played in Top Gun.\n"
      ]
     }
    ],
@@ -352,10 +352,10 @@
     {
      "data": {
       "text/plain": [
-       "[{'a.name': 'Val Kilmer'},\n",
+       "[{'a.name': 'Tom Cruise'},\n",
+       " {'a.name': 'Val Kilmer'},\n",
        " {'a.name': 'Anthony Edwards'},\n",
-       " {'a.name': 'Meg Ryan'},\n",
-       " {'a.name': 'Tom Cruise'}]"
+       " {'a.name': 'Meg Ryan'}]"
       ]
      },
      "execution_count": 13,
@@ -367,10 +367,153 @@
     "chain.run(\"Who played in Top Gun?\")"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "f01dfb72-24ec-4ae7-883a-ee6646889b59",
+   "metadata": {},
+   "source": [
+    "## Add examples in the Cypher generation prompt\n",
+    "You can define the Cypher statement you want the LLM to generate for particular questions"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "59baeb88-adfa-4c26-8334-fcbff3a98efb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain.prompts.prompt import PromptTemplate\n",
+    "\n",
+    "\n",
+    "CYPHER_GENERATION_TEMPLATE = \"\"\"Task:Generate Cypher statement to query a graph database.\n",
+    "Instructions:\n",
+    "Use only the provided relationship types and properties in the schema.\n",
+    "Do not use any other relationship types or properties that are not provided.\n",
+    "Schema:\n",
+    "{schema}\n",
+    "Note: Do not include any explanations or apologies in your responses.\n",
+    "Do not respond to any questions that might ask anything else than for you to construct a Cypher statement.\n",
+    "Do not include any text except the generated Cypher statement.\n",
+    "Examples: Here are a few examples of generated Cypher statements for particular questions:\n",
+    "# How many people played in Top Gun?\n",
+    "MATCH (m:Movie {{title:\"Top Gun\"}})<-[:ACTED_IN]-()\n",
+    "RETURN count(*) AS numberOfActors\n",
+    "\n",
+    "The question is:\n",
+    "{question}\"\"\"\n",
+    "\n",
+    "CYPHER_GENERATION_PROMPT = PromptTemplate(\n",
+    "    input_variables=[\"schema\", \"question\"], template=CYPHER_GENERATION_TEMPLATE\n",
+    ")\n",
+    "\n",
+    "chain = GraphCypherQAChain.from_llm(\n",
+    "    ChatOpenAI(temperature=0), graph=graph, verbose=True, cypher_prompt=CYPHER_GENERATION_PROMPT\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "47c64027-cf42-493a-9c76-2d10ba753728",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "\n",
+      "\u001b[1m> Entering new GraphCypherQAChain chain...\u001b[0m\n",
+      "Generated Cypher:\n",
+      "\u001b[32;1m\u001b[1;3mMATCH (m:Movie {name:\"Top Gun\"})<-[:ACTED_IN]-(:Actor)\n",
+      "RETURN count(*) AS numberOfActors\u001b[0m\n",
+      "Full Context:\n",
+      "\u001b[32;1m\u001b[1;3m[{'numberOfActors': 4}]\u001b[0m\n",
+      "\n",
+      "\u001b[1m> Finished chain.\u001b[0m\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "'Four people played in Top Gun.'"
+      ]
+     },
+     "execution_count": 15,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "chain.run(\"How many people played in Top Gun?\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3e721cad-aa87-4526-9231-2dfc0e365939",
+   "metadata": {},
+   "source": [
+    "## Use separate LLMs for Cypher and answer generation\n",
+    "You can use the `cypher_llm` and `qa_llm` parameters to define different llms"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "id": "6f9becc2-f579-45bf-9b50-2ce02bde92da",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "chain = GraphCypherQAChain.from_llm(\n",
+    "     graph=graph,\n",
+    "     cypher_llm=ChatOpenAI(temperature=0, model=\"gpt-3.5-turbo\"),\n",
+    "     qa_llm=ChatOpenAI(temperature=0, model=\"gpt-3.5-turbo-16k\"),\n",
+    "     verbose=True,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "id": "ff18e3e3-3402-4683-aec4-a19898f23ca1",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "\n",
+      "\u001b[1m> Entering new GraphCypherQAChain chain...\u001b[0m\n",
+      "Generated Cypher:\n",
+      "\u001b[32;1m\u001b[1;3mMATCH (a:Actor)-[:ACTED_IN]->(m:Movie {name: 'Top Gun'})\n",
+      "RETURN a.name\u001b[0m\n",
+      "Full Context:\n",
+      "\u001b[32;1m\u001b[1;3m[{'a.name': 'Tom Cruise'}, {'a.name': 'Val Kilmer'}, {'a.name': 'Anthony Edwards'}, {'a.name': 'Meg Ryan'}]\u001b[0m\n",
+      "\n",
+      "\u001b[1m> Finished chain.\u001b[0m\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "'Tom Cruise, Val Kilmer, Anthony Edwards, and Meg Ryan played in Top Gun.'"
+      ]
+     },
+     "execution_count": 17,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "chain.run(\"Who played in Top Gun?\")"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "74d0a36f",
+   "id": "48ff7cf8-18a3-43d7-8cb1-c1b91744608d",
    "metadata": {},
    "outputs": [],
    "source": []
diff --git a/libs/langchain/langchain/chains/graph_qa/cypher.py b/libs/langchain/langchain/chains/graph_qa/cypher.py
index 015ff5f47b..82c85ef1c0 100644
--- a/libs/langchain/langchain/chains/graph_qa/cypher.py
+++ b/libs/langchain/langchain/chains/graph_qa/cypher.py
@@ -73,15 +73,28 @@ class GraphCypherQAChain(Chain):
     @classmethod
     def from_llm(
         cls,
-        llm: BaseLanguageModel,
+        llm: Optional[BaseLanguageModel] = None,
         *,
         qa_prompt: BasePromptTemplate = CYPHER_QA_PROMPT,
         cypher_prompt: BasePromptTemplate = CYPHER_GENERATION_PROMPT,
+        cypher_llm: Optional[BaseLanguageModel] = None,
+        qa_llm: Optional[BaseLanguageModel] = None,
         **kwargs: Any,
     ) -> GraphCypherQAChain:
         """Initialize from LLM."""
-        qa_chain = LLMChain(llm=llm, prompt=qa_prompt)
-        cypher_generation_chain = LLMChain(llm=llm, prompt=cypher_prompt)
+
+        if not cypher_llm and not llm:
+            raise ValueError("Either `llm` or `cypher_llm` parameters must be provided")
+        if not qa_llm and not llm:
+            raise ValueError("Either `llm` or `qa_llm` parameters must be provided")
+        if cypher_llm and qa_llm and llm:
+            raise ValueError(
+                "You can specify up to two of 'cypher_llm', 'qa_llm'"
+                ", and 'llm', but not all three simultaneously."
+            )
+
+        qa_chain = LLMChain(llm=qa_llm or llm, prompt=qa_prompt)
+        cypher_generation_chain = LLMChain(llm=cypher_llm or llm, prompt=cypher_prompt)
 
         return cls(
             qa_chain=qa_chain,