Add additional parameters to Graph Cypher Chain (#5979)

Based on the inspiration from the SQL chain, the following three parameters are added to Graph Cypher Chain. - top_k: Limited the number of results from the database to be used as context - return_direct: Return database results without transforming them to natural language - return_intermediate_steps: Return intermediate steps
11 months ago · d5819a7ca7
parent 0ca37e613c
commit d5819a7ca7
4 changed files with 322 additions and 17 deletions
--- a/docs/modules/chains/examples/graph_cypher_qa.ipynb
+++ b/docs/modules/chains/examples/graph_cypher_qa.ipynb
@ -177,7 +177,7 @@
      "\u001b[32;1m\u001b[1;3mMATCH (a:Actor)-[:ACTED_IN]->(m:Movie {name: 'Top Gun'})\n",
      "RETURN a.name\u001b[0m\n",
      "Full Context:\n",
-      "\u001b[32;1m\u001b[1;3m[{'a.name': 'Tom Cruise'}, {'a.name': 'Val Kilmer'}, {'a.name': 'Anthony Edwards'}, {'a.name': 'Meg Ryan'}]\u001b[0m\n",
+      "\u001b[32;1m\u001b[1;3m[{'a.name': 'Val Kilmer'}, {'a.name': 'Anthony Edwards'}, {'a.name': 'Meg Ryan'}, {'a.name': 'Tom Cruise'}]\u001b[0m\n",
      "\n",
      "\u001b[1m> Finished chain.\u001b[0m\n"
     ]
@ -185,7 +185,7 @@
    {
     "data": {
      "text/plain": [
-       "'Tom Cruise, Val Kilmer, Anthony Edwards, and Meg Ryan played in Top Gun.'"
+       "'Val Kilmer, Anthony Edwards, Meg Ryan, and Tom Cruise played in Top Gun.'"
      ]
     },
     "execution_count": 7,
@ -197,10 +197,180 @@
    "chain.run(\"Who played in Top Gun?\")"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "id": "2d28c4df",
+   "metadata": {},
+   "source": [
+    "## Limit the number of results\n",
+    "You can limit the number of results from the Cypher QA Chain using the `top_k` parameter.\n",
+    "The default is 10."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "df230946",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "chain = GraphCypherQAChain.from_llm(\n",
+    "    ChatOpenAI(temperature=0), graph=graph, verbose=True, top_k=2\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "3f1600ee",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "\n",
+      "\u001b[1m> Entering new GraphCypherQAChain chain...\u001b[0m\n",
+      "Generated Cypher:\n",
+      "\u001b[32;1m\u001b[1;3mMATCH (a:Actor)-[:ACTED_IN]->(m:Movie {name: 'Top Gun'})\n",
+      "RETURN a.name\u001b[0m\n",
+      "Full Context:\n",
+      "\u001b[32;1m\u001b[1;3m[{'a.name': 'Val Kilmer'}, {'a.name': 'Anthony Edwards'}]\u001b[0m\n",
+      "\n",
+      "\u001b[1m> Finished chain.\u001b[0m\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "'Val Kilmer and Anthony Edwards played in Top Gun.'"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "chain.run(\"Who played in Top Gun?\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "88c16206",
+   "metadata": {},
+   "source": [
+    "## Return intermediate results\n",
+    "You can return intermediate steps from the Cypher QA Chain using the `return_intermediate_steps` parameter"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "e412f36b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "chain = GraphCypherQAChain.from_llm(\n",
+    "    ChatOpenAI(temperature=0), graph=graph, verbose=True, return_intermediate_steps=True\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "4f4699dc",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "\n",
+      "\u001b[1m> Entering new GraphCypherQAChain chain...\u001b[0m\n",
+      "Generated Cypher:\n",
+      "\u001b[32;1m\u001b[1;3mMATCH (a:Actor)-[:ACTED_IN]->(m:Movie {name: 'Top Gun'})\n",
+      "RETURN a.name\u001b[0m\n",
+      "Full Context:\n",
+      "\u001b[32;1m\u001b[1;3m[{'a.name': 'Val Kilmer'}, {'a.name': 'Anthony Edwards'}, {'a.name': 'Meg Ryan'}, {'a.name': 'Tom Cruise'}]\u001b[0m\n",
+      "\n",
+      "\u001b[1m> Finished chain.\u001b[0m\n",
+      "Intermediate steps: [{'query': \"MATCH (a:Actor)-[:ACTED_IN]->(m:Movie {name: 'Top Gun'})\\nRETURN a.name\"}, {'context': [{'a.name': 'Val Kilmer'}, {'a.name': 'Anthony Edwards'}, {'a.name': 'Meg Ryan'}, {'a.name': 'Tom Cruise'}]}]\n",
+      "Final answer: Val Kilmer, Anthony Edwards, Meg Ryan, and Tom Cruise played in Top Gun.\n"
+     ]
+    }
+   ],
+   "source": [
+    "result = chain(\"Who played in Top Gun?\")\n",
+    "print(f\"Intermediate steps: {result['intermediate_steps']}\")\n",
+    "print(f\"Final answer: {result['result']}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d6e1b054",
+   "metadata": {},
+   "source": [
+    "## Return direct results\n",
+    "You can return direct results from the Cypher QA Chain using the `return_direct` parameter"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "2d3acf10",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "chain = GraphCypherQAChain.from_llm(\n",
+    "    ChatOpenAI(temperature=0), graph=graph, verbose=True, return_direct=True\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "b0a9d143",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "\n",
+      "\u001b[1m> Entering new GraphCypherQAChain chain...\u001b[0m\n",
+      "Generated Cypher:\n",
+      "\u001b[32;1m\u001b[1;3mMATCH (a:Actor)-[:ACTED_IN]->(m:Movie {name: 'Top Gun'})\n",
+      "RETURN a.name\u001b[0m\n",
+      "\n",
+      "\u001b[1m> Finished chain.\u001b[0m\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "[{'a.name': 'Val Kilmer'},\n",
+       " {'a.name': 'Anthony Edwards'},\n",
+       " {'a.name': 'Meg Ryan'},\n",
+       " {'a.name': 'Tom Cruise'}]"
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "chain.run(\"Who played in Top Gun?\")"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
-   "id": "b4825316",
+   "id": "74d0a36f",
   "metadata": {},
   "outputs": [],
   "source": []
@ -222,7 +392,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.9.1"
+   "version": "3.8.8"
  }
 },
 "nbformat": 4,
--- a/langchain/chains/graph_qa/cypher.py
+++ b/langchain/chains/graph_qa/cypher.py
@ -14,6 +14,8 @@ from langchain.chains.llm import LLMChain
 from langchain.graphs.neo4j_graph import Neo4jGraph
 from langchain.prompts.base import BasePromptTemplate

+INTERMEDIATE_STEPS_KEY = "intermediate_steps"
+

 def extract_cypher(text: str) -> str:
    # The pattern to find Cypher code enclosed in triple backticks
@ -33,6 +35,12 @@ class GraphCypherQAChain(Chain):
    qa_chain: LLMChain
    input_key: str = "query"  #: :meta private:
    output_key: str = "result"  #: :meta private:
+    top_k: int = 10
+    """Number of results to return from the query"""
+    return_intermediate_steps: bool = False
+    """Whether or not to return the intermediate steps along with the final answer."""
+    return_direct: bool = False
+    """Whether or not to return the result of querying the graph directly."""

    @property
    def input_keys(self) -> List[str]:
@ -74,12 +82,14 @@ class GraphCypherQAChain(Chain):
        self,
        inputs: Dict[str, Any],
        run_manager: Optional[CallbackManagerForChainRun] = None,
-    ) -> Dict[str, str]:
+    ) -> Dict[str, Any]:
        """Generate Cypher statement, use it to look up in db and answer question."""
        _run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager()
        callbacks = _run_manager.get_child()
        question = inputs[self.input_key]

+        intermediate_steps: List = []
+
        generated_cypher = self.cypher_generation_chain.run(
            {"question": question, "schema": self.graph.get_schema}, callbacks=callbacks
        )
@ -91,14 +101,30 @@ class GraphCypherQAChain(Chain):
        _run_manager.on_text(
            generated_cypher, color="green", end="\n", verbose=self.verbose
        )
-        context = self.graph.query(generated_cypher)

-        _run_manager.on_text("Full Context:", end="\n", verbose=self.verbose)
-        _run_manager.on_text(
-            str(context), color="green", end="\n", verbose=self.verbose
-        )
-        result = self.qa_chain(
-            {"question": question, "context": context},
-            callbacks=callbacks,
-        )
-        return {self.output_key: result[self.qa_chain.output_key]}
+        intermediate_steps.append({"query": generated_cypher})
+
+        # Retrieve and limit the number of results
+        context = self.graph.query(generated_cypher)[: self.top_k]
+
+        if self.return_direct:
+            final_result = context
+        else:
+            _run_manager.on_text("Full Context:", end="\n", verbose=self.verbose)
+            _run_manager.on_text(
+                str(context), color="green", end="\n", verbose=self.verbose
+            )
+
+            intermediate_steps.append({"context": context})
+
+            result = self.qa_chain(
+                {"question": question, "context": context},
+                callbacks=callbacks,
+            )
+            final_result = result[self.qa_chain.output_key]
+
+        chain_result: Dict[str, Any] = {self.output_key: final_result}
+        if self.return_intermediate_steps:
+            chain_result[INTERMEDIATE_STEPS_KEY] = intermediate_steps
+
+        return chain_result
--- a/langchain/graphs/neo4j_graph.py
+++ b/langchain/graphs/neo4j_graph.py
@ -78,8 +78,7 @@ class Neo4jGraph:
        with self._driver.session(database=self._database) as session:
            try:
                data = session.run(query, params)
-                # Hard limit of 50 results
-                return [r.data() for r in data][:50]
+                return [r.data() for r in data]
            except CypherSyntaxError as e:
                raise ValueError("Generated Cypher Statement is not valid\n" f"{e}")

--- a/tests/integration_tests/chains/test_graph_database.py
+++ b/tests/integration_tests/chains/test_graph_database.py
@ -58,3 +58,113 @@ def test_cypher_generating_run() -> None:
    output = chain.run("Who played in Pulp Fiction?")
    expected_output = " Bruce Willis played in Pulp Fiction."
    assert output == expected_output
+
+
+def test_cypher_top_k() -> None:
+    """Test top_k parameter correctly limits the number of results in the context."""
+    url = os.environ.get("NEO4J_URL")
+    username = os.environ.get("NEO4J_USERNAME")
+    password = os.environ.get("NEO4J_PASSWORD")
+    assert url is not None
+    assert username is not None
+    assert password is not None
+
+    TOP_K = 1
+
+    graph = Neo4jGraph(
+        url=url,
+        username=username,
+        password=password,
+    )
+    # Delete all nodes in the graph
+    graph.query("MATCH (n) DETACH DELETE n")
+    # Create two nodes and a relationship
+    graph.query(
+        "CREATE (a:Actor {name:'Bruce Willis'})"
+        "-[:ACTED_IN]->(:Movie {title: 'Pulp Fiction'})"
+        "<-[:ACTED_IN]-(:Actor {name:'Foo'})"
+    )
+    # Refresh schema information
+    graph.refresh_schema()
+
+    chain = GraphCypherQAChain.from_llm(
+        OpenAI(temperature=0), graph=graph, return_direct=True, top_k=TOP_K
+    )
+    output = chain.run("Who played in Pulp Fiction?")
+    assert len(output) == TOP_K
+
+
+def test_cypher_intermediate_steps() -> None:
+    """Test the returning of the intermediate steps."""
+    url = os.environ.get("NEO4J_URL")
+    username = os.environ.get("NEO4J_USERNAME")
+    password = os.environ.get("NEO4J_PASSWORD")
+    assert url is not None
+    assert username is not None
+    assert password is not None
+
+    graph = Neo4jGraph(
+        url=url,
+        username=username,
+        password=password,
+    )
+    # Delete all nodes in the graph
+    graph.query("MATCH (n) DETACH DELETE n")
+    # Create two nodes and a relationship
+    graph.query(
+        "CREATE (a:Actor {name:'Bruce Willis'})"
+        "-[:ACTED_IN]->(:Movie {title: 'Pulp Fiction'})"
+    )
+    # Refresh schema information
+    graph.refresh_schema()
+
+    chain = GraphCypherQAChain.from_llm(
+        OpenAI(temperature=0), graph=graph, return_intermediate_steps=True
+    )
+    output = chain("Who played in Pulp Fiction?")
+
+    expected_output = " Bruce Willis played in Pulp Fiction."
+    assert output["result"] == expected_output
+
+    query = output["intermediate_steps"][0]["query"]
+    expected_query = (
+        "\n\nMATCH (a:Actor)-[:ACTED_IN]->"
+        "(m:Movie {title: 'Pulp Fiction'}) RETURN a.name"
+    )
+    assert query == expected_query
+
+    context = output["intermediate_steps"][1]["context"]
+    expected_context = [{"a.name": "Bruce Willis"}]
+    assert context == expected_context
+
+
+def test_cypher_return_direct() -> None:
+    """Test that chain returns direct results."""
+    url = os.environ.get("NEO4J_URL")
+    username = os.environ.get("NEO4J_USERNAME")
+    password = os.environ.get("NEO4J_PASSWORD")
+    assert url is not None
+    assert username is not None
+    assert password is not None
+
+    graph = Neo4jGraph(
+        url=url,
+        username=username,
+        password=password,
+    )
+    # Delete all nodes in the graph
+    graph.query("MATCH (n) DETACH DELETE n")
+    # Create two nodes and a relationship
+    graph.query(
+        "CREATE (a:Actor {name:'Bruce Willis'})"
+        "-[:ACTED_IN]->(:Movie {title: 'Pulp Fiction'})"
+    )
+    # Refresh schema information
+    graph.refresh_schema()
+
+    chain = GraphCypherQAChain.from_llm(
+        OpenAI(temperature=0), graph=graph, return_direct=True
+    )
+    output = chain.run("Who played in Pulp Fiction?")
+    expected_output = [{"a.name": "Bruce Willis"}]
+    assert output == expected_output