forked from Archives/langchain
Add KuzuQAChain (#6454)
This PR adds `KuzuGraph` and `KuzuQAChain` for interacting with the [Kùzu database](https://github.com/kuzudb/kuzu). Kùzu is an in-process property graph database management system (GDBMS) built for query speed and scalability. `KuzuGraph` and `KuzuQAChain` provide the same functionality as the existing NebulaGraph and Neo4j integrations, enabling query generation and question answering over a Kùzu database. A notebook example and a simple test case have also been added.
---------
Co-authored-by: Dev 2049 <dev.dev2049@gmail.com>
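For reference, a minimal end-to-end usage sketch, condensed from the notebook added in this PR (it assumes a local Kùzu database populated as in the notebook and an OpenAI API key in the environment):

```python
import kuzu

from langchain.chat_models import ChatOpenAI
from langchain.chains import KuzuQAChain
from langchain.graphs import KuzuGraph

db = kuzu.Database("test_db")   # in-process database stored in a local directory
graph = KuzuGraph(db)           # wraps the database and extracts its schema
chain = KuzuQAChain.from_llm(ChatOpenAI(temperature=0), graph=graph, verbose=True)

chain.run("Who played in The Godfather: Part II?")
```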
parent 6e07283dd5 · commit 0fce8ef178
363 docs/extras/modules/chains/additional/graph_kuzu_qa.ipynb (new file)
@ -0,0 +1,363 @@

# KuzuQAChain

This notebook shows how to use LLMs to provide a natural language interface to a [Kùzu](https://kuzudb.com) database.

[Kùzu](https://kuzudb.com) is an in-process property graph database management system. You can simply install it with `pip`:

```bash
pip install kuzu
```

Once installed, you can import it, create a database on the local machine, and connect to it:

```python
import kuzu

db = kuzu.Database("test_db")
conn = kuzu.Connection(db)
```

First, we create the schema for a simple movie database:

```python
conn.execute("CREATE NODE TABLE Movie (name STRING, PRIMARY KEY(name))")
conn.execute("CREATE NODE TABLE Person (name STRING, birthDate STRING, PRIMARY KEY(name))")
conn.execute("CREATE REL TABLE ActedIn (FROM Person TO Movie)")
```

Then we can insert some data.

```python
conn.execute("CREATE (:Person {name: 'Al Pacino', birthDate: '1940-04-25'})")
conn.execute("CREATE (:Person {name: 'Robert De Niro', birthDate: '1943-08-17'})")
conn.execute("CREATE (:Movie {name: 'The Godfather'})")
conn.execute("CREATE (:Movie {name: 'The Godfather: Part II'})")
conn.execute("CREATE (:Movie {name: 'The Godfather Coda: The Death of Michael Corleone'})")
conn.execute("MATCH (p:Person), (m:Movie) WHERE p.name = 'Al Pacino' AND m.name = 'The Godfather' CREATE (p)-[:ActedIn]->(m)")
conn.execute("MATCH (p:Person), (m:Movie) WHERE p.name = 'Al Pacino' AND m.name = 'The Godfather: Part II' CREATE (p)-[:ActedIn]->(m)")
conn.execute("MATCH (p:Person), (m:Movie) WHERE p.name = 'Al Pacino' AND m.name = 'The Godfather Coda: The Death of Michael Corleone' CREATE (p)-[:ActedIn]->(m)")
conn.execute("MATCH (p:Person), (m:Movie) WHERE p.name = 'Robert De Niro' AND m.name = 'The Godfather: Part II' CREATE (p)-[:ActedIn]->(m)")
```

## Creating `KuzuQAChain`

We can now create the `KuzuGraph` and `KuzuQAChain`. To create the `KuzuGraph` we simply need to pass the database object to the `KuzuGraph` constructor.

```python
from langchain.chat_models import ChatOpenAI
from langchain.graphs import KuzuGraph
from langchain.chains import KuzuQAChain
```

```python
graph = KuzuGraph(db)
```

```python
chain = KuzuQAChain.from_llm(
    ChatOpenAI(temperature=0), graph=graph, verbose=True
)
```

## Refresh graph schema information

If the schema of the database changes, you can refresh the schema information needed to generate Cypher statements.

```python
# graph.refresh_schema()
```

```python
print(graph.get_schema)
```

```
Node properties: [{'properties': [('name', 'STRING')], 'label': 'Movie'}, {'properties': [('name', 'STRING'), ('birthDate', 'STRING')], 'label': 'Person'}]
Relationships properties: [{'properties': [], 'label': 'ActedIn'}]
Relationships: ['(:Person)-[:ActedIn]->(:Movie)']
```

## Querying the graph

We can now use the `KuzuQAChain` to ask questions of the graph.

```python
chain.run("Who played in The Godfather: Part II?")
```

```
> Entering new chain...
Generated Cypher:
MATCH (p:Person)-[:ActedIn]->(m:Movie {name: 'The Godfather: Part II'}) RETURN p.name
Full Context:
[{'p.name': 'Al Pacino'}, {'p.name': 'Robert De Niro'}]

> Finished chain.
'Al Pacino and Robert De Niro both played in The Godfather: Part II.'
```

```python
chain.run("Robert De Niro played in which movies?")
```

```
> Entering new chain...
Generated Cypher:
MATCH (p:Person {name: 'Robert De Niro'})-[:ActedIn]->(m:Movie)
RETURN m.name
Full Context:
[{'m.name': 'The Godfather: Part II'}]

> Finished chain.
'Robert De Niro played in The Godfather: Part II.'
```

```python
chain.run("Robert De Niro is born in which year?")
```

```
> Entering new chain...
Generated Cypher:
MATCH (p:Person {name: 'Robert De Niro'})-[:ActedIn]->(m:Movie)
RETURN p.birthDate
Full Context:
[{'p.birthDate': '1943-08-17'}]

> Finished chain.
'Robert De Niro was born on August 17, 1943.'
```

```python
chain.run("Who is the oldest actor who played in The Godfather: Part II?")
```

```
> Entering new chain...
Generated Cypher:
MATCH (p:Person)-[:ActedIn]->(m:Movie{name:'The Godfather: Part II'})
WITH p, m, p.birthDate AS birthDate
ORDER BY birthDate ASC
LIMIT 1
RETURN p.name
Full Context:
[{'p.name': 'Al Pacino'}]

> Finished chain.
'The oldest actor who played in The Godfather: Part II is Al Pacino.'
```
langchain/chains/__init__.py

@ -15,6 +15,7 @@ from langchain.chains.conversational_retrieval.base import (
 from langchain.chains.flare.base import FlareChain
 from langchain.chains.graph_qa.base import GraphQAChain
 from langchain.chains.graph_qa.cypher import GraphCypherQAChain
+from langchain.chains.graph_qa.kuzu import KuzuQAChain
 from langchain.chains.graph_qa.nebulagraph import NebulaGraphQAChain
 from langchain.chains.hyde.base import HypotheticalDocumentEmbedder
 from langchain.chains.llm import LLMChain
@ -67,6 +68,7 @@ __all__ = [
     "GraphCypherQAChain",
     "GraphQAChain",
     "HypotheticalDocumentEmbedder",
+    "KuzuQAChain",
     "LLMBashChain",
     "LLMChain",
     "LLMCheckerChain",
93 langchain/chains/graph_qa/kuzu.py (new file)

@ -0,0 +1,93 @@

"""Question answering over a graph."""
from __future__ import annotations

from typing import Any, Dict, List, Optional

from pydantic import Field

from langchain.base_language import BaseLanguageModel
from langchain.callbacks.manager import CallbackManagerForChainRun
from langchain.chains.base import Chain
from langchain.chains.graph_qa.prompts import CYPHER_QA_PROMPT, KUZU_GENERATION_PROMPT
from langchain.chains.llm import LLMChain
from langchain.graphs.kuzu_graph import KuzuGraph
from langchain.prompts.base import BasePromptTemplate


class KuzuQAChain(Chain):
    """Chain for question-answering against a graph by generating Cypher statements for
    Kùzu.
    """

    graph: KuzuGraph = Field(exclude=True)
    cypher_generation_chain: LLMChain
    qa_chain: LLMChain
    input_key: str = "query"  #: :meta private:
    output_key: str = "result"  #: :meta private:

    @property
    def input_keys(self) -> List[str]:
        """Return the input keys.

        :meta private:
        """
        return [self.input_key]

    @property
    def output_keys(self) -> List[str]:
        """Return the output keys.

        :meta private:
        """
        _output_keys = [self.output_key]
        return _output_keys

    @classmethod
    def from_llm(
        cls,
        llm: BaseLanguageModel,
        *,
        qa_prompt: BasePromptTemplate = CYPHER_QA_PROMPT,
        cypher_prompt: BasePromptTemplate = KUZU_GENERATION_PROMPT,
        **kwargs: Any,
    ) -> KuzuQAChain:
        """Initialize from LLM."""
        qa_chain = LLMChain(llm=llm, prompt=qa_prompt)
        cypher_generation_chain = LLMChain(llm=llm, prompt=cypher_prompt)

        return cls(
            qa_chain=qa_chain,
            cypher_generation_chain=cypher_generation_chain,
            **kwargs,
        )

    def _call(
        self,
        inputs: Dict[str, Any],
        run_manager: Optional[CallbackManagerForChainRun] = None,
    ) -> Dict[str, str]:
        """Generate Cypher statement, use it to look up in db and answer question."""
        _run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager()
        callbacks = _run_manager.get_child()
        question = inputs[self.input_key]

        generated_cypher = self.cypher_generation_chain.run(
            {"question": question, "schema": self.graph.get_schema}, callbacks=callbacks
        )

        _run_manager.on_text("Generated Cypher:", end="\n", verbose=self.verbose)
        _run_manager.on_text(
            generated_cypher, color="green", end="\n", verbose=self.verbose
        )
        context = self.graph.query(generated_cypher)

        _run_manager.on_text("Full Context:", end="\n", verbose=self.verbose)
        _run_manager.on_text(
            str(context), color="green", end="\n", verbose=self.verbose
        )

        result = self.qa_chain(
            {"question": question, "context": context},
            callbacks=callbacks,
        )
        return {self.output_key: result[self.qa_chain.output_key]}
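`from_llm` also accepts custom prompts, so the Cypher-generation step can be tuned without subclassing. A small sketch under that assumption; the template text below is purely illustrative and not part of the library:

```python
from langchain.chat_models import ChatOpenAI
from langchain.chains import KuzuQAChain
from langchain.graphs import KuzuGraph
from langchain.prompts import PromptTemplate

# Hypothetical project-specific prompt; it must expose the same input variables
# ("schema" and "question") that KuzuQAChain passes to the generation chain.
my_cypher_prompt = PromptTemplate(
    input_variables=["schema", "question"],
    template=(
        "Schema:\n{schema}\n\n"
        "Write a single Kùzu Cypher query that answers: {question}"
    ),
)

chain = KuzuQAChain.from_llm(
    ChatOpenAI(temperature=0),
    graph=KuzuGraph(db),  # assumes `db` is a kuzu.Database as in the notebook above
    cypher_prompt=my_cypher_prompt,
    verbose=True,
)
```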
langchain/chains/graph_qa/prompts.py

@ -72,6 +72,23 @@ NGQL_GENERATION_PROMPT = PromptTemplate(
     input_variables=["schema", "question"], template=NGQL_GENERATION_TEMPLATE
 )

+KUZU_EXTRA_INSTRUCTIONS = """
+Instructions:
+
+Generate statement with Kùzu Cypher dialect (rather than standard):
+1. do not use `WHERE EXISTS` clause to check the existence of a property because Kùzu database has a fixed schema.
+2. do not omit relationship pattern. Always use `()-[]->()` instead of `()->()`.
+3. do not include any notes or comments even if the statement does not produce the expected result.
+```\n"""
+
+KUZU_GENERATION_TEMPLATE = CYPHER_GENERATION_TEMPLATE.replace(
+    "Generate Cypher", "Generate Kùzu Cypher"
+).replace("Instructions:", KUZU_EXTRA_INSTRUCTIONS)
+
+KUZU_GENERATION_PROMPT = PromptTemplate(
+    input_variables=["schema", "question"], template=KUZU_GENERATION_TEMPLATE
+)
+
 CYPHER_QA_TEMPLATE = """You are an assistant that helps to form nice and human understandable answers.
 The information part contains the provided information that you must use to construct an answer.
 The provided information is authorative, you must never doubt it or try to use your internal knowledge to correct it.
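As a quick sanity check of how the new template composes (a sketch, not part of the diff), the prompt can be rendered directly; the schema string below is a stand-in for what `KuzuGraph.get_schema` returns:

```python
from langchain.chains.graph_qa.prompts import KUZU_GENERATION_PROMPT

# Fills the {schema} and {question} slots the same way KuzuQAChain does internally.
print(
    KUZU_GENERATION_PROMPT.format(
        schema="Node properties: [...]\nRelationships: [...]",
        question="Who played in The Godfather: Part II?",
    )
)
```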
langchain/graphs/__init__.py

@ -1,6 +1,7 @@
 """Graph implementations."""
+from langchain.graphs.kuzu_graph import KuzuGraph
 from langchain.graphs.nebula_graph import NebulaGraph
 from langchain.graphs.neo4j_graph import Neo4jGraph
 from langchain.graphs.networkx_graph import NetworkxEntityGraph

-__all__ = ["NetworkxEntityGraph", "Neo4jGraph", "NebulaGraph"]
+__all__ = ["NetworkxEntityGraph", "Neo4jGraph", "NebulaGraph", "KuzuGraph"]
90 langchain/graphs/kuzu_graph.py (new file)

@ -0,0 +1,90 @@

from typing import Any, Dict, List


class KuzuGraph:
    """Kùzu wrapper for graph operations."""

    def __init__(self, db: Any, database: str = "kuzu") -> None:
        try:
            import kuzu
        except ImportError:
            raise ImportError(
                "Could not import Kùzu python package."
                "Please install Kùzu with `pip install kuzu`."
            )
        self.db = db
        self.conn = kuzu.Connection(self.db)
        self.database = database
        self.refresh_schema()

    @property
    def get_schema(self) -> str:
        """Returns the schema of the Kùzu database"""
        return self.schema

    def query(self, query: str, params: dict = {}) -> List[Dict[str, Any]]:
        """Query Kùzu database"""
        params_list = []
        for param_name in params:
            params_list.append([param_name, params[param_name]])
        result = self.conn.execute(query, params_list)
        column_names = result.get_column_names()
        return_list = []
        while result.has_next():
            row = result.get_next()
            return_list.append(dict(zip(column_names, row)))
        return return_list

    def refresh_schema(self) -> None:
        """Refreshes the Kùzu graph schema information"""
        node_properties = []
        node_table_names = self.conn._get_node_table_names()
        for table_name in node_table_names:
            current_table_schema = {"properties": [], "label": table_name}
            properties = self.conn._get_node_property_names(table_name)
            for property_name in properties:
                property_type = properties[property_name]["type"]
                list_type_flag = ""
                if properties[property_name]["dimension"] > 0:
                    if "shape" in properties[property_name]:
                        for s in properties[property_name]["shape"]:
                            list_type_flag += "[%s]" % s
                    else:
                        for i in range(properties[property_name]["dimension"]):
                            list_type_flag += "[]"
                property_type += list_type_flag
                current_table_schema["properties"].append(
                    (property_name, property_type)
                )
            node_properties.append(current_table_schema)

        relationships = []
        rel_tables = self.conn._get_rel_table_names()
        for table in rel_tables:
            relationships.append(
                "(:%s)-[:%s]->(:%s)" % (table["src"], table["name"], table["dst"])
            )

        rel_properties = []
        for table in rel_tables:
            current_table_schema = {"properties": [], "label": table["name"]}
            properties_text = self.conn._connection.get_rel_property_names(
                table["name"]
            ).split("\n")
            for i, line in enumerate(properties_text):
                # The first 3 lines defines src, dst and name, so we skip them
                if i < 3:
                    continue
                if not line:
                    continue
                property_name, property_type = line.strip().split(" ")
                current_table_schema["properties"].append(
                    (property_name, property_type)
                )
            rel_properties.append(current_table_schema)

        self.schema = (
            f"Node properties: {node_properties}\n"
            f"Relationships properties: {rel_properties}\n"
            f"Relationships: {relationships}\n"
        )
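The wrapper is also usable on its own, outside the QA chain. A small sketch, assuming the movie schema created in the notebook above (it mirrors what the integration test below asserts):

```python
import kuzu

from langchain.graphs import KuzuGraph

db = kuzu.Database("test_db")
graph = KuzuGraph(db)

print(graph.get_schema)  # the schema string that gets interpolated into the prompt

rows = graph.query("MATCH (n:Movie) RETURN n.name ORDER BY n.name")
# rows is a list of dicts keyed by column name,
# e.g. [{'n.name': 'The Godfather'}, {'n.name': 'The Godfather: Part II'}, ...]
```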
10 poetry.lock (generated)

@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.4.2 and should not be changed by hand.

 [[package]]
 name = "absl-py"
@ -11473,13 +11473,13 @@ cffi = {version = ">=1.11", markers = "platform_python_implementation == \"PyPy\
 cffi = ["cffi (>=1.11)"]

 [extras]
-all = ["anthropic", "cohere", "openai", "nlpcloud", "huggingface_hub", "jina", "manifest-ml", "elasticsearch", "opensearch-py", "google-search-results", "faiss-cpu", "sentence-transformers", "transformers", "spacy", "nltk", "wikipedia", "beautifulsoup4", "tiktoken", "torch", "jinja2", "pinecone-client", "pinecone-text", "pymongo", "weaviate-client", "redis", "google-api-python-client", "google-auth", "wolframalpha", "qdrant-client", "tensorflow-text", "pypdf", "networkx", "nomic", "aleph-alpha-client", "deeplake", "pgvector", "psycopg2-binary", "pyowm", "pytesseract", "html2text", "atlassian-python-api", "gptcache", "duckduckgo-search", "arxiv", "azure-identity", "clickhouse-connect", "azure-cosmos", "lancedb", "langkit", "lark", "pexpect", "pyvespa", "O365", "jq", "docarray", "steamship", "pdfminer-six", "lxml", "requests-toolbelt", "neo4j", "openlm", "azure-ai-formrecognizer", "azure-ai-vision", "azure-cognitiveservices-speech", "momento", "singlestoredb", "tigrisdb", "nebula3-python", "awadb"]
+all = ["O365", "aleph-alpha-client", "anthropic", "arxiv", "atlassian-python-api", "awadb", "azure-ai-formrecognizer", "azure-ai-vision", "azure-cognitiveservices-speech", "azure-cosmos", "azure-identity", "beautifulsoup4", "clickhouse-connect", "cohere", "deeplake", "docarray", "duckduckgo-search", "elasticsearch", "faiss-cpu", "google-api-python-client", "google-auth", "google-search-results", "gptcache", "html2text", "huggingface_hub", "jina", "jinja2", "jq", "lancedb", "langkit", "lark", "lxml", "manifest-ml", "momento", "nebula3-python", "neo4j", "networkx", "nlpcloud", "nltk", "nomic", "openai", "openlm", "opensearch-py", "pdfminer-six", "pexpect", "pgvector", "pinecone-client", "pinecone-text", "psycopg2-binary", "pymongo", "pyowm", "pypdf", "pytesseract", "pyvespa", "qdrant-client", "redis", "requests-toolbelt", "sentence-transformers", "singlestoredb", "spacy", "steamship", "tensorflow-text", "tigrisdb", "tiktoken", "torch", "transformers", "weaviate-client", "wikipedia", "wolframalpha"]
-azure = ["azure-identity", "azure-cosmos", "openai", "azure-core", "azure-ai-formrecognizer", "azure-ai-vision", "azure-cognitiveservices-speech", "azure-search-documents"]
+azure = ["azure-ai-formrecognizer", "azure-ai-vision", "azure-cognitiveservices-speech", "azure-core", "azure-cosmos", "azure-identity", "azure-search-documents", "openai"]
 cohere = ["cohere"]
 docarray = ["docarray"]
 embeddings = ["sentence-transformers"]
-extended-testing = ["beautifulsoup4", "bibtexparser", "chardet", "jq", "pdfminer-six", "pgvector", "pypdf", "pymupdf", "pypdfium2", "tqdm", "lxml", "atlassian-python-api", "beautifulsoup4", "pandas", "telethon", "psychicapi", "zep-python", "gql", "requests-toolbelt", "html2text", "py-trello", "scikit-learn", "pyspark", "openai"]
+extended-testing = ["atlassian-python-api", "beautifulsoup4", "beautifulsoup4", "bibtexparser", "chardet", "gql", "html2text", "jq", "lxml", "openai", "pandas", "pdfminer-six", "pgvector", "psychicapi", "py-trello", "pymupdf", "pypdf", "pypdfium2", "pyspark", "requests-toolbelt", "scikit-learn", "telethon", "tqdm", "zep-python"]
-llms = ["anthropic", "cohere", "openai", "openlm", "nlpcloud", "huggingface_hub", "manifest-ml", "torch", "transformers"]
+llms = ["anthropic", "cohere", "huggingface_hub", "manifest-ml", "nlpcloud", "openai", "openlm", "torch", "transformers"]
 openai = ["openai", "tiktoken"]
 qdrant = ["qdrant-client"]
 text-helpers = ["chardet"]
56 tests/integration_tests/test_kuzu.py (new file)

@ -0,0 +1,56 @@

import shutil
import tempfile
import unittest

from langchain.graphs import KuzuGraph

EXPECTED_SCHEMA = """
Node properties: [{'properties': [('name', 'STRING')], 'label': 'Movie'}, {'properties': [('name', 'STRING'), ('birthDate', 'STRING')], 'label': 'Person'}]
Relationships properties: [{'properties': [], 'label': 'ActedIn'}]
Relationships: ['(:Person)-[:ActedIn]->(:Movie)']
"""  # noqa: E501


class TestKuzu(unittest.TestCase):
    def setUp(self) -> None:
        try:
            import kuzu
        except ImportError as e:
            raise ImportError(
                "Cannot import Python package kuzu. Please install it by running "
                "`pip install kuzu`."
            ) from e

        self.tmpdir = tempfile.mkdtemp()
        self.kuzu_database = kuzu.Database(self.tmpdir)
        self.conn = kuzu.Connection(self.kuzu_database)
        self.conn.execute("CREATE NODE TABLE Movie (name STRING, PRIMARY KEY(name))")
        self.conn.execute("CREATE (:Movie {name: 'The Godfather'})")
        self.conn.execute("CREATE (:Movie {name: 'The Godfather: Part II'})")
        self.conn.execute(
            "CREATE (:Movie {name: 'The Godfather Coda: The Death of Michael "
            "Corleone'})"
        )
        self.kuzu_graph = KuzuGraph(self.kuzu_database)

    def tearDown(self) -> None:
        shutil.rmtree(self.tmpdir, ignore_errors=True)

    def test_query(self) -> None:
        result = self.kuzu_graph.query("MATCH (n:Movie) RETURN n.name ORDER BY n.name")
        excepted_result = [
            {"n.name": "The Godfather"},
            {"n.name": "The Godfather Coda: The Death of Michael Corleone"},
            {"n.name": "The Godfather: Part II"},
        ]
        self.assertEqual(result, excepted_result)

    def test_refresh_schema(self) -> None:
        self.conn.execute(
            "CREATE NODE TABLE Person (name STRING, birthDate STRING, PRIMARY "
            "KEY(name))"
        )
        self.conn.execute("CREATE REL TABLE ActedIn (FROM Person TO Movie)")
        self.kuzu_graph.refresh_schema()
        schema = self.kuzu_graph.get_schema
        self.assertEqual(schema, EXPECTED_SCHEMA)