From 0fce8ef178eed2a5f898f65c17179c0a01275745 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=9B=A7=E5=9B=A7?= Date: Wed, 21 Jun 2023 01:07:00 -0400 Subject: [PATCH] Add KuzuQAChain (#6454) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR adds `KuzuGraph` and `KuzuQAChain` for interacting with [Kùzu database](https://github.com/kuzudb/kuzu). Kùzu is an in-process property graph database management system (GDBMS) built for query speed and scalability. The `KuzuGraph` and `KuzuQAChain` provide the same functionality as the existing integrations with NebulaGraph and Neo4j and enable query generation and question answering over a Kùzu database. A notebook example and a simple test case have also been added. --------- Co-authored-by: Dev 2049 --- .../chains/additional/graph_kuzu_qa.ipynb | 363 ++++++++++++++++++ langchain/chains/__init__.py | 2 + langchain/chains/graph_qa/kuzu.py | 93 +++++ langchain/chains/graph_qa/prompts.py | 17 + langchain/graphs/__init__.py | 3 +- langchain/graphs/kuzu_graph.py | 90 +++++ poetry.lock | 10 +- tests/integration_tests/test_kuzu.py | 56 +++ 8 files changed, 628 insertions(+), 6 deletions(-) create mode 100644 docs/extras/modules/chains/additional/graph_kuzu_qa.ipynb create mode 100644 langchain/chains/graph_qa/kuzu.py create mode 100644 langchain/graphs/kuzu_graph.py create mode 100644 tests/integration_tests/test_kuzu.py diff --git a/docs/extras/modules/chains/additional/graph_kuzu_qa.ipynb b/docs/extras/modules/chains/additional/graph_kuzu_qa.ipynb new file mode 100644 index 00000000..5a2a5a4d --- /dev/null +++ b/docs/extras/modules/chains/additional/graph_kuzu_qa.ipynb @@ -0,0 +1,363 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# KuzuQAChain\n", + "\n", + "This notebook shows how to use LLMs to provide a natural language interface to a [Kùzu](https://kuzudb.com) database." 
+ ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[Kùzu](https://kuzudb.com) is an in-process property graph database management system. You can simply install it with `pip`:\n", + "\n", + "```bash\n", + "pip install kuzu\n", + "```\n", + "\n", + "Once installed, you can simply import it and start creating a database on the local machine and connect to it:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import kuzu\n", + "db = kuzu.Database(\"test_db\")\n", + "conn = kuzu.Connection(db)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, we create the schema for a simple movie database:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "conn.execute(\"CREATE NODE TABLE Movie (name STRING, PRIMARY KEY(name))\")\n", + "conn.execute(\"CREATE NODE TABLE Person (name STRING, birthDate STRING, PRIMARY KEY(name))\")\n", + "conn.execute(\"CREATE REL TABLE ActedIn (FROM Person TO Movie)\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Then we can insert some data." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "conn.execute(\"CREATE (:Person {name: 'Al Pacino', birthDate: '1940-04-25'})\")\n", + "conn.execute(\"CREATE (:Person {name: 'Robert De Niro', birthDate: '1943-08-17'})\")\n", + "conn.execute(\"CREATE (:Movie {name: 'The Godfather'})\")\n", + "conn.execute(\"CREATE (:Movie {name: 'The Godfather: Part II'})\")\n", + "conn.execute(\"CREATE (:Movie {name: 'The Godfather Coda: The Death of Michael Corleone'})\")\n", + "conn.execute(\"MATCH (p:Person), (m:Movie) WHERE p.name = 'Al Pacino' AND m.name = 'The Godfather' CREATE (p)-[:ActedIn]->(m)\")\n", + "conn.execute(\"MATCH (p:Person), (m:Movie) WHERE p.name = 'Al Pacino' AND m.name = 'The Godfather: Part II' CREATE (p)-[:ActedIn]->(m)\")\n", + "conn.execute(\"MATCH (p:Person), (m:Movie) WHERE p.name = 'Al Pacino' AND m.name = 'The Godfather Coda: The Death of Michael Corleone' CREATE (p)-[:ActedIn]->(m)\")\n", + "conn.execute(\"MATCH (p:Person), (m:Movie) WHERE p.name = 'Robert De Niro' AND m.name = 'The Godfather: Part II' CREATE (p)-[:ActedIn]->(m)\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Creating `KuzuQAChain`\n", + "\n", + "We can now create the `KuzuGraph` and `KuzuQAChain`. To create the `KuzuGraph` we simply need to pass the database object to the `KuzuGraph` constructor." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.chat_models import ChatOpenAI\n", + "from langchain.graphs import KuzuGraph\n", + "from langchain.chains import KuzuQAChain" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "graph = KuzuGraph(db)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "chain = KuzuQAChain.from_llm(\n", + " ChatOpenAI(temperature=0), graph=graph, verbose=True\n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Refresh graph schema information\n", + "\n", + "If the schema of the database changes, you can refresh the schema information needed to generate Cypher statements." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# graph.refresh_schema()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Node properties: [{'properties': [('name', 'STRING')], 'label': 'Movie'}, {'properties': [('name', 'STRING'), ('birthDate', 'STRING')], 'label': 'Person'}]\n", + "Relationships properties: [{'properties': [], 'label': 'ActedIn'}]\n", + "Relationships: ['(:Person)-[:ActedIn]->(:Movie)']\n", + "\n" + ] + } + ], + "source": [ + "print(graph.get_schema)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Querying the graph\n", + "\n", + "We can now use the `KuzuQAChain` to ask questions of the graph." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new chain...\u001b[0m\n", + "Generated Cypher:\n", + "\u001b[32;1m\u001b[1;3mMATCH 
(p:Person)-[:ActedIn]->(m:Movie {name: 'The Godfather: Part II'}) RETURN p.name\u001b[0m\n", + "Full Context:\n", + "\u001b[32;1m\u001b[1;3m[{'p.name': 'Al Pacino'}, {'p.name': 'Robert De Niro'}]\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n" + ] + }, + { + "data": { + "text/plain": [ + "'Al Pacino and Robert De Niro both played in The Godfather: Part II.'" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chain.run(\"Who played in The Godfather: Part II?\")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new chain...\u001b[0m\n", + "Generated Cypher:\n", + "\u001b[32;1m\u001b[1;3mMATCH (p:Person {name: 'Robert De Niro'})-[:ActedIn]->(m:Movie)\n", + "RETURN m.name\u001b[0m\n", + "Full Context:\n", + "\u001b[32;1m\u001b[1;3m[{'m.name': 'The Godfather: Part II'}]\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n" + ] + }, + { + "data": { + "text/plain": [ + "'Robert De Niro played in The Godfather: Part II.'" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chain.run(\"Robert De Niro played in which movies?\")" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new chain...\u001b[0m\n", + "Generated Cypher:\n", + "\u001b[32;1m\u001b[1;3mMATCH (p:Person {name: 'Robert De Niro'})-[:ActedIn]->(m:Movie)\n", + "RETURN p.birthDate\u001b[0m\n", + "Full Context:\n", + "\u001b[32;1m\u001b[1;3m[{'p.birthDate': '1943-08-17'}]\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n" + ] + }, + { + "data": { + "text/plain": [ + "'Robert De Niro was born on August 17, 1943.'" + ] + }, + "execution_count": 11, + "metadata": {}, + 
"output_type": "execute_result" + } + ], + "source": [ + "chain.run(\"Robert De Niro is born in which year?\")" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new chain...\u001b[0m\n", + "Generated Cypher:\n", + "\u001b[32;1m\u001b[1;3mMATCH (p:Person)-[:ActedIn]->(m:Movie{name:'The Godfather: Part II'})\n", + "WITH p, m, p.birthDate AS birthDate\n", + "ORDER BY birthDate ASC\n", + "LIMIT 1\n", + "RETURN p.name\u001b[0m\n", + "Full Context:\n", + "\u001b[32;1m\u001b[1;3m[{'p.name': 'Al Pacino'}]\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n" + ] + }, + { + "data": { + "text/plain": [ + "'The oldest actor who played in The Godfather: Part II is Al Pacino.'" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chain.run(\"Who is the oldest actor who played in The Godfather: Part II?\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/langchain/chains/__init__.py b/langchain/chains/__init__.py index b9607d77..63544755 100644 --- a/langchain/chains/__init__.py +++ b/langchain/chains/__init__.py @@ -15,6 +15,7 @@ from langchain.chains.conversational_retrieval.base import ( from langchain.chains.flare.base import FlareChain from langchain.chains.graph_qa.base import GraphQAChain from langchain.chains.graph_qa.cypher import GraphCypherQAChain +from langchain.chains.graph_qa.kuzu import KuzuQAChain from langchain.chains.graph_qa.nebulagraph 
import NebulaGraphQAChain from langchain.chains.hyde.base import HypotheticalDocumentEmbedder from langchain.chains.llm import LLMChain @@ -67,6 +68,7 @@ __all__ = [ "GraphCypherQAChain", "GraphQAChain", "HypotheticalDocumentEmbedder", + "KuzuQAChain", "LLMBashChain", "LLMChain", "LLMCheckerChain", diff --git a/langchain/chains/graph_qa/kuzu.py b/langchain/chains/graph_qa/kuzu.py new file mode 100644 index 00000000..c8de4a26 --- /dev/null +++ b/langchain/chains/graph_qa/kuzu.py @@ -0,0 +1,93 @@ +"""Question answering over a graph.""" +from __future__ import annotations + +from typing import Any, Dict, List, Optional + +from pydantic import Field + +from langchain.base_language import BaseLanguageModel +from langchain.callbacks.manager import CallbackManagerForChainRun +from langchain.chains.base import Chain +from langchain.chains.graph_qa.prompts import CYPHER_QA_PROMPT, KUZU_GENERATION_PROMPT +from langchain.chains.llm import LLMChain +from langchain.graphs.kuzu_graph import KuzuGraph +from langchain.prompts.base import BasePromptTemplate + + +class KuzuQAChain(Chain): + """Chain for question-answering against a graph by generating Cypher statements for + Kùzu. + """ + + graph: KuzuGraph = Field(exclude=True) + cypher_generation_chain: LLMChain + qa_chain: LLMChain + input_key: str = "query" #: :meta private: + output_key: str = "result" #: :meta private: + + @property + def input_keys(self) -> List[str]: + """Return the input keys. + + :meta private: + """ + return [self.input_key] + + @property + def output_keys(self) -> List[str]: + """Return the output keys. 
+ + :meta private: + """ + _output_keys = [self.output_key] + return _output_keys + + @classmethod + def from_llm( + cls, + llm: BaseLanguageModel, + *, + qa_prompt: BasePromptTemplate = CYPHER_QA_PROMPT, + cypher_prompt: BasePromptTemplate = KUZU_GENERATION_PROMPT, + **kwargs: Any, + ) -> KuzuQAChain: + """Initialize from LLM.""" + qa_chain = LLMChain(llm=llm, prompt=qa_prompt) + cypher_generation_chain = LLMChain(llm=llm, prompt=cypher_prompt) + + return cls( + qa_chain=qa_chain, + cypher_generation_chain=cypher_generation_chain, + **kwargs, + ) + + def _call( + self, + inputs: Dict[str, Any], + run_manager: Optional[CallbackManagerForChainRun] = None, + ) -> Dict[str, str]: + """Generate Cypher statement, use it to look up in db and answer question.""" + _run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager() + callbacks = _run_manager.get_child() + question = inputs[self.input_key] + + generated_cypher = self.cypher_generation_chain.run( + {"question": question, "schema": self.graph.get_schema}, callbacks=callbacks + ) + + _run_manager.on_text("Generated Cypher:", end="\n", verbose=self.verbose) + _run_manager.on_text( + generated_cypher, color="green", end="\n", verbose=self.verbose + ) + context = self.graph.query(generated_cypher) + + _run_manager.on_text("Full Context:", end="\n", verbose=self.verbose) + _run_manager.on_text( + str(context), color="green", end="\n", verbose=self.verbose + ) + + result = self.qa_chain( + {"question": question, "context": context}, + callbacks=callbacks, + ) + return {self.output_key: result[self.qa_chain.output_key]} diff --git a/langchain/chains/graph_qa/prompts.py b/langchain/chains/graph_qa/prompts.py index df3f6f13..688d530a 100644 --- a/langchain/chains/graph_qa/prompts.py +++ b/langchain/chains/graph_qa/prompts.py @@ -72,6 +72,23 @@ NGQL_GENERATION_PROMPT = PromptTemplate( input_variables=["schema", "question"], template=NGQL_GENERATION_TEMPLATE ) +KUZU_EXTRA_INSTRUCTIONS = """ +Instructions: + 
+Generate statement with Kùzu Cypher dialect (rather than standard): +1. do not use `WHERE EXISTS` clause to check the existence of a property because Kùzu database has a fixed schema. +2. do not omit relationship pattern. Always use `()-[]->()` instead of `()->()`. +3. do not include any notes or comments even if the statement does not produce the expected result. +```\n""" + +KUZU_GENERATION_TEMPLATE = CYPHER_GENERATION_TEMPLATE.replace( + "Generate Cypher", "Generate Kùzu Cypher" +).replace("Instructions:", KUZU_EXTRA_INSTRUCTIONS) + +KUZU_GENERATION_PROMPT = PromptTemplate( + input_variables=["schema", "question"], template=KUZU_GENERATION_TEMPLATE +) + CYPHER_QA_TEMPLATE = """You are an assistant that helps to form nice and human understandable answers. The information part contains the provided information that you must use to construct an answer. The provided information is authorative, you must never doubt it or try to use your internal knowledge to correct it. diff --git a/langchain/graphs/__init__.py b/langchain/graphs/__init__.py index 138efa79..23c93b46 100644 --- a/langchain/graphs/__init__.py +++ b/langchain/graphs/__init__.py @@ -1,6 +1,7 @@ """Graph implementations.""" +from langchain.graphs.kuzu_graph import KuzuGraph from langchain.graphs.nebula_graph import NebulaGraph from langchain.graphs.neo4j_graph import Neo4jGraph from langchain.graphs.networkx_graph import NetworkxEntityGraph -__all__ = ["NetworkxEntityGraph", "Neo4jGraph", "NebulaGraph"] +__all__ = ["NetworkxEntityGraph", "Neo4jGraph", "NebulaGraph", "KuzuGraph"] diff --git a/langchain/graphs/kuzu_graph.py b/langchain/graphs/kuzu_graph.py new file mode 100644 index 00000000..85841165 --- /dev/null +++ b/langchain/graphs/kuzu_graph.py @@ -0,0 +1,90 @@ +from typing import Any, Dict, List + + +class KuzuGraph: + """Kùzu wrapper for graph operations.""" + + def __init__(self, db: Any, database: str = "kuzu") -> None: + try: + import kuzu + except ImportError: + raise ImportError( + "Could not 
import Kùzu python package." + "Please install Kùzu with `pip install kuzu`." + ) + self.db = db + self.conn = kuzu.Connection(self.db) + self.database = database + self.refresh_schema() + + @property + def get_schema(self) -> str: + """Returns the schema of the Kùzu database""" + return self.schema + + def query(self, query: str, params: dict = {}) -> List[Dict[str, Any]]: + """Query Kùzu database""" + params_list = [] + for param_name in params: + params_list.append([param_name, params[param_name]]) + result = self.conn.execute(query, params_list) + column_names = result.get_column_names() + return_list = [] + while result.has_next(): + row = result.get_next() + return_list.append(dict(zip(column_names, row))) + return return_list + + def refresh_schema(self) -> None: + """Refreshes the Kùzu graph schema information""" + node_properties = [] + node_table_names = self.conn._get_node_table_names() + for table_name in node_table_names: + current_table_schema = {"properties": [], "label": table_name} + properties = self.conn._get_node_property_names(table_name) + for property_name in properties: + property_type = properties[property_name]["type"] + list_type_flag = "" + if properties[property_name]["dimension"] > 0: + if "shape" in properties[property_name]: + for s in properties[property_name]["shape"]: + list_type_flag += "[%s]" % s + else: + for i in range(properties[property_name]["dimension"]): + list_type_flag += "[]" + property_type += list_type_flag + current_table_schema["properties"].append( + (property_name, property_type) + ) + node_properties.append(current_table_schema) + + relationships = [] + rel_tables = self.conn._get_rel_table_names() + for table in rel_tables: + relationships.append( + "(:%s)-[:%s]->(:%s)" % (table["src"], table["name"], table["dst"]) + ) + + rel_properties = [] + for table in rel_tables: + current_table_schema = {"properties": [], "label": table["name"]} + properties_text = self.conn._connection.get_rel_property_names( + 
table["name"] + ).split("\n") + for i, line in enumerate(properties_text): + # The first 3 lines defines src, dst and name, so we skip them + if i < 3: + continue + if not line: + continue + property_name, property_type = line.strip().split(" ") + current_table_schema["properties"].append( + (property_name, property_type) + ) + rel_properties.append(current_table_schema) + + self.schema = ( + f"Node properties: {node_properties}\n" + f"Relationships properties: {rel_properties}\n" + f"Relationships: {relationships}\n" + ) diff --git a/poetry.lock b/poetry.lock index 0cc68e5e..8de0b4ce 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry and should not be changed by hand. +# This file is automatically @generated by Poetry 1.4.2 and should not be changed by hand. [[package]] name = "absl-py" @@ -11473,13 +11473,13 @@ cffi = {version = ">=1.11", markers = "platform_python_implementation == \"PyPy\ cffi = ["cffi (>=1.11)"] [extras] -all = ["anthropic", "cohere", "openai", "nlpcloud", "huggingface_hub", "jina", "manifest-ml", "elasticsearch", "opensearch-py", "google-search-results", "faiss-cpu", "sentence-transformers", "transformers", "spacy", "nltk", "wikipedia", "beautifulsoup4", "tiktoken", "torch", "jinja2", "pinecone-client", "pinecone-text", "pymongo", "weaviate-client", "redis", "google-api-python-client", "google-auth", "wolframalpha", "qdrant-client", "tensorflow-text", "pypdf", "networkx", "nomic", "aleph-alpha-client", "deeplake", "pgvector", "psycopg2-binary", "pyowm", "pytesseract", "html2text", "atlassian-python-api", "gptcache", "duckduckgo-search", "arxiv", "azure-identity", "clickhouse-connect", "azure-cosmos", "lancedb", "langkit", "lark", "pexpect", "pyvespa", "O365", "jq", "docarray", "steamship", "pdfminer-six", "lxml", "requests-toolbelt", "neo4j", "openlm", "azure-ai-formrecognizer", "azure-ai-vision", "azure-cognitiveservices-speech", "momento", "singlestoredb", "tigrisdb", 
"nebula3-python", "awadb"] -azure = ["azure-identity", "azure-cosmos", "openai", "azure-core", "azure-ai-formrecognizer", "azure-ai-vision", "azure-cognitiveservices-speech", "azure-search-documents"] +all = ["O365", "aleph-alpha-client", "anthropic", "arxiv", "atlassian-python-api", "awadb", "azure-ai-formrecognizer", "azure-ai-vision", "azure-cognitiveservices-speech", "azure-cosmos", "azure-identity", "beautifulsoup4", "clickhouse-connect", "cohere", "deeplake", "docarray", "duckduckgo-search", "elasticsearch", "faiss-cpu", "google-api-python-client", "google-auth", "google-search-results", "gptcache", "html2text", "huggingface_hub", "jina", "jinja2", "jq", "lancedb", "langkit", "lark", "lxml", "manifest-ml", "momento", "nebula3-python", "neo4j", "networkx", "nlpcloud", "nltk", "nomic", "openai", "openlm", "opensearch-py", "pdfminer-six", "pexpect", "pgvector", "pinecone-client", "pinecone-text", "psycopg2-binary", "pymongo", "pyowm", "pypdf", "pytesseract", "pyvespa", "qdrant-client", "redis", "requests-toolbelt", "sentence-transformers", "singlestoredb", "spacy", "steamship", "tensorflow-text", "tigrisdb", "tiktoken", "torch", "transformers", "weaviate-client", "wikipedia", "wolframalpha"] +azure = ["azure-ai-formrecognizer", "azure-ai-vision", "azure-cognitiveservices-speech", "azure-core", "azure-cosmos", "azure-identity", "azure-search-documents", "openai"] cohere = ["cohere"] docarray = ["docarray"] embeddings = ["sentence-transformers"] -extended-testing = ["beautifulsoup4", "bibtexparser", "chardet", "jq", "pdfminer-six", "pgvector", "pypdf", "pymupdf", "pypdfium2", "tqdm", "lxml", "atlassian-python-api", "beautifulsoup4", "pandas", "telethon", "psychicapi", "zep-python", "gql", "requests-toolbelt", "html2text", "py-trello", "scikit-learn", "pyspark", "openai"] -llms = ["anthropic", "cohere", "openai", "openlm", "nlpcloud", "huggingface_hub", "manifest-ml", "torch", "transformers"] +extended-testing = ["atlassian-python-api", "beautifulsoup4", 
"beautifulsoup4", "bibtexparser", "chardet", "gql", "html2text", "jq", "lxml", "openai", "pandas", "pdfminer-six", "pgvector", "psychicapi", "py-trello", "pymupdf", "pypdf", "pypdfium2", "pyspark", "requests-toolbelt", "scikit-learn", "telethon", "tqdm", "zep-python"] +llms = ["anthropic", "cohere", "huggingface_hub", "manifest-ml", "nlpcloud", "openai", "openlm", "torch", "transformers"] openai = ["openai", "tiktoken"] qdrant = ["qdrant-client"] text-helpers = ["chardet"] diff --git a/tests/integration_tests/test_kuzu.py b/tests/integration_tests/test_kuzu.py new file mode 100644 index 00000000..c6cd36e3 --- /dev/null +++ b/tests/integration_tests/test_kuzu.py @@ -0,0 +1,56 @@ +import shutil +import tempfile +import unittest + +from langchain.graphs import KuzuGraph + +EXPECTED_SCHEMA = """ +Node properties: [{'properties': [('name', 'STRING')], 'label': 'Movie'}, {'properties': [('name', 'STRING'), ('birthDate', 'STRING')], 'label': 'Person'}] +Relationships properties: [{'properties': [], 'label': 'ActedIn'}] +Relationships: ['(:Person)-[:ActedIn]->(:Movie)'] +""" # noqa: E501 + + +class TestKuzu(unittest.TestCase): + def setUp(self) -> None: + try: + import kuzu + except ImportError as e: + raise ImportError( + "Cannot import Python package kuzu. Please install it by running " + "`pip install kuzu`." 
+ ) from e + + self.tmpdir = tempfile.mkdtemp() + self.kuzu_database = kuzu.Database(self.tmpdir) + self.conn = kuzu.Connection(self.kuzu_database) + self.conn.execute("CREATE NODE TABLE Movie (name STRING, PRIMARY KEY(name))") + self.conn.execute("CREATE (:Movie {name: 'The Godfather'})") + self.conn.execute("CREATE (:Movie {name: 'The Godfather: Part II'})") + self.conn.execute( + "CREATE (:Movie {name: 'The Godfather Coda: The Death of Michael " + "Corleone'})" + ) + self.kuzu_graph = KuzuGraph(self.kuzu_database) + + def tearDown(self) -> None: + shutil.rmtree(self.tmpdir, ignore_errors=True) + + def test_query(self) -> None: + result = self.kuzu_graph.query("MATCH (n:Movie) RETURN n.name ORDER BY n.name") + excepted_result = [ + {"n.name": "The Godfather"}, + {"n.name": "The Godfather Coda: The Death of Michael Corleone"}, + {"n.name": "The Godfather: Part II"}, + ] + self.assertEqual(result, excepted_result) + + def test_refresh_schema(self) -> None: + self.conn.execute( + "CREATE NODE TABLE Person (name STRING, birthDate STRING, PRIMARY " + "KEY(name))" + ) + self.conn.execute("CREATE REL TABLE ActedIn (FROM Person TO Movie)") + self.kuzu_graph.refresh_schema() + schema = self.kuzu_graph.get_schema + self.assertEqual(schema, EXPECTED_SCHEMA)