From a01e8473f84c3ef08e5e22debf6f7b747dc7a335 Mon Sep 17 00:00:00 2001 From: Neli Hateva Date: Mon, 26 Feb 2024 05:05:21 +0200 Subject: [PATCH] community[patch]: Fix GraphSparqlQAChain so that it works with Ontotext GraphDB (#15009) - **Description:** Introduce a new parameter `graph_kwargs` to `RdfGraph` - parameters used to initialize the `rdflib.Graph` if `query_endpoint` is set. Also, do not set `rdflib.graph.DATASET_DEFAULT_GRAPH_ID` as default value for the `rdflib.Graph` `identifier` if `query_endpoint` is set. - **Issue:** N/A - **Dependencies:** N/A - **Twitter handle:** N/A --- .../langchain_community/graphs/rdf_graph.py | 9 +- .../Dockerfile | 5 +- .../berners-lee-card.ttl | 114 ++++++++++ .../config-langchain.ttl | 46 ++++ .../{config.ttl => config-starwars.ttl} | 0 .../graphdb_create.sh | 6 +- .../chains/test_graph_database_sparql.py | 214 +++++++++++++++--- 7 files changed, 363 insertions(+), 31 deletions(-) create mode 100644 libs/langchain/tests/integration_tests/chains/docker-compose-ontotext-graphdb/berners-lee-card.ttl create mode 100644 libs/langchain/tests/integration_tests/chains/docker-compose-ontotext-graphdb/config-langchain.ttl rename libs/langchain/tests/integration_tests/chains/docker-compose-ontotext-graphdb/{config.ttl => config-starwars.ttl} (100%) diff --git a/libs/community/langchain_community/graphs/rdf_graph.py b/libs/community/langchain_community/graphs/rdf_graph.py index 1a2b89ba87..d1061cc697 100644 --- a/libs/community/langchain_community/graphs/rdf_graph.py +++ b/libs/community/langchain_community/graphs/rdf_graph.py @@ -2,6 +2,7 @@ from __future__ import annotations from typing import ( TYPE_CHECKING, + Dict, List, Optional, ) @@ -115,6 +116,7 @@ class RdfGraph: update_endpoint: Optional[str] = None, standard: Optional[str] = "rdf", local_copy: Optional[str] = None, + graph_kwargs: Optional[Dict] = None, ) -> None: """ Set up the RDFlib graph @@ -125,6 +127,9 @@ class RdfGraph: :param update_endpoint: SPARQL endpoint for UPDATE queries, write access :param standard: RDF, RDFS, or OWL :param local_copy: new local copy for storing changes + :param graph_kwargs: Additional rdflib.Graph specific kwargs + that will be used to initialize it, + if query_endpoint is provided. """ self.source_file = source_file self.serialization = serialization @@ -135,7 +140,6 @@ class RdfGraph: try: import rdflib - from rdflib.graph import DATASET_DEFAULT_GRAPH_ID as default from rdflib.plugins.stores import sparqlstore except ImportError: raise ValueError( @@ -177,7 +181,8 @@ class RdfGraph: else: self._store = sparqlstore.SPARQLUpdateStore() self._store.open((query_endpoint, update_endpoint)) - self.graph = rdflib.Graph(self._store, identifier=default) + graph_kwargs = graph_kwargs or {} + self.graph = rdflib.Graph(self._store, **graph_kwargs) # Verify that the graph was loaded if not len(self.graph): diff --git a/libs/langchain/tests/integration_tests/chains/docker-compose-ontotext-graphdb/Dockerfile b/libs/langchain/tests/integration_tests/chains/docker-compose-ontotext-graphdb/Dockerfile index 29d0b239a8..5d8c968183 100644 --- a/libs/langchain/tests/integration_tests/chains/docker-compose-ontotext-graphdb/Dockerfile +++ b/libs/langchain/tests/integration_tests/chains/docker-compose-ontotext-graphdb/Dockerfile @@ -1,6 +1,9 @@ FROM ontotext/graphdb:10.5.1 RUN mkdir -p /opt/graphdb/dist/data/repositories/starwars -COPY config.ttl /opt/graphdb/dist/data/repositories/starwars/ +COPY config-starwars.ttl /opt/graphdb/dist/data/repositories/starwars/config.ttl +RUN mkdir -p /opt/graphdb/dist/data/repositories/langchain +COPY config-langchain.ttl /opt/graphdb/dist/data/repositories/langchain/config.ttl COPY starwars-data.trig / +COPY berners-lee-card.ttl / COPY graphdb_create.sh /run.sh ENTRYPOINT bash /run.sh \ No newline at end of file diff --git a/libs/langchain/tests/integration_tests/chains/docker-compose-ontotext-graphdb/berners-lee-card.ttl b/libs/langchain/tests/integration_tests/chains/docker-compose-ontotext-graphdb/berners-lee-card.ttl new file mode 100644 index 0000000000..503138dff8 --- /dev/null +++ b/libs/langchain/tests/integration_tests/chains/docker-compose-ontotext-graphdb/berners-lee-card.ttl @@ -0,0 +1,114 @@ +@prefix : . +@prefix Be: . +@prefix Pub: . +@prefix blog: . +@prefix card: . +@prefix cc: . +@prefix cert: . +@prefix con: . +@prefix dc: . +@prefix dct: . +@prefix doap: . +@prefix geo1: . +@prefix ldp: . +@prefix s: . +@prefix schema1: . +@prefix sioc: . +@prefix solid: . +@prefix space: . +@prefix vcard: . +@prefix w3c: . +@prefix xsd: . + + doap:developer card:i . + + :maker card:i . + + :member card:i . + + dc:title "Identity, Reference and the Web workshop 2006" ; + con:participant card:i . + + s:label "The Next Wave of the Web (Plenary Panel)" ; + con:participant card:i . + + doap:developer card:i . + + dct:title "Designing the Web for an Open Society" ; + :maker card:i . + +w3c:W3C :member card:i . + + dc:title "Design Issues for the World Wide Web" ; + :maker card:i . + +Be:card a :PersonalProfileDocument ; + cc:license ; + dc:title "Tim Berners-Lee's FOAF file" ; + :maker card:i ; + :primaryTopic card:i . + +blog:4 dc:title "timbl's blog on DIG" ; + s:seeAlso ; + :maker card:i . + +Pub:friends.ttl a :PersonalProfileDocument ; + cc:license ; + dc:title "Tim Berners-Lee's editable profile" ; + :maker card:i ; + :primaryTopic card:i . + +card:i a con:Male, + :Person ; + s:label "Tim Berners-Lee" ; + sioc:avatar ; + schema1:owns ; + s:seeAlso Pub:friends.ttl ; + con:assistant card:amy ; + con:homePage Be: ; + con:office [ con:address [ con:city "Cambridge" ; + con:country "USA" ; + con:postalCode "02139" ; + con:street "32 Vassar Street" ; + con:street2 "MIT CSAIL Building 32" ] ; + geo1:location [ geo1:lat "42.361860" ; + geo1:long "-71.091840" ] ] ; + con:preferredURI "https://www.w3.org/People/Berners-Lee/card#i" ; + con:publicHomePage Be: ; + vcard:fn "Tim Berners-Lee" ; + vcard:hasAddress [ a vcard:Work ; + vcard:locality "Cambridge" ; + vcard:postal-code "02139" ; + vcard:region "MA" ; + vcard:street-address "32 Vassar Street" ] ; + cert:key [ a cert:RSAPublicKey ; + cert:exponent 65537 ; + cert:modulus "ebe99c737bd3670239600547e5e2eb1d1497da39947b6576c3c44ffeca32cf0f2f7cbee3c47001278a90fc7fc5bcf292f741eb1fcd6bbe7f90650afb519cf13e81b2bffc6e02063ee5a55781d420b1dfaf61c15758480e66d47fb0dcb5fa7b9f7f1052e5ccbd01beee9553c3b6b51f4daf1fce991294cd09a3d1d636bc6c7656e4455d0aff06daec740ed0084aa6866fcae1359de61cc12dbe37c8fa42e977c6e727a8258bb9a3f265b27e3766fe0697f6aa0bcc81c3f026e387bd7bbc81580dc1853af2daa099186a9f59da526474ef6ec0a3d84cf400be3261b6b649dea1f78184862d34d685d2d587f09acc14cd8e578fdd2283387821296f0af39b8d8845"^^xsd:hexBinary ] ; + ldp:inbox Pub:Inbox ; + space:preferencesFile ; + space:storage Pub:, + , + ; + solid:editableProfile Pub:friends.ttl ; + solid:oidcIssuer ; + solid:profileBackgroundColor "#ffffff" ; + solid:profileHighlightColor "#00467E" ; + solid:publicTypeIndex Pub:PublicTypeIndex.ttl ; + :account , + , + ; + :based_near [ geo1:lat "42.361860" ; + geo1:long "-71.091840" ] ; + :family_name "Berners-Lee" ; + :givenname "Timothy" ; + :homepage Be: ; + :img ; + :mbox ; + :mbox_sha1sum "965c47c5a70db7407210cef6e4e6f5374a525c5c" ; + :name "Timothy Berners-Lee" ; + :nick "TimBL", + "timbl" ; + :openid Be: ; + :title "Sir" ; + :weblog blog:4 ; + :workplaceHomepage . diff --git a/libs/langchain/tests/integration_tests/chains/docker-compose-ontotext-graphdb/config-langchain.ttl b/libs/langchain/tests/integration_tests/chains/docker-compose-ontotext-graphdb/config-langchain.ttl new file mode 100644 index 0000000000..dcbdeeebe1 --- /dev/null +++ b/libs/langchain/tests/integration_tests/chains/docker-compose-ontotext-graphdb/config-langchain.ttl @@ -0,0 +1,46 @@ +@prefix rdfs: . +@prefix rep: . +@prefix sr: . +@prefix sail: . +@prefix graphdb: . + +[] a rep:Repository ; + rep:repositoryID "langchain" ; + rdfs:label "" ; + rep:repositoryImpl [ + rep:repositoryType "graphdb:SailRepository" ; + sr:sailImpl [ + sail:sailType "graphdb:Sail" ; + + graphdb:read-only "false" ; + + # Inference and Validation + graphdb:ruleset "empty" ; + graphdb:disable-sameAs "true" ; + graphdb:check-for-inconsistencies "false" ; + + # Indexing + graphdb:entity-id-size "32" ; + graphdb:enable-context-index "false" ; + graphdb:enablePredicateList "true" ; + graphdb:enable-fts-index "false" ; + graphdb:fts-indexes ("default" "iri") ; + graphdb:fts-string-literals-index "default" ; + graphdb:fts-iris-index "none" ; + + # Queries and Updates + graphdb:query-timeout "0" ; + graphdb:throw-QueryEvaluationException-on-timeout "false" ; + graphdb:query-limit-results "0" ; + + # Settable in the file but otherwise hidden in the UI and in the RDF4J console + graphdb:base-URL "http://example.org/owlim#" ; + graphdb:defaultNS "" ; + graphdb:imports "" ; + graphdb:repository-type "file-repository" ; + graphdb:storage-folder "storage" ; + graphdb:entity-index-size "10000000" ; + graphdb:in-memory-literal-properties "true" ; + graphdb:enable-literal-index "true" ; + ] + ]. diff --git a/libs/langchain/tests/integration_tests/chains/docker-compose-ontotext-graphdb/config.ttl b/libs/langchain/tests/integration_tests/chains/docker-compose-ontotext-graphdb/config-starwars.ttl similarity index 100% rename from libs/langchain/tests/integration_tests/chains/docker-compose-ontotext-graphdb/config.ttl rename to libs/langchain/tests/integration_tests/chains/docker-compose-ontotext-graphdb/config-starwars.ttl diff --git a/libs/langchain/tests/integration_tests/chains/docker-compose-ontotext-graphdb/graphdb_create.sh b/libs/langchain/tests/integration_tests/chains/docker-compose-ontotext-graphdb/graphdb_create.sh index 9e01c9ba48..48e2ce8d6f 100644 --- a/libs/langchain/tests/integration_tests/chains/docker-compose-ontotext-graphdb/graphdb_create.sh +++ b/libs/langchain/tests/integration_tests/chains/docker-compose-ontotext-graphdb/graphdb_create.sh @@ -1,5 +1,4 @@ #! /bin/bash -REPOSITORY_ID="starwars" GRAPHDB_URI="http://localhost:7200/" echo -e "\nUsing GraphDB: ${GRAPHDB_URI}" @@ -24,7 +23,10 @@ function waitGraphDBStart { function loadData { echo -e "\nImporting starwars-data.trig" - curl -X POST -H "Content-Type: application/x-trig" -T /starwars-data.trig ${GRAPHDB_URI}/repositories/${REPOSITORY_ID}/statements + curl -X POST -H "Content-Type: application/x-trig" -T /starwars-data.trig ${GRAPHDB_URI}/repositories/starwars/statements + + echo -e "\nImporting berners-lee-card.ttl" + curl -X POST -H "Content-Type:application/x-turtle" -T /berners-lee-card.ttl ${GRAPHDB_URI}/repositories/langchain/statements } startGraphDB & diff --git a/libs/langchain/tests/integration_tests/chains/test_graph_database_sparql.py b/libs/langchain/tests/integration_tests/chains/test_graph_database_sparql.py index 844e0ce86f..ffa1afae04 100644 --- a/libs/langchain/tests/integration_tests/chains/test_graph_database_sparql.py +++ b/libs/langchain/tests/integration_tests/chains/test_graph_database_sparql.py @@ -1,11 +1,18 @@ """Test RDF/ SPARQL Graph Database Chain.""" -import os +import pathlib +import re +from unittest.mock import MagicMock, Mock from langchain_community.graphs import RdfGraph -from langchain_community.llms.openai import OpenAI +from langchain.chains import LLMChain from langchain.chains.graph_qa.sparql import GraphSparqlQAChain +""" +cd libs/langchain/tests/integration_tests/chains/docker-compose-ontotext-graphdb +./start.sh +""" + def test_connect_file_rdf() -> None: """ @@ -28,6 +35,8 @@ def test_sparql_select() -> None: """ Test for generating and executing simple SPARQL SELECT query. """ + from langchain_openai import ChatOpenAI + berners_lee_card = "http://www.w3.org/People/Berners-Lee/card" graph = RdfGraph( @@ -35,33 +44,93 @@ def test_sparql_select() -> None: standard="rdf", ) - chain = GraphSparqlQAChain.from_llm(OpenAI(temperature=0), graph=graph) - output = chain.run("What is Tim Berners-Lee's work homepage?") - expected_output = ( - " The work homepage of Tim Berners-Lee is " - "http://www.w3.org/People/Berners-Lee/." + question = "What is Tim Berners-Lee's work homepage?" + answer = "Tim Berners-Lee's work homepage is http://www.w3.org/People/Berners-Lee/." + + chain = GraphSparqlQAChain.from_llm( + Mock(ChatOpenAI), + graph=graph, + ) + chain.sparql_intent_chain = Mock(LLMChain) + chain.sparql_generation_select_chain = Mock(LLMChain) + chain.sparql_generation_update_chain = Mock(LLMChain) + + chain.sparql_intent_chain.run = Mock(return_value="SELECT") + chain.sparql_generation_select_chain.run = Mock( + return_value="""PREFIX foaf: + PREFIX rdfs: + SELECT ?workHomepage + WHERE { + ?person rdfs:label "Tim Berners-Lee" . + ?person foaf:workplaceHomepage ?workHomepage . + }""" ) - assert output == expected_output + chain.qa_chain = MagicMock( + return_value={ + "text": answer, + "prompt": question, + "context": [], + } + ) + chain.qa_chain.output_key = "text" + + output = chain.invoke({chain.input_key: question})[chain.output_key] + assert output == answer + + assert chain.sparql_intent_chain.run.call_count == 1 + assert chain.sparql_generation_select_chain.run.call_count == 1 + assert chain.sparql_generation_update_chain.run.call_count == 0 + assert chain.qa_chain.call_count == 1 -def test_sparql_insert() -> None: +def test_sparql_insert(tmp_path: pathlib.Path) -> None: """ Test for generating and executing simple SPARQL INSERT query. """ + from langchain_openai import ChatOpenAI + berners_lee_card = "http://www.w3.org/People/Berners-Lee/card" - _local_copy = "test.ttl" + local_copy = tmp_path / "test.ttl" graph = RdfGraph( source_file=berners_lee_card, standard="rdf", - local_copy=_local_copy, + local_copy=str(local_copy), ) - chain = GraphSparqlQAChain.from_llm(OpenAI(temperature=0), graph=graph) - chain.run( + query = ( "Save that the person with the name 'Timothy Berners-Lee' " "has a work homepage at 'http://www.w3.org/foo/bar/'" ) + + chain = GraphSparqlQAChain.from_llm( + Mock(ChatOpenAI), + graph=graph, + ) + chain.sparql_intent_chain = Mock(LLMChain) + chain.sparql_generation_select_chain = Mock(LLMChain) + chain.sparql_generation_update_chain = Mock(LLMChain) + chain.qa_chain = Mock(LLMChain) + + chain.sparql_intent_chain.run = Mock(return_value="UPDATE") + chain.sparql_generation_update_chain.run = Mock( + return_value="""PREFIX foaf: + INSERT { + ?p foaf:workplaceHomepage . + } + WHERE { + ?p foaf:name "Timothy Berners-Lee" . + }""" + ) + + output = chain.invoke({chain.input_key: query})[chain.output_key] + assert output == "Successfully inserted triples into the graph." + + assert chain.sparql_intent_chain.run.call_count == 1 + assert chain.sparql_generation_select_chain.run.call_count == 0 + assert chain.sparql_generation_update_chain.run.call_count == 1 + assert chain.qa_chain.call_count == 0 + query = ( """PREFIX foaf: \n""" """SELECT ?hp\n""" @@ -73,18 +142,14 @@ def test_sparql_insert() -> None: output = graph.query(query) assert len(output) == 2 - # clean up - try: - os.remove(_local_copy) - except OSError: - pass - def test_sparql_select_return_query() -> None: """ Test for generating and executing simple SPARQL SELECT query and returning the generated SPARQL query. """ + from langchain_openai import ChatOpenAI + berners_lee_card = "http://www.w3.org/People/Berners-Lee/card" graph = RdfGraph( @@ -92,15 +157,112 @@ def test_sparql_select_return_query() -> None: standard="rdf", ) + question = "What is Tim Berners-Lee's work homepage?" + answer = "Tim Berners-Lee's work homepage is http://www.w3.org/People/Berners-Lee/." + chain = GraphSparqlQAChain.from_llm( - OpenAI(temperature=0), graph=graph, return_sparql_query=True + Mock(ChatOpenAI), + graph=graph, + return_sparql_query=True, ) - output = chain("What is Tim Berners-Lee's work homepage?") + chain.sparql_intent_chain = Mock(LLMChain) + chain.sparql_generation_select_chain = Mock(LLMChain) + chain.sparql_generation_update_chain = Mock(LLMChain) - # Verify the expected answer - expected_output = ( - " The work homepage of Tim Berners-Lee is " - "http://www.w3.org/People/Berners-Lee/." + chain.sparql_intent_chain.run = Mock(return_value="SELECT") + chain.sparql_generation_select_chain.run = Mock( + return_value="""PREFIX foaf: + PREFIX rdfs: + SELECT ?workHomepage + WHERE { + ?person rdfs:label "Tim Berners-Lee" . + ?person foaf:workplaceHomepage ?workHomepage . + }""" + ) + chain.qa_chain = MagicMock( + return_value={ + "text": answer, + "prompt": question, + "context": [], + } ) - assert output["result"] == expected_output + chain.qa_chain.output_key = "text" + + output = chain.invoke({chain.input_key: question}) + assert output[chain.output_key] == answer assert "sparql_query" in output + + assert chain.sparql_intent_chain.run.call_count == 1 + assert chain.sparql_generation_select_chain.run.call_count == 1 + assert chain.sparql_generation_update_chain.run.call_count == 0 + assert chain.qa_chain.call_count == 1 + + +def test_loading_schema_from_ontotext_graphdb() -> None: + graph = RdfGraph( + query_endpoint="http://localhost:7200/repositories/langchain", + graph_kwargs={"bind_namespaces": "none"}, + ) + schema = graph.get_schema + prefix = ( + "In the following, each IRI is followed by the local name and " + "optionally its description in parentheses. \n" + "The RDF graph supports the following node types:" + ) + assert schema.startswith(prefix) + + infix = "The RDF graph supports the following relationships:" + assert infix in schema + + classes = schema[len(prefix) : schema.index(infix)] + assert len(re.findall("<[^>]+> \\([^)]+\\)", classes)) == 5 + + relationships = schema[schema.index(infix) + len(infix) :] + assert len(re.findall("<[^>]+> \\([^)]+\\)", relationships)) == 58 + + +def test_graph_qa_chain_with_ontotext_graphdb() -> None: + from langchain_openai import ChatOpenAI + + question = "What is Tim Berners-Lee's work homepage?" + answer = "Tim Berners-Lee's work homepage is http://www.w3.org/People/Berners-Lee/." + + graph = RdfGraph( + query_endpoint="http://localhost:7200/repositories/langchain", + graph_kwargs={"bind_namespaces": "none"}, + ) + + chain = GraphSparqlQAChain.from_llm( + Mock(ChatOpenAI), + graph=graph, + ) + chain.sparql_intent_chain = Mock(LLMChain) + chain.sparql_generation_select_chain = Mock(LLMChain) + chain.sparql_generation_update_chain = Mock(LLMChain) + + chain.sparql_intent_chain.run = Mock(return_value="SELECT") + chain.sparql_generation_select_chain.run = Mock( + return_value="""PREFIX foaf: + PREFIX rdfs: + SELECT ?workHomepage + WHERE { + ?person rdfs:label "Tim Berners-Lee" . + ?person foaf:workplaceHomepage ?workHomepage . + }""" + ) + chain.qa_chain = MagicMock( + return_value={ + "text": answer, + "prompt": question, + "context": [], + } + ) + chain.qa_chain.output_key = "text" + + output = chain.invoke({chain.input_key: question})[chain.output_key] + assert output == answer + + assert chain.sparql_intent_chain.run.call_count == 1 + assert chain.sparql_generation_select_chain.run.call_count == 1 + assert chain.sparql_generation_update_chain.run.call_count == 0 + assert chain.qa_chain.call_count == 1