community[patch]: Fix GraphSparqlQAChain so that it works with Ontotext GraphDB (#15009)

- **Description:** Introduce a new `graph_kwargs` parameter on
`RdfGraph`: a dict of keyword arguments used to initialize the
`rdflib.Graph` when `query_endpoint` is set. Also, stop passing
`rdflib.graph.DATASET_DEFAULT_GRAPH_ID` as the default `identifier`
of the `rdflib.Graph` when `query_endpoint` is set.
  - **Issue:** N/A
  - **Dependencies:** N/A
  - **Twitter handle:** N/A
pull/18107/head
Neli Hateva 4 months ago committed by GitHub
parent 4d6cd5b46a
commit a01e8473f8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -2,6 +2,7 @@ from __future__ import annotations
from typing import ( from typing import (
TYPE_CHECKING, TYPE_CHECKING,
Dict,
List, List,
Optional, Optional,
) )
@ -115,6 +116,7 @@ class RdfGraph:
update_endpoint: Optional[str] = None, update_endpoint: Optional[str] = None,
standard: Optional[str] = "rdf", standard: Optional[str] = "rdf",
local_copy: Optional[str] = None, local_copy: Optional[str] = None,
graph_kwargs: Optional[Dict] = None,
) -> None: ) -> None:
""" """
Set up the RDFlib graph Set up the RDFlib graph
@ -125,6 +127,9 @@ class RdfGraph:
:param update_endpoint: SPARQL endpoint for UPDATE queries, write access :param update_endpoint: SPARQL endpoint for UPDATE queries, write access
:param standard: RDF, RDFS, or OWL :param standard: RDF, RDFS, or OWL
:param local_copy: new local copy for storing changes :param local_copy: new local copy for storing changes
:param graph_kwargs: Additional rdflib.Graph specific kwargs
that will be used to initialize it,
if query_endpoint is provided.
""" """
self.source_file = source_file self.source_file = source_file
self.serialization = serialization self.serialization = serialization
@ -135,7 +140,6 @@ class RdfGraph:
try: try:
import rdflib import rdflib
from rdflib.graph import DATASET_DEFAULT_GRAPH_ID as default
from rdflib.plugins.stores import sparqlstore from rdflib.plugins.stores import sparqlstore
except ImportError: except ImportError:
raise ValueError( raise ValueError(
@ -177,7 +181,8 @@ class RdfGraph:
else: else:
self._store = sparqlstore.SPARQLUpdateStore() self._store = sparqlstore.SPARQLUpdateStore()
self._store.open((query_endpoint, update_endpoint)) self._store.open((query_endpoint, update_endpoint))
self.graph = rdflib.Graph(self._store, identifier=default) graph_kwargs = graph_kwargs or {}
self.graph = rdflib.Graph(self._store, **graph_kwargs)
# Verify that the graph was loaded # Verify that the graph was loaded
if not len(self.graph): if not len(self.graph):

@ -1,6 +1,9 @@
FROM ontotext/graphdb:10.5.1 FROM ontotext/graphdb:10.5.1
RUN mkdir -p /opt/graphdb/dist/data/repositories/starwars RUN mkdir -p /opt/graphdb/dist/data/repositories/starwars
COPY config.ttl /opt/graphdb/dist/data/repositories/starwars/ COPY config-starwars.ttl /opt/graphdb/dist/data/repositories/starwars/config.ttl
RUN mkdir -p /opt/graphdb/dist/data/repositories/langchain
COPY config-langchain.ttl /opt/graphdb/dist/data/repositories/langchain/config.ttl
COPY starwars-data.trig / COPY starwars-data.trig /
COPY berners-lee-card.ttl /
COPY graphdb_create.sh /run.sh COPY graphdb_create.sh /run.sh
ENTRYPOINT bash /run.sh ENTRYPOINT bash /run.sh

@ -0,0 +1,114 @@
@prefix : <http://xmlns.com/foaf/0.1/> .
@prefix Be: <https://www.w3.org/People/Berners-Lee/> .
@prefix Pub: <https://timbl.com/timbl/Public/> .
@prefix blog: <http://dig.csail.mit.edu/breadcrumbs/blog/> .
@prefix card: <https://www.w3.org/People/Berners-Lee/card#> .
@prefix cc: <http://creativecommons.org/ns#> .
@prefix cert: <http://www.w3.org/ns/auth/cert#> .
@prefix con: <http://www.w3.org/2000/10/swap/pim/contact#> .
@prefix dc: <http://purl.org/dc/elements/1.1/> .
@prefix dct: <http://purl.org/dc/terms/> .
@prefix doap: <http://usefulinc.com/ns/doap#> .
@prefix geo1: <http://www.w3.org/2003/01/geo/wgs84_pos#> .
@prefix ldp: <http://www.w3.org/ns/ldp#> .
@prefix s: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix schema1: <http://schema.org/> .
@prefix sioc: <http://rdfs.org/sioc/ns#> .
@prefix solid: <http://www.w3.org/ns/solid/terms#> .
@prefix space: <http://www.w3.org/ns/pim/space#> .
@prefix vcard: <http://www.w3.org/2006/vcard/ns#> .
@prefix w3c: <http://www.w3.org/data#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
<http://dig.csail.mit.edu/2005/ajar/ajaw/data#Tabulator> doap:developer card:i .
<http://dig.csail.mit.edu/2007/01/camp/data#course> :maker card:i .
<http://dig.csail.mit.edu/data#DIG> :member card:i .
<http://wiki.ontoworld.org/index.php/_IRW2006> dc:title "Identity, Reference and the Web workshop 2006" ;
con:participant card:i .
<http://www.ecs.soton.ac.uk/~dt2/dlstuff/www2006_data#panel-panelk01> s:label "The Next Wave of the Web (Plenary Panel)" ;
con:participant card:i .
<http://www.w3.org/2000/10/swap/data#Cwm> doap:developer card:i .
<http://www.w3.org/2011/Talks/0331-hyderabad-tbl/data#talk> dct:title "Designing the Web for an Open Society" ;
:maker card:i .
w3c:W3C :member card:i .
<https://www.w3.org/DesignIssues/Overview.html> dc:title "Design Issues for the World Wide Web" ;
:maker card:i .
Be:card a :PersonalProfileDocument ;
cc:license <http://creativecommons.org/licenses/by-nc/3.0/> ;
dc:title "Tim Berners-Lee's FOAF file" ;
:maker card:i ;
:primaryTopic card:i .
blog:4 dc:title "timbl's blog on DIG" ;
s:seeAlso <http://dig.csail.mit.edu/breadcrumbs/blog/feed/4> ;
:maker card:i .
Pub:friends.ttl a :PersonalProfileDocument ;
cc:license <http://creativecommons.org/licenses/by-nc/3.0/> ;
dc:title "Tim Berners-Lee's editable profile" ;
:maker card:i ;
:primaryTopic card:i .
card:i a con:Male,
:Person ;
s:label "Tim Berners-Lee" ;
sioc:avatar <https://www.w3.org/People/Berners-Lee/images/timbl-image-by-Coz-cropped.jpg> ;
schema1:owns <https://timblbot.inrupt.net/profile/card#me> ;
s:seeAlso Pub:friends.ttl ;
con:assistant card:amy ;
con:homePage Be: ;
con:office [ con:address [ con:city "Cambridge" ;
con:country "USA" ;
con:postalCode "02139" ;
con:street "32 Vassar Street" ;
con:street2 "MIT CSAIL Building 32" ] ;
geo1:location [ geo1:lat "42.361860" ;
geo1:long "-71.091840" ] ] ;
con:preferredURI "https://www.w3.org/People/Berners-Lee/card#i" ;
con:publicHomePage Be: ;
vcard:fn "Tim Berners-Lee" ;
vcard:hasAddress [ a vcard:Work ;
vcard:locality "Cambridge" ;
vcard:postal-code "02139" ;
vcard:region "MA" ;
vcard:street-address "32 Vassar Street" ] ;
cert:key [ a cert:RSAPublicKey ;
cert:exponent 65537 ;
cert:modulus "ebe99c737bd3670239600547e5e2eb1d1497da39947b6576c3c44ffeca32cf0f2f7cbee3c47001278a90fc7fc5bcf292f741eb1fcd6bbe7f90650afb519cf13e81b2bffc6e02063ee5a55781d420b1dfaf61c15758480e66d47fb0dcb5fa7b9f7f1052e5ccbd01beee9553c3b6b51f4daf1fce991294cd09a3d1d636bc6c7656e4455d0aff06daec740ed0084aa6866fcae1359de61cc12dbe37c8fa42e977c6e727a8258bb9a3f265b27e3766fe0697f6aa0bcc81c3f026e387bd7bbc81580dc1853af2daa099186a9f59da526474ef6ec0a3d84cf400be3261b6b649dea1f78184862d34d685d2d587f09acc14cd8e578fdd2283387821296f0af39b8d8845"^^xsd:hexBinary ] ;
ldp:inbox Pub:Inbox ;
space:preferencesFile <https://timbl.com/timbl/Data/preferences.n3> ;
space:storage Pub:,
<https://timbl.inrupt.net/>,
<https://timbl.solid.community/> ;
solid:editableProfile Pub:friends.ttl ;
solid:oidcIssuer <https://timbl.com> ;
solid:profileBackgroundColor "#ffffff" ;
solid:profileHighlightColor "#00467E" ;
solid:publicTypeIndex Pub:PublicTypeIndex.ttl ;
:account <http://en.wikipedia.org/wiki/User:Timbl>,
<http://twitter.com/timberners_lee>,
<http://www.reddit.com/user/timbl/> ;
:based_near [ geo1:lat "42.361860" ;
geo1:long "-71.091840" ] ;
:family_name "Berners-Lee" ;
:givenname "Timothy" ;
:homepage Be: ;
:img <https://www.w3.org/Press/Stock/Berners-Lee/2001-europaeum-eighth.jpg> ;
:mbox <mailto:timbl@w3.org> ;
:mbox_sha1sum "965c47c5a70db7407210cef6e4e6f5374a525c5c" ;
:name "Timothy Berners-Lee" ;
:nick "TimBL",
"timbl" ;
:openid Be: ;
:title "Sir" ;
:weblog blog:4 ;
:workplaceHomepage <https://www.w3.org/> .

@ -0,0 +1,46 @@
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>.
@prefix rep: <http://www.openrdf.org/config/repository#>.
@prefix sr: <http://www.openrdf.org/config/repository/sail#>.
@prefix sail: <http://www.openrdf.org/config/sail#>.
@prefix graphdb: <http://www.ontotext.com/config/graphdb#>.
[] a rep:Repository ;
rep:repositoryID "langchain" ;
rdfs:label "" ;
rep:repositoryImpl [
rep:repositoryType "graphdb:SailRepository" ;
sr:sailImpl [
sail:sailType "graphdb:Sail" ;
graphdb:read-only "false" ;
# Inference and Validation
graphdb:ruleset "empty" ;
graphdb:disable-sameAs "true" ;
graphdb:check-for-inconsistencies "false" ;
# Indexing
graphdb:entity-id-size "32" ;
graphdb:enable-context-index "false" ;
graphdb:enablePredicateList "true" ;
graphdb:enable-fts-index "false" ;
graphdb:fts-indexes ("default" "iri") ;
graphdb:fts-string-literals-index "default" ;
graphdb:fts-iris-index "none" ;
# Queries and Updates
graphdb:query-timeout "0" ;
graphdb:throw-QueryEvaluationException-on-timeout "false" ;
graphdb:query-limit-results "0" ;
# Settable in the file but otherwise hidden in the UI and in the RDF4J console
graphdb:base-URL "http://example.org/owlim#" ;
graphdb:defaultNS "" ;
graphdb:imports "" ;
graphdb:repository-type "file-repository" ;
graphdb:storage-folder "storage" ;
graphdb:entity-index-size "10000000" ;
graphdb:in-memory-literal-properties "true" ;
graphdb:enable-literal-index "true" ;
]
].

@ -1,5 +1,4 @@
#! /bin/bash #! /bin/bash
REPOSITORY_ID="starwars"
GRAPHDB_URI="http://localhost:7200/" GRAPHDB_URI="http://localhost:7200/"
echo -e "\nUsing GraphDB: ${GRAPHDB_URI}" echo -e "\nUsing GraphDB: ${GRAPHDB_URI}"
@ -24,7 +23,10 @@ function waitGraphDBStart {
function loadData { function loadData {
echo -e "\nImporting starwars-data.trig" echo -e "\nImporting starwars-data.trig"
curl -X POST -H "Content-Type: application/x-trig" -T /starwars-data.trig ${GRAPHDB_URI}/repositories/${REPOSITORY_ID}/statements curl -X POST -H "Content-Type: application/x-trig" -T /starwars-data.trig ${GRAPHDB_URI}/repositories/starwars/statements
echo -e "\nImporting berners-lee-card.ttl"
curl -X POST -H "Content-Type:application/x-turtle" -T /berners-lee-card.ttl ${GRAPHDB_URI}/repositories/langchain/statements
} }
startGraphDB & startGraphDB &

@ -1,11 +1,18 @@
"""Test RDF/ SPARQL Graph Database Chain.""" """Test RDF/ SPARQL Graph Database Chain."""
import os import pathlib
import re
from unittest.mock import MagicMock, Mock
from langchain_community.graphs import RdfGraph from langchain_community.graphs import RdfGraph
from langchain_community.llms.openai import OpenAI
from langchain.chains import LLMChain
from langchain.chains.graph_qa.sparql import GraphSparqlQAChain from langchain.chains.graph_qa.sparql import GraphSparqlQAChain
"""
cd libs/langchain/tests/integration_tests/chains/docker-compose-ontotext-graphdb
./start.sh
"""
def test_connect_file_rdf() -> None: def test_connect_file_rdf() -> None:
""" """
@ -28,6 +35,8 @@ def test_sparql_select() -> None:
""" """
Test for generating and executing simple SPARQL SELECT query. Test for generating and executing simple SPARQL SELECT query.
""" """
from langchain_openai import ChatOpenAI
berners_lee_card = "http://www.w3.org/People/Berners-Lee/card" berners_lee_card = "http://www.w3.org/People/Berners-Lee/card"
graph = RdfGraph( graph = RdfGraph(
@ -35,33 +44,93 @@ def test_sparql_select() -> None:
standard="rdf", standard="rdf",
) )
chain = GraphSparqlQAChain.from_llm(OpenAI(temperature=0), graph=graph) question = "What is Tim Berners-Lee's work homepage?"
output = chain.run("What is Tim Berners-Lee's work homepage?") answer = "Tim Berners-Lee's work homepage is http://www.w3.org/People/Berners-Lee/."
expected_output = (
" The work homepage of Tim Berners-Lee is " chain = GraphSparqlQAChain.from_llm(
"http://www.w3.org/People/Berners-Lee/." Mock(ChatOpenAI),
graph=graph,
)
chain.sparql_intent_chain = Mock(LLMChain)
chain.sparql_generation_select_chain = Mock(LLMChain)
chain.sparql_generation_update_chain = Mock(LLMChain)
chain.sparql_intent_chain.run = Mock(return_value="SELECT")
chain.sparql_generation_select_chain.run = Mock(
return_value="""PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT ?workHomepage
WHERE {
?person rdfs:label "Tim Berners-Lee" .
?person foaf:workplaceHomepage ?workHomepage .
}"""
) )
assert output == expected_output chain.qa_chain = MagicMock(
return_value={
"text": answer,
"prompt": question,
"context": [],
}
)
chain.qa_chain.output_key = "text"
output = chain.invoke({chain.input_key: question})[chain.output_key]
assert output == answer
assert chain.sparql_intent_chain.run.call_count == 1
assert chain.sparql_generation_select_chain.run.call_count == 1
assert chain.sparql_generation_update_chain.run.call_count == 0
assert chain.qa_chain.call_count == 1
def test_sparql_insert() -> None: def test_sparql_insert(tmp_path: pathlib.Path) -> None:
""" """
Test for generating and executing simple SPARQL INSERT query. Test for generating and executing simple SPARQL INSERT query.
""" """
from langchain_openai import ChatOpenAI
berners_lee_card = "http://www.w3.org/People/Berners-Lee/card" berners_lee_card = "http://www.w3.org/People/Berners-Lee/card"
_local_copy = "test.ttl" local_copy = tmp_path / "test.ttl"
graph = RdfGraph( graph = RdfGraph(
source_file=berners_lee_card, source_file=berners_lee_card,
standard="rdf", standard="rdf",
local_copy=_local_copy, local_copy=str(local_copy),
) )
chain = GraphSparqlQAChain.from_llm(OpenAI(temperature=0), graph=graph) query = (
chain.run(
"Save that the person with the name 'Timothy Berners-Lee' " "Save that the person with the name 'Timothy Berners-Lee' "
"has a work homepage at 'http://www.w3.org/foo/bar/'" "has a work homepage at 'http://www.w3.org/foo/bar/'"
) )
chain = GraphSparqlQAChain.from_llm(
Mock(ChatOpenAI),
graph=graph,
)
chain.sparql_intent_chain = Mock(LLMChain)
chain.sparql_generation_select_chain = Mock(LLMChain)
chain.sparql_generation_update_chain = Mock(LLMChain)
chain.qa_chain = Mock(LLMChain)
chain.sparql_intent_chain.run = Mock(return_value="UPDATE")
chain.sparql_generation_update_chain.run = Mock(
return_value="""PREFIX foaf: <http://xmlns.com/foaf/0.1/>
INSERT {
?p foaf:workplaceHomepage <http://www.w3.org/foo/bar/> .
}
WHERE {
?p foaf:name "Timothy Berners-Lee" .
}"""
)
output = chain.invoke({chain.input_key: query})[chain.output_key]
assert output == "Successfully inserted triples into the graph."
assert chain.sparql_intent_chain.run.call_count == 1
assert chain.sparql_generation_select_chain.run.call_count == 0
assert chain.sparql_generation_update_chain.run.call_count == 1
assert chain.qa_chain.call_count == 0
query = ( query = (
"""PREFIX foaf: <http://xmlns.com/foaf/0.1/>\n""" """PREFIX foaf: <http://xmlns.com/foaf/0.1/>\n"""
"""SELECT ?hp\n""" """SELECT ?hp\n"""
@ -73,18 +142,14 @@ def test_sparql_insert() -> None:
output = graph.query(query) output = graph.query(query)
assert len(output) == 2 assert len(output) == 2
# clean up
try:
os.remove(_local_copy)
except OSError:
pass
def test_sparql_select_return_query() -> None: def test_sparql_select_return_query() -> None:
""" """
Test for generating and executing simple SPARQL SELECT query Test for generating and executing simple SPARQL SELECT query
and returning the generated SPARQL query. and returning the generated SPARQL query.
""" """
from langchain_openai import ChatOpenAI
berners_lee_card = "http://www.w3.org/People/Berners-Lee/card" berners_lee_card = "http://www.w3.org/People/Berners-Lee/card"
graph = RdfGraph( graph = RdfGraph(
@ -92,15 +157,112 @@ def test_sparql_select_return_query() -> None:
standard="rdf", standard="rdf",
) )
question = "What is Tim Berners-Lee's work homepage?"
answer = "Tim Berners-Lee's work homepage is http://www.w3.org/People/Berners-Lee/."
chain = GraphSparqlQAChain.from_llm( chain = GraphSparqlQAChain.from_llm(
OpenAI(temperature=0), graph=graph, return_sparql_query=True Mock(ChatOpenAI),
graph=graph,
return_sparql_query=True,
) )
output = chain("What is Tim Berners-Lee's work homepage?") chain.sparql_intent_chain = Mock(LLMChain)
chain.sparql_generation_select_chain = Mock(LLMChain)
chain.sparql_generation_update_chain = Mock(LLMChain)
# Verify the expected answer chain.sparql_intent_chain.run = Mock(return_value="SELECT")
expected_output = ( chain.sparql_generation_select_chain.run = Mock(
" The work homepage of Tim Berners-Lee is " return_value="""PREFIX foaf: <http://xmlns.com/foaf/0.1/>
"http://www.w3.org/People/Berners-Lee/." PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT ?workHomepage
WHERE {
?person rdfs:label "Tim Berners-Lee" .
?person foaf:workplaceHomepage ?workHomepage .
}"""
)
chain.qa_chain = MagicMock(
return_value={
"text": answer,
"prompt": question,
"context": [],
}
) )
assert output["result"] == expected_output chain.qa_chain.output_key = "text"
output = chain.invoke({chain.input_key: question})
assert output[chain.output_key] == answer
assert "sparql_query" in output assert "sparql_query" in output
assert chain.sparql_intent_chain.run.call_count == 1
assert chain.sparql_generation_select_chain.run.call_count == 1
assert chain.sparql_generation_update_chain.run.call_count == 0
assert chain.qa_chain.call_count == 1
def test_loading_schema_from_ontotext_graphdb() -> None:
    """
    Test that RdfGraph builds its schema text from a SPARQL query endpoint.

    Connects to the `langchain` repository at http://localhost:7200, so a
    GraphDB instance must be reachable there for the test to pass.
    """
    classes_header = (
        "In the following, each IRI is followed by the local name and "
        "optionally its description in parentheses. \n"
        "The RDF graph supports the following node types:"
    )
    relationships_header = "The RDF graph supports the following relationships:"
    # One schema entry: an IRI in angle brackets, a space, then a
    # parenthesised group.
    entry_pattern = re.compile("<[^>]+> \\([^)]+\\)")

    rdf_graph = RdfGraph(
        query_endpoint="http://localhost:7200/repositories/langchain",
        # NOTE(review): presumably forwarded to rdflib.Graph to skip its
        # default namespace bindings -- confirm against RdfGraph.__init__.
        graph_kwargs={"bind_namespaces": "none"},
    )
    schema = rdf_graph.get_schema

    assert schema.startswith(classes_header)
    assert relationships_header in schema

    # Split the schema text into the part listing node types and the part
    # listing relationships.
    split_at = schema.index(relationships_header)
    classes_section = schema[len(classes_header) : split_at]
    relationships_section = schema[split_at + len(relationships_header) :]

    assert len(entry_pattern.findall(classes_section)) == 5
    assert len(entry_pattern.findall(relationships_section)) == 58
def test_graph_qa_chain_with_ontotext_graphdb() -> None:
    """
    Test GraphSparqlQAChain on an RdfGraph backed by a SPARQL query endpoint.

    Every LLM-backed sub-chain is replaced with a mock, so no model is
    called. The graph connects to the `langchain` repository at
    http://localhost:7200, so a GraphDB instance must be reachable there.
    """
    from langchain_openai import ChatOpenAI

    question = "What is Tim Berners-Lee's work homepage?"
    expected_answer = (
        "Tim Berners-Lee's work homepage is http://www.w3.org/People/Berners-Lee/."
    )
    # Canned output of the mocked SELECT-generation chain.
    select_query = """PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT ?workHomepage
WHERE {
?person rdfs:label "Tim Berners-Lee" .
?person foaf:workplaceHomepage ?workHomepage .
}"""

    rdf_graph = RdfGraph(
        query_endpoint="http://localhost:7200/repositories/langchain",
        graph_kwargs={"bind_namespaces": "none"},
    )
    chain = GraphSparqlQAChain.from_llm(Mock(ChatOpenAI), graph=rdf_graph)

    # Swap the intent / generation sub-chains for mocks.
    for sub_chain_name in (
        "sparql_intent_chain",
        "sparql_generation_select_chain",
        "sparql_generation_update_chain",
    ):
        setattr(chain, sub_chain_name, Mock(LLMChain))
    chain.sparql_intent_chain.run = Mock(return_value="SELECT")
    chain.sparql_generation_select_chain.run = Mock(return_value=select_query)

    # The QA chain is invoked as a callable, hence MagicMock with a canned
    # result dict.
    qa_result = {
        "text": expected_answer,
        "prompt": question,
        "context": [],
    }
    chain.qa_chain = MagicMock(return_value=qa_result)
    chain.qa_chain.output_key = "text"

    result = chain.invoke({chain.input_key: question})
    assert result[chain.output_key] == expected_answer

    # The SELECT path must run exactly once; the UPDATE path must not run.
    assert chain.sparql_intent_chain.run.call_count == 1
    assert chain.sparql_generation_select_chain.run.call_count == 1
    assert chain.sparql_generation_update_chain.run.call_count == 0
    assert chain.qa_chain.call_count == 1

Loading…
Cancel
Save