community[patch]: Fix GraphSparqlQAChain so that it works with Ontotext GraphDB (#15009)

- **Description:** Introduce a new `graph_kwargs` parameter on
`RdfGraph`: keyword arguments used to initialize the `rdflib.Graph`
when `query_endpoint` is set. Also, stop passing
`rdflib.graph.DATASET_DEFAULT_GRAPH_ID` as the default value of the
`rdflib.Graph` `identifier` when `query_endpoint` is set.
  - **Issue:** N/A
  - **Dependencies:** N/A
  - **Twitter handle:** N/A
pull/18107/head
Neli Hateva 4 months ago committed by GitHub
parent 4d6cd5b46a
commit a01e8473f8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -2,6 +2,7 @@ from __future__ import annotations
from typing import (
TYPE_CHECKING,
Dict,
List,
Optional,
)
@ -115,6 +116,7 @@ class RdfGraph:
update_endpoint: Optional[str] = None,
standard: Optional[str] = "rdf",
local_copy: Optional[str] = None,
graph_kwargs: Optional[Dict] = None,
) -> None:
"""
Set up the RDFlib graph
@ -125,6 +127,9 @@ class RdfGraph:
:param update_endpoint: SPARQL endpoint for UPDATE queries, write access
:param standard: RDF, RDFS, or OWL
:param local_copy: new local copy for storing changes
:param graph_kwargs: Additional rdflib.Graph specific kwargs
that will be used to initialize it,
if query_endpoint is provided.
"""
self.source_file = source_file
self.serialization = serialization
@ -135,7 +140,6 @@ class RdfGraph:
try:
import rdflib
from rdflib.graph import DATASET_DEFAULT_GRAPH_ID as default
from rdflib.plugins.stores import sparqlstore
except ImportError:
raise ValueError(
@ -177,7 +181,8 @@ class RdfGraph:
else:
self._store = sparqlstore.SPARQLUpdateStore()
self._store.open((query_endpoint, update_endpoint))
self.graph = rdflib.Graph(self._store, identifier=default)
graph_kwargs = graph_kwargs or {}
self.graph = rdflib.Graph(self._store, **graph_kwargs)
# Verify that the graph was loaded
if not len(self.graph):

@ -1,6 +1,9 @@
FROM ontotext/graphdb:10.5.1
RUN mkdir -p /opt/graphdb/dist/data/repositories/starwars
COPY config.ttl /opt/graphdb/dist/data/repositories/starwars/
COPY config-starwars.ttl /opt/graphdb/dist/data/repositories/starwars/config.ttl
RUN mkdir -p /opt/graphdb/dist/data/repositories/langchain
COPY config-langchain.ttl /opt/graphdb/dist/data/repositories/langchain/config.ttl
COPY starwars-data.trig /
COPY berners-lee-card.ttl /
COPY graphdb_create.sh /run.sh
ENTRYPOINT bash /run.sh

@ -0,0 +1,114 @@
@prefix : <http://xmlns.com/foaf/0.1/> .
@prefix Be: <https://www.w3.org/People/Berners-Lee/> .
@prefix Pub: <https://timbl.com/timbl/Public/> .
@prefix blog: <http://dig.csail.mit.edu/breadcrumbs/blog/> .
@prefix card: <https://www.w3.org/People/Berners-Lee/card#> .
@prefix cc: <http://creativecommons.org/ns#> .
@prefix cert: <http://www.w3.org/ns/auth/cert#> .
@prefix con: <http://www.w3.org/2000/10/swap/pim/contact#> .
@prefix dc: <http://purl.org/dc/elements/1.1/> .
@prefix dct: <http://purl.org/dc/terms/> .
@prefix doap: <http://usefulinc.com/ns/doap#> .
@prefix geo1: <http://www.w3.org/2003/01/geo/wgs84_pos#> .
@prefix ldp: <http://www.w3.org/ns/ldp#> .
@prefix s: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix schema1: <http://schema.org/> .
@prefix sioc: <http://rdfs.org/sioc/ns#> .
@prefix solid: <http://www.w3.org/ns/solid/terms#> .
@prefix space: <http://www.w3.org/ns/pim/space#> .
@prefix vcard: <http://www.w3.org/2006/vcard/ns#> .
@prefix w3c: <http://www.w3.org/data#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
<http://dig.csail.mit.edu/2005/ajar/ajaw/data#Tabulator> doap:developer card:i .
<http://dig.csail.mit.edu/2007/01/camp/data#course> :maker card:i .
<http://dig.csail.mit.edu/data#DIG> :member card:i .
<http://wiki.ontoworld.org/index.php/_IRW2006> dc:title "Identity, Reference and the Web workshop 2006" ;
con:participant card:i .
<http://www.ecs.soton.ac.uk/~dt2/dlstuff/www2006_data#panel-panelk01> s:label "The Next Wave of the Web (Plenary Panel)" ;
con:participant card:i .
<http://www.w3.org/2000/10/swap/data#Cwm> doap:developer card:i .
<http://www.w3.org/2011/Talks/0331-hyderabad-tbl/data#talk> dct:title "Designing the Web for an Open Society" ;
:maker card:i .
w3c:W3C :member card:i .
<https://www.w3.org/DesignIssues/Overview.html> dc:title "Design Issues for the World Wide Web" ;
:maker card:i .
Be:card a :PersonalProfileDocument ;
cc:license <http://creativecommons.org/licenses/by-nc/3.0/> ;
dc:title "Tim Berners-Lee's FOAF file" ;
:maker card:i ;
:primaryTopic card:i .
blog:4 dc:title "timbl's blog on DIG" ;
s:seeAlso <http://dig.csail.mit.edu/breadcrumbs/blog/feed/4> ;
:maker card:i .
Pub:friends.ttl a :PersonalProfileDocument ;
cc:license <http://creativecommons.org/licenses/by-nc/3.0/> ;
dc:title "Tim Berners-Lee's editable profile" ;
:maker card:i ;
:primaryTopic card:i .
card:i a con:Male,
:Person ;
s:label "Tim Berners-Lee" ;
sioc:avatar <https://www.w3.org/People/Berners-Lee/images/timbl-image-by-Coz-cropped.jpg> ;
schema1:owns <https://timblbot.inrupt.net/profile/card#me> ;
s:seeAlso Pub:friends.ttl ;
con:assistant card:amy ;
con:homePage Be: ;
con:office [ con:address [ con:city "Cambridge" ;
con:country "USA" ;
con:postalCode "02139" ;
con:street "32 Vassar Street" ;
con:street2 "MIT CSAIL Building 32" ] ;
geo1:location [ geo1:lat "42.361860" ;
geo1:long "-71.091840" ] ] ;
con:preferredURI "https://www.w3.org/People/Berners-Lee/card#i" ;
con:publicHomePage Be: ;
vcard:fn "Tim Berners-Lee" ;
vcard:hasAddress [ a vcard:Work ;
vcard:locality "Cambridge" ;
vcard:postal-code "02139" ;
vcard:region "MA" ;
vcard:street-address "32 Vassar Street" ] ;
cert:key [ a cert:RSAPublicKey ;
cert:exponent 65537 ;
cert:modulus "ebe99c737bd3670239600547e5e2eb1d1497da39947b6576c3c44ffeca32cf0f2f7cbee3c47001278a90fc7fc5bcf292f741eb1fcd6bbe7f90650afb519cf13e81b2bffc6e02063ee5a55781d420b1dfaf61c15758480e66d47fb0dcb5fa7b9f7f1052e5ccbd01beee9553c3b6b51f4daf1fce991294cd09a3d1d636bc6c7656e4455d0aff06daec740ed0084aa6866fcae1359de61cc12dbe37c8fa42e977c6e727a8258bb9a3f265b27e3766fe0697f6aa0bcc81c3f026e387bd7bbc81580dc1853af2daa099186a9f59da526474ef6ec0a3d84cf400be3261b6b649dea1f78184862d34d685d2d587f09acc14cd8e578fdd2283387821296f0af39b8d8845"^^xsd:hexBinary ] ;
ldp:inbox Pub:Inbox ;
space:preferencesFile <https://timbl.com/timbl/Data/preferences.n3> ;
space:storage Pub:,
<https://timbl.inrupt.net/>,
<https://timbl.solid.community/> ;
solid:editableProfile Pub:friends.ttl ;
solid:oidcIssuer <https://timbl.com> ;
solid:profileBackgroundColor "#ffffff" ;
solid:profileHighlightColor "#00467E" ;
solid:publicTypeIndex Pub:PublicTypeIndex.ttl ;
:account <http://en.wikipedia.org/wiki/User:Timbl>,
<http://twitter.com/timberners_lee>,
<http://www.reddit.com/user/timbl/> ;
:based_near [ geo1:lat "42.361860" ;
geo1:long "-71.091840" ] ;
:family_name "Berners-Lee" ;
:givenname "Timothy" ;
:homepage Be: ;
:img <https://www.w3.org/Press/Stock/Berners-Lee/2001-europaeum-eighth.jpg> ;
:mbox <mailto:timbl@w3.org> ;
:mbox_sha1sum "965c47c5a70db7407210cef6e4e6f5374a525c5c" ;
:name "Timothy Berners-Lee" ;
:nick "TimBL",
"timbl" ;
:openid Be: ;
:title "Sir" ;
:weblog blog:4 ;
:workplaceHomepage <https://www.w3.org/> .

@ -0,0 +1,46 @@
# GraphDB repository configuration for the repository with ID "langchain".
# The integration-test Docker image copies this file to
# /opt/graphdb/dist/data/repositories/langchain/config.ttl.
# NOTE(review): ruleset "empty" presumably means no inference is applied to
# the loaded data - confirm against the GraphDB configuration documentation.
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>.
@prefix rep: <http://www.openrdf.org/config/repository#>.
@prefix sr: <http://www.openrdf.org/config/repository/sail#>.
@prefix sail: <http://www.openrdf.org/config/sail#>.
@prefix graphdb: <http://www.ontotext.com/config/graphdb#>.
[] a rep:Repository ;
rep:repositoryID "langchain" ;
rdfs:label "" ;
rep:repositoryImpl [
rep:repositoryType "graphdb:SailRepository" ;
sr:sailImpl [
sail:sailType "graphdb:Sail" ;
graphdb:read-only "false" ;
# Inference and Validation
graphdb:ruleset "empty" ;
graphdb:disable-sameAs "true" ;
graphdb:check-for-inconsistencies "false" ;
# Indexing
graphdb:entity-id-size "32" ;
graphdb:enable-context-index "false" ;
graphdb:enablePredicateList "true" ;
graphdb:enable-fts-index "false" ;
graphdb:fts-indexes ("default" "iri") ;
graphdb:fts-string-literals-index "default" ;
graphdb:fts-iris-index "none" ;
# Queries and Updates
graphdb:query-timeout "0" ;
graphdb:throw-QueryEvaluationException-on-timeout "false" ;
graphdb:query-limit-results "0" ;
# Settable in the file but otherwise hidden in the UI and in the RDF4J console
graphdb:base-URL "http://example.org/owlim#" ;
graphdb:defaultNS "" ;
graphdb:imports "" ;
graphdb:repository-type "file-repository" ;
graphdb:storage-folder "storage" ;
graphdb:entity-index-size "10000000" ;
graphdb:in-memory-literal-properties "true" ;
graphdb:enable-literal-index "true" ;
]
].

@ -1,5 +1,4 @@
#! /bin/bash
REPOSITORY_ID="starwars"
GRAPHDB_URI="http://localhost:7200/"
echo -e "\nUsing GraphDB: ${GRAPHDB_URI}"
@ -24,7 +23,10 @@ function waitGraphDBStart {
function loadData {
echo -e "\nImporting starwars-data.trig"
curl -X POST -H "Content-Type: application/x-trig" -T /starwars-data.trig ${GRAPHDB_URI}/repositories/${REPOSITORY_ID}/statements
curl -X POST -H "Content-Type: application/x-trig" -T /starwars-data.trig ${GRAPHDB_URI}/repositories/starwars/statements
echo -e "\nImporting berners-lee-card.ttl"
curl -X POST -H "Content-Type:application/x-turtle" -T /berners-lee-card.ttl ${GRAPHDB_URI}/repositories/langchain/statements
}
startGraphDB &

@ -1,11 +1,18 @@
"""Test RDF/ SPARQL Graph Database Chain."""
import os
import pathlib
import re
from unittest.mock import MagicMock, Mock
from langchain_community.graphs import RdfGraph
from langchain_community.llms.openai import OpenAI
from langchain.chains import LLMChain
from langchain.chains.graph_qa.sparql import GraphSparqlQAChain
"""
cd libs/langchain/tests/integration_tests/chains/docker-compose-ontotext-graphdb
./start.sh
"""
def test_connect_file_rdf() -> None:
"""
@ -28,6 +35,8 @@ def test_sparql_select() -> None:
"""
Test for generating and executing simple SPARQL SELECT query.
"""
from langchain_openai import ChatOpenAI
berners_lee_card = "http://www.w3.org/People/Berners-Lee/card"
graph = RdfGraph(
@ -35,33 +44,93 @@ def test_sparql_select() -> None:
standard="rdf",
)
chain = GraphSparqlQAChain.from_llm(OpenAI(temperature=0), graph=graph)
output = chain.run("What is Tim Berners-Lee's work homepage?")
expected_output = (
" The work homepage of Tim Berners-Lee is "
"http://www.w3.org/People/Berners-Lee/."
question = "What is Tim Berners-Lee's work homepage?"
answer = "Tim Berners-Lee's work homepage is http://www.w3.org/People/Berners-Lee/."
chain = GraphSparqlQAChain.from_llm(
Mock(ChatOpenAI),
graph=graph,
)
chain.sparql_intent_chain = Mock(LLMChain)
chain.sparql_generation_select_chain = Mock(LLMChain)
chain.sparql_generation_update_chain = Mock(LLMChain)
chain.sparql_intent_chain.run = Mock(return_value="SELECT")
chain.sparql_generation_select_chain.run = Mock(
return_value="""PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT ?workHomepage
WHERE {
?person rdfs:label "Tim Berners-Lee" .
?person foaf:workplaceHomepage ?workHomepage .
}"""
)
assert output == expected_output
chain.qa_chain = MagicMock(
return_value={
"text": answer,
"prompt": question,
"context": [],
}
)
chain.qa_chain.output_key = "text"
output = chain.invoke({chain.input_key: question})[chain.output_key]
assert output == answer
assert chain.sparql_intent_chain.run.call_count == 1
assert chain.sparql_generation_select_chain.run.call_count == 1
assert chain.sparql_generation_update_chain.run.call_count == 0
assert chain.qa_chain.call_count == 1
def test_sparql_insert() -> None:
def test_sparql_insert(tmp_path: pathlib.Path) -> None:
"""
Test for generating and executing simple SPARQL INSERT query.
"""
from langchain_openai import ChatOpenAI
berners_lee_card = "http://www.w3.org/People/Berners-Lee/card"
_local_copy = "test.ttl"
local_copy = tmp_path / "test.ttl"
graph = RdfGraph(
source_file=berners_lee_card,
standard="rdf",
local_copy=_local_copy,
local_copy=str(local_copy),
)
chain = GraphSparqlQAChain.from_llm(OpenAI(temperature=0), graph=graph)
chain.run(
query = (
"Save that the person with the name 'Timothy Berners-Lee' "
"has a work homepage at 'http://www.w3.org/foo/bar/'"
)
chain = GraphSparqlQAChain.from_llm(
Mock(ChatOpenAI),
graph=graph,
)
chain.sparql_intent_chain = Mock(LLMChain)
chain.sparql_generation_select_chain = Mock(LLMChain)
chain.sparql_generation_update_chain = Mock(LLMChain)
chain.qa_chain = Mock(LLMChain)
chain.sparql_intent_chain.run = Mock(return_value="UPDATE")
chain.sparql_generation_update_chain.run = Mock(
return_value="""PREFIX foaf: <http://xmlns.com/foaf/0.1/>
INSERT {
?p foaf:workplaceHomepage <http://www.w3.org/foo/bar/> .
}
WHERE {
?p foaf:name "Timothy Berners-Lee" .
}"""
)
output = chain.invoke({chain.input_key: query})[chain.output_key]
assert output == "Successfully inserted triples into the graph."
assert chain.sparql_intent_chain.run.call_count == 1
assert chain.sparql_generation_select_chain.run.call_count == 0
assert chain.sparql_generation_update_chain.run.call_count == 1
assert chain.qa_chain.call_count == 0
query = (
"""PREFIX foaf: <http://xmlns.com/foaf/0.1/>\n"""
"""SELECT ?hp\n"""
@ -73,18 +142,14 @@ def test_sparql_insert() -> None:
output = graph.query(query)
assert len(output) == 2
# clean up
try:
os.remove(_local_copy)
except OSError:
pass
def test_sparql_select_return_query() -> None:
"""
Test for generating and executing simple SPARQL SELECT query
and returning the generated SPARQL query.
"""
from langchain_openai import ChatOpenAI
berners_lee_card = "http://www.w3.org/People/Berners-Lee/card"
graph = RdfGraph(
@ -92,15 +157,112 @@ def test_sparql_select_return_query() -> None:
standard="rdf",
)
question = "What is Tim Berners-Lee's work homepage?"
answer = "Tim Berners-Lee's work homepage is http://www.w3.org/People/Berners-Lee/."
chain = GraphSparqlQAChain.from_llm(
OpenAI(temperature=0), graph=graph, return_sparql_query=True
Mock(ChatOpenAI),
graph=graph,
return_sparql_query=True,
)
output = chain("What is Tim Berners-Lee's work homepage?")
chain.sparql_intent_chain = Mock(LLMChain)
chain.sparql_generation_select_chain = Mock(LLMChain)
chain.sparql_generation_update_chain = Mock(LLMChain)
# Verify the expected answer
expected_output = (
" The work homepage of Tim Berners-Lee is "
"http://www.w3.org/People/Berners-Lee/."
chain.sparql_intent_chain.run = Mock(return_value="SELECT")
chain.sparql_generation_select_chain.run = Mock(
return_value="""PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT ?workHomepage
WHERE {
?person rdfs:label "Tim Berners-Lee" .
?person foaf:workplaceHomepage ?workHomepage .
}"""
)
chain.qa_chain = MagicMock(
return_value={
"text": answer,
"prompt": question,
"context": [],
}
)
assert output["result"] == expected_output
chain.qa_chain.output_key = "text"
output = chain.invoke({chain.input_key: question})
assert output[chain.output_key] == answer
assert "sparql_query" in output
assert chain.sparql_intent_chain.run.call_count == 1
assert chain.sparql_generation_select_chain.run.call_count == 1
assert chain.sparql_generation_update_chain.run.call_count == 0
assert chain.qa_chain.call_count == 1
def test_loading_schema_from_ontotext_graphdb() -> None:
    """
    Test that the schema text is built from a remote SPARQL endpoint.

    Connects to the ``langchain`` repository at ``localhost:7200`` and checks
    that the generated schema starts with the expected introduction, contains
    the relationships heading, and lists 5 node types and 58 relationships.
    """
    rdf_graph = RdfGraph(
        query_endpoint="http://localhost:7200/repositories/langchain",
        graph_kwargs={"bind_namespaces": "none"},
    )
    schema = rdf_graph.get_schema

    header = (
        "In the following, each IRI is followed by the local name and "
        "optionally its description in parentheses. \n"
        "The RDF graph supports the following node types:"
    )
    assert schema.startswith(header)

    separator = "The RDF graph supports the following relationships:"
    assert separator in schema

    # Split the schema text around the relationships heading: node types sit
    # between the header and the heading, relationships come after it.
    split_at = schema.index(separator)
    node_types_section = schema[len(header) : split_at]
    relationships_section = schema[split_at + len(separator) :]

    # The pattern matches entries of the form "<IRI> (text in parentheses)".
    assert len(re.findall("<[^>]+> \\([^)]+\\)", node_types_section)) == 5
    assert len(re.findall("<[^>]+> \\([^)]+\\)", relationships_section)) == 58
def test_graph_qa_chain_with_ontotext_graphdb() -> None:
    """
    Test the SELECT path of GraphSparqlQAChain against a SPARQL endpoint.

    The graph is backed by the ``langchain`` repository at ``localhost:7200``.
    Every LLM-driven sub-chain is replaced with a mock, so the test checks the
    chain wiring: the intent and SELECT-generation chains run once, the
    UPDATE-generation chain is never used, and the QA chain's answer is
    returned unchanged.
    """
    from langchain_openai import ChatOpenAI

    question = "What is Tim Berners-Lee's work homepage?"
    answer = "Tim Berners-Lee's work homepage is http://www.w3.org/People/Berners-Lee/."

    # Canned SPARQL returned by the mocked SELECT-generation chain.
    generated_sparql = """PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT ?workHomepage
WHERE {
?person rdfs:label "Tim Berners-Lee" .
?person foaf:workplaceHomepage ?workHomepage .
}"""
    # Canned result returned when the mocked QA chain is called.
    qa_result = {
        "text": answer,
        "prompt": question,
        "context": [],
    }

    rdf_graph = RdfGraph(
        query_endpoint="http://localhost:7200/repositories/langchain",
        graph_kwargs={"bind_namespaces": "none"},
    )
    chain = GraphSparqlQAChain.from_llm(Mock(ChatOpenAI), graph=rdf_graph)

    # Swap every sub-chain for a mock so that no LLM is ever invoked.
    chain.sparql_intent_chain = Mock(LLMChain)
    chain.sparql_generation_select_chain = Mock(LLMChain)
    chain.sparql_generation_update_chain = Mock(LLMChain)
    chain.sparql_intent_chain.run = Mock(return_value="SELECT")
    chain.sparql_generation_select_chain.run = Mock(return_value=generated_sparql)
    chain.qa_chain = MagicMock(return_value=qa_result)
    chain.qa_chain.output_key = "text"

    result = chain.invoke({chain.input_key: question})
    output = result[chain.output_key]

    assert output == answer
    # Only the SELECT branch and the QA chain may have been exercised.
    assert chain.sparql_intent_chain.run.call_count == 1
    assert chain.sparql_generation_select_chain.run.call_count == 1
    assert chain.sparql_generation_update_chain.run.call_count == 0
    assert chain.qa_chain.call_count == 1

Loading…
Cancel
Save