langchain/libs/community/langchain_community/chains/graph_qa/neptune_sparql.py

"""
Question answering over an RDF or OWL graph using SPARQL.
"""
from __future__ import annotations

from typing import Any, Dict, List, Optional

from langchain.chains.base import Chain
from langchain.chains.llm import LLMChain
from langchain_core.callbacks.manager import CallbackManagerForChainRun
from langchain_core.language_models import BaseLanguageModel
from langchain_core.prompts.base import BasePromptTemplate
from langchain_core.prompts.prompt import PromptTemplate
from langchain_core.pydantic_v1 import Field

from langchain_community.chains.graph_qa.prompts import SPARQL_QA_PROMPT
from langchain_community.graphs import NeptuneRdfGraph

# Key under which NeptuneSparqlQAChain._call stores the list of intermediate
# steps in its result dict when `return_intermediate_steps` is enabled.
INTERMEDIATE_STEPS_KEY = "intermediate_steps"

# Text of the SPARQL-generation prompt. `{schema}` and `{prompt}` are the two
# placeholders declared as input variables below; the doubled braces inside the
# example query are escaped so they are not read as placeholders.
# The bare "Examples:" line is load-bearing: NeptuneSparqlQAChain.from_llm does a
# plain string replace on it to splice in caller-supplied examples, so it must
# stay verbatim.
SPARQL_GENERATION_TEMPLATE = """
Task: Generate a SPARQL SELECT statement for querying a graph database.
For instance, to find all email addresses of John Doe, the following 
query in backticks would be suitable:
```
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
SELECT ?email
WHERE {{
    ?person foaf:name "John Doe" .
    ?person foaf:mbox ?email .
}}
```
Instructions:
Use only the node types and properties provided in the schema.
Do not use any node types and properties that are not explicitly provided.
Include all necessary prefixes.

Examples:

Schema:
{schema}
Note: Be as concise as possible.
Do not include any explanations or apologies in your responses.
Do not respond to any questions that ask for anything else than 
for you to construct a SPARQL query.
Do not include any text except the SPARQL query generated.

The question is:
{prompt}"""

# Default generation prompt (no examples). Used as the default value of
# `sparql_prompt` in NeptuneSparqlQAChain.from_llm.
SPARQL_GENERATION_PROMPT = PromptTemplate(
    input_variables=["schema", "prompt"], template=SPARQL_GENERATION_TEMPLATE
)


def extract_sparql(query: str) -> str:
    """Pull the SPARQL statement out of a model response.

    The text is stripped of surrounding whitespace, then unwrapped in one of
    two ways:

    * If it contains exactly one triple-backtick fenced section, the content
      of that section is returned, minus a leading ``sparql`` language tag if
      one is present. The fenced content itself is not stripped.
    * Otherwise, if the whole text is wrapped in ``<sparql>...</sparql>``,
      the text between the tags is returned.

    Anything else (no fence, several fences, no tags) is returned as the
    stripped text, unchanged.

    Args:
        query: Text to extract SPARQL code from.

    Returns:
        SPARQL code extracted from the text.
    """
    fence = "```"
    lang_tag = "sparql"
    open_tag, close_tag = "<sparql>", "</sparql>"

    text = query.strip()
    pieces = text.split(fence)

    # Exactly one opening and one closing fence yields three pieces:
    # before, inside, after.
    if len(pieces) == 3:
        fenced = pieces[1]
        return fenced[len(lang_tag) :] if fenced.startswith(lang_tag) else fenced

    # The tag form is only considered when there is no single fenced section.
    if text.startswith(open_tag) and text.endswith(close_tag):
        return text[len(open_tag) : -len(close_tag)]

    return text


class NeptuneSparqlQAChain(Chain):
    """Chain for question-answering against a Neptune graph
    by generating SPARQL statements.

    The chain runs in two LLM steps: ``sparql_generation_chain`` turns the
    question plus the graph schema into a SPARQL query, the query is executed
    with ``graph.query``, and ``qa_chain`` turns the query result into the
    final answer (skipped when ``return_direct`` is set).

    *Security note*: Make sure that the database connection uses credentials
        that are narrowly-scoped to only include necessary permissions.
        Failure to do so may result in data corruption or loss, since the calling
        code may attempt commands that would result in deletion, mutation
        of data if appropriately prompted or reading sensitive data if such
        data is present in the database.
        The best way to guard against such negative outcomes is to (as appropriate)
        limit the permissions granted to the credentials used with this tool.

        See https://python.langchain.com/docs/security for more information.

    Example:
        .. code-block:: python

            chain = NeptuneSparqlQAChain.from_llm(
                llm=llm,
                graph=graph
            )
            response = chain.invoke(query)
    """

    graph: NeptuneRdfGraph = Field(exclude=True)
    sparql_generation_chain: LLMChain
    qa_chain: LLMChain
    input_key: str = "query"  #: :meta private:
    output_key: str = "result"  #: :meta private:
    # NOTE(review): `top_k` is not read by any method defined in this class.
    top_k: int = 10
    return_intermediate_steps: bool = False
    """Whether or not to return the intermediate steps along with the final answer."""
    return_direct: bool = False
    """Whether or not to return the result of querying the graph directly."""
    # NOTE(review): `extra_instructions` is not read by any method defined in
    # this class; the generation prompt is built without it.
    extra_instructions: Optional[str] = None
    """Extra instructions to be appended to the query generation prompt."""

    @property
    def input_keys(self) -> List[str]:
        """Names of the input keys this chain expects (just ``input_key``)."""
        return [self.input_key]

    @property
    def output_keys(self) -> List[str]:
        """Names of the output keys this chain declares (just ``output_key``).

        ``INTERMEDIATE_STEPS_KEY`` is not listed here even though ``_call``
        adds it to the result when ``return_intermediate_steps`` is set.
        """
        return [self.output_key]

    @classmethod
    def from_llm(
        cls,
        llm: BaseLanguageModel,
        *,
        qa_prompt: BasePromptTemplate = SPARQL_QA_PROMPT,
        sparql_prompt: BasePromptTemplate = SPARQL_GENERATION_PROMPT,
        examples: Optional[str] = None,
        **kwargs: Any,
    ) -> NeptuneSparqlQAChain:
        """Initialize from LLM.

        Args:
            llm: Language model used for both the SPARQL-generation step and
                the answer step.
            qa_prompt: Prompt for the answer step.
            sparql_prompt: Prompt for the SPARQL-generation step. Only used
                when ``examples`` is empty or ``None``; see below.
            examples: Optional example text. When given, it is inserted after
                the ``Examples:`` line of ``SPARQL_GENERATION_TEMPLATE`` and a
                new prompt is built from the result, replacing whatever was
                passed as ``sparql_prompt``. The text becomes part of the
                template itself, so literal braces presumably need to be
                doubled, as they are in the built-in example query.
            **kwargs: Forwarded to the class constructor (for example
                ``graph``, as in the class-level example).

        Returns:
            A chain wired with the two LLM sub-chains.
        """
        qa_chain = LLMChain(llm=llm, prompt=qa_prompt)
        template_to_use = SPARQL_GENERATION_TEMPLATE
        if examples:
            # Plain string substitution on the "Examples:" anchor line of the
            # default template; a caller-supplied `sparql_prompt` is discarded
            # on this path.
            template_to_use = template_to_use.replace(
                "Examples:", "Examples: " + examples
            )
            sparql_prompt = PromptTemplate(
                input_variables=["schema", "prompt"], template=template_to_use
            )
        sparql_generation_chain = LLMChain(llm=llm, prompt=sparql_prompt)

        # `examples` is forwarded although this class declares no field of
        # that name, hence the call-arg ignore.
        return cls(  # type: ignore[call-arg]
            qa_chain=qa_chain,
            sparql_generation_chain=sparql_generation_chain,
            examples=examples,
            **kwargs,
        )

    def _call(
        self,
        inputs: Dict[str, Any],
        run_manager: Optional[CallbackManagerForChainRun] = None,
    ) -> Dict[str, Any]:
        """
        Generate SPARQL query, use it to retrieve a response from the gdb and answer
        the question.

        Args:
            inputs: Chain inputs; the question is read from ``inputs[input_key]``.
            run_manager: Callback manager for this run. A no-op manager is
                used when none is supplied.

        Returns:
            A dict holding, under ``output_key``, either the answer produced
            by ``qa_chain`` or, when ``return_direct`` is set, the raw result
            of ``graph.query``. When ``return_intermediate_steps`` is set the
            dict also carries the recorded steps under
            ``INTERMEDIATE_STEPS_KEY``.
        """
        _run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager()
        callbacks = _run_manager.get_child()
        prompt = inputs[self.input_key]

        intermediate_steps: List[Dict[str, Any]] = []

        # `get_schema` is accessed without being called; presumably it is a
        # property on NeptuneRdfGraph. Verify against that class.
        generated_sparql = self.sparql_generation_chain.run(
            {"prompt": prompt, "schema": self.graph.get_schema}, callbacks=callbacks
        )

        # Extract SPARQL
        generated_sparql = extract_sparql(generated_sparql)

        _run_manager.on_text("Generated SPARQL:", end="\n", verbose=self.verbose)
        _run_manager.on_text(
            generated_sparql, color="green", end="\n", verbose=self.verbose
        )

        intermediate_steps.append({"query": generated_sparql})

        context = self.graph.query(generated_sparql)

        if self.return_direct:
            # Hand back the query result as-is. The context is neither logged
            # nor recorded as an intermediate step on this path.
            final_result = context
        else:
            _run_manager.on_text("Full Context:", end="\n", verbose=self.verbose)
            _run_manager.on_text(
                str(context), color="green", end="\n", verbose=self.verbose
            )

            intermediate_steps.append({"context": context})

            result = self.qa_chain(
                {"prompt": prompt, "context": context},
                callbacks=callbacks,
            )
            final_result = result[self.qa_chain.output_key]

        chain_result: Dict[str, Any] = {self.output_key: final_result}
        if self.return_intermediate_steps:
            chain_result[INTERMEDIATE_STEPS_KEY] = intermediate_steps

        return chain_result
multiple: langchain 0.2 in master (#21191) 0.2rc migrations - [x] Move memory - [x] Move remaining retrievers - [x] graph_qa chains - [x] some dependency from evaluation code potentially on math utils - [x] Move openapi chain from `langchain.chains.api.openapi` to `langchain_community.chains.openapi` - [x] Migrate `langchain.chains.ernie_functions` to `langchain_community.chains.ernie_functions` - [x] migrate `langchain/chains/llm_requests.py` to `langchain_community.chains.llm_requests` - [x] Moving `langchain_community.cross_enoders.base:BaseCrossEncoder` -> `langchain_community.retrievers.document_compressors.cross_encoder:BaseCrossEncoder` (namespace not ideal, but it needs to be moved to `langchain` to avoid circular deps) - [x] unit tests langchain -- add pytest.mark.community to some unit tests that will stay in langchain - [x] unit tests community -- move unit tests that depend on community to community - [x] mv integration tests that depend on community to community - [x] mypy checks Other todo - [x] Make deprecation warnings not noisy (need to use warn deprecated and check that things are implemented properly) - [x] Update deprecation messages with timeline for code removal (likely we actually won't be removing things until 0.4 release) -- will give people more time to transition their code. - [ ] Add information to deprecation warning to show users how to migrate their code base using langchain-cli - [ ] Remove any unnecessary requirements in langchain (e.g., is SQLALchemy required?) --------- Co-authored-by: Erick Friis <erick@langchain.dev> 2024-05-08 20:46:52 +00:00			`"""`
			`Question answering over an RDF or OWL graph using SPARQL.`
			`"""`
			`from __future__ import annotations`

			`from typing import Any, Dict, List, Optional`

			`from langchain.chains.base import Chain`
			`from langchain.chains.llm import LLMChain`
			`from langchain_core.callbacks.manager import CallbackManagerForChainRun`
			`from langchain_core.language_models import BaseLanguageModel`
			`from langchain_core.prompts.base import BasePromptTemplate`
			`from langchain_core.prompts.prompt import PromptTemplate`
			`from langchain_core.pydantic_v1 import Field`

			`from langchain_community.chains.graph_qa.prompts import SPARQL_QA_PROMPT`
			`from langchain_community.graphs import NeptuneRdfGraph`

			`INTERMEDIATE_STEPS_KEY = "intermediate_steps"`

			`SPARQL_GENERATION_TEMPLATE = """`
			`Task: Generate a SPARQL SELECT statement for querying a graph database.`
			`For instance, to find all email addresses of John Doe, the following`
			`query in backticks would be suitable:`
			```
			`PREFIX foaf: <http://xmlns.com/foaf/0.1/>`
			`SELECT ?email`
			`WHERE {{`
			`?person foaf:name "John Doe" .`
			`?person foaf:mbox ?email .`
			`}}`
			```
			`Instructions:`
			`Use only the node types and properties provided in the schema.`
			`Do not use any node types and properties that are not explicitly provided.`
			`Include all necessary prefixes.`

			`Examples:`

			`Schema:`
			`{schema}`
			`Note: Be as concise as possible.`
			`Do not include any explanations or apologies in your responses.`
			`Do not respond to any questions that ask for anything else than`
			`for you to construct a SPARQL query.`
			`Do not include any text except the SPARQL query generated.`

			`The question is:`
			`{prompt}"""`

			`SPARQL_GENERATION_PROMPT = PromptTemplate(`
			`input_variables=["schema", "prompt"], template=SPARQL_GENERATION_TEMPLATE`
			`)`


			`def extract_sparql(query: str) -> str:`
			`"""Extract SPARQL code from a text.`

			`Args:`
			`query: Text to extract SPARQL code from.`

			`Returns:`
			`SPARQL code extracted from the text.`
			`"""`
			`query = query.strip()`
			querytoks = query.split("```")
			`if len(querytoks) == 3:`
			`query = querytoks[1]`

			`if query.startswith("sparql"):`
			`query = query[6:]`
			`elif query.startswith("<sparql>") and query.endswith("</sparql>"):`
			`query = query[8:-9]`
			`return query`


			`class NeptuneSparqlQAChain(Chain):`
			`"""Chain for question-answering against a Neptune graph`
			`by generating SPARQL statements.`

			`Security note: Make sure that the database connection uses credentials`
			`that are narrowly-scoped to only include necessary permissions.`
			`Failure to do so may result in data corruption or loss, since the calling`
			`code may attempt commands that would result in deletion, mutation`
			`of data if appropriately prompted or reading sensitive data if such`
			`data is present in the database.`
			`The best way to guard against such negative outcomes is to (as appropriate)`
			`limit the permissions granted to the credentials used with this tool.`

			`See https://python.langchain.com/docs/security for more information.`

			`Example:`
			`.. code-block:: python`

			`chain = NeptuneSparqlQAChain.from_llm(`
			`llm=llm,`
			`graph=graph`
			`)`
			`response = chain.invoke(query)`
			`"""`

			`graph: NeptuneRdfGraph = Field(exclude=True)`
			`sparql_generation_chain: LLMChain`
			`qa_chain: LLMChain`
			`input_key: str = "query" #: :meta private:`
			`output_key: str = "result" #: :meta private:`
			`top_k: int = 10`
			`return_intermediate_steps: bool = False`
			`"""Whether or not to return the intermediate steps along with the final answer."""`
			`return_direct: bool = False`
			`"""Whether or not to return the result of querying the graph directly."""`
			`extra_instructions: Optional[str] = None`
			`"""Extra instructions by the appended to the query generation prompt."""`

			`@property`
			`def input_keys(self) -> List[str]:`
			`return [self.input_key]`

			`@property`
			`def output_keys(self) -> List[str]:`
			`_output_keys = [self.output_key]`
			`return _output_keys`

			`@classmethod`
			`def from_llm(`
			`cls,`
			`llm: BaseLanguageModel,`
			`*,`
			`qa_prompt: BasePromptTemplate = SPARQL_QA_PROMPT,`
			`sparql_prompt: BasePromptTemplate = SPARQL_GENERATION_PROMPT,`
			`examples: Optional[str] = None,`
			`**kwargs: Any,`
			`) -> NeptuneSparqlQAChain:`
			`"""Initialize from LLM."""`
			`qa_chain = LLMChain(llm=llm, prompt=qa_prompt)`
			`template_to_use = SPARQL_GENERATION_TEMPLATE`
			`if examples:`
			`template_to_use = template_to_use.replace(`
			`"Examples:", "Examples: " + examples`
			`)`
			`sparql_prompt = PromptTemplate(`
			`input_variables=["schema", "prompt"], template=template_to_use`
			`)`
			`sparql_generation_chain = LLMChain(llm=llm, prompt=sparql_prompt)`

			`return cls( # type: ignore[call-arg]`
			`qa_chain=qa_chain,`
			`sparql_generation_chain=sparql_generation_chain,`
			`examples=examples,`
			`**kwargs,`
			`)`

			`def _call(`
			`self,`
			`inputs: Dict[str, Any],`
			`run_manager: Optional[CallbackManagerForChainRun] = None,`
			`) -> Dict[str, str]:`
			`"""`
			`Generate SPARQL query, use it to retrieve a response from the gdb and answer`
			`the question.`
			`"""`
			`_run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager()`
			`callbacks = _run_manager.get_child()`
			`prompt = inputs[self.input_key]`

			`intermediate_steps: List = []`

			`generated_sparql = self.sparql_generation_chain.run(`
			`{"prompt": prompt, "schema": self.graph.get_schema}, callbacks=callbacks`
			`)`

			`# Extract SPARQL`
			`generated_sparql = extract_sparql(generated_sparql)`

			`_run_manager.on_text("Generated SPARQL:", end="\n", verbose=self.verbose)`
			`_run_manager.on_text(`
			`generated_sparql, color="green", end="\n", verbose=self.verbose`
			`)`

			`intermediate_steps.append({"query": generated_sparql})`

			`context = self.graph.query(generated_sparql)`

			`if self.return_direct:`
			`final_result = context`
			`else:`
			`_run_manager.on_text("Full Context:", end="\n", verbose=self.verbose)`
			`_run_manager.on_text(`
			`str(context), color="green", end="\n", verbose=self.verbose`
			`)`

			`intermediate_steps.append({"context": context})`

			`result = self.qa_chain(`
			`{"prompt": prompt, "context": context},`
			`callbacks=callbacks,`
			`)`
			`final_result = result[self.qa_chain.output_key]`

			`chain_result: Dict[str, Any] = {self.output_key: final_result}`
			`if self.return_intermediate_steps:`
			`chain_result[INTERMEDIATE_STEPS_KEY] = intermediate_steps`

			`return chain_result`