langchain/templates/retrieval-agent-fireworks/retrieval_agent_fireworks/chain.py

from typing import List

from langchain import hub
from langchain.agents import AgentExecutor
from langchain.agents.format_scratchpad import format_log_to_str
from langchain.agents.output_parsers import ReActJsonSingleInputOutputParser
from langchain.callbacks.manager import CallbackManagerForRetrieverRun
from langchain.tools.render import render_text_description
from langchain.tools.retriever import create_retriever_tool
from langchain_community.chat_models.fireworks import ChatFireworks
from langchain_community.utilities.arxiv import ArxivAPIWrapper
from langchain_core.documents import Document
from langchain_core.pydantic_v1 import BaseModel
from langchain_core.retrievers import BaseRetriever

# Fireworks model identifier; passed as `model` when the chat model is built below.
MODEL_ID = "accounts/fireworks/models/mixtral-8x7b-instruct"


class ArxivRetriever(BaseRetriever, ArxivAPIWrapper):
    """`Arxiv` retriever that returns paper summaries as documents.

    It exposes the ArxivAPIWrapper search through the retriever interface.
    It uses all ArxivAPIWrapper arguments without any change.
    """

    # Declared for configuration, but not read by `_get_relevant_documents`.
    get_full_documents: bool = False

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        """Search arXiv and return one summary document per result.

        Args:
            query: Either whitespace-separated arXiv identifiers or a free-text
                search query. Free-text queries are truncated to
                ``ARXIV_MAX_QUERY_LENGTH`` characters before searching.
            run_manager: Callback manager for this retriever run (not used here).

        Returns:
            One ``Document`` per search result, with the result summary as
            ``page_content`` and ``Published`` (the date of ``result.updated``),
            ``Title`` and ``Authors`` in the metadata. If the search raises one
            of ``self.arxiv_exceptions``, a single document whose content
            describes the exception is returned instead.
        """
        try:
            if self.is_arxiv_identifier(query):
                results = self.arxiv_search(
                    id_list=query.split(),
                    max_results=self.top_k_results,
                ).results()
            else:
                results = self.arxiv_search(  # type: ignore
                    query[: self.ARXIV_MAX_QUERY_LENGTH], max_results=self.top_k_results
                ).results()
            # arxiv's `results()` yields lazily, so request and parsing errors
            # are raised while iterating. Consume it inside the `try` so those
            # errors reach the handler below instead of escaping the retriever.
            results = list(results)
        except self.arxiv_exceptions as ex:
            return [Document(page_content=f"Arxiv exception: {ex}")]
        docs = [
            Document(
                page_content=result.summary,
                metadata={
                    "Published": result.updated.date(),
                    "Title": result.title,
                    "Authors": ", ".join(a.name for a in result.authors),
                },
            )
            for result in results
        ]
        return docs


# Set up tool(s)
# The description is rendered into the agent prompt (see `render_text_description`
# below), so it is written as complete, properly terminated sentences.
description = (
    "A wrapper around Arxiv.org. "
    "Useful for when you need to answer questions about Physics, Mathematics, "
    "Computer Science, Quantitative Biology, Quantitative Finance, Statistics, "
    "Electrical Engineering, and Economics "
    "from scientific articles on arxiv.org. "
    "Input should be a search query."
)
# Expose the retriever to the agent as a tool named "arxiv".
arxiv_tool = create_retriever_tool(ArxivRetriever(), "arxiv", description)
tools = [arxiv_tool]

# Set up LLM
# Chat model for MODEL_ID; the sampling options are forwarded via `model_kwargs`.
llm = ChatFireworks(
    model=MODEL_ID,
    model_kwargs={
        "temperature": 0,
        "max_tokens": 2048,
        "top_p": 1,
    },
    # NOTE(review): presumably relies on an LLM cache being configured
    # elsewhere in the application — confirm.
    cache=True,
)

# Pull the ReAct JSON prompt from the hub and pre-fill its `tools` and
# `tool_names` variables from the configured tool list.
prompt = hub.pull("hwchase17/react-json").partial(
    tools=render_text_description(tools),
    tool_names=", ".join(tool.name for tool in tools),
)

# define the agent
# Bind a stop sequence so generation ends at the next "\nObservation" marker.
model_with_stop = llm.bind(stop=["\nObservation"])
# Pipeline: build the prompt variables from the incoming dict, fill the prompt,
# call the model, then parse the model output with the ReAct JSON parser.
agent = (
    {
        # Forward the caller's input unchanged.
        "input": lambda x: x["input"],
        # Render the accumulated intermediate steps as scratchpad text.
        "agent_scratchpad": lambda x: format_log_to_str(x["intermediate_steps"]),
    }
    | prompt
    | model_with_stop
    | ReActJsonSingleInputOutputParser()
)


# Input type declared for the agent executor below: a single string field
# named `input`.
class InputType(BaseModel):
    input: str


# instantiate AgentExecutor
# Runs `agent` with the configured `tools`, with verbose output and parsing-error
# handling enabled; `with_types` declares InputType as the input type.
agent_executor = AgentExecutor(
    agent=agent,
    tools=tools,
    verbose=True,
    handle_parsing_errors=True,
).with_types(input_type=InputType)
template: tool-retrieval-fireworks (#17052) - Initial commit oss-tool-retrieval-agent - README update - lint - lock - format imports - Rename to retrieval-agent-fireworks - cr <!-- Thank you for contributing to LangChain! Please title your PR "<package>: <description>", where <package> is whichever of langchain, community, core, experimental, etc. is being modified. Replace this entire comment with: - Description: a description of the change, - Issue: the issue # it fixes if applicable, - Dependencies: any dependencies required for this change, - Twitter handle: we announce bigger features on Twitter. If your PR gets announced, and you'd like a mention, we'll gladly shout you out! Please make sure your PR is passing linting and testing before submitting. Run `make format`, `make lint` and `make test` from the root of the package you've modified to check this locally. See contribution guidelines for more information on how to write/run tests, lint, etc: https://python.langchain.com/docs/contributing/ If you're adding a new integration, please include: 1. a test for the integration, preferably unit tests that do not rely on network access, 2. an example notebook showing its use. It lives in `docs/docs/integrations` directory. If no one reviews your PR within a few days, please @-mention one of @baskaryan, @eyurtsev, @hwchase17. --> --------- Co-authored-by: Taqi Jaffri <tjaffri@docugami.com> 2024-02-05 19:50:17 +00:00			`from typing import List`

			`from langchain import hub`
			`from langchain.agents import AgentExecutor`
			`from langchain.agents.format_scratchpad import format_log_to_str`
			`from langchain.agents.output_parsers import ReActJsonSingleInputOutputParser`
			`from langchain.callbacks.manager import CallbackManagerForRetrieverRun`
			`from langchain.tools.render import render_text_description`
			`from langchain.tools.retriever import create_retriever_tool`
			`from langchain_community.chat_models.fireworks import ChatFireworks`
			`from langchain_community.utilities.arxiv import ArxivAPIWrapper`
docs, templates: update schema imports to core (#17885) - chat models, messages - documents - agentaction/finish - baseretriever,document - stroutputparser - more messages - basemessage - format_document - baseoutputparser --------- Co-authored-by: Bagatur <baskaryan@gmail.com> 2024-02-22 23:58:44 +00:00			`from langchain_core.documents import Document`
template: tool-retrieval-fireworks (#17052) - Initial commit oss-tool-retrieval-agent - README update - lint - lock - format imports - Rename to retrieval-agent-fireworks - cr <!-- Thank you for contributing to LangChain! Please title your PR "<package>: <description>", where <package> is whichever of langchain, community, core, experimental, etc. is being modified. Replace this entire comment with: - Description: a description of the change, - Issue: the issue # it fixes if applicable, - Dependencies: any dependencies required for this change, - Twitter handle: we announce bigger features on Twitter. If your PR gets announced, and you'd like a mention, we'll gladly shout you out! Please make sure your PR is passing linting and testing before submitting. Run `make format`, `make lint` and `make test` from the root of the package you've modified to check this locally. See contribution guidelines for more information on how to write/run tests, lint, etc: https://python.langchain.com/docs/contributing/ If you're adding a new integration, please include: 1. a test for the integration, preferably unit tests that do not rely on network access, 2. an example notebook showing its use. It lives in `docs/docs/integrations` directory. If no one reviews your PR within a few days, please @-mention one of @baskaryan, @eyurtsev, @hwchase17. --> --------- Co-authored-by: Taqi Jaffri <tjaffri@docugami.com> 2024-02-05 19:50:17 +00:00			`from langchain_core.pydantic_v1 import BaseModel`
docs, templates: update schema imports to core (#17885) - chat models, messages - documents - agentaction/finish - baseretriever,document - stroutputparser - more messages - basemessage - format_document - baseoutputparser --------- Co-authored-by: Bagatur <baskaryan@gmail.com> 2024-02-22 23:58:44 +00:00			`from langchain_core.retrievers import BaseRetriever`
template: tool-retrieval-fireworks (#17052) - Initial commit oss-tool-retrieval-agent - README update - lint - lock - format imports - Rename to retrieval-agent-fireworks - cr <!-- Thank you for contributing to LangChain! Please title your PR "<package>: <description>", where <package> is whichever of langchain, community, core, experimental, etc. is being modified. Replace this entire comment with: - Description: a description of the change, - Issue: the issue # it fixes if applicable, - Dependencies: any dependencies required for this change, - Twitter handle: we announce bigger features on Twitter. If your PR gets announced, and you'd like a mention, we'll gladly shout you out! Please make sure your PR is passing linting and testing before submitting. Run `make format`, `make lint` and `make test` from the root of the package you've modified to check this locally. See contribution guidelines for more information on how to write/run tests, lint, etc: https://python.langchain.com/docs/contributing/ If you're adding a new integration, please include: 1. a test for the integration, preferably unit tests that do not rely on network access, 2. an example notebook showing its use. It lives in `docs/docs/integrations` directory. If no one reviews your PR within a few days, please @-mention one of @baskaryan, @eyurtsev, @hwchase17. --> --------- Co-authored-by: Taqi Jaffri <tjaffri@docugami.com> 2024-02-05 19:50:17 +00:00
			`MODEL_ID = "accounts/fireworks/models/mixtral-8x7b-instruct"`


			`class ArxivRetriever(BaseRetriever, ArxivAPIWrapper):`
			"""`Arxiv` retriever.

			`It wraps load() to get_relevant_documents().`
			`It uses all ArxivAPIWrapper arguments without any change.`
			`"""`

			`get_full_documents: bool = False`

			`def _get_relevant_documents(`
			`self, query: str, *, run_manager: CallbackManagerForRetrieverRun`
			`) -> List[Document]:`
			`try:`
			`if self.is_arxiv_identifier(query):`
			`results = self.arxiv_search(`
			`id_list=query.split(),`
			`max_results=self.top_k_results,`
			`).results()`
			`else:`
			`results = self.arxiv_search( # type: ignore`
			`query[: self.ARXIV_MAX_QUERY_LENGTH], max_results=self.top_k_results`
			`).results()`
			`except self.arxiv_exceptions as ex:`
			`return [Document(page_content=f"Arxiv exception: {ex}")]`
			`docs = [`
			`Document(`
			`page_content=result.summary,`
			`metadata={`
			`"Published": result.updated.date(),`
			`"Title": result.title,`
			`"Authors": ", ".join(a.name for a in result.authors),`
			`},`
			`)`
			`for result in results`
			`]`
			`return docs`


			`# Set up tool(s)`
			`description = (`
			`"A wrapper around Arxiv.org "`
			`"Useful for when you need to answer questions about Physics, Mathematics, "`
			`"Computer Science, Quantitative Biology, Quantitative Finance, Statistics, "`
			`"Electrical Engineering, and Economics "`
			`"from scientific articles on arxiv.org. "`
			`"Input should be a search query."`
			`)`
			`arxiv_tool = create_retriever_tool(ArxivRetriever(), "arxiv", description)`
			`tools = [arxiv_tool]`

			`# Set up LLM`
			`llm = ChatFireworks(`
			`model=MODEL_ID,`
			`model_kwargs={`
			`"temperature": 0,`
			`"max_tokens": 2048,`
			`"top_p": 1,`
			`},`
			`cache=True,`
			`)`

			`# setup ReAct style prompt`
			`prompt = hub.pull("hwchase17/react-json")`
			`prompt = prompt.partial(`
			`tools=render_text_description(tools),`
			`tool_names=", ".join([t.name for t in tools]),`
			`)`

			`# define the agent`
			`model_with_stop = llm.bind(stop=["\nObservation"])`
			`agent = (`
			`{`
			`"input": lambda x: x["input"],`
			`"agent_scratchpad": lambda x: format_log_to_str(x["intermediate_steps"]),`
			`}`
			`\| prompt`
			`\| model_with_stop`
			`\| ReActJsonSingleInputOutputParser()`
			`)`


			`class InputType(BaseModel):`
			`input: str`


			`# instantiate AgentExecutor`
			`agent_executor = AgentExecutor(`
			`agent=agent,`
			`tools=tools,`
			`verbose=True,`
			`handle_parsing_errors=True,`
			`).with_types(input_type=InputType)`