mirror of
https://github.com/hwchase17/langchain
synced 2024-11-10 01:10:59 +00:00
1183769cf7
- Initial commit oss-tool-retrieval-agent - README update - lint - lock - format imports - Rename to retrieval-agent-fireworks - cr <!-- Thank you for contributing to LangChain! Please title your PR "<package>: <description>", where <package> is whichever of langchain, community, core, experimental, etc. is being modified. Replace this entire comment with: - **Description:** a description of the change, - **Issue:** the issue # it fixes if applicable, - **Dependencies:** any dependencies required for this change, - **Twitter handle:** we announce bigger features on Twitter. If your PR gets announced, and you'd like a mention, we'll gladly shout you out! Please make sure your PR is passing linting and testing before submitting. Run `make format`, `make lint` and `make test` from the root of the package you've modified to check this locally. See contribution guidelines for more information on how to write/run tests, lint, etc: https://python.langchain.com/docs/contributing/ If you're adding a new integration, please include: 1. a test for the integration, preferably unit tests that do not rely on network access, 2. an example notebook showing its use. It lives in `docs/docs/integrations` directory. If no one reviews your PR within a few days, please @-mention one of @baskaryan, @eyurtsev, @hwchase17. --> --------- Co-authored-by: Taqi Jaffri <tjaffri@docugami.com>
110 lines
3.3 KiB
Python
110 lines
3.3 KiB
Python
from typing import List
|
|
|
|
from langchain import hub
|
|
from langchain.agents import AgentExecutor
|
|
from langchain.agents.format_scratchpad import format_log_to_str
|
|
from langchain.agents.output_parsers import ReActJsonSingleInputOutputParser
|
|
from langchain.callbacks.manager import CallbackManagerForRetrieverRun
|
|
from langchain.schema import BaseRetriever, Document
|
|
from langchain.tools.render import render_text_description
|
|
from langchain.tools.retriever import create_retriever_tool
|
|
from langchain_community.chat_models.fireworks import ChatFireworks
|
|
from langchain_community.utilities.arxiv import ArxivAPIWrapper
|
|
from langchain_core.pydantic_v1 import BaseModel
|
|
|
|
# Fireworks model identifier passed as `model=` to the ChatFireworks LLM below.
MODEL_ID = "accounts/fireworks/models/mixtral-8x7b-instruct"
|
|
|
|
|
|
class ArxivRetriever(BaseRetriever, ArxivAPIWrapper):
    """`Arxiv` retriever that returns paper summaries as documents.

    Runs a search through the inherited ``ArxivAPIWrapper`` client and turns
    each hit into a ``Document`` whose content is the paper summary.
    It uses all ArxivAPIWrapper arguments without any change.
    """

    # NOTE(review): this flag is not read anywhere in this class; it is kept
    # so that callers passing `get_full_documents=...` keep working.
    get_full_documents: bool = False

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        """Search arXiv for ``query`` and return one document per result.

        Args:
            query: Either whitespace-separated arXiv identifiers (as decided
                by ``is_arxiv_identifier``) or a free-text search query. Free
                text is truncated to ``ARXIV_MAX_QUERY_LENGTH`` characters.
            run_manager: Callback manager for this retriever run (unused here).

        Returns:
            A list of documents with the result summary as ``page_content``
            and ``Published``, ``Title`` and ``Authors`` metadata. If one of
            ``self.arxiv_exceptions`` is raised, a single document carrying
            the exception message is returned instead.
        """
        try:
            if self.is_arxiv_identifier(query):
                search = self.arxiv_search(
                    id_list=query.split(),
                    max_results=self.top_k_results,
                )
            else:
                search = self.arxiv_search(  # type: ignore
                    query[: self.ARXIV_MAX_QUERY_LENGTH],
                    max_results=self.top_k_results,
                )
            # `results()` may produce its items lazily (the `arxiv` client
            # returns a generator), in which case the actual fetch -- and any
            # arXiv error -- only happens on iteration. Consume it here so
            # those errors are handled by the `except` below instead of
            # escaping from the comprehension further down.
            results = list(search.results())
        except self.arxiv_exceptions as ex:
            return [Document(page_content=f"Arxiv exception: {ex}")]

        return [
            Document(
                page_content=result.summary,
                metadata={
                    "Published": result.updated.date(),
                    "Title": result.title,
                    "Authors": ", ".join(a.name for a in result.authors),
                },
            )
            for result in results
        ]
|
|
|
|
|
|
# Tooling: a single arXiv search tool backed by ArxivRetriever.
# The description is what the agent prompt shows the model for this tool;
# the fragments are joined with single spaces into one sentence-like string.
description = " ".join(
    [
        "A wrapper around Arxiv.org",
        "Useful for when you need to answer questions about Physics, Mathematics,",
        "Computer Science, Quantitative Biology, Quantitative Finance, Statistics,",
        "Electrical Engineering, and Economics",
        "from scientific articles on arxiv.org.",
        "Input should be a search query.",
    ]
)
arxiv_tool = create_retriever_tool(
    retriever=ArxivRetriever(),
    name="arxiv",
    description=description,
)
tools = [arxiv_tool]
|
|
|
|
# Chat model: Fireworks-hosted MODEL_ID with temperature 0, top_p 1 and a
# 2048-token completion limit.
# NOTE(review): `cache=True` presumably relies on an LLM cache being
# configured elsewhere -- confirm.
llm = ChatFireworks(
    model=MODEL_ID,
    model_kwargs=dict(temperature=0, max_tokens=2048, top_p=1),
    cache=True,
)
|
|
|
|
# ReAct-style prompt: pull the shared template from the hub and pre-fill the
# tool listing and the comma-separated tool names.
prompt = hub.pull("hwchase17/react-json").partial(
    tools=render_text_description(tools),
    tool_names=", ".join(t.name for t in tools),
)
|
|
|
|
# Agent pipeline: map the executor's input dict to prompt variables, render
# the prompt, call the model, then parse its reply.
# The model is bound with a stop sequence so generation halts before it
# writes an "Observation" line itself.
model_with_stop = llm.bind(stop=["\nObservation"])
agent = (
    dict(
        input=lambda inputs: inputs["input"],
        agent_scratchpad=lambda inputs: format_log_to_str(
            inputs["intermediate_steps"]
        ),
    )
    | prompt
    | model_with_stop
    | ReActJsonSingleInputOutputParser()
)
|
|
|
|
|
|
# Input schema attached to the agent executor via `with_types` below.
class InputType(BaseModel):
    # Value read by the agent pipeline as x["input"].
    input: str
|
|
|
|
|
|
# Executor: runs the agent with its tools, logs verbosely, handles parsing
# errors via `handle_parsing_errors=True`, and declares InputType as its
# input type.
agent_executor = AgentExecutor(
    agent=agent, tools=tools, verbose=True, handle_parsing_errors=True
).with_types(input_type=InputType)
|