template: tool-retrieval-fireworks (#17052)

- Initial commit oss-tool-retrieval-agent - README update - lint - lock - format imports - Rename to retrieval-agent-fireworks - cr  --------- Co-authored-by: Taqi Jaffri <tjaffri@docugami.com>
4 months ago · 1183769cf7
parent 4eda647fdd
commit 1183769cf7
8 changed files with 2081 additions and 0 deletions
--- a/templates/retrieval-agent-fireworks/.gitignore
+++ b/templates/retrieval-agent-fireworks/.gitignore
@ -0,0 +1 @@
+__pycache__
--- a/templates/retrieval-agent-fireworks/LICENSE
+++ b/templates/retrieval-agent-fireworks/LICENSE
@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2023 LangChain, Inc.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
--- a/templates/retrieval-agent-fireworks/README.md
+++ b/templates/retrieval-agent-fireworks/README.md
@ -0,0 +1,73 @@
+# retrieval-agent-fireworks
+
+This package uses open source models hosted on FireworksAI to do retrieval using an agent architecture. By default, this does retrieval over Arxiv.
+
+We will use `Mixtral8x7b-instruct-v0.1`, which is shown in this blog to yield reasonable
+results with function calling even though it is not fine tuned for this task: https://huggingface.co/blog/open-source-llms-as-agents
+
+
+## Environment Setup
+
+There are various great ways to run OSS models. We will use FireworksAI as an easy way to run the models. See [here](https://python.langchain.com/docs/integrations/providers/fireworks) for more information.
+
+Set the `FIREWORKS_API_KEY` environment variable to access Fireworks.
+
+
+## Usage
+
+To use this package, you should first have the LangChain CLI installed:
+
+```shell
+pip install -U langchain-cli
+```
+
+To create a new LangChain project and install this as the only package, you can do:
+
+```shell
+langchain app new my-app --package retrieval-agent-fireworks
+```
+
+If you want to add this to an existing project, you can just run:
+
+```shell
+langchain app add retrieval-agent-fireworks
+```
+
+And add the following code to your `server.py` file:
+```python
+from retrieval_agent_fireworks import agent_executor as retrieval_agent_fireworks_chain
+
+add_routes(app, retrieval_agent_fireworks_chain, path="/retrieval-agent-fireworks")
+```
+
+(Optional) Let's now configure LangSmith. 
+LangSmith will help us trace, monitor and debug LangChain applications. 
+LangSmith is currently in private beta; you can sign up [here](https://smith.langchain.com/). 
+If you don't have access, you can skip this section.
+
+
+```shell
+export LANGCHAIN_TRACING_V2=true
+export LANGCHAIN_API_KEY=<your-api-key>
+export LANGCHAIN_PROJECT=<your-project>  # if not specified, defaults to "default"
+```
+
+If you are inside this directory, then you can spin up a LangServe instance directly by:
+
+```shell
+langchain serve
+```
+
+This will start the FastAPI app with a server running locally at 
+[http://localhost:8000](http://localhost:8000)
+
+We can see all templates at [http://127.0.0.1:8000/docs](http://127.0.0.1:8000/docs)
+We can access the playground at [http://127.0.0.1:8000/retrieval-agent-fireworks/playground](http://127.0.0.1:8000/retrieval-agent-fireworks/playground)  
+
+We can access the template from code with:
+
+```python
+from langserve.client import RemoteRunnable
+
+runnable = RemoteRunnable("http://localhost:8000/retrieval-agent-fireworks")
+```
--- a/templates/retrieval-agent-fireworks/poetry.lock
+++ b/templates/retrieval-agent-fireworks/poetry.lock
--- a/templates/retrieval-agent-fireworks/pyproject.toml
+++ b/templates/retrieval-agent-fireworks/pyproject.toml
@ -0,0 +1,34 @@
+[tool.poetry]
+name = "retrieval-agent-fireworks"
+version = "0.0.1"
+description = "Retrieval agent for open source models hosted on Fireworks"
+authors = []
+readme = "README.md"
+
+[tool.poetry.dependencies]
+python = ">=3.8.1,<4.0"
+langchain = "^0.1"
+arxiv = "^2.0.0"
+langchain-community = "^0.0.17"
+langchainhub = "^0.1.14"
+fireworks-ai = "^0.11.2"
+
+
+[tool.poetry.group.dev.dependencies]
+langchain-cli = ">=0.0.20"
+fastapi = "^0.104.0"
+sse-starlette = "^1.6.5"
+
+[tool.langserve]
+export_module = "retrieval_agent_fireworks"
+export_attr = "agent_executor"
+
+[tool.templates-hub]
+use-case = "research"
+author = "Docugami"
+integrations = ["HuggingFace"]
+tags = ["local", "agents"]
+
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"
--- a/templates/retrieval-agent-fireworks/retrieval_agent_fireworks/__init__.py
+++ b/templates/retrieval-agent-fireworks/retrieval_agent_fireworks/__init__.py
@ -0,0 +1,3 @@
+from retrieval_agent_fireworks.chain import agent_executor
+
+__all__ = ["agent_executor"]
--- a/templates/retrieval-agent-fireworks/retrieval_agent_fireworks/chain.py
+++ b/templates/retrieval-agent-fireworks/retrieval_agent_fireworks/chain.py
@ -0,0 +1,109 @@
+from typing import List
+
+from langchain import hub
+from langchain.agents import AgentExecutor
+from langchain.agents.format_scratchpad import format_log_to_str
+from langchain.agents.output_parsers import ReActJsonSingleInputOutputParser
+from langchain.callbacks.manager import CallbackManagerForRetrieverRun
+from langchain.schema import BaseRetriever, Document
+from langchain.tools.render import render_text_description
+from langchain.tools.retriever import create_retriever_tool
+from langchain_community.chat_models.fireworks import ChatFireworks
+from langchain_community.utilities.arxiv import ArxivAPIWrapper
+from langchain_core.pydantic_v1 import BaseModel
+
+# Fireworks model identifier handed to ChatFireworks when the LLM is built.
+MODEL_ID = "accounts/fireworks/models/mixtral-8x7b-instruct"
+
+
+class ArxivRetriever(BaseRetriever, ArxivAPIWrapper):
+    """`Arxiv` retriever.
+
+    It wraps load() to get_relevant_documents().
+    It uses all ArxivAPIWrapper arguments without any change.
+    """
+
+    get_full_documents: bool = False
+
+    def _get_relevant_documents(
+        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
+    ) -> List[Document]:
+        try:
+            if self.is_arxiv_identifier(query):
+                results = self.arxiv_search(
+                    id_list=query.split(),
+                    max_results=self.top_k_results,
+                ).results()
+            else:
+                results = self.arxiv_search(  # type: ignore
+                    query[: self.ARXIV_MAX_QUERY_LENGTH], max_results=self.top_k_results
+                ).results()
+        except self.arxiv_exceptions as ex:
+            return [Document(page_content=f"Arxiv exception: {ex}")]
+        docs = [
+            Document(
+                page_content=result.summary,
+                metadata={
+                    "Published": result.updated.date(),
+                    "Title": result.title,
+                    "Authors": ", ".join(a.name for a in result.authors),
+                },
+            )
+            for result in results
+        ]
+        return docs
+
+
+# Set up tool(s)
+description = (
+    "A wrapper around Arxiv.org "
+    "Useful for when you need to answer questions about Physics, Mathematics, "
+    "Computer Science, Quantitative Biology, Quantitative Finance, Statistics, "
+    "Electrical Engineering, and Economics "
+    "from scientific articles on arxiv.org. "
+    "Input should be a search query."
+)
+arxiv_tool = create_retriever_tool(ArxivRetriever(), "arxiv", description)
+tools = [arxiv_tool]
+
+# Set up LLM
+llm = ChatFireworks(
+    model=MODEL_ID,
+    model_kwargs={
+        "temperature": 0,
+        "max_tokens": 2048,
+        "top_p": 1,
+    },
+    cache=True,
+)
+
+# setup ReAct style prompt
+prompt = hub.pull("hwchase17/react-json")
+prompt = prompt.partial(
+    tools=render_text_description(tools),
+    tool_names=", ".join([t.name for t in tools]),
+)
+
+# define the agent
+model_with_stop = llm.bind(stop=["\nObservation"])
+agent = (
+    {
+        "input": lambda x: x["input"],
+        "agent_scratchpad": lambda x: format_log_to_str(x["intermediate_steps"]),
+    }
+    | prompt
+    | model_with_stop
+    | ReActJsonSingleInputOutputParser()
+)
+
+
+class InputType(BaseModel):
+    """Input schema applied to the agent executor: a single ``input`` string."""
+
+    input: str
+
+
+# instantiate AgentExecutor
+agent_executor = AgentExecutor(
+    agent=agent,
+    tools=tools,
+    verbose=True,
+    handle_parsing_errors=True,
+).with_types(input_type=InputType)
--- a/templates/retrieval-agent-fireworks/tests/__init__.py
+++ b/templates/retrieval-agent-fireworks/tests/__init__.py