From 19912d755eef674e497e923e9e32fa161c065e3a Mon Sep 17 00:00:00 2001 From: Zander Chase <130414180+vowelparrot@users.noreply.github.com> Date: Sun, 30 Apr 2023 18:59:22 -0700 Subject: [PATCH] Vwp/arxiv (#3855) Co-authored-by: Mike Wang <62768671+skcoirz@users.noreply.github.com> --- .../modules/agents/tools/examples/arxiv.ipynb | 116 ++++++++++++++++-- langchain/agents/load_tools.py | 4 + .../integration_tests/utilities/test_arxiv.py | 34 ++++- 3 files changed, 143 insertions(+), 11 deletions(-) diff --git a/docs/modules/agents/tools/examples/arxiv.ipynb b/docs/modules/agents/tools/examples/arxiv.ipynb index 38027d3c..8ea15564 100644 --- a/docs/modules/agents/tools/examples/arxiv.ipynb +++ b/docs/modules/agents/tools/examples/arxiv.ipynb @@ -5,7 +5,7 @@ "id": "245a954a", "metadata": {}, "source": [ - "# Arxiv API\n", + "# ArXiv API Tool\n", "\n", "This notebook goes over how to use the `arxiv` component. \n", "\n", @@ -14,7 +14,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "d5a7209e", "metadata": { "tags": [], @@ -22,7 +22,17 @@ "languageId": "shellscript" } }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: arxiv in /Users/wfh/code/lc/lckg/.venv/lib/python3.11/site-packages (1.4.7)\n", + "Requirement already satisfied: feedparser in /Users/wfh/code/lc/lckg/.venv/lib/python3.11/site-packages (from arxiv) (6.0.10)\n", + "Requirement already satisfied: sgmllib3k in /Users/wfh/code/lc/lckg/.venv/lib/python3.11/site-packages (from feedparser->arxiv) (1.0.0)\n" + ] + } + ], "source": [ "!pip install arxiv" ] @@ -30,6 +40,92 @@ { "cell_type": "code", "execution_count": 2, + "id": "ce1a4827-ce89-4f31-a041-3246743e513a", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "from langchain.chat_models import ChatOpenAI\n", + "from langchain.agents import load_tools, initialize_agent, AgentType\n", + "\n", + "llm = 
ChatOpenAI(temperature=0.0)\n", + "tools = load_tools(\n", + " [\"arxiv\"], \n", + ")\n", + "\n", + "agent_chain = initialize_agent(\n", + " tools,\n", + " llm,\n", + " agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,\n", + " verbose=True,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "ad7dd945-5ae3-49e5-b667-6d86b15050b6", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", + "\u001b[32;1m\u001b[1;3mI need to use Arxiv to search for the paper.\n", + "Action: Arxiv\n", + "Action Input: \"1605.08386\"\u001b[0m\n", + "Observation: \u001b[36;1m\u001b[1;3mPublished: 2016-05-26\n", + "Title: Heat-bath random walks with Markov bases\n", + "Authors: Caprice Stanley, Tobias Windisch\n", + "Summary: Graphs on lattice points are studied whose edges come from a finite set of\n", + "allowed moves of arbitrary length. We show that the diameter of these graphs on\n", + "fibers of a fixed integer matrix can be bounded from above by a constant. We\n", + "then study the mixing behaviour of heat-bath random walks on these graphs. 
We\n", + "also state explicit conditions on the set of moves so that the heat-bath random\n", + "walk, a generalization of the Glauber dynamics, is an expander in fixed\n", + "dimension.\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3mThe paper is about heat-bath random walks with Markov bases on graphs of lattice points.\n", + "Final Answer: The paper 1605.08386 is about heat-bath random walks with Markov bases on graphs of lattice points.\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n" + ] + }, + { + "data": { + "text/plain": [ + "'The paper 1605.08386 is about heat-bath random walks with Markov bases on graphs of lattice points.'" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "agent_chain.run(\n", + " \"What's the paper 1605.08386 about?\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "b4183343-d69a-4be0-9b2c-cc98464a6825", + "metadata": {}, + "source": [ + "## The ArXiv API Wrapper\n", + "\n", + "The tool wraps the API Wrapper. Below, we can explore some of the features it provides." + ] + }, + { + "cell_type": "code", + "execution_count": 4, "id": "8d32b39a", "metadata": { "tags": [] @@ -57,7 +153,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "id": "34bb5968", "metadata": { "tags": [] @@ -69,7 +165,7 @@ "'Published: 2016-05-26\\nTitle: Heat-bath random walks with Markov bases\\nAuthors: Caprice Stanley, Tobias Windisch\\nSummary: Graphs on lattice points are studied whose edges come from a finite set of\\nallowed moves of arbitrary length. We show that the diameter of these graphs on\\nfibers of a fixed integer matrix can be bounded from above by a constant. We\\nthen study the mixing behaviour of heat-bath random walks on these graphs. 
We\\nalso state explicit conditions on the set of moves so that the heat-bath random\\nwalk, a generalization of the Glauber dynamics, is an expander in fixed\\ndimension.'" ] }, - "execution_count": 4, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -93,7 +189,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "id": "b0867fda-e119-4b19-9ec6-e354fa821db3", "metadata": { "tags": [] @@ -105,7 +201,7 @@ "'Published: 2017-10-10\\nTitle: On Mixing Behavior of a Family of Random Walks Determined by a Linear Recurrence\\nAuthors: Caprice Stanley, Seth Sullivant\\nSummary: We study random walks on the integers mod $G_n$ that are determined by an\\ninteger sequence $\\\\{ G_n \\\\}_{n \\\\geq 1}$ generated by a linear recurrence\\nrelation. Fourier analysis provides explicit formulas to compute the\\neigenvalues of the transition matrices and we use this to bound the mixing time\\nof the random walks.\\n\\nPublished: 2016-05-26\\nTitle: Heat-bath random walks with Markov bases\\nAuthors: Caprice Stanley, Tobias Windisch\\nSummary: Graphs on lattice points are studied whose edges come from a finite set of\\nallowed moves of arbitrary length. We show that the diameter of these graphs on\\nfibers of a fixed integer matrix can be bounded from above by a constant. We\\nthen study the mixing behaviour of heat-bath random walks on these graphs. We\\nalso state explicit conditions on the set of moves so that the heat-bath random\\nwalk, a generalization of the Glauber dynamics, is an expander in fixed\\ndimension.\\n\\nPublished: 2003-03-18\\nTitle: Calculation of fluxes of charged particles and neutrinos from atmospheric showers\\nAuthors: V. Plyaskin\\nSummary: The results on the fluxes of charged particles and neutrinos from a\\n3-dimensional (3D) simulation of atmospheric showers are presented. An\\nagreement of calculated fluxes with data on charged particles from the AMS and\\nCAPRICE detectors is demonstrated. 
Predictions on neutrino fluxes at different\\nexperimental sites are compared with results from other calculations.'" ] }, - "execution_count": 5, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -125,7 +221,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "id": "3580aeeb-086f-45ba-bcdc-b46f5134b3dd", "metadata": { "tags": [] @@ -137,7 +233,7 @@ "'No good Arxiv Result was found'" ] }, - "execution_count": 6, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -164,7 +260,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.6" + "version": "3.11.2" } }, "nbformat": 4, diff --git a/langchain/agents/load_tools.py b/langchain/agents/load_tools.py index d5255de7..d858e960 100644 --- a/langchain/agents/load_tools.py +++ b/langchain/agents/load_tools.py @@ -257,6 +257,10 @@ _EXTRA_OPTIONAL_TOOLS: Dict[str, Tuple[Callable[[KwArg(Any)], BaseTool], List[st "serpapi": (_get_serpapi, ["serpapi_api_key", "aiosession"]), "searx-search": (_get_searx_search, ["searx_host", "engines", "aiosession"]), "wikipedia": (_get_wikipedia, ["top_k_results", "lang"]), + "arxiv": ( + _get_arxiv, + ["top_k_results", "load_max_docs", "load_all_available_meta"], + ), "human": (_get_human_tool, ["prompt_func", "input_func"]), "awslambda": ( _get_lambda_api, diff --git a/tests/integration_tests/utilities/test_arxiv.py b/tests/integration_tests/utilities/test_arxiv.py index f8dc8f14..d55f6e39 100644 --- a/tests/integration_tests/utilities/test_arxiv.py +++ b/tests/integration_tests/utilities/test_arxiv.py @@ -1,9 +1,11 @@ """Integration test for Arxiv API Wrapper.""" -from typing import List +from typing import Any, List import pytest +from langchain.agents.load_tools import load_tools from langchain.schema import Document +from langchain.tools.base import BaseTool from langchain.utilities import ArxivAPIWrapper @@ -77,3 +79,33 @@ def 
test_load_returns_full_set_of_metadata() -> None: ) print(doc.metadata) assert len(set(doc.metadata)) > 4 + + +def _load_arxiv_from_universal_entry(**kwargs: Any) -> BaseTool: + tools = load_tools(["arxiv"], **kwargs) + assert len(tools) == 1, "loaded more than 1 tool" + return tools[0] + + +def test_load_arxiv_from_universal_entry() -> None: + arxiv_tool = _load_arxiv_from_universal_entry() + output = arxiv_tool("Caprice Stanley") + assert ( + "On Mixing Behavior of a Family of Random Walks" in output + ), "failed to fetch a valid result" + + +def test_load_arxiv_from_universal_entry_with_params() -> None: + params = { + "top_k_results": 1, + "load_max_docs": 10, + "load_all_available_meta": True, + } + arxiv_tool = _load_arxiv_from_universal_entry(**params) + wp = getattr(arxiv_tool, "api_wrapper", None) + assert isinstance(wp, ArxivAPIWrapper) + assert wp.top_k_results == 1, "failed to assert top_k_results" + assert wp.load_max_docs == 10, "failed to assert load_max_docs" + assert ( + wp.load_all_available_meta is True + ), "failed to assert load_all_available_meta"