Harrison/pubmed integration (#5664)

Co-authored-by: younis basher <71520361+younis-ba@users.noreply.github.com> Co-authored-by: Younis Bashir <younis@omicmd.com>
12 months ago · ad09367a92
parent 9921f8cc3a
commit ad09367a92
14 changed files with 578 additions and 8 deletions
--- a/docs/modules/agents/tools/examples/pubmed.ipynb
+++ b/docs/modules/agents/tools/examples/pubmed.ipynb
@ -0,0 +1,86 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "64f20f38",
+   "metadata": {},
+   "source": [
+    "# PubMed Tool\n",
+    "\n",
+    "This notebook goes over how to use PubMed as a tool.\n",
+    "\n",
+    "PubMed® comprises more than 35 million citations for biomedical literature from MEDLINE, life science journals, and online books. Citations may include links to full text content from PubMed Central and publisher web sites."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "c80b9273",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain.tools import PubmedQueryRun"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "f203c965",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tool = PubmedQueryRun()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "baee7a2a",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'Published: <Year>2023</Year><Month>May</Month><Day>31</Day>\\nTitle: Dermatology in the wake of an AI revolution: who gets a say?\\nSummary: \\n\\nPublished: <Year>2023</Year><Month>May</Month><Day>30</Day>\\nTitle: What is ChatGPT and what do we do with it? Implications of the age of AI for nursing and midwifery practice and education: An editorial.\\nSummary: \\n\\nPublished: <Year>2023</Year><Month>Jun</Month><Day>02</Day>\\nTitle: The Impact of ChatGPT on the Nursing Profession: Revolutionizing Patient Care and Education.\\nSummary: The nursing field has undergone notable changes over time and is projected to undergo further modifications in the future, owing to the advent of sophisticated technologies and growing healthcare needs. The advent of ChatGPT, an AI-powered language model, is expected to exert a significant influence on the nursing profession, specifically in the domains of patient care and instruction. The present article delves into the ramifications of ChatGPT within the nursing domain and accentuates its capacity and constraints to transform the discipline.'"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "tool.run(\"chatgpt\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "965903ba",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.1"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
--- a/docs/modules/indexes/retrievers/examples/pubmed.ipynb
+++ b/docs/modules/indexes/retrievers/examples/pubmed.ipynb
@ -0,0 +1,80 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "3df0dcf8",
+   "metadata": {},
+   "source": [
+    "# PubMed Retriever\n",
+    "\n",
+    "This notebook goes over how to use PubMed as a retriever.\n",
+    "\n",
+    "PubMed® comprises more than 35 million citations for biomedical literature from MEDLINE, life science journals, and online books. Citations may include links to full text content from PubMed Central and publisher web sites."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "aecaff63",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain.retrievers import PubMedRetriever"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "f2f7e8d3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "retriever = PubMedRetriever()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "ed115aa1",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[Document(page_content='', metadata={'uid': '37268021', 'title': 'Dermatology in the wake of an AI revolution: who gets a say?', 'pub_date': '<Year>2023</Year><Month>May</Month><Day>31</Day>'}),\n",
+       " Document(page_content='', metadata={'uid': '37267643', 'title': 'What is ChatGPT and what do we do with it? Implications of the age of AI for nursing and midwifery practice and education: An editorial.', 'pub_date': '<Year>2023</Year><Month>May</Month><Day>30</Day>'}),\n",
+       " Document(page_content='The nursing field has undergone notable changes over time and is projected to undergo further modifications in the future, owing to the advent of sophisticated technologies and growing healthcare needs. The advent of ChatGPT, an AI-powered language model, is expected to exert a significant influence on the nursing profession, specifically in the domains of patient care and instruction. The present article delves into the ramifications of ChatGPT within the nursing domain and accentuates its capacity and constraints to transform the discipline.', metadata={'uid': '37266721', 'title': 'The Impact of ChatGPT on the Nursing Profession: Revolutionizing Patient Care and Education.', 'pub_date': '<Year>2023</Year><Month>Jun</Month><Day>02</Day>'})]"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "retriever.get_relevant_documents(\"chatgpt\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.1"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
--- a/docs/modules/indexes/retrievers/examples/wikipedia.ipynb
+++ b/docs/modules/indexes/retrievers/examples/wikipedia.ipynb
@ -1,7 +1,6 @@
 {
 "cells": [
  {
-   "attachments": {},
   "cell_type": "markdown",
   "id": "9fc6205b",
   "metadata": {},
@ -14,7 +13,6 @@
   ]
  },
  {
-   "attachments": {},
   "cell_type": "markdown",
   "id": "51489529-5dcd-4b86-bda6-de0a39d8ffd1",
   "metadata": {},
@ -23,7 +21,6 @@
   ]
  },
  {
-   "attachments": {},
   "cell_type": "markdown",
   "id": "1435c804-069d-4ade-9a7b-006b97b767c1",
   "metadata": {},
@ -44,7 +41,6 @@
   ]
  },
  {
-   "attachments": {},
   "cell_type": "markdown",
   "id": "6c15470b-a16b-4e0d-bc6a-6998bafbb5a4",
   "metadata": {},
@ -58,7 +54,6 @@
   ]
  },
  {
-   "attachments": {},
   "cell_type": "markdown",
   "id": "ae3c3d16",
   "metadata": {},
@ -67,7 +62,6 @@
   ]
  },
  {
-   "attachments": {},
   "cell_type": "markdown",
   "id": "6fafb73b-d6ec-4822-b161-edf0aaf5224a",
   "metadata": {},
@ -151,7 +145,6 @@
   ]
  },
  {
-   "attachments": {},
   "cell_type": "markdown",
   "id": "2670363b-3806-4c7e-b14d-90a4d5d2a200",
   "metadata": {},
@ -273,7 +266,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.10.6"
+   "version": "3.9.1"
  }
 },
 "nbformat": 4,
--- a/langchain/agents/load_tools.py
+++ b/langchain/agents/load_tools.py
@ -14,6 +14,7 @@ from langchain.chains.llm_math.base import LLMMathChain
 from langchain.chains.pal.base import PALChain
 from langchain.requests import TextRequestsWrapper
 from langchain.tools.arxiv.tool import ArxivQueryRun
+from langchain.tools.pubmed.tool import PubmedQueryRun
 from langchain.tools.base import BaseTool
 from langchain.tools.bing_search.tool import BingSearchRun
 from langchain.tools.ddg_search.tool import DuckDuckGoSearchRun
@ -37,6 +38,7 @@ from langchain.tools.wikipedia.tool import WikipediaQueryRun
 from langchain.tools.wolfram_alpha.tool import WolframAlphaQueryRun
 from langchain.tools.openweathermap.tool import OpenWeatherMapQueryRun
 from langchain.utilities import ArxivAPIWrapper
+from langchain.utilities import PubMedAPIWrapper
 from langchain.utilities.bing_search import BingSearchAPIWrapper
 from langchain.utilities.duckduckgo_search import DuckDuckGoSearchAPIWrapper
 from langchain.utilities.google_search import GoogleSearchAPIWrapper
@ -198,6 +200,10 @@ def _get_arxiv(**kwargs: Any) -> BaseTool:
    return ArxivQueryRun(api_wrapper=ArxivAPIWrapper(**kwargs))


+def _get_pupmed(**kwargs: Any) -> BaseTool:
+    """Build the PubMed query tool, forwarding ``kwargs`` to PubMedAPIWrapper."""
+    wrapper = PubMedAPIWrapper(**kwargs)
+    return PubmedQueryRun(api_wrapper=wrapper)
+
+
 def _get_google_serper(**kwargs: Any) -> BaseTool:
    return GoogleSerperRun(api_wrapper=GoogleSerperAPIWrapper(**kwargs))

@ -302,6 +308,10 @@ _EXTRA_OPTIONAL_TOOLS: Dict[str, Tuple[Callable[[KwArg(Any)], BaseTool], List[st
        _get_arxiv,
        ["top_k_results", "load_max_docs", "load_all_available_meta"],
    ),
+    "pupmed": (
+        _get_pupmed,
+        ["top_k_results", "load_max_docs", "load_all_available_meta"],
+    ),
    "human": (_get_human_tool, ["prompt_func", "input_func"]),
    "awslambda": (
        _get_lambda_api,
--- a/langchain/retrievers/init.py
+++ b/langchain/retrievers/init.py
@ -7,6 +7,7 @@ from langchain.retrievers.elastic_search_bm25 import ElasticSearchBM25Retriever
 from langchain.retrievers.knn import KNNRetriever
 from langchain.retrievers.metal import MetalRetriever
 from langchain.retrievers.pinecone_hybrid_search import PineconeHybridSearchRetriever
+from langchain.retrievers.pupmed import PubMedRetriever
 from langchain.retrievers.remote_retriever import RemoteLangChainRetriever
 from langchain.retrievers.self_query.base import SelfQueryRetriever
 from langchain.retrievers.svm import SVMRetriever
@ -21,6 +22,7 @@ from langchain.retrievers.zep import ZepRetriever

 __all__ = [
    "ArxivRetriever",
+    "PubMedRetriever",
    "AzureCognitiveSearchRetriever",
    "ChatGPTPluginRetriever",
    "ContextualCompressionRetriever",
--- a/langchain/retrievers/pupmed.py
+++ b/langchain/retrievers/pupmed.py
@ -0,0 +1,18 @@
+from typing import List
+
+from langchain.schema import BaseRetriever, Document
+from langchain.utilities.pupmed import PubMedAPIWrapper
+
+
+class PubMedRetriever(BaseRetriever, PubMedAPIWrapper):
+    """
+    Retriever that exposes PubMedAPIWrapper searches as Documents.
+
+    `get_relevant_documents()` delegates to the inherited `load_docs()`.
+    All PubMedAPIWrapper arguments are accepted without any change.
+    """
+
+    async def aget_relevant_documents(self, query: str) -> List[Document]:
+        # No asynchronous implementation is provided.
+        raise NotImplementedError
+
+    def get_relevant_documents(self, query: str) -> List[Document]:
+        """Return the Documents produced by `load_docs` for ``query``."""
+        documents = self.load_docs(query=query)
+        return documents
--- a/langchain/tools/init.py
+++ b/langchain/tools/init.py
@ -48,6 +48,7 @@ from langchain.tools.powerbi.tool import (
    ListPowerBITool,
    QueryPowerBITool,
 )
+from langchain.tools.pubmed.tool import PubmedQueryRun
 from langchain.tools.scenexplain.tool import SceneXplainTool
 from langchain.tools.shell.tool import ShellTool
 from langchain.tools.steamship_image_generation import SteamshipImageGenerationTool
@ -120,4 +121,5 @@ __all__ = [
    "tool",
    "YouTubeSearchTool",
    "BraveSearch",
+    "PubmedQueryRun",
 ]
--- a/langchain/tools/pubmed/init.py
+++ b/langchain/tools/pubmed/init.py
@ -0,0 +1 @@
+"""PubMed API toolkit."""
--- a/langchain/tools/pubmed/tool.py
+++ b/langchain/tools/pubmed/tool.py
@ -0,0 +1,43 @@
+"""Tool for the Pubmed API."""
+
+from typing import Optional
+
+from pydantic import Field
+
+from langchain.callbacks.manager import (
+    AsyncCallbackManagerForToolRun,
+    CallbackManagerForToolRun,
+)
+from langchain.tools.base import BaseTool
+from langchain.utilities.pupmed import PubMedAPIWrapper
+
+
+class PubmedQueryRun(BaseTool):
+    """Tool that adds the capability to search using the PubMed API.
+
+    Queries are delegated to ``api_wrapper.run``. Only synchronous execution
+    is supported; ``_arun`` always raises ``NotImplementedError``.
+    """
+
+    name = "PubMed"
+    # The description is what an agent reads to decide when to call the tool,
+    # so it must describe PubMed's biomedical scope.
+    description = (
+        "A wrapper around PubMed. "
+        "Useful for when you need to answer questions about medicine, health, "
+        "and biomedical research "
+        "from scientific articles on PubMed. "
+        "Input should be a search query."
+    )
+    api_wrapper: PubMedAPIWrapper = Field(default_factory=PubMedAPIWrapper)
+
+    def _run(
+        self,
+        query: str,
+        run_manager: Optional[CallbackManagerForToolRun] = None,
+    ) -> str:
+        """Use the PubMed tool.
+
+        Args:
+            query: the search query passed to ``api_wrapper.run``.
+            run_manager: accepted for interface compatibility; not used here.
+
+        Returns:
+            The string returned by ``api_wrapper.run(query)``.
+        """
+        return self.api_wrapper.run(query)
+
+    async def _arun(
+        self,
+        query: str,
+        run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
+    ) -> str:
+        """Use the PubMed tool asynchronously.
+
+        Raises:
+            NotImplementedError: always; there is no async implementation.
+        """
+        raise NotImplementedError("PubMedAPIWrapper does not support async")
--- a/langchain/utilities/init.py
+++ b/langchain/utilities/init.py
@ -13,6 +13,7 @@ from langchain.utilities.graphql import GraphQLAPIWrapper
 from langchain.utilities.metaphor_search import MetaphorSearchAPIWrapper
 from langchain.utilities.openweathermap import OpenWeatherMapAPIWrapper
 from langchain.utilities.powerbi import PowerBIDataset
+from langchain.utilities.pupmed import PubMedAPIWrapper
 from langchain.utilities.python import PythonREPL
 from langchain.utilities.searx_search import SearxSearchWrapper
 from langchain.utilities.serpapi import SerpAPIWrapper
@ -24,6 +25,7 @@ from langchain.utilities.wolfram_alpha import WolframAlphaAPIWrapper
 __all__ = [
    "ApifyWrapper",
    "ArxivAPIWrapper",
+    "PubMedAPIWrapper",
    "BashProcess",
    "BingSearchAPIWrapper",
    "DuckDuckGoSearchAPIWrapper",
--- a/langchain/utilities/pupmed.py
+++ b/langchain/utilities/pupmed.py
@ -0,0 +1,171 @@
+import json
+import logging
+import time
+import urllib.error
+import urllib.parse
+import urllib.request
+from typing import List
+
+from pydantic import BaseModel, Extra
+
+from langchain.schema import Document
+
+logger = logging.getLogger(__name__)
+
+
+class PubMedAPIWrapper(BaseModel):
+    """
+    Wrapper around PubMed API.
+
+    This wrapper will use the PubMed API to conduct searches and fetch
+    document summaries. By default, it will return the document summaries
+    of the top-k results of an input search.
+
+    Parameters:
+        top_k_results: number of the top-scored document used for the PubMed
+          tool; sent to ESearch as ``retmax``.
+        load_max_docs: a limit to the number of loaded documents.
+          NOTE: accepted but not read by any method of this class.
+        load_all_available_meta:
+          if True: the `metadata` of the loaded Documents gets all available meta info
+            (see https://www.ncbi.nlm.nih.gov/books/NBK25499/#chapter4.ESearch)
+          if False: the `metadata` gets only the most informative fields.
+          NOTE: accepted but not read by any method of this class; the
+          metadata is always ``uid``, ``title`` and ``pub_date``.
+        doc_content_chars_max: maximum number of characters returned by `run`.
+        email: NOTE: accepted but not read by any method of this class.
+    """
+
+    base_url_esearch = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?"
+    base_url_efetch = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?"
+    # Maximum number of retries after an HTTP 429 (Too Many Requests) response.
+    max_retry = 5
+    # Initial wait in seconds before a retry; doubled after each retry of a
+    # single request.
+    sleep_time = 0.2
+
+    # Default values for the parameters
+    top_k_results: int = 3
+    load_max_docs: int = 25
+    # Maximum number of query characters `run` forwards to `load`. The ARXIV_
+    # prefix is presumably a leftover from the arXiv wrapper; the name is kept
+    # so existing references keep working.
+    ARXIV_MAX_QUERY_LENGTH = 300
+    doc_content_chars_max: int = 2000
+    load_all_available_meta: bool = False
+    email: str = "your_email@example.com"
+
+    class Config:
+        """Configuration for this pydantic object."""
+
+        extra = Extra.forbid
+
+    def run(self, query: str) -> str:
+        """
+        Run PubMed search and get the article meta information.
+        See https://www.ncbi.nlm.nih.gov/books/NBK25499/#chapter4.ESearch
+        It uses only the most informative fields of article meta information.
+
+        Args:
+            query: search query; only its first ARXIV_MAX_QUERY_LENGTH
+              characters are used.
+
+        Returns:
+            The formatted articles separated by blank lines and truncated to
+            ``doc_content_chars_max`` characters; "No good PubMed Result was
+            found" when there are no articles; or "PubMed exception: ..."
+            when anything raises (errors are reported as text, not raised).
+        """
+
+        try:
+            # Retrieve the top-k results for the query
+            docs = [
+                f"Published: {result['pub_date']}\nTitle: {result['title']}\n"
+                f"Summary: {result['summary']}"
+                for result in self.load(query[: self.ARXIV_MAX_QUERY_LENGTH])
+            ]
+
+            # Join the results and limit the character count
+            return (
+                "\n\n".join(docs)[: self.doc_content_chars_max]
+                if docs
+                else "No good PubMed Result was found"
+            )
+        except Exception as ex:
+            # Deliberate best-effort: the caller always gets a string back.
+            return f"PubMed exception: {ex}"
+
+    def load(self, query: str) -> List[dict]:
+        """
+        Search PubMed for documents matching the query.
+        Return a list of dictionaries containing the document metadata.
+
+        Each dictionary has the keys ``uid``, ``title``, ``summary`` and
+        ``pub_date`` (see `retrieve_article`).
+
+        Raises:
+            urllib.error.HTTPError: if a request fails, or keeps answering
+              429 after ``max_retry`` retries.
+            KeyError: if the ESearch response lacks ``esearchresult``,
+              ``webenv`` or ``idlist``.
+        """
+
+        url = (
+            self.base_url_esearch
+            + "db=pubmed&term="
+            # Percent-encode the raw query so it is sent as the search term.
+            + urllib.parse.quote(query)
+            + f"&retmode=json&retmax={self.top_k_results}&usehistory=y"
+        )
+        json_text = json.loads(self._read_url(url))
+
+        esearch_result = json_text["esearchresult"]
+        webenv = esearch_result["webenv"]
+
+        # One EFetch request per returned id.
+        return [self.retrieve_article(uid, webenv) for uid in esearch_result["idlist"]]
+
+    def _transform_doc(self, doc: dict) -> Document:
+        """Turn an article dict into a Document.
+
+        The ``summary`` entry is removed from ``doc`` and becomes the page
+        content; the remaining entries become the metadata.
+        """
+        summary = doc.pop("summary")
+        return Document(page_content=summary, metadata=doc)
+
+    def load_docs(self, query: str) -> List[Document]:
+        """Run `load` for ``query`` and convert every article to a Document."""
+        document_dicts = self.load(query=query)
+        return [self._transform_doc(d) for d in document_dicts]
+
+    def _read_url(self, url: str) -> str:
+        """Fetch ``url`` and return its body decoded as UTF-8.
+
+        An HTTP 429 response is retried up to ``max_retry`` times, waiting
+        ``sleep_time`` seconds first and doubling the wait after each retry.
+        The back-off is tracked locally so the instance is never modified.
+
+        Raises:
+            urllib.error.HTTPError: for any other HTTP error, or for a 429
+              once the retries are exhausted.
+        """
+        delay = self.sleep_time
+        retry = 0
+        while True:
+            try:
+                # The context manager closes the response once it is read.
+                with urllib.request.urlopen(url) as response:
+                    return response.read().decode("utf-8")
+            except urllib.error.HTTPError as e:
+                if e.code != 429 or retry >= self.max_retry:
+                    raise
+                # Too Many Requests error
+                # wait for an exponentially increasing amount of time
+                logger.warning(
+                    "Too Many Requests, waiting for %.2f seconds...", delay
+                )
+                time.sleep(delay)
+                delay *= 2
+                retry += 1
+
+    @staticmethod
+    def _extract_tag_text(xml_text: str, tag: str) -> str:
+        """Return the raw text between the first ``<tag>`` and ``</tag>``.
+
+        Returns "" when either literal tag is missing. Matching is on the
+        exact strings, so an opening tag that carries attributes is not found,
+        and nested markup inside the element is returned verbatim.
+        """
+        start_tag = f"<{tag}>"
+        end_tag = f"</{tag}>"
+        if start_tag not in xml_text or end_tag not in xml_text:
+            return ""
+        start = xml_text.index(start_tag) + len(start_tag)
+        return xml_text[start : xml_text.index(end_tag)]
+
+    def retrieve_article(self, uid: str, webenv: str) -> dict:
+        """Fetch one article through EFetch and extract its main fields.
+
+        Args:
+            uid: PubMed id of the article, as returned in the ESearch
+              ``idlist``.
+            webenv: the ``webenv`` value returned by the ESearch request.
+
+        Returns:
+            A dict with the keys ``uid``, ``title``, ``summary`` and
+            ``pub_date``; a field is "" when its tag is not found in the XML.
+
+        Raises:
+            urllib.error.HTTPError: see `_read_url`.
+        """
+        url = (
+            self.base_url_efetch
+            + "db=pubmed&retmode=xml&id="
+            + uid
+            + "&webenv="
+            + webenv
+        )
+        xml_text = self._read_url(url)
+
+        # Return article as dictionary
+        return {
+            "uid": uid,
+            "title": self._extract_tag_text(xml_text, "ArticleTitle"),
+            "summary": self._extract_tag_text(xml_text, "AbstractText"),
+            "pub_date": self._extract_tag_text(xml_text, "PubDate"),
+        }
--- a/tests/integration_tests/retrievers/test_pupmed.py
+++ b/tests/integration_tests/retrievers/test_pupmed.py
@ -0,0 +1,50 @@
+"""Integration test for PubMed API Wrapper."""
+from typing import List
+
+import pytest
+
+from langchain.retrievers import PubMedRetriever
+from langchain.schema import Document
+
+
+@pytest.fixture
+def retriever() -> PubMedRetriever:
+    """Retriever built with the default PubMedAPIWrapper settings."""
+    return PubMedRetriever()
+
+
+def assert_docs(docs: List[Document], all_meta: bool = False) -> None:
+    """Check the shape of Documents produced by the PubMed retriever.
+
+    The metadata must contain ``uid``, ``title`` and ``pub_date``. Extra keys
+    are only tolerated when ``all_meta`` is True.
+    """
+    main_meta = {"uid", "title", "pub_date"}
+    for doc in docs:
+        # page_content holds the abstract, which may legitimately be empty.
+        assert isinstance(doc.page_content, str)
+        assert doc.metadata
+        assert set(doc.metadata).issuperset(main_meta)
+        if not all_meta:
+            assert len(set(doc.metadata)) == len(main_meta)
+
+
+def test_load_success(retriever: PubMedRetriever) -> None:
+    docs = retriever.get_relevant_documents(query="chatgpt")
+    # The number of results is driven by top_k_results (sent as retmax).
+    assert len(docs) == retriever.top_k_results
+    assert_docs(docs, all_meta=False)
+
+
+def test_load_success_all_meta(retriever: PubMedRetriever) -> None:
+    retriever.load_all_available_meta = True
+    retriever.load_max_docs = 2
+    docs = retriever.get_relevant_documents(query="ChatGPT")
+    assert len(docs) > 1
+    assert_docs(docs, all_meta=True)
+
+
+def test_load_success_init_args() -> None:
+    # top_k_results is the setting that limits the number of documents.
+    retriever = PubMedRetriever(
+        top_k_results=1, load_max_docs=1, load_all_available_meta=True
+    )
+    docs = retriever.get_relevant_documents(query="ChatGPT")
+    assert len(docs) == 1
+    assert_docs(docs, all_meta=True)
+
+
+def test_load_no_result(retriever: PubMedRetriever) -> None:
+    docs = retriever.get_relevant_documents("1605.08386WWW")
+    assert not docs
--- a/tests/integration_tests/utilities/test_pupmed.py
+++ b/tests/integration_tests/utilities/test_pupmed.py
@ -0,0 +1,111 @@
+"""Integration test for PubMed API Wrapper."""
+from typing import Any, List
+
+import pytest
+
+from langchain.agents.load_tools import load_tools
+from langchain.schema import Document
+from langchain.tools.base import BaseTool
+from langchain.utilities import PubMedAPIWrapper
+
+
+@pytest.fixture
+def api_client() -> PubMedAPIWrapper:
+    """API wrapper built with its default settings."""
+    return PubMedAPIWrapper()
+
+
+def test_run_success(api_client: PubMedAPIWrapper) -> None:
+    """Test that a search returns formatted article information"""
+
+    output = api_client.run("chatgpt")
+    # `run` reports failures as "PubMed exception: ...", so checking the
+    # prefix also rules out a swallowed error.
+    assert output.startswith("Published: ")
+    assert "Title: " in output
+    assert "Summary: " in output
+
+
+def test_run_returns_several_docs(api_client: PubMedAPIWrapper) -> None:
+    """Test that returns several docs"""
+
+    # Raise the truncation limit so long abstracts cannot hide later results.
+    api_client.doc_content_chars_max = 10000
+    output = api_client.run("chatgpt")
+    assert output.count("Published: ") > 1
+
+
+def test_run_returns_no_result(api_client: PubMedAPIWrapper) -> None:
+    """Test that gives no result."""
+
+    output = api_client.run("1605.08386WWW")
+    assert "No good PubMed Result was found" == output
+
+
+def assert_docs(docs: List[Document]) -> None:
+    """Check that every Document carries exactly the PubMed metadata keys."""
+    for doc in docs:
+        # page_content holds the abstract, which may legitimately be empty.
+        assert isinstance(doc.page_content, str)
+        assert doc.metadata
+        assert set(doc.metadata) == {"uid", "title", "pub_date"}
+
+
+def test_load_success(api_client: PubMedAPIWrapper) -> None:
+    """Test that returns the top-k documents"""
+
+    docs = api_client.load_docs("chatgpt")
+    assert len(docs) == api_client.top_k_results
+    assert_docs(docs)
+
+
+def test_load_returns_no_result(api_client: PubMedAPIWrapper) -> None:
+    """Test that returns no docs"""
+
+    docs = api_client.load("1605.08386WWW")
+    assert len(docs) == 0
+
+
+def test_load_returns_limited_docs() -> None:
+    """Test that returns a limited number of docs"""
+    expected_docs = 2
+    # top_k_results is the setting that limits the number of documents.
+    api_client = PubMedAPIWrapper(top_k_results=expected_docs)
+    docs = api_client.load_docs("ChatGPT")
+    assert len(docs) == expected_docs
+    assert_docs(docs)
+
+
+def test_load_returns_full_set_of_metadata() -> None:
+    """Test that the main metadata is present when all meta is requested"""
+    api_client = PubMedAPIWrapper(top_k_results=1, load_all_available_meta=True)
+    docs = api_client.load_docs("ChatGPT")
+    assert len(docs) == 1
+    for doc in docs:
+        assert isinstance(doc.page_content, str)
+        assert doc.metadata
+        # The wrapper does not read load_all_available_meta, so only the main
+        # keys can be required here.
+        assert set(doc.metadata).issuperset({"uid", "title", "pub_date"})
+
+
+def _load_pubmed_from_universal_entry(**kwargs: Any) -> BaseTool:
+    """Load the PubMed tool through `load_tools` and return it."""
+    tools = load_tools(["pupmed"], **kwargs)
+    assert len(tools) == 1, "loaded more than 1 tool"
+    return tools[0]
+
+
+def test_load_pupmed_from_universal_entry() -> None:
+    pupmed_tool = _load_pubmed_from_universal_entry()
+    output = pupmed_tool("chatgpt")
+    assert output.startswith("Published: "), "failed to fetch a valid result"
+
+
+def test_load_pupmed_from_universal_entry_with_params() -> None:
+    from langchain.tools import PubmedQueryRun
+
+    params = {
+        "top_k_results": 1,
+        "load_max_docs": 10,
+        "load_all_available_meta": True,
+    }
+    pupmed_tool = _load_pubmed_from_universal_entry(**params)
+    # load_tools returns the tool; the API wrapper is its `api_wrapper`.
+    assert isinstance(pupmed_tool, PubmedQueryRun)
+    wp = pupmed_tool.api_wrapper
+    assert isinstance(wp, PubMedAPIWrapper)
+    assert wp.top_k_results == 1, "failed to assert top_k_results"
+    assert wp.load_max_docs == 10, "failed to assert load_max_docs"
+    assert (
+        wp.load_all_available_meta is True
+    ), "failed to assert load_all_available_meta"
--- a/tests/unit_tests/tools/test_public_api.py
+++ b/tests/unit_tests/tools/test_public_api.py
@ -61,6 +61,7 @@ _EXPECTED = [
    "tool",
    "YouTubeSearchTool",
    "BraveSearch",
+    "PubmedQueryRun",
 ]