Add podcast api tool to use NLP to search all podcasts or episodes. (#1833)

Use the following code to test:

```python
import os
from langchain.llms import OpenAI
from langchain.chains.api import podcast_docs
from langchain.chains import APIChain

# Get api key here: https://openai.com/pricing
os.environ["OPENAI_API_KEY"] = "sk-xxxxx"

# Get api key here: https://www.listennotes.com/api/pricing/
listen_api_key = 'xxx'

llm = OpenAI(temperature=0)
headers = {"X-ListenAPI-Key": listen_api_key}
chain = APIChain.from_llm_and_api_docs(llm, podcast_docs.PODCAST_DOCS, headers=headers, verbose=True)
chain.run("Search for 'silicon valley bank' podcast episodes, audio length is more than 30 minutes, return only 1 results")
```

Known issue: the API response data might be too big for the model's context window, in which case the call fails with an error like this: `openai.error.InvalidRequestError: This model's maximum context length is 4097 tokens, however you requested 6733 tokens (6477 in your prompt; 256 for the completion). Please reduce your prompt; or completion length.`
2024-11-06 03:20:49 +00:00 · 2023-03-20 22:04:17 -07:00 · 2023-03-20 22:04:17 -07:00 · a7e09d46c5
commit a7e09d46c5
parent fa2e546b76
3 changed files with 51 additions and 1 deletions
--- a/docs/modules/agents/tools.md
+++ b/docs/modules/agents/tools.md
@ -145,3 +145,10 @@ Below is a list of all supported tools and relevant information:
 - Requires LLM: No
 - Extra Parameters: `top_k_results`

+**podcast-api**
+
+- Tool Name: Podcast API
+- Tool Description: Use the Listen Notes Podcast API to search all podcasts or episodes. The input should be a question in natural language that this API can answer.
+- Notes: A natural language connection to the Listen Notes Podcast API (`https://www.PodcastAPI.com`), specifically the `/search/` endpoint.
+- Requires LLM: Yes
+- Extra Parameters: `listen_api_key` (your Listen Notes API key, required to access this endpoint)
--- a/langchain/agents/load_tools.py
+++ b/langchain/agents/load_tools.py
@ -4,7 +4,7 @@ from typing import Any, List, Optional

 from langchain.agents.tools import Tool
 from langchain.callbacks.base import BaseCallbackManager
-from langchain.chains.api import news_docs, open_meteo_docs, tmdb_docs
+from langchain.chains.api import news_docs, open_meteo_docs, tmdb_docs, podcast_docs
 from langchain.chains.api.base import APIChain
 from langchain.chains.llm_math.base import LLMMathChain
 from langchain.chains.pal.base import PALChain
@ -118,6 +118,20 @@ def _get_tmdb_api(llm: BaseLLM, **kwargs: Any) -> BaseTool:
    )


+def _get_podcast_api(llm: BaseLLM, **kwargs: Any) -> BaseTool:
+    listen_api_key = kwargs["listen_api_key"]
+    chain = APIChain.from_llm_and_api_docs(
+        llm,
+        podcast_docs.PODCAST_DOCS,
+        headers={"X-ListenAPI-Key": listen_api_key},
+    )
+    return Tool(
+        name="Podcast API",
+        description="Use the Listen Notes Podcast API to search all podcasts or episodes. The input should be a question in natural language that this API can answer.",
+        func=chain.run,
+    )
+
+
 def _get_wolfram_alpha(**kwargs: Any) -> BaseTool:
    return WolframAlphaQueryRun(api_wrapper=WolframAlphaAPIWrapper(**kwargs))

@ -166,6 +180,7 @@ def _get_bing_search(**kwargs: Any) -> BaseTool:
 _EXTRA_LLM_TOOLS = {
    "news-api": (_get_news_api, ["news_api_key"]),
    "tmdb-api": (_get_tmdb_api, ["tmdb_bearer_token"]),
+    "podcast-api": (_get_podcast_api, ["listen_api_key"]),
 }

 _EXTRA_OPTIONAL_TOOLS = {
--- a/langchain/chains/api/podcast_docs.py
+++ b/langchain/chains/api/podcast_docs.py
@ -0,0 +1,28 @@
+# flake8: noqa
+PODCAST_DOCS = """API documentation:
+Endpoint: https://listen-api.listennotes.com/api/v2
+GET /search
+
+This API is for searching podcasts or episodes.
+
+Query parameters table:
+q | string | Search term, e.g., person, place, topic... You can use double quotes to do verbatim match, e.g., "game of thrones". Otherwise, it's fuzzy search. | required
+type | string | What type of contents do you want to search for? Available values: episode, podcast, curated. default: episode | optional
+page_size | integer | The maximum number of search results per page. A valid value should be an integer between 1 and 10 (inclusive). default: 3 | optional
+language | string | Limit search results to a specific language, e.g., English, Chinese ... If not specified, it'll be any language. It works only when type is episode or podcast. | optional
+region | string | Limit search results to a specific region (e.g., us, gb, in...). If not specified, it'll be any region. It works only when type is episode or podcast. | optional
+len_min | integer | Minimum audio length in minutes. Applicable only when type parameter is episode or podcast. If type parameter is episode, it's for audio length of an episode. If type parameter is podcast, it's for average audio length of all episodes in a podcast. | optional
+len_max | integer | Maximum audio length in minutes. Applicable only when type parameter is episode or podcast. If type parameter is episode, it's for audio length of an episode. If type parameter is podcast, it's for average audio length of all episodes in a podcast. | optional
+
+Response schema (JSON object):
+next_offset | integer | optional
+total | integer | optional
+results | array[object] (Episode / Podcast List Result Object)
+
+Each object in the "results" key has the following schema:
+listennotes_url | string | optional
+id | integer | optional
+title_highlighted | string | optional
+
+Use page_size: 3
+"""