Add youdotcom retriever (#11304)

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
pull/10662/head^2
mrbean 1 year ago committed by GitHub
parent 1655ff2ded
commit 9903a70379
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -0,0 +1,62 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "47828a7a",
"metadata": {},
"source": [
"## Using the You.com Retriever\n",
"The You.com retriever is well suited to retrieving large amounts of text: for each URL it finds relevant, it returns several of the best-matching text snippets.\n",
"\n",
"First, set the `YDC_API_KEY` environment variable to your You.com API key, then initialize the retriever:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a90d61d4",
"metadata": {},
"outputs": [],
"source": [
"from langchain.retrievers.you import YouRetriever\n",
"from langchain.chains import RetrievalQA\n",
"from langchain.llms import OpenAI\n",
"\n",
"yr = YouRetriever()\n",
"qa = RetrievalQA.from_chain_type(llm=OpenAI(), chain_type=\"map_reduce\", retriever=yr)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4a223f2f",
"metadata": {},
"outputs": [],
"source": [
"query = \"what starting ohio state quarterback most recently went their entire college career without beating Michigan?\"\n",
"qa.run(query)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.17"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

@ -0,0 +1,46 @@
from typing import Any, Dict, List
from langchain.callbacks.manager import CallbackManagerForRetrieverRun
from langchain.pydantic_v1 import root_validator
from langchain.schema import BaseRetriever, Document
from langchain.utils import get_from_dict_or_env
class YouRetriever(BaseRetriever):
    """`You` retriever that uses You.com's search API.

    To connect to the You.com api requires an API key which
    you can get by emailing api@you.com.
    You can check out our docs at https://documentation.you.com.

    You need to set the environment variable `YDC_API_KEY` for retriever
    to operate.
    """

    # API key sent as the `X-API-Key` header on every search request.
    ydc_api_key: str

    @root_validator(pre=True)
    def validate_client(
        cls,
        values: Dict[str, Any],
    ) -> Dict[str, Any]:
        """Populate `ydc_api_key` before field validation runs.

        The value is resolved by `get_from_dict_or_env` using the
        `ydc_api_key` entry of `values` and the `YDC_API_KEY` environment
        variable name.

        Args:
            values: Raw keyword arguments passed to the constructor.

        Returns:
            The same dict with `ydc_api_key` set.
        """
        values["ydc_api_key"] = get_from_dict_or_env(
            values, "ydc_api_key", "YDC_API_KEY"
        )
        return values

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        """Search You.com for `query` and return one `Document` per snippet.

        Args:
            query: Free-text search query.
            run_manager: Callback manager for this run (not used here).

        Returns:
            A flat list of documents, one for every snippet of every hit,
            in the order they appear in the response.

        Raises:
            KeyError: If the response JSON lacks `hits`, or a hit lacks
                `snippets`.
        """
        # Imported lazily so `requests` is only needed when a search runs.
        import requests

        # Pass the query via `params` so requests URL-encodes it; splicing it
        # into the URL by hand breaks on characters such as `&`, `#` or `+`.
        results = requests.get(
            "https://api.ydc-index.io/search",
            params={"query": query},
            headers={"X-API-Key": self.ydc_api_key},
        ).json()

        return [
            Document(page_content=snippet)
            for hit in results["hits"]
            for snippet in hit["snippets"]
        ]

@ -0,0 +1,16 @@
import os
from langchain.retrievers.you import YouRetriever
class TestYouRetriever:
    """Integration test for `YouRetriever`; no mocking is applied here."""

    @classmethod
    def setup_class(cls) -> None:
        """Abort the whole class early when no API key is configured."""
        api_key = os.getenv("YDC_API_KEY")
        if api_key:
            return
        raise ValueError("YDC_API_KEY environment variable is not set")

    def test_get_relevant_documents(self) -> None:
        """A simple query should yield at least one document."""
        documents = YouRetriever().get_relevant_documents("test")
        assert len(documents) > 0

@ -0,0 +1,26 @@
import json
import os
from unittest import mock
from requests import Response
from langchain.retrievers.you import YouRetriever
from langchain.schema import Document
class TestYouRetriever:
    """Unit test for `YouRetriever` with the HTTP request mocked out."""

    def test_get_relevant_documents(self) -> None:
        """Every snippet of every hit becomes one `Document`, in order."""
        fixture = {"hits": [{"snippets": ["yo"]}, {"snippets": ["bird up"]}]}

        # Build a canned response whose `.json()` decodes to the fixture.
        response = Response()
        response.status_code = 200
        response._content = json.dumps(fixture).encode("utf-8")

        # patch.dict restores os.environ on exit, so the fake key does not
        # leak into other tests running in the same process.
        with mock.patch.dict(os.environ, {"YDC_API_KEY": "MOCK KEY!"}):
            with mock.patch("requests.get", return_value=response) as mock_get:
                retriever = YouRetriever()
                actual = retriever.get_relevant_documents("test")

        # Guard against the test passing without the mocked call being made.
        mock_get.assert_called_once()
        assert actual == [
            Document(page_content="yo"),
            Document(page_content="bird up"),
        ]
Loading…
Cancel
Save