retrievers interface (#1948)

This commit is contained in:
Harrison Chase 2023-03-23 19:00:38 -07:00 committed by GitHub
parent 52d6bf04d0
commit 8990122d5d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 139 additions and 0 deletions

View File

@ -67,6 +67,27 @@ In the below guides, we cover different types of vectorstores and how to use the
vectorstore_examples/* vectorstore_examples/*
Retrievers
------------
The retriever interface is a generic interface that makes it easy to combine documents with
language models. This interface exposes a ``get_relevant_texts`` method which takes a query
(a string) and returns a list of documents.
`ChatGPT Plugin Retriever <./retriever_examples/chatgpt-plugin-retriever.html>`_: A walkthrough of how to use the ChatGPT Plugin Retriever within the LangChain framework.
.. toctree::
:maxdepth: 1
:glob:
:caption: Retrievers
:name: retrievers
:hidden:
retriever_examples/*
Chains Chains
------ ------

View File

@ -0,0 +1,88 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "074b0004",
"metadata": {},
"source": [
"# ChatGPT Plugin Retriever\n",
"\n",
"This notebook shows how to use the ChatGPT Retriever Plugin within LangChain.\n",
"\n",
"To set up the ChatGPT Retriever Plugin, please follow instructions [here](https://github.com/openai/chatgpt-retrieval-plugin)."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "39d6074e",
"metadata": {},
"outputs": [],
"source": [
"from langchain.retrievers import ChatGPTPluginRetriever"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "33fd23d1",
"metadata": {},
"outputs": [],
"source": [
"retriever = ChatGPTPluginRetriever(url=\"http://0.0.0.0:8000\", bearer_token=\"foo\")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "16250bdf",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Document(page_content=\"This is Alice's phone number: 123-456-7890\", lookup_str='', metadata={'id': '456_0', 'metadata': {'source': 'email', 'source_id': '567', 'url': None, 'created_at': '1609592400.0', 'author': 'Alice', 'document_id': '456'}, 'embedding': None, 'score': 0.925571561}, lookup_index=0),\n",
" Document(page_content='This is a document about something', lookup_str='', metadata={'id': '123_0', 'metadata': {'source': 'file', 'source_id': 'https://example.com/doc1', 'url': 'https://example.com/doc1', 'created_at': '1609502400.0', 'author': 'Alice', 'document_id': '123'}, 'embedding': None, 'score': 0.6987589}, lookup_index=0),\n",
" Document(page_content='Team: Angels \"Payroll (millions)\": 154.49 \"Wins\": 89', lookup_str='', metadata={'id': '59c2c0c1-ae3f-4272-a1da-f44a723ea631_0', 'metadata': {'source': None, 'source_id': None, 'url': None, 'created_at': None, 'author': None, 'document_id': '59c2c0c1-ae3f-4272-a1da-f44a723ea631'}, 'embedding': None, 'score': 0.697888613}, lookup_index=0)]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"retriever.get_relevant_texts(\"alice's phone number\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c8b5794b",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@ -0,0 +1,3 @@
# Re-export the retriever so it can be imported directly from `langchain.retrievers`.
from langchain.retrievers.chatgpt_plugin_retriever import ChatGPTPluginRetriever
__all__ = ["ChatGPTPluginRetriever"]

View File

@ -0,0 +1,27 @@
from typing import List
import requests
from pydantic import BaseModel
from langchain.schema import BaseRetriever, Document
class ChatGPTPluginRetriever(BaseRetriever, BaseModel):
    """Retriever that fetches documents from a ChatGPT Retrieval Plugin server.

    The query is POSTed to the server's ``/query`` endpoint and every hit in
    the response is converted into a ``Document``.
    """

    # Base URL of the plugin server (the ``/query`` path is appended to it).
    url: str
    # Token sent in the ``Authorization: Bearer <token>`` request header.
    bearer_token: str

    def get_relevant_texts(self, query: str) -> List[Document]:
        """Return the documents the plugin server considers relevant to *query*.

        Args:
            query: Free-text query forwarded to the server.

        Returns:
            One ``Document`` per hit. The hit's ``"text"`` value becomes
            ``page_content``; all remaining keys of the hit are kept as
            ``metadata``.

        Raises:
            requests.HTTPError: If the server answers with a 4xx/5xx status.
            KeyError, IndexError: If the response body does not have the
                ``{"results": [{"results": [...]}]}`` shape.
        """
        # Strip a trailing slash so "http://host/" does not yield "//query".
        endpoint = f"{self.url.rstrip('/')}/query"
        response = requests.post(
            endpoint,
            json={"queries": [{"query": query}]},
            headers={
                "Content-Type": "application/json",
                "Authorization": f"Bearer {self.bearer_token}",
            },
        )
        # Fail with a clear HTTP error (e.g. bad token) instead of an opaque
        # KeyError / JSON decode error further down.
        response.raise_for_status()

        # Exactly one query was sent, so only the first result set is relevant.
        hits = response.json()["results"][0]["results"]
        docs = []
        for hit in hits:
            # Pop the text first; whatever is left in the hit is its metadata.
            content = hit.pop("text")
            docs.append(Document(page_content=content, metadata=hit))
        return docs