diff --git a/docs/modules/indexes/how_to_guides.rst b/docs/modules/indexes/how_to_guides.rst index 20b53930..bd81adbc 100644 --- a/docs/modules/indexes/how_to_guides.rst +++ b/docs/modules/indexes/how_to_guides.rst @@ -67,6 +67,27 @@ In the below guides, we cover different types of vectorstores and how to use the vectorstore_examples/* +Retrievers +------------ + + +The retriever interface is a generic interface that makes it easy to combine documents with +language models. This interface exposes a `get_relevant_texts` method which takes in a query +(a string) and returns a list of documents. + +`ChatGPT Plugin Retriever <./retriever_examples/chatgpt-plugin-retriever.html>`_: A walkthrough of how to use the ChatGPT Plugin Retriever within the LangChain framework. + + +.. toctree:: + :maxdepth: 1 + :glob: + :caption: Retrievers + :name: retrievers + :hidden: + + retriever_examples/* + + Chains ------ diff --git a/docs/modules/indexes/retriever_examples/chatgpt-plugin-retriever.ipynb b/docs/modules/indexes/retriever_examples/chatgpt-plugin-retriever.ipynb new file mode 100644 index 00000000..ffcd1669 --- /dev/null +++ b/docs/modules/indexes/retriever_examples/chatgpt-plugin-retriever.ipynb @@ -0,0 +1,88 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "074b0004", + "metadata": {}, + "source": [ + "# ChatGPT Plugin Retriever\n", + "\n", + "This notebook shows how to use the ChatGPT Retriever Plugin within LangChain.\n", + "\n", + "To set up the ChatGPT Retriever Plugin, please follow instructions [here](https://github.com/openai/chatgpt-retrieval-plugin)." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "39d6074e", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.retrievers import ChatGPTPluginRetriever" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "33fd23d1", + "metadata": {}, + "outputs": [], + "source": [ + "retriever = ChatGPTPluginRetriever(url=\"http://0.0.0.0:8000\", bearer_token=\"foo\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "16250bdf", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[Document(page_content=\"This is Alice's phone number: 123-456-7890\", lookup_str='', metadata={'id': '456_0', 'metadata': {'source': 'email', 'source_id': '567', 'url': None, 'created_at': '1609592400.0', 'author': 'Alice', 'document_id': '456'}, 'embedding': None, 'score': 0.925571561}, lookup_index=0),\n", + " Document(page_content='This is a document about something', lookup_str='', metadata={'id': '123_0', 'metadata': {'source': 'file', 'source_id': 'https://example.com/doc1', 'url': 'https://example.com/doc1', 'created_at': '1609502400.0', 'author': 'Alice', 'document_id': '123'}, 'embedding': None, 'score': 0.6987589}, lookup_index=0),\n", + " Document(page_content='Team: Angels \"Payroll (millions)\": 154.49 \"Wins\": 89', lookup_str='', metadata={'id': '59c2c0c1-ae3f-4272-a1da-f44a723ea631_0', 'metadata': {'source': None, 'source_id': None, 'url': None, 'created_at': None, 'author': None, 'document_id': '59c2c0c1-ae3f-4272-a1da-f44a723ea631'}, 'embedding': None, 'score': 0.697888613}, lookup_index=0)]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "retriever.get_relevant_texts(\"alice's phone number\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c8b5794b", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.1" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/langchain/retrievers/__init__.py b/langchain/retrievers/__init__.py new file mode 100644 index 00000000..5007aef3 --- /dev/null +++ b/langchain/retrievers/__init__.py @@ -0,0 +1,3 @@ +from langchain.retrievers.chatgpt_plugin_retriever import ChatGPTPluginRetriever + +__all__ = ["ChatGPTPluginRetriever"] diff --git a/langchain/retrievers/chatgpt_plugin_retriever.py b/langchain/retrievers/chatgpt_plugin_retriever.py new file mode 100644 index 00000000..11205895 --- /dev/null +++ b/langchain/retrievers/chatgpt_plugin_retriever.py @@ -0,0 +1,27 @@ +from typing import List + +import requests +from pydantic import BaseModel + +from langchain.schema import BaseRetriever, Document + + +class ChatGPTPluginRetriever(BaseRetriever, BaseModel): + url: str + bearer_token: str + + def get_relevant_texts(self, query: str) -> List[Document]: + response = requests.post( + f"{self.url}/query", + json={"queries": [{"query": query}]}, + headers={ + "Content-Type": "application/json", + "Authorization": f"Bearer {self.bearer_token}", + }, + ) + results = response.json()["results"][0]["results"] + docs = [] + for d in results: + content = d.pop("text") + docs.append(Document(page_content=content, metadata=d)) + return docs