retrievers interface (#1948)

2024-11-08 07:10:35 +00:00 · 2023-03-23 19:00:38 -07:00 · 2023-03-23 19:00:38 -07:00 · 8990122d5d
commit 8990122d5d
parent 52d6bf04d0
4 changed files with 139 additions and 0 deletions
--- a/docs/modules/indexes/how_to_guides.rst
+++ b/docs/modules/indexes/how_to_guides.rst
@ -67,6 +67,27 @@ In the below guides, we cover different types of vectorstores and how to use the
   vectorstore_examples/*
 Retrievers
 ------------
 The retriever interface is a generic interface that makes it easy to combine documents with
 language models. This interface exposes a ``get_relevant_texts`` method which takes in a query
 (a string) and returns a list of documents.
 `ChatGPT Plugin Retriever <./retriever_examples/chatgpt-plugin-retriever.html>`_: A walkthrough of how to use the ChatGPT Plugin Retriever within the LangChain framework.
 .. toctree::
   :maxdepth: 1
   :glob:
   :caption: Retrievers
   :name: retrievers
   :hidden:
   retriever_examples/*
 Chains
 ------
--- a/docs/modules/indexes/retriever_examples/chatgpt-plugin-retriever.ipynb
+++ b/docs/modules/indexes/retriever_examples/chatgpt-plugin-retriever.ipynb
@ -0,0 +1,88 @@
 {
 "cells": [
  {
   "cell_type": "markdown",
   "id": "074b0004",
   "metadata": {},
   "source": [
    "# ChatGPT Plugin Retriever\n",
    "\n",
    "This notebook shows how to use the ChatGPT Retriever Plugin within LangChain.\n",
    "\n",
    "To set up the ChatGPT Retriever Plugin, please follow instructions [here](https://github.com/openai/chatgpt-retrieval-plugin)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "39d6074e",
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.retrievers import ChatGPTPluginRetriever"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "33fd23d1",
   "metadata": {},
   "outputs": [],
   "source": [
    "retriever = ChatGPTPluginRetriever(url=\"http://0.0.0.0:8000\", bearer_token=\"foo\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "16250bdf",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[Document(page_content=\"This is Alice's phone number: 123-456-7890\", lookup_str='', metadata={'id': '456_0', 'metadata': {'source': 'email', 'source_id': '567', 'url': None, 'created_at': '1609592400.0', 'author': 'Alice', 'document_id': '456'}, 'embedding': None, 'score': 0.925571561}, lookup_index=0),\n",
       " Document(page_content='This is a document about something', lookup_str='', metadata={'id': '123_0', 'metadata': {'source': 'file', 'source_id': 'https://example.com/doc1', 'url': 'https://example.com/doc1', 'created_at': '1609502400.0', 'author': 'Alice', 'document_id': '123'}, 'embedding': None, 'score': 0.6987589}, lookup_index=0),\n",
       " Document(page_content='Team: Angels \"Payroll (millions)\": 154.49 \"Wins\": 89', lookup_str='', metadata={'id': '59c2c0c1-ae3f-4272-a1da-f44a723ea631_0', 'metadata': {'source': None, 'source_id': None, 'url': None, 'created_at': None, 'author': None, 'document_id': '59c2c0c1-ae3f-4272-a1da-f44a723ea631'}, 'embedding': None, 'score': 0.697888613}, lookup_index=0)]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "retriever.get_relevant_texts(\"alice's phone number\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c8b5794b",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
 }
--- a/langchain/retrievers/__init__.py
+++ b/langchain/retrievers/__init__.py
@ -0,0 +1,3 @@
 from langchain.retrievers.chatgpt_plugin_retriever import ChatGPTPluginRetriever
 __all__ = ["ChatGPTPluginRetriever"]
--- a/langchain/retrievers/chatgpt_plugin_retriever.py
+++ b/langchain/retrievers/chatgpt_plugin_retriever.py
@ -0,0 +1,27 @@
 from typing import List
 import requests
 from pydantic import BaseModel
 from langchain.schema import BaseRetriever, Document
 class ChatGPTPluginRetriever(BaseRetriever, BaseModel):
    """Retriever that fetches documents from a ChatGPT retrieval plugin server.

    Queries are sent over HTTP to the server's ``/query`` endpoint and each
    returned result is wrapped in a ``Document``.
    """

    # Base URL of the plugin server; "/query" is appended to it verbatim.
    url: str
    # Token sent in the "Authorization: Bearer <token>" request header.
    bearer_token: str

    def get_relevant_texts(self, query: str) -> List[Document]:
        """Return the documents the plugin server reports for ``query``.

        Args:
            query: Free-text query forwarded to the server as a single-entry
                ``queries`` list.

        Returns:
            One ``Document`` per result of the first (and only) query. The
            result's ``"text"`` field becomes ``page_content``; every other
            field of the result is kept as ``metadata``.

        Raises:
            requests.HTTPError: If the server answers with a 4xx/5xx status.
            KeyError: If the response body lacks the expected
                ``results`` / ``text`` keys.
        """
        response = requests.post(
            f"{self.url}/query",
            json={"queries": [{"query": query}]},
            headers={
                "Content-Type": "application/json",
                "Authorization": f"Bearer {self.bearer_token}",
            },
        )
        # Surface auth/server failures as HTTP errors instead of letting them
        # show up later as an opaque KeyError or JSON decoding error.
        response.raise_for_status()
        # Only one query is sent, so only the first result group is relevant.
        results = response.json()["results"][0]["results"]
        docs = []
        for d in results:
            # Remove the text so the remaining fields can serve as metadata.
            content = d.pop("text")
            docs.append(Document(page_content=content, metadata=d))
        return docs
		`@ -0,0 +1,3 @@`
							`from langchain.retrievers.chatgpt_plugin_retriever import ChatGPTPluginRetriever`

							`__all__ = ["ChatGPTPluginRetriever"]`