From 6ec5780547779abd24666ca5da5b917ef9fa0ab1 Mon Sep 17 00:00:00 2001 From: Harrison Chase Date: Fri, 24 Mar 2023 08:24:33 -0700 Subject: [PATCH] add docs for openai retriever ingest (#1969) --- .../chatgpt-plugin-retriever.ipynb | 66 ++++++++++++++++++- 1 file changed, 63 insertions(+), 3 deletions(-) diff --git a/docs/modules/indexes/retriever_examples/chatgpt-plugin-retriever.ipynb b/docs/modules/indexes/retriever_examples/chatgpt-plugin-retriever.ipynb index 9b324b15..79228035 100644 --- a/docs/modules/indexes/retriever_examples/chatgpt-plugin-retriever.ipynb +++ b/docs/modules/indexes/retriever_examples/chatgpt-plugin-retriever.ipynb @@ -2,14 +2,74 @@ "cells": [ { "cell_type": "markdown", - "id": "074b0004", + "id": "1edb9e6b", "metadata": {}, "source": [ "# ChatGPT Plugin Retriever\n", "\n", - "This notebook shows how to use the ChatGPT Retriever Plugin within LangChain.\n", + "This notebook shows how to use the ChatGPT Retriever Plugin within LangChain." + ] + }, + { + "cell_type": "markdown", + "id": "074b0004", + "metadata": {}, + "source": [ + "## Create\n", + "\n", + "First, let's go over how to create the ChatGPT Retriever Plugin.\n", + "\n", + "To set up the ChatGPT Retriever Plugin, please follow the instructions [here](https://github.com/openai/chatgpt-retrieval-plugin).\n", + "\n", + "You can also load data into the ChatGPT Retriever Plugin using LangChain document loaders. The code below walks through how to do that." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "bbe89ca0", + "metadata": {}, + "outputs": [], + "source": [ + "# STEP 1: Load\n", + "\n", + "# Load documents using LangChain's DocumentLoaders\n", + "# This is from https://langchain.readthedocs.io/en/latest/modules/document_loaders/examples/csv.html\n", + "\n", + "from langchain.document_loaders.csv_loader import CSVLoader\n", + "loader = CSVLoader(file_path='../../document_loaders/examples/example_data/mlb_teams_2012.csv')\n", + "data = loader.load()\n", + "\n", + "\n", + "# STEP 2: Convert\n", + "\n", + "# Convert the Documents to the format expected by https://github.com/openai/chatgpt-retrieval-plugin\n", + "from typing import List\n", + "from langchain.docstore.document import Document\n", + "import json\n", + "\n", + "def write_json(path: str, documents: List[Document])-> None:\n", + " results = [{\"text\": doc.page_content} for doc in documents]\n", + " with open(path, \"w\") as f:\n", + " json.dump(results, f, indent=2)\n", + "\n", + "write_json(\"foo.json\", data)\n", + "\n", + "# STEP 3: Use\n", + "\n", + "# Ingest this file as you would any other JSON file, following https://github.com/openai/chatgpt-retrieval-plugin/tree/main/scripts/process_json\n" + ] + }, + { + "cell_type": "markdown", + "id": "0474661d", + "metadata": {}, + "source": [ + "## Using the ChatGPT Retriever Plugin\n", + "\n", + "Okay, so we've created the ChatGPT Retriever Plugin, but how do we actually use it?\n", "\n", - "To set up the ChatGPT Retriever Plugin, please follow instructions [here](https://github.com/openai/chatgpt-retrieval-plugin)." + "The code below walks through how to do that." ] }, {