From d966e4d13a06cbf5b6a1d726825b1ff74ccfb95d Mon Sep 17 00:00:00 2001
From: Daniel Chalef <131175+danielchalef@users.noreply.github.com>
Date: Thu, 2 Nov 2023 13:39:17 -0700
Subject: [PATCH] zep: Update Zep docs and messaging (#12764)

Update Zep documentation with messaging, more details.

@baskaryan, @eyurtsev
---
 .../docs/integrations/memory/zep_memory.ipynb | 44 ++--
 docs/docs/integrations/providers/zep.mdx | 62 ++++-
 .../retrievers/zep_memorystore.ipynb | 19 +-
 docs/docs/integrations/vectorstores/zep.ipynb | 211 +++++++++---------
 4 files changed, 194 insertions(+), 142 deletions(-)

diff --git a/docs/docs/integrations/memory/zep_memory.ipynb b/docs/docs/integrations/memory/zep_memory.ipynb
index 286e848644..a47e37c9be 100644
--- a/docs/docs/integrations/memory/zep_memory.ipynb
+++ b/docs/docs/integrations/memory/zep_memory.ipynb
@@ -6,32 +6,31 @@
 "source": [
 "# Zep\n",
 "\n",
- ">[Zep](https://docs.getzep.com/) is a long-term memory store for LLM applications.\n",
- ">\n",
- ">`Zep` stores, summarizes, embeds, indexes, and enriches conversational AI chat histories, and exposes them via simple, low-latency APIs.\n",
+ "## Fast, Scalable Building Blocks for LLM Apps\n",
+ "Zep is an open source platform for productionizing LLM apps. Go from a prototype\n",
+ "built in LangChain or LlamaIndex, or a custom app, to production in minutes without\n",
+ "rewriting code.\n",
 "\n",
 "Key Features:\n",
 "\n",
- "- **Fast!** Zep’s async extractors operate independently of your chat loop, ensuring a snappy user experience.\n",
- "- **Long-term memory persistence**, with access to historical messages irrespective of your summarization strategy.\n",
- "- **Auto-summarization** of memory messages based on a configurable message window. A series of summaries are stored, providing flexibility for future summarization strategies.\n",
- "- **Hybrid search** over memories and metadata, with messages automatically embedded upon creation.\n",
- "- **Entity Extractor** that automatically extracts named entities from messages and stores them in the message metadata.\n",
- "- **Auto-token counting** of memories and summaries, allowing finer-grained control over prompt assembly.\n",
- "- Python and JavaScript SDKs.\n",
+ "- **Fast!** Zep operates independently of your chat loop, ensuring a snappy user experience.\n",
+ "- **Chat History Memory, Archival, and Enrichment**: populate your prompts with relevant chat history, summaries, named entities, intent data, and more.\n",
+ "- **Vector Search over Chat History and Documents**: Automatic embedding of documents, chat histories, and summaries. Use Zep's similarity or native MMR Re-ranked search to find the most relevant results.\n",
+ "- **Manage Users and their Chat Sessions**: Users and their Chat Sessions are first-class citizens in Zep, allowing you to manage user interactions with your bots or agents easily.\n",
+ "- **Records Retention and Privacy Compliance**: Comply with corporate and regulatory mandates for records retention while ensuring compliance with privacy regulations such as CCPA and GDPR. 
Fulfill *Right To Be Forgotten* requests with a single API call\n", "\n", - "`Zep` project: [https://github.com/getzep/zep](https://github.com/getzep/zep)\n", + "Zep project: [https://github.com/getzep/zep](https://github.com/getzep/zep)\n", "Docs: [https://docs.getzep.com/](https://docs.getzep.com/)\n", "\n", "\n", "## Example\n", "\n", - "This notebook demonstrates how to use the [Zep Long-term Memory Store](https://docs.getzep.com/) as memory for your chatbot.\n", + "This notebook demonstrates how to use [Zep](https://www.getzep.com/) as memory for your chatbot.\n", "REACT Agent Chat Message History with Zep - A long-term memory store for LLM applications.\n", "\n", "We'll demonstrate:\n", "\n", - "1. Adding conversation history to the Zep memory store.\n", + "1. Adding conversation history to Zep.\n", "2. Running an agent and having message automatically added to the store.\n", "3. Viewing the enriched messages.\n", "4. Vector search over the conversation history." @@ -39,7 +38,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2023-07-09T19:20:49.003167Z", @@ -65,7 +64,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": { "ExecuteTime": { "end_time": "2023-07-09T19:23:14.378234Z", @@ -119,7 +118,10 @@ " Tool(\n", " name=\"Search\",\n", " func=search.run,\n", - " description=\"useful for when you need to search online for answers. You should ask targeted questions\",\n", + " description=(\n", + " \"useful for when you need to search online for answers. You should ask\"\n", + " \" targeted questions\"\n", + " ),\n", " ),\n", "]\n", "\n", @@ -223,9 +225,11 @@ "\n", "for msg in test_history:\n", " memory.chat_memory.add_message(\n", - " HumanMessage(content=msg[\"content\"])\n", - " if msg[\"role\"] == \"human\"\n", - " else AIMessage(content=msg[\"content\"]),\n", + " (\n", + " HumanMessage(content=msg[\"content\"])\n", + " if msg[\"role\"] == \"human\"\n", + " else AIMessage(content=msg[\"content\"])\n", + " ),\n", " metadata=msg.get(\"metadata\", {}),\n", " )" ] @@ -415,7 +419,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.12" + "version": "3.11.6" } }, "nbformat": 4, diff --git a/docs/docs/integrations/providers/zep.mdx b/docs/docs/integrations/providers/zep.mdx index 9c224d40cd..914180e080 100644 --- a/docs/docs/integrations/providers/zep.mdx +++ b/docs/docs/integrations/providers/zep.mdx @@ -1,28 +1,72 @@ # Zep ->[Zep](https://docs.getzep.com/) - A long-term memory store for LLM applications. +## [Fast, Scalable Building Blocks for LLM Apps](http://www.getzep.com) +Zep is an open source platform for productionizing LLM apps. Go from a prototype +built in LangChain or LlamaIndex, or a custom app, to production in minutes without +rewriting code. ->`Zep` stores, summarizes, embeds, indexes, and enriches conversational AI chat histories, and exposes them via simple, low-latency APIs. ->- Long-term memory persistence, with access to historical messages irrespective of your summarization strategy. ->- Auto-summarization of memory messages based on a configurable message window. A series of summaries are stored, providing flexibility for future summarization strategies. ->- Vector search over memories, with messages automatically embedded on creation. ->- Auto-token counting of memories and summaries, allowing finer-grained control over prompt assembly. ->- Python and JavaScript SDKs. 
+Key Features:
+- **Fast!** Zep operates independently of your chat loop, ensuring a snappy user experience.
+- **Chat History Memory, Archival, and Enrichment**: populate your prompts with relevant chat history, summaries, named entities, intent data, and more.
+- **Vector Search over Chat History and Documents**: Automatic embedding of documents, chat histories, and summaries. Use Zep's similarity or native MMR Re-ranked search to find the most relevant results.
+- **Manage Users and their Chat Sessions**: Users and their Chat Sessions are first-class citizens in Zep, allowing you to manage user interactions with your bots or agents easily.
+- **Records Retention and Privacy Compliance**: Comply with corporate and regulatory mandates for records retention while ensuring compliance with privacy regulations such as CCPA and GDPR. Fulfill *Right To Be Forgotten* requests with a single API call
 
-`Zep` [project](https://github.com/getzep/zep)
+Zep project: [https://github.com/getzep/zep](https://github.com/getzep/zep)
+Docs: [https://docs.getzep.com/](https://docs.getzep.com/)
 
 ## Installation and Setup
 
+1. Install the Zep service. See the [Zep Quick Start Guide](https://docs.getzep.com/deployment/quickstart/).
+
+2. Install the Zep Python SDK:
+
 ```bash
 pip install zep_python
 ```
 
+## Zep Memory
+
+Zep's [Memory API](https://docs.getzep.com/sdk/chat_history/) persists your app's chat history and metadata to a Session, enriches the memory, automatically generates summaries, and enables vector similarity search over historical chat messages and summaries.
+
+There are two approaches to populating your prompt with chat history:
+
+1. Retrieve the most recent N messages (and potentially a summary) from a Session and use them to construct your prompt.
+2. Search over the Session's chat history for messages that are relevant and use them to construct your prompt.
+
+Both of these approaches may be useful, with the first providing the LLM with context as to the most recent interactions with a human. The second approach enables you to look back further in the chat history and retrieve messages that are relevant to the current conversation in a token-efficient manner.
+
+```python
+from langchain.memory import ZepMemory
+```
+
+See a [usage example here](/docs/integrations/memory/zep_memory).
 
-## Retriever
+## Memory Retriever
+
+Zep's Memory Retriever is a LangChain Retriever that enables you to retrieve messages from a Zep Session and use them to construct your prompt.
+
+The Retriever supports searching over both individual messages and summaries of conversations. The latter is useful for providing rich, but succinct, context to the LLM about relevant past conversations.
+
+Zep's Memory Retriever supports both similarity search and [Maximum Marginal Relevance (MMR) reranking](https://docs.getzep.com/sdk/search_query/). MMR search is useful for ensuring that the retrieved messages are diverse and not too similar to each other.
 
 See a [usage example](/docs/integrations/retrievers/zep_memorystore).
 
 ```python
 from langchain.retrievers import ZepRetriever
 ```
+
+## Zep VectorStore
+
+Zep's [Document VectorStore API](https://docs.getzep.com/sdk/documents/) enables you to store and retrieve documents using vector similarity search. Zep doesn't require you to understand
+distance functions, types of embeddings, or indexing best practices. You just pass in your chunked documents, and Zep handles the rest.
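+
+As an illustration, here is a minimal, hypothetical sketch of adding chunked documents and querying them. The server URL, collection name, and documents below are assumptions for the sketch, and it assumes the collection already exists on your Zep server and is auto-embedded (see the Zep Quick Start Guide above and the usage example linked at the end of this section for full collection setup):
+
+```python
+from langchain.docstore.document import Document
+from langchain.vectorstores.zep import ZepVectorStore
+
+# Illustrative values -- point these at your own Zep deployment and collection.
+ZEP_API_URL = "http://localhost:8000"
+COLLECTION_NAME = "my_docs"  # assumed to already exist and be auto-embedded by Zep
+
+vectorstore = ZepVectorStore(
+    collection_name=COLLECTION_NAME,
+    api_url=ZEP_API_URL,
+)
+
+# Pass in pre-chunked documents; Zep embeds them server-side.
+vectorstore.add_documents(
+    [
+        Document(page_content="The Moon is Earth's only natural satellite."),
+        Document(page_content="Mars is the fourth planet from the Sun."),
+    ]
+)
+
+# Query with plain text; Zep returns the most relevant chunks as Documents.
+for doc in vectorstore.similarity_search("Which planet is fourth from the Sun?", k=1):
+    print(doc.page_content)
+```
+
+As with any LangChain vector store, the resulting object can also be wrapped with `as_retriever()` for use in a retrieval chain.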
+
+Zep supports both similarity search and [Maximum Marginal Relevance (MMR) reranking](https://docs.getzep.com/sdk/search_query/).
+MMR search is useful for ensuring that the retrieved documents are diverse and not too similar to each other.
+
+```python
+from langchain.vectorstores.zep import ZepVectorStore
+```
+
+See a [usage example](/docs/integrations/vectorstores/zep).
\ No newline at end of file
diff --git a/docs/docs/integrations/retrievers/zep_memorystore.ipynb b/docs/docs/integrations/retrievers/zep_memorystore.ipynb
index 2f91fd2b0f..d4c1f5f46c 100644
--- a/docs/docs/integrations/retrievers/zep_memorystore.ipynb
+++ b/docs/docs/integrations/retrievers/zep_memorystore.ipynb
@@ -6,23 +6,20 @@
 "metadata": {},
 "source": [
 "# Zep\n",
- "## Retriever Example for [Zep](https://docs.getzep.com/) - Fast, scalable building blocks for LLM Apps\n",
- "\n",
- "### More on Zep:\n",
+ "## Retriever Example for [Zep](https://docs.getzep.com/)\n",
 "\n",
+ "### Fast, Scalable Building Blocks for LLM Apps\n",
 "Zep is an open source platform for productionizing LLM apps. Go from a prototype\n",
 "built in LangChain or LlamaIndex, or a custom app, to production in minutes without\n",
 "rewriting code.\n",
 "\n",
 "Key Features:\n",
 "\n",
- "- **Fast!** Zep’s async extractors operate independently of the your chat loop, ensuring a snappy user experience.\n",
- "- **Long-term memory persistence**, with access to historical messages irrespective of your summarization strategy.\n",
- "- **Auto-summarization** of memory messages based on a configurable message window. A series of summaries are stored, providing flexibility for future summarization strategies.\n",
- "- **Hybrid search** over memories and metadata, with messages automatically embedded on creation.\n",
- "- **Entity Extractor** that automatically extracts named entities from messages and stores them in the message metadata.\n",
- "- **Auto-token counting** of memories and summaries, allowing finer-grained control over prompt assembly.\n",
- "- Python and JavaScript SDKs.\n",
+ "- **Fast!** Zep operates independently of your chat loop, ensuring a snappy user experience.\n",
+ "- **Chat History Memory, Archival, and Enrichment**: populate your prompts with relevant chat history, summaries, named entities, intent data, and more.\n",
+ "- **Vector Search over Chat History and Documents**: Automatic embedding of documents, chat histories, and summaries. Use Zep's similarity or native MMR Re-ranked search to find the most relevant results.\n",
+ "- **Manage Users and their Chat Sessions**: Users and their Chat Sessions are first-class citizens in Zep, allowing you to manage user interactions with your bots or agents easily.\n",
+ "- **Records Retention and Privacy Compliance**: Comply with corporate and regulatory mandates for records retention while ensuring compliance with privacy regulations such as CCPA and GDPR. 
Fulfill *Right To Be Forgotten* requests with a single API call\n",
 "\n",
 "Zep project: [https://github.com/getzep/zep](https://github.com/getzep/zep)\n",
 "Docs: [https://docs.getzep.com/](https://docs.getzep.com/)\n"
@@ -416,7 +413,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
- "version": "3.11.5"
+ "version": "3.11.6"
 }
 },
 "nbformat": 4,
diff --git a/docs/docs/integrations/vectorstores/zep.ipynb b/docs/docs/integrations/vectorstores/zep.ipynb
index e66cb4825a..cc969cb3ad 100644
--- a/docs/docs/integrations/vectorstores/zep.ipynb
+++ b/docs/docs/integrations/vectorstores/zep.ipynb
@@ -2,21 +2,30 @@
 "cells": [
 {
 "cell_type": "markdown",
+ "id": "9eb8dfa6fdb71ef5",
+ "metadata": {
+ "collapsed": false
+ },
 "source": [
 "# Zep\n",
 "\n",
- "Zep is an open-source long-term memory store for LLM applications. Zep makes it easy to add relevant documents,\n",
- "chat history memory & rich user data to your LLM app's prompts.\n",
+ "## Fast, Scalable Building Blocks for LLM Apps\n",
+ "Zep is an open source platform for productionizing LLM apps. Go from a prototype\n",
+ "built in LangChain or LlamaIndex, or a custom app, to production in minutes without\n",
+ "rewriting code.\n",
+ "\n",
+ "Key Features:\n",
+ "\n",
+ "- **Fast!** Zep operates independently of your chat loop, ensuring a snappy user experience.\n",
+ "- **Chat History Memory, Archival, and Enrichment**: populate your prompts with relevant chat history, summaries, named entities, intent data, and more.\n",
+ "- **Vector Search over Chat History and Documents**: Automatic embedding of documents, chat histories, and summaries. Use Zep's similarity or native MMR Re-ranked search to find the most relevant results.\n",
+ "- **Manage Users and their Chat Sessions**: Users and their Chat Sessions are first-class citizens in Zep, allowing you to manage user interactions with your bots or agents easily.\n",
+ "- **Records Retention and Privacy Compliance**: Comply with corporate and regulatory mandates for records retention while ensuring compliance with privacy regulations such as CCPA and GDPR. Fulfill *Right To Be Forgotten* requests with a single API call\n",
 "\n",
 "**Note:** The `ZepVectorStore` works with `Documents` and is intended to be used as a `Retriever`.\n",
 "It offers separate functionality to Zep's `ZepMemory` class, which is designed for persisting, enriching\n",
 "and searching your user's chat history.\n",
 "\n",
- "## Why Zep's VectorStore? 🤖🚀\n",
- "Zep automatically embeds documents added to the Zep Vector Store using low-latency models local to the Zep server.\n",
- "The Zep client also offers async interfaces for all document operations. These two together with Zep's chat memory\n",
- " functionality make Zep ideal for building conversational LLM apps where latency and performance are important.\n",
- "\n",
 "## Installation\n",
 "Follow the [Zep Quickstart Guide](https://docs.getzep.com/deployment/quickstart/) to install and get started with Zep.\n",
 "\n",
@@ -33,25 +42,29 @@
 "- If you pass in an `Embeddings` instance Zep will use this to embed documents rather than auto-embed them.\n",
 "You must also set your document collection to `isAutoEmbedded === false`. \n",
 "- If you set your collection to `isAutoEmbedded === false`, you must pass in an `Embeddings` instance."
- ], - "metadata": { - "collapsed": false - }, - "id": "9eb8dfa6fdb71ef5" + ] }, { "cell_type": "markdown", - "source": [ - "## Load or create a Collection from documents" - ], + "id": "9a3a11aab1412d98", "metadata": { "collapsed": false }, - "id": "9a3a11aab1412d98" + "source": [ + "## Load or create a Collection from documents" + ] }, { "cell_type": "code", "execution_count": 1, + "id": "519418421a32e4d", + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-13T01:07:50.672390Z", + "start_time": "2023-08-13T01:07:48.777799Z" + }, + "collapsed": false + }, "outputs": [], "source": [ "from uuid import uuid4\n", @@ -92,19 +105,19 @@ " api_url=ZEP_API_URL,\n", " api_key=ZEP_API_KEY,\n", ")" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-08-13T01:07:50.672390Z", - "start_time": "2023-08-13T01:07:48.777799Z" - } - }, - "id": "519418421a32e4d" + ] }, { "cell_type": "code", "execution_count": 2, + "id": "201dc57b124cb6d7", + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-13T01:07:53.807663Z", + "start_time": "2023-08-13T01:07:50.671241Z" + }, + "collapsed": false + }, "outputs": [ { "name": "stdout", @@ -138,29 +151,29 @@ "\n", "\n", "await wait_for_ready(collection_name)" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-08-13T01:07:53.807663Z", - "start_time": "2023-08-13T01:07:50.671241Z" - } - }, - "id": "201dc57b124cb6d7" + ] }, { "cell_type": "markdown", - "source": [ - "## Simarility Search Query over the Collection" - ], + "id": "94ca9dfa7d0ecaa5", "metadata": { "collapsed": false }, - "id": "94ca9dfa7d0ecaa5" + "source": [ + "## Simarility Search Query over the Collection" + ] }, { "cell_type": "code", "execution_count": 3, + "id": "1998de0a96fe89c3", + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-13T01:07:54.195988Z", + "start_time": "2023-08-13T01:07:53.808550Z" + }, + "collapsed": false + }, "outputs": [ { "name": "stdout", @@ -204,29 +217,29 @@ "# print results\n", "for d, s in docs_scores:\n", " print(d.page_content, \" -> \", s, \"\\n====\\n\")" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-08-13T01:07:54.195988Z", - "start_time": "2023-08-13T01:07:53.808550Z" - } - }, - "id": "1998de0a96fe89c3" + ] }, { "cell_type": "markdown", - "source": [ - "## Search over Collection Re-ranked by MMR" - ], + "id": "e02b61a9af0b2c80", "metadata": { "collapsed": false }, - "id": "e02b61a9af0b2c80" + "source": [ + "## Search over Collection Re-ranked by MMR" + ] }, { "cell_type": "code", "execution_count": 4, + "id": "488112da752b1d58", + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-13T01:07:54.394873Z", + "start_time": "2023-08-13T01:07:54.180901Z" + }, + "collapsed": false + }, "outputs": [ { "name": "stdout", @@ -268,31 +281,31 @@ "\n", "for d in docs:\n", " print(d.page_content, \"\\n====\\n\")" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-08-13T01:07:54.394873Z", - "start_time": "2023-08-13T01:07:54.180901Z" - } - }, - "id": "488112da752b1d58" + ] }, { "cell_type": "markdown", + "id": "42455e31d4ab0d68", + "metadata": { + "collapsed": false + }, "source": [ "# Filter by Metadata\n", "\n", "Use a metadata filter to narrow down results. 
First, load another book: \"Adventures of Sherlock Holmes\"" - ], - "metadata": { - "collapsed": false - }, - "id": "42455e31d4ab0d68" + ] }, { "cell_type": "code", "execution_count": 5, + "id": "146c8a96201c0ab9", + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-13T01:08:06.323569Z", + "start_time": "2023-08-13T01:07:54.381822Z" + }, + "collapsed": false + }, "outputs": [ { "name": "stdout", @@ -325,29 +338,29 @@ "await vs.aadd_documents(docs)\n", "\n", "await wait_for_ready(collection_name)" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-08-13T01:08:06.323569Z", - "start_time": "2023-08-13T01:07:54.381822Z" - } - }, - "id": "146c8a96201c0ab9" + ] }, { "cell_type": "markdown", - "source": [ - "### We see results from both books. Note the `source` metadata" - ], + "id": "5b225f3ae1e61de8", "metadata": { "collapsed": false }, - "id": "5b225f3ae1e61de8" + "source": [ + "### We see results from both books. Note the `source` metadata" + ] }, { "cell_type": "code", "execution_count": 6, + "id": "53700a9cd817cde4", + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-13T01:08:06.504769Z", + "start_time": "2023-08-13T01:08:06.325435Z" + }, + "collapsed": false + }, "outputs": [ { "name": "stdout", @@ -389,29 +402,29 @@ "\n", "for d in docs:\n", " print(d.page_content, \" -> \", d.metadata, \"\\n====\\n\")" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-08-13T01:08:06.504769Z", - "start_time": "2023-08-13T01:08:06.325435Z" - } - }, - "id": "53700a9cd817cde4" + ] }, { "cell_type": "markdown", - "source": [ - "### Let's try again using a filter for only the Sherlock Holmes document." - ], + "id": "7b81d7cae351a1ec", "metadata": { "collapsed": false }, - "id": "7b81d7cae351a1ec" + "source": [ + "### Let's try again using a filter for only the Sherlock Holmes document." + ] }, { "cell_type": "code", "execution_count": 7, + "id": "8f1bdcba03979d22", + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-13T01:08:06.672836Z", + "start_time": "2023-08-13T01:08:06.505944Z" + }, + "collapsed": false + }, "outputs": [ { "name": "stdout", @@ -455,7 +468,9 @@ "source": [ "filter = {\n", " \"where\": {\n", - " \"jsonpath\": \"$[*] ? (@.source == 'https://www.gutenberg.org/files/48320/48320-0.txt')\"\n", + " \"jsonpath\": (\n", + " \"$[*] ? (@.source == 'https://www.gutenberg.org/files/48320/48320-0.txt')\"\n", + " )\n", " },\n", "}\n", "\n", @@ -463,15 +478,7 @@ "\n", "for d in docs:\n", " print(d.page_content, \" -> \", d.metadata, \"\\n====\\n\")" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-08-13T01:08:06.672836Z", - "start_time": "2023-08-13T01:08:06.505944Z" - } - }, - "id": "8f1bdcba03979d22" + ] } ], "metadata": { @@ -490,7 +497,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", - "version": "2.7.6" + "version": "3.11.6" } }, "nbformat": 4,