From 7203c97e8fd1af1da9006e5a36b37afc496e159b Mon Sep 17 00:00:00 2001 From: Bagatur <22008038+baskaryan@users.noreply.github.com> Date: Fri, 8 Sep 2023 16:43:16 -0700 Subject: [PATCH] Add redis self-query support (#10199) --- .../integrations/vectorstores/redis.ipynb | 54 +- .../self_query/redis_self_query.ipynb | 472 ++++++++++++++++++ .../langchain/retrievers/self_query/base.py | 23 +- .../langchain/retrievers/self_query/redis.py | 102 ++++ .../langchain/vectorstores/redis/filters.py | 54 +- .../langchain/vectorstores/redis/schema.py | 30 +- .../retrievers/self_query/test_redis.py | 122 +++++ 7 files changed, 785 insertions(+), 72 deletions(-) create mode 100644 docs/extras/modules/data_connection/retrievers/self_query/redis_self_query.ipynb create mode 100644 libs/langchain/langchain/retrievers/self_query/redis.py create mode 100644 libs/langchain/tests/unit_tests/retrievers/self_query/test_redis.py diff --git a/docs/extras/integrations/vectorstores/redis.ipynb b/docs/extras/integrations/vectorstores/redis.ipynb index f729be1599..4b13672fc5 100644 --- a/docs/extras/integrations/vectorstores/redis.ipynb +++ b/docs/extras/integrations/vectorstores/redis.ipynb @@ -10,9 +10,9 @@ "\n", "## What is Redis?\n", "\n", - "Most developers from a web services background are probably familiar with Redis. At it's core, Redis is an open-source key-value store that can be used as a cache, message broker, and database. Developers choice Redis because it is fast, has a large ecosystem of client libraries, and has been deployed by major enterprises for years.\n", + "Most developers from a web services background are probably familiar with Redis. At its core, Redis is an open-source key-value store that can be used as a cache, message broker, and database. Developers choose Redis because it is fast, has a large ecosystem of client libraries, and has been deployed by major enterprises for years.\n", "\n", - "In addition to the traditional uses of Redis. 
Redis also provides capabilities built directly into Redis. These capabilities include the Search and Query capability that allows users to create secondary index structures within Redis. This allows Redis to be a Vector Database, at the speed of a cache. \n", + "On top of these traditional use cases, Redis provides additional capabilities like the Search and Query capability that allows users to create secondary index structures within Redis. This allows Redis to be a Vector Database, at the speed of a cache. \n", "\n", "\n", "## Redis as a Vector Database\n", @@ -123,7 +123,7 @@ "source": [ "## Install Redis Python Client\n", "\n", - "Redis-py is the officially supported client by Redis. Recently released is the RedisVL client which is purpose built for the Vector Database use cases. Both can be installed with pip." + "Redis-py is the officially supported client by Redis. Recently released is the RedisVL client which is purpose-built for the Vector Database use cases. Both can be installed with pip." ] }, { @@ -153,9 +153,17 @@ "import os\n", "import getpass\n", "\n", - "os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")\n", - "\n", + "os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ "from langchain.embeddings import OpenAIEmbeddings\n", + "\n", "embeddings = OpenAIEmbeddings()" ] }, @@ -215,6 +223,12 @@ "source": [ "## Initializing Redis\n", "\n", + "To locally deploy Redis, run:\n", + "```console\n", + "docker run -d -p 6379:6379 -p 8001:8001 redis/redis-stack:latest\n", + "```\n", + "If things are running correctly you should see a nice Redis UI at http://localhost:8001. See the [Deployment Options](#deployment-options) section above for other ways to deploy.\n", + "\n", "The Redis VectorStore instance can be initialized in a number of ways. 
There are multiple class methods that can be used to initialize a Redis VectorStore instance.\n", "\n", "- ``Redis.__init__`` - Initialize directly\n", @@ -223,7 +237,7 @@ "- ``Redis.from_texts_return_keys`` - Initialize from a list of texts (optionally with metadata) and return the keys\n", "- ``Redis.from_existing_index`` - Initialize from an existing Redis index\n", "\n", - "Below we will use the ``Redis.from_documents`` method." + "Below we will use the ``Redis.from_texts`` method." ] }, { @@ -234,28 +248,12 @@ }, "outputs": [], "source": [ - "from langchain.vectorstores.redis import Redis" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "If you're not interested in the keys of your entries you can also create your redis instance from the documents." - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "from langchain.docstore.document import Document\n", + "from langchain.vectorstores.redis import Redis\n", "\n", - "documents = [Document(page_content=t, metadata=m) for t, m in zip(texts, metadata)]\n", - "rds = Redis.from_documents(\n", - " documents,\n", + "rds = Redis.from_texts(\n", + " texts,\n", " embeddings,\n", + " metadatas=metadata,\n", " redis_url=\"redis://localhost:6379\",\n", " index_name=\"users\"\n", ")" @@ -454,7 +452,7 @@ "results = rds.similarity_search(\"foo\", k=3)\n", "meta = results[1].metadata\n", "print(\"Key of the document in Redis: \", meta.pop(\"id\"))\n", - "print(\"Metadata of the document: \", meta)\n" + "print(\"Metadata of the document: \", meta)" ] }, { @@ -1229,7 +1227,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.13" + "version": "3.11.3" } }, "nbformat": 4, diff --git a/docs/extras/modules/data_connection/retrievers/self_query/redis_self_query.ipynb b/docs/extras/modules/data_connection/retrievers/self_query/redis_self_query.ipynb new file mode 100644 index 0000000000..d74ea2dd68 --- 
/dev/null +++ b/docs/extras/modules/data_connection/retrievers/self_query/redis_self_query.ipynb @@ -0,0 +1,472 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "13afcae7", + "metadata": {}, + "source": [ + "# Redis self-querying \n", + "\n", + ">[Redis](https://redis.com) is an open-source key-value store that can be used as a cache, message broker, database, vector database and more.\n", + "\n", + "In the notebook we'll demo the `SelfQueryRetriever` wrapped around a Redis vector store. " + ] + }, + { + "cell_type": "markdown", + "id": "68e75fb9", + "metadata": {}, + "source": [ + "## Creating a Redis vector store\n", + "First we'll want to create a Redis vector store and seed it with some data. We've created a small demo set of documents that contain summaries of movies.\n", + "\n", + "**Note:** The self-query retriever requires you to have `lark` installed (`pip install lark`) along with integration-specific requirements." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "63a8af5b", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# !pip install redis redisvl openai tiktoken lark" + ] + }, + { + "cell_type": "markdown", + "id": "83811610-7df3-4ede-b268-68a6a83ba9e2", + "metadata": {}, + "source": [ + "We want to use `OpenAIEmbeddings` so we have to get the OpenAI API Key." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "dd01b61b-7d32-4a55-85d6-b2d2d4f18840", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import os\n", + "import getpass\n", + "\n", + "os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "cb4a5787", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "from langchain.schema import Document\n", + "from langchain.embeddings.openai import OpenAIEmbeddings\n", + "from langchain.vectorstores import Redis\n", + "\n", + "embeddings = OpenAIEmbeddings()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "bcbe04d9", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "docs = [\n", + " Document(\n", + " page_content=\"A bunch of scientists bring back dinosaurs and mayhem breaks loose\",\n", + " metadata={\"year\": 1993, \"rating\": 7.7, \"director\": \"Steven Spielberg\", \"genre\": \"science fiction\"},\n", + " ),\n", + " Document(\n", + " page_content=\"Leo DiCaprio gets lost in a dream within a dream within a dream within a ...\",\n", + " metadata={\"year\": 2010, \"director\": \"Christopher Nolan\", \"genre\": \"science fiction\", \"rating\": 8.2},\n", + " ),\n", + " Document(\n", + " page_content=\"A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea\",\n", + " metadata={\"year\": 2006, \"director\": \"Satoshi Kon\", \"genre\": \"science fiction\", \"rating\": 8.6},\n", + " ),\n", + " Document(\n", + " page_content=\"A bunch of normal-sized women are supremely wholesome and some men pine after them\",\n", + " metadata={\"year\": 2019, \"director\": \"Greta Gerwig\", \"genre\": \"drama\", \"rating\": 8.3},\n", + " ),\n", + " Document(\n", + " page_content=\"Toys come alive and have a blast doing so\",\n", + " metadata={\"year\": 1995, \"director\": \"John Lasseter\", \"genre\": 
\"animated\", \"rating\": 9.1,},\n", + " ),\n", + " Document(\n", + " page_content=\"Three men walk into the Zone, three men walk out of the Zone\",\n", + " metadata={\n", + " \"year\": 1979,\n", + " \"rating\": 9.9,\n", + " \"director\": \"Andrei Tarkovsky\",\n", + " \"genre\": \"science fiction\",\n", + " },\n", + " ),\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "393aff3b", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "`index_schema` does not match generated metadata schema.\n", + "If you meant to manually override the schema, please ignore this message.\n", + "index_schema: {'tag': [{'name': 'genre'}], 'text': [{'name': 'director'}], 'numeric': [{'name': 'year'}, {'name': 'rating'}]}\n", + "generated_schema: {'text': [{'name': 'director'}, {'name': 'genre'}], 'numeric': [{'name': 'year'}, {'name': 'rating'}], 'tag': []}\n", + "\n" + ] + } + ], + "source": [ + "index_schema = {\n", + " \"tag\": [{\"name\": \"genre\"}],\n", + " \"text\": [{\"name\": \"director\"}],\n", + " \"numeric\": [{\"name\": \"year\"}, {\"name\": \"rating\"}],\n", + "}\n", + "\n", + "vectorstore = Redis.from_documents(\n", + " docs, \n", + " embeddings, \n", + " redis_url=\"redis://localhost:6379\",\n", + " index_name=\"movie_reviews\",\n", + " index_schema=index_schema,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "5ecaab6d", + "metadata": {}, + "source": [ + "## Creating our self-querying retriever\n", + "Now we can instantiate our retriever. To do this we'll need to provide some information upfront about the metadata fields that our documents support and a short description of the document contents." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "86e34dbf", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "from langchain.llms import OpenAI\n", + "from langchain.retrievers.self_query.base import SelfQueryRetriever\n", + "from langchain.chains.query_constructor.base import AttributeInfo\n", + "\n", + "metadata_field_info = [\n", + " AttributeInfo(\n", + " name=\"genre\",\n", + " description=\"The genre of the movie\",\n", + " type=\"string or list[string]\",\n", + " ),\n", + " AttributeInfo(\n", + " name=\"year\",\n", + " description=\"The year the movie was released\",\n", + " type=\"integer\",\n", + " ),\n", + " AttributeInfo(\n", + " name=\"director\",\n", + " description=\"The name of the movie director\",\n", + " type=\"string\",\n", + " ),\n", + " AttributeInfo(\n", + " name=\"rating\", description=\"A 1-10 rating for the movie\", type=\"float\"\n", + " ),\n", + "]\n", + "document_content_description = \"Brief summary of a movie\"\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "ea1126cb", + "metadata": {}, + "outputs": [], + "source": [ + "llm = OpenAI(temperature=0)\n", + "retriever = SelfQueryRetriever.from_llm(\n", + " llm, \n", + " vectorstore, \n", + " document_content_description, \n", + " metadata_field_info, \n", + " verbose=True\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "ea9df8d4", + "metadata": {}, + "source": [ + "## Testing it out\n", + "And now we can try actually using our retriever!" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "38a126e9", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/bagatur/langchain/libs/langchain/langchain/chains/llm.py:278: UserWarning: The predict_and_parse method is deprecated, instead pass an output parser directly to LLMChain.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "query='dinosaur' filter=None limit=None\n" + ] + }, + { + "data": { + "text/plain": [ + "[Document(page_content='A bunch of scientists bring back dinosaurs and mayhem breaks loose', metadata={'id': 'doc:movie_reviews:7b5481d753bc4135851b66fa61def7fb', 'director': 'Steven Spielberg', 'genre': 'science fiction', 'year': '1993', 'rating': '7.7'}),\n", + " Document(page_content='Toys come alive and have a blast doing so', metadata={'id': 'doc:movie_reviews:9e4e84daa0374941a6aa4274e9bbb607', 'director': 'John Lasseter', 'genre': 'animated', 'year': '1995', 'rating': '9.1'}),\n", + " Document(page_content='Three men walk into the Zone, three men walk out of the Zone', metadata={'id': 'doc:movie_reviews:2cc66f38bfbd438eb3a045d90a1a4088', 'director': 'Andrei Tarkovsky', 'genre': 'science fiction', 'year': '1979', 'rating': '9.9'}),\n", + " Document(page_content='A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea', metadata={'id': 'doc:movie_reviews:edf567b1d5334e02b2a4c692d853c80c', 'director': 'Satoshi Kon', 'genre': 'science fiction', 'year': '2006', 'rating': '8.6'})]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# This example only specifies a relevant query\n", + "retriever.get_relevant_documents(\"What are some movies about dinosaurs\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "fc3f1e6e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": 
"stream", + "text": [ + "query=' ' filter=Comparison(comparator=, attribute='rating', value=8.4) limit=None\n" + ] + }, + { + "data": { + "text/plain": [ + "[Document(page_content='Toys come alive and have a blast doing so', metadata={'id': 'doc:movie_reviews:9e4e84daa0374941a6aa4274e9bbb607', 'director': 'John Lasseter', 'genre': 'animated', 'year': '1995', 'rating': '9.1'}),\n", + " Document(page_content='Three men walk into the Zone, three men walk out of the Zone', metadata={'id': 'doc:movie_reviews:2cc66f38bfbd438eb3a045d90a1a4088', 'director': 'Andrei Tarkovsky', 'genre': 'science fiction', 'year': '1979', 'rating': '9.9'}),\n", + " Document(page_content='A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea', metadata={'id': 'doc:movie_reviews:edf567b1d5334e02b2a4c692d853c80c', 'director': 'Satoshi Kon', 'genre': 'science fiction', 'year': '2006', 'rating': '8.6'})]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# This example only specifies a filter\n", + "retriever.get_relevant_documents(\"I want to watch a movie rated higher than 8.4\")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "b19d4da0", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "query='women' filter=Comparison(comparator=, attribute='director', value='Greta Gerwig') limit=None\n" + ] + }, + { + "data": { + "text/plain": [ + "[Document(page_content='A bunch of normal-sized women are supremely wholesome and some men pine after them', metadata={'id': 'doc:movie_reviews:bb899807b93c442083fd45e75a4779d5', 'director': 'Greta Gerwig', 'genre': 'drama', 'year': '2019', 'rating': '8.3'})]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# This example specifies a query and a filter\n", + "retriever.get_relevant_documents(\"Has Greta Gerwig 
directed any movies about women\")" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "f900e40e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "query=' ' filter=Operation(operator=, arguments=[Comparison(comparator=, attribute='rating', value=8.5), Comparison(comparator=, attribute='genre', value='science fiction')]) limit=None\n" + ] + }, + { + "data": { + "text/plain": [ + "[Document(page_content='Three men walk into the Zone, three men walk out of the Zone', metadata={'id': 'doc:movie_reviews:2cc66f38bfbd438eb3a045d90a1a4088', 'director': 'Andrei Tarkovsky', 'genre': 'science fiction', 'year': '1979', 'rating': '9.9'}),\n", + " Document(page_content='A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea', metadata={'id': 'doc:movie_reviews:edf567b1d5334e02b2a4c692d853c80c', 'director': 'Satoshi Kon', 'genre': 'science fiction', 'year': '2006', 'rating': '8.6'})]" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# This example specifies a composite filter\n", + "retriever.get_relevant_documents(\n", + " \"What's a highly rated (above 8.5) science fiction film?\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "12a51522", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "query='toys' filter=Operation(operator=, arguments=[Comparison(comparator=, attribute='year', value=1990), Comparison(comparator=, attribute='year', value=2005), Comparison(comparator=, attribute='genre', value='animated')]) limit=None\n" + ] + }, + { + "data": { + "text/plain": [ + "[Document(page_content='Toys come alive and have a blast doing so', metadata={'id': 'doc:movie_reviews:9e4e84daa0374941a6aa4274e9bbb607', 'director': 'John Lasseter', 'genre': 'animated', 'year': '1995', 'rating': '9.1'})]" + ] + }, + 
"execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# This example specifies a query and composite filter\n", + "retriever.get_relevant_documents(\n", + " \"What's a movie after 1990 but before 2005 that's all about toys, and preferably is animated\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "39bd1de1-b9fe-4a98-89da-58d8a7a6ae51", + "metadata": {}, + "source": [ + "## Filter k\n", + "\n", + "We can also use the self query retriever to specify `k`: the number of documents to fetch.\n", + "\n", + "We can do this by passing `enable_limit=True` to the constructor." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "bff36b88-b506-4877-9c63-e5a1a8d78e64", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "retriever = SelfQueryRetriever.from_llm(\n", + " llm,\n", + " vectorstore,\n", + " document_content_description,\n", + " metadata_field_info,\n", + " enable_limit=True,\n", + " verbose=True,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "2758d229-4f97-499c-819f-888acaf8ee10", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "query='dinosaur' filter=None limit=2\n" + ] + }, + { + "data": { + "text/plain": [ + "[Document(page_content='A bunch of scientists bring back dinosaurs and mayhem breaks loose', metadata={'id': 'doc:movie_reviews:7b5481d753bc4135851b66fa61def7fb', 'director': 'Steven Spielberg', 'genre': 'science fiction', 'year': '1993', 'rating': '7.7'}),\n", + " Document(page_content='Toys come alive and have a blast doing so', metadata={'id': 'doc:movie_reviews:9e4e84daa0374941a6aa4274e9bbb607', 'director': 'John Lasseter', 'genre': 'animated', 'year': '1995', 'rating': '9.1'})]" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# This example only specifies a relevant query\n", + 
"retriever.get_relevant_documents(\"what are two movies about dinosaurs\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "poetry-venv", + "language": "python", + "name": "poetry-venv" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.1" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/libs/langchain/langchain/retrievers/self_query/base.py b/libs/langchain/langchain/retrievers/self_query/base.py index 62bce34847..2a7d53277b 100644 --- a/libs/langchain/langchain/retrievers/self_query/base.py +++ b/libs/langchain/langchain/retrievers/self_query/base.py @@ -2,8 +2,8 @@ from typing import Any, Dict, List, Optional, Type, cast -from langchain import LLMChain from langchain.callbacks.manager import CallbackManagerForRetrieverRun +from langchain.chains import LLMChain from langchain.chains.query_constructor.base import load_query_constructor_chain from langchain.chains.query_constructor.ir import StructuredQuery, Visitor from langchain.chains.query_constructor.schema import AttributeInfo @@ -16,6 +16,7 @@ from langchain.retrievers.self_query.milvus import MilvusTranslator from langchain.retrievers.self_query.myscale import MyScaleTranslator from langchain.retrievers.self_query.pinecone import PineconeTranslator from langchain.retrievers.self_query.qdrant import QdrantTranslator +from langchain.retrievers.self_query.redis import RedisTranslator from langchain.retrievers.self_query.supabase import SupabaseVectorTranslator from langchain.retrievers.self_query.vectara import VectaraTranslator from langchain.retrievers.self_query.weaviate import WeaviateTranslator @@ -30,6 +31,7 @@ from langchain.vectorstores import ( MyScale, Pinecone, Qdrant, + Redis, SupabaseVectorStore, Vectara, VectorStore, @@ -39,7 +41,6 @@ from langchain.vectorstores 
import ( def _get_builtin_translator(vectorstore: VectorStore) -> Visitor: """Get the translator class corresponding to the vector store class.""" - vectorstore_cls = vectorstore.__class__ BUILTIN_TRANSLATORS: Dict[Type[VectorStore], Type[Visitor]] = { Pinecone: PineconeTranslator, Chroma: ChromaTranslator, @@ -53,16 +54,19 @@ def _get_builtin_translator(vectorstore: VectorStore) -> Visitor: Milvus: MilvusTranslator, SupabaseVectorStore: SupabaseVectorTranslator, } - if vectorstore_cls not in BUILTIN_TRANSLATORS: - raise ValueError( - f"Self query retriever with Vector Store type {vectorstore_cls}" - f" not supported." - ) if isinstance(vectorstore, Qdrant): return QdrantTranslator(metadata_key=vectorstore.metadata_payload_key) elif isinstance(vectorstore, MyScale): return MyScaleTranslator(metadata_key=vectorstore.metadata_column) - return BUILTIN_TRANSLATORS[vectorstore_cls]() + elif isinstance(vectorstore, Redis): + return RedisTranslator.from_vectorstore(vectorstore) + elif vectorstore.__class__ in BUILTIN_TRANSLATORS: + return BUILTIN_TRANSLATORS[vectorstore.__class__]() + else: + raise ValueError( + f"Self query retriever with Vector Store type {vectorstore.__class__}" + f" not supported." 
+ ) class SelfQueryRetriever(BaseRetriever, BaseModel): @@ -80,8 +84,9 @@ class SelfQueryRetriever(BaseRetriever, BaseModel): structured_query_translator: Visitor """Translator for turning internal query language into vectorstore search params.""" verbose: bool = False - """Use original query instead of the revised new query from LLM""" + use_original_query: bool = False + """Use original query instead of the revised new query from LLM""" class Config: """Configuration for this pydantic object.""" diff --git a/libs/langchain/langchain/retrievers/self_query/redis.py b/libs/langchain/langchain/retrievers/self_query/redis.py new file mode 100644 index 0000000000..963b58aa68 --- /dev/null +++ b/libs/langchain/langchain/retrievers/self_query/redis.py @@ -0,0 +1,102 @@ +from __future__ import annotations + +from typing import Any, Tuple + +from langchain.chains.query_constructor.ir import ( + Comparator, + Comparison, + Operation, + Operator, + StructuredQuery, + Visitor, +) +from langchain.vectorstores.redis import Redis +from langchain.vectorstores.redis.filters import ( + RedisFilterExpression, + RedisFilterField, + RedisFilterOperator, + RedisNum, + RedisTag, + RedisText, +) +from langchain.vectorstores.redis.schema import RedisModel + +_COMPARATOR_TO_BUILTIN_METHOD = { + Comparator.EQ: "__eq__", + Comparator.NE: "__ne__", + Comparator.LT: "__lt__", + Comparator.GT: "__gt__", + Comparator.LTE: "__le__", + Comparator.GTE: "__ge__", + Comparator.CONTAIN: "__eq__", + Comparator.LIKE: "__mod__", +} + + +class RedisTranslator(Visitor): + """Translate""" + + allowed_comparators = ( + Comparator.EQ, + Comparator.NE, + Comparator.LT, + Comparator.LTE, + Comparator.GT, + Comparator.GTE, + Comparator.CONTAIN, + Comparator.LIKE, + ) + """Subset of allowed logical comparators.""" + allowed_operators = (Operator.AND, Operator.OR) + """Subset of allowed logical operators.""" + + def __init__(self, schema: RedisModel) -> None: + self._schema = schema + + def 
_attribute_to_filter_field(self, attribute: str) -> RedisFilterField: + if attribute in [tf.name for tf in self._schema.text]: + return RedisText(attribute) + elif attribute in [tf.name for tf in self._schema.tag or []]: + return RedisTag(attribute) + elif attribute in [tf.name for tf in self._schema.numeric or []]: + return RedisNum(attribute) + else: + raise ValueError( + f"Invalid attribute {attribute} not in vector store schema. Schema is:" + f"\n{self._schema.as_dict()}" + ) + + def visit_comparison(self, comparison: Comparison) -> RedisFilterExpression: + filter_field = self._attribute_to_filter_field(comparison.attribute) + comparison_method = _COMPARATOR_TO_BUILTIN_METHOD[comparison.comparator] + return getattr(filter_field, comparison_method)(comparison.value) + + def visit_operation(self, operation: Operation) -> Any: + left = operation.arguments[0].accept(self) + if len(operation.arguments) > 2: + right = self.visit_operation( + Operation( + operator=operation.operator, arguments=operation.arguments[1:] + ) + ) + else: + right = operation.arguments[1].accept(self) + redis_operator = ( + RedisFilterOperator.OR + if operation.operator == Operator.OR + else RedisFilterOperator.AND + ) + return RedisFilterExpression(operator=redis_operator, left=left, right=right) + + def visit_structured_query( + self, structured_query: StructuredQuery + ) -> Tuple[str, dict]: + if structured_query.filter is None: + kwargs = {} + else: + kwargs = {"filter": structured_query.filter.accept(self)} + return structured_query.query, kwargs + + @classmethod + def from_vectorstore(cls, vectorstore: Redis) -> RedisTranslator: + return cls(vectorstore._schema) diff --git a/libs/langchain/langchain/vectorstores/redis/filters.py b/libs/langchain/langchain/vectorstores/redis/filters.py index 0f6608bae8..633c8f4073 100644 --- a/libs/langchain/langchain/vectorstores/redis/filters.py +++ b/libs/langchain/langchain/vectorstores/redis/filters.py @@ -1,5 +1,6 @@ from enum import Enum from 
functools import wraps +from numbers import Number from typing import Any, Callable, Dict, List, Optional, Union from langchain.utilities.redis import TokenEscaper @@ -56,14 +57,15 @@ class RedisFilterField: if operator not in self.OPERATORS: raise ValueError( f"Operator {operator} not supported by {self.__class__.__name__}. " - + f"Supported operators are {self.OPERATORS.values()}" + + f"Supported operators are {self.OPERATORS.values()}." ) if not isinstance(val, val_type): raise TypeError( f"Right side argument passed to operator {self.OPERATORS[operator]} " f"with left side " - f"argument {self.__class__.__name__} must be of type {val_type}" + f"argument {self.__class__.__name__} must be of type {val_type}, " + f"received value {val}" ) self._value = val self._operator = operator @@ -181,12 +183,12 @@ class RedisNum(RedisFilterField): RedisFilterOperator.GE: ">=", } OPERATOR_MAP: Dict[RedisFilterOperator, str] = { - RedisFilterOperator.EQ: "@%s:[%i %i]", - RedisFilterOperator.NE: "(-@%s:[%i %i])", - RedisFilterOperator.GT: "@%s:[(%i +inf]", - RedisFilterOperator.LT: "@%s:[-inf (%i]", - RedisFilterOperator.GE: "@%s:[%i +inf]", - RedisFilterOperator.LE: "@%s:[-inf %i]", + RedisFilterOperator.EQ: "@%s:[%f %f]", + RedisFilterOperator.NE: "(-@%s:[%f %f])", + RedisFilterOperator.GT: "@%s:[(%f +inf]", + RedisFilterOperator.LT: "@%s:[-inf (%f]", + RedisFilterOperator.GE: "@%s:[%f +inf]", + RedisFilterOperator.LE: "@%s:[-inf %f]", } def __str__(self) -> str: @@ -210,83 +212,83 @@ class RedisNum(RedisFilterField): return self.OPERATOR_MAP[self._operator] % (self._field, self._value) @check_operator_misuse - def __eq__(self, other: int) -> "RedisFilterExpression": + def __eq__(self, other: Union[int, float]) -> "RedisFilterExpression": """Create a Numeric equality filter expression Args: - other (int): The value to filter on. + other (Number): The value to filter on. 
Example: >>> from langchain.vectorstores.redis import RedisNum >>> filter = RedisNum("zipcode") == 90210 """ - self._set_value(other, int, RedisFilterOperator.EQ) + self._set_value(other, Number, RedisFilterOperator.EQ) return RedisFilterExpression(str(self)) @check_operator_misuse - def __ne__(self, other: int) -> "RedisFilterExpression": + def __ne__(self, other: Union[int, float]) -> "RedisFilterExpression": """Create a Numeric inequality filter expression Args: - other (int): The value to filter on. + other (Number): The value to filter on. Example: >>> from langchain.vectorstores.redis import RedisNum >>> filter = RedisNum("zipcode") != 90210 """ - self._set_value(other, int, RedisFilterOperator.NE) + self._set_value(other, Number, RedisFilterOperator.NE) return RedisFilterExpression(str(self)) - def __gt__(self, other: int) -> "RedisFilterExpression": + def __gt__(self, other: Union[int, float]) -> "RedisFilterExpression": """Create a RedisNumeric greater than filter expression Args: - other (int): The value to filter on. + other (Number): The value to filter on. Example: >>> from langchain.vectorstores.redis import RedisNum >>> filter = RedisNum("age") > 18 """ - self._set_value(other, int, RedisFilterOperator.GT) + self._set_value(other, Number, RedisFilterOperator.GT) return RedisFilterExpression(str(self)) - def __lt__(self, other: int) -> "RedisFilterExpression": + def __lt__(self, other: Union[int, float]) -> "RedisFilterExpression": """Create a Numeric less than filter expression Args: - other (int): The value to filter on. + other (Number): The value to filter on. 
Example: >>> from langchain.vectorstores.redis import RedisNum >>> filter = RedisNum("age") < 18 """ - self._set_value(other, int, RedisFilterOperator.LT) + self._set_value(other, Number, RedisFilterOperator.LT) return RedisFilterExpression(str(self)) - def __ge__(self, other: int) -> "RedisFilterExpression": + def __ge__(self, other: Union[int, float]) -> "RedisFilterExpression": """Create a Numeric greater than or equal to filter expression Args: - other (int): The value to filter on. + other (Number): The value to filter on. Example: >>> from langchain.vectorstores.redis import RedisNum >>> filter = RedisNum("age") >= 18 """ - self._set_value(other, int, RedisFilterOperator.GE) + self._set_value(other, Number, RedisFilterOperator.GE) return RedisFilterExpression(str(self)) - def __le__(self, other: int) -> "RedisFilterExpression": + def __le__(self, other: Union[int, float]) -> "RedisFilterExpression": """Create a Numeric less than or equal to filter expression Args: - other (int): The value to filter on. + other (Number): The value to filter on. 
Example: >>> from langchain.vectorstores.redis import RedisNum >>> filter = RedisNum("age") <= 18 """ - self._set_value(other, int, RedisFilterOperator.LE) + self._set_value(other, Number, RedisFilterOperator.LE) return RedisFilterExpression(str(self)) diff --git a/libs/langchain/langchain/vectorstores/redis/schema.py b/libs/langchain/langchain/vectorstores/redis/schema.py index 1ecd921928..79833a94bc 100644 --- a/libs/langchain/langchain/vectorstores/redis/schema.py +++ b/libs/langchain/langchain/vectorstores/redis/schema.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import os from enum import Enum from pathlib import Path @@ -5,19 +7,19 @@ from typing import Any, Dict, List, Optional, Union import numpy as np import yaml - -# ignore type error here as it's a redis-py type problem -from redis.commands.search.field import ( # type: ignore - NumericField, - TagField, - TextField, - VectorField, -) -from typing_extensions import Literal +from typing_extensions import TYPE_CHECKING, Literal from langchain.pydantic_v1 import BaseModel, Field, validator from langchain.vectorstores.redis.constants import REDIS_VECTOR_DTYPE_MAP +if TYPE_CHECKING: + from redis.commands.search.field import ( # type: ignore + NumericField, + TagField, + TextField, + VectorField, + ) + class RedisDistanceMetric(str, Enum): l2 = "L2" @@ -38,6 +40,8 @@ class TextFieldSchema(RedisField): sortable: Optional[bool] = False def as_field(self) -> TextField: + from redis.commands.search.field import TextField # type: ignore + return TextField( self.name, weight=self.weight, @@ -55,6 +59,8 @@ class TagFieldSchema(RedisField): sortable: Optional[bool] = False def as_field(self) -> TagField: + from redis.commands.search.field import TagField # type: ignore + return TagField( self.name, separator=self.separator, @@ -69,6 +75,8 @@ class NumericFieldSchema(RedisField): sortable: Optional[bool] = False def as_field(self) -> NumericField: + from redis.commands.search.field import NumericField # 
type: ignore + return NumericField(self.name, sortable=self.sortable, no_index=self.no_index) @@ -97,6 +105,8 @@ class FlatVectorField(RedisVectorField): block_size: int = Field(default=1000) def as_field(self) -> VectorField: + from redis.commands.search.field import VectorField # type: ignore + return VectorField( self.name, self.algorithm, @@ -118,6 +128,8 @@ class HNSWVectorField(RedisVectorField): epsilon: float = Field(default=0.8) def as_field(self) -> VectorField: + from redis.commands.search.field import VectorField # type: ignore + return VectorField( self.name, self.algorithm, diff --git a/libs/langchain/tests/unit_tests/retrievers/self_query/test_redis.py b/libs/langchain/tests/unit_tests/retrievers/self_query/test_redis.py new file mode 100644 index 0000000000..62c225fe02 --- /dev/null +++ b/libs/langchain/tests/unit_tests/retrievers/self_query/test_redis.py @@ -0,0 +1,122 @@ +from typing import Dict, Tuple + +import pytest + +from langchain.chains.query_constructor.ir import ( + Comparator, + Comparison, + Operation, + Operator, + StructuredQuery, +) +from langchain.retrievers.self_query.redis import RedisTranslator +from langchain.vectorstores.redis.filters import ( + RedisFilterExpression, + RedisNum, + RedisTag, + RedisText, +) +from langchain.vectorstores.redis.schema import ( + NumericFieldSchema, + RedisModel, + TagFieldSchema, + TextFieldSchema, +) + + +@pytest.fixture +def translator() -> RedisTranslator: + schema = RedisModel( + text=[TextFieldSchema(name="bar")], + numeric=[NumericFieldSchema(name="foo")], + tag=[TagFieldSchema(name="tag")], + ) + return RedisTranslator(schema) + + +@pytest.mark.parametrize( + ("comp", "expected"), + [ + ( + Comparison(comparator=Comparator.LT, attribute="foo", value=1), + RedisNum("foo") < 1, + ), + ( + Comparison(comparator=Comparator.LIKE, attribute="bar", value="baz*"), + RedisText("bar") % "baz*", + ), + ( + Comparison( + comparator=Comparator.CONTAIN, attribute="tag", value=["blue", "green"] + ), + 
RedisTag("tag") == ["blue", "green"], + ), + ], +) +def test_visit_comparison( + translator: RedisTranslator, comp: Comparison, expected: RedisFilterExpression +) -> None: + comp = Comparison(comparator=Comparator.LT, attribute="foo", value=1) + expected = RedisNum("foo") < 1 + actual = translator.visit_comparison(comp) + assert str(expected) == str(actual) + + +def test_visit_operation(translator: RedisTranslator) -> None: + op = Operation( + operator=Operator.AND, + arguments=[ + Comparison(comparator=Comparator.LT, attribute="foo", value=2), + Comparison(comparator=Comparator.EQ, attribute="bar", value="baz"), + Comparison(comparator=Comparator.EQ, attribute="tag", value="high"), + ], + ) + expected = (RedisNum("foo") < 2) & ( + (RedisText("bar") == "baz") & (RedisTag("tag") == "high") + ) + actual = translator.visit_operation(op) + assert str(expected) == str(actual) + + +def test_visit_structured_query_no_filter(translator: RedisTranslator) -> None: + query = "What is the capital of France?" + + structured_query = StructuredQuery( + query=query, + filter=None, + ) + expected: Tuple[str, Dict] = (query, {}) + actual = translator.visit_structured_query(structured_query) + assert expected == actual + + +def test_visit_structured_query_comparison(translator: RedisTranslator) -> None: + query = "What is the capital of France?" + comp = Comparison(comparator=Comparator.GTE, attribute="foo", value=2) + structured_query = StructuredQuery( + query=query, + filter=comp, + ) + expected_filter = RedisNum("foo") >= 2 + actual_query, actual_filter = translator.visit_structured_query(structured_query) + assert actual_query == query + assert str(actual_filter["filter"]) == str(expected_filter) + + +def test_visit_structured_query_operation(translator: RedisTranslator) -> None: + query = "What is the capital of France?" 
+ op = Operation( + operator=Operator.OR, + arguments=[ + Comparison(comparator=Comparator.EQ, attribute="foo", value=2), + Comparison(comparator=Comparator.CONTAIN, attribute="bar", value="baz"), + ], + ) + structured_query = StructuredQuery( + query=query, + filter=op, + ) + expected_filter = (RedisNum("foo") == 2) | (RedisText("bar") == "baz") + actual_query, actual_filter = translator.visit_structured_query(structured_query) + assert actual_query == query + assert str(actual_filter["filter"]) == str(expected_filter)