mirror of
https://github.com/hwchase17/langchain
synced 2024-11-06 03:20:49 +00:00
Add redis self-query support (#10199)
This commit is contained in:
parent
4258c23867
commit
7203c97e8f
@ -10,9 +10,9 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"## What is Redis?\n",
|
"## What is Redis?\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Most developers from a web services background are probably familiar with Redis. At it's core, Redis is an open-source key-value store that can be used as a cache, message broker, and database. Developers choice Redis because it is fast, has a large ecosystem of client libraries, and has been deployed by major enterprises for years.\n",
|
"Most developers from a web services background are probably familiar with Redis. At its core, Redis is an open-source key-value store that can be used as a cache, message broker, and database. Developers choose Redis because it is fast, has a large ecosystem of client libraries, and has been deployed by major enterprises for years.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"In addition to the traditional uses of Redis. Redis also provides capabilities built directly into Redis. These capabilities include the Search and Query capability that allows users to create secondary index structures within Redis. This allows Redis to be a Vector Database, at the speed of a cache. \n",
|
"On top of these traditional use cases, Redis provides additional capabilities like the Search and Query capability that allows users to create secondary index structures within Redis. This allows Redis to be a Vector Database, at the speed of a cache. \n",
|
||||||
"\n",
|
"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"## Redis as a Vector Database\n",
|
"## Redis as a Vector Database\n",
|
||||||
@ -123,7 +123,7 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"## Install Redis Python Client\n",
|
"## Install Redis Python Client\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Redis-py is the officially supported client by Redis. Recently released is the RedisVL client which is purpose built for the Vector Database use cases. Both can be installed with pip."
|
"Redis-py is the officially supported client by Redis. Recently released is the RedisVL client which is purpose-built for the Vector Database use cases. Both can be installed with pip."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -153,9 +153,17 @@
|
|||||||
"import os\n",
|
"import os\n",
|
||||||
"import getpass\n",
|
"import getpass\n",
|
||||||
"\n",
|
"\n",
|
||||||
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")\n",
|
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")"
|
||||||
"\n",
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
"from langchain.embeddings import OpenAIEmbeddings\n",
|
"from langchain.embeddings import OpenAIEmbeddings\n",
|
||||||
|
"\n",
|
||||||
"embeddings = OpenAIEmbeddings()"
|
"embeddings = OpenAIEmbeddings()"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@ -215,6 +223,12 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"## Initializing Redis\n",
|
"## Initializing Redis\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
"To locally deploy Redis, run:\n",
|
||||||
|
"```console\n",
|
||||||
|
"docker run -d -p 6379:6379 -p 8001:8001 redis/redis-stack:latest\n",
|
||||||
|
"```\n",
|
||||||
|
"If things are running correctly you should see a nice Redis UI at http://localhost:8001. See the [Deployment Options](#deployment-options) section above for other ways to deploy.\n",
|
||||||
|
"\n",
|
||||||
"The Redis VectorStore instance can be initialized in a number of ways. There are multiple class methods that can be used to initialize a Redis VectorStore instance.\n",
|
"The Redis VectorStore instance can be initialized in a number of ways. There are multiple class methods that can be used to initialize a Redis VectorStore instance.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"- ``Redis.__init__`` - Initialize directly\n",
|
"- ``Redis.__init__`` - Initialize directly\n",
|
||||||
@ -223,7 +237,7 @@
|
|||||||
"- ``Redis.from_texts_return_keys`` - Initialize from a list of texts (optionally with metadata) and return the keys\n",
|
"- ``Redis.from_texts_return_keys`` - Initialize from a list of texts (optionally with metadata) and return the keys\n",
|
||||||
"- ``Redis.from_existing_index`` - Initialize from an existing Redis index\n",
|
"- ``Redis.from_existing_index`` - Initialize from an existing Redis index\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Below we will use the ``Redis.from_documents`` method."
|
"Below we will use the ``Redis.from_texts`` method."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -234,28 +248,12 @@
|
|||||||
},
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from langchain.vectorstores.redis import Redis"
|
"from langchain.vectorstores.redis import Redis\n",
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"If you're not interested in the keys of your entries you can also create your redis instance from the documents."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 5,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from langchain.docstore.document import Document\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
"documents = [Document(page_content=t, metadata=m) for t, m in zip(texts, metadata)]\n",
|
"rds = Redis.from_texts(\n",
|
||||||
"rds = Redis.from_documents(\n",
|
" texts,\n",
|
||||||
" documents,\n",
|
|
||||||
" embeddings,\n",
|
" embeddings,\n",
|
||||||
|
" metadatas=metadata,\n",
|
||||||
" redis_url=\"redis://localhost:6379\",\n",
|
" redis_url=\"redis://localhost:6379\",\n",
|
||||||
" index_name=\"users\"\n",
|
" index_name=\"users\"\n",
|
||||||
")"
|
")"
|
||||||
@ -454,7 +452,7 @@
|
|||||||
"results = rds.similarity_search(\"foo\", k=3)\n",
|
"results = rds.similarity_search(\"foo\", k=3)\n",
|
||||||
"meta = results[1].metadata\n",
|
"meta = results[1].metadata\n",
|
||||||
"print(\"Key of the document in Redis: \", meta.pop(\"id\"))\n",
|
"print(\"Key of the document in Redis: \", meta.pop(\"id\"))\n",
|
||||||
"print(\"Metadata of the document: \", meta)\n"
|
"print(\"Metadata of the document: \", meta)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -1229,7 +1227,7 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.8.13"
|
"version": "3.11.3"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
|
@ -0,0 +1,472 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "13afcae7",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Redis self-querying \n",
|
||||||
|
"\n",
|
||||||
|
">[Redis](https://redis.com) is an open-source key-value store that can be used as a cache, message broker, database, vector database and more.\n",
|
||||||
|
"\n",
|
||||||
|
"In this notebook we'll demo the `SelfQueryRetriever` wrapped around a Redis vector store. "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "68e75fb9",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Creating a Redis vector store\n",
|
||||||
|
"First we'll want to create a Redis vector store and seed it with some data. We've created a small demo set of documents that contain summaries of movies.\n",
|
||||||
|
"\n",
|
||||||
|
"**Note:** The self-query retriever requires you to have `lark` installed (`pip install lark`) along with integration-specific requirements."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 1,
|
||||||
|
"id": "63a8af5b",
|
||||||
|
"metadata": {
|
||||||
|
"tags": []
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# !pip install redis redisvl openai tiktoken lark"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "83811610-7df3-4ede-b268-68a6a83ba9e2",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"We want to use `OpenAIEmbeddings` so we have to get the OpenAI API Key."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 2,
|
||||||
|
"id": "dd01b61b-7d32-4a55-85d6-b2d2d4f18840",
|
||||||
|
"metadata": {
|
||||||
|
"tags": []
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import os\n",
|
||||||
|
"import getpass\n",
|
||||||
|
"\n",
|
||||||
|
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 3,
|
||||||
|
"id": "cb4a5787",
|
||||||
|
"metadata": {
|
||||||
|
"tags": []
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from langchain.schema import Document\n",
|
||||||
|
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
|
||||||
|
"from langchain.vectorstores import Redis\n",
|
||||||
|
"\n",
|
||||||
|
"embeddings = OpenAIEmbeddings()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 4,
|
||||||
|
"id": "bcbe04d9",
|
||||||
|
"metadata": {
|
||||||
|
"tags": []
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"docs = [\n",
|
||||||
|
" Document(\n",
|
||||||
|
" page_content=\"A bunch of scientists bring back dinosaurs and mayhem breaks loose\",\n",
|
||||||
|
" metadata={\"year\": 1993, \"rating\": 7.7, \"director\": \"Steven Spielberg\", \"genre\": \"science fiction\"},\n",
|
||||||
|
" ),\n",
|
||||||
|
" Document(\n",
|
||||||
|
" page_content=\"Leo DiCaprio gets lost in a dream within a dream within a dream within a ...\",\n",
|
||||||
|
" metadata={\"year\": 2010, \"director\": \"Christopher Nolan\", \"genre\": \"science fiction\", \"rating\": 8.2},\n",
|
||||||
|
" ),\n",
|
||||||
|
" Document(\n",
|
||||||
|
" page_content=\"A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea\",\n",
|
||||||
|
" metadata={\"year\": 2006, \"director\": \"Satoshi Kon\", \"genre\": \"science fiction\", \"rating\": 8.6},\n",
|
||||||
|
" ),\n",
|
||||||
|
" Document(\n",
|
||||||
|
" page_content=\"A bunch of normal-sized women are supremely wholesome and some men pine after them\",\n",
|
||||||
|
" metadata={\"year\": 2019, \"director\": \"Greta Gerwig\", \"genre\": \"drama\", \"rating\": 8.3},\n",
|
||||||
|
" ),\n",
|
||||||
|
" Document(\n",
|
||||||
|
" page_content=\"Toys come alive and have a blast doing so\",\n",
|
||||||
|
" metadata={\"year\": 1995, \"director\": \"John Lasseter\", \"genre\": \"animated\", \"rating\": 9.1,},\n",
|
||||||
|
" ),\n",
|
||||||
|
" Document(\n",
|
||||||
|
" page_content=\"Three men walk into the Zone, three men walk out of the Zone\",\n",
|
||||||
|
" metadata={\n",
|
||||||
|
" \"year\": 1979,\n",
|
||||||
|
" \"rating\": 9.9,\n",
|
||||||
|
" \"director\": \"Andrei Tarkovsky\",\n",
|
||||||
|
" \"genre\": \"science fiction\",\n",
|
||||||
|
" },\n",
|
||||||
|
" ),\n",
|
||||||
|
"]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 5,
|
||||||
|
"id": "393aff3b",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stderr",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"`index_schema` does not match generated metadata schema.\n",
|
||||||
|
"If you meant to manually override the schema, please ignore this message.\n",
|
||||||
|
"index_schema: {'tag': [{'name': 'genre'}], 'text': [{'name': 'director'}], 'numeric': [{'name': 'year'}, {'name': 'rating'}]}\n",
|
||||||
|
"generated_schema: {'text': [{'name': 'director'}, {'name': 'genre'}], 'numeric': [{'name': 'year'}, {'name': 'rating'}], 'tag': []}\n",
|
||||||
|
"\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"index_schema = {\n",
|
||||||
|
" \"tag\": [{\"name\": \"genre\"}],\n",
|
||||||
|
" \"text\": [{\"name\": \"director\"}],\n",
|
||||||
|
" \"numeric\": [{\"name\": \"year\"}, {\"name\": \"rating\"}],\n",
|
||||||
|
"}\n",
|
||||||
|
"\n",
|
||||||
|
"vectorstore = Redis.from_documents(\n",
|
||||||
|
" docs, \n",
|
||||||
|
" embeddings, \n",
|
||||||
|
" redis_url=\"redis://localhost:6379\",\n",
|
||||||
|
" index_name=\"movie_reviews\",\n",
|
||||||
|
" index_schema=index_schema,\n",
|
||||||
|
")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "5ecaab6d",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Creating our self-querying retriever\n",
|
||||||
|
"Now we can instantiate our retriever. To do this we'll need to provide some information upfront about the metadata fields that our documents support and a short description of the document contents."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 6,
|
||||||
|
"id": "86e34dbf",
|
||||||
|
"metadata": {
|
||||||
|
"tags": []
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from langchain.llms import OpenAI\n",
|
||||||
|
"from langchain.retrievers.self_query.base import SelfQueryRetriever\n",
|
||||||
|
"from langchain.chains.query_constructor.base import AttributeInfo\n",
|
||||||
|
"\n",
|
||||||
|
"metadata_field_info = [\n",
|
||||||
|
" AttributeInfo(\n",
|
||||||
|
" name=\"genre\",\n",
|
||||||
|
" description=\"The genre of the movie\",\n",
|
||||||
|
" type=\"string or list[string]\",\n",
|
||||||
|
" ),\n",
|
||||||
|
" AttributeInfo(\n",
|
||||||
|
" name=\"year\",\n",
|
||||||
|
" description=\"The year the movie was released\",\n",
|
||||||
|
" type=\"integer\",\n",
|
||||||
|
" ),\n",
|
||||||
|
" AttributeInfo(\n",
|
||||||
|
" name=\"director\",\n",
|
||||||
|
" description=\"The name of the movie director\",\n",
|
||||||
|
" type=\"string\",\n",
|
||||||
|
" ),\n",
|
||||||
|
" AttributeInfo(\n",
|
||||||
|
" name=\"rating\", description=\"A 1-10 rating for the movie\", type=\"float\"\n",
|
||||||
|
" ),\n",
|
||||||
|
"]\n",
|
||||||
|
"document_content_description = \"Brief summary of a movie\"\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 7,
|
||||||
|
"id": "ea1126cb",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"llm = OpenAI(temperature=0)\n",
|
||||||
|
"retriever = SelfQueryRetriever.from_llm(\n",
|
||||||
|
" llm, \n",
|
||||||
|
" vectorstore, \n",
|
||||||
|
" document_content_description, \n",
|
||||||
|
" metadata_field_info, \n",
|
||||||
|
" verbose=True\n",
|
||||||
|
")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "ea9df8d4",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Testing it out\n",
|
||||||
|
"And now we can try actually using our retriever!"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 8,
|
||||||
|
"id": "38a126e9",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stderr",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"/Users/bagatur/langchain/libs/langchain/langchain/chains/llm.py:278: UserWarning: The predict_and_parse method is deprecated, instead pass an output parser directly to LLMChain.\n",
|
||||||
|
" warnings.warn(\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"query='dinosaur' filter=None limit=None\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"[Document(page_content='A bunch of scientists bring back dinosaurs and mayhem breaks loose', metadata={'id': 'doc:movie_reviews:7b5481d753bc4135851b66fa61def7fb', 'director': 'Steven Spielberg', 'genre': 'science fiction', 'year': '1993', 'rating': '7.7'}),\n",
|
||||||
|
" Document(page_content='Toys come alive and have a blast doing so', metadata={'id': 'doc:movie_reviews:9e4e84daa0374941a6aa4274e9bbb607', 'director': 'John Lasseter', 'genre': 'animated', 'year': '1995', 'rating': '9.1'}),\n",
|
||||||
|
" Document(page_content='Three men walk into the Zone, three men walk out of the Zone', metadata={'id': 'doc:movie_reviews:2cc66f38bfbd438eb3a045d90a1a4088', 'director': 'Andrei Tarkovsky', 'genre': 'science fiction', 'year': '1979', 'rating': '9.9'}),\n",
|
||||||
|
" Document(page_content='A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea', metadata={'id': 'doc:movie_reviews:edf567b1d5334e02b2a4c692d853c80c', 'director': 'Satoshi Kon', 'genre': 'science fiction', 'year': '2006', 'rating': '8.6'})]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 8,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# This example only specifies a relevant query\n",
|
||||||
|
"retriever.get_relevant_documents(\"What are some movies about dinosaurs\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 9,
|
||||||
|
"id": "fc3f1e6e",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"query=' ' filter=Comparison(comparator=<Comparator.GT: 'gt'>, attribute='rating', value=8.4) limit=None\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"[Document(page_content='Toys come alive and have a blast doing so', metadata={'id': 'doc:movie_reviews:9e4e84daa0374941a6aa4274e9bbb607', 'director': 'John Lasseter', 'genre': 'animated', 'year': '1995', 'rating': '9.1'}),\n",
|
||||||
|
" Document(page_content='Three men walk into the Zone, three men walk out of the Zone', metadata={'id': 'doc:movie_reviews:2cc66f38bfbd438eb3a045d90a1a4088', 'director': 'Andrei Tarkovsky', 'genre': 'science fiction', 'year': '1979', 'rating': '9.9'}),\n",
|
||||||
|
" Document(page_content='A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea', metadata={'id': 'doc:movie_reviews:edf567b1d5334e02b2a4c692d853c80c', 'director': 'Satoshi Kon', 'genre': 'science fiction', 'year': '2006', 'rating': '8.6'})]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 9,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# This example only specifies a filter\n",
|
||||||
|
"retriever.get_relevant_documents(\"I want to watch a movie rated higher than 8.4\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 10,
|
||||||
|
"id": "b19d4da0",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"query='women' filter=Comparison(comparator=<Comparator.EQ: 'eq'>, attribute='director', value='Greta Gerwig') limit=None\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"[Document(page_content='A bunch of normal-sized women are supremely wholesome and some men pine after them', metadata={'id': 'doc:movie_reviews:bb899807b93c442083fd45e75a4779d5', 'director': 'Greta Gerwig', 'genre': 'drama', 'year': '2019', 'rating': '8.3'})]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 10,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# This example specifies a query and a filter\n",
|
||||||
|
"retriever.get_relevant_documents(\"Has Greta Gerwig directed any movies about women\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 11,
|
||||||
|
"id": "f900e40e",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"query=' ' filter=Operation(operator=<Operator.AND: 'and'>, arguments=[Comparison(comparator=<Comparator.GTE: 'gte'>, attribute='rating', value=8.5), Comparison(comparator=<Comparator.CONTAIN: 'contain'>, attribute='genre', value='science fiction')]) limit=None\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"[Document(page_content='Three men walk into the Zone, three men walk out of the Zone', metadata={'id': 'doc:movie_reviews:2cc66f38bfbd438eb3a045d90a1a4088', 'director': 'Andrei Tarkovsky', 'genre': 'science fiction', 'year': '1979', 'rating': '9.9'}),\n",
|
||||||
|
" Document(page_content='A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea', metadata={'id': 'doc:movie_reviews:edf567b1d5334e02b2a4c692d853c80c', 'director': 'Satoshi Kon', 'genre': 'science fiction', 'year': '2006', 'rating': '8.6'})]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 11,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# This example specifies a composite filter\n",
|
||||||
|
"retriever.get_relevant_documents(\n",
|
||||||
|
" \"What's a highly rated (above 8.5) science fiction film?\"\n",
|
||||||
|
")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 12,
|
||||||
|
"id": "12a51522",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"query='toys' filter=Operation(operator=<Operator.AND: 'and'>, arguments=[Comparison(comparator=<Comparator.GT: 'gt'>, attribute='year', value=1990), Comparison(comparator=<Comparator.LT: 'lt'>, attribute='year', value=2005), Comparison(comparator=<Comparator.CONTAIN: 'contain'>, attribute='genre', value='animated')]) limit=None\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"[Document(page_content='Toys come alive and have a blast doing so', metadata={'id': 'doc:movie_reviews:9e4e84daa0374941a6aa4274e9bbb607', 'director': 'John Lasseter', 'genre': 'animated', 'year': '1995', 'rating': '9.1'})]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 12,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# This example specifies a query and composite filter\n",
|
||||||
|
"retriever.get_relevant_documents(\n",
|
||||||
|
" \"What's a movie after 1990 but before 2005 that's all about toys, and preferably is animated\"\n",
|
||||||
|
")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "39bd1de1-b9fe-4a98-89da-58d8a7a6ae51",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Filter k\n",
|
||||||
|
"\n",
|
||||||
|
"We can also use the self query retriever to specify `k`: the number of documents to fetch.\n",
|
||||||
|
"\n",
|
||||||
|
"We can do this by passing `enable_limit=True` to the constructor."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 13,
|
||||||
|
"id": "bff36b88-b506-4877-9c63-e5a1a8d78e64",
|
||||||
|
"metadata": {
|
||||||
|
"tags": []
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"retriever = SelfQueryRetriever.from_llm(\n",
|
||||||
|
" llm,\n",
|
||||||
|
" vectorstore,\n",
|
||||||
|
" document_content_description,\n",
|
||||||
|
" metadata_field_info,\n",
|
||||||
|
" enable_limit=True,\n",
|
||||||
|
" verbose=True,\n",
|
||||||
|
")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 14,
|
||||||
|
"id": "2758d229-4f97-499c-819f-888acaf8ee10",
|
||||||
|
"metadata": {
|
||||||
|
"tags": []
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"query='dinosaur' filter=None limit=2\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"[Document(page_content='A bunch of scientists bring back dinosaurs and mayhem breaks loose', metadata={'id': 'doc:movie_reviews:7b5481d753bc4135851b66fa61def7fb', 'director': 'Steven Spielberg', 'genre': 'science fiction', 'year': '1993', 'rating': '7.7'}),\n",
|
||||||
|
" Document(page_content='Toys come alive and have a blast doing so', metadata={'id': 'doc:movie_reviews:9e4e84daa0374941a6aa4274e9bbb607', 'director': 'John Lasseter', 'genre': 'animated', 'year': '1995', 'rating': '9.1'})]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 14,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# This example specifies a query and a limit\n",
|
||||||
|
"retriever.get_relevant_documents(\"what are two movies about dinosaurs\")"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "poetry-venv",
|
||||||
|
"language": "python",
|
||||||
|
"name": "poetry-venv"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.9.1"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 5
|
||||||
|
}
|
@ -2,8 +2,8 @@
|
|||||||
|
|
||||||
from typing import Any, Dict, List, Optional, Type, cast
|
from typing import Any, Dict, List, Optional, Type, cast
|
||||||
|
|
||||||
from langchain import LLMChain
|
|
||||||
from langchain.callbacks.manager import CallbackManagerForRetrieverRun
|
from langchain.callbacks.manager import CallbackManagerForRetrieverRun
|
||||||
|
from langchain.chains import LLMChain
|
||||||
from langchain.chains.query_constructor.base import load_query_constructor_chain
|
from langchain.chains.query_constructor.base import load_query_constructor_chain
|
||||||
from langchain.chains.query_constructor.ir import StructuredQuery, Visitor
|
from langchain.chains.query_constructor.ir import StructuredQuery, Visitor
|
||||||
from langchain.chains.query_constructor.schema import AttributeInfo
|
from langchain.chains.query_constructor.schema import AttributeInfo
|
||||||
@ -16,6 +16,7 @@ from langchain.retrievers.self_query.milvus import MilvusTranslator
|
|||||||
from langchain.retrievers.self_query.myscale import MyScaleTranslator
|
from langchain.retrievers.self_query.myscale import MyScaleTranslator
|
||||||
from langchain.retrievers.self_query.pinecone import PineconeTranslator
|
from langchain.retrievers.self_query.pinecone import PineconeTranslator
|
||||||
from langchain.retrievers.self_query.qdrant import QdrantTranslator
|
from langchain.retrievers.self_query.qdrant import QdrantTranslator
|
||||||
|
from langchain.retrievers.self_query.redis import RedisTranslator
|
||||||
from langchain.retrievers.self_query.supabase import SupabaseVectorTranslator
|
from langchain.retrievers.self_query.supabase import SupabaseVectorTranslator
|
||||||
from langchain.retrievers.self_query.vectara import VectaraTranslator
|
from langchain.retrievers.self_query.vectara import VectaraTranslator
|
||||||
from langchain.retrievers.self_query.weaviate import WeaviateTranslator
|
from langchain.retrievers.self_query.weaviate import WeaviateTranslator
|
||||||
@ -30,6 +31,7 @@ from langchain.vectorstores import (
|
|||||||
MyScale,
|
MyScale,
|
||||||
Pinecone,
|
Pinecone,
|
||||||
Qdrant,
|
Qdrant,
|
||||||
|
Redis,
|
||||||
SupabaseVectorStore,
|
SupabaseVectorStore,
|
||||||
Vectara,
|
Vectara,
|
||||||
VectorStore,
|
VectorStore,
|
||||||
@ -39,7 +41,6 @@ from langchain.vectorstores import (
|
|||||||
|
|
||||||
def _get_builtin_translator(vectorstore: VectorStore) -> Visitor:
|
def _get_builtin_translator(vectorstore: VectorStore) -> Visitor:
|
||||||
"""Get the translator class corresponding to the vector store class."""
|
"""Get the translator class corresponding to the vector store class."""
|
||||||
vectorstore_cls = vectorstore.__class__
|
|
||||||
BUILTIN_TRANSLATORS: Dict[Type[VectorStore], Type[Visitor]] = {
|
BUILTIN_TRANSLATORS: Dict[Type[VectorStore], Type[Visitor]] = {
|
||||||
Pinecone: PineconeTranslator,
|
Pinecone: PineconeTranslator,
|
||||||
Chroma: ChromaTranslator,
|
Chroma: ChromaTranslator,
|
||||||
@ -53,16 +54,19 @@ def _get_builtin_translator(vectorstore: VectorStore) -> Visitor:
|
|||||||
Milvus: MilvusTranslator,
|
Milvus: MilvusTranslator,
|
||||||
SupabaseVectorStore: SupabaseVectorTranslator,
|
SupabaseVectorStore: SupabaseVectorTranslator,
|
||||||
}
|
}
|
||||||
if vectorstore_cls not in BUILTIN_TRANSLATORS:
|
|
||||||
raise ValueError(
|
|
||||||
f"Self query retriever with Vector Store type {vectorstore_cls}"
|
|
||||||
f" not supported."
|
|
||||||
)
|
|
||||||
if isinstance(vectorstore, Qdrant):
|
if isinstance(vectorstore, Qdrant):
|
||||||
return QdrantTranslator(metadata_key=vectorstore.metadata_payload_key)
|
return QdrantTranslator(metadata_key=vectorstore.metadata_payload_key)
|
||||||
elif isinstance(vectorstore, MyScale):
|
elif isinstance(vectorstore, MyScale):
|
||||||
return MyScaleTranslator(metadata_key=vectorstore.metadata_column)
|
return MyScaleTranslator(metadata_key=vectorstore.metadata_column)
|
||||||
return BUILTIN_TRANSLATORS[vectorstore_cls]()
|
elif isinstance(vectorstore, Redis):
|
||||||
|
return RedisTranslator.from_vectorstore(vectorstore)
|
||||||
|
elif vectorstore.__class__ in BUILTIN_TRANSLATORS:
|
||||||
|
return BUILTIN_TRANSLATORS[vectorstore.__class__]()
|
||||||
|
else:
|
||||||
|
raise ValueError(
|
||||||
|
f"Self query retriever with Vector Store type {vectorstore.__class__}"
|
||||||
|
f" not supported."
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class SelfQueryRetriever(BaseRetriever, BaseModel):
|
class SelfQueryRetriever(BaseRetriever, BaseModel):
|
||||||
@ -80,8 +84,9 @@ class SelfQueryRetriever(BaseRetriever, BaseModel):
|
|||||||
structured_query_translator: Visitor
|
structured_query_translator: Visitor
|
||||||
"""Translator for turning internal query language into vectorstore search params."""
|
"""Translator for turning internal query language into vectorstore search params."""
|
||||||
verbose: bool = False
|
verbose: bool = False
|
||||||
"""Use original query instead of the revised new query from LLM"""
|
|
||||||
use_original_query: bool = False
|
use_original_query: bool = False
|
||||||
|
"""Use original query instead of the revised new query from LLM"""
|
||||||
|
|
||||||
class Config:
|
class Config:
|
||||||
"""Configuration for this pydantic object."""
|
"""Configuration for this pydantic object."""
|
||||||
|
102
libs/langchain/langchain/retrievers/self_query/redis.py
Normal file
102
libs/langchain/langchain/retrievers/self_query/redis.py
Normal file
@ -0,0 +1,102 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any, Tuple
|
||||||
|
|
||||||
|
from langchain.chains.query_constructor.ir import (
|
||||||
|
Comparator,
|
||||||
|
Comparison,
|
||||||
|
Operation,
|
||||||
|
Operator,
|
||||||
|
StructuredQuery,
|
||||||
|
Visitor,
|
||||||
|
)
|
||||||
|
from langchain.vectorstores.redis import Redis
|
||||||
|
from langchain.vectorstores.redis.filters import (
|
||||||
|
RedisFilterExpression,
|
||||||
|
RedisFilterField,
|
||||||
|
RedisFilterOperator,
|
||||||
|
RedisNum,
|
||||||
|
RedisTag,
|
||||||
|
RedisText,
|
||||||
|
)
|
||||||
|
from langchain.vectorstores.redis.schema import RedisModel
|
||||||
|
|
||||||
|
# Name of the dunder method to invoke on a Redis filter field for each
# supported comparator. CONTAIN shares the equality method, and LIKE uses
# ``__mod__`` (the ``%`` operator).
_COMPARATOR_TO_BUILTIN_METHOD = dict(
    (
        (Comparator.EQ, "__eq__"),
        (Comparator.NE, "__ne__"),
        (Comparator.LT, "__lt__"),
        (Comparator.GT, "__gt__"),
        (Comparator.LTE, "__le__"),
        (Comparator.GTE, "__ge__"),
        (Comparator.CONTAIN, "__eq__"),
        (Comparator.LIKE, "__mod__"),
    )
)
|
||||||
|
|
||||||
|
|
||||||
|
class RedisTranslator(Visitor):
    """Translate structured-query IR nodes into Redis filter expressions.

    The translator is bound to a Redis index schema so that every attribute
    named in a comparison can be resolved to the matching filter field type
    (text, tag or numeric).
    """

    allowed_comparators = (
        Comparator.EQ,
        Comparator.NE,
        Comparator.LT,
        Comparator.LTE,
        Comparator.GT,
        Comparator.GTE,
        Comparator.CONTAIN,
        Comparator.LIKE,
    )
    """Subset of allowed logical comparators."""
    allowed_operators = (Operator.AND, Operator.OR)
    """Subset of allowed logical operators."""

    def __init__(self, schema: RedisModel) -> None:
        """Bind the translator to a Redis index schema.

        Args:
            schema: Schema whose text, tag and numeric fields define the
                attributes that may appear in a filter.
        """
        self._schema = schema

    def _attribute_to_filter_field(self, attribute: str) -> RedisFilterField:
        """Return the filter field matching ``attribute`` in the schema.

        Text fields are checked first, then tag fields, then numeric fields.

        Args:
            attribute: Name of the metadata attribute being filtered on.

        Returns:
            A ``RedisText``, ``RedisTag`` or ``RedisNum`` for the attribute.

        Raises:
            ValueError: If no text, tag or numeric field has that name.
        """
        candidates = (
            (self._schema.text, RedisText),
            (self._schema.tag, RedisTag),
            (self._schema.numeric, RedisNum),
        )
        for schema_fields, filter_cls in candidates:
            # Any of the field lists may be unset; treat that as "no fields".
            if any(field.name == attribute for field in schema_fields or []):
                return filter_cls(attribute)
        raise ValueError(
            f"Invalid attribute {attribute} not in vector store schema. Schema is:"
            f"\n{self._schema.as_dict()}"
        )

    def visit_comparison(self, comparison: Comparison) -> RedisFilterExpression:
        """Translate a single comparison into a Redis filter expression.

        Args:
            comparison: Comparison holding the attribute, comparator and value.

        Returns:
            The result of calling the comparator's dunder method on the
            attribute's filter field with the comparison value.

        Raises:
            ValueError: If the attribute is not part of the schema.
            KeyError: If the comparator has no entry in
                ``_COMPARATOR_TO_BUILTIN_METHOD``.
        """
        filter_field = self._attribute_to_filter_field(comparison.attribute)
        comparison_method = _COMPARATOR_TO_BUILTIN_METHOD[comparison.comparator]
        return getattr(filter_field, comparison_method)(comparison.value)

    def visit_operation(self, operation: Operation) -> Any:
        """Translate a logical operation into a nested Redis filter expression.

        Operations with more than two arguments are folded to the right, i.e.
        ``AND(a, b, c)`` becomes ``a AND (b AND c)``. An operation with a single
        argument translates to that argument on its own.

        Args:
            operation: Operation whose operator is applied to its arguments.

        Returns:
            The combined filter expression.

        Raises:
            ValueError: If the operation has no arguments.
        """
        if not operation.arguments:
            raise ValueError("Cannot translate an operation with no arguments.")

        # Any operator other than OR is combined with AND.
        redis_operator = (
            RedisFilterOperator.OR
            if operation.operator == Operator.OR
            else RedisFilterOperator.AND
        )
        translated = [argument.accept(self) for argument in operation.arguments]

        # Fold from the right so the nesting is ``a op (b op (c ...))``.
        combined = translated[-1]
        for left in reversed(translated[:-1]):
            combined = RedisFilterExpression(
                operator=redis_operator, left=left, right=combined
            )
        return combined

    def visit_structured_query(
        self, structured_query: StructuredQuery
    ) -> Tuple[str, dict]:
        """Split a structured query into its query string and search kwargs.

        Args:
            structured_query: Query text plus an optional filter tree.

        Returns:
            A ``(query, kwargs)`` tuple. ``kwargs`` is empty when the query has
            no filter, otherwise it holds the translated filter under the
            ``"filter"`` key.
        """
        kwargs: dict = {}
        if structured_query.filter is not None:
            kwargs["filter"] = structured_query.filter.accept(self)
        return structured_query.query, kwargs

    @classmethod
    def from_vectorstore(cls, vectorstore: Redis) -> RedisTranslator:
        """Build a translator from the schema held by a Redis vector store.

        Args:
            vectorstore: Redis vector store whose ``_schema`` is reused.

        Returns:
            A translator bound to that schema.
        """
        return cls(vectorstore._schema)
|
@ -1,5 +1,6 @@
|
|||||||
from enum import Enum
|
from enum import Enum
|
||||||
from functools import wraps
|
from functools import wraps
|
||||||
|
from numbers import Number
|
||||||
from typing import Any, Callable, Dict, List, Optional, Union
|
from typing import Any, Callable, Dict, List, Optional, Union
|
||||||
|
|
||||||
from langchain.utilities.redis import TokenEscaper
|
from langchain.utilities.redis import TokenEscaper
|
||||||
@ -56,14 +57,15 @@ class RedisFilterField:
|
|||||||
if operator not in self.OPERATORS:
|
if operator not in self.OPERATORS:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
f"Operator {operator} not supported by {self.__class__.__name__}. "
|
f"Operator {operator} not supported by {self.__class__.__name__}. "
|
||||||
+ f"Supported operators are {self.OPERATORS.values()}"
|
+ f"Supported operators are {self.OPERATORS.values()}."
|
||||||
)
|
)
|
||||||
|
|
||||||
if not isinstance(val, val_type):
|
if not isinstance(val, val_type):
|
||||||
raise TypeError(
|
raise TypeError(
|
||||||
f"Right side argument passed to operator {self.OPERATORS[operator]} "
|
f"Right side argument passed to operator {self.OPERATORS[operator]} "
|
||||||
f"with left side "
|
f"with left side "
|
||||||
f"argument {self.__class__.__name__} must be of type {val_type}"
|
f"argument {self.__class__.__name__} must be of type {val_type}, "
|
||||||
|
f"received value {val}"
|
||||||
)
|
)
|
||||||
self._value = val
|
self._value = val
|
||||||
self._operator = operator
|
self._operator = operator
|
||||||
@ -181,12 +183,12 @@ class RedisNum(RedisFilterField):
|
|||||||
RedisFilterOperator.GE: ">=",
|
RedisFilterOperator.GE: ">=",
|
||||||
}
|
}
|
||||||
OPERATOR_MAP: Dict[RedisFilterOperator, str] = {
|
OPERATOR_MAP: Dict[RedisFilterOperator, str] = {
|
||||||
RedisFilterOperator.EQ: "@%s:[%i %i]",
|
RedisFilterOperator.EQ: "@%s:[%f %f]",
|
||||||
RedisFilterOperator.NE: "(-@%s:[%i %i])",
|
RedisFilterOperator.NE: "(-@%s:[%f %f])",
|
||||||
RedisFilterOperator.GT: "@%s:[(%i +inf]",
|
RedisFilterOperator.GT: "@%s:[(%f +inf]",
|
||||||
RedisFilterOperator.LT: "@%s:[-inf (%i]",
|
RedisFilterOperator.LT: "@%s:[-inf (%f]",
|
||||||
RedisFilterOperator.GE: "@%s:[%i +inf]",
|
RedisFilterOperator.GE: "@%s:[%f +inf]",
|
||||||
RedisFilterOperator.LE: "@%s:[-inf %i]",
|
RedisFilterOperator.LE: "@%s:[-inf %f]",
|
||||||
}
|
}
|
||||||
|
|
||||||
def __str__(self) -> str:
|
def __str__(self) -> str:
|
||||||
@ -210,83 +212,83 @@ class RedisNum(RedisFilterField):
|
|||||||
return self.OPERATOR_MAP[self._operator] % (self._field, self._value)
|
return self.OPERATOR_MAP[self._operator] % (self._field, self._value)
|
||||||
|
|
||||||
@check_operator_misuse
|
@check_operator_misuse
|
||||||
def __eq__(self, other: int) -> "RedisFilterExpression":
|
def __eq__(self, other: Union[int, float]) -> "RedisFilterExpression":
|
||||||
"""Create a Numeric equality filter expression
|
"""Create a Numeric equality filter expression
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
other (int): The value to filter on.
|
other (Number): The value to filter on.
|
||||||
|
|
||||||
Example:
|
Example:
|
||||||
>>> from langchain.vectorstores.redis import RedisNum
|
>>> from langchain.vectorstores.redis import RedisNum
|
||||||
>>> filter = RedisNum("zipcode") == 90210
|
>>> filter = RedisNum("zipcode") == 90210
|
||||||
"""
|
"""
|
||||||
self._set_value(other, int, RedisFilterOperator.EQ)
|
self._set_value(other, Number, RedisFilterOperator.EQ)
|
||||||
return RedisFilterExpression(str(self))
|
return RedisFilterExpression(str(self))
|
||||||
|
|
||||||
@check_operator_misuse
|
@check_operator_misuse
|
||||||
def __ne__(self, other: int) -> "RedisFilterExpression":
|
def __ne__(self, other: Union[int, float]) -> "RedisFilterExpression":
|
||||||
"""Create a Numeric inequality filter expression
|
"""Create a Numeric inequality filter expression
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
other (int): The value to filter on.
|
other (Number): The value to filter on.
|
||||||
|
|
||||||
Example:
|
Example:
|
||||||
>>> from langchain.vectorstores.redis import RedisNum
|
>>> from langchain.vectorstores.redis import RedisNum
|
||||||
>>> filter = RedisNum("zipcode") != 90210
|
>>> filter = RedisNum("zipcode") != 90210
|
||||||
"""
|
"""
|
||||||
self._set_value(other, int, RedisFilterOperator.NE)
|
self._set_value(other, Number, RedisFilterOperator.NE)
|
||||||
return RedisFilterExpression(str(self))
|
return RedisFilterExpression(str(self))
|
||||||
|
|
||||||
def __gt__(self, other: int) -> "RedisFilterExpression":
|
def __gt__(self, other: Union[int, float]) -> "RedisFilterExpression":
|
||||||
"""Create a RedisNumeric greater than filter expression
|
"""Create a RedisNumeric greater than filter expression
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
other (int): The value to filter on.
|
other (Number): The value to filter on.
|
||||||
|
|
||||||
Example:
|
Example:
|
||||||
>>> from langchain.vectorstores.redis import RedisNum
|
>>> from langchain.vectorstores.redis import RedisNum
|
||||||
>>> filter = RedisNum("age") > 18
|
>>> filter = RedisNum("age") > 18
|
||||||
"""
|
"""
|
||||||
self._set_value(other, int, RedisFilterOperator.GT)
|
self._set_value(other, Number, RedisFilterOperator.GT)
|
||||||
return RedisFilterExpression(str(self))
|
return RedisFilterExpression(str(self))
|
||||||
|
|
||||||
def __lt__(self, other: int) -> "RedisFilterExpression":
|
def __lt__(self, other: Union[int, float]) -> "RedisFilterExpression":
|
||||||
"""Create a Numeric less than filter expression
|
"""Create a Numeric less than filter expression
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
other (int): The value to filter on.
|
other (Number): The value to filter on.
|
||||||
|
|
||||||
Example:
|
Example:
|
||||||
>>> from langchain.vectorstores.redis import RedisNum
|
>>> from langchain.vectorstores.redis import RedisNum
|
||||||
>>> filter = RedisNum("age") < 18
|
>>> filter = RedisNum("age") < 18
|
||||||
"""
|
"""
|
||||||
self._set_value(other, int, RedisFilterOperator.LT)
|
self._set_value(other, Number, RedisFilterOperator.LT)
|
||||||
return RedisFilterExpression(str(self))
|
return RedisFilterExpression(str(self))
|
||||||
|
|
||||||
def __ge__(self, other: int) -> "RedisFilterExpression":
|
def __ge__(self, other: Union[int, float]) -> "RedisFilterExpression":
|
||||||
"""Create a Numeric greater than or equal to filter expression
|
"""Create a Numeric greater than or equal to filter expression
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
other (int): The value to filter on.
|
other (Number): The value to filter on.
|
||||||
|
|
||||||
Example:
|
Example:
|
||||||
>>> from langchain.vectorstores.redis import RedisNum
|
>>> from langchain.vectorstores.redis import RedisNum
|
||||||
>>> filter = RedisNum("age") >= 18
|
>>> filter = RedisNum("age") >= 18
|
||||||
"""
|
"""
|
||||||
self._set_value(other, int, RedisFilterOperator.GE)
|
self._set_value(other, Number, RedisFilterOperator.GE)
|
||||||
return RedisFilterExpression(str(self))
|
return RedisFilterExpression(str(self))
|
||||||
|
|
||||||
def __le__(self, other: int) -> "RedisFilterExpression":
|
def __le__(self, other: Union[int, float]) -> "RedisFilterExpression":
|
||||||
"""Create a Numeric less than or equal to filter expression
|
"""Create a Numeric less than or equal to filter expression
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
other (int): The value to filter on.
|
other (Number): The value to filter on.
|
||||||
|
|
||||||
Example:
|
Example:
|
||||||
>>> from langchain.vectorstores.redis import RedisNum
|
>>> from langchain.vectorstores.redis import RedisNum
|
||||||
>>> filter = RedisNum("age") <= 18
|
>>> filter = RedisNum("age") <= 18
|
||||||
"""
|
"""
|
||||||
self._set_value(other, int, RedisFilterOperator.LE)
|
self._set_value(other, Number, RedisFilterOperator.LE)
|
||||||
return RedisFilterExpression(str(self))
|
return RedisFilterExpression(str(self))
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,3 +1,5 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
import os
|
import os
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
@ -5,19 +7,19 @@ from typing import Any, Dict, List, Optional, Union
|
|||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import yaml
|
import yaml
|
||||||
|
from typing_extensions import TYPE_CHECKING, Literal
|
||||||
# ignore type error here as it's a redis-py type problem
|
|
||||||
from redis.commands.search.field import ( # type: ignore
|
|
||||||
NumericField,
|
|
||||||
TagField,
|
|
||||||
TextField,
|
|
||||||
VectorField,
|
|
||||||
)
|
|
||||||
from typing_extensions import Literal
|
|
||||||
|
|
||||||
from langchain.pydantic_v1 import BaseModel, Field, validator
|
from langchain.pydantic_v1 import BaseModel, Field, validator
|
||||||
from langchain.vectorstores.redis.constants import REDIS_VECTOR_DTYPE_MAP
|
from langchain.vectorstores.redis.constants import REDIS_VECTOR_DTYPE_MAP
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from redis.commands.search.field import ( # type: ignore
|
||||||
|
NumericField,
|
||||||
|
TagField,
|
||||||
|
TextField,
|
||||||
|
VectorField,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class RedisDistanceMetric(str, Enum):
|
class RedisDistanceMetric(str, Enum):
|
||||||
l2 = "L2"
|
l2 = "L2"
|
||||||
@ -38,6 +40,8 @@ class TextFieldSchema(RedisField):
|
|||||||
sortable: Optional[bool] = False
|
sortable: Optional[bool] = False
|
||||||
|
|
||||||
def as_field(self) -> TextField:
|
def as_field(self) -> TextField:
|
||||||
|
from redis.commands.search.field import TextField # type: ignore
|
||||||
|
|
||||||
return TextField(
|
return TextField(
|
||||||
self.name,
|
self.name,
|
||||||
weight=self.weight,
|
weight=self.weight,
|
||||||
@ -55,6 +59,8 @@ class TagFieldSchema(RedisField):
|
|||||||
sortable: Optional[bool] = False
|
sortable: Optional[bool] = False
|
||||||
|
|
||||||
def as_field(self) -> TagField:
|
def as_field(self) -> TagField:
|
||||||
|
from redis.commands.search.field import TagField # type: ignore
|
||||||
|
|
||||||
return TagField(
|
return TagField(
|
||||||
self.name,
|
self.name,
|
||||||
separator=self.separator,
|
separator=self.separator,
|
||||||
@ -69,6 +75,8 @@ class NumericFieldSchema(RedisField):
|
|||||||
sortable: Optional[bool] = False
|
sortable: Optional[bool] = False
|
||||||
|
|
||||||
def as_field(self) -> NumericField:
|
def as_field(self) -> NumericField:
|
||||||
|
from redis.commands.search.field import NumericField # type: ignore
|
||||||
|
|
||||||
return NumericField(self.name, sortable=self.sortable, no_index=self.no_index)
|
return NumericField(self.name, sortable=self.sortable, no_index=self.no_index)
|
||||||
|
|
||||||
|
|
||||||
@ -97,6 +105,8 @@ class FlatVectorField(RedisVectorField):
|
|||||||
block_size: int = Field(default=1000)
|
block_size: int = Field(default=1000)
|
||||||
|
|
||||||
def as_field(self) -> VectorField:
|
def as_field(self) -> VectorField:
|
||||||
|
from redis.commands.search.field import VectorField # type: ignore
|
||||||
|
|
||||||
return VectorField(
|
return VectorField(
|
||||||
self.name,
|
self.name,
|
||||||
self.algorithm,
|
self.algorithm,
|
||||||
@ -118,6 +128,8 @@ class HNSWVectorField(RedisVectorField):
|
|||||||
epsilon: float = Field(default=0.8)
|
epsilon: float = Field(default=0.8)
|
||||||
|
|
||||||
def as_field(self) -> VectorField:
|
def as_field(self) -> VectorField:
|
||||||
|
from redis.commands.search.field import VectorField # type: ignore
|
||||||
|
|
||||||
return VectorField(
|
return VectorField(
|
||||||
self.name,
|
self.name,
|
||||||
self.algorithm,
|
self.algorithm,
|
||||||
|
@ -0,0 +1,122 @@
|
|||||||
|
from typing import Dict, Tuple
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from langchain.chains.query_constructor.ir import (
|
||||||
|
Comparator,
|
||||||
|
Comparison,
|
||||||
|
Operation,
|
||||||
|
Operator,
|
||||||
|
StructuredQuery,
|
||||||
|
)
|
||||||
|
from langchain.retrievers.self_query.redis import RedisTranslator
|
||||||
|
from langchain.vectorstores.redis.filters import (
|
||||||
|
RedisFilterExpression,
|
||||||
|
RedisNum,
|
||||||
|
RedisTag,
|
||||||
|
RedisText,
|
||||||
|
)
|
||||||
|
from langchain.vectorstores.redis.schema import (
|
||||||
|
NumericFieldSchema,
|
||||||
|
RedisModel,
|
||||||
|
TagFieldSchema,
|
||||||
|
TextFieldSchema,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def translator() -> RedisTranslator:
    """Provide a translator over a schema with one text, numeric and tag field."""
    text_fields = [TextFieldSchema(name="bar")]
    numeric_fields = [NumericFieldSchema(name="foo")]
    tag_fields = [TagFieldSchema(name="tag")]
    return RedisTranslator(
        RedisModel(text=text_fields, numeric=numeric_fields, tag=tag_fields)
    )
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
    ("comp", "expected"),
    [
        (
            Comparison(comparator=Comparator.LT, attribute="foo", value=1),
            RedisNum("foo") < 1,
        ),
        (
            Comparison(comparator=Comparator.LIKE, attribute="bar", value="baz*"),
            RedisText("bar") % "baz*",
        ),
        (
            Comparison(
                comparator=Comparator.CONTAIN, attribute="tag", value=["blue", "green"]
            ),
            RedisTag("tag") == ["blue", "green"],
        ),
    ],
)
def test_visit_comparison(
    translator: RedisTranslator, comp: Comparison, expected: RedisFilterExpression
) -> None:
    """Check each parametrized comparison translates to the expected filter.

    The parametrized ``comp`` and ``expected`` values are used as given; they
    must not be reassigned here, or every case would test the same comparison.
    """
    actual = translator.visit_comparison(comp)
    # Compare the rendered filter strings of the two expressions.
    assert str(expected) == str(actual)
|
||||||
|
|
||||||
|
|
||||||
|
def test_visit_operation(translator: RedisTranslator) -> None:
    """A three-argument AND is translated into a right-nested expression."""
    conditions = [
        Comparison(comparator=Comparator.LT, attribute="foo", value=2),
        Comparison(comparator=Comparator.EQ, attribute="bar", value="baz"),
        Comparison(comparator=Comparator.EQ, attribute="tag", value="high"),
    ]
    operation = Operation(operator=Operator.AND, arguments=conditions)

    numeric_part = RedisNum("foo") < 2
    nested_part = (RedisText("bar") == "baz") & (RedisTag("tag") == "high")
    expected = numeric_part & nested_part

    assert str(expected) == str(translator.visit_operation(operation))
|
||||||
|
|
||||||
|
|
||||||
|
def test_visit_structured_query_no_filter(translator: RedisTranslator) -> None:
    """A query without a filter yields the query text and empty kwargs."""
    question = "What is the capital of France?"
    expected: Tuple[str, Dict] = (question, {})

    result = translator.visit_structured_query(
        StructuredQuery(query=question, filter=None)
    )

    assert expected == result
|
||||||
|
|
||||||
|
|
||||||
|
def test_visit_structured_query_comparison(translator: RedisTranslator) -> None:
    """A single comparison filter is passed through under the ``filter`` key."""
    question = "What is the capital of France?"
    structured_query = StructuredQuery(
        query=question,
        filter=Comparison(comparator=Comparator.GTE, attribute="foo", value=2),
    )
    expected_filter = RedisNum("foo") >= 2

    result_query, result_kwargs = translator.visit_structured_query(structured_query)

    assert result_query == question
    assert str(result_kwargs["filter"]) == str(expected_filter)
|
||||||
|
|
||||||
|
|
||||||
|
def test_visit_structured_query_operation(translator: RedisTranslator) -> None:
    """An OR operation filter is translated and returned under ``filter``."""
    question = "What is the capital of France?"
    either = Operation(
        operator=Operator.OR,
        arguments=[
            Comparison(comparator=Comparator.EQ, attribute="foo", value=2),
            Comparison(comparator=Comparator.CONTAIN, attribute="bar", value="baz"),
        ],
    )
    # CONTAIN is expected to render the same as equality on the text field.
    expected_filter = (RedisNum("foo") == 2) | (RedisText("bar") == "baz")

    result_query, result_kwargs = translator.visit_structured_query(
        StructuredQuery(query=question, filter=either)
    )

    assert result_query == question
    assert str(result_kwargs["filter"]) == str(expected_filter)
|
Loading…
Reference in New Issue
Block a user