From 8c08cf46194ba77d9ef47d6b2967e1581e230432 Mon Sep 17 00:00:00 2001
From: Tomaz Bratanic <bratanic.tomaz@gmail.com>
Date: Fri, 19 Apr 2024 20:22:42 +0200
Subject: [PATCH] community: Add support for relationship indexes in neo4j
 vector (#20657)

Neo4j has added relationship vector indexes.
We can't populate them, but we can use existing indexes for retrieval.
---
 .../vectorstores/neo4j_vector.py              | 186 ++++++++++++++----
 .../vectorstores/test_neo4jvector.py          |  92 ++++++++-
 2 files changed, 237 insertions(+), 41 deletions(-)

diff --git a/libs/community/langchain_community/vectorstores/neo4j_vector.py b/libs/community/langchain_community/vectorstores/neo4j_vector.py
index 30897a5cc9..3a9c57de42 100644
--- a/libs/community/langchain_community/vectorstores/neo4j_vector.py
+++ b/libs/community/langchain_community/vectorstores/neo4j_vector.py
@@ -68,31 +68,50 @@ class SearchType(str, enum.Enum):
 DEFAULT_SEARCH_TYPE = SearchType.VECTOR
 
 
-def _get_search_index_query(search_type: SearchType) -> str:
-    type_to_query_map = {
-        SearchType.VECTOR: (
-            "CALL db.index.vector.queryNodes($index, $k, $embedding) YIELD node, score "
-        ),
-        SearchType.HYBRID: (
-            "CALL { "
-            "CALL db.index.vector.queryNodes($index, $k, $embedding) "
-            "YIELD node, score "
-            "WITH collect({node:node, score:score}) AS nodes, max(score) AS max "
-            "UNWIND nodes AS n "
-            # We use 0 as min
-            "RETURN n.node AS node, (n.score / max) AS score UNION "
-            "CALL db.index.fulltext.queryNodes($keyword_index, $query, {limit: $k}) "
-            "YIELD node, score "
-            "WITH collect({node:node, score:score}) AS nodes, max(score) AS max "
-            "UNWIND nodes AS n "
-            # We use 0 as min
-            "RETURN n.node AS node, (n.score / max) AS score "
-            "} "
-            # dedup
-            "WITH node, max(score) AS score ORDER BY score DESC LIMIT $k "
-        ),
-    }
-    return type_to_query_map[search_type]
+class IndexType(str, enum.Enum):
+    """Enumerator of the index types."""
+
+    NODE = "NODE"
+    RELATIONSHIP = "RELATIONSHIP"
+
+
+DEFAULT_INDEX_TYPE = IndexType.NODE
+
+
+def _get_search_index_query(
+    search_type: SearchType, index_type: IndexType = DEFAULT_INDEX_TYPE
+) -> str:
+    if index_type == IndexType.NODE:
+        type_to_query_map = {
+            SearchType.VECTOR: (
+                "CALL db.index.vector.queryNodes($index, $k, $embedding) "
+                "YIELD node, score "
+            ),
+            SearchType.HYBRID: (
+                "CALL { "
+                "CALL db.index.vector.queryNodes($index, $k, $embedding) "
+                "YIELD node, score "
+                "WITH collect({node:node, score:score}) AS nodes, max(score) AS max "
+                "UNWIND nodes AS n "
+                # We use 0 as min
+                "RETURN n.node AS node, (n.score / max) AS score UNION "
+                "CALL db.index.fulltext.queryNodes($keyword_index, $query, "
+                "{limit: $k}) YIELD node, score "
+                "WITH collect({node:node, score:score}) AS nodes, max(score) AS max "
+                "UNWIND nodes AS n "
+                # We use 0 as min
+                "RETURN n.node AS node, (n.score / max) AS score "
+                "} "
+                # dedup
+                "WITH node, max(score) AS score ORDER BY score DESC LIMIT $k "
+            ),
+        }
+        return type_to_query_map[search_type]
+    else:
+        return (
+            "CALL db.index.vector.queryRelationships($index, $k, $embedding) "
+            "YIELD relationship, score "
+        )
 
 
 def check_if_not_null(props: List[str], values: List[Any]) -> None:
@@ -463,6 +482,7 @@ class Neo4jVector(VectorStore):
         pre_delete_collection: bool = False,
         retrieval_query: str = "",
         relevance_score_fn: Optional[Callable[[float], float]] = None,
+        index_type: IndexType = DEFAULT_INDEX_TYPE,
     ) -> None:
         try:
             import neo4j
@@ -541,6 +561,7 @@ class Neo4jVector(VectorStore):
         self.override_relevance_score_fn = relevance_score_fn
         self.retrieval_query = retrieval_query
         self.search_type = search_type
+        self._index_type = index_type
         # Calculate embedding dimension
         self.embedding_dimension = len(embedding.embed_query("foo"))
 
@@ -615,7 +636,7 @@ class Neo4jVector(VectorStore):
         # Flag for enterprise
         self._is_enterprise = True if db_data[0]["edition"] == "enterprise" else False
 
-    def retrieve_existing_index(self) -> Optional[int]:
+    def retrieve_existing_index(self) -> Tuple[Optional[int], Optional[str]]:
         """
         Check if the vector index exists in the Neo4j database
         and returns its embedding dimension.
@@ -630,11 +651,11 @@ class Neo4jVector(VectorStore):
         """
 
         index_information = self.query(
-            "SHOW INDEXES YIELD name, type, labelsOrTypes, properties, options "
-            "WHERE type = 'VECTOR' AND (name = $index_name "
+            "SHOW INDEXES YIELD name, type, entityType, labelsOrTypes, "
+            "properties, options WHERE type = 'VECTOR' AND (name = $index_name "
             "OR (labelsOrTypes[0] = $node_label AND "
             "properties[0] = $embedding_node_property)) "
-            "RETURN name, labelsOrTypes, properties, options ",
+            "RETURN name, entityType, labelsOrTypes, properties, options ",
             params={
                 "index_name": self.index_name,
                 "node_label": self.node_label,
@@ -647,13 +668,14 @@ class Neo4jVector(VectorStore):
             self.index_name = index_information[0]["name"]
             self.node_label = index_information[0]["labelsOrTypes"][0]
             self.embedding_node_property = index_information[0]["properties"][0]
+            self._index_type = index_information[0]["entityType"]
             embedding_dimension = index_information[0]["options"]["indexConfig"][
                 "vector.dimensions"
             ]
 
-            return embedding_dimension
+            return embedding_dimension, index_information[0]["entityType"]
         except IndexError:
-            return None
+            return None, None
 
     def retrieve_existing_fts_index(
         self, text_node_properties: List[str] = []
@@ -754,7 +776,13 @@ class Neo4jVector(VectorStore):
             **kwargs,
         )
         # Check if the vector index already exists
-        embedding_dimension = store.retrieve_existing_index()
+        embedding_dimension, index_type = store.retrieve_existing_index()
+
+        # Raise error if relationship index type
+        if index_type == "RELATIONSHIP":
+            raise ValueError(
+                "Data ingestion is not supported with relationship vector index."
+            )
 
         # If the vector index doesn't exist yet
         if not embedding_dimension:
@@ -976,14 +1004,21 @@ class Neo4jVector(VectorStore):
             index_query = base_index_query + filter_snippets + base_cosine_query
 
         else:
-            index_query = _get_search_index_query(self.search_type)
+            index_query = _get_search_index_query(self.search_type, self._index_type)
             filter_params = {}
 
-        default_retrieval = (
-            f"RETURN node.`{self.text_node_property}` AS text, score, "
-            f"node {{.*, `{self.text_node_property}`: Null, "
-            f"`{self.embedding_node_property}`: Null, id: Null }} AS metadata"
-        )
+        if self._index_type == IndexType.RELATIONSHIP:
+            default_retrieval = (
+                f"RETURN relationship.`{self.text_node_property}` AS text, score, "
+                f"relationship {{.*, `{self.text_node_property}`: Null, "
+                f"`{self.embedding_node_property}`: Null, id: Null }} AS metadata"
+            )
+        else:
+            default_retrieval = (
+                f"RETURN node.`{self.text_node_property}` AS text, score, "
+                f"node {{.*, `{self.text_node_property}`: Null, "
+                f"`{self.embedding_node_property}`: Null, id: Null }} AS metadata"
+            )
 
         retrieval_query = (
             self.retrieval_query if self.retrieval_query else default_retrieval
@@ -1141,7 +1176,15 @@ class Neo4jVector(VectorStore):
             **kwargs,
         )
 
-        embedding_dimension = store.retrieve_existing_index()
+        embedding_dimension, index_type = store.retrieve_existing_index()
+
+        # Raise error if relationship index type
+        if index_type == "RELATIONSHIP":
+            raise ValueError(
+                "Relationship vector index is not supported with "
+                "`from_existing_index` method. Please use the "
+                "`from_existing_relationship_index` method."
+            )
 
         if not embedding_dimension:
             raise ValueError(
@@ -1174,6 +1217,61 @@ class Neo4jVector(VectorStore):
 
         return store
 
+    @classmethod
+    def from_existing_relationship_index(
+        cls: Type[Neo4jVector],
+        embedding: Embeddings,
+        index_name: str,
+        search_type: SearchType = DEFAULT_SEARCH_TYPE,
+        **kwargs: Any,
+    ) -> Neo4jVector:
+        """
+        Get instance of an existing Neo4j relationship vector index.
+        This method will return the instance of the store without
+        inserting any new embeddings.
+        Neo4j credentials are required in the form of `url`, `username`,
+        and `password` and optional `database` parameters along with
+        the `index_name` definition.
+        """
+
+        if search_type == SearchType.HYBRID:
+            raise ValueError(
+                "Hybrid search is not supported in combination "
+                "with relationship vector index"
+            )
+
+        store = cls(
+            embedding=embedding,
+            index_name=index_name,
+            **kwargs,
+        )
+
+        embedding_dimension, index_type = store.retrieve_existing_index()
+
+        if not embedding_dimension:
+            raise ValueError(
+                "The specified vector index name does not exist. "
+                "Make sure to check if you spelled it correctly"
+            )
+        # Raise error if the index is a node index, not a relationship index
+        if index_type == "NODE":
+            raise ValueError(
+                "Node vector index is not supported with "
+                "`from_existing_relationship_index` method. Please use the "
+                "`from_existing_index` method."
+            )
+
+        # Check if embedding function and vector index dimensions match
+        if not store.embedding_dimension == embedding_dimension:
+            raise ValueError(
+                "The provided embedding function and vector index "
+                "dimensions do not match.\n"
+                f"Embedding function dimension: {store.embedding_dimension}\n"
+                f"Vector index dimension: {embedding_dimension}"
+            )
+
+        return store
+
     @classmethod
     def from_documents(
         cls: Type[Neo4jVector],
@@ -1266,7 +1364,15 @@ class Neo4jVector(VectorStore):
         )
 
         # Check if the vector index already exists
-        embedding_dimension = store.retrieve_existing_index()
+        embedding_dimension, index_type = store.retrieve_existing_index()
+
+        # Raise error if relationship index type
+        if index_type == "RELATIONSHIP":
+            raise ValueError(
+                "`from_existing_graph` method does not support "
+                " existing relationship vector index. "
+                "Please use `from_existing_relationship_index` method"
+            )
 
         # If the vector index doesn't exist yet
         if not embedding_dimension:
diff --git a/libs/community/tests/integration_tests/vectorstores/test_neo4jvector.py b/libs/community/tests/integration_tests/vectorstores/test_neo4jvector.py
index de68c59631..a1261de81c 100644
--- a/libs/community/tests/integration_tests/vectorstores/test_neo4jvector.py
+++ b/libs/community/tests/integration_tests/vectorstores/test_neo4jvector.py
@@ -43,7 +43,9 @@ def drop_vector_indexes(store: Neo4jVector) -> None:
                               """
     )
     for index in all_indexes:
-        store.query(f"DROP INDEX {index['name']}")
+        store.query(f"DROP INDEX `{index['name']}`")
+
+    store.query("MATCH (n) DETACH DELETE n;")
 
 
 class FakeEmbeddingsWithOsDimension(FakeEmbeddings):
@@ -812,3 +814,91 @@ def test_metadata_filters_type1() -> None:
 
         assert output == expected_output
     drop_vector_indexes(docsearch)
+
+
+def test_neo4jvector_relationship_index() -> None:
+    """Test similarity search over an existing relationship vector index."""
+    embeddings = FakeEmbeddingsWithOsDimension()
+    docsearch = Neo4jVector.from_texts(
+        texts=texts,
+        embedding=embeddings,
+        url=url,
+        username=username,
+        password=password,
+        pre_delete_collection=True,
+    )
+    # Ingest data
+    docsearch.query(
+        (
+            "CREATE ()-[:REL {text: 'foo', embedding: $e1}]->()"
+            ", ()-[:REL {text: 'far', embedding: $e2}]->()"
+        ),
+        params={
+            "e1": embeddings.embed_query("foo"),
+            "e2": embeddings.embed_query("bar"),
+        },
+    )
+    # Create relationship index
+    docsearch.query(
+        """CREATE VECTOR INDEX `relationship`
+FOR ()-[r:REL]-() ON (r.embedding)
+OPTIONS {indexConfig: {
+ `vector.dimensions`: 1536,
+ `vector.similarity_function`: 'cosine'
+}}
+"""
+    )
+    relationship_index = Neo4jVector.from_existing_relationship_index(
+        embeddings, index_name="relationship"
+    )
+
+    output = relationship_index.similarity_search("foo", k=1)
+    assert output == [Document(page_content="foo")]
+
+    drop_vector_indexes(docsearch)
+
+
+def test_neo4jvector_relationship_index_retrieval() -> None:
+    """Test relationship vector index search with a custom retrieval query."""
+    embeddings = FakeEmbeddingsWithOsDimension()
+    docsearch = Neo4jVector.from_texts(
+        texts=texts,
+        embedding=embeddings,
+        url=url,
+        username=username,
+        password=password,
+        pre_delete_collection=True,
+    )
+    # Ingest data
+    docsearch.query(
+        (
+            "CREATE ({node:'text'})-[:REL {text: 'foo', embedding: $e1}]->()"
+            ", ({node:'text'})-[:REL {text: 'far', embedding: $e2}]->()"
+        ),
+        params={
+            "e1": embeddings.embed_query("foo"),
+            "e2": embeddings.embed_query("bar"),
+        },
+    )
+    # Create relationship index
+    docsearch.query(
+        """CREATE VECTOR INDEX `relationship`
+FOR ()-[r:REL]-() ON (r.embedding)
+OPTIONS {indexConfig: {
+ `vector.dimensions`: 1536,
+ `vector.similarity_function`: 'cosine'
+}}
+"""
+    )
+    retrieval_query = (
+        "RETURN relationship.text + '-' + startNode(relationship).node "
+        "AS text, score, {foo:'bar'} AS metadata"
+    )
+    relationship_index = Neo4jVector.from_existing_relationship_index(
+        embeddings, index_name="relationship", retrieval_query=retrieval_query
+    )
+
+    output = relationship_index.similarity_search("foo", k=1)
+    assert output == [Document(page_content="foo-text", metadata={"foo": "bar"})]
+
+    drop_vector_indexes(docsearch)