community: Add support for relationship indexes in neo4j vector (#20657)

Neo4j has added relationship vector indexes. This integration cannot populate them, but it can use existing relationship indexes for retrieval.
3 months ago · 8c08cf4619
parent 940242c1ec
commit 8c08cf4619
2 changed files with 237 additions and 41 deletions
--- a/libs/community/langchain_community/vectorstores/neo4j_vector.py
+++ b/libs/community/langchain_community/vectorstores/neo4j_vector.py
@ -68,10 +68,24 @@ class SearchType(str, enum.Enum):
 DEFAULT_SEARCH_TYPE = SearchType.VECTOR
-def _get_search_index_query(search_type: SearchType) -> str:
+class IndexType(str, enum.Enum):
    """Enumerator of the vector index types.

    Tells the store whether the vector index is defined over nodes or over
    relationships. Because the enum also subclasses ``str``, its members
    compare equal to their plain string values (e.g. ``"RELATIONSHIP"``).
    """
    # Searched with `db.index.vector.queryNodes`.
    NODE = "NODE"
    # Searched with `db.index.vector.queryRelationships`.
    RELATIONSHIP = "RELATIONSHIP"
 DEFAULT_INDEX_TYPE = IndexType.NODE
 def _get_search_index_query(
    search_type: SearchType, index_type: IndexType = DEFAULT_INDEX_TYPE
 ) -> str:
    if index_type == IndexType.NODE:
        type_to_query_map = {
            SearchType.VECTOR: (
-            "CALL db.index.vector.queryNodes($index, $k, $embedding) YIELD node, score "
+                "CALL db.index.vector.queryNodes($index, $k, $embedding) "
                "YIELD node, score "
            ),
            SearchType.HYBRID: (
                "CALL { "
@ -81,8 +95,8 @@ def _get_search_index_query(search_type: SearchType) -> str:
                "UNWIND nodes AS n "
                # We use 0 as min
                "RETURN n.node AS node, (n.score / max) AS score UNION "
-            "CALL db.index.fulltext.queryNodes($keyword_index, $query, {limit: $k}) "
+                "CALL db.index.fulltext.queryNodes($keyword_index, $query, "
-            "YIELD node, score "
+                "{limit: $k}) YIELD node, score "
                "WITH collect({node:node, score:score}) AS nodes, max(score) AS max "
                "UNWIND nodes AS n "
                # We use 0 as min
@ -93,6 +107,11 @@ def _get_search_index_query(search_type: SearchType) -> str:
            ),
        }
        return type_to_query_map[search_type]
    else:
        return (
            "CALL db.index.vector.queryRelationships($index, $k, $embedding) "
            "YIELD relationship, score "
        )
 def check_if_not_null(props: List[str], values: List[Any]) -> None:
@ -463,6 +482,7 @@ class Neo4jVector(VectorStore):
        pre_delete_collection: bool = False,
        retrieval_query: str = "",
        relevance_score_fn: Optional[Callable[[float], float]] = None,
        index_type: IndexType = DEFAULT_INDEX_TYPE,
    ) -> None:
        try:
            import neo4j
@ -541,6 +561,7 @@ class Neo4jVector(VectorStore):
        self.override_relevance_score_fn = relevance_score_fn
        self.retrieval_query = retrieval_query
        self.search_type = search_type
        self._index_type = index_type
        # Calculate embedding dimension
        self.embedding_dimension = len(embedding.embed_query("foo"))
@ -615,7 +636,7 @@ class Neo4jVector(VectorStore):
        # Flag for enterprise
        self._is_enterprise = True if db_data[0]["edition"] == "enterprise" else False
-    def retrieve_existing_index(self) -> Optional[int]:
+    def retrieve_existing_index(self) -> Tuple[Optional[int], Optional[str]]:
        """
        Check if the vector index exists in the Neo4j database
        and returns its embedding dimension and entity type.
@ -630,11 +651,11 @@ class Neo4jVector(VectorStore):
        """
        index_information = self.query(
-            "SHOW INDEXES YIELD name, type, labelsOrTypes, properties, options "
+            "SHOW INDEXES YIELD name, type, entityType, labelsOrTypes, "
-            "WHERE type = 'VECTOR' AND (name = $index_name "
+            "properties, options WHERE type = 'VECTOR' AND (name = $index_name "
            "OR (labelsOrTypes[0] = $node_label AND "
            "properties[0] = $embedding_node_property)) "
-            "RETURN name, labelsOrTypes, properties, options ",
+            "RETURN name, entityType, labelsOrTypes, properties, options ",
            params={
                "index_name": self.index_name,
                "node_label": self.node_label,
@ -647,13 +668,14 @@ class Neo4jVector(VectorStore):
            self.index_name = index_information[0]["name"]
            self.node_label = index_information[0]["labelsOrTypes"][0]
            self.embedding_node_property = index_information[0]["properties"][0]
            self._index_type = index_information[0]["entityType"]
            embedding_dimension = index_information[0]["options"]["indexConfig"][
                "vector.dimensions"
            ]
-            return embedding_dimension
+            return embedding_dimension, index_information[0]["entityType"]
        except IndexError:
-            return None
+            return None, None
    def retrieve_existing_fts_index(
        self, text_node_properties: List[str] = []
@ -754,7 +776,13 @@ class Neo4jVector(VectorStore):
            **kwargs,
        )
        # Check if the vector index already exists
-        embedding_dimension = store.retrieve_existing_index()
+        embedding_dimension, index_type = store.retrieve_existing_index()
        # Raise error if relationship index type
        if index_type == "RELATIONSHIP":
            raise ValueError(
                "Data ingestion is not supported with relationship vector index."
            )
        # If the vector index doesn't exist yet
        if not embedding_dimension:
@ -976,9 +1004,16 @@ class Neo4jVector(VectorStore):
            index_query = base_index_query + filter_snippets + base_cosine_query
        else:
-            index_query = _get_search_index_query(self.search_type)
+            index_query = _get_search_index_query(self.search_type, self._index_type)
            filter_params = {}
        if self._index_type == IndexType.RELATIONSHIP:
            default_retrieval = (
                f"RETURN relationship.`{self.text_node_property}` AS text, score, "
                f"relationship {{.*, `{self.text_node_property}`: Null, "
                f"`{self.embedding_node_property}`: Null, id: Null }} AS metadata"
            )
        else:
            default_retrieval = (
                f"RETURN node.`{self.text_node_property}` AS text, score, "
                f"node {{.*, `{self.text_node_property}`: Null, "
@ -1141,7 +1176,15 @@ class Neo4jVector(VectorStore):
            **kwargs,
        )
-        embedding_dimension = store.retrieve_existing_index()
+        embedding_dimension, index_type = store.retrieve_existing_index()
        # Raise error if relationship index type
        if index_type == "RELATIONSHIP":
            raise ValueError(
                "Relationship vector index is not supported with "
                "`from_existing_index` method. Please use the "
                "`from_existing_relationship_index` method."
            )
        if not embedding_dimension:
            raise ValueError(
@ -1174,6 +1217,61 @@ class Neo4jVector(VectorStore):
        return store
    @classmethod
    def from_existing_relationship_index(
        cls: Type[Neo4jVector],
        embedding: Embeddings,
        index_name: str,
        search_type: SearchType = DEFAULT_SEARCH_TYPE,
        **kwargs: Any,
    ) -> Neo4jVector:
        """
        Get instance of an existing Neo4j relationship vector index.
        This method will return the instance of the store without
        inserting any new embeddings.
        Neo4j credentials are required in the form of `url`, `username`,
        and `password` and optional `database` parameters along with
        the `index_name` definition.

        Args:
            embedding: Embedding function used by the store; its output
                dimension must match the dimension of the existing index.
            index_name: Name of the existing relationship vector index.
            search_type: Only validated here; `SearchType.HYBRID` is rejected.
            **kwargs: Forwarded unchanged to the class constructor.

        Returns:
            The store instance bound to the existing relationship index.

        Raises:
            ValueError: If hybrid search is requested, if no matching vector
                index is found, if the matched index is a node index, or if
                the embedding function and index dimensions differ.
        """

        # Hybrid search is rejected up front, before any store is constructed.
        if search_type == SearchType.HYBRID:
            raise ValueError(
                "Hybrid search is not supported in combination "
                "with relationship vector index"
            )

        # NOTE(review): `search_type` is not passed on to the constructor here.
        store = cls(
            embedding=embedding,
            index_name=index_name,
            **kwargs,
        )

        # Both values are None when no matching vector index was found.
        embedding_dimension, index_type = store.retrieve_existing_index()

        if not embedding_dimension:
            raise ValueError(
                "The specified vector index name does not exist. "
                "Make sure to check if you spelled it correctly"
            )
        # Raise error if the matched index is a node index
        if index_type == "NODE":
            raise ValueError(
                "Node vector index is not supported with "
                "`from_existing_relationship_index` method. Please use the "
                "`from_existing_index` method."
            )

        # Check if embedding function and vector index dimensions match
        if not store.embedding_dimension == embedding_dimension:
            raise ValueError(
                "The provided embedding function and vector index "
                "dimensions do not match.\n"
                f"Embedding function dimension: {store.embedding_dimension}\n"
                f"Vector index dimension: {embedding_dimension}"
            )

        return store
    @classmethod
    def from_documents(
        cls: Type[Neo4jVector],
@ -1266,7 +1364,15 @@ class Neo4jVector(VectorStore):
        )
        # Check if the vector index already exists
-        embedding_dimension = store.retrieve_existing_index()
+        embedding_dimension, index_type = store.retrieve_existing_index()
        # Raise error if relationship index type
        if index_type == "RELATIONSHIP":
            raise ValueError(
                "`from_existing_graph` method does not support "
                " existing relationship vector index. "
                "Please use `from_existing_relationship_index` method"
            )
        # If the vector index doesn't exist yet
        if not embedding_dimension:
--- a/libs/community/tests/integration_tests/vectorstores/test_neo4jvector.py
+++ b/libs/community/tests/integration_tests/vectorstores/test_neo4jvector.py
@ -43,7 +43,9 @@ def drop_vector_indexes(store: Neo4jVector) -> None:
                              """
    )
    for index in all_indexes:
-        store.query(f"DROP INDEX {index['name']}")
+        store.query(f"DROP INDEX `{index['name']}`")
    store.query("MATCH (n) DETACH DELETE n;")
 class FakeEmbeddingsWithOsDimension(FakeEmbeddings):
@ -812,3 +814,91 @@ def test_metadata_filters_type1() -> None:
        assert output == expected_output
    drop_vector_indexes(docsearch)
 def test_neo4jvector_relationship_index() -> None:
    """Test similarity search over an existing relationship vector index.

    Creates two `REL` relationships with `text` and `embedding` properties,
    builds a vector index named `relationship` on `r.embedding`, opens it via
    `Neo4jVector.from_existing_relationship_index`, and checks that the top
    hit for "foo" is the relationship whose text is "foo".
    """
    embeddings = FakeEmbeddingsWithOsDimension()
    # A regular store is created first; it is used below to run raw Cypher
    # and for cleanup.
    docsearch = Neo4jVector.from_texts(
        texts=texts,
        embedding=embeddings,
        url=url,
        username=username,
        password=password,
        pre_delete_collection=True,
    )
    # Ingest data
    # Relationships are written with raw Cypher rather than through the store.
    # NOTE(review): the second relationship has text 'far' but the embedding
    # of "bar" — confirm whether the mismatch is intentional.
    docsearch.query(
        (
            "CREATE ()-[:REL {text: 'foo', embedding: $e1}]->()"
            ", ()-[:REL {text: 'far', embedding: $e2}]->()"
        ),
        params={
            "e1": embeddings.embed_query("foo"),
            "e2": embeddings.embed_query("bar"),
        },
    )
    # Create relationship index
    # NOTE(review): dimension is hard-coded to 1536 — presumably matches
    # FakeEmbeddingsWithOsDimension; confirm.
    docsearch.query(
        """CREATE VECTOR INDEX `relationship`
 FOR ()-[r:REL]-() ON (r.embedding)
 OPTIONS {indexConfig: {
 `vector.dimensions`: 1536,
 `vector.similarity_function`: 'cosine'
 }}
 """
    )
    # NOTE(review): no url/username/password are passed here — presumably
    # picked up from the environment; confirm.
    relationship_index = Neo4jVector.from_existing_relationship_index(
        embeddings, index_name="relationship"
    )

    output = relationship_index.similarity_search("foo", k=1)
    assert output == [Document(page_content="foo")]

    drop_vector_indexes(docsearch)
 def test_neo4jvector_relationship_index_retrieval() -> None:
    """Test a relationship vector index combined with a custom retrieval query.

    Same setup as the plain relationship-index test, except the start nodes
    carry a `node` property and a custom `retrieval_query` builds the returned
    text from the relationship text plus that property, with constant metadata.
    """
    embeddings = FakeEmbeddingsWithOsDimension()
    # A regular store is created first; it is used below to run raw Cypher
    # and for cleanup.
    docsearch = Neo4jVector.from_texts(
        texts=texts,
        embedding=embeddings,
        url=url,
        username=username,
        password=password,
        pre_delete_collection=True,
    )
    # Ingest data
    # Start nodes get `node: 'text'` so the retrieval query can read it.
    # NOTE(review): the second relationship has text 'far' but the embedding
    # of "bar" — confirm whether the mismatch is intentional.
    docsearch.query(
        (
            "CREATE ({node:'text'})-[:REL {text: 'foo', embedding: $e1}]->()"
            ", ({node:'text'})-[:REL {text: 'far', embedding: $e2}]->()"
        ),
        params={
            "e1": embeddings.embed_query("foo"),
            "e2": embeddings.embed_query("bar"),
        },
    )
    # Create relationship index
    # NOTE(review): dimension is hard-coded to 1536 — presumably matches
    # FakeEmbeddingsWithOsDimension; confirm.
    docsearch.query(
        """CREATE VECTOR INDEX `relationship`
 FOR ()-[r:REL]-() ON (r.embedding)
 OPTIONS {indexConfig: {
 `vector.dimensions`: 1536,
 `vector.similarity_function`: 'cosine'
 }}
 """
    )
    # Returned text is "<relationship text>-<start node's `node` property>";
    # metadata is the constant map {foo: 'bar'}.
    retrieval_query = (
        "RETURN relationship.text + '-' + startNode(relationship).node "
        "AS text, score, {foo:'bar'} AS metadata"
    )
    # NOTE(review): no url/username/password are passed here — presumably
    # picked up from the environment; confirm.
    relationship_index = Neo4jVector.from_existing_relationship_index(
        embeddings, index_name="relationship", retrieval_query=retrieval_query
    )

    output = relationship_index.similarity_search("foo", k=1)
    assert output == [Document(page_content="foo-text", metadata={"foo": "bar"})]

    drop_vector_indexes(docsearch)