community: Add support for relationship indexes in neo4j vector (#20657)

Neo4j has added relationship vector indexes.
This integration cannot populate them, but it can use existing relationship vector indexes for retrieval.
pull/20455/head^2
Tomaz Bratanic 3 months ago committed by GitHub
parent 940242c1ec
commit 8c08cf4619
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -68,10 +68,24 @@ class SearchType(str, enum.Enum):
DEFAULT_SEARCH_TYPE = SearchType.VECTOR DEFAULT_SEARCH_TYPE = SearchType.VECTOR
def _get_search_index_query(search_type: SearchType) -> str: class IndexType(str, enum.Enum):
"""Enumerator of the index types."""
NODE = "NODE"
RELATIONSHIP = "RELATIONSHIP"
DEFAULT_INDEX_TYPE = IndexType.NODE
def _get_search_index_query(
search_type: SearchType, index_type: IndexType = DEFAULT_INDEX_TYPE
) -> str:
if index_type == IndexType.NODE:
type_to_query_map = { type_to_query_map = {
SearchType.VECTOR: ( SearchType.VECTOR: (
"CALL db.index.vector.queryNodes($index, $k, $embedding) YIELD node, score " "CALL db.index.vector.queryNodes($index, $k, $embedding) "
"YIELD node, score "
), ),
SearchType.HYBRID: ( SearchType.HYBRID: (
"CALL { " "CALL { "
@ -81,8 +95,8 @@ def _get_search_index_query(search_type: SearchType) -> str:
"UNWIND nodes AS n " "UNWIND nodes AS n "
# We use 0 as min # We use 0 as min
"RETURN n.node AS node, (n.score / max) AS score UNION " "RETURN n.node AS node, (n.score / max) AS score UNION "
"CALL db.index.fulltext.queryNodes($keyword_index, $query, {limit: $k}) " "CALL db.index.fulltext.queryNodes($keyword_index, $query, "
"YIELD node, score " "{limit: $k}) YIELD node, score "
"WITH collect({node:node, score:score}) AS nodes, max(score) AS max " "WITH collect({node:node, score:score}) AS nodes, max(score) AS max "
"UNWIND nodes AS n " "UNWIND nodes AS n "
# We use 0 as min # We use 0 as min
@ -93,6 +107,11 @@ def _get_search_index_query(search_type: SearchType) -> str:
), ),
} }
return type_to_query_map[search_type] return type_to_query_map[search_type]
else:
return (
"CALL db.index.vector.queryRelationships($index, $k, $embedding) "
"YIELD relationship, score "
)
def check_if_not_null(props: List[str], values: List[Any]) -> None: def check_if_not_null(props: List[str], values: List[Any]) -> None:
@ -463,6 +482,7 @@ class Neo4jVector(VectorStore):
pre_delete_collection: bool = False, pre_delete_collection: bool = False,
retrieval_query: str = "", retrieval_query: str = "",
relevance_score_fn: Optional[Callable[[float], float]] = None, relevance_score_fn: Optional[Callable[[float], float]] = None,
index_type: IndexType = DEFAULT_INDEX_TYPE,
) -> None: ) -> None:
try: try:
import neo4j import neo4j
@ -541,6 +561,7 @@ class Neo4jVector(VectorStore):
self.override_relevance_score_fn = relevance_score_fn self.override_relevance_score_fn = relevance_score_fn
self.retrieval_query = retrieval_query self.retrieval_query = retrieval_query
self.search_type = search_type self.search_type = search_type
self._index_type = index_type
# Calculate embedding dimension # Calculate embedding dimension
self.embedding_dimension = len(embedding.embed_query("foo")) self.embedding_dimension = len(embedding.embed_query("foo"))
@ -615,7 +636,7 @@ class Neo4jVector(VectorStore):
# Flag for enterprise # Flag for enterprise
self._is_enterprise = True if db_data[0]["edition"] == "enterprise" else False self._is_enterprise = True if db_data[0]["edition"] == "enterprise" else False
def retrieve_existing_index(self) -> Optional[int]: def retrieve_existing_index(self) -> Tuple[Optional[int], Optional[str]]:
""" """
Check if the vector index exists in the Neo4j database Check if the vector index exists in the Neo4j database
and returns its embedding dimension. and returns its embedding dimension.
@ -630,11 +651,11 @@ class Neo4jVector(VectorStore):
""" """
index_information = self.query( index_information = self.query(
"SHOW INDEXES YIELD name, type, labelsOrTypes, properties, options " "SHOW INDEXES YIELD name, type, entityType, labelsOrTypes, "
"WHERE type = 'VECTOR' AND (name = $index_name " "properties, options WHERE type = 'VECTOR' AND (name = $index_name "
"OR (labelsOrTypes[0] = $node_label AND " "OR (labelsOrTypes[0] = $node_label AND "
"properties[0] = $embedding_node_property)) " "properties[0] = $embedding_node_property)) "
"RETURN name, labelsOrTypes, properties, options ", "RETURN name, entityType, labelsOrTypes, properties, options ",
params={ params={
"index_name": self.index_name, "index_name": self.index_name,
"node_label": self.node_label, "node_label": self.node_label,
@ -647,13 +668,14 @@ class Neo4jVector(VectorStore):
self.index_name = index_information[0]["name"] self.index_name = index_information[0]["name"]
self.node_label = index_information[0]["labelsOrTypes"][0] self.node_label = index_information[0]["labelsOrTypes"][0]
self.embedding_node_property = index_information[0]["properties"][0] self.embedding_node_property = index_information[0]["properties"][0]
self._index_type = index_information[0]["entityType"]
embedding_dimension = index_information[0]["options"]["indexConfig"][ embedding_dimension = index_information[0]["options"]["indexConfig"][
"vector.dimensions" "vector.dimensions"
] ]
return embedding_dimension return embedding_dimension, index_information[0]["entityType"]
except IndexError: except IndexError:
return None return None, None
def retrieve_existing_fts_index( def retrieve_existing_fts_index(
self, text_node_properties: List[str] = [] self, text_node_properties: List[str] = []
@ -754,7 +776,13 @@ class Neo4jVector(VectorStore):
**kwargs, **kwargs,
) )
# Check if the vector index already exists # Check if the vector index already exists
embedding_dimension = store.retrieve_existing_index() embedding_dimension, index_type = store.retrieve_existing_index()
# Raise error if relationship index type
if index_type == "RELATIONSHIP":
raise ValueError(
"Data ingestion is not supported with relationship vector index."
)
# If the vector index doesn't exist yet # If the vector index doesn't exist yet
if not embedding_dimension: if not embedding_dimension:
@ -976,9 +1004,16 @@ class Neo4jVector(VectorStore):
index_query = base_index_query + filter_snippets + base_cosine_query index_query = base_index_query + filter_snippets + base_cosine_query
else: else:
index_query = _get_search_index_query(self.search_type) index_query = _get_search_index_query(self.search_type, self._index_type)
filter_params = {} filter_params = {}
if self._index_type == IndexType.RELATIONSHIP:
default_retrieval = (
f"RETURN relationship.`{self.text_node_property}` AS text, score, "
f"relationship {{.*, `{self.text_node_property}`: Null, "
f"`{self.embedding_node_property}`: Null, id: Null }} AS metadata"
)
else:
default_retrieval = ( default_retrieval = (
f"RETURN node.`{self.text_node_property}` AS text, score, " f"RETURN node.`{self.text_node_property}` AS text, score, "
f"node {{.*, `{self.text_node_property}`: Null, " f"node {{.*, `{self.text_node_property}`: Null, "
@ -1141,7 +1176,15 @@ class Neo4jVector(VectorStore):
**kwargs, **kwargs,
) )
embedding_dimension = store.retrieve_existing_index() embedding_dimension, index_type = store.retrieve_existing_index()
# Raise error if relationship index type
if index_type == "RELATIONSHIP":
raise ValueError(
"Relationship vector index is not supported with "
"`from_existing_index` method. Please use the "
"`from_existing_relationship_index` method."
)
if not embedding_dimension: if not embedding_dimension:
raise ValueError( raise ValueError(
@ -1174,6 +1217,61 @@ class Neo4jVector(VectorStore):
return store return store
@classmethod
def from_existing_relationship_index(
    cls: Type[Neo4jVector],
    embedding: Embeddings,
    index_name: str,
    search_type: SearchType = DEFAULT_SEARCH_TYPE,
    **kwargs: Any,
) -> Neo4jVector:
    """
    Get instance of an existing Neo4j relationship vector index.
    This method will return the instance of the store without
    inserting any new embeddings.
    Neo4j credentials are required in the form of `url`, `username`,
    and `password` and optional `database` parameters along with
    the `index_name` definition.

    Args:
        embedding: Embedding function; its dimension must equal the
            dimension configured on the existing index.
        index_name: Name of the existing relationship vector index.
        search_type: Search type to use. `SearchType.HYBRID` is rejected.
        **kwargs: Forwarded unchanged to the class constructor.

    Returns:
        The store instance bound to the existing relationship index.

    Raises:
        ValueError: If hybrid search is requested, if no matching vector
            index is found, if the matching index is a node index, or if
            the embedding dimension differs from the index dimension.
    """
    # Reject the unsupported combination before constructing the store.
    if search_type == SearchType.HYBRID:
        raise ValueError(
            "Hybrid search is not supported in combination "
            "with relationship vector index"
        )

    store = cls(
        embedding=embedding,
        index_name=index_name,
        **kwargs,
    )

    embedding_dimension, index_type = store.retrieve_existing_index()

    # A falsy dimension means the lookup found no matching index.
    if not embedding_dimension:
        raise ValueError(
            "The specified vector index name does not exist. "
            "Make sure to check if you spelled it correctly"
        )
    # Raise error if the existing index is a node index
    if index_type == "NODE":
        raise ValueError(
            "Node vector index is not supported with "
            "`from_existing_relationship_index` method. Please use the "
            "`from_existing_index` method."
        )

    # Check if embedding function and vector index dimensions match
    if store.embedding_dimension != embedding_dimension:
        raise ValueError(
            "The provided embedding function and vector index "
            "dimensions do not match.\n"
            f"Embedding function dimension: {store.embedding_dimension}\n"
            f"Vector index dimension: {embedding_dimension}"
        )

    return store
@classmethod @classmethod
def from_documents( def from_documents(
cls: Type[Neo4jVector], cls: Type[Neo4jVector],
@ -1266,7 +1364,15 @@ class Neo4jVector(VectorStore):
) )
# Check if the vector index already exists # Check if the vector index already exists
embedding_dimension = store.retrieve_existing_index() embedding_dimension, index_type = store.retrieve_existing_index()
# Raise error if relationship index type
if index_type == "RELATIONSHIP":
raise ValueError(
"`from_existing_graph` method does not support "
" existing relationship vector index. "
"Please use `from_existing_relationship_index` method"
)
# If the vector index doesn't exist yet # If the vector index doesn't exist yet
if not embedding_dimension: if not embedding_dimension:

@ -43,7 +43,9 @@ def drop_vector_indexes(store: Neo4jVector) -> None:
""" """
) )
for index in all_indexes: for index in all_indexes:
store.query(f"DROP INDEX {index['name']}") store.query(f"DROP INDEX `{index['name']}`")
store.query("MATCH (n) DETACH DELETE n;")
class FakeEmbeddingsWithOsDimension(FakeEmbeddings): class FakeEmbeddingsWithOsDimension(FakeEmbeddings):
@ -812,3 +814,91 @@ def test_metadata_filters_type1() -> None:
assert output == expected_output assert output == expected_output
drop_vector_indexes(docsearch) drop_vector_indexes(docsearch)
def test_neo4jvector_relationship_index() -> None:
    """Test similarity search over an existing relationship vector index."""
    embeddings = FakeEmbeddingsWithOsDimension()
    # This store is only used to run raw Cypher and to clean up afterwards.
    docsearch = Neo4jVector.from_texts(
        texts=texts,
        embedding=embeddings,
        url=url,
        username=username,
        password=password,
        pre_delete_collection=True,
    )
    # Ingest data: two relationships carrying a text and an embedding property
    # NOTE(review): the second relationship stores text 'far' but the
    # embedding of "bar" -- possibly a typo; confirm intent.
    docsearch.query(
        (
            "CREATE ()-[:REL {text: 'foo', embedding: $e1}]->()"
            ", ()-[:REL {text: 'far', embedding: $e2}]->()"
        ),
        params={
            "e1": embeddings.embed_query("foo"),
            "e2": embeddings.embed_query("bar"),
        },
    )
    # Create relationship index
    # NOTE(review): 1536 is presumably the dimension produced by
    # FakeEmbeddingsWithOsDimension -- a mismatch would make
    # from_existing_relationship_index raise; confirm.
    docsearch.query(
        """CREATE VECTOR INDEX `relationship`
        FOR ()-[r:REL]-() ON (r.embedding)
        OPTIONS {indexConfig: {
        `vector.dimensions`: 1536,
        `vector.similarity_function`: 'cosine'
        }}
        """
    )
    # Pass the same connection settings used for `from_texts` above so both
    # stores connect with identical, explicitly given credentials.
    relationship_index = Neo4jVector.from_existing_relationship_index(
        embeddings,
        index_name="relationship",
        url=url,
        username=username,
        password=password,
    )

    output = relationship_index.similarity_search("foo", k=1)
    assert output == [Document(page_content="foo")]

    drop_vector_indexes(docsearch)
def test_neo4jvector_relationship_index_retrieval() -> None:
    """Test a relationship vector index combined with a custom retrieval query."""
    embeddings = FakeEmbeddingsWithOsDimension()
    # This store is only used to run raw Cypher and to clean up afterwards.
    docsearch = Neo4jVector.from_texts(
        texts=texts,
        embedding=embeddings,
        url=url,
        username=username,
        password=password,
        pre_delete_collection=True,
    )
    # Ingest data: start nodes carry a `node` property that the custom
    # retrieval query below reads through startNode(relationship).
    docsearch.query(
        (
            "CREATE ({node:'text'})-[:REL {text: 'foo', embedding: $e1}]->()"
            ", ({node:'text'})-[:REL {text: 'far', embedding: $e2}]->()"
        ),
        params={
            "e1": embeddings.embed_query("foo"),
            "e2": embeddings.embed_query("bar"),
        },
    )
    # Create relationship index
    # NOTE(review): 1536 is presumably the dimension produced by
    # FakeEmbeddingsWithOsDimension -- a mismatch would make
    # from_existing_relationship_index raise; confirm.
    docsearch.query(
        """CREATE VECTOR INDEX `relationship`
        FOR ()-[r:REL]-() ON (r.embedding)
        OPTIONS {indexConfig: {
        `vector.dimensions`: 1536,
        `vector.similarity_function`: 'cosine'
        }}
        """
    )
    # Builds the page content from the relationship text and a start-node
    # property, and returns a constant metadata map.
    retrieval_query = (
        "RETURN relationship.text + '-' + startNode(relationship).node "
        "AS text, score, {foo:'bar'} AS metadata"
    )
    # Pass the same connection settings used for `from_texts` above so both
    # stores connect with identical, explicitly given credentials.
    relationship_index = Neo4jVector.from_existing_relationship_index(
        embeddings,
        index_name="relationship",
        retrieval_query=retrieval_query,
        url=url,
        username=username,
        password=password,
    )

    output = relationship_index.similarity_search("foo", k=1)
    assert output == [Document(page_content="foo-text", metadata={"foo": "bar"})]

    drop_vector_indexes(docsearch)

Loading…
Cancel
Save