community: Add support for relationship indexes in neo4j vector (#20657)

Neo4j has added relationship vector indexes.
We can't populate them, but we can use existing indexes for retrieval.
pull/20455/head^2
Tomaz Bratanic 3 months ago committed by GitHub
parent 940242c1ec
commit 8c08cf4619
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -68,31 +68,50 @@ class SearchType(str, enum.Enum):
DEFAULT_SEARCH_TYPE = SearchType.VECTOR
def _get_search_index_query(search_type: SearchType) -> str:
    """Return the Cypher fragment that queries the index for ``search_type``.

    Each fragment yields ``node`` and ``score`` and ends with a trailing
    space.

    Args:
        search_type: ``SearchType.VECTOR`` for a vector-index lookup, or
            ``SearchType.HYBRID`` to union the vector-index lookup with a
            full-text lookup on ``$keyword_index``.

    Returns:
        The Cypher fragment registered for ``search_type``.

    Raises:
        KeyError: If ``search_type`` is not one of the registered types.
    """
    vector_query = (
        "CALL db.index.vector.queryNodes($index, $k, $embedding) YIELD node, score "
    )
    hybrid_query = (
        "CALL { "
        "CALL db.index.vector.queryNodes($index, $k, $embedding) "
        "YIELD node, score "
        "WITH collect({node:node, score:score}) AS nodes, max(score) AS max "
        "UNWIND nodes AS n "
        # Scores are scaled by the branch maximum only; the minimum is taken as 0.
        "RETURN n.node AS node, (n.score / max) AS score UNION "
        "CALL db.index.fulltext.queryNodes($keyword_index, $query, {limit: $k}) "
        "YIELD node, score "
        "WITH collect({node:node, score:score}) AS nodes, max(score) AS max "
        "UNWIND nodes AS n "
        # Same scaling for the full-text branch so both branches are comparable.
        "RETURN n.node AS node, (n.score / max) AS score "
        "} "
        # A node returned by both branches keeps only its best score.
        "WITH node, max(score) AS score ORDER BY score DESC LIMIT $k "
    )
    queries_by_type = {
        SearchType.VECTOR: vector_query,
        SearchType.HYBRID: hybrid_query,
    }
    return queries_by_type[search_type]
class IndexType(str, enum.Enum):
    """Kind of graph entity a vector index is defined on.

    The ``str`` mixin makes every member compare equal to its raw string
    value (``"NODE"`` / ``"RELATIONSHIP"``).
    """

    # Index over a property stored on nodes.
    NODE = "NODE"
    # Index over a property stored on relationships.
    RELATIONSHIP = "RELATIONSHIP"
DEFAULT_INDEX_TYPE = IndexType.NODE
def _get_search_index_query(
    search_type: SearchType, index_type: IndexType = DEFAULT_INDEX_TYPE
) -> str:
    """Return the Cypher fragment that queries the index.

    For a node index the fragment yields ``node`` and ``score``; for any other
    index type it yields ``relationship`` and ``score``. Every fragment ends
    with a trailing space.

    Args:
        search_type: ``SearchType.VECTOR`` or ``SearchType.HYBRID``. Only
            consulted when ``index_type`` is ``IndexType.NODE``.
        index_type: Kind of entity the vector index covers. Any value other
            than ``IndexType.NODE`` selects the relationship vector query,
            regardless of ``search_type``.

    Returns:
        The Cypher fragment for the requested combination.

    Raises:
        KeyError: If ``index_type`` is ``IndexType.NODE`` and ``search_type``
            is not one of the registered types.
    """
    if index_type != IndexType.NODE:
        # Relationship indexes only have a plain vector lookup here.
        return (
            "CALL db.index.vector.queryRelationships($index, $k, $embedding) "
            "YIELD relationship, score "
        )

    vector_query = (
        "CALL db.index.vector.queryNodes($index, $k, $embedding) "
        "YIELD node, score "
    )
    hybrid_query = (
        "CALL { "
        "CALL db.index.vector.queryNodes($index, $k, $embedding) "
        "YIELD node, score "
        "WITH collect({node:node, score:score}) AS nodes, max(score) AS max "
        "UNWIND nodes AS n "
        # Scores are scaled by the branch maximum only; the minimum is taken as 0.
        "RETURN n.node AS node, (n.score / max) AS score UNION "
        "CALL db.index.fulltext.queryNodes($keyword_index, $query, "
        "{limit: $k}) YIELD node, score "
        "WITH collect({node:node, score:score}) AS nodes, max(score) AS max "
        "UNWIND nodes AS n "
        # Same scaling for the full-text branch so both branches are comparable.
        "RETURN n.node AS node, (n.score / max) AS score "
        "} "
        # A node returned by both branches keeps only its best score.
        "WITH node, max(score) AS score ORDER BY score DESC LIMIT $k "
    )
    node_queries = {
        SearchType.VECTOR: vector_query,
        SearchType.HYBRID: hybrid_query,
    }
    return node_queries[search_type]
def check_if_not_null(props: List[str], values: List[Any]) -> None:
@ -463,6 +482,7 @@ class Neo4jVector(VectorStore):
pre_delete_collection: bool = False,
retrieval_query: str = "",
relevance_score_fn: Optional[Callable[[float], float]] = None,
index_type: IndexType = DEFAULT_INDEX_TYPE,
) -> None:
try:
import neo4j
@ -541,6 +561,7 @@ class Neo4jVector(VectorStore):
self.override_relevance_score_fn = relevance_score_fn
self.retrieval_query = retrieval_query
self.search_type = search_type
self._index_type = index_type
# Calculate embedding dimension
self.embedding_dimension = len(embedding.embed_query("foo"))
@ -615,7 +636,7 @@ class Neo4jVector(VectorStore):
# Flag for enterprise
self._is_enterprise = True if db_data[0]["edition"] == "enterprise" else False
def retrieve_existing_index(self) -> Optional[int]:
def retrieve_existing_index(self) -> Tuple[Optional[int], Optional[str]]:
"""
Check if the vector index exists in the Neo4j database
and returns its embedding dimension.
@ -630,11 +651,11 @@ class Neo4jVector(VectorStore):
"""
index_information = self.query(
"SHOW INDEXES YIELD name, type, labelsOrTypes, properties, options "
"WHERE type = 'VECTOR' AND (name = $index_name "
"SHOW INDEXES YIELD name, type, entityType, labelsOrTypes, "
"properties, options WHERE type = 'VECTOR' AND (name = $index_name "
"OR (labelsOrTypes[0] = $node_label AND "
"properties[0] = $embedding_node_property)) "
"RETURN name, labelsOrTypes, properties, options ",
"RETURN name, entityType, labelsOrTypes, properties, options ",
params={
"index_name": self.index_name,
"node_label": self.node_label,
@ -647,13 +668,14 @@ class Neo4jVector(VectorStore):
self.index_name = index_information[0]["name"]
self.node_label = index_information[0]["labelsOrTypes"][0]
self.embedding_node_property = index_information[0]["properties"][0]
self._index_type = index_information[0]["entityType"]
embedding_dimension = index_information[0]["options"]["indexConfig"][
"vector.dimensions"
]
return embedding_dimension
return embedding_dimension, index_information[0]["entityType"]
except IndexError:
return None
return None, None
def retrieve_existing_fts_index(
self, text_node_properties: List[str] = []
@ -754,7 +776,13 @@ class Neo4jVector(VectorStore):
**kwargs,
)
# Check if the vector index already exists
embedding_dimension = store.retrieve_existing_index()
embedding_dimension, index_type = store.retrieve_existing_index()
# Raise error if relationship index type
if index_type == "RELATIONSHIP":
raise ValueError(
"Data ingestion is not supported with relationship vector index."
)
# If the vector index doesn't exist yet
if not embedding_dimension:
@ -976,14 +1004,21 @@ class Neo4jVector(VectorStore):
index_query = base_index_query + filter_snippets + base_cosine_query
else:
index_query = _get_search_index_query(self.search_type)
index_query = _get_search_index_query(self.search_type, self._index_type)
filter_params = {}
default_retrieval = (
f"RETURN node.`{self.text_node_property}` AS text, score, "
f"node {{.*, `{self.text_node_property}`: Null, "
f"`{self.embedding_node_property}`: Null, id: Null }} AS metadata"
)
if self._index_type == IndexType.RELATIONSHIP:
default_retrieval = (
f"RETURN relationship.`{self.text_node_property}` AS text, score, "
f"relationship {{.*, `{self.text_node_property}`: Null, "
f"`{self.embedding_node_property}`: Null, id: Null }} AS metadata"
)
else:
default_retrieval = (
f"RETURN node.`{self.text_node_property}` AS text, score, "
f"node {{.*, `{self.text_node_property}`: Null, "
f"`{self.embedding_node_property}`: Null, id: Null }} AS metadata"
)
retrieval_query = (
self.retrieval_query if self.retrieval_query else default_retrieval
@ -1141,7 +1176,15 @@ class Neo4jVector(VectorStore):
**kwargs,
)
embedding_dimension = store.retrieve_existing_index()
embedding_dimension, index_type = store.retrieve_existing_index()
# Raise error if relationship index type
if index_type == "RELATIONSHIP":
raise ValueError(
"Relationship vector index is not supported with "
"`from_existing_index` method. Please use the "
"`from_existing_relationship_index` method."
)
if not embedding_dimension:
raise ValueError(
@ -1174,6 +1217,61 @@ class Neo4jVector(VectorStore):
return store
@classmethod
def from_existing_relationship_index(
    cls: Type[Neo4jVector],
    embedding: Embeddings,
    index_name: str,
    search_type: SearchType = DEFAULT_SEARCH_TYPE,
    **kwargs: Any,
) -> Neo4jVector:
    """
    Build a store on top of an existing Neo4j relationship vector index.

    No embeddings are inserted; the store is only validated against the
    index and returned.

    Neo4j credentials are required in the form of `url`, `username`,
    and `password` and optional `database` parameters along with
    the `index_name` definition. They are forwarded to the constructor
    through `kwargs`.

    Args:
        embedding: Embedding function used by the store.
        index_name: Name of the existing relationship vector index.
        search_type: Only validated here (hybrid is rejected); it is not
            forwarded to the constructor.
        **kwargs: Extra keyword arguments passed to the constructor.

    Returns:
        The store bound to the existing relationship index.

    Raises:
        ValueError: If hybrid search is requested, the index is not found,
            the index is a node index, or the embedding dimension differs
            from the index dimension.
    """
    hybrid_requested = search_type == SearchType.HYBRID
    if hybrid_requested:
        raise ValueError(
            "Hybrid search is not supported in combination "
            "with relationship vector index"
        )

    store = cls(embedding=embedding, index_name=index_name, **kwargs)

    # Look the index up in the database: yields (dimension, entity type),
    # or (None, None) when no matching index is found.
    embedding_dimension, entity_type = store.retrieve_existing_index()

    if not embedding_dimension:
        raise ValueError(
            "The specified vector index name does not exist. "
            "Make sure to check if you spelled it correctly"
        )

    # This constructor is for relationship indexes only: reject node indexes.
    if entity_type == "NODE":
        raise ValueError(
            "Node vector index is not supported with "
            "`from_existing_relationship_index` method. Please use the "
            "`from_existing_index` method."
        )

    # The embedding function must produce vectors of the index's dimension.
    if store.embedding_dimension != embedding_dimension:
        raise ValueError(
            "The provided embedding function and vector index "
            "dimensions do not match.\n"
            f"Embedding function dimension: {store.embedding_dimension}\n"
            f"Vector index dimension: {embedding_dimension}"
        )

    return store
@classmethod
def from_documents(
cls: Type[Neo4jVector],
@ -1266,7 +1364,15 @@ class Neo4jVector(VectorStore):
)
# Check if the vector index already exists
embedding_dimension = store.retrieve_existing_index()
embedding_dimension, index_type = store.retrieve_existing_index()
# Raise error if relationship index type
if index_type == "RELATIONSHIP":
raise ValueError(
"`from_existing_graph` method does not support "
" existing relationship vector index. "
"Please use `from_existing_relationship_index` method"
)
# If the vector index doesn't exist yet
if not embedding_dimension:

@ -43,7 +43,9 @@ def drop_vector_indexes(store: Neo4jVector) -> None:
"""
)
for index in all_indexes:
store.query(f"DROP INDEX {index['name']}")
store.query(f"DROP INDEX `{index['name']}`")
store.query("MATCH (n) DETACH DELETE n;")
class FakeEmbeddingsWithOsDimension(FakeEmbeddings):
@ -812,3 +814,91 @@ def test_metadata_filters_type1() -> None:
assert output == expected_output
drop_vector_indexes(docsearch)
def test_neo4jvector_relationship_index() -> None:
"""Test default retrieval from an existing relationship vector index.

Creates two `REL` relationships carrying `text` and `embedding` properties,
builds a vector index named `relationship` on `REL.embedding`, opens it with
`from_existing_relationship_index`, and checks that searching for "foo" with
k=1 returns a single document whose content is "foo".
"""
embeddings = FakeEmbeddingsWithOsDimension()
# Store created through `from_texts`; used below to run raw Cypher and for
# the final cleanup.
docsearch = Neo4jVector.from_texts(
texts=texts,
embedding=embeddings,
url=url,
username=username,
password=password,
pre_delete_collection=True,
)
# Ingest data: the relationships are written directly with Cypher.
# NOTE(review): the second relationship stores text 'far' but the embedding
# of "bar" — looks like a typo; it does not affect the assertion below.
docsearch.query(
(
"CREATE ()-[:REL {text: 'foo', embedding: $e1}]->()"
", ()-[:REL {text: 'far', embedding: $e2}]->()"
),
params={
"e1": embeddings.embed_query("foo"),
"e2": embeddings.embed_query("bar"),
},
)
# Create relationship index
# NOTE(review): the dimension is hard-coded to 1536; presumably this matches
# the output size of FakeEmbeddingsWithOsDimension — confirm.
docsearch.query(
"""CREATE VECTOR INDEX `relationship`
FOR ()-[r:REL]-() ON (r.embedding)
OPTIONS {indexConfig: {
`vector.dimensions`: 1536,
`vector.similarity_function`: 'cosine'
}}
"""
)
# Open the index just created; no retrieval query is given, so the store's
# default retrieval is used.
relationship_index = Neo4jVector.from_existing_relationship_index(
embeddings, index_name="relationship"
)
output = relationship_index.similarity_search("foo", k=1)
assert output == [Document(page_content="foo")]
# Clean up through the shared helper.
drop_vector_indexes(docsearch)
def test_neo4jvector_relationship_index_retrieval() -> None:
"""Test a custom retrieval query on a relationship vector index.

Creates two `REL` relationships whose start nodes carry `node: 'text'`,
builds a vector index named `relationship` on `REL.embedding`, and opens it
with a retrieval query that joins the relationship text with the start
node's `node` property. Searching for "foo" with k=1 must return
"foo-text" with metadata `{"foo": "bar"}`.
"""
embeddings = FakeEmbeddingsWithOsDimension()
# Store created through `from_texts`; used below to run raw Cypher and for
# the final cleanup.
docsearch = Neo4jVector.from_texts(
texts=texts,
embedding=embeddings,
url=url,
username=username,
password=password,
pre_delete_collection=True,
)
# Ingest data: the relationships are written directly with Cypher, and each
# start node gets a `node` property that the retrieval query reads.
# NOTE(review): the second relationship stores text 'far' but the embedding
# of "bar" — looks like a typo; it does not affect the assertion below.
docsearch.query(
(
"CREATE ({node:'text'})-[:REL {text: 'foo', embedding: $e1}]->()"
", ({node:'text'})-[:REL {text: 'far', embedding: $e2}]->()"
),
params={
"e1": embeddings.embed_query("foo"),
"e2": embeddings.embed_query("bar"),
},
)
# Create relationship index
# NOTE(review): the dimension is hard-coded to 1536; presumably this matches
# the output size of FakeEmbeddingsWithOsDimension — confirm.
docsearch.query(
"""CREATE VECTOR INDEX `relationship`
FOR ()-[r:REL]-() ON (r.embedding)
OPTIONS {indexConfig: {
`vector.dimensions`: 1536,
`vector.similarity_function`: 'cosine'
}}
"""
)
# Custom retrieval: it refers to the `relationship` variable and returns the
# text, score and metadata columns.
retrieval_query = (
"RETURN relationship.text + '-' + startNode(relationship).node "
"AS text, score, {foo:'bar'} AS metadata"
)
relationship_index = Neo4jVector.from_existing_relationship_index(
embeddings, index_name="relationship", retrieval_query=retrieval_query
)
output = relationship_index.similarity_search("foo", k=1)
assert output == [Document(page_content="foo-text", metadata={"foo": "bar"})]
# Clean up through the shared helper.
drop_vector_indexes(docsearch)

Loading…
Cancel
Save