|
|
|
@ -68,10 +68,24 @@ class SearchType(str, enum.Enum):
|
|
|
|
|
# Search type applied when a caller does not pass `search_type` explicitly
# (used as the parameter default by the store's factory methods).
DEFAULT_SEARCH_TYPE = SearchType.VECTOR
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _get_search_index_query(search_type: SearchType) -> str:
|
|
|
|
|
class IndexType(str, enum.Enum):
    """Kinds of graph entity a vector index can be defined on.

    The members are plain strings as well as enum members, so they compare
    equal to the raw `"NODE"` / `"RELATIONSHIP"` values.
    """

    # Vector index defined on nodes.
    NODE = "NODE"
    # Vector index defined on relationships.
    RELATIONSHIP = "RELATIONSHIP"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Index type assumed when a caller does not pass `index_type` explicitly:
# node indexes are the default, relationship indexes must be requested.
DEFAULT_INDEX_TYPE = IndexType.NODE
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _get_search_index_query(
|
|
|
|
|
search_type: SearchType, index_type: IndexType = DEFAULT_INDEX_TYPE
|
|
|
|
|
) -> str:
|
|
|
|
|
if index_type == IndexType.NODE:
|
|
|
|
|
type_to_query_map = {
|
|
|
|
|
SearchType.VECTOR: (
|
|
|
|
|
"CALL db.index.vector.queryNodes($index, $k, $embedding) YIELD node, score "
|
|
|
|
|
"CALL db.index.vector.queryNodes($index, $k, $embedding) "
|
|
|
|
|
"YIELD node, score "
|
|
|
|
|
),
|
|
|
|
|
SearchType.HYBRID: (
|
|
|
|
|
"CALL { "
|
|
|
|
@ -81,8 +95,8 @@ def _get_search_index_query(search_type: SearchType) -> str:
|
|
|
|
|
"UNWIND nodes AS n "
|
|
|
|
|
# We use 0 as min
|
|
|
|
|
"RETURN n.node AS node, (n.score / max) AS score UNION "
|
|
|
|
|
"CALL db.index.fulltext.queryNodes($keyword_index, $query, {limit: $k}) "
|
|
|
|
|
"YIELD node, score "
|
|
|
|
|
"CALL db.index.fulltext.queryNodes($keyword_index, $query, "
|
|
|
|
|
"{limit: $k}) YIELD node, score "
|
|
|
|
|
"WITH collect({node:node, score:score}) AS nodes, max(score) AS max "
|
|
|
|
|
"UNWIND nodes AS n "
|
|
|
|
|
# We use 0 as min
|
|
|
|
@ -93,6 +107,11 @@ def _get_search_index_query(search_type: SearchType) -> str:
|
|
|
|
|
),
|
|
|
|
|
}
|
|
|
|
|
return type_to_query_map[search_type]
|
|
|
|
|
else:
|
|
|
|
|
return (
|
|
|
|
|
"CALL db.index.vector.queryRelationships($index, $k, $embedding) "
|
|
|
|
|
"YIELD relationship, score "
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def check_if_not_null(props: List[str], values: List[Any]) -> None:
|
|
|
|
@ -463,6 +482,7 @@ class Neo4jVector(VectorStore):
|
|
|
|
|
pre_delete_collection: bool = False,
|
|
|
|
|
retrieval_query: str = "",
|
|
|
|
|
relevance_score_fn: Optional[Callable[[float], float]] = None,
|
|
|
|
|
index_type: IndexType = DEFAULT_INDEX_TYPE,
|
|
|
|
|
) -> None:
|
|
|
|
|
try:
|
|
|
|
|
import neo4j
|
|
|
|
@ -541,6 +561,7 @@ class Neo4jVector(VectorStore):
|
|
|
|
|
self.override_relevance_score_fn = relevance_score_fn
|
|
|
|
|
self.retrieval_query = retrieval_query
|
|
|
|
|
self.search_type = search_type
|
|
|
|
|
self._index_type = index_type
|
|
|
|
|
# Calculate embedding dimension
|
|
|
|
|
self.embedding_dimension = len(embedding.embed_query("foo"))
|
|
|
|
|
|
|
|
|
@ -615,7 +636,7 @@ class Neo4jVector(VectorStore):
|
|
|
|
|
# Flag for enterprise
|
|
|
|
|
self._is_enterprise = True if db_data[0]["edition"] == "enterprise" else False
|
|
|
|
|
|
|
|
|
|
def retrieve_existing_index(self) -> Optional[int]:
|
|
|
|
|
def retrieve_existing_index(self) -> Tuple[Optional[int], Optional[str]]:
|
|
|
|
|
"""
|
|
|
|
|
Check if the vector index exists in the Neo4j database
|
|
|
|
|
and returns its embedding dimension.
|
|
|
|
@ -630,11 +651,11 @@ class Neo4jVector(VectorStore):
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
index_information = self.query(
|
|
|
|
|
"SHOW INDEXES YIELD name, type, labelsOrTypes, properties, options "
|
|
|
|
|
"WHERE type = 'VECTOR' AND (name = $index_name "
|
|
|
|
|
"SHOW INDEXES YIELD name, type, entityType, labelsOrTypes, "
|
|
|
|
|
"properties, options WHERE type = 'VECTOR' AND (name = $index_name "
|
|
|
|
|
"OR (labelsOrTypes[0] = $node_label AND "
|
|
|
|
|
"properties[0] = $embedding_node_property)) "
|
|
|
|
|
"RETURN name, labelsOrTypes, properties, options ",
|
|
|
|
|
"RETURN name, entityType, labelsOrTypes, properties, options ",
|
|
|
|
|
params={
|
|
|
|
|
"index_name": self.index_name,
|
|
|
|
|
"node_label": self.node_label,
|
|
|
|
@ -647,13 +668,14 @@ class Neo4jVector(VectorStore):
|
|
|
|
|
self.index_name = index_information[0]["name"]
|
|
|
|
|
self.node_label = index_information[0]["labelsOrTypes"][0]
|
|
|
|
|
self.embedding_node_property = index_information[0]["properties"][0]
|
|
|
|
|
self._index_type = index_information[0]["entityType"]
|
|
|
|
|
embedding_dimension = index_information[0]["options"]["indexConfig"][
|
|
|
|
|
"vector.dimensions"
|
|
|
|
|
]
|
|
|
|
|
|
|
|
|
|
return embedding_dimension
|
|
|
|
|
return embedding_dimension, index_information[0]["entityType"]
|
|
|
|
|
except IndexError:
|
|
|
|
|
return None
|
|
|
|
|
return None, None
|
|
|
|
|
|
|
|
|
|
def retrieve_existing_fts_index(
|
|
|
|
|
self, text_node_properties: List[str] = []
|
|
|
|
@ -754,7 +776,13 @@ class Neo4jVector(VectorStore):
|
|
|
|
|
**kwargs,
|
|
|
|
|
)
|
|
|
|
|
# Check if the vector index already exists
|
|
|
|
|
embedding_dimension = store.retrieve_existing_index()
|
|
|
|
|
embedding_dimension, index_type = store.retrieve_existing_index()
|
|
|
|
|
|
|
|
|
|
# Raise error if relationship index type
|
|
|
|
|
if index_type == "RELATIONSHIP":
|
|
|
|
|
raise ValueError(
|
|
|
|
|
"Data ingestion is not supported with relationship vector index."
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
# If the vector index doesn't exist yet
|
|
|
|
|
if not embedding_dimension:
|
|
|
|
@ -976,9 +1004,16 @@ class Neo4jVector(VectorStore):
|
|
|
|
|
index_query = base_index_query + filter_snippets + base_cosine_query
|
|
|
|
|
|
|
|
|
|
else:
|
|
|
|
|
index_query = _get_search_index_query(self.search_type)
|
|
|
|
|
index_query = _get_search_index_query(self.search_type, self._index_type)
|
|
|
|
|
filter_params = {}
|
|
|
|
|
|
|
|
|
|
if self._index_type == IndexType.RELATIONSHIP:
|
|
|
|
|
default_retrieval = (
|
|
|
|
|
f"RETURN relationship.`{self.text_node_property}` AS text, score, "
|
|
|
|
|
f"relationship {{.*, `{self.text_node_property}`: Null, "
|
|
|
|
|
f"`{self.embedding_node_property}`: Null, id: Null }} AS metadata"
|
|
|
|
|
)
|
|
|
|
|
else:
|
|
|
|
|
default_retrieval = (
|
|
|
|
|
f"RETURN node.`{self.text_node_property}` AS text, score, "
|
|
|
|
|
f"node {{.*, `{self.text_node_property}`: Null, "
|
|
|
|
@ -1141,7 +1176,15 @@ class Neo4jVector(VectorStore):
|
|
|
|
|
**kwargs,
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
embedding_dimension = store.retrieve_existing_index()
|
|
|
|
|
embedding_dimension, index_type = store.retrieve_existing_index()
|
|
|
|
|
|
|
|
|
|
# Raise error if relationship index type
|
|
|
|
|
if index_type == "RELATIONSHIP":
|
|
|
|
|
raise ValueError(
|
|
|
|
|
"Relationship vector index is not supported with "
|
|
|
|
|
"`from_existing_index` method. Please use the "
|
|
|
|
|
"`from_existing_relationship_index` method."
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
if not embedding_dimension:
|
|
|
|
|
raise ValueError(
|
|
|
|
@ -1174,6 +1217,61 @@ class Neo4jVector(VectorStore):
|
|
|
|
|
|
|
|
|
|
return store
|
|
|
|
|
|
|
|
|
|
@classmethod
def from_existing_relationship_index(
    cls: Type[Neo4jVector],
    embedding: Embeddings,
    index_name: str,
    search_type: SearchType = DEFAULT_SEARCH_TYPE,
    **kwargs: Any,
) -> Neo4jVector:
    """
    Return a store backed by an existing Neo4j relationship vector index.

    No embeddings are inserted; the index is only looked up and validated.
    Neo4j credentials are required in the form of `url`, `username`,
    and `password` and optional `database` parameters; they travel through
    `kwargs` to the constructor together with the `index_name` definition.

    Args:
        embedding: Embedding function handed to the constructor.
        index_name: Name of the relationship vector index to attach to.
        search_type: Requested search type; hybrid search is rejected.
        **kwargs: Extra keyword arguments forwarded to the constructor.

    Returns:
        The constructed store.

    Raises:
        ValueError: If hybrid search is requested, if the index lookup
            yields no embedding dimension, if the index found is a node
            index, or if the index dimension differs from the dimension
            of the embedding function.
    """
    # Reject the unsupported combination before the store is constructed.
    hybrid_requested = search_type == SearchType.HYBRID
    if hybrid_requested:
        raise ValueError(
            "Hybrid search is not supported in combination "
            "with relationship vector index"
        )

    instance = cls(embedding=embedding, index_name=index_name, **kwargs)

    lookup = instance.retrieve_existing_index()
    index_dimension, entity_type = lookup

    # A falsy dimension means the lookup did not find a usable index.
    if not index_dimension:
        raise ValueError(
            "The specified vector index name does not exist. "
            "Make sure to check if you spelled it correctly"
        )

    # This factory only serves relationship indexes; point node-index
    # users at the dedicated factory instead.
    if entity_type == "NODE":
        raise ValueError(
            "Node vector index is not supported with "
            "`from_existing_relationship_index` method. Please use the "
            "`from_existing_index` method."
        )

    # The embedding function must produce vectors of the index's dimension.
    if instance.embedding_dimension != index_dimension:
        raise ValueError(
            "The provided embedding function and vector index "
            "dimensions do not match.\n"
            f"Embedding function dimension: {instance.embedding_dimension}\n"
            f"Vector index dimension: {index_dimension}"
        )

    return instance
|
|
|
|
|
|
|
|
|
|
@classmethod
|
|
|
|
|
def from_documents(
|
|
|
|
|
cls: Type[Neo4jVector],
|
|
|
|
@ -1266,7 +1364,15 @@ class Neo4jVector(VectorStore):
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
# Check if the vector index already exists
|
|
|
|
|
embedding_dimension = store.retrieve_existing_index()
|
|
|
|
|
embedding_dimension, index_type = store.retrieve_existing_index()
|
|
|
|
|
|
|
|
|
|
# Raise error if relationship index type
|
|
|
|
|
if index_type == "RELATIONSHIP":
|
|
|
|
|
raise ValueError(
|
|
|
|
|
"`from_existing_graph` method does not support "
|
|
|
|
|
" existing relationship vector index. "
|
|
|
|
|
"Please use `from_existing_relationship_index` method"
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
# If the vector index doesn't exist yet
|
|
|
|
|
if not embedding_dimension:
|
|
|
|
|