From dec00d30506db448b064ce2a770bc6cc7a9cfc09 Mon Sep 17 00:00:00 2001 From: Tomaz Bratanic Date: Fri, 29 Mar 2024 16:33:48 +0100 Subject: [PATCH] community[patch]: Add the ability to pass maps to neo4j retrieval query (#19758) Makes it easier to flatten complex values to text, so you don't have to use a lot of Cypher to do it. --- .../vectorstores/neo4j_vector.py | 29 ++++++++++++++++++- .../vectorstores/test_neo4jvector.py | 26 +++++++++++++++++ .../unit_tests/vectorstores/test_neo4j.py | 24 ++++++++++++++- 3 files changed, 77 insertions(+), 2 deletions(-) diff --git a/libs/community/langchain_community/vectorstores/neo4j_vector.py b/libs/community/langchain_community/vectorstores/neo4j_vector.py index e8afcc3ff6..05f4b3b38b 100644 --- a/libs/community/langchain_community/vectorstores/neo4j_vector.py +++ b/libs/community/langchain_community/vectorstores/neo4j_vector.py @@ -108,6 +108,31 @@ def remove_lucene_chars(text: str) -> str: return text.strip() +def dict_to_yaml_str(input_dict: Dict, indent: int = 0) -> str: + """ + Converts a dictionary to a YAML-like string without using external libraries. + + Parameters: + - input_dict (dict): The dictionary to convert. + - indent (int): The current indentation level. + + Returns: + - str: The YAML-like string representation of the input dictionary. + """ + yaml_str = "" + for key, value in input_dict.items(): + padding = " " * indent + if isinstance(value, dict): + yaml_str += f"{padding}{key}:\n{dict_to_yaml_str(value, indent + 1)}" + elif isinstance(value, list): + yaml_str += f"{padding}{key}:\n" + for item in value: + yaml_str += f"{padding}- {item}\n" + else: + yaml_str += f"{padding}{key}: {value}\n" + return yaml_str + + class Neo4jVector(VectorStore): """`Neo4j` vector index. 
@@ -646,7 +671,9 @@ class Neo4jVector(VectorStore): docs = [ ( Document( - page_content=result["text"], + page_content=dict_to_yaml_str(result["text"]) + if isinstance(result["text"], dict) + else result["text"], metadata={ k: v for k, v in result["metadata"].items() if v is not None }, diff --git a/libs/community/tests/integration_tests/vectorstores/test_neo4jvector.py b/libs/community/tests/integration_tests/vectorstores/test_neo4jvector.py index ffa340398f..8ef132b489 100644 --- a/libs/community/tests/integration_tests/vectorstores/test_neo4jvector.py +++ b/libs/community/tests/integration_tests/vectorstores/test_neo4jvector.py @@ -741,3 +741,29 @@ def test_retrieval_params() -> None: Document(page_content="test", metadata={"test": "test1"}), Document(page_content="test", metadata={"test": "test1"}), ] + + +def test_retrieval_dictionary() -> None: + """Test that a map returned as text is converted to a YAML-like string""" + docsearch = Neo4jVector.from_texts( + texts=texts, + embedding=FakeEmbeddings(), + pre_delete_collection=True, + retrieval_query=""" + RETURN { + name:'John', + age: 30, + skills: ["Python", "Data Analysis", "Machine Learning"]} as text, + score, {} AS metadata + """, + ) + expected_output = [ + Document( + page_content=( + "skills:\n- Python\n- Data Analysis\n- " + "Machine Learning\nage: 30\nname: John\n" + ) + ) + ] + output = docsearch.similarity_search("Foo", k=1) + assert output == expected_output diff --git a/libs/community/tests/unit_tests/vectorstores/test_neo4j.py b/libs/community/tests/unit_tests/vectorstores/test_neo4j.py index 280334283e..1bc85d1bbb 100644 --- a/libs/community/tests/unit_tests/vectorstores/test_neo4j.py +++ b/libs/community/tests/unit_tests/vectorstores/test_neo4j.py @@ -1,6 +1,9 @@ """Test Neo4j functionality.""" -from langchain_community.vectorstores.neo4j_vector import remove_lucene_chars +from langchain_community.vectorstores.neo4j_vector import ( + dict_to_yaml_str, + remove_lucene_chars, +) def test_escaping_lucene() -> None: @@ 
-43,3 +46,22 @@ def test_escaping_lucene() -> None: remove_lucene_chars("It is the end of the world. Take shelter~") == "It is the end of the world. Take shelter" ) + + +def test_converting_to_yaml() -> None: + example_dict = { + "name": "John Doe", + "age": 30, + "skills": ["Python", "Data Analysis", "Machine Learning"], + "location": {"city": "Ljubljana", "country": "Slovenia"}, + } + + yaml_str = dict_to_yaml_str(example_dict) + + expected_output = ( + "name: John Doe\nage: 30\nskills:\n- Python\n- " + "Data Analysis\n- Machine Learning\nlocation:\n city: Ljubljana\n" + " country: Slovenia\n" + ) + + assert yaml_str == expected_output