from pathlib import Path from langchain_community.document_loaders import TextLoader from langchain_community.embeddings.openai import OpenAIEmbeddings from langchain_community.graphs import Neo4jGraph from langchain_community.vectorstores import Neo4jVector from langchain_text_splitters import TokenTextSplitter txt_path = Path(__file__).parent / "dune.txt" graph = Neo4jGraph() # Load the text file loader = TextLoader(str(txt_path)) documents = loader.load() # Define chunking strategy parent_splitter = TokenTextSplitter(chunk_size=512, chunk_overlap=24) child_splitter = TokenTextSplitter(chunk_size=100, chunk_overlap=24) # Store parent-child patterns into graph parent_documents = parent_splitter.split_documents(documents) for parent in parent_documents: child_documents = child_splitter.split_documents([parent]) params = { "parent": parent.page_content, "children": [c.page_content for c in child_documents], } graph.query( """ CREATE (p:Parent {text: $parent}) WITH p UNWIND $children AS child CREATE (c:Child {text: child}) CREATE (c)-[:HAS_PARENT]->(p) """, params, ) # Calculate embedding values on the child nodes Neo4jVector.from_existing_graph( OpenAIEmbeddings(), index_name="retrieval", node_label="Child", text_node_properties=["text"], embedding_node_property="embedding", )