from pathlib import Path

from langchain.text_splitter import TokenTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain_community.embeddings.openai import OpenAIEmbeddings
from langchain_community.vectorstores import Neo4jVector

txt_path = Path(__file__).parent / "dune.txt"
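
# OpenAI and Neo4j credentials are assumed to be supplied via environment
# variables (OPENAI_API_KEY for OpenAIEmbeddings; NEO4J_URI, NEO4J_USERNAME and
# NEO4J_PASSWORD for Neo4jVector); both also accept explicit connection arguments.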

# Load the text file
loader = TextLoader(str(txt_path))
raw_documents = loader.load()

# Define chunking strategy
splitter = TokenTextSplitter(chunk_size=512, chunk_overlap=24)
documents = splitter.split_documents(raw_documents)
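
# TokenTextSplitter counts length in tokens rather than characters, so each
# chunk holds at most 512 tokens and overlaps its neighbour by 24 tokens to
# preserve context across chunk boundaries.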

# Calculate embedding values and store them in the graph
Neo4jVector.from_documents(
    documents,
    OpenAIEmbeddings(),
    index_name="dune",
)
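
# Optional sanity check, not part of the original script: a minimal retrieval
# sketch that reconnects to the freshly created "dune" index and runs a
# similarity search (assumes the same OpenAI/Neo4j credentials are still set;
# the query string is just an example).
store = Neo4jVector.from_existing_index(
    OpenAIEmbeddings(),
    index_name="dune",
)
for doc in store.similarity_search("Who is Paul Atreides?", k=3):
    print(doc.page_content[:100])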