diff --git a/src/rag/mod.rs b/src/rag/mod.rs
index 3fdad02..ab7c3c7 100644
--- a/src/rag/mod.rs
+++ b/src/rag/mod.rs
@@ -262,8 +262,10 @@ impl Rag {
);
let documents = load(&path, &extension)
.with_context(|| format!("Failed to load file at '{path}'"))?;
- let documents =
- splitter.split_documents(&documents, &SplitterChunkHeaderOptions::default());
+ let split_options = SplitterChunkHeaderOptions::default().with_chunk_header(&format!(
+ "\npath: {path}\n\n\n"
+ ));
+ let documents = splitter.split_documents(&documents, &split_options);
rag_files.push(RagFile { path, documents });
progress(
&progress_tx,
diff --git a/src/rag/splitter/mod.rs b/src/rag/splitter/mod.rs
index 7fb48f2..88054f7 100644
--- a/src/rag/splitter/mod.rs
+++ b/src/rag/splitter/mod.rs
@@ -106,7 +106,6 @@ impl RecursiveCharacterTextSplitter {
let SplitterChunkHeaderOptions {
chunk_header,
chunk_overlap_header,
- append_chunk_overlap_header,
} = chunk_header_options;
let mut documents = Vec::new();
@@ -144,7 +143,7 @@ impl RecursiveCharacterTextSplitter {
Ordering::Equal => {}
}
- if *append_chunk_overlap_header {
+ if let Some(chunk_overlap_header) = chunk_overlap_header {
page_content += chunk_overlap_header;
}
}
@@ -288,16 +287,14 @@ impl RecursiveCharacterTextSplitter {
pub struct SplitterChunkHeaderOptions {
pub chunk_header: String,
- pub chunk_overlap_header: String,
- pub append_chunk_overlap_header: bool,
+ pub chunk_overlap_header: Option<String>,
}
impl Default for SplitterChunkHeaderOptions {
fn default() -> Self {
Self {
chunk_header: "".into(),
- chunk_overlap_header: "(cont'd) ".into(),
- append_chunk_overlap_header: false,
+ chunk_overlap_header: None,
}
}
}
@@ -313,14 +310,7 @@ impl SplitterChunkHeaderOptions {
// Set the value of chunk_overlap_header
#[allow(unused)]
pub fn with_chunk_overlap_header(mut self, overlap_header: &str) -> Self {
- self.chunk_overlap_header = overlap_header.to_string();
- self
- }
-
- // Set the value of append_chunk_overlap_header
- #[allow(unused)]
- pub fn with_append_chunk_overlap_header(mut self, value: bool) -> Self {
- self.append_chunk_overlap_header = value;
+ self.chunk_overlap_header = Some(overlap_header.to_string());
self
}
}
@@ -414,7 +404,7 @@ mod tests {
let splitter = RecursiveCharacterTextSplitter::new(3, 0, &[" "]);
let chunk_header_options = SplitterChunkHeaderOptions::default()
.with_chunk_header("SOURCE NAME: testing\n-----\n")
- .with_append_chunk_overlap_header(true);
+ .with_chunk_overlap_header("(cont'd) ");
let mut metadata1 = IndexMap::new();
metadata1.insert("source".into(), "1".into());
let mut metadata2 = IndexMap::new();