diff --git a/Cargo.toml b/Cargo.toml index c28b6b6..f4c520b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "rga" -description = "ripgrep but for pdf, ebooks, Office documents, archives, etc" +description = "ripgrep but also search in PDFs, E-Books, Office documents, etc, and also in archives" license = "AGPL-3.0-or-later" version = "0.4.0" repository = "https://github.com/phiresky/rga" diff --git a/exampledir/droste.zip b/exampledir/droste.zip new file mode 100644 index 0000000..8b53ca5 Binary files /dev/null and b/exampledir/droste.zip differ diff --git a/src/adapters/mod.rs b/src/adapters/mod.rs index 219a8d4..055e064 100644 --- a/src/adapters/mod.rs +++ b/src/adapters/mod.rs @@ -57,7 +57,7 @@ pub struct AdaptInfo<'a> { /// prefix every output line with this string to better indicate the file's location if it is in some archive pub line_prefix: &'a str, // pub adapt_subobject: &'a dyn Fn(AdaptInfo) -> Fallible<()>, - pub config: &'a mut PreprocConfig, + pub config: PreprocConfig, } pub fn extension_to_regex(extension: &str) -> Regex { diff --git a/src/adapters/sqlite.rs b/src/adapters/sqlite.rs index 5386aa6..71fd679 100644 --- a/src/adapters/sqlite.rs +++ b/src/adapters/sqlite.rs @@ -1,13 +1,9 @@ -use super::spawning::map_exe_error; use super::*; use failure::*; use lazy_static::lazy_static; -use rusqlite::types::{ToSql, ValueRef}; +use rusqlite::types::ValueRef; use rusqlite::*; -use serde::{Deserialize, Serialize}; use std::convert::TryInto; -use std::io::BufReader; -use std::process::*; static EXTENSIONS: &[&str] = &["db", "db3", "sqlite", "sqlite3"]; diff --git a/src/adapters/tar.rs b/src/adapters/tar.rs index ff808c9..2c6ff92 100644 --- a/src/adapters/tar.rs +++ b/src/adapters/tar.rs @@ -82,7 +82,7 @@ impl FileAdapter for TarAdapter { inp: &mut file, oup, line_prefix, - config, + config: config.clone(), }; rga_preproc(ai2)?; } diff --git a/src/adapters/zip.rs b/src/adapters/zip.rs index 38a505c..3096470 100644 --- 
a/src/adapters/zip.rs +++ b/src/adapters/zip.rs @@ -61,7 +61,8 @@ impl FileAdapter for ZipAdapter { continue; } eprintln!( - "{}|{}: {} bytes ({} bytes packed)", + "{}{}|{}: {} bytes ({} bytes packed)", + line_prefix, filepath_hint.to_string_lossy(), file.name(), file.size(), @@ -75,7 +76,7 @@ impl FileAdapter for ZipAdapter { oup, line_prefix, archive_recursion_depth: archive_recursion_depth + 1, - config, + config: config.clone(), })?; } Err(e) => return Err(e.into()), diff --git a/src/bin/rga-preproc.rs b/src/bin/rga-preproc.rs index 1026a9c..fad5284 100644 --- a/src/bin/rga-preproc.rs +++ b/src/bin/rga-preproc.rs @@ -4,7 +4,6 @@ use rga::preproc::*; use std::env; use std::fs::File; use std::io::BufReader; - fn main() -> Result<(), Error> { let path = { let filepath = std::env::args_os() @@ -28,7 +27,10 @@ fn main() -> Result<(), Error> { oup: &mut o, line_prefix: "", archive_recursion_depth: 0, - config: &mut PreprocConfig { cache }, + config: PreprocConfig { + cache, + max_archive_recursion: 3, + }, }; rga_preproc(ai) diff --git a/src/preproc.rs b/src/preproc.rs index 9bbdb87..423e79d 100644 --- a/src/preproc.rs +++ b/src/preproc.rs @@ -3,14 +3,16 @@ use crate::CachingWriter; use failure::Fallible; use failure::{format_err, Error}; use path_clean::PathClean; -use std::convert::AsRef; use std::io::BufWriter; // longest compressed conversion output to save in cache const MAX_DB_BLOB_LEN: usize = 2_000_000; const ZSTD_LEVEL: i32 = 12; +use std::sync::{Arc, RwLock}; +#[derive(Clone)] pub struct PreprocConfig { - pub cache: Option>, + pub cache: Option>>, + pub max_archive_recursion: i32, } /** * preprocess a file as defined in `ai`. @@ -27,11 +29,21 @@ pub fn rga_preproc(ai: AdaptInfo) -> Result<(), Error> { oup, line_prefix, config, + archive_recursion_depth, .. 
} = ai; + let PreprocConfig { + mut cache, + max_archive_recursion, + } = config; let filename = filepath_hint .file_name() .ok_or_else(|| format_err!("Empty filename"))?; + eprintln!("depth: {}", archive_recursion_depth); + if archive_recursion_depth >= config.max_archive_recursion { + writeln!(oup, "{}[rga: max archive recursion reached]", line_prefix)?; + return Ok(()); + } eprintln!("path_hint: {:?}", filepath_hint); @@ -49,7 +61,7 @@ pub fn rga_preproc(ai: AdaptInfo) -> Result<(), Error> { let meta = ad.metadata(); eprintln!("adapter: {}", &meta.name); let db_name = format!("{}.v{}", meta.name, meta.version); - if let Some(cache) = config.cache.as_mut() { + if let Some(cache) = cache.as_mut() { let cache_key: Vec<u8> = { let clean_path = filepath_hint.to_owned().clean(); let meta = std::fs::metadata(&filepath_hint)?; @@ -62,7 +74,7 @@ pub fn rga_preproc(ai: AdaptInfo) -> Result<(), Error> { bincode::serialize(&key).expect("could not serialize path") // key in the cache database }; - cache.get_or_run( + cache.write().unwrap().get_or_run( &db_name, &cache_key, Box::new(|| -> Fallible<Option<Vec<u8>>> { @@ -76,8 +88,11 @@ pub fn rga_preproc(ai: AdaptInfo) -> Result<(), Error> { is_real_file, inp, oup: &mut compbuf, - archive_recursion_depth: 0, - config: &mut PreprocConfig { cache: None }, + archive_recursion_depth, + config: PreprocConfig { + cache: None, + max_archive_recursion, + }, })?; let compressed = compbuf .into_inner() @@ -104,8 +119,11 @@ pub fn rga_preproc(ai: AdaptInfo) -> Result<(), Error> { is_real_file, inp, oup, - archive_recursion_depth: 0, - config: &mut PreprocConfig { cache: None }, + archive_recursion_depth, + config: PreprocConfig { + cache: None, + max_archive_recursion, + }, })?; Ok(()) } diff --git a/src/preproc_cache.rs b/src/preproc_cache.rs index f041ac6..ce9ba3b 100644 --- a/src/preproc_cache.rs +++ b/src/preproc_cache.rs @@ -1,7 +1,8 @@ use failure::{format_err, Fallible}; +use std::sync::{Arc, RwLock}; -pub fn open() -> Fallible<Box<dyn PreprocCache>> { - 
Ok(Box::new(LmdbCache::open()?)) +pub fn open() -> Fallible<Arc<RwLock<dyn PreprocCache>>> { + Ok(Arc::new(RwLock::new(LmdbCache::open()?))) } pub trait PreprocCache { // possible without second lambda?