2019-06-06 09:00:13 +00:00
|
|
|
use crate::adapters::*;
|
2019-06-07 21:04:18 +00:00
|
|
|
use crate::args::RgaArgs;
|
2019-06-06 09:00:13 +00:00
|
|
|
use crate::CachingWriter;
|
2019-06-07 17:00:24 +00:00
|
|
|
use failure::Fallible;
|
2019-06-06 09:00:13 +00:00
|
|
|
use failure::{format_err, Error};
|
|
|
|
use path_clean::PathClean;
|
2019-06-07 21:04:18 +00:00
|
|
|
use std::convert::TryInto;
|
2019-06-11 12:11:08 +00:00
|
|
|
use std::io::BufRead;
|
|
|
|
use std::io::BufReader;
|
2019-06-06 23:17:55 +00:00
|
|
|
use std::io::BufWriter;
|
2019-06-07 18:12:24 +00:00
|
|
|
use std::sync::{Arc, RwLock};
|
2019-06-06 09:00:13 +00:00
|
|
|
|
2019-06-07 18:12:24 +00:00
|
|
|
#[derive(Clone)]
|
2019-06-07 21:04:18 +00:00
|
|
|
pub struct PreprocConfig<'a> {
|
2019-06-07 18:12:24 +00:00
|
|
|
pub cache: Option<Arc<RwLock<dyn crate::preproc_cache::PreprocCache>>>,
|
2019-06-07 21:04:18 +00:00
|
|
|
pub args: &'a RgaArgs,
|
2019-06-06 09:00:13 +00:00
|
|
|
}
|
2019-06-06 21:50:58 +00:00
|
|
|
/**
|
|
|
|
* preprocess a file as defined in `ai`.
|
|
|
|
*
|
|
|
|
* If a cache is passed, read/write to it.
|
|
|
|
*
|
|
|
|
*/
|
2019-06-07 17:00:24 +00:00
|
|
|
pub fn rga_preproc(ai: AdaptInfo) -> Result<(), Error> {
|
2019-06-06 09:00:13 +00:00
|
|
|
let AdaptInfo {
|
|
|
|
filepath_hint,
|
2019-06-06 21:43:30 +00:00
|
|
|
is_real_file,
|
2019-06-06 09:00:13 +00:00
|
|
|
inp,
|
|
|
|
oup,
|
|
|
|
line_prefix,
|
2019-06-07 17:00:24 +00:00
|
|
|
config,
|
2019-06-07 18:12:24 +00:00
|
|
|
archive_recursion_depth,
|
2019-06-06 09:00:13 +00:00
|
|
|
..
|
|
|
|
} = ai;
|
2019-06-07 21:04:18 +00:00
|
|
|
let PreprocConfig { mut cache, args } = config;
|
2019-06-11 11:34:04 +00:00
|
|
|
let adapters = adapter_matcher(&args.adapters[..], args.accurate)?;
|
2019-06-06 09:00:13 +00:00
|
|
|
let filename = filepath_hint
|
|
|
|
.file_name()
|
2019-06-06 21:43:30 +00:00
|
|
|
.ok_or_else(|| format_err!("Empty filename"))?;
|
2019-06-07 18:12:24 +00:00
|
|
|
eprintln!("depth: {}", archive_recursion_depth);
|
2019-06-11 11:34:04 +00:00
|
|
|
if archive_recursion_depth >= args.max_archive_recursion {
|
2019-06-07 18:12:24 +00:00
|
|
|
writeln!(oup, "{}[rga: max archive recursion reached]", line_prefix)?;
|
|
|
|
return Ok(());
|
|
|
|
}
|
2019-06-06 09:00:13 +00:00
|
|
|
|
2019-06-06 22:18:04 +00:00
|
|
|
eprintln!("path_hint: {:?}", filepath_hint);
|
2019-06-06 09:00:13 +00:00
|
|
|
|
2019-06-11 12:11:08 +00:00
|
|
|
// todo: figure out when using a bufreader is a good idea and when it is not
|
|
|
|
// seems to beed for File::open() reads, but not sure about within archives (tar, zip)
|
|
|
|
let inp = &mut BufReader::with_capacity(1 << 13, inp);
|
|
|
|
|
|
|
|
let mimetype = if args.accurate {
|
|
|
|
let buf = inp.fill_buf()?; // fill but do not consume!
|
|
|
|
let mimetype = tree_magic::from_u8(buf);
|
|
|
|
eprintln!("mimetype: {:?}", mimetype);
|
|
|
|
Some(mimetype)
|
|
|
|
} else {
|
|
|
|
None
|
|
|
|
};
|
2019-06-06 09:00:13 +00:00
|
|
|
let adapter = adapters(FileMeta {
|
2019-06-11 12:11:08 +00:00
|
|
|
mimetype,
|
2019-06-06 09:00:13 +00:00
|
|
|
lossy_filename: filename.to_string_lossy().to_string(),
|
|
|
|
});
|
|
|
|
match adapter {
|
|
|
|
Some(ad) => {
|
|
|
|
let meta = ad.metadata();
|
|
|
|
eprintln!("adapter: {}", &meta.name);
|
|
|
|
let db_name = format!("{}.v{}", meta.name, meta.version);
|
2019-06-07 18:12:24 +00:00
|
|
|
if let Some(cache) = cache.as_mut() {
|
2019-06-06 09:00:13 +00:00
|
|
|
let cache_key: Vec<u8> = {
|
|
|
|
let clean_path = filepath_hint.to_owned().clean();
|
|
|
|
let meta = std::fs::metadata(&filepath_hint)?;
|
|
|
|
|
|
|
|
let key = (
|
|
|
|
clean_path,
|
|
|
|
meta.modified().expect("weird OS that can't into mtime"),
|
|
|
|
);
|
|
|
|
eprintln!("cache key: {:?}", key);
|
|
|
|
|
|
|
|
bincode::serialize(&key).expect("could not serialize path") // key in the cache database
|
|
|
|
};
|
2019-06-07 18:12:24 +00:00
|
|
|
cache.write().unwrap().get_or_run(
|
2019-06-07 17:00:24 +00:00
|
|
|
&db_name,
|
|
|
|
&cache_key,
|
|
|
|
Box::new(|| -> Fallible<Option<Vec<u8>>> {
|
2019-06-06 23:17:55 +00:00
|
|
|
// wrapping BufWriter here gives ~10% perf boost
|
2019-06-07 21:04:18 +00:00
|
|
|
let mut compbuf = BufWriter::new(CachingWriter::new(
|
|
|
|
oup,
|
2019-06-11 11:34:04 +00:00
|
|
|
args.cache_max_blob_len.try_into().unwrap(),
|
|
|
|
args.cache_compression_level.try_into().unwrap(),
|
2019-06-07 21:04:18 +00:00
|
|
|
)?);
|
2019-06-06 09:00:13 +00:00
|
|
|
eprintln!("adapting...");
|
|
|
|
ad.adapt(AdaptInfo {
|
|
|
|
line_prefix,
|
|
|
|
filepath_hint,
|
2019-06-06 21:43:30 +00:00
|
|
|
is_real_file,
|
2019-06-06 09:00:13 +00:00
|
|
|
inp,
|
|
|
|
oup: &mut compbuf,
|
2019-06-07 18:12:24 +00:00
|
|
|
archive_recursion_depth,
|
2019-06-07 21:04:18 +00:00
|
|
|
config: PreprocConfig { cache: None, args },
|
2019-06-06 09:00:13 +00:00
|
|
|
})?;
|
2019-06-06 23:17:55 +00:00
|
|
|
let compressed = compbuf
|
|
|
|
.into_inner()
|
|
|
|
.map_err(|_| "could not finish zstd")
|
|
|
|
.unwrap()
|
|
|
|
.finish()?;
|
2019-06-06 09:00:13 +00:00
|
|
|
if let Some(cached) = compressed {
|
|
|
|
eprintln!("compressed len: {}", cached.len());
|
2019-06-07 21:17:33 +00:00
|
|
|
Ok(Some(cached))
|
|
|
|
} else {
|
|
|
|
Ok(None)
|
|
|
|
}
|
2019-06-07 17:00:24 +00:00
|
|
|
}),
|
|
|
|
Box::new(|cached| {
|
|
|
|
let stdouti = std::io::stdout();
|
|
|
|
zstd::stream::copy_decode(cached, stdouti.lock())?;
|
2019-06-06 09:00:13 +00:00
|
|
|
Ok(())
|
2019-06-07 17:00:24 +00:00
|
|
|
}),
|
|
|
|
)?;
|
|
|
|
Ok(())
|
2019-06-06 09:00:13 +00:00
|
|
|
} else {
|
|
|
|
eprintln!("adapting...");
|
|
|
|
ad.adapt(AdaptInfo {
|
|
|
|
line_prefix,
|
|
|
|
filepath_hint,
|
2019-06-06 21:43:30 +00:00
|
|
|
is_real_file,
|
2019-06-06 09:00:13 +00:00
|
|
|
inp,
|
|
|
|
oup,
|
2019-06-07 18:12:24 +00:00
|
|
|
archive_recursion_depth,
|
2019-06-07 21:04:18 +00:00
|
|
|
config: PreprocConfig { cache: None, args },
|
2019-06-06 09:00:13 +00:00
|
|
|
})?;
|
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
}
|
|
|
|
None => {
|
2019-06-06 22:57:53 +00:00
|
|
|
// allow passthrough if the file is in an archive,
|
|
|
|
// otherwise it should have been filtered out by rg pre-glob since rg can handle those better than us
|
2019-06-06 21:43:30 +00:00
|
|
|
let allow_cat = !is_real_file;
|
2019-06-06 09:00:13 +00:00
|
|
|
if allow_cat {
|
2019-06-06 22:18:04 +00:00
|
|
|
spawning::postproc_line_prefix(line_prefix, inp, oup)?;
|
2019-06-06 09:00:13 +00:00
|
|
|
Ok(())
|
|
|
|
} else {
|
|
|
|
Err(format_err!("No adapter found for file {:?}", filename))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|