diff --git a/exampledir/screenshot.png b/exampledir/screenshot.png new file mode 100644 index 0000000..4d683e5 Binary files /dev/null and b/exampledir/screenshot.png differ diff --git a/src/adapters/pdfpages.rs b/src/adapters/pdfpages.rs index 7931317..0705935 100644 --- a/src/adapters/pdfpages.rs +++ b/src/adapters/pdfpages.rs @@ -41,38 +41,6 @@ impl GetMetadata for PdfPagesAdapter { } } -/*// todo: do this in an actually streaming fashion and less slow -// IEND chunk + PDF magic -// 4945 4e44 ae42 6082 8950 4e47 0d0a 1a0a -let split_seq = hex_literal::hex!("4945 4e44 ae42 6082 8950 4e47 0d0a 1a0a"); -let split_seq_inx = 8; -fn split_by_seq<'a>( - split_seq: &'a [u8], - split_inx: usize, - read: &mut Read, -) -> Fallible + 'a> { - let regex = split_seq - .iter() - .map(|c| format!("\\x{:0>2x}", c)) - .collect::>() - .join(""); - let restr = format!("(?-u){}", regex); - eprintln!("re: {}", restr); - let re = regex::bytes::Regex::new(&restr)?; - - let mut all = Vec::new(); - read.read_to_end(&mut all)?; - let mut out: Vec>> = Vec::new(); - let mut last = 0; - for (i, split) in re.find_iter(&all).enumerate() { - let pos = split.start() + split_inx; - out.push(Cursor::new(Vec::from(&all[last..pos]))); - last = pos; - } - out.push(Cursor::new(Vec::from(&all[last..]))); - Ok(out) -}*/ - impl FileAdapter for PdfPagesAdapter { fn adapt(&self, ai: AdaptInfo) -> Fallible<()> { let AdaptInfo { @@ -98,17 +66,13 @@ impl FileAdapter for PdfPagesAdapter { let mut cmd = Command::new(exe_name); cmd.arg("convert") .arg("-density") - .arg("300") + .arg("200") .arg(inp_fname) .arg("+adjoin") .arg(out_fname); let mut cmd = cmd.spawn().map_err(|e| { - map_exe_error( - e, - exe_name, - "Could not find gm. Make sure you have graphicsmagick installed.", - ) + map_exe_error(e, exe_name, "Make sure you have graphicsmagick installed.") })?; let args = config.args; // TODO: how to handle this copying better? @@ -133,7 +97,7 @@ impl FileAdapter for PdfPagesAdapter { is_real_file: false, inp: &mut ele, oup, - line_prefix, + line_prefix: &format!("{}Page {}:", line_prefix, i + 1), archive_recursion_depth: archive_recursion_depth + 1, config: PreprocConfig { cache: None, args }, })?; @@ -141,3 +105,35 @@ impl FileAdapter for PdfPagesAdapter { Ok(()) } } + +/*// todo: do this in an actually streaming fashion and less slow +// IEND chunk + PDF magic +// 4945 4e44 ae42 6082 8950 4e47 0d0a 1a0a +let split_seq = hex_literal::hex!("4945 4e44 ae42 6082 8950 4e47 0d0a 1a0a"); +let split_seq_inx = 8; +fn split_by_seq<'a>( + split_seq: &'a [u8], + split_inx: usize, + read: &mut Read, +) -> Fallible + 'a> { + let regex = split_seq + .iter() + .map(|c| format!("\\x{:0>2x}", c)) + .collect::>() + .join(""); + let restr = format!("(?-u){}", regex); + eprintln!("re: {}", restr); + let re = regex::bytes::Regex::new(&restr)?; + + let mut all = Vec::new(); + read.read_to_end(&mut all)?; + let mut out: Vec>> = Vec::new(); + let mut last = 0; + for (i, split) in re.find_iter(&all).enumerate() { + let pos = split.start() + split_inx; + out.push(Cursor::new(Vec::from(&all[last..pos]))); + last = pos; + } + out.push(Cursor::new(Vec::from(&all[last..]))); + Ok(out) +}*/ diff --git a/src/adapters/tesseract.rs b/src/adapters/tesseract.rs index fc7bea9..42ebe10 100644 --- a/src/adapters/tesseract.rs +++ b/src/adapters/tesseract.rs @@ -36,7 +36,8 @@ impl SpawningFileAdapter for TesseractAdapter { "tesseract" } fn command(&self, _filepath_hint: &Path, mut cmd: Command) -> Command { - cmd.arg("-").arg("-"); + // rg already does threading + cmd.env("OMP_THREAD_LIMIT", "1").arg("-").arg("-"); cmd } }