|
|
@ -41,38 +41,6 @@ impl GetMetadata for PdfPagesAdapter {
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*// todo: do this in an actually streaming fashion and less slow
|
|
|
|
|
|
|
|
// IEND chunk type + CRC, immediately followed by the PNG magic (file signature) of the next image
|
|
|
|
|
|
|
|
// 4945 4e44 ae42 6082 8950 4e47 0d0a 1a0a
|
|
|
|
|
|
|
|
let split_seq = hex_literal::hex!("4945 4e44 ae42 6082 8950 4e47 0d0a 1a0a");
|
|
|
|
|
|
|
|
let split_seq_inx = 8;
|
|
|
|
|
|
|
|
fn split_by_seq<'a>(
|
|
|
|
|
|
|
|
split_seq: &'a [u8],
|
|
|
|
|
|
|
|
split_inx: usize,
|
|
|
|
|
|
|
|
read: &mut Read,
|
|
|
|
|
|
|
|
) -> Fallible<impl IntoIterator<Item = impl Read> + 'a> {
|
|
|
|
|
|
|
|
let regex = split_seq
|
|
|
|
|
|
|
|
.iter()
|
|
|
|
|
|
|
|
.map(|c| format!("\\x{:0>2x}", c))
|
|
|
|
|
|
|
|
.collect::<Vec<_>>()
|
|
|
|
|
|
|
|
.join("");
|
|
|
|
|
|
|
|
let restr = format!("(?-u){}", regex);
|
|
|
|
|
|
|
|
eprintln!("re: {}", restr);
|
|
|
|
|
|
|
|
let re = regex::bytes::Regex::new(&restr)?;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
let mut all = Vec::new();
|
|
|
|
|
|
|
|
read.read_to_end(&mut all)?;
|
|
|
|
|
|
|
|
let mut out: Vec<Cursor<Vec<u8>>> = Vec::new();
|
|
|
|
|
|
|
|
let mut last = 0;
|
|
|
|
|
|
|
|
for (i, split) in re.find_iter(&all).enumerate() {
|
|
|
|
|
|
|
|
let pos = split.start() + split_inx;
|
|
|
|
|
|
|
|
out.push(Cursor::new(Vec::from(&all[last..pos])));
|
|
|
|
|
|
|
|
last = pos;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
out.push(Cursor::new(Vec::from(&all[last..])));
|
|
|
|
|
|
|
|
Ok(out)
|
|
|
|
|
|
|
|
}*/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
impl FileAdapter for PdfPagesAdapter {
|
|
|
|
impl FileAdapter for PdfPagesAdapter {
|
|
|
|
fn adapt(&self, ai: AdaptInfo) -> Fallible<()> {
|
|
|
|
fn adapt(&self, ai: AdaptInfo) -> Fallible<()> {
|
|
|
|
let AdaptInfo {
|
|
|
|
let AdaptInfo {
|
|
|
@ -98,17 +66,13 @@ impl FileAdapter for PdfPagesAdapter {
|
|
|
|
let mut cmd = Command::new(exe_name);
|
|
|
|
let mut cmd = Command::new(exe_name);
|
|
|
|
cmd.arg("convert")
|
|
|
|
cmd.arg("convert")
|
|
|
|
.arg("-density")
|
|
|
|
.arg("-density")
|
|
|
|
.arg("300")
|
|
|
|
.arg("200")
|
|
|
|
.arg(inp_fname)
|
|
|
|
.arg(inp_fname)
|
|
|
|
.arg("+adjoin")
|
|
|
|
.arg("+adjoin")
|
|
|
|
.arg(out_fname);
|
|
|
|
.arg(out_fname);
|
|
|
|
|
|
|
|
|
|
|
|
let mut cmd = cmd.spawn().map_err(|e| {
|
|
|
|
let mut cmd = cmd.spawn().map_err(|e| {
|
|
|
|
map_exe_error(
|
|
|
|
map_exe_error(e, exe_name, "Make sure you have graphicsmagick installed.")
|
|
|
|
e,
|
|
|
|
|
|
|
|
exe_name,
|
|
|
|
|
|
|
|
"Could not find gm. Make sure you have graphicsmagick installed.",
|
|
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
})?;
|
|
|
|
})?;
|
|
|
|
let args = config.args;
|
|
|
|
let args = config.args;
|
|
|
|
// TODO: how to handle this copying better?
|
|
|
|
// TODO: how to handle this copying better?
|
|
|
@ -133,7 +97,7 @@ impl FileAdapter for PdfPagesAdapter {
|
|
|
|
is_real_file: false,
|
|
|
|
is_real_file: false,
|
|
|
|
inp: &mut ele,
|
|
|
|
inp: &mut ele,
|
|
|
|
oup,
|
|
|
|
oup,
|
|
|
|
line_prefix,
|
|
|
|
line_prefix: &format!("{}Page {}:", line_prefix, i + 1),
|
|
|
|
archive_recursion_depth: archive_recursion_depth + 1,
|
|
|
|
archive_recursion_depth: archive_recursion_depth + 1,
|
|
|
|
config: PreprocConfig { cache: None, args },
|
|
|
|
config: PreprocConfig { cache: None, args },
|
|
|
|
})?;
|
|
|
|
})?;
|
|
|
@ -141,3 +105,35 @@ impl FileAdapter for PdfPagesAdapter {
|
|
|
|
Ok(())
|
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*// todo: do this in an actually streaming fashion and less slow
|
|
|
|
|
|
|
|
// IEND chunk type + CRC, immediately followed by the PNG magic (file signature) of the next image
|
|
|
|
|
|
|
|
// 4945 4e44 ae42 6082 8950 4e47 0d0a 1a0a
|
|
|
|
|
|
|
|
let split_seq = hex_literal::hex!("4945 4e44 ae42 6082 8950 4e47 0d0a 1a0a");
|
|
|
|
|
|
|
|
let split_seq_inx = 8;
|
|
|
|
|
|
|
|
fn split_by_seq<'a>(
|
|
|
|
|
|
|
|
split_seq: &'a [u8],
|
|
|
|
|
|
|
|
split_inx: usize,
|
|
|
|
|
|
|
|
read: &mut Read,
|
|
|
|
|
|
|
|
) -> Fallible<impl IntoIterator<Item = impl Read> + 'a> {
|
|
|
|
|
|
|
|
let regex = split_seq
|
|
|
|
|
|
|
|
.iter()
|
|
|
|
|
|
|
|
.map(|c| format!("\\x{:0>2x}", c))
|
|
|
|
|
|
|
|
.collect::<Vec<_>>()
|
|
|
|
|
|
|
|
.join("");
|
|
|
|
|
|
|
|
let restr = format!("(?-u){}", regex);
|
|
|
|
|
|
|
|
eprintln!("re: {}", restr);
|
|
|
|
|
|
|
|
let re = regex::bytes::Regex::new(&restr)?;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
let mut all = Vec::new();
|
|
|
|
|
|
|
|
read.read_to_end(&mut all)?;
|
|
|
|
|
|
|
|
let mut out: Vec<Cursor<Vec<u8>>> = Vec::new();
|
|
|
|
|
|
|
|
let mut last = 0;
|
|
|
|
|
|
|
|
for (i, split) in re.find_iter(&all).enumerate() {
|
|
|
|
|
|
|
|
let pos = split.start() + split_inx;
|
|
|
|
|
|
|
|
out.push(Cursor::new(Vec::from(&all[last..pos])));
|
|
|
|
|
|
|
|
last = pos;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
out.push(Cursor::new(Vec::from(&all[last..])));
|
|
|
|
|
|
|
|
Ok(out)
|
|
|
|
|
|
|
|
}*/
|
|
|
|