ripgrep-all/src/adapters/postproc.rs

177 lines
5.3 KiB
Rust
Raw Normal View History

2020-06-11 12:38:50 +00:00
//trait RunFnAdapter: GetMetadata {}
//impl<T> FileAdapter for T where T: RunFnAdapter {}
use anyhow::Result;
2020-09-30 14:22:54 +00:00
use std::{cmp::min, io::Read};
2020-09-30 14:49:51 +00:00
use crate::read_iter::{ReadIterBox, SingleReadIter};
use super::{AdaptInfo, AdapterMeta, FileAdapter, GetMetadata};
2020-06-11 12:38:50 +00:00
struct ByteReplacer<R>
where
R: Read,
{
inner: R,
next_read: Vec<u8>,
replacer: Box<dyn FnMut(u8) -> Vec<u8>>,
2020-06-11 22:09:37 +00:00
haystacker: Box<dyn Fn(&[u8]) -> Option<usize>>,
2020-06-11 12:38:50 +00:00
}
impl<R> ByteReplacer<R>
where
R: Read,
{
fn output_next(&mut self, buf: &mut [u8], buf_valid_until: usize, replacement: &[u8]) -> usize {
let after_part1 = Vec::from(&buf[1..buf_valid_until]);
/*let mut after_part = Vec::with_capacity(replacement.len() + replaced_len);
after_part.extend_from_slice(replacement);
after_part.extend_from_slice(&buf[..replaced_len]);*/
let writeable_count = min(buf.len(), replacement.len());
buf[..writeable_count].copy_from_slice(&replacement[0..writeable_count]);
let after_rep = &replacement[writeable_count..];
let mut ov = Vec::new();
ov.extend_from_slice(&after_rep);
ov.extend_from_slice(&after_part1);
ov.extend_from_slice(&self.next_read);
self.next_read = ov;
return writeable_count;
}
}
impl<R> Read for ByteReplacer<R>
where
R: Read,
{
fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
let read = if self.next_read.len() > 0 {
let count = std::cmp::min(self.next_read.len(), buf.len());
buf[0..count].copy_from_slice(&self.next_read[0..count]);
self.next_read.drain(0..count).count();
Ok(count)
} else {
self.inner.read(buf)
};
match read {
Ok(u) => {
2020-06-11 22:09:37 +00:00
match (self.haystacker)(&buf[0..u]) {
2020-06-11 12:38:50 +00:00
Some(i) => {
let data = (self.replacer)(buf[i]);
Ok(i + self.output_next(&mut buf[i..], u - i, &data))
}
None => Ok(u),
}
// todo: use memchr2?
}
Err(e) => Err(e),
}
}
}
2020-09-30 14:22:54 +00:00
pub struct PostprocPrefix {}
impl GetMetadata for PostprocPrefix {
fn metadata(&self) -> &super::AdapterMeta {
lazy_static::lazy_static! {
static ref METADATA: AdapterMeta = AdapterMeta {
name: "postprocprefix".to_owned(),
version: 1,
description: "Adds the line prefix to each line".to_owned(),
recurses: true,
fast_matchers: vec![],
slow_matchers: None,
keep_fast_matchers_if_accurate: false,
disabled_by_default: false
};
}
&METADATA
}
}
impl FileAdapter for PostprocPrefix {
fn adapt<'a>(
&self,
a: super::AdaptInfo<'a>,
_detection_reason: &crate::matching::FileMatcher,
2020-09-30 14:49:51 +00:00
) -> Result<ReadIterBox<'a>> {
2020-09-30 14:22:54 +00:00
let read = postproc_prefix(&a.line_prefix, a.inp)?;
// keep adapt info (filename etc) except replace inp
let ai = AdaptInfo {
inp: Box::new(read),
postprocess: false,
..a
};
Ok(Box::new(SingleReadIter::new(ai)))
}
}
pub fn postproc_prefix(line_prefix: &str, inp: impl Read) -> Result<impl Read> {
let line_prefix = line_prefix.to_string(); // clone since we need it later
Ok(ByteReplacer {
inner: inp,
next_read: format!("{}", line_prefix).into_bytes(),
haystacker: Box::new(|buf| memchr::memchr(b'\n', buf)),
replacer: Box::new(move |_| format!("\n{}", line_prefix).into_bytes()),
})
}
pub fn postproc_pagebreaks(line_prefix: &str, inp: impl Read) -> Result<impl Read> {
let line_prefix = line_prefix.to_string(); // clone since
2020-06-11 12:38:50 +00:00
let mut page_count = 1;
Ok(ByteReplacer {
inner: inp,
2020-09-30 14:22:54 +00:00
next_read: format!("{}Page {}:", line_prefix, page_count).into_bytes(),
2020-06-11 22:09:37 +00:00
haystacker: Box::new(|buf| memchr::memchr2(b'\n', b'\x0c', buf)),
2020-06-11 12:38:50 +00:00
replacer: Box::new(move |b| match b {
2020-09-30 14:22:54 +00:00
b'\n' => format!("\n{}Page {}:", line_prefix, page_count).into_bytes(),
2020-06-11 12:38:50 +00:00
b'\x0c' => {
page_count += 1;
2020-09-30 14:22:54 +00:00
format!("\n{}Page {}:", line_prefix, page_count).into_bytes()
2020-06-11 12:38:50 +00:00
}
_ => b"[[imposs]]".to_vec(),
}),
})
}
#[cfg(test)]
mod tests {
2020-09-30 14:22:54 +00:00
use super::postproc_pagebreaks;
2020-06-11 12:38:50 +00:00
use anyhow::Result;
use std::io::Read;
fn test_from_strs(a: &str, b: &str) -> Result<()> {
let mut oup = Vec::new();
2020-09-30 14:22:54 +00:00
postproc_pagebreaks("", a.as_bytes())?.read_to_end(&mut oup)?;
2020-06-11 12:38:50 +00:00
let c = String::from_utf8_lossy(&oup);
if b != c {
anyhow::bail!("{}\nshould be\n{}\nbut is\n{}", a, b, c);
}
Ok(())
}
#[test]
fn post1() -> Result<()> {
let inp = "What is this\nThis is a test\nFoo";
2020-09-30 14:22:54 +00:00
let oup = "Page 1:What is this\nPage 1:This is a test\nPage 1:Foo";
2020-06-11 12:38:50 +00:00
test_from_strs(inp, oup)?;
println!("\n\n\n\n");
let inp = "What is this\nThis is a test\nFoo\x0c\nHelloooo\nHow are you?\x0c\nGreat!";
2020-09-30 14:22:54 +00:00
let oup = "Page 1:What is this\nPage 1:This is a test\nPage 1:Foo\nPage 2:\nPage 2:Helloooo\nPage 2:How are you?\nPage 3:\nPage 3:Great!";
2020-06-11 12:38:50 +00:00
test_from_strs(inp, oup)?;
Ok(())
}
}