diff --git a/CHANGELOG.md b/CHANGELOG.md index 134e66a..3de0a2f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ # 0.9.2 (2019-06-17) - Fix file ending regex ([#13](https://github.com/phiresky/ripgrep-all/issues/13)) +- Fix decoding of UTF-16 with BOM ([#5](https://github.com/phiresky/ripgrep-all/issues/5)) # 0.9.1 (2019-06-16) diff --git a/src/adapters/spawning.rs b/src/adapters/spawning.rs index 340d26a..37c65d9 100644 --- a/src/adapters/spawning.rs +++ b/src/adapters/spawning.rs @@ -1,4 +1,5 @@ use super::*; +use encoding_rs_io::DecodeReaderBytesBuilder; use failure::*; use std::io::prelude::*; use std::io::BufReader; @@ -10,13 +11,27 @@ use std::process::Stdio; * * Try to detect binary files and ignore them. Does not ensure any encoding in the output. * - * This is needed because the rg binary detection does not apply to preprocessed files + * Binary detection is needed because the rg binary detection does not apply to preprocessed files */ + +/**/ pub fn postproc_line_prefix( line_prefix: &str, inp: &mut dyn Read, oup: &mut dyn Write, ) -> Fallible<()> { + // TODO: parse these options from ripgrep's configuration + let encoding = None; // sniff the BOM; otherwise assume UTF-8 + let bom_sniffing = true; + let mut decode_builder = DecodeReaderBytesBuilder::new(); + // https://github.com/BurntSushi/ripgrep/blob/a7d26c8f144a4957b75f71087a66692d0b25759a/grep-searcher/src/searcher/mod.rs#L706 + let inp = decode_builder + .encoding(encoding) + .utf8_passthru(true) + .strip_bom(bom_sniffing) + .bom_override(true) + .bom_sniffing(bom_sniffing) + .build(inp); // check for null byte in first 8kB let mut reader = BufReader::with_capacity(1 << 12, inp); let fourk = reader.fill_buf()?;