tar adapter (broken compression)

pull/11/head
phiresky 5 years ago
parent 92eb76e534
commit 83114f66bf

1
.gitignore vendored

@ -1,2 +1,3 @@
/target
/.idea
**/*.rs.bk

115
Cargo.lock generated

@ -85,6 +85,11 @@ name = "bitflags"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "build_const"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "byteorder"
version = "1.3.1"
@ -148,6 +153,14 @@ dependencies = [
"bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "crc"
version = "1.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"build_const 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "crc32fast"
version = "1.2.0"
@ -285,11 +298,33 @@ dependencies = [
"synstructure 0.10.2 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "filetime"
version = "0.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"cfg-if 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)",
"libc 0.2.57 (registry+https://github.com/rust-lang/crates.io-index)",
"redox_syscall 0.1.54 (registry+https://github.com/rust-lang/crates.io-index)",
"winapi 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "fixedbitset"
version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "flate2"
version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"crc32fast 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
"libc 0.2.57 (registry+https://github.com/rust-lang/crates.io-index)",
"miniz-sys 0.1.12 (registry+https://github.com/rust-lang/crates.io-index)",
"miniz_oxide_c_api 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "fnv"
version = "1.0.6"
@ -385,6 +420,16 @@ dependencies = [
"cfg-if 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "lzma-sys"
version = "0.1.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"cc 1.0.37 (registry+https://github.com/rust-lang/crates.io-index)",
"libc 0.2.57 (registry+https://github.com/rust-lang/crates.io-index)",
"pkg-config 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "matches"
version = "0.1.8"
@ -400,6 +445,34 @@ name = "memoffset"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "miniz-sys"
version = "0.1.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"cc 1.0.37 (registry+https://github.com/rust-lang/crates.io-index)",
"libc 0.2.57 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "miniz_oxide"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"adler32 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "miniz_oxide_c_api"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"cc 1.0.37 (registry+https://github.com/rust-lang/crates.io-index)",
"crc 1.8.1 (registry+https://github.com/rust-lang/crates.io-index)",
"libc 0.2.57 (registry+https://github.com/rust-lang/crates.io-index)",
"miniz_oxide 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "nodrop"
version = "0.1.13"
@ -682,11 +755,13 @@ name = "rga"
version = "0.1.0"
dependencies = [
"bincode 1.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
"bzip2 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
"cachedir 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
"clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)",
"crossbeam 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)",
"env_logger 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)",
"failure 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
"flate2 1.0.7 (registry+https://github.com/rust-lang/crates.io-index)",
"lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
"log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
"path-clean 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
@ -694,7 +769,9 @@ dependencies = [
"rkv 0.9.5 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 1.0.92 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_json 1.0.39 (registry+https://github.com/rust-lang/crates.io-index)",
"tar 0.4.26 (registry+https://github.com/rust-lang/crates.io-index)",
"tree_magic_fork 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
"xz2 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
"zip 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)",
"zstd 0.4.24+zstd.1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
@ -816,6 +893,17 @@ dependencies = [
"unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "tar"
version = "0.4.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"filetime 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)",
"libc 0.2.57 (registry+https://github.com/rust-lang/crates.io-index)",
"redox_syscall 0.1.54 (registry+https://github.com/rust-lang/crates.io-index)",
"xattr 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "termcolor"
version = "1.0.5"
@ -965,6 +1053,22 @@ dependencies = [
"winapi-util 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "xattr"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"libc 0.2.57 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "xz2"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"lzma-sys 0.1.14 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "zip"
version = "0.5.2"
@ -1016,6 +1120,7 @@ dependencies = [
"checksum backtrace-sys 0.1.28 (registry+https://github.com/rust-lang/crates.io-index)" = "797c830ac25ccc92a7f8a7b9862bde440715531514594a6154e3d4a54dd769b6"
"checksum bincode 1.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "9f04a5e50dc80b3d5d35320889053637d15011aed5e66b66b37ae798c65da6f7"
"checksum bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "228047a76f468627ca71776ecdebd732a3423081fcf5125585bcd7c49886ce12"
"checksum build_const 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "39092a32794787acd8525ee150305ff051b0aa6cc2abaf193924f5ab05425f39"
"checksum byteorder 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "a019b10a2a7cdeb292db131fc8113e57ea2a908f6e7894b0c3c671893b65dbeb"
"checksum bzip2 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "42b7c3cbf0fa9c1b82308d57191728ca0256cb821220f4e2fd410a72ade26e3b"
"checksum bzip2-sys 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "6584aa36f5ad4c9247f5323b0a42f37802b37a836f0ad87084d7a33961abe25f"
@ -1024,6 +1129,7 @@ dependencies = [
"checksum cfg-if 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "b486ce3ccf7ffd79fdeb678eac06a9e6c09fc88d33836340becb8fffe87c5e33"
"checksum clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)" = "5067f5bb2d80ef5d68b4c87db81601f0b75bca627bc2ef76b141d7b846a3c6d9"
"checksum cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f"
"checksum crc 1.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "d663548de7f5cca343f1e0a48d14dcfb0e9eb4e079ec58883b7251539fa10aeb"
"checksum crc32fast 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ba125de2af0df55319f41944744ad91c71113bf74a4646efff39afe1f6842db1"
"checksum crossbeam 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)" = "b14492071ca110999a20bf90e3833406d5d66bfd93b4e52ec9539025ff43fe0d"
"checksum crossbeam-channel 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)" = "0f0ed1a4de2235cabda8558ff5840bffb97fcb64c97827f354a451307df5f72b"
@ -1038,7 +1144,9 @@ dependencies = [
"checksum env_logger 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)" = "b61fa891024a945da30a9581546e8cfaf5602c7b3f4c137a2805cf388f92075a"
"checksum failure 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "795bd83d3abeb9220f257e597aa0080a508b27533824adf336529648f6abf7e2"
"checksum failure_derive 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "ea1063915fd7ef4309e222a5a07cf9c319fb9c7836b1f89b85458672dbb127e1"
"checksum filetime 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "450537dc346f0c4d738dda31e790da1da5d4bd12145aad4da0d03d713cb3794f"
"checksum fixedbitset 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "86d4de0081402f5e88cdac65c8dcdcc73118c1a7a465e2a05f0da05843a8ea33"
"checksum flate2 1.0.7 (registry+https://github.com/rust-lang/crates.io-index)" = "f87e68aa82b2de08a6e037f1385455759df6e445a8df5e005b4297191dbf18aa"
"checksum fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "2fad85553e09a6f881f739c29f0b00b0f01357c743266d478b68951ce23285f3"
"checksum fuchsia-cprng 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba"
"checksum glob 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "8be18de09a56b60ed0edf84bc9df007e30040691af7acd1c41874faac5895bfb"
@ -1052,9 +1160,13 @@ dependencies = [
"checksum lmdb-rkv-sys 0.8.3 (registry+https://github.com/rust-lang/crates.io-index)" = "1470e0168f1832e35afd6d0931ae60db625685332837b97aa156773ec9c5e393"
"checksum lock_api 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ed946d4529956a20f2d63ebe1b69996d5a2137c91913fe3ebbeff957f5bca7ff"
"checksum log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c84ec4b527950aa83a329754b01dbe3f58361d1c5efacd1f6d68c494d08a17c6"
"checksum lzma-sys 0.1.14 (registry+https://github.com/rust-lang/crates.io-index)" = "16b5c59c57cc4d39e7999f50431aa312ea78af7c93b23fbb0c3567bd672e7f35"
"checksum matches 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "7ffc5c5338469d4d3ea17d269fa8ea3512ad247247c30bd2df69e68309ed0a08"
"checksum memchr 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "2efc7bc57c883d4a4d6e3246905283d8dae951bb3bd32f49d6ef297f546e1c39"
"checksum memoffset 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "0f9dc261e2b62d7a622bf416ea3c5245cdd5d9a7fcc428c0d06804dfce1775b3"
"checksum miniz-sys 0.1.12 (registry+https://github.com/rust-lang/crates.io-index)" = "1e9e3ae51cea1576ceba0dde3d484d30e6e5b86dee0b2d412fe3a16a15c98202"
"checksum miniz_oxide 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "c468f2369f07d651a5d0bb2c9079f8488a66d5466efe42d0c5c6466edcb7f71e"
"checksum miniz_oxide_c_api 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "b7fe927a42e3807ef71defb191dc87d4e24479b221e67015fe38ae2b7b447bab"
"checksum nodrop 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)" = "2f9667ddcc6cc8a43afc9b7917599d7216aa09c463919ea32c59ed6cac8bc945"
"checksum nom 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cf51a729ecf40266a2368ad335a5fdde43471f545a967109cd62146ecf8b66ff"
"checksum num-traits 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "6ba9a427cfca2be13aa6f6403b0b7e7368fe982bfa16fccc450ce74c46cd9b32"
@ -1104,6 +1216,7 @@ dependencies = [
"checksum strsim 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a"
"checksum syn 0.15.34 (registry+https://github.com/rust-lang/crates.io-index)" = "a1393e4a97a19c01e900df2aec855a29f71cf02c402e2f443b8d2747c25c5dbe"
"checksum synstructure 0.10.2 (registry+https://github.com/rust-lang/crates.io-index)" = "02353edf96d6e4dc81aea2d8490a7e9db177bf8acb0e951c24940bf866cb313f"
"checksum tar 0.4.26 (registry+https://github.com/rust-lang/crates.io-index)" = "b3196bfbffbba3e57481b6ea32249fbaf590396a52505a2615adbb79d9d826d3"
"checksum termcolor 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)" = "96d6098003bde162e4277c70665bd87c326f5a0c3f3fbfb285787fa482d54e6e"
"checksum termion 1.5.2 (registry+https://github.com/rust-lang/crates.io-index)" = "dde0593aeb8d47accea5392b39350015b5eccb12c0d98044d856983d89548dea"
"checksum textwrap 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060"
@ -1124,6 +1237,8 @@ dependencies = [
"checksum winapi-util 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7168bab6e1daee33b4557efd0e95d5ca70a03706d39fa5f3fe7a236f584b03c9"
"checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
"checksum wincolor 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "561ed901ae465d6185fa7864d63fbd5720d0ef718366c9a4dc83cf6170d7e9ba"
"checksum xattr 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "244c3741f4240ef46274860397c7c74e50eb23624996930e484c16679633a54c"
"checksum xz2 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c179869f34fc7c01830d3ce7ea2086bc3a07e0d35289b667d0a8bf910258926c"
"checksum zip 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)" = "c18fc320faf909036e46ac785ea827f72e485304877faf1a3a39538d3714dbc3"
"checksum zstd 0.4.24+zstd.1.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "2c5a6414958b49ee80f2dd0042023ac8f37cfe1d31fbeec0b9749cf6f2c03683"
"checksum zstd-safe 1.4.9+zstd.1.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1d98332212af687878b146a6549c188e9b72971972d23089c831472f938e6272"

@ -33,3 +33,7 @@ crossbeam = "0.7.1"
clap = "2.33.0"
log = "0.4.6"
env_logger = "0.6.1"
xz2 = "0.1.6"
flate2 = "1.0.7"
bzip2 = "0.3.3"
tar = "0.4.26"

@ -1 +1,8 @@
similar: https://gist.github.com/ColonolBuendia/314826e37ec35c616d70506c38dc65aa
similar:
- pdfgrep
- https://gist.github.com/ColonolBuendia/314826e37ec35c616d70506c38dc65aa
# considerations
- matching on mime (magic bytes) instead of filename

@ -2,6 +2,7 @@ pub mod ffmpeg;
pub mod pandoc;
pub mod poppler;
pub mod spawning;
pub mod tar;
pub mod zip;
use failure::*;
use regex::{Regex, RegexSet};
@ -13,6 +14,10 @@ use std::rc::Rc;
pub enum Matcher {
// MimeType(Regex),
/**
* without the dot. e.g. "jpg" or "tar.gz" matched as /.*\.ext$/
*
*/
FileExtension(String),
}
@ -53,6 +58,7 @@ pub fn get_adapters() -> Vec<Rc<dyn FileAdapter>> {
Rc::new(pandoc::PandocAdapter::new()),
Rc::new(poppler::PopplerAdapter::new()),
Rc::new(zip::ZipAdapter::new()),
Rc::new(tar::TarAdapter::new()),
];
adapters
}

@ -103,7 +103,7 @@ where
fn adapt(&self, ai: AdaptInfo) -> Fallible<()> {
let AdaptInfo {
filepath_hint,
inp,
mut inp,
oup,
line_prefix,
..
@ -112,7 +112,7 @@ where
pipe_output(
line_prefix,
self.command(filepath_hint, cmd),
inp,
&mut inp,
oup,
self.get_exe(),
"",

@ -0,0 +1,91 @@
use super::*;
use crate::preproc::rga_preproc;
use ::tar::EntryType::Regular;
use failure::*;
use lazy_static::lazy_static;
use std::fs::File;
use std::path::PathBuf;
static EXTENSIONS: &[&str] = &["tar", "tar.gz", "tar.bz2", "tar.xz", "tar.zst"];
lazy_static! {
static ref METADATA: AdapterMeta = AdapterMeta {
name: "tar".to_owned(),
version: 1,
matchers: EXTENSIONS
.iter()
.map(|s| Matcher::FileExtension(s.to_string()))
.collect(),
};
}
pub struct TarAdapter;
impl TarAdapter {
pub fn new() -> TarAdapter {
TarAdapter
}
}
impl GetMetadata for TarAdapter {
fn metadata<'a>(&'a self) -> &'a AdapterMeta {
&METADATA
}
}
/*struct WrapRead<'a> {
inner: &mut 'a Read;
}
impl Read for WrapRead {
r
}*/
/*fn decompress_any(filename: &Path, inp: &mut Read) -> Fallible<Box<Read>> {
let extension = filename.extension().map(|e| e.to_string_lossy().to_owned());
match extension {
Some(e) => Ok(match e.to_owned().as_ref() {
"gz" => Box::new(flate2::read::MultiGzDecoder::new(inp)),
"bz2" => Box::new(bzip2::read::BzDecoder::new(inp)),
"xz" => Box::new(xz2::read::XzDecoder::new_multi_decoder(inp)),
"zst" => Box::new(zstd::stream::read::Decoder::new(inp)?),
e => Err(format_err!("don't know how to decompress {}", e))?,
}),
None => Err(format_err!("no extension")),
}
}*/
impl FileAdapter for TarAdapter {
fn adapt<'a>(&self, ai: AdaptInfo) -> Fallible<()> {
use std::io::prelude::*;
let AdaptInfo {
filepath_hint,
mut inp,
oup,
line_prefix,
..
} = ai;
let decompress = inp; //decompress_any(filepath_hint, &inp)?;
let mut archive = ::tar::Archive::new(decompress);
for entry in archive.entries()? {
let mut file = entry.unwrap();
let path = PathBuf::from(file.path()?.to_owned());
eprintln!(
"{}|{}: {} bytes",
filepath_hint.display(),
path.display(),
file.header().size()?,
);
if Regular == file.header().entry_type() {
let line_prefix = &format!("{}{}: ", line_prefix, path.display());
rga_preproc(
AdaptInfo {
filepath_hint: &path,
inp: &mut file,
oup: oup,
line_prefix,
},
None,
)?;
}
}
Ok(())
}
}
Loading…
Cancel
Save