add zip support!
parent
e0002a789d
commit
1a0bbc798e
Binary file not shown.
Binary file not shown.
@ -0,0 +1,72 @@
|
||||
use super::*;
|
||||
use crate::preproc::rga_preproc;
|
||||
use failure::*;
|
||||
use lazy_static::lazy_static;
|
||||
use std::fs::File;
|
||||
// todo:
|
||||
// maybe todo: read list of extensions from
|
||||
//ffmpeg -demuxers | tail -n+5 | awk '{print $2}' | while read demuxer; do echo MUX=$demuxer; ffmpeg -h demuxer=$demuxer | grep 'Common extensions'; done 2>/dev/null
|
||||
static EXTENSIONS: &[&str] = &["zip"];
|
||||
|
||||
lazy_static! {
|
||||
static ref METADATA: AdapterMeta = AdapterMeta {
|
||||
name: "zip".to_owned(),
|
||||
version: 1,
|
||||
matchers: EXTENSIONS
|
||||
.iter()
|
||||
.map(|s| Matcher::FileExtension(s.to_string()))
|
||||
.collect(),
|
||||
};
|
||||
}
|
||||
|
||||
/// Adapter that recurses into ZIP archives and preprocesses each entry.
pub struct ZipAdapter;

impl ZipAdapter {
    /// Creates a new `ZipAdapter`. The adapter holds no state.
    pub fn new() -> ZipAdapter {
        Self
    }
}
|
||||
impl GetMetadata for ZipAdapter {
|
||||
fn metadata<'a>(&'a self) -> &'a AdapterMeta {
|
||||
&METADATA
|
||||
}
|
||||
}
|
||||
|
||||
impl FileAdapter for ZipAdapter {
|
||||
fn adapt(&self, ai: AdaptInfo) -> Fallible<()> {
|
||||
use std::io::prelude::*;
|
||||
let AdaptInfo {
|
||||
filepath_hint,
|
||||
mut inp,
|
||||
oup,
|
||||
line_prefix,
|
||||
..
|
||||
} = ai;
|
||||
loop {
|
||||
match ::zip::read::read_zipfile_from_stream(&mut inp) {
|
||||
Ok(None) => break,
|
||||
Ok(Some(mut file)) => {
|
||||
eprintln!(
|
||||
"{}|{}: {} bytes ({} bytes packed)",
|
||||
filepath_hint.to_string_lossy(),
|
||||
file.name(),
|
||||
file.size(),
|
||||
file.compressed_size()
|
||||
);
|
||||
let line_prefix = &format!("{}{}:/", line_prefix, file.name().clone());
|
||||
rga_preproc(
|
||||
AdaptInfo {
|
||||
filepath_hint: &file.sanitized_name(),
|
||||
inp: &mut file,
|
||||
oup: oup,
|
||||
line_prefix,
|
||||
},
|
||||
None,
|
||||
)?;
|
||||
}
|
||||
Err(e) => return Err(e.into()),
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
@ -1,110 +1,30 @@
|
||||
use failure::{format_err, Error};
|
||||
use path_clean::PathClean;
|
||||
use rga::adapters::*;
|
||||
use rga::preproc::*;
|
||||
use rga::CachingWriter;
|
||||
use failure::{Error, format_err};
|
||||
|
||||
// longest compressed conversion output to save in cache
const MAX_DB_BLOB_LEN: usize = 2000000;
// zstd compression level used when storing cache entries
const ZSTD_LEVEL: i32 = 12;
|
||||
|
||||
/// Opens (or creates) the shared rkv/LMDB cache environment under the
/// user's "rga" cache directory, returning the process-wide handle from
/// `rkv::Manager::singleton()`.
///
/// The environment trades durability for speed: NO_SYNC + WRITE_MAP mean
/// a crash may lose recent writes, which is acceptable for a regenerable
/// preprocessing cache.
fn open_db() -> Result<std::sync::Arc<std::sync::RwLock<rkv::Rkv>>, Error> {
    let app_cache = cachedir::CacheDirConfig::new("rga").get_cache_dir()?;

    let db_arc = rkv::Manager::singleton()
        .write()
        .expect("could not write db manager")
        .get_or_create(app_cache.as_path(), |p| {
            let mut builder = rkv::Rkv::environment_builder();
            builder
                .set_flags(rkv::EnvironmentFlags::NO_SYNC | rkv::EnvironmentFlags::WRITE_MAP) // not durable
                .set_map_size(2 * 1024 * 1024 * 1024) // 2 GiB max map size
                .set_max_dbs(100);
            rkv::Rkv::from_env(p, builder)
        })
        .expect("could not get/create db");
    Ok(db_arc)
}
|
||||
use std::fs::File;
|
||||
use std::path::PathBuf;
|
||||
use std::rc::Rc;
|
||||
|
||||
/// CLI entry point: preprocess the file named by the first argument.
///
/// NOTE(review): this body appears to be a mis-merged diff — it contains
/// BOTH the old inline implementation (open_db + cache lookup + adapt)
/// AND the new thin delegation (`rga_preproc(ai, Some(open_cache_db()?))`
/// at the end, where `ai` and `open_cache_db` are not in scope here).
/// Confirm against the intended revision before relying on this file.
fn main() -> Result<(), Error> {
    //db.
    let adapters = adapter_matcher()?;
    // NOTE(review): `path` is computed twice — the block below immediately
    // shadows this first computation; looks like old+new diff lines kept.
    let filepath = std::env::args_os()
        .skip(1)
        .next()
        .ok_or(format_err!("No filename specified"))?;
    eprintln!("inp fname: {:?}", filepath);
    let path = std::env::current_dir()?.join(&filepath);
    let path = {
        let filepath = std::env::args_os()
            .skip(1)
            .next()
            .ok_or(format_err!("No filename specified"))?;
        eprintln!("inp fname: {:?}", filepath);
        std::env::current_dir()?.join(&filepath)
    };

    eprintln!("abs path: {:?}", path);
    eprintln!("clean path: {:?}", path.clean());
    let serialized_path: Vec<u8> =
        bincode::serialize(&path.clean()).expect("could not serialize path"); // key in the cache database
    let filename = path.file_name().ok_or(format_err!("Empty filename"))?;

    /*let mimetype = tree_magic::from_filepath(path).ok_or(lerr(format!(
        "File {} does not exist",
        filename.to_string_lossy()
    )))?;
    println!("mimetype: {:?}", mimetype);*/
    // pick an adapter purely by (lossy) filename
    let adapter = adapters(FileMeta {
        // mimetype,
        lossy_filename: filename.to_string_lossy().to_string(),
    });
    match adapter {
        Some(ad) => {
            let meta = ad.metadata();
            eprintln!("adapter: {}", &meta.name);
            // cache store is versioned per adapter so a format bump invalidates old entries
            let db_name = format!("{}.v{}", meta.name, meta.version);
            let db_arc = open_db()?;
            let db_env = db_arc.read().unwrap();
            let db = db_env
                .open_single(db_name.as_str(), rkv::store::Options::create())
                .map_err(|p| format_err!("could not open db store: {:?}", p))?;
            let reader = db_env.read().expect("could not get reader");
            match db
                .get(&reader, &serialized_path)
                .map_err(|p| format_err!("could not read from db: {:?}", p))?
            {
                // cache hit: decompress the stored blob straight to stdout
                Some(rkv::Value::Blob(cached)) => {
                    let stdouti = std::io::stdout();
                    zstd::stream::copy_decode(cached, stdouti.lock())?;
                    Ok(())
                }
                Some(_) => Err(format_err!("Integrity: value not blob")),
                // cache miss: run the adapter, teeing output into a compressing buffer
                None => {
                    let stdouti = std::io::stdout();
                    let mut compbuf =
                        CachingWriter::new(stdouti.lock(), MAX_DB_BLOB_LEN, ZSTD_LEVEL)?;
                    ad.adapt(&path, &mut compbuf)?;
                    let compressed = compbuf.finish()?;
                    if let Some(cached) = compressed {
                        eprintln!("compressed len: {}", cached.len());
                        // NOTE(review): `ai` is never used in this arm; only the
                        // dangling `rga_preproc` call at the end references it —
                        // mis-merge suspect.
                        let ai = AdaptInfo {
                            inp: &mut File::open(&path)?,
                            filepath_hint: &path,
                            oup: &mut std::io::stdout(),
                            line_prefix: "",
                        };

                        {
                            let mut writer = db_env.write().map_err(|p| {
                                format_err!("could not open write handle to cache: {:?}", p)
                            })?;
                            db.put(&mut writer, &serialized_path, &rkv::Value::Blob(&cached))
                                .map_err(|p| format_err!("could not write to cache: {:?}", p))?;
                            writer.commit().unwrap();
                        }
                    }
                    Ok(())
                }
            }
        }
        None => {
            // no adapter matched: optionally fall back to plain `cat`
            let allow_cat = false;
            if allow_cat {
                eprintln!("no adapter for that file, running cat!");
                let stdini = std::io::stdin();
                let mut stdin = stdini.lock();
                let stdouti = std::io::stdout();
                let mut stdout = stdouti.lock();
                std::io::copy(&mut stdin, &mut stdout)?;
                Ok(())
            } else {
                Err(format_err!("No adapter found for file {:?}", filename))
            }
        }
    }
    rga_preproc(ai, Some(open_cache_db()?))
}
|
||||
|
@ -1,4 +1,5 @@
|
||||
// Crate root: public module surface of the library.
pub mod adapters;
// private: exposed only through the re-export below
mod caching_writer;
pub mod errors;
pub mod preproc;
pub use caching_writer::CachingWriter;
|
||||
|
@ -0,0 +1,151 @@
|
||||
use crate::adapters::*;
|
||||
use crate::CachingWriter;
|
||||
use failure::{format_err, Error};
|
||||
use path_clean::PathClean;
|
||||
use std::io::Read;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
use std::rc::Rc;
|
||||
|
||||
// longest compressed conversion output to save in cache
const MAX_DB_BLOB_LEN: usize = 2000000;
// zstd compression level used when storing cache entries
const ZSTD_LEVEL: i32 = 12;
|
||||
|
||||
/// Opens (or creates) the shared rkv/LMDB cache environment under the
/// user's "rga" cache directory, returning the process-wide handle from
/// `rkv::Manager::singleton()`.
///
/// The environment trades durability for speed: NO_SYNC + WRITE_MAP mean
/// a crash may lose recent writes, which is acceptable for a regenerable
/// preprocessing cache.
pub fn open_cache_db() -> Result<std::sync::Arc<std::sync::RwLock<rkv::Rkv>>, Error> {
    let app_cache = cachedir::CacheDirConfig::new("rga").get_cache_dir()?;

    let db_arc = rkv::Manager::singleton()
        .write()
        .expect("could not write db manager")
        .get_or_create(app_cache.as_path(), |p| {
            let mut builder = rkv::Rkv::environment_builder();
            builder
                .set_flags(rkv::EnvironmentFlags::NO_SYNC | rkv::EnvironmentFlags::WRITE_MAP) // not durable
                .set_map_size(2 * 1024 * 1024 * 1024) // 2 GiB max map size
                .set_max_dbs(100);
            rkv::Rkv::from_env(p, builder)
        })
        .expect("could not get/create db");
    Ok(db_arc)
}
|
||||
|
||||
pub fn rga_preproc(
|
||||
ai: AdaptInfo,
|
||||
mb_db_arc: Option<std::sync::Arc<std::sync::RwLock<rkv::Rkv>>>,
|
||||
) -> Result<(), Error> {
|
||||
let adapters = adapter_matcher()?;
|
||||
let AdaptInfo {
|
||||
filepath_hint,
|
||||
inp,
|
||||
oup,
|
||||
line_prefix,
|
||||
..
|
||||
} = ai;
|
||||
let filename = filepath_hint
|
||||
.file_name()
|
||||
.ok_or(format_err!("Empty filename"))?;
|
||||
|
||||
eprintln!("abs path: {:?}", filepath_hint);
|
||||
|
||||
/*let mimetype = tree_magic::from_filepath(path).ok_or(lerr(format!(
|
||||
"File {} does not exist",
|
||||
filename.to_string_lossy()
|
||||
)))?;
|
||||
println!("mimetype: {:?}", mimetype);*/
|
||||
let adapter = adapters(FileMeta {
|
||||
// mimetype,
|
||||
lossy_filename: filename.to_string_lossy().to_string(),
|
||||
});
|
||||
match adapter {
|
||||
Some(ad) => {
|
||||
let meta = ad.metadata();
|
||||
eprintln!("adapter: {}", &meta.name);
|
||||
let db_name = format!("{}.v{}", meta.name, meta.version);
|
||||
if let Some(db_arc) = mb_db_arc {
|
||||
let cache_key: Vec<u8> = {
|
||||
let clean_path = filepath_hint.to_owned().clean();
|
||||
eprintln!("clean path: {:?}", clean_path);
|
||||
let meta = std::fs::metadata(&filepath_hint)?;
|
||||
|
||||
let key = (
|
||||
clean_path,
|
||||
meta.modified().expect("weird OS that can't into mtime"),
|
||||
);
|
||||
eprintln!("cache key: {:?}", key);
|
||||
|
||||
bincode::serialize(&key).expect("could not serialize path") // key in the cache database
|
||||
};
|
||||
let db_env = db_arc.read().unwrap();
|
||||
let db = db_env
|
||||
.open_single(db_name.as_str(), rkv::store::Options::create())
|
||||
.map_err(|p| format_err!("could not open db store: {:?}", p))?;
|
||||
let reader = db_env.read().expect("could not get reader");
|
||||
let cached = db
|
||||
.get(&reader, &cache_key)
|
||||
.map_err(|p| format_err!("could not read from db: {:?}", p))?;
|
||||
match cached {
|
||||
Some(rkv::Value::Blob(cached)) => {
|
||||
let stdouti = std::io::stdout();
|
||||
zstd::stream::copy_decode(cached, stdouti.lock())?;
|
||||
Ok(())
|
||||
}
|
||||
Some(_) => Err(format_err!("Integrity: value not blob")),
|
||||
None => {
|
||||
let mut compbuf = CachingWriter::new(oup, MAX_DB_BLOB_LEN, ZSTD_LEVEL)?;
|
||||
// start dupe
|
||||
eprintln!("adapting...");
|
||||
ad.adapt(AdaptInfo {
|
||||
line_prefix,
|
||||
filepath_hint,
|
||||
inp,
|
||||
oup: &mut compbuf,
|
||||
})?;
|
||||
// end dupe
|
||||
let compressed = compbuf.finish()?;
|
||||
if let Some(cached) = compressed {
|
||||
eprintln!("compressed len: {}", cached.len());
|
||||
|
||||
{
|
||||
let mut writer = db_env.write().map_err(|p| {
|
||||
format_err!("could not open write handle to cache: {:?}", p)
|
||||
})?;
|
||||
db.put(&mut writer, &cache_key, &rkv::Value::Blob(&cached))
|
||||
.map_err(|p| {
|
||||
format_err!("could not write to cache: {:?}", p)
|
||||
})?;
|
||||
writer.commit().unwrap();
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// todo: duplicate code
|
||||
// start dupe
|
||||
eprintln!("adapting...");
|
||||
ad.adapt(AdaptInfo {
|
||||
line_prefix,
|
||||
filepath_hint,
|
||||
inp,
|
||||
oup,
|
||||
})?;
|
||||
// end dupe
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
None => {
|
||||
let allow_cat = false;
|
||||
if allow_cat {
|
||||
eprintln!("no adapter for that file, running cat!");
|
||||
let stdini = std::io::stdin();
|
||||
let mut stdin = stdini.lock();
|
||||
let stdouti = std::io::stdout();
|
||||
let mut stdout = stdouti.lock();
|
||||
std::io::copy(&mut stdin, &mut stdout)?;
|
||||
Ok(())
|
||||
} else {
|
||||
Err(format_err!("No adapter found for file {:?}", filename))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue