implement caching

pull/11/head
phiresky 5 years ago
parent f53688269e
commit e98c60001d

155
Cargo.lock generated

@ -13,6 +13,14 @@ name = "arrayref"
version = "0.3.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "arrayvec"
version = "0.4.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"nodrop 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "autocfg"
version = "0.1.4"
@ -38,10 +46,18 @@ name = "byteorder"
version = "1.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "cachedir"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "cc"
version = "1.0.37"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"rayon 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "cfg-if"
@ -56,6 +72,42 @@ dependencies = [
"bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "crossbeam-deque"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"crossbeam-epoch 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
"crossbeam-utils 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "crossbeam-epoch"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"arrayvec 0.4.10 (registry+https://github.com/rust-lang/crates.io-index)",
"cfg-if 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)",
"crossbeam-utils 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
"lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
"memoffset 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"nodrop 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)",
"scopeguard 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "crossbeam-utils"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"cfg-if 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "either"
version = "1.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "failure"
version = "0.1.5"
@ -90,6 +142,11 @@ name = "fuchsia-cprng"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "glob"
version = "0.2.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "idna"
version = "0.1.5"
@ -149,6 +206,16 @@ name = "memchr"
version = "2.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "memoffset"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "nodrop"
version = "0.1.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "nom"
version = "2.2.1"
@ -162,6 +229,14 @@ dependencies = [
"autocfg 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "num_cpus"
version = "1.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"libc 0.2.57 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "ordered-float"
version = "1.0.2"
@ -200,6 +275,11 @@ dependencies = [
"winapi 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "path-clean"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "percent-encoding"
version = "1.0.1"
@ -331,6 +411,27 @@ dependencies = [
"rand_core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "rayon"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"crossbeam-deque 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
"either 1.5.2 (registry+https://github.com/rust-lang/crates.io-index)",
"rayon-core 1.4.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "rayon-core"
version = "1.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"crossbeam-deque 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
"lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
"libc 0.2.57 (registry+https://github.com/rust-lang/crates.io-index)",
"num_cpus 1.10.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "rdrand"
version = "0.4.0"
@ -368,9 +469,14 @@ dependencies = [
name = "rga"
version = "0.1.0"
dependencies = [
"bincode 1.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
"cachedir 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
"path-clean 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 1.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
"rkv 0.9.5 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 1.0.92 (registry+https://github.com/rust-lang/crates.io-index)",
"tree_magic_fork 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
"zstd 0.4.24+zstd.1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
@ -398,6 +504,11 @@ dependencies = [
"semver 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "scopeguard"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "scopeguard"
version = "1.0.0"
@ -542,21 +653,55 @@ name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "zstd"
version = "0.4.24+zstd.1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"zstd-safe 1.4.9+zstd.1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "zstd-safe"
version = "1.4.9+zstd.1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"libc 0.2.57 (registry+https://github.com/rust-lang/crates.io-index)",
"zstd-sys 1.4.10+zstd.1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "zstd-sys"
version = "1.4.10+zstd.1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"cc 1.0.37 (registry+https://github.com/rust-lang/crates.io-index)",
"glob 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)",
"libc 0.2.57 (registry+https://github.com/rust-lang/crates.io-index)",
]
[metadata]
"checksum aho-corasick 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)" = "e6f484ae0c99fec2e858eb6134949117399f222608d84cadb3f58c1f97c2364c"
"checksum arrayref 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "0d382e583f07208808f6b1249e60848879ba3543f57c32277bf52d69c2f0f0ee"
"checksum arrayvec 0.4.10 (registry+https://github.com/rust-lang/crates.io-index)" = "92c7fb76bc8826a8b33b4ee5bb07a247a81e76764ab4d55e8f73e3a4d8808c71"
"checksum autocfg 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "0e49efa51329a5fd37e7c79db4621af617cd4e3e5bc224939808d076077077bf"
"checksum bincode 1.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "9f04a5e50dc80b3d5d35320889053637d15011aed5e66b66b37ae798c65da6f7"
"checksum bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "228047a76f468627ca71776ecdebd732a3423081fcf5125585bcd7c49886ce12"
"checksum byteorder 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "a019b10a2a7cdeb292db131fc8113e57ea2a908f6e7894b0c3c671893b65dbeb"
"checksum cachedir 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "c06509d1f4ffa658939bd23f076cd929ef218241363796551528e7eec69128c8"
"checksum cc 1.0.37 (registry+https://github.com/rust-lang/crates.io-index)" = "39f75544d7bbaf57560d2168f28fd649ff9c76153874db88bdbdfd839b1a7e7d"
"checksum cfg-if 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "b486ce3ccf7ffd79fdeb678eac06a9e6c09fc88d33836340becb8fffe87c5e33"
"checksum cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f"
"checksum crossbeam-deque 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "f739f8c5363aca78cfb059edf753d8f0d36908c348f3d8d1503f03d8b75d9cf3"
"checksum crossbeam-epoch 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "927121f5407de9956180ff5e936fe3cf4324279280001cd56b669d28ee7e9150"
"checksum crossbeam-utils 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "2760899e32a1d58d5abb31129f8fae5de75220bc2176e77ff7c627ae45c918d9"
"checksum either 1.5.2 (registry+https://github.com/rust-lang/crates.io-index)" = "5527cfe0d098f36e3f8839852688e63c8fff1c90b2b405aef730615f9a7bcf7b"
"checksum failure 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "795bd83d3abeb9220f257e597aa0080a508b27533824adf336529648f6abf7e2"
"checksum failure_derive 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "ea1063915fd7ef4309e222a5a07cf9c319fb9c7836b1f89b85458672dbb127e1"
"checksum fixedbitset 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "86d4de0081402f5e88cdac65c8dcdcc73118c1a7a465e2a05f0da05843a8ea33"
"checksum fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "2fad85553e09a6f881f739c29f0b00b0f01357c743266d478b68951ce23285f3"
"checksum fuchsia-cprng 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba"
"checksum glob 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "8be18de09a56b60ed0edf84bc9df007e30040691af7acd1c41874faac5895bfb"
"checksum idna 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "38f09e0f0b1fb55fdee1f17470ad800da77af5186a1a76c026b679358b7e844e"
"checksum lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bc5729f27f159ddd61f4df6228e827e86643d4d3e7c32183cb30a1c08f604a14"
"checksum libc 0.2.57 (registry+https://github.com/rust-lang/crates.io-index)" = "a844cabbd5a77e60403a58af576f0a1baa83c3dd2670be63e615bd24fc58b82d"
@ -565,12 +710,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum lock_api 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ed946d4529956a20f2d63ebe1b69996d5a2137c91913fe3ebbeff957f5bca7ff"
"checksum matches 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "7ffc5c5338469d4d3ea17d269fa8ea3512ad247247c30bd2df69e68309ed0a08"
"checksum memchr 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "2efc7bc57c883d4a4d6e3246905283d8dae951bb3bd32f49d6ef297f546e1c39"
"checksum memoffset 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "0f9dc261e2b62d7a622bf416ea3c5245cdd5d9a7fcc428c0d06804dfce1775b3"
"checksum nodrop 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)" = "2f9667ddcc6cc8a43afc9b7917599d7216aa09c463919ea32c59ed6cac8bc945"
"checksum nom 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cf51a729ecf40266a2368ad335a5fdde43471f545a967109cd62146ecf8b66ff"
"checksum num-traits 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "6ba9a427cfca2be13aa6f6403b0b7e7368fe982bfa16fccc450ce74c46cd9b32"
"checksum num_cpus 1.10.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1a23f0ed30a54abaa0c7e83b1d2d87ada7c3c23078d1d87815af3e3b6385fbba"
"checksum ordered-float 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "18869315e81473c951eb56ad5558bbc56978562d3ecfb87abb7a1e944cea4518"
"checksum ordermap 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "a86ed3f5f244b372d6b1a00b72ef7f8876d0bc6a78a4c9985c53614041512063"
"checksum parking_lot 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fa7767817701cce701d5585b9c4db3cdd02086398322c1d7e8bf5094a96a2ce7"
"checksum parking_lot_core 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "cb88cb1cb3790baa6776844f968fea3be44956cf184fa1be5a03341f5491278c"
"checksum path-clean 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ecba01bf2678719532c5e3059e0b5f0811273d94b397088b82e3bd0a78c78fdd"
"checksum percent-encoding 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "31010dd2e1ac33d5b46a5b413495239882813e0369f8ed8a5e266f173602f831"
"checksum petgraph 0.4.13 (registry+https://github.com/rust-lang/crates.io-index)" = "9c3659d1ee90221741f65dd128d9998311b0e40c5d3c23a62445938214abce4f"
"checksum pkg-config 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)" = "676e8eb2b1b4c9043511a9b7bea0915320d7e502b0a079fb03f9635a5252b18c"
@ -586,12 +735,15 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum rand_os 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "7b75f676a1e053fc562eafbb47838d67c84801e38fc1ba459e8f180deabd5071"
"checksum rand_pcg 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "abf9b09b01790cfe0364f52bf32995ea3c39f4d2dd011eac241d2914146d0b44"
"checksum rand_xorshift 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cbf7e9e623549b0e21f6e97cf8ecf247c1a8fd2e8a992ae265314300b2455d5c"
"checksum rayon 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "373814f27745b2686b350dd261bfd24576a6fb0e2c5919b3a2b6005f820b0473"
"checksum rayon-core 1.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "b055d1e92aba6877574d8fe604a63c8b5df60f60e5982bf7ccbb1338ea527356"
"checksum rdrand 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2"
"checksum redox_syscall 0.1.54 (registry+https://github.com/rust-lang/crates.io-index)" = "12229c14a0f65c4f1cb046a3b52047cdd9da1f4b30f8a39c5063c8bae515e252"
"checksum regex 1.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "8f0a0bcab2fd7d1d7c54fa9eae6f43eddeb9ce2e7352f8518a814a4f65d60c58"
"checksum regex-syntax 0.6.6 (registry+https://github.com/rust-lang/crates.io-index)" = "dcfd8681eebe297b81d98498869d4aae052137651ad7b96822f09ceb690d0a96"
"checksum rkv 0.9.5 (registry+https://github.com/rust-lang/crates.io-index)" = "2c1b8d667bf149bfac7c47bb728dfb7246f35fdf61c2f16f9f588194f087d23c"
"checksum rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a"
"checksum scopeguard 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "94258f53601af11e6a49f722422f6e3425c52b06245a5cf9bc09908b174f5e27"
"checksum scopeguard 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b42e15e59b18a828bbf5c58ea01debb36b9b096346de35d941dcb89009f24a0d"
"checksum semver 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403"
"checksum semver-parser 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3"
@ -612,3 +764,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum winapi 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)" = "f10e386af2b13e47c89e7236a7a14a086791a2b88ebad6df9bf42040195cf770"
"checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
"checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
"checksum zstd 0.4.24+zstd.1.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "2c5a6414958b49ee80f2dd0042023ac8f37cfe1d31fbeec0b9749cf6f2c03683"
"checksum zstd-safe 1.4.9+zstd.1.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1d98332212af687878b146a6549c188e9b72971972d23089c831472f938e6272"
"checksum zstd-sys 1.4.10+zstd.1.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "46f433134fbd0c37c9eb5929733df5f34bcdff464722eb93155fcee93eb57652"

@ -19,3 +19,8 @@ exclude = [
tree_magic = { package = "tree_magic_fork", version="0.2" }
regex = "1.1.6"
rkv = "0.9.5"
cachedir = "0.1.1"
path-clean = "0.1.0"
bincode = "1.1.4"
serde = "1.0.92"
zstd = "0.4.24"

@ -13,7 +13,7 @@ use std::io::Write;
use std::rc::Rc;
pub enum Matcher {
MimeType(Regex), // todo: generic pattern?
// MimeType(Regex),
FileName(Regex),
}
@ -27,7 +27,7 @@ pub struct FileMeta {
// filename is not actually a utf8 string, but since we can't do regex on OsStr and can't get a &[u8] from OsStr either,
// and since we probably only want to do matching on ascii stuff anyways, this is the filename converted lossily to a string (invalid bytes are replaced, not preserved)
pub lossy_filename: String,
pub mimetype: String,
// pub mimetype: String,
}
pub trait GetMetadata {
@ -51,26 +51,26 @@ pub fn init_adapters() -> Result<impl Fn(FileMeta) -> Option<Rc<dyn FileAdapter>
];
let mut fname_regexes = vec![];
let mut mime_regexes = vec![];
//let mut mime_regexes = vec![];
for adapter in adapters.into_iter() {
let metadata = adapter.metadata();
for matcher in &metadata.matchers {
match matcher {
Matcher::MimeType(re) => mime_regexes.push((re.clone(), adapter.clone())),
//Matcher::MimeType(re) => mime_regexes.push((re.clone(), adapter.clone())),
Matcher::FileName(re) => fname_regexes.push((re.clone(), adapter.clone())),
};
}
}
let fname_regex_set = RegexSet::new(fname_regexes.iter().map(|p| p.0.as_str()))?;
let mime_regex_set = RegexSet::new(mime_regexes.iter().map(|p| p.0.as_str()))?;
//let mime_regex_set = RegexSet::new(mime_regexes.iter().map(|p| p.0.as_str()))?;
return Ok(move |meta: FileMeta| {
// todo: handle multiple matches
for m in fname_regex_set.matches(&meta.lossy_filename) {
return Some(fname_regexes[m].1.clone());
}
for m in mime_regex_set.matches(&meta.mimetype) {
/*for m in mime_regex_set.matches(&meta.mimetype) {
return Some(mime_regexes[m].1.clone());
}
}*/
return None;
});
}

@ -15,7 +15,7 @@ impl FFmpegAdapter {
pub fn new() -> FFmpegAdapter {
FFmpegAdapter {
_metadata: AdapterMeta {
name: "FFmpeg".to_owned(),
name: "ffmpeg".to_owned(),
version: 1,
matchers: extensions.iter().map(|s| ExtensionMatcher(s)).collect(),
},

@ -39,6 +39,7 @@ use std::process::Command;
//"txt" -> Just "markdown"
//"xhtml" -> Just "html"
//"wiki" -> Just "mediawiki"
static extensions: &[&str] = &["epub", "odt", "docx", "pptx", "fb2", "icml", "rtf", "ipynb"];
pub struct PandocAdapter {
@ -67,7 +68,7 @@ impl SpawningFileAdapter for PandocAdapter {
let mut cmd = Command::new("pandoc");
cmd
// simpler markdown (with more information loss but plainer text)
.arg("--to=markdown-header_attributes-link_attributes-fenced_divs-markdown_in_html_blocks-raw_html-native_divs-native_spans-bracketed_spans")
.arg("--to=commonmark-header_attributes-link_attributes-fenced_divs-markdown_in_html_blocks-raw_html-native_divs-native_spans-bracketed_spans")
.arg("--wrap=none")
.arg("--atx-headers")
.arg("--")

@ -14,7 +14,7 @@ impl PopplerAdapter {
pub fn new() -> PopplerAdapter {
PopplerAdapter {
_metadata: AdapterMeta {
name: "poppler pdftotext".to_owned(),
name: "poppler".to_owned(),
version: 1,
// todo: read from ffmpeg -demuxers?
matchers: extensions.iter().map(|s| ExtensionMatcher(s)).collect(),

@ -1,41 +1,101 @@
use path_clean::PathClean;
use rga::adapters::*;
use rga::CachingWriter;
use serde::{Deserialize, Serialize};
use std::error::Error;
use std::fmt;
use std::path::Path;
use std::io::Write;
use std::path::{Path, PathBuf};
use tree_magic;
// Size limit in bytes handed to `CachingWriter::new` as `max_cache_size`:
// adapter output whose compressed form grows beyond this is not stored in the cache db.
const max_db_blob_len: usize = 2000000;
// lazy error: turn any string-like message into a boxed `Error`
// so it can be returned from functions that yield `Box<dyn Error>`.
fn lerr(inp: impl AsRef<str>) -> Box<dyn Error> {
    let message: &str = inp.as_ref();
    Box::<dyn Error>::from(message)
}
fn open_db() -> Result<std::sync::Arc<std::sync::RwLock<rkv::Rkv>>, Box<dyn Error>> {
let app_cache = cachedir::CacheDirConfig::new("rga").get_cache_dir()?;
let db_arc = rkv::Manager::singleton()
.write()
.expect("could not write db manager")
.get_or_create(app_cache.as_path(), |p| {
let mut builder = rkv::Rkv::environment_builder();
builder
.set_flags(rkv::EnvironmentFlags::NO_SYNC | rkv::EnvironmentFlags::WRITE_MAP) // not durable
.set_map_size(2 * 1024 * 1024 * 1024)
.set_max_dbs(100);
rkv::Rkv::from_env(p, builder)
})
.expect("could not get/create db");
Ok(db_arc)
}
fn main() -> Result<(), Box<dyn Error>> {
//db.
let adapters = init_adapters()?;
let filepath = std::env::args()
.skip(1)
.next()
.ok_or(lerr("No filename specified"))?;
println!("fname: {}", filepath);
let path = Path::new(&filepath);
eprintln!("fname: {}", filepath);
let path = PathBuf::from(&filepath);
let serialized_path: Vec<u8> =
bincode::serialize(&path.clean()).expect("could not serialize path");
let filename = path.file_name().ok_or(lerr("Empty filename"))?;
let mimetype = tree_magic::from_filepath(path).ok_or(lerr(format!(
/*let mimetype = tree_magic::from_filepath(path).ok_or(lerr(format!(
"File {} does not exist",
filename.to_string_lossy()
)))?;
println!("mimetype: {:?}", mimetype);
println!("mimetype: {:?}", mimetype);*/
let adapter = adapters(FileMeta {
mimetype,
// mimetype,
lossy_filename: filename.to_string_lossy().to_string(),
});
match adapter {
Some(ad) => {
println!("adapter: {}", &ad.metadata().name);
let stdouti = std::io::stdout();
let mut stdout = stdouti.lock();
ad.adapt(&filepath, &mut stdout)?;
Ok(())
let meta = ad.metadata();
eprintln!("adapter: {}", &meta.name);
let db_name = format!("{}.v{}", meta.name, meta.version);
let db_arc = open_db()?;
let db_env = db_arc.read().unwrap();
let db = db_env
.open_single(db_name.as_str(), rkv::store::Options::create())
.map_err(|p| lerr(format!("could not open db store: {:?}", p)))?;
let reader = db_env.read().expect("could not get reader");
match db
.get(&reader, &serialized_path)
.map_err(|p| lerr(format!("could not read from db: {:?}", p)))?
{
Some(rkv::Value::Blob(cached)) => {
let stdouti = std::io::stdout();
zstd::stream::copy_decode(cached, stdouti.lock())?;
Ok(())
}
Some(_) => Err(lerr("Integrity: value not blob")),
None => {
let stdouti = std::io::stdout();
let mut compbuf = CachingWriter::new(stdouti.lock(), max_db_blob_len, 12)?;
ad.adapt(&filepath, &mut compbuf)?;
let compressed = compbuf.finish()?;
if let Some(cached) = compressed {
eprintln!("compressed len: {}", cached.len());
{
let mut writer = db_env.write().map_err(|p| {
lerr(format!("could not open write handle to cache: {:?}", p))
})?;
db.put(&mut writer, &serialized_path, &rkv::Value::Blob(&cached))
.map_err(|p| lerr(format!("could not write to cache: {:?}", p)))?;
writer.commit().unwrap();
}
}
Ok(())
}
}
}
None => {
eprintln!("no adapter for that file, running cat!");

@ -0,0 +1,70 @@
use std::io::Write;
// NOTE(review): this enum is not referenced by any other code visible in this file;
// it looks like a leftover from an earlier design and can probably be removed — confirm.
// Its single variant pairs a byte buffer with a zstd encoder that writes into a borrowed buffer.
enum Sta<'t> {
ToZstd(Vec<u8>, zstd::stream::write::Encoder<&'t mut Vec<u8>>),
}
/**
 * Wraps a writer so that it is passthrough: everything written is forwarded to `out`.
 * In addition, the written data is zstd-compressed into an in-memory buffer,
 * unless the compressed data grows beyond `max_cache_size` bytes, in which case
 * the buffer is dropped and only the passthrough remains.
 * Call `finish` to obtain the compressed buffer (if it was kept).
 */
pub struct CachingWriter<W: Write> {
/// Upper bound, in bytes, on the compressed buffer that will be kept.
max_cache_size: usize,
/// Encoder compressing into an in-memory `Vec<u8>`; `None` once the cache has been dropped.
zstd_writer: Option<zstd::stream::write::Encoder<Vec<u8>>>,
/// The wrapped writer that receives the uncompressed data.
out: W,
}
impl<W: Write> CachingWriter<W> {
    /// Creates a passthrough writer around `out` that also compresses everything
    /// written into an in-memory buffer.
    ///
    /// * `max_cache_size` - largest compressed size (in bytes) that will be kept.
    /// * `compression_level` - level handed to the zstd encoder.
    ///
    /// # Errors
    /// Returns the error reported by the zstd encoder if it cannot be created.
    pub fn new(
        out: W,
        max_cache_size: usize,
        compression_level: i32,
    ) -> std::io::Result<CachingWriter<W>> {
        let encoder = zstd::stream::write::Encoder::new(Vec::new(), compression_level)?;
        Ok(CachingWriter {
            max_cache_size,
            zstd_writer: Some(encoder),
            out,
        })
    }

    /// Consumes the writer and returns the compressed copy of everything written.
    ///
    /// Yields `Ok(None)` when the cache was already dropped during writing, or
    /// when the finished compressed data is larger than `max_cache_size`.
    ///
    /// # Errors
    /// Returns the error reported by the zstd encoder if finishing the stream fails.
    pub fn finish(self) -> std::io::Result<Option<Vec<u8>>> {
        let CachingWriter {
            max_cache_size,
            zstd_writer,
            ..
        } = self;
        let compressed = match zstd_writer {
            Some(encoder) => encoder.finish()?,
            // cache was abandoned while writing
            None => return Ok(None),
        };
        if compressed.len() > max_cache_size {
            // drop cache
            return Ok(None);
        }
        Ok(Some(compressed))
    }
}
impl<W: Write> Write for CachingWriter<W> {
    /// Writes `buf` to the wrapped writer and, while the cache is still active,
    /// also feeds it to the zstd encoder.
    ///
    /// Only as many bytes as the encoder accepted are forwarded to `out`, and that
    /// count is returned, so the cache and the output stay in step. Once the
    /// compressed data in the buffer exceeds `max_cache_size` the encoder is
    /// discarded and all later writes go straight to `out`.
    ///
    /// # Errors
    /// Returns any error from the encoder or from the wrapped writer.
    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
        let writer = match self.zstd_writer.as_mut() {
            Some(writer) => writer,
            // Cache already abandoned: plain passthrough.
            None => return self.out.write(buf),
        };
        let wrote = writer.write(buf)?;
        // Length of the compressed bytes that have reached the backing buffer so far.
        let compressed_len = writer.get_ref().len();
        eprintln!("wrote {} to zstd, len now {}", wrote, compressed_len);
        if compressed_len > self.max_cache_size {
            eprintln!("cache longer than max, dropping");
            // The compressed data is thrown away, so there is no point in
            // finalizing the stream; dropping the encoder avoids the extra work
            // and keeps a cache-only failure from aborting the passthrough write.
            self.zstd_writer = None;
        }
        self.out.write_all(&buf[..wrote])?;
        Ok(wrote)
    }

    /// Flushes the zstd encoder (if the cache is still active) and then the wrapped writer.
    ///
    /// # Errors
    /// Returns any error from flushing the encoder or the wrapped writer.
    fn flush(&mut self) -> std::io::Result<()> {
        eprintln!("flushing");
        if let Some(writer) = self.zstd_writer.as_mut() {
            writer.flush()?;
        }
        self.out.flush()
    }
}

@ -1 +1,3 @@
pub mod adapters;
mod caching_writer;
pub use caching_writer::CachingWriter;

Loading…
Cancel
Save