You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

198 lines
7.2 KiB

pub mod custom;
pub mod decompress;
pub mod ffmpeg;
pub mod postproc;
use std::sync::Arc;
pub mod sqlite;
pub mod tar;
pub mod writing;
2 years ago
pub mod zip;
4 years ago
use crate::{adapted_iter::AdaptedFilesIterBox, config::RgaConfig, matching::*};
use anyhow::{format_err, Context, Result};
use custom::CustomAdapterConfig;
2 years ago
use log::*;
use tokio::io::AsyncRead;
use core::fmt::Debug;
use std::borrow::Cow;
use std::collections::HashMap;
use std::iter::Iterator;
use std::path::PathBuf;
use std::pin::Pin;
use self::postproc::PostprocPageBreaks;
/// Owned, pinned trait object for any asynchronous byte reader (`AsyncRead`) that is also `Send`.
pub type ReadBox = Pin<Box<dyn AsyncRead + Send>>;
/// Static description of an adapter: its name, its version (used for cache keys),
/// and the matchers that decide which files it applies to.
pub struct AdapterMeta {
/// unique short name of this adapter (a-z0-9 only)
pub name: String,
/// version identifier. used to key cache entries, change if your output format changes
pub version: i32,
/// free-form description text for this adapter
/// NOTE(review): where this text is shown to the user is not visible here — confirm
pub description: String,
5 years ago
/// indicates whether this adapter can descend (=call rga_preproc again). if true, the cache key needs to include the list of active adapters
pub recurses: bool,
/// list of matchers (interpreted as a OR b OR ...)
pub fast_matchers: Vec<FastFileMatcher>,
/// list of matchers when we have mime type detection active (interpreted as ORed)
/// warning: this *overrides* the fast matchers
pub slow_matchers: Option<Vec<FileMatcher>>,
/// if true, slow_matchers is merged with fast matchers if accurate is enabled
/// for example, in sqlite you want this disabled since the db extension can mean other things and the mime type matching is very accurate for sqlite.
/// but for tar you want it enabled, since the tar extension is very accurate but the tar mime matcher can have false negatives
pub keep_fast_matchers_if_accurate: bool,
/// if true, adapter is only used when user lists it in `--rga-adapters`
pub disabled_by_default: bool,
impl AdapterMeta {
// todo: this is pretty ugly
5 years ago
/// Returns the matchers to use for this adapter as a boxed iterator of `Cow<FileMatcher>`.
///
/// Items are `Cow::Borrowed` when they come straight from the slow matcher list and
/// `Cow::Owned` when a fast matcher is cloned and wrapped in `FileMatcher::Fast`.
///
/// NOTE(review): the `match` scrutinee and the heads of two iterator chains are missing
/// from this view. Judging by the arm patterns (bool, bool, Option) the scrutinee is
/// presumably `(slow, self.keep_fast_matchers_if_accurate, &self.slow_matchers)` —
/// confirm against the complete file.
pub fn get_matchers<'a>(
&'a self,
slow: bool,
) -> Box<dyn Iterator<Item = Cow<FileMatcher>> + 'a> {
match (
) {
2 years ago
// slow matchers present, second flag false: yield only the slow matchers, by reference
(true, false, Some(ref sm)) => Box::new(sm.iter().map(Cow::Borrowed)),
// slow matchers present, second flag true
// NOTE(review): the start of this chain is missing; presumably the slow matchers are
// chained with the fast matchers below — confirm
(true, true, Some(ref sm)) => Box::new(
2 years ago
.map(|e| Cow::Owned(FileMatcher::Fast(e.clone()))),
// don't have slow matchers or slow matching disabled
// NOTE(review): the receiver of this `.map` is missing; presumably `self.fast_matchers.iter()` — confirm
(true, _, None) | (false, _, _) => Box::new(
.map(|e| Cow::Owned(FileMatcher::Fast(e.clone()))),
/// Gives access to the [`AdapterMeta`] that describes an implementor.
pub trait GetMetadata {
5 years ago
/// Returns a reference to this value's metadata.
fn metadata(&self) -> &AdapterMeta;
/// An adapter that can process a file. Implementors must also provide metadata
/// via [`GetMetadata`] and be `Send + Sync`.
pub trait FileAdapter: GetMetadata + Send + Sync {
/// adapt a file.
/// detection_reason is the Matcher that was used to identify this file. Unless --rga-accurate was given, it is always a FastMatcher
2 years ago
/// Takes ownership of the [`AdaptInfo`] and returns an `AdaptedFilesIterBox` on success, or an error.
fn adapt(&self, a: AdaptInfo, detection_reason: &FileMatcher) -> Result<AdaptedFilesIterBox>;
/// The input handed to [`FileAdapter::adapt`]: a path hint, the stream to read from,
/// and the settings that accompany it.
pub struct AdaptInfo {
5 years ago
/// file path. May not be an actual file on the file system (e.g. in an archive). Used for matching file extensions.
pub filepath_hint: PathBuf,
5 years ago
/// true if filepath_hint is an actual file on the file system
pub is_real_file: bool,
/// depth at which this file is in archives. 0 for real filesystem
pub archive_recursion_depth: i32,
5 years ago
/// stream to read the file from. can be from a file or from some decoder
pub inp: ReadBox,
5 years ago
/// prefix every output line with this string to better indicate the file's location if it is in some archive
pub line_prefix: String,
/// NOTE(review): presumably controls whether postprocessing is applied to the adapter output — confirm against callers
pub postprocess: bool,
/// configuration value carried along with the file
/// NOTE(review): presumably the active rga configuration — confirm
pub config: RgaConfig,
/// Pair of adapter lists: `.0` holds the adapters that are enabled by default,
/// `.1` the adapters that are disabled by default.
type AdaptersTuple = (Vec<Arc<dyn FileAdapter>>, Vec<Arc<dyn FileAdapter>>);
/// Collects adapters and splits them into (enabled by default, disabled by default).
///
/// `custom_adapters` is an optional list of user-supplied adapter configurations.
///
/// NOTE(review): several lines of this function are missing from this view (the loop body,
/// the contents of `internal_adapters`, and the head of the final chain). The notes below
/// mark what is presumed rather than shown — confirm against the complete file.
pub fn get_all_adapters(custom_adapters: Option<Vec<CustomAdapterConfig>>) -> AdaptersTuple {
// order in descending priority
let mut adapters: Vec<Arc<dyn FileAdapter>> = vec![];
// NOTE(review): loop body not visible; presumably each config is converted to an adapter and pushed onto `adapters` — confirm
if let Some(custom_adapters) = custom_adapters {
for adapter_config in custom_adapters {
// NOTE(review): the entries of this list are not visible in this view
let internal_adapters: Vec<Arc<dyn FileAdapter>> = vec![
2 years ago
2 years ago
2 years ago
// converts each element to an adapter via `to_adapter()` and wraps it in an `Arc`
.map(|e| -> Arc<dyn FileAdapter> { Arc::new(e.to_adapter()) }),
// adapters whose metadata does not set `disabled_by_default` go into the first list
.partition(|e| !e.metadata().disabled_by_default)
/**
 * filter adapters by given names:
 *
 * - "" means use default enabled adapter list
 * - "a,b" means use adapters a,b
 * - "-a,b" means use default list except for a and b
 * - "+a,b" means use default list but also a and b (a,b will be prepended to the list so given higher priority)
 */
pub fn get_adapters_filtered<T: AsRef<str>>(
custom_adapters: Option<Vec<CustomAdapterConfig>>,
adapter_names: &Vec<T>,
) -> Result<Vec<Arc<dyn FileAdapter>>> {
let (def_enabled_adapters, def_disabled_adapters) = get_all_adapters(custom_adapters);
let adapters = if !adapter_names.is_empty() {
let adapters_map: HashMap<_, _> = def_enabled_adapters
.map(|e| (e.metadata().name.clone(), e.clone()))
let mut adapters = vec![];
let mut subtractive = false;
let mut additive = false;
for (i, name) in adapter_names.iter().enumerate() {
let mut name = name.as_ref();
if i == 0 && (name.starts_with('-')) {
subtractive = true;
name = &name[1..];
adapters = def_enabled_adapters.clone();
} else if i == 0 && (name.starts_with('+')) {
name = &name[1..];
adapters = def_enabled_adapters.clone();
additive = true;
if subtractive {
let inx = adapters
.position(|a| a.metadata().name == name)
.ok_or_else(|| format_err!("Could not remove {}: Not in list", name))?;
} else {
let adapter = adapters_map
.ok_or_else(|| format_err!("Unknown adapter: \"{}\"", name))?
if additive {
adapters.insert(0, adapter);
} else {
} else {
"Chosen available adapters: {}",
.map(|a| a.metadata().name.clone())