From 44fc7d7e716a36c615b93ce393b66f66fb619f4b Mon Sep 17 00:00:00 2001 From: phiresky Date: Tue, 16 Jan 2024 01:11:43 +0100 Subject: [PATCH] add rga-no-prefix-filenames flag (fixes #154) --- CHANGELOG.md | 6 +++++- src/adapters/postproc.rs | 1 - src/bin/rga-preproc.rs | 2 +- src/config.rs | 11 ++++++++++ src/preproc.rs | 7 ++++++- src/preproc_cache.rs | 44 +++++++++++++++++++++++++++------------- 6 files changed, 53 insertions(+), 18 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4cf82cc..c96f683 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,8 @@ -# 0.10.3 +# 0.10.4 (2024-01-16) + +- add `--rga-no-prefix-filenames` flag (https://github.com/phiresky/ripgrep-all/issues/154) + +# 0.10.3 (2024-01-16) This was originally supposed to be version 1.0.0, but I don't feel confident enough in the stability to call it that. diff --git a/src/adapters/postproc.rs b/src/adapters/postproc.rs index bccb95f..45ec2a7 100644 --- a/src/adapters/postproc.rs +++ b/src/adapters/postproc.rs @@ -183,7 +183,6 @@ impl FileAdapter for PostprocPageBreaks { // keep adapt info (filename etc) except replace inp let ai = AdaptInfo { inp: Box::pin(read), - postprocess: true, archive_recursion_depth: a.archive_recursion_depth + 1, filepath_hint: a .filepath_hint diff --git a/src/bin/rga-preproc.rs b/src/bin/rga-preproc.rs index 2b25cef..7268e3d 100644 --- a/src/bin/rga-preproc.rs +++ b/src/bin/rga-preproc.rs @@ -30,7 +30,7 @@ async fn main() -> anyhow::Result<()> { is_real_file: true, line_prefix: "".to_string(), archive_recursion_depth: 0, - postprocess: true, + postprocess: !config.no_prefix_filenames, config, }; diff --git a/src/config.rs b/src/config.rs index a998da6..9a327f7 100644 --- a/src/config.rs +++ b/src/config.rs @@ -144,6 +144,9 @@ pub struct RgaConfig { pub cache: CacheConfig, /// Maximum nestedness of archives to recurse into + /// + /// When searching in archives, rga will recurse into archives inside archives. + /// This option limits the depth. #[serde(default, skip_serializing_if = "is_default")] #[structopt( default_value, @@ -153,6 +156,14 @@ pub struct RgaConfig { )] pub max_archive_recursion: MaxArchiveRecursion, + /// Don't prefix lines of files within archive with the path inside the archive. + /// + /// Inside archives, by default rga prefixes the content of each file with the file path within the archive. + /// This is usually useful, but can cause problems because then the inner path is also searched for the pattern. + #[serde(default, skip_serializing_if = "is_default")] + #[structopt(long = "--rga-no-prefix-filenames")] + pub no_prefix_filenames: bool, + ////////////////////////////////////////// //////////////////////////// Config file only ////////////////////////////////////////// diff --git a/src/preproc.rs b/src/preproc.rs index ccc8632..32f3fa8 100644 --- a/src/preproc.rs +++ b/src/preproc.rs @@ -153,7 +153,12 @@ async fn adapt_caching( }; let mut cache = cache.context("No cache?")?; - let cache_key = CacheKey::new(&ai.filepath_hint, adapter.as_ref(), &active_adapters)?; + let cache_key = CacheKey::new( + ai.postprocess, + &ai.filepath_hint, + adapter.as_ref(), + &active_adapters, + )?; // let dbg_ctx = format!("adapter {}", &adapter.metadata().name); let cached = cache.get(&cache_key).await.context("cache.get")?; match cached { diff --git a/src/preproc_cache.rs b/src/preproc_cache.rs index a48c414..45918b1 100644 --- a/src/preproc_cache.rs +++ b/src/preproc_cache.rs @@ -1,12 +1,15 @@ use crate::{adapters::FileAdapter, preproc::ActiveAdapters}; use anyhow::{Context, Result}; +use log::warn; use path_clean::PathClean; use rusqlite::{named_params, OptionalExtension}; use std::{path::Path, time::UNIX_EPOCH}; use tokio_rusqlite::Connection; +static SCHEMA_VERSION: i32 = 3; #[derive(Clone)] pub struct CacheKey { + config_hash: String, adapter: String, adapter_version: i32, active_adapters: String, @@ -15,6 +18,7 @@ pub struct CacheKey { } impl CacheKey { pub fn new( + postprocess: bool, filepath_hint: &Path, adapter: &dyn FileAdapter, active_adapters: &ActiveAdapters, @@ -34,6 +38,11 @@ impl CacheKey { "null".to_string() }; Ok(CacheKey { + config_hash: if postprocess { + "a41e2e9".to_string() + } else { + "f1502a3".to_string() + }, // todo: when we add more config options that affect caching, create a struct and actually hash it adapter: adapter.metadata().name.clone(), adapter_version: adapter.metadata().version, file_path: filepath_hint.clean().to_string_lossy().to_string(), @@ -63,6 +72,7 @@ async fn connect_pragmas(db: &Connection) -> Result<()> { db.pragma_update(None, "mmap_size", "2000000000")?; db.execute(" create table if not exists preproc_cache ( + config_hash text not null, adapter text not null, adapter_version integer not null, created_unix_ms integer not null default (unixepoch() * 1000), @@ -73,7 +83,7 @@ async fn connect_pragmas(db: &Connection) -> Result<()> { ) strict", [] )?; - db.execute("create unique index if not exists preproc_cache_idx on preproc_cache (adapter, adapter_version, file_path, active_adapters)", [])?; + db.execute("create unique index if not exists preproc_cache_idx on preproc_cache (config_hash, adapter, adapter_version, file_path, active_adapters)", [])?; Ok(()) }) @@ -83,26 +93,29 @@ async fn connect_pragmas(db: &Connection) -> Result<()> { .await?; if jm != 924716026 { // (probably) newly created db - create_pragmas(db).await.context("create_pragmas")?; + db.call(|db| Ok(db.pragma_update(None, "application_id", "924716026")?)) + .await?; } Ok(()) } -async fn create_pragmas(db: &Connection) -> Result<()> { - db.call(|db| { - db.pragma_update(None, "application_id", "924716026")?; - db.pragma_update(None, "user_version", "2")?; // todo: on upgrade clear db if version is unexpected - Ok(()) - }) - .await?; - Ok(()) -} struct SqliteCache { db: Connection, } impl SqliteCache { async fn new(path: &Path) -> Result { let db = Connection::open(path.join("cache.sqlite3")).await?; + db.call(|db| { + let schema_version: i32 = db.pragma_query_value(None, "user_version", |r| r.get(0))?; + if schema_version != SCHEMA_VERSION { + warn!("Cache schema version mismatch, clearing cache"); + db.execute("drop table if exists preproc_cache", [])?; + db.pragma_update(None, "user_version", format!("{SCHEMA_VERSION}"))?; + } + Ok(()) + }) + .await?; + connect_pragmas(&db).await?; Ok(SqliteCache { db }) @@ -120,12 +133,14 @@ impl PreprocCache for SqliteCache { .query_row( "select text_content_zstd from preproc_cache where adapter = :adapter + and config_hash = :config_hash and adapter_version = :adapter_version and active_adapters = :active_adapters and file_path = :file_path and file_mtime_unix_ms = :file_mtime_unix_ms ", named_params! { + ":config_hash": &key.config_hash, ":adapter": &key.adapter, ":adapter_version": &key.adapter_version, ":active_adapters": &key.active_adapters, @@ -152,13 +167,14 @@ impl PreprocCache for SqliteCache { .db .call(move |db| { db.execute( - "insert into preproc_cache (adapter, adapter_version, active_adapters, file_path, file_mtime_unix_ms, text_content_zstd) values - (:adapter, :adapter_version, :active_adapters, :file_path, :file_mtime_unix_ms, :text_content_zstd) - on conflict (adapter, adapter_version, active_adapters, file_path) do update set + "insert into preproc_cache (config_hash, adapter, adapter_version, active_adapters, file_path, file_mtime_unix_ms, text_content_zstd) values + (:config_hash, :adapter, :adapter_version, :active_adapters, :file_path, :file_mtime_unix_ms, :text_content_zstd) + on conflict (config_hash, adapter, adapter_version, active_adapters, file_path) do update set file_mtime_unix_ms = :file_mtime_unix_ms, created_unix_ms = unixepoch() * 1000, text_content_zstd = :text_content_zstd", named_params! { + ":config_hash": &key.config_hash, ":adapter": &key.adapter, ":adapter_version": &key.adapter_version, ":active_adapters": &key.active_adapters,