Allow removing from CLI found image/music/video files (#1087)

* Bind delete

* Audio tags/content

* Tests

* Test videos

* FFmpeg
pull/1091/head
Rafał Mikrut 7 months ago committed by GitHub
parent e50d930683
commit 8b20f78573
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -19,7 +19,7 @@ jobs:
- uses: actions/checkout@v3
- name: Install basic libraries
run: sudo apt-get update; sudo apt install libheif-dev -y
run: sudo apt-get update; sudo apt install libheif-dev ffmpeg -y
- name: Setup rust version
run: rustup default ${{ matrix.toolchain }}

@ -15,6 +15,7 @@ static CZKAWKA_PATH: state::InitCell<String> = state::InitCell::new();
static COLLECTED_FILES: state::InitCell<CollectedFiles> = state::InitCell::new();
const ATTEMPTS: u32 = 10;
const PRINT_MESSAGES_CZKAWKA: bool = true;
// App runs - ./ci_tester PATH_TO_CZKAWKA
fn main() {
@ -41,10 +42,113 @@ fn main() {
test_remove_duplicates_one_newest();
test_remove_duplicates_all_expect_newest();
test_remove_duplicates_all_expect_oldest();
test_remove_same_music_tags_one_oldest();
test_remove_same_music_tags_one_newest();
test_remove_same_music_tags_all_expect_oldest();
test_remove_same_music_tags_all_expect_newest();
test_remove_same_music_content_one_oldest();
test_remove_same_music_content_all_expect_oldest();
test_remove_same_music_content_one_newest();
test_remove_same_music_content_all_expect_newest();
test_remove_videos_one_oldest();
test_remove_videos_one_newest();
test_remove_videos_all_expect_oldest();
test_remove_videos_all_expect_newest();
}
println!("Completed checking");
}
// Runs the `video` command on `TestFiles` with delete method `OO` (only one file, the oldest)
// and passes `Videos/V3.webm` as the single expected file difference; the two remaining
// expectation lists given to `run_test` are empty.
fn test_remove_videos_one_oldest() {
info!("test_remove_videos_one_oldest");
run_test(&["video", "-d", "TestFiles", "-D", "OO"], vec!["Videos/V3.webm"], vec![], vec![]);
}
// Runs the `video` command on `TestFiles` with delete method `ON` (only one file, the newest)
// and passes `Videos/V5.mp4` as the single expected file difference; the two remaining
// expectation lists given to `run_test` are empty.
fn test_remove_videos_one_newest() {
info!("test_remove_videos_one_newest");
run_test(&["video", "-d", "TestFiles", "-D", "ON"], vec!["Videos/V5.mp4"], vec![], vec![]);
}
// Runs the `video` command on `TestFiles` with delete method `AEO` (all files except the oldest)
// and passes V1, V2 and V5 as the expected file differences; the two remaining expectation
// lists given to `run_test` are empty.
fn test_remove_videos_all_expect_oldest() {
info!("test_remove_videos_all_expect_oldest");
run_test(
&["video", "-d", "TestFiles", "-D", "AEO"],
vec!["Videos/V1.mp4", "Videos/V2.mp4", "Videos/V5.mp4"],
vec![],
vec![],
);
}
// Runs the `video` command on `TestFiles` with delete method `AEN` (all files except the newest)
// and passes V1, V2 and V3 as the expected file differences; the two remaining expectation
// lists given to `run_test` are empty.
fn test_remove_videos_all_expect_newest() {
info!("test_remove_videos_all_expect_newest");
run_test(
&["video", "-d", "TestFiles", "-D", "AEN"],
vec!["Videos/V1.mp4", "Videos/V2.mp4", "Videos/V3.webm"],
vec![],
vec![],
);
}
// Runs the `music` command on `TestFiles` with search method `CONTENT`, `-l 2.0` and delete
// method `ON` (only one file, the newest); `Music/M2.mp3` is the single expected file
// difference and the two remaining expectation lists given to `run_test` are empty.
fn test_remove_same_music_content_one_newest() {
info!("test_remove_same_music_content_one_newest");
run_test(
&["music", "-d", "TestFiles", "-s", "CONTENT", "-l", "2.0", "-D", "ON"],
vec!["Music/M2.mp3"],
vec![],
vec![],
);
}
// Runs the `music` command on `TestFiles` with search method `CONTENT`, `-l 2.0` and delete
// method `AEN` (all files except the newest); M1, M3 and M5 are the expected file differences
// and the two remaining expectation lists given to `run_test` are empty.
fn test_remove_same_music_content_all_expect_newest() {
info!("test_remove_same_music_content_all_expect_newest");
run_test(
&["music", "-d", "TestFiles", "-s", "CONTENT", "-l", "2.0", "-D", "AEN"],
vec!["Music/M1.mp3", "Music/M3.flac", "Music/M5.mp3"],
vec![],
vec![],
);
}
// Runs the `music` command on `TestFiles` with search method `CONTENT`, `-l 2.0` and delete
// method `AEO` (all files except the oldest); M1, M2 and M3 are the expected file differences
// and the two remaining expectation lists given to `run_test` are empty.
fn test_remove_same_music_content_all_expect_oldest() {
info!("test_remove_same_music_content_all_expect_oldest");
run_test(
&["music", "-d", "TestFiles", "-s", "CONTENT", "-l", "2.0", "-D", "AEO"],
vec!["Music/M1.mp3", "Music/M2.mp3", "Music/M3.flac"],
vec![],
vec![],
);
}
// Runs the `music` command on `TestFiles` with search method `CONTENT`, `-l 2.0` and delete
// method `OO` (only one file, the oldest); `Music/M5.mp3` is the single expected file
// difference and the two remaining expectation lists given to `run_test` are empty.
fn test_remove_same_music_content_one_oldest() {
info!("test_remove_same_music_content_one_oldest");
run_test(
&["music", "-d", "TestFiles", "-s", "CONTENT", "-l", "2.0", "-D", "OO"],
vec!["Music/M5.mp3"],
vec![],
vec![],
);
}
// Runs the `music` command on `TestFiles` without `-s`, so the default `TAGS` search method is
// used, with delete method `OO` (only one file, the oldest). `Music/M5.mp3` is the single
// expected file difference; the two remaining expectation lists are empty.
fn test_remove_same_music_tags_one_oldest() {
    // The logged name matches the function name so the log line identifies the tags variant
    // and cannot be confused with the content-based tests.
    info!("test_remove_same_music_tags_one_oldest");
    run_test(&["music", "-d", "TestFiles", "-D", "OO"], vec!["Music/M5.mp3"], vec![], vec![]);
}
// Runs the `music` command on `TestFiles` without `-s`, so the default `TAGS` search method is
// used, with delete method `ON` (only one file, the newest). `Music/M2.mp3` is the single
// expected file difference; the two remaining expectation lists are empty.
fn test_remove_same_music_tags_one_newest() {
    // The logged name matches the function name so the log line identifies the tags variant
    // and cannot be confused with the content-based tests.
    info!("test_remove_same_music_tags_one_newest");
    run_test(&["music", "-d", "TestFiles", "-D", "ON"], vec!["Music/M2.mp3"], vec![], vec![]);
}
// Runs the `music` command on `TestFiles` without `-s`, so the default `TAGS` search method is
// used, with delete method `AEO` (all files except the oldest). M1, M2 and M3 are the expected
// file differences; the two remaining expectation lists are empty.
fn test_remove_same_music_tags_all_expect_oldest() {
    // The logged name matches the function name so the log line identifies the tags variant
    // and cannot be confused with the content-based tests.
    info!("test_remove_same_music_tags_all_expect_oldest");
    run_test(
        &["music", "-d", "TestFiles", "-D", "AEO"],
        vec!["Music/M1.mp3", "Music/M2.mp3", "Music/M3.flac"],
        vec![],
        vec![],
    );
}
// Runs the `music` command on `TestFiles` without `-s`, so the default `TAGS` search method is
// used, with delete method `AEN` (all files except the newest). M1, M3 and M5 are the expected
// file differences; the two remaining expectation lists are empty.
fn test_remove_same_music_tags_all_expect_newest() {
    // The logged name matches the function name so the log line identifies the tags variant
    // and cannot be confused with the content-based tests.
    info!("test_remove_same_music_tags_all_expect_newest");
    run_test(
        &["music", "-d", "TestFiles", "-D", "AEN"],
        vec!["Music/M1.mp3", "Music/M3.flac", "Music/M5.mp3"],
        vec![],
        vec![],
    );
}
fn test_remove_duplicates_all_expect_oldest() {
info!("test_remove_duplicates_all_expect_oldest");
run_test(
@ -138,7 +242,7 @@ fn run_test(arguments: &[&str], expected_files_differences: Vec<&'static str>, e
let mut all_arguments = vec![];
all_arguments.push(CZKAWKA_PATH.get().as_str());
all_arguments.extend_from_slice(arguments);
run_with_good_status(&all_arguments, true);
run_with_good_status(&all_arguments, PRINT_MESSAGES_CZKAWKA);
file_folder_diffs(
COLLECTED_FILES.get(),
expected_files_differences,

@ -127,20 +127,13 @@ pub struct DuplicatesArgs {
short,
long,
default_value = "HASH",
value_parser = parse_checking_method,
value_parser = parse_checking_method_duplicate,
help = "Search method (NAME, SIZE, HASH)",
long_help = "Methods to search files.\nNAME - Fast but but rarely usable,\nSIZE - Fast but not accurate, checking by the file's size,\nHASH - The slowest method, checking by the hash of the entire file"
)]
pub search_method: CheckingMethod,
#[clap(
short = 'D',
long,
default_value = "NONE",
value_parser = parse_delete_method,
help = "Delete method (AEN, AEO, ON, OO, HARD)",
long_help = "Methods to delete the files.\nAEN - All files except the newest,\nAEO - All files except the oldest,\nON - Only 1 file, the newest,\nOO - Only 1 file, the oldest\nHARD - create hard link\nNONE - not delete files"
)]
pub delete_method: DeleteMethod,
#[clap(flatten)]
pub delete_method: DMethod,
#[clap(
short = 't',
long,
@ -165,7 +158,7 @@ pub struct DuplicatesArgs {
#[clap(flatten)]
pub allow_hard_links: AllowHardLinks,
#[clap(flatten)]
pub dryrun: DryRun,
pub dry_run: DryRun,
}
#[derive(Debug, clap::Args)]
@ -314,6 +307,10 @@ pub struct SimilarImagesArgs {
#[clap(flatten)]
pub file_to_save: FileToSave,
#[clap(flatten)]
pub delete_method: DMethod,
#[clap(flatten)]
pub dry_run: DryRun,
#[clap(flatten)]
pub json_compact_file_to_save: JsonCompactFileToSave,
#[clap(flatten)]
pub json_pretty_file_to_save: JsonPrettyFileToSave,
@ -358,8 +355,10 @@ pub struct SameMusicArgs {
pub excluded_directories: ExcludedDirectories,
#[clap(flatten)]
pub excluded_items: ExcludedItems,
// #[clap(short = 'D', long, help = "Delete found files")]
// delete_files: bool, TODO
#[clap(flatten)]
pub delete_method: DMethod,
#[clap(flatten)]
pub dry_run: DryRun,
#[clap(
short = 'z',
long,
@ -369,6 +368,15 @@ pub struct SameMusicArgs {
long_help = "Sets which rows must be equal to set this files as duplicates(may be mixed, but must be divided by commas)."
)]
pub music_similarity: MusicSimilarity,
// Search method for the same-music tool; parsed by `parse_checking_method_same_music`,
// which accepts TAGS or CONTENT (case-insensitive) and defaults to TAGS.
#[clap(
    short,
    long,
    default_value = "TAGS",
    value_parser = parse_checking_method_same_music,
    help = "Search method (CONTENT, TAGS)",
    long_help = "Methods to search files.\nCONTENT - finds similar audio files by content,\nTAGS - finds similar audio files by tags, the compared tags are chosen with the music similarity option"
)]
pub search_method: CheckingMethod,
#[clap(flatten)]
pub file_to_save: FileToSave,
#[clap(flatten)]
@ -398,6 +406,53 @@ pub struct SameMusicArgs {
long_help = "Maximum size of checked files in bytes, assigning lower value may speed up searching"
)]
pub maximal_file_size: u64,
// Minimum segment duration used by the content-based comparison; validated by
// `parse_minimum_segment_duration` (must be greater than 0 and smaller than 3600).
#[clap(
    short = 'l',
    long,
    value_parser = parse_minimum_segment_duration,
    default_value = "10.0",
    help = "Minimum segment duration",
    long_help = "Minimum segment duration, smaller value will finds also shorter similar segments, which may increase false positives number"
)]
pub minimum_segment_duration: f32,
#[clap(
short = 'd',
long,
value_parser = parse_maximum_difference,
default_value = "2.0",
help = "Maximum difference between segments",
long_help = "Maximum difference between segments, 0.0 will find only identical segments, 10.0 will find also segments which are almost not similar at all"
)]
pub maximum_difference: f64,
}
/// Parses and validates the maximum difference between segments given on the command line.
///
/// The value must be a floating point number strictly greater than `0` and strictly smaller
/// than `10.0`.
///
/// # Errors
///
/// Returns a human readable message when `src` is not a number, is NaN, or lies outside the
/// allowed range.
fn parse_maximum_difference(src: &str) -> Result<f64, String> {
    let maximum_difference = src.parse::<f64>().map_err(|e| e.to_string())?;

    // "NaN" parses successfully and every comparison against NaN is false, so without this
    // explicit check it would slip through both range checks below.
    if maximum_difference.is_nan() {
        Err("Maximum difference must be a number".to_string())
    } else if maximum_difference <= 0.0 {
        Err("Maximum difference must be bigger than 0".to_string())
    } else if maximum_difference >= 10.0 {
        Err("Maximum difference must be smaller than 10.0".to_string())
    } else {
        Ok(maximum_difference)
    }
}
/// Parses and validates the minimum segment duration given on the command line.
///
/// The value must be a floating point number strictly greater than `0` and strictly smaller
/// than `3600`.
///
/// # Errors
///
/// Returns a human readable message when `src` is not a number, is NaN, or lies outside the
/// allowed range.
fn parse_minimum_segment_duration(src: &str) -> Result<f32, String> {
    let minimum_segment_duration = src.parse::<f32>().map_err(|e| e.to_string())?;

    // "NaN" parses successfully and every comparison against NaN is false, so without this
    // explicit check it would slip through both range checks below.
    if minimum_segment_duration.is_nan() {
        Err("Minimum segment duration must be a number".to_string())
    } else if minimum_segment_duration <= 0.0 {
        Err("Minimum segment duration must be bigger than 0".to_string())
    } else if minimum_segment_duration >= 3600.0 {
        Err("Minimum segment duration must be smaller than 3600(greater values not have much sense)".to_string())
    } else {
        Ok(minimum_segment_duration)
    }
}
#[derive(Debug, clap::Args)]
@ -464,8 +519,10 @@ pub struct SimilarVideosArgs {
pub excluded_directories: ExcludedDirectories,
#[clap(flatten)]
pub excluded_items: ExcludedItems,
// #[clap(short = 'D', long, help = "Delete found files")]
// delete_files: bool, TODO
#[clap(flatten)]
pub delete_method: DMethod,
#[clap(flatten)]
pub dry_run: DryRun,
#[clap(flatten)]
pub file_to_save: FileToSave,
#[clap(flatten)]
@ -533,6 +590,19 @@ pub struct BadExtensionsArgs {
pub exclude_other_filesystems: ExcludeOtherFilesystems,
}
// Shared `-D` / delete-method argument group, flattened into the argument structs of the
// tools that support removing found files.
// NOTE(review): plain `//` comments are used on purpose; doc comments on clap-derived items
// may be picked up as help text - confirm before converting them to `///`.
#[derive(Debug, clap::Args)]
pub struct DMethod {
// The raw string is converted by `parse_delete_method`; when the flag is absent the
// default string "NONE" is parsed instead.
#[clap(
short = 'D',
long,
default_value = "NONE",
value_parser = parse_delete_method,
help = "Delete method (AEN, AEO, ON, OO, HARD)",
long_help = "Methods to delete the files.\nAEN - All files except the newest,\nAEO - All files except the oldest,\nON - Only 1 file, the newest,\nOO - Only 1 file, the oldest\nHARD - create hard link\nNONE - not delete files"
)]
pub delete_method: DeleteMethod,
}
#[derive(Debug, clap::Args)]
pub struct Directories {
#[clap(
@ -630,7 +700,7 @@ pub struct CaseSensitiveNameComparison {
#[derive(Debug, clap::Args)]
pub struct DryRun {
#[clap(long, help = "Do nothing and print the operation that would happen.")]
pub dryrun: bool,
pub dry_run: bool,
}
impl FileToSave {
@ -683,7 +753,7 @@ fn parse_tolerance(src: &str) -> Result<i32, &'static str> {
}
}
fn parse_checking_method(src: &str) -> Result<CheckingMethod, &'static str> {
fn parse_checking_method_duplicate(src: &str) -> Result<CheckingMethod, &'static str> {
match src.to_ascii_lowercase().as_str() {
"name" => Ok(CheckingMethod::Name),
"size" => Ok(CheckingMethod::Size),
@ -693,6 +763,14 @@ fn parse_checking_method(src: &str) -> Result<CheckingMethod, &'static str> {
}
}
/// Parses the search method of the same-music tool.
///
/// Matching is ASCII case-insensitive: `tags` maps to [`CheckingMethod::AudioTags`] and
/// `content` maps to [`CheckingMethod::AudioContent`].
///
/// # Errors
///
/// Returns a static message listing the allowed values for any other input.
fn parse_checking_method_same_music(src: &str) -> Result<CheckingMethod, &'static str> {
    match src.to_ascii_lowercase().as_str() {
        "tags" => Ok(CheckingMethod::AudioTags),
        "content" => Ok(CheckingMethod::AudioContent),
        _ => Err("Couldn't parse the search method (allowed: TAGS, CONTENT)"),
    }
}
fn parse_delete_method(src: &str) -> Result<DeleteMethod, &'static str> {
match src.to_ascii_lowercase().as_str() {
"none" => Ok(DeleteMethod::None),
@ -773,7 +851,7 @@ fn parse_image_hash_size(src: &str) -> Result<u8, String> {
}
fn parse_music_duplicate_type(src: &str) -> Result<MusicSimilarity, String> {
if src.is_empty() {
if src.trim().is_empty() {
return Ok(MusicSimilarity::NONE);
}
@ -781,22 +859,22 @@ fn parse_music_duplicate_type(src: &str) -> Result<MusicSimilarity, String> {
let parts: Vec<String> = src.split(',').map(|e| e.to_lowercase().replace('_', "")).collect();
if parts.iter().any(|e| e.contains("tracktitle")) {
if parts.contains(&"tracktitle".into()) {
similarity |= MusicSimilarity::TRACK_TITLE;
}
if parts.iter().any(|e| e.contains("trackartist")) {
if parts.contains(&"trackartist".into()) {
similarity |= MusicSimilarity::TRACK_ARTIST;
}
if parts.iter().any(|e| e.contains("year")) {
if parts.contains(&"year".into()) {
similarity |= MusicSimilarity::YEAR;
}
if parts.iter().any(|e| e.contains("bitrate")) {
if parts.contains(&"bitrate".into()) {
similarity |= MusicSimilarity::BITRATE;
}
if parts.iter().any(|e| e.contains("genre")) {
if parts.contains(&"genre".into()) {
similarity |= MusicSimilarity::GENRE;
}
if parts.iter().any(|e| e.contains("length")) {
if parts.contains(&"length".into()) {
similarity |= MusicSimilarity::LENGTH;
}

@ -75,7 +75,7 @@ fn duplicates(duplicates: DuplicatesArgs) {
#[cfg(target_family = "unix")]
exclude_other_filesystems,
allow_hard_links,
dryrun,
dry_run,
case_sensitive_name_comparison,
} = duplicates;
@ -91,13 +91,13 @@ fn duplicates(duplicates: DuplicatesArgs) {
item.set_minimal_cache_file_size(minimal_cached_file_size);
item.set_allowed_extensions(allowed_extensions.allowed_extensions.join(","));
item.set_check_method(search_method);
item.set_delete_method(delete_method);
item.set_delete_method(delete_method.delete_method);
item.set_hash_type(hash_type);
item.set_recursive_search(!not_recursive.not_recursive);
#[cfg(target_family = "unix")]
item.set_exclude_other_filesystems(exclude_other_filesystems.exclude_other_filesystems);
item.set_ignore_hard_links(!allow_hard_links.allow_hard_links);
item.set_dryrun(dryrun.dryrun);
item.set_dry_run(dry_run.dry_run);
item.set_case_sensitive_name_comparison(case_sensitive_name_comparison.case_sensitive_name_comparison);
item.find_duplicates(None, None);
@ -131,7 +131,9 @@ fn empty_folders(empty_folders: EmptyFoldersArgs) {
item.set_included_directory(directories.directories);
item.set_excluded_directory(excluded_directories.excluded_directories);
item.set_excluded_items(excluded_items.excluded_items);
item.set_delete_folder(delete_folders);
if delete_folders {
item.set_delete_method(DeleteMethod::Delete);
}
#[cfg(target_family = "unix")]
item.set_exclude_other_filesystems(exclude_other_filesystems.exclude_other_filesystems);
@ -292,6 +294,8 @@ fn similar_images(similar_images: SimilarImagesArgs) {
hash_alg,
image_filter,
hash_size,
delete_method,
dry_run,
} = similar_images;
set_number_of_threads(thread_number.thread_number);
@ -309,6 +313,8 @@ fn similar_images(similar_images: SimilarImagesArgs) {
item.set_image_filter(image_filter);
item.set_hash_alg(hash_alg);
item.set_hash_size(hash_size);
item.set_delete_method(delete_method.delete_method);
item.set_dry_run(dry_run.dry_run);
item.set_similarity(return_similarity_from_similarity_preset(&similarity_preset, hash_size));
@ -328,7 +334,7 @@ fn same_music(same_music: SameMusicArgs) {
directories,
excluded_directories,
excluded_items,
// delete_files,
delete_method,
file_to_save,
json_compact_file_to_save,
json_pretty_file_to_save,
@ -338,6 +344,10 @@ fn same_music(same_music: SameMusicArgs) {
minimal_file_size,
maximal_file_size,
music_similarity,
dry_run,
minimum_segment_duration,
maximum_difference,
search_method,
} = same_music;
set_number_of_threads(thread_number.thread_number);
@ -353,10 +363,11 @@ fn same_music(same_music: SameMusicArgs) {
#[cfg(target_family = "unix")]
item.set_exclude_other_filesystems(exclude_other_filesystems.exclude_other_filesystems);
item.set_music_similarity(music_similarity);
// if delete_files {
// // TODO item.set_delete_method(same_music::DeleteMethod::Delete);
// }
item.set_delete_method(delete_method.delete_method);
item.set_dry_run(dry_run.dry_run);
item.set_minimum_segment_duration(minimum_segment_duration);
item.set_maximum_difference(maximum_difference);
item.set_check_type(search_method);
item.find_same_music(None, None);
@ -467,6 +478,8 @@ fn similar_videos(similar_videos: SimilarVideosArgs) {
minimal_file_size,
maximal_file_size,
allowed_extensions,
delete_method,
dry_run,
} = similar_videos;
set_number_of_threads(thread_number.thread_number);
@ -483,6 +496,8 @@ fn similar_videos(similar_videos: SimilarVideosArgs) {
item.set_minimal_file_size(minimal_file_size);
item.set_maximal_file_size(maximal_file_size);
item.set_tolerance(tolerance);
item.set_delete_method(delete_method.delete_method);
item.set_dry_run(dry_run.dry_run);
item.find_similar_videos(None, None);

@ -25,7 +25,10 @@ use log::{info, LevelFilter, Record};
use crate::common_dir_traversal::{CheckingMethod, ProgressData, ToolType};
use crate::common_directory::Directories;
use crate::common_items::ExcludedItems;
use crate::common_messages::Messages;
use crate::common_tool::DeleteMethod;
use crate::common_traits::ResultEntry;
use crate::duplicate::make_hard_link;
use crate::CZKAWKA_VERSION;
static NUMBER_OF_THREADS: state::InitCell<usize> = state::InitCell::new();
@ -234,35 +237,6 @@ pub fn create_crash_message(library_name: &str, file_path: &str, home_library_ur
}
impl Common {
/// Removes every path in `entries` and returns one warning per failed removal.
///
/// Directories are removed recursively, everything else is removed as a file. Warnings are
/// returned in the same order as the entries that produced them; an empty result means every
/// removal succeeded.
pub fn delete_multiple_entries(entries: &[String]) -> Vec<String> {
    entries
        .iter()
        .filter_map(|entry| {
            if Path::new(entry).is_dir() {
                fs::remove_dir_all(entry)
                    .err()
                    .map(|e| format!("Failed to remove folder {entry}, reason {e}"))
            } else {
                fs::remove_file(entry)
                    .err()
                    .map(|e| format!("Failed to remove file {entry}, reason {e}"))
            }
        })
        .collect()
}
/// Removes a single path and returns a warning message, or an empty string on success.
///
/// A directory is removed recursively; anything else is removed as a file.
pub fn delete_one_entry(entry: &str) -> String {
    let outcome = if Path::new(entry).is_dir() {
        fs::remove_dir_all(entry).map_err(|e| format!("Failed to remove folder {entry}, reason {e}"))
    } else {
        fs::remove_file(entry).map_err(|e| format!("Failed to remove file {entry}, reason {e}"))
    };
    // Callers receive an empty string when nothing went wrong.
    outcome.err().unwrap_or_default()
}
pub fn regex_check(expression: &str, directory: impl AsRef<Path>) -> bool {
if expression == "*" {
return true;
@ -374,6 +348,98 @@ pub fn check_folder_children(
dir_result.push(next_folder);
}
/// Applies `delete_method` to every group of entries in `items`.
///
/// Each group is cloned and sorted by modification date (oldest first); the delete method then
/// decides which entries of the sorted group are removed, or replaced by hard links to the
/// oldest entry for [`DeleteMethod::HardLink`]. With `dry_run` set nothing is touched on disk
/// and only informational messages are produced.
///
/// Informational messages are appended to `text_messages.messages` and failures to
/// `text_messages.errors`.
///
/// Returns `(gained_space, removed_files, failed_to_remove_files)` summed over all groups.
/// [`DeleteMethod::None`] and empty groups are no-ops instead of panicking.
#[allow(clippy::ptr_arg)]
pub fn delete_files_custom<T>(items: &Vec<&Vec<T>>, delete_method: &DeleteMethod, text_messages: &mut Messages, dry_run: bool) -> (u64, usize, usize)
where
    T: ResultEntry + Clone,
{
    // Callers normally filter this out, but returning early keeps a stray call from reaching
    // the `unreachable!` arm below.
    if delete_method == &DeleteMethod::None {
        return (0, 0, 0);
    }

    let res = items
        .iter()
        .map(|values| {
            let mut gained_space: u64 = 0;
            let mut removed_files: usize = 0;
            let mut failed_to_remove_files: usize = 0;
            let mut infos = Vec::new();
            let mut errors = Vec::new();

            let mut all_values = (*values).clone();
            let len = all_values.len();
            // The slicing below (`[0]`, `len - 1`) needs at least one element.
            if len == 0 {
                return (infos, errors, gained_space, removed_files, failed_to_remove_files);
            }

            // Sorted from oldest to newest - from smallest value to bigger
            all_values.sort_unstable_by_key(ResultEntry::get_modified_date);

            if delete_method == &DeleteMethod::HardLink {
                // The oldest entry is kept, every other entry is replaced by a link to it.
                let original_file = &all_values[0];
                for file_entry in &all_values[1..] {
                    if dry_run {
                        infos.push(format!(
                            "dry_run - would create hardlink from {:?} to {:?}",
                            original_file.get_path(),
                            file_entry.get_path()
                        ));
                    } else if let Err(e) = make_hard_link(original_file.get_path(), file_entry.get_path()) {
                        errors.push(format!(
                            "Cannot create hard link from {:?} to {:?} - {}",
                            file_entry.get_path(),
                            original_file.get_path(),
                            e
                        ));
                        failed_to_remove_files += 1;
                    } else {
                        // NOTE(review): counts 1 per link rather than the file size, unlike the
                        // delete path below - kept as is, confirm whether this is intended.
                        gained_space += 1;
                        removed_files += 1;
                    }
                }

                return (infos, errors, gained_space, removed_files, failed_to_remove_files);
            }

            let to_delete = match delete_method {
                DeleteMethod::Delete => &all_values,
                DeleteMethod::AllExceptNewest => &all_values[..(len - 1)],
                DeleteMethod::AllExceptOldest => &all_values[1..],
                DeleteMethod::OneOldest => &all_values[..1],
                DeleteMethod::OneNewest => &all_values[(len - 1)..],
                DeleteMethod::HardLink | DeleteMethod::None => unreachable!("HardLink and None should be handled before"),
            };

            for entry in to_delete {
                if dry_run {
                    infos.push(format!("dry_run - would delete file: {:?}", entry.get_path()));
                } else if let Err(e) = std::fs::remove_file(entry.get_path()) {
                    errors.push(format!("Cannot delete file: {:?} - {e}", entry.get_path()));
                    failed_to_remove_files += 1;
                } else {
                    removed_files += 1;
                    gained_space += entry.get_size();
                }
            }
            (infos, errors, gained_space, removed_files, failed_to_remove_files)
        })
        .collect::<Vec<_>>();

    let mut gained_space = 0;
    let mut removed_files = 0;
    let mut failed_to_remove_files = 0;
    for (infos, errors, gained_space_v, removed_files_v, failed_to_remove_files_v) in res {
        text_messages.messages.extend(infos);
        text_messages.errors.extend(errors);
        gained_space += gained_space_v;
        removed_files += removed_files_v;
        failed_to_remove_files += failed_to_remove_files_v;
    }

    (gained_space, removed_files, failed_to_remove_files)
}
pub fn filter_reference_folders_generic<T>(entries_to_check: Vec<Vec<T>>, directories: &Directories) -> Vec<(T, Vec<T>)>
where
T: ResultEntry,

@ -100,14 +100,14 @@ pub enum ErrorType {
/// Enum with values which show if folder is empty.
/// In function "`optimize_folders`" automatically "Maybe" is changed to "Yes", so it is not necessary to put it here
#[derive(Eq, PartialEq, Copy, Clone)]
#[derive(Eq, PartialEq, Copy, Clone, Debug)]
pub(crate) enum FolderEmptiness {
No,
Maybe,
}
/// Struct assigned to each checked folder with parent path(used to ignore parent if children are not empty) and flag which shows if folder is empty
#[derive(Clone)]
#[derive(Clone, Debug)]
pub struct FolderEntry {
pub(crate) parent_path: Option<PathBuf>,
// Usable only when finding

@ -22,6 +22,7 @@ pub struct CommonToolData {
pub(crate) delete_outdated_cache: bool,
pub(crate) save_also_as_json: bool,
pub(crate) use_reference_folders: bool,
pub(crate) dry_run: bool,
}
#[derive(Eq, PartialEq, Clone, Debug, Copy, Default)]
@ -53,6 +54,7 @@ impl CommonToolData {
delete_outdated_cache: true,
save_also_as_json: false,
use_reference_folders: false,
dry_run: false,
}
}
}
@ -61,6 +63,13 @@ pub trait CommonData {
fn get_cd(&self) -> &CommonToolData;
fn get_cd_mut(&mut self) -> &mut CommonToolData;
/// Stores the `dry_run` flag in the shared tool data returned by `get_cd_mut`.
fn set_dry_run(&mut self, dry_run: bool) {
self.get_cd_mut().dry_run = dry_run;
}
/// Returns the `dry_run` flag currently stored in the shared tool data.
fn get_dry_run(&self) -> bool {
self.get_cd().dry_run
}
/// Stores the `use_cache` flag in the shared tool data returned by `get_cd_mut`.
fn set_use_cache(&mut self, use_cache: bool) {
self.get_cd_mut().use_cache = use_cache;
}
@ -189,6 +198,8 @@ pub trait CommonData {
println!("Delete outdated cache: {:?}", self.get_cd().delete_outdated_cache);
println!("Save also as json: {:?}", self.get_cd().save_also_as_json);
println!("Delete method: {:?}", self.get_cd().delete_method);
println!("Use reference folders: {:?}", self.get_cd().use_reference_folders);
println!("Dry run: {:?}", self.get_cd().dry_run);
println!("---------------DEBUG PRINT MESSAGES---------------");
println!("Errors size - {}", self.get_cd().text_messages.errors.len());

@ -1,5 +1,4 @@
use std::collections::HashMap;
use std::collections::{BTreeMap, HashSet};
use std::collections::{BTreeMap, HashMap, HashSet};
use std::fmt::Debug;
use std::fs::File;
use std::hash::Hasher;
@ -19,10 +18,9 @@ use log::debug;
use rayon::prelude::*;
use xxhash_rust::xxh3::Xxh3;
use crate::common::{prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads};
use crate::common::{delete_files_custom, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads};
use crate::common_cache::{get_duplicate_cache_file, load_cache_from_file_generalized_by_size, save_cache_to_file_generalized};
use crate::common_dir_traversal::{CheckingMethod, DirTraversalBuilder, DirTraversalResult, FileEntry, ProgressData, ToolType};
use crate::common_messages::Messages;
use crate::common_tool::{CommonData, CommonToolData, DeleteMethod};
use crate::common_traits::*;
@ -82,7 +80,6 @@ pub struct DuplicateFinder {
check_method: CheckingMethod,
hash_type: HashType,
ignore_hard_links: bool,
dryrun: bool,
use_prehash_cache: bool,
minimal_cache_file_size: u64,
minimal_prehash_cache_file_size: u64,
@ -105,7 +102,6 @@ impl DuplicateFinder {
check_method: CheckingMethod::None,
ignore_hard_links: true,
hash_type: HashType::Blake3,
dryrun: false,
use_prehash_cache: true,
minimal_cache_file_size: 1024 * 256, // By default cache only >= 256 KB files
minimal_prehash_cache_file_size: 0,
@ -823,115 +819,26 @@ impl DuplicateFinder {
match self.check_method {
CheckingMethod::Name => {
let vec_files = self.files_with_identical_names.values().collect::<Vec<_>>();
delete_files(&vec_files, &self.common_data.delete_method, &mut self.common_data.text_messages, self.dryrun);
delete_files_custom(&vec_files, &self.common_data.delete_method, &mut self.common_data.text_messages, self.common_data.dry_run);
}
CheckingMethod::SizeName => {
let vec_files = self.files_with_identical_size_names.values().collect::<Vec<_>>();
delete_files(&vec_files, &self.common_data.delete_method, &mut self.common_data.text_messages, self.dryrun);
delete_files_custom(&vec_files, &self.common_data.delete_method, &mut self.common_data.text_messages, self.common_data.dry_run);
}
CheckingMethod::Hash => {
for vec_files in self.files_with_identical_hashes.values() {
let vev: Vec<&Vec<FileEntry>> = vec_files.iter().collect::<Vec<_>>();
delete_files(&vev, &self.common_data.delete_method, &mut self.common_data.text_messages, self.dryrun);
delete_files_custom(&vev, &self.common_data.delete_method, &mut self.common_data.text_messages, self.common_data.dry_run);
}
}
CheckingMethod::Size => {
let vec_files = self.files_with_identical_size.values().collect::<Vec<_>>();
delete_files(&vec_files, &self.common_data.delete_method, &mut self.common_data.text_messages, self.dryrun);
delete_files_custom(&vec_files, &self.common_data.delete_method, &mut self.common_data.text_messages, self.common_data.dry_run);
}
_ => panic!(),
}
}
}
/// Applies `delete_method` to every group of file entries in `items`.
///
/// Each group is cloned and sorted by modification date (oldest first); the delete method then
/// decides which entries of the sorted group are removed, or replaced by hard links to the
/// oldest entry for [`DeleteMethod::HardLink`]. With `dryrun` set nothing is touched on disk
/// and only informational messages are produced.
///
/// Informational messages are appended to `text_messages.messages` and failures to
/// `text_messages.errors`.
///
/// Returns `(gained_space, removed_files, failed_to_remove_files)` summed over all groups.
/// [`DeleteMethod::None`] and empty groups are no-ops instead of panicking.
#[allow(clippy::ptr_arg)]
fn delete_files(items: &Vec<&Vec<FileEntry>>, delete_method: &DeleteMethod, text_messages: &mut Messages, dryrun: bool) -> (u64, usize, usize) {
    // Returning early keeps a stray call from reaching the `unreachable!` arm below.
    if delete_method == &DeleteMethod::None {
        return (0, 0, 0);
    }

    let res = items
        .iter()
        .map(|values| {
            let mut gained_space: u64 = 0;
            let mut removed_files: usize = 0;
            let mut failed_to_remove_files: usize = 0;
            let mut infos = Vec::new();
            let mut errors = Vec::new();

            let mut all_values = (*values).clone();
            let len = all_values.len();
            // The slicing below (`[0]`, `len - 1`) needs at least one element.
            if len == 0 {
                return (infos, errors, gained_space, removed_files, failed_to_remove_files);
            }

            // Sorted from oldest to newest - from smallest value to bigger
            all_values.sort_unstable_by_key(ResultEntry::get_modified_date);

            if delete_method == &DeleteMethod::HardLink {
                // The oldest entry is kept, every other entry is replaced by a link to it.
                let original_file = &all_values[0];
                for file_entry in &all_values[1..] {
                    if dryrun {
                        infos.push(format!(
                            "Dryrun - would create hardlink from {:?} to {:?}",
                            original_file.get_path(),
                            file_entry.get_path()
                        ));
                    } else if let Err(e) = make_hard_link(original_file.get_path(), file_entry.get_path()) {
                        errors.push(format!(
                            "Cannot create hard link from {:?} to {:?} - {}",
                            file_entry.get_path(),
                            original_file.get_path(),
                            e
                        ));
                        failed_to_remove_files += 1;
                    } else {
                        // NOTE(review): counts 1 per link rather than the file size, unlike the
                        // delete path below - kept as is, confirm whether this is intended.
                        gained_space += 1;
                        removed_files += 1;
                    }
                }

                return (infos, errors, gained_space, removed_files, failed_to_remove_files);
            }

            let to_delete = match delete_method {
                DeleteMethod::Delete => &all_values,
                DeleteMethod::AllExceptNewest => &all_values[..(len - 1)],
                DeleteMethod::AllExceptOldest => &all_values[1..],
                DeleteMethod::OneOldest => &all_values[..1],
                DeleteMethod::OneNewest => &all_values[(len - 1)..],
                DeleteMethod::HardLink | DeleteMethod::None => unreachable!("HardLink and None should be handled before"),
            };

            for entry in to_delete {
                if dryrun {
                    infos.push(format!("Dryrun - would delete file: {:?}", entry.get_path()));
                } else if let Err(e) = std::fs::remove_file(entry.get_path()) {
                    errors.push(format!("Cannot delete file: {:?} - {e}", entry.get_path()));
                    failed_to_remove_files += 1;
                } else {
                    removed_files += 1;
                    gained_space += entry.get_size();
                }
            }
            (infos, errors, gained_space, removed_files, failed_to_remove_files)
        })
        .collect::<Vec<_>>();

    let mut gained_space = 0;
    let mut removed_files = 0;
    let mut failed_to_remove_files = 0;
    for (infos, errors, gained_space_v, removed_files_v, failed_to_remove_files_v) in res {
        text_messages.messages.extend(infos);
        text_messages.errors.extend(errors);
        gained_space += gained_space_v;
        removed_files += removed_files_v;
        failed_to_remove_files += failed_to_remove_files_v;
    }

    (gained_space, removed_files, failed_to_remove_files)
}
impl DuplicateFinder {
pub fn set_case_sensitive_name_comparison(&mut self, case_sensitive_name_comparison: bool) {
@ -982,8 +889,8 @@ impl DuplicateFinder {
self.ignore_hard_links = ignore_hard_links;
}
pub fn set_dryrun(&mut self, dryrun: bool) {
self.dryrun = dryrun;
pub fn set_dry_run(&mut self, dry_run: bool) {
self.common_data.dry_run = dry_run;
}
pub fn set_check_method(&mut self, check_method: CheckingMethod) {

@ -1,6 +1,5 @@
use std::collections::BTreeMap;
use std::fs;
use std::io::Write;
use std::path::PathBuf;
@ -8,15 +7,15 @@ use crossbeam_channel::Receiver;
use fun_time::fun_time;
use futures::channel::mpsc::UnboundedSender;
use log::debug;
use rayon::prelude::*;
use crate::common_dir_traversal::{Collect, DirTraversalBuilder, DirTraversalResult, FolderEmptiness, FolderEntry, ProgressData, ToolType};
use crate::common_tool::{CommonData, CommonToolData};
use crate::common_tool::{CommonData, CommonToolData, DeleteMethod};
use crate::common_traits::{DebugPrint, PrintResults};
pub struct EmptyFolder {
common_data: CommonToolData,
information: Info,
delete_folders: bool,
empty_folder_list: BTreeMap<PathBuf, FolderEntry>, // Path, FolderEntry
}
@ -30,7 +29,6 @@ impl EmptyFolder {
Self {
common_data: CommonToolData::new(ToolType::EmptyFolders),
information: Default::default(),
delete_folders: false,
empty_folder_list: Default::default(),
}
}
@ -51,9 +49,8 @@ impl EmptyFolder {
return;
}
self.optimize_folders();
if self.delete_folders {
self.delete_empty_folders();
}
self.delete_files();
self.debug_print();
}
@ -109,19 +106,24 @@ impl EmptyFolder {
}
}
#[fun_time(message = "delete_empty_folders")]
fn delete_empty_folders(&mut self) {
// Folders may be deleted or require too big privileges
for name in self.empty_folder_list.keys() {
match fs::remove_dir_all(name) {
Ok(()) => (),
Err(e) => self
.common_data
.text_messages
.warnings
.push(format!("Failed to remove folder {}, reason {}", name.display(), e)),
};
// #[fun_time(message = "delete_files")]
// Removes every folder collected in `empty_folder_list` unless the delete method is `None`;
// each failed removal is recorded as an error message.
fn delete_files(&mut self) {
    if self.get_delete_method() != DeleteMethod::None {
        let failures: Vec<String> = self
            .empty_folder_list
            .keys()
            .collect::<Vec<_>>()
            .into_par_iter()
            .filter_map(|folder| {
                fs::remove_dir_all(folder)
                    .err()
                    .map(|e| format!("Failed to remove folder {folder:?}, reason {e}"))
            })
            .collect();

        self.get_text_messages_mut().errors.extend(failures);
    }
}
}
@ -172,8 +174,3 @@ impl CommonData for EmptyFolder {
&mut self.common_data
}
}
impl EmptyFolder {
/// Stores `delete_folder` in the `delete_folders` field of this finder.
pub fn set_delete_folder(&mut self, delete_folder: bool) {
self.delete_folders = delete_folder;
}
}

@ -25,10 +25,12 @@ use symphonia::core::io::MediaSourceStream;
use symphonia::core::meta::MetadataOptions;
use symphonia::core::probe::Hint;
use crate::common::{create_crash_message, filter_reference_folders_generic, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads, AUDIO_FILES_EXTENSIONS};
use crate::common::{
create_crash_message, delete_files_custom, filter_reference_folders_generic, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads, AUDIO_FILES_EXTENSIONS,
};
use crate::common_cache::{get_similar_music_cache_file, load_cache_from_file_generalized_by_path, save_cache_to_file_generalized};
use crate::common_dir_traversal::{CheckingMethod, DirTraversalBuilder, DirTraversalResult, FileEntry, ProgressData, ToolType};
use crate::common_tool::{CommonData, CommonToolData};
use crate::common_tool::{CommonData, CommonToolData, DeleteMethod};
use crate::common_traits::*;
bitflags! {
@ -123,7 +125,7 @@ impl SameMusic {
music_to_check: Default::default(),
approximate_comparison: true,
duplicated_music_entries_referenced: vec![],
check_type: CheckingMethod::AudioContent,
check_type: CheckingMethod::AudioTags,
hash_preset_config: Configuration::preset_test1(), // TODO allow to change this
minimum_segment_duration: 10.0,
maximum_difference: 2.0,
@ -653,20 +655,12 @@ impl SameMusic {
// Hands every group of duplicated music entries to `delete_files_custom` together with the
// configured delete method, the message sink and the dry-run flag.
#[fun_time(message = "delete_files")]
fn delete_files(&mut self) {
    // `None` is the "not delete files" setting, so there is nothing to do in that case.
    if self.common_data.delete_method != DeleteMethod::None {
        let groups: Vec<_> = self.duplicated_music_entries.iter().collect();
        delete_files_custom(&groups, &self.common_data.delete_method, &mut self.common_data.text_messages, self.common_data.dry_run);
    }
}
}

@ -20,12 +20,12 @@ use serde::{Deserialize, Serialize};
#[cfg(feature = "heif")]
use crate::common::get_dynamic_image_from_heic;
use crate::common::{
check_folder_children, create_crash_message, get_dynamic_image_from_raw_image, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads, HEIC_EXTENSIONS,
IMAGE_RS_SIMILAR_IMAGES_EXTENSIONS, RAW_IMAGE_EXTENSIONS,
check_folder_children, create_crash_message, delete_files_custom, get_dynamic_image_from_raw_image, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads,
HEIC_EXTENSIONS, IMAGE_RS_SIMILAR_IMAGES_EXTENSIONS, RAW_IMAGE_EXTENSIONS,
};
use crate::common_cache::{get_similar_images_cache_file, load_cache_from_file_generalized_by_path, save_cache_to_file_generalized};
use crate::common_dir_traversal::{common_get_entry_data_metadata, common_read_dir, get_lowercase_name, get_modified_time, CheckingMethod, ProgressData, ToolType};
use crate::common_tool::{CommonData, CommonToolData};
use crate::common_tool::{CommonData, CommonToolData, DeleteMethod};
use crate::common_traits::{DebugPrint, PrintResults, ResultEntry};
use crate::flc;
@ -140,6 +140,7 @@ impl SimilarImages {
self.common_data.stopped_search = true;
return;
}
self.delete_files();
self.debug_print();
}
@ -807,6 +808,15 @@ impl SimilarImages {
}
assert!(!found, "Found Invalid entries, verify errors before"); // TODO crashes with empty result with reference folder, verify why
}
// Hands every group in `similar_vectors` to `delete_files_custom` together with the
// configured delete method, the message sink and the dry-run flag.
fn delete_files(&mut self) {
    // `None` is the "not delete files" setting, so there is nothing to do in that case.
    if self.common_data.delete_method != DeleteMethod::None {
        let groups: Vec<_> = self.similar_vectors.iter().collect();
        delete_files_custom(&groups, &self.common_data.delete_method, &mut self.common_data.text_messages, self.common_data.dry_run);
    }
}
}
fn is_in_reference_folder(reference_directories: &[PathBuf], path: &Path) -> bool {

@ -15,10 +15,10 @@ use serde::{Deserialize, Serialize};
use vid_dup_finder_lib::HashCreationErrorKind::DetermineVideo;
use vid_dup_finder_lib::{NormalizedTolerance, VideoHash};
use crate::common::{check_folder_children, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads, VIDEO_FILES_EXTENSIONS};
use crate::common::{check_folder_children, delete_files_custom, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads, VIDEO_FILES_EXTENSIONS};
use crate::common_cache::{get_similar_videos_cache_file, load_cache_from_file_generalized_by_path, save_cache_to_file_generalized};
use crate::common_dir_traversal::{common_get_entry_data_metadata, common_read_dir, get_lowercase_name, get_modified_time, CheckingMethod, ProgressData, ToolType};
use crate::common_tool::{CommonData, CommonToolData};
use crate::common_tool::{CommonData, CommonToolData, DeleteMethod};
use crate::common_traits::{DebugPrint, PrintResults, ResultEntry};
use crate::flc;
use crate::localizer_core::generate_translation_hashmap;
@ -123,6 +123,7 @@ impl SimilarVideos {
return;
}
}
self.delete_files();
self.debug_print();
}
@ -401,6 +402,15 @@ impl SimilarVideos {
.collect::<Vec<(FileEntry, Vec<FileEntry>)>>();
}
}
// Hands every group in `similar_vectors` to `delete_files_custom` together with the
// configured delete method, the message sink and the dry-run flag.
fn delete_files(&mut self) {
    // `None` is the "not delete files" setting, so there is nothing to do in that case.
    if self.common_data.delete_method != DeleteMethod::None {
        let groups: Vec<_> = self.similar_vectors.iter().collect();
        delete_files_custom(&groups, &self.common_data.delete_method, &mut self.common_data.text_messages, self.common_data.dry_run);
    }
}
}
impl Default for SimilarVideos {

Loading…
Cancel
Save