pull/1082/head
Rafał Mikrut 8 months ago
parent 38c944e566
commit edcd7d5b5c

@@ -10,6 +10,7 @@ use crossbeam_channel::Receiver;
use fun_time::fun_time;
use futures::channel::mpsc::UnboundedSender;
use humansize::{format_size, BINARY};
use log::debug;
use rayon::prelude::*;
use crate::common::{check_folder_children, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads, split_path};
@@ -40,7 +41,6 @@ pub struct BigFile {
information: Info,
big_files: Vec<(u64, FileEntry)>,
number_of_files_to_check: usize,
delete_method: DeleteMethod,
search_mode: SearchMode,
}
@@ -51,7 +51,6 @@ impl BigFile {
information: Info::default(),
big_files: Default::default(),
number_of_files_to_check: 50,
delete_method: DeleteMethod::None,
search_mode: SearchMode::BiggestFiles,
}
}
@@ -134,11 +133,13 @@ impl BigFile {
}
}
}
debug!("Collected {} big files",);
debug!("Collected {} files", old_map.len());
send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);
self.extract_n_biggest_files(old_map);
true
}
@@ -211,7 +212,7 @@ impl BigFile {
}
fn delete_files(&mut self) {
match self.delete_method {
match self.common_data.delete_method {
DeleteMethod::Delete => {
for (_, file_entry) in &self.big_files {
if fs::remove_file(&file_entry.path).is_err() {
@@ -327,10 +328,6 @@ impl BigFile {
&self.information
}
pub fn set_delete_method(&mut self, delete_method: DeleteMethod) {
self.delete_method = delete_method;
}
pub fn set_number_of_files_to_check(&mut self, number_of_files_to_check: usize) {
self.number_of_files_to_check = number_of_files_to_check;
}
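Note on the `@@ -134` hunk above: it fixes the collected-files debug line. As rendered here, the removed version has a `{}` placeholder with no argument; `log`'s macros expand through `format_args!`, so such a call is rejected at compile time and the argument must be supplied. A minimal sketch of the fixed pattern, assuming `env_logger` as the `log` backend (any implementation works):

```rust
use log::debug;

fn main() {
    env_logger::init(); // assumption: env_logger as the `log` backend
    let old_map = vec![1, 2, 3]; // stand-in for the real size -> files map
    // Placeholders are checked at compile time: `debug!("Collected {} files",)`
    // fails to build, so the count must be passed explicitly.
    debug!("Collected {} files", old_map.len());
}
```

Run with `RUST_LOG=debug` to see the line; without it the statement is filtered out at runtime.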

@@ -10,6 +10,7 @@ use std::{fs, mem, panic};
use crossbeam_channel::Receiver;
use fun_time::fun_time;
use futures::channel::mpsc::UnboundedSender;
use log::debug;
use pdf::file::FileOptions;
use pdf::object::ParseOptions;
use pdf::PdfError;
@@ -77,7 +78,6 @@ pub struct BrokenFiles {
information: Info,
files_to_check: BTreeMap<String, FileEntry>,
broken_files: Vec<FileEntry>,
delete_method: DeleteMethod,
checked_types: CheckedTypes,
}
@@ -87,7 +87,6 @@ impl BrokenFiles {
common_data: CommonToolData::new(ToolType::BrokenFiles),
information: Info::default(),
files_to_check: Default::default(),
delete_method: DeleteMethod::None,
broken_files: Default::default(),
checked_types: CheckedTypes::PDF | CheckedTypes::AUDIO | CheckedTypes::IMAGE | CheckedTypes::ARCHIVE,
}
@@ -108,22 +107,6 @@ impl BrokenFiles {
self.debug_print();
}
pub const fn get_broken_files(&self) -> &Vec<FileEntry> {
&self.broken_files
}
pub fn set_checked_types(&mut self, checked_types: CheckedTypes) {
self.checked_types = checked_types;
}
pub const fn get_information(&self) -> &Info {
&self.information
}
pub fn set_delete_method(&mut self, delete_method: DeleteMethod) {
self.delete_method = delete_method;
}
#[fun_time(message = "check_files")]
fn check_files(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&UnboundedSender<ProgressData>>) -> bool {
let mut folders_to_check: Vec<PathBuf> = Vec::with_capacity(1024 * 2); // This should be small enough to not make a noticeable difference and big enough to store most paths without resizing the vector
@@ -136,6 +119,7 @@ impl BrokenFiles {
let (progress_thread_handle, progress_thread_run, atomic_counter, _check_was_stopped) =
prepare_thread_handler_common(progress_sender, 0, 1, 0, CheckingMethod::None, self.common_data.tool_type);
debug!("check_files - starting to collect files");
while !folders_to_check.is_empty() {
if stop_receiver.is_some() && stop_receiver.unwrap().try_recv().is_ok() {
send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);
@@ -178,6 +162,7 @@ impl BrokenFiles {
(dir_result, warnings, fe_result)
})
.collect();
debug!("check_files - collected files");
// Advance the frontier
folders_to_check.clear();
@@ -195,6 +180,7 @@ impl BrokenFiles {
send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);
true
}
fn get_file_entry(
&self,
metadata: &Metadata,
@@ -367,6 +353,7 @@ impl BrokenFiles {
let (progress_thread_handle, progress_thread_run, atomic_counter, _check_was_stopped) =
prepare_thread_handler_common(progress_sender, 1, 1, non_cached_files_to_check.len(), CheckingMethod::None, self.common_data.tool_type);
debug!("look_for_broken_files - started finding for broken files");
let mut vec_file_entry: Vec<FileEntry> = non_cached_files_to_check
.into_par_iter()
.map(|(_, file_entry)| {
@@ -388,6 +375,7 @@ impl BrokenFiles {
.filter(Option::is_some)
.map(Option::unwrap)
.collect::<Vec<FileEntry>>();
debug!("look_for_broken_files - ended finding for broken files");
send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);
@@ -402,7 +390,7 @@ impl BrokenFiles {
.collect();
self.information.number_of_broken_files = self.broken_files.len();
debug!("Found {} broken files.", self.information.number_of_broken_files);
// Clean unused data
self.files_to_check = Default::default();
@@ -426,8 +414,9 @@ impl BrokenFiles {
}
}
#[fun_time(message = "delete_files")]
fn delete_files(&mut self) {
match self.delete_method {
match self.common_data.delete_method {
DeleteMethod::Delete => {
for file_entry in &self.broken_files {
if fs::remove_file(&file_entry.path).is_err() {
@@ -445,6 +434,19 @@ impl BrokenFiles {
}
}
impl BrokenFiles {
pub const fn get_broken_files(&self) -> &Vec<FileEntry> {
&self.broken_files
}
pub fn set_checked_types(&mut self, checked_types: CheckedTypes) {
self.checked_types = checked_types;
}
pub const fn get_information(&self) -> &Info {
&self.information
}
}
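The block above is new only in location: the getters and `set_checked_types` move out of the main `impl BrokenFiles` into a second inherent impl. Rust permits any number of inherent impl blocks per type, so the split is purely organizational. A tiny sketch of the idea, with stand-in types:

```rust
struct Tool {
    results: Vec<String>,
}

// Scanning logic lives in one impl block...
impl Tool {
    fn scan(&mut self) {
        self.results.push("found".to_string());
    }
}

// ...and plain accessors in another; the compiler treats both identically.
impl Tool {
    fn get_results(&self) -> &Vec<String> {
        &self.results
    }
}

fn main() {
    let mut t = Tool { results: vec![] };
    t.scan();
    assert_eq!(t.get_results().len(), 1);
}
```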
impl Default for BrokenFiles {
fn default() -> Self {
Self::new()
@@ -456,14 +458,12 @@ impl DebugPrint for BrokenFiles {
if !cfg!(debug_assertions) {
return;
}
println!("---------------DEBUG PRINT---------------");
println!("Delete Method - {:?}", self.delete_method);
self.debug_print_common();
println!("-----------------------------------------");
}
}
impl SaveResults for BrokenFiles {
#[fun_time(message = "save_results_to_file")]
fn save_results_to_file(&mut self, file_name: &str) -> bool {
let file_name: String = match file_name {
"" => "results.txt".to_string(),
@@ -505,6 +505,7 @@ impl SaveResults for BrokenFiles {
}
impl PrintResults for BrokenFiles {
#[fun_time(message = "print_results")]
fn print_results(&self) {
println!("Found {} broken files.\n", self.information.number_of_broken_files);
for file_entry in &self.broken_files {

@@ -3,6 +3,7 @@ use crate::common_messages::Messages;
use crate::common_traits::ResultEntry;
use crate::duplicate::HashType;
use crate::similar_images::{convert_algorithm_to_string, convert_filters_to_string};
use fun_time::fun_time;
use image::imageops::FilterType;
use image_hasher::HashAlg;
use log::debug;
@@ -40,11 +41,11 @@ pub fn get_duplicate_cache_file(type_of_hash: &HashType, is_prehash: bool) -> St
format!("cache_duplicates_{type_of_hash:?}{prehash_str}_61.bin")
}
#[fun_time(message = "save_cache_to_file_generalized")]
pub fn save_cache_to_file_generalized<T>(cache_file_name: &str, hashmap: &BTreeMap<String, T>, save_also_as_json: bool, minimum_file_size: u64) -> Messages
where
T: Serialize + ResultEntry + Sized + Send + Sync,
{
debug!("Saving cache to file {} (or also json alternative) - {} results", cache_file_name, hashmap.len());
let mut text_messages = Messages::new();
if let Some(((file_handler, cache_file), (file_handler_json, cache_file_json))) =
common::open_cache_folder(cache_file_name, true, save_also_as_json, &mut text_messages.warnings)
@@ -83,6 +84,7 @@ where
text_messages
}
#[fun_time(message = "load_cache_from_file_generalized_by_path")]
pub fn load_cache_from_file_generalized_by_path<T>(cache_file_name: &str, delete_outdated_cache: bool, used_files: &BTreeMap<String, T>) -> (Messages, Option<BTreeMap<String, T>>)
where
for<'a> T: Deserialize<'a> + ResultEntry + Sized + Send + Sync + Clone,
@@ -102,6 +104,7 @@ where
(text_messages, Some(map_loaded_entries))
}
#[fun_time(message = "load_cache_from_file_generalized_by_size")]
pub fn load_cache_from_file_generalized_by_size<T>(
cache_file_name: &str,
delete_outdated_cache: bool,
@@ -132,6 +135,7 @@ where
(text_messages, Some(map_loaded_entries))
}
#[fun_time(message = "load_cache_from_file_generalized_by_path_from_size")]
pub fn load_cache_from_file_generalized_by_path_from_size<T>(
cache_file_name: &str,
delete_outdated_cache: bool,
@@ -162,11 +166,11 @@ where
(text_messages, Some(map_loaded_entries))
}
#[fun_time(message = "load_cache_from_file_generalized")]
fn load_cache_from_file_generalized<T>(cache_file_name: &str, delete_outdated_cache: bool, used_files: &BTreeMap<String, T>) -> (Messages, Option<Vec<T>>)
where
for<'a> T: Deserialize<'a> + ResultEntry + Sized + Send + Sync + Clone,
{
debug!("Loading cache from file {} (or json alternative)", cache_file_name);
let mut text_messages = Messages::new();
if let Some(((file_handler, cache_file), (file_handler_json, cache_file_json))) = common::open_cache_folder(cache_file_name, false, true, &mut text_messages.warnings) {
@@ -198,8 +202,10 @@ where
};
}
// Don't load cache data if the destination file does not exist
debug!("Starting to removing outdated cache entries");
debug!(
"Starting removing outdated cache entries (removing non existent files from cache - {})",
delete_outdated_cache
);
let initial_number_of_entries = vec_loaded_entries.len();
vec_loaded_entries = vec_loaded_entries
.into_par_iter()
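The `#[fun_time(message = "...")]` attributes added throughout this file replace hand-written start/finish `debug!` lines. `fun_time` is an attribute macro that logs when the annotated function begins and ends, including the elapsed time. A minimal sketch, assuming the crate is built with its `log` integration (as the imports here suggest):

```rust
use fun_time::fun_time;

// The attribute wraps the body with timing instrumentation, so manual
// "starting..."/"finished..." log lines become redundant.
#[fun_time(message = "save_cache_to_file_generalized")]
fn save_cache(entries: usize) -> usize {
    entries // real code would serialize the cache here
}

fn main() {
    env_logger::init(); // assumption: env_logger as the `log` backend
    let _ = save_cache(42);
}
```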

@@ -14,7 +14,7 @@ pub struct CommonToolData {
pub(crate) allowed_extensions: Extensions,
pub(crate) excluded_items: ExcludedItems,
pub(crate) recursive_search: bool,
// delete_method: DeleteMethod, // ?
pub(crate) delete_method: DeleteMethod,
pub(crate) maximal_file_size: u64,
pub(crate) minimal_file_size: u64,
pub(crate) stopped_search: bool,
@@ -45,6 +45,7 @@ impl CommonToolData {
allowed_extensions: Extensions::new(),
excluded_items: ExcludedItems::new(),
recursive_search: true,
delete_method: DeleteMethod::None,
maximal_file_size: u64::MAX,
minimal_file_size: 8192,
stopped_search: false,
@@ -140,6 +141,13 @@ pub trait CommonData {
self.get_cd().use_reference_folders
}
fn set_delete_method(&mut self, delete_method: DeleteMethod) {
self.get_cd_mut().delete_method = delete_method;
}
fn get_delete_method(&self) -> DeleteMethod {
self.get_cd().delete_method
}
fn set_included_directory(&mut self, included_directory: Vec<PathBuf>) {
let messages = self.get_cd_mut().directories.set_included_directory(included_directory);
self.get_cd_mut().text_messages.extend_with_another_messages(messages);
@@ -180,6 +188,7 @@ pub trait CommonData {
println!("Use cache: {:?}", self.get_cd().use_cache);
println!("Delete outdated cache: {:?}", self.get_cd().delete_outdated_cache);
println!("Save also as json: {:?}", self.get_cd().save_also_as_json);
println!("Delete method: {:?}", self.get_cd().delete_method);
println!("---------------DEBUG PRINT MESSAGES---------------");
println!("Errors size - {}", self.get_cd().text_messages.errors.len());

@@ -78,7 +78,6 @@ pub struct DuplicateFinder {
// File Size, next grouped by file size, next grouped by hash
files_with_identical_hashes_referenced: BTreeMap<u64, Vec<(FileEntry, Vec<FileEntry>)>>,
check_method: CheckingMethod,
delete_method: DeleteMethod,
hash_type: HashType,
ignore_hard_links: bool,
dryrun: bool,
@@ -102,7 +101,6 @@ impl DuplicateFinder {
files_with_identical_size_referenced: Default::default(),
files_with_identical_hashes_referenced: Default::default(),
check_method: CheckingMethod::None,
delete_method: DeleteMethod::None,
ignore_hard_links: true,
hash_type: HashType::Blake3,
dryrun: false,
@@ -175,21 +173,13 @@ impl DuplicateFinder {
.maximal_file_size(self.common_data.maximal_file_size)
.build()
.run();
debug!("check_files_name - after finding file sizes");
let res = match result {
match result {
DirTraversalResult::SuccessFiles { grouped_file_entries, warnings } => {
self.files_with_identical_names = grouped_file_entries;
self.common_data.text_messages.warnings.extend(warnings);
// Create new BTreeMap without single size entries(files have not duplicates)
let mut new_map: BTreeMap<String, Vec<FileEntry>> = Default::default();
for (name, vector) in &self.files_with_identical_names {
if vector.len() > 1 {
new_map.insert(name.clone(), vector.clone());
}
}
self.files_with_identical_names = new_map;
self.files_with_identical_names = grouped_file_entries.into_iter().filter(|(_name, vector)| vector.len() > 1).collect();
// Reference - only use in size, because later hash will be counted differently
if self.common_data.use_reference_folders {
@@ -219,8 +209,7 @@ impl DuplicateFinder {
unreachable!()
}
DirTraversalResult::Stopped => false,
};
res
}
}
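The interesting change in `check_files_name` is the dedup filter: the removed loop built a second `BTreeMap` and cloned every surviving vector, while the one-liner consumes `grouped_file_entries` and keeps only groups with more than one entry. A stand-alone sketch, with `u64` standing in for `FileEntry`:

```rust
use std::collections::BTreeMap;

// Consuming the map avoids the clone-per-group of the old loop, which
// built a second BTreeMap and cloned every surviving vector.
fn keep_groups_with_duplicates(groups: BTreeMap<String, Vec<u64>>) -> BTreeMap<String, Vec<u64>> {
    groups.into_iter().filter(|(_name, vector)| vector.len() > 1).collect()
}

fn main() {
    let mut groups = BTreeMap::new();
    groups.insert("a.txt".to_string(), vec![1, 2]);
    groups.insert("b.txt".to_string(), vec![3]);
    let deduped = keep_groups_with_duplicates(groups);
    assert_eq!(deduped.len(), 1); // only the group with more than one entry survives
}
```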
fn calculate_name_stats(&mut self) {
@@ -259,21 +248,12 @@ impl DuplicateFinder {
.maximal_file_size(self.common_data.maximal_file_size)
.build()
.run();
debug!("check_files_size_name - after finding file sizes");
let res = match result {
match result {
DirTraversalResult::SuccessFiles { grouped_file_entries, warnings } => {
self.files_with_identical_size_names = grouped_file_entries;
self.common_data.text_messages.warnings.extend(warnings);
// Create new BTreeMap without single size entries(files have not duplicates)
let mut new_map: BTreeMap<(u64, String), Vec<FileEntry>> = Default::default();
for (name_size, vector) in &self.files_with_identical_size_names {
if vector.len() > 1 {
new_map.insert(name_size.clone(), vector.clone());
}
}
self.files_with_identical_size_names = new_map;
self.files_with_identical_size_names = grouped_file_entries.into_iter().filter(|(_name, vector)| vector.len() > 1).collect();
// Reference - only use in size, because later hash will be counted differently
if self.common_data.use_reference_folders {
@@ -304,8 +284,7 @@ impl DuplicateFinder {
unreachable!()
}
DirTraversalResult::Stopped => false,
};
res
}
}
fn calculate_size_name_stats(&mut self) {
@@ -346,16 +325,12 @@ impl DuplicateFinder {
.maximal_file_size(self.common_data.maximal_file_size)
.build()
.run();
debug!("check_file_size - after finding file sizes");
let res = match result {
match result {
DirTraversalResult::SuccessFiles { grouped_file_entries, warnings } => {
self.files_with_identical_size = grouped_file_entries;
self.common_data.text_messages.warnings.extend(warnings);
// Create new BTreeMap without single size entries(files have not duplicates)
let old_map: BTreeMap<u64, Vec<FileEntry>> = mem::take(&mut self.files_with_identical_size);
for (size, vec) in old_map {
for (size, vec) in grouped_file_entries {
if vec.len() <= 1 {
continue;
}
@@ -370,21 +345,21 @@ impl DuplicateFinder {
self.filter_reference_folders_by_size();
self.calculate_size_stats();
debug!(
"check_file_size - after calculating size stats/duplicates, found in {} groups, {} files with same size | referenced {} groups, {} files",
self.files_with_identical_size.len(),
self.files_with_identical_size.values().map(Vec::len).sum::<usize>(),
self.files_with_identical_size_referenced.len(),
self.files_with_identical_size_referenced.values().map(|(_fe, vec)| vec.len()).sum::<usize>()
);
true
}
DirTraversalResult::SuccessFolders { .. } => {
unreachable!()
}
DirTraversalResult::Stopped => false,
};
debug!(
"check_file_size - after calculating size stats/duplicates, found in {} groups, {} files with same size | referenced {} groups, {} files",
self.files_with_identical_size.len(),
self.files_with_identical_size.values().map(Vec::len).sum::<usize>(),
self.files_with_identical_size_referenced.len(),
self.files_with_identical_size_referenced.values().map(|(_fe, vec)| vec.len()).sum::<usize>()
);
res
}
}
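These hunks also drop the `let res = match result { ... }; res` indirection: `match` is an expression in Rust, so it can be the function's tail directly, and `check_file_size` additionally moves the summary `debug!` inside the success arm so stats are only logged when traversal actually produced groups. A trivial sketch of the shape:

```rust
fn check(ok: bool) -> bool {
    // `match` is an expression, so it can be the tail of the function;
    // the old `let res = match ...; res` added nothing.
    match ok {
        true => {
            // stats are computed and logged here, only on the success path
            true
        }
        false => false,
    }
}

fn main() {
    assert!(check(true));
    assert!(!check(false));
}
```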
fn calculate_size_stats(&mut self) {
@@ -403,6 +378,7 @@ impl DuplicateFinder {
}
}
#[fun_time(message = "filter_reference_folders_by_size")]
fn filter_reference_folders_by_size(&mut self) {
if self.common_data.use_reference_folders && self.check_method == CheckingMethod::Size {
let vec = mem::take(&mut self.files_with_identical_size)
@@ -528,6 +504,7 @@ impl DuplicateFinder {
let (loaded_hash_map, records_already_cached, non_cached_files_to_check) = self.prehash_load_cache_at_start();
debug!("Starting calculating prehash");
#[allow(clippy::type_complexity)]
let pre_hash_results: Vec<(u64, BTreeMap<String, Vec<FileEntry>>, Vec<String>)> = non_cached_files_to_check
.par_iter()
@@ -553,6 +530,7 @@ impl DuplicateFinder {
})
.while_some()
.collect();
debug!("Completed calculating prehash");
send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);
@@ -568,7 +546,9 @@ impl DuplicateFinder {
// Check results
for (size, hash_map, errors) in &pre_hash_results {
self.common_data.text_messages.warnings.append(&mut errors.clone());
if !errors.is_empty() {
self.common_data.text_messages.warnings.append(&mut errors.clone());
}
for vec_file_entry in hash_map.values() {
if vec_file_entry.len() > 1 {
pre_checked_map.entry(*size).or_default().append(&mut vec_file_entry.clone());
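In the prehash result loop, appending warnings is now guarded: `append(&mut errors.clone())` clones the vector whether or not it holds anything, so the `is_empty` check makes the common no-error case do no work. Sketch:

```rust
fn merge_warnings(warnings: &mut Vec<String>, errors: &[String]) {
    // Skip the clone-and-append entirely when there is nothing to merge.
    if !errors.is_empty() {
        warnings.append(&mut errors.to_vec());
    }
}

fn main() {
    let mut warnings = vec!["old warning".to_string()];
    merge_warnings(&mut warnings, &[]);
    assert_eq!(warnings.len(), 1);
    merge_warnings(&mut warnings, &["read failed".to_string()]);
    assert_eq!(warnings.len(), 2);
}
```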
@@ -700,53 +680,52 @@ impl DuplicateFinder {
);
///////////////////////////////////////////////////////////////////////////// HASHING START
{
let (loaded_hash_map, records_already_cached, non_cached_files_to_check) = self.full_hashing_load_cache_at_start(pre_checked_map);
debug!("Starting full hashing of {} files", non_cached_files_to_check.values().map(Vec::len).sum::<usize>());
let mut full_hash_results: Vec<(u64, BTreeMap<String, Vec<FileEntry>>, Vec<String>)> = non_cached_files_to_check
.into_par_iter()
.map(|(size, vec_file_entry)| {
let mut hashmap_with_hash: BTreeMap<String, Vec<FileEntry>> = Default::default();
let mut errors: Vec<String> = Vec::new();
let mut buffer = [0u8; 1024 * 16];
atomic_counter.fetch_add(vec_file_entry.len(), Ordering::Relaxed);
for mut file_entry in vec_file_entry {
if stop_receiver.is_some() && stop_receiver.unwrap().try_recv().is_ok() {
check_was_stopped.store(true, Ordering::Relaxed);
return None;
}
match hash_calculation(&mut buffer, &file_entry, &check_type, u64::MAX) {
Ok(hash_string) => {
file_entry.hash = hash_string.clone();
hashmap_with_hash.entry(hash_string.clone()).or_default().push(file_entry);
}
Err(s) => errors.push(s),
let (loaded_hash_map, records_already_cached, non_cached_files_to_check) = self.full_hashing_load_cache_at_start(pre_checked_map);
debug!("Starting full hashing of {} files", non_cached_files_to_check.values().map(Vec::len).sum::<usize>());
let mut full_hash_results: Vec<(u64, BTreeMap<String, Vec<FileEntry>>, Vec<String>)> = non_cached_files_to_check
.into_par_iter()
.map(|(size, vec_file_entry)| {
let mut hashmap_with_hash: BTreeMap<String, Vec<FileEntry>> = Default::default();
let mut errors: Vec<String> = Vec::new();
let mut buffer = [0u8; 1024 * 16];
atomic_counter.fetch_add(vec_file_entry.len(), Ordering::Relaxed);
for mut file_entry in vec_file_entry {
if stop_receiver.is_some() && stop_receiver.unwrap().try_recv().is_ok() {
check_was_stopped.store(true, Ordering::Relaxed);
return None;
}
match hash_calculation(&mut buffer, &file_entry, &check_type, u64::MAX) {
Ok(hash_string) => {
file_entry.hash = hash_string.clone();
hashmap_with_hash.entry(hash_string.clone()).or_default().push(file_entry);
}
Err(s) => errors.push(s),
}
Some((size, hashmap_with_hash, errors))
})
.while_some()
.collect();
debug!("Finished full hashing");
}
Some((size, hashmap_with_hash, errors))
})
.while_some()
.collect();
debug!("Finished full hashing");
self.full_hashing_save_cache_at_exit(records_already_cached, &mut full_hash_results, loaded_hash_map);
self.full_hashing_save_cache_at_exit(records_already_cached, &mut full_hash_results, loaded_hash_map);
send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);
send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);
// Break if stop was clicked after saving to cache
if check_was_stopped.load(Ordering::Relaxed) {
return None;
}
// Break if stop was clicked after saving to cache
if check_was_stopped.load(Ordering::Relaxed) {
return None;
}
for (size, hash_map, mut errors) in full_hash_results {
self.common_data.text_messages.warnings.append(&mut errors);
for (_hash, vec_file_entry) in hash_map {
if vec_file_entry.len() > 1 {
self.files_with_identical_hashes.entry(size).or_default().push(vec_file_entry);
}
for (size, hash_map, mut errors) in full_hash_results {
self.common_data.text_messages.warnings.append(&mut errors);
for (_hash, vec_file_entry) in hash_map {
if vec_file_entry.len() > 1 {
self.files_with_identical_hashes.entry(size).or_default().push(vec_file_entry);
}
}
}
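Most of this hunk is re-indentation: the braces that wrapped the full-hashing stage in its own scope are removed, which is why nearly identical lines appear twice above (old nesting, then new). The loop itself streams each file through a fixed 16 KiB buffer. A sketch of that pattern, assuming the `blake3` crate (Blake3 is this tool's default `HashType`; the real `hash_calculation` also takes a hash type and a size limit):

```rust
use std::fs::File;
use std::io::Read;

// Fixed-buffer streaming hash: constant memory regardless of file size.
fn hash_file(path: &str) -> std::io::Result<String> {
    let mut file = File::open(path)?;
    let mut hasher = blake3::Hasher::new();
    let mut buffer = [0u8; 1024 * 16]; // same 16 KiB buffer as the diff
    loop {
        let read = file.read(&mut buffer)?;
        if read == 0 {
            break; // EOF
        }
        hasher.update(&buffer[..read]);
    }
    Ok(hasher.finalize().to_hex().to_string())
}
```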
@@ -803,8 +782,6 @@ impl DuplicateFinder {
}
}
/// The slowest checking type, which must be applied after checking for size
#[fun_time(message = "check_files_hash")]
fn check_files_hash(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&UnboundedSender<ProgressData>>) -> bool {
assert_eq!(self.check_method, CheckingMethod::Hash);
@@ -828,35 +805,33 @@ impl DuplicateFinder {
true
}
/// Function to delete files, from filed before `BTreeMap`
/// Using another function to delete files to avoid duplicates data
#[fun_time(message = "delete_files")]
fn delete_files(&mut self) {
if self.delete_method == DeleteMethod::None {
if self.common_data.delete_method == DeleteMethod::None {
return;
}
match self.check_method {
CheckingMethod::Name => {
for vector in self.files_with_identical_names.values() {
let _tuple: (u64, usize, usize) = delete_files(vector, &self.delete_method, &mut self.common_data.text_messages, self.dryrun);
let _tuple: (u64, usize, usize) = delete_files(vector, &self.common_data.delete_method, &mut self.common_data.text_messages, self.dryrun);
}
}
CheckingMethod::SizeName => {
for vector in self.files_with_identical_size_names.values() {
let _tuple: (u64, usize, usize) = delete_files(vector, &self.delete_method, &mut self.common_data.text_messages, self.dryrun);
let _tuple: (u64, usize, usize) = delete_files(vector, &self.common_data.delete_method, &mut self.common_data.text_messages, self.dryrun);
}
}
CheckingMethod::Hash => {
for vector_vectors in self.files_with_identical_hashes.values() {
for vector in vector_vectors {
let _tuple: (u64, usize, usize) = delete_files(vector, &self.delete_method, &mut self.common_data.text_messages, self.dryrun);
let _tuple: (u64, usize, usize) = delete_files(vector, &self.common_data.delete_method, &mut self.common_data.text_messages, self.dryrun);
}
}
}
CheckingMethod::Size => {
for vector in self.files_with_identical_size.values() {
let _tuple: (u64, usize, usize) = delete_files(vector, &self.delete_method, &mut self.common_data.text_messages, self.dryrun);
let _tuple: (u64, usize, usize) = delete_files(vector, &self.common_data.delete_method, &mut self.common_data.text_messages, self.dryrun);
}
}
_ => panic!(),
@@ -921,10 +896,6 @@ impl DuplicateFinder {
self.check_method = check_method;
}
pub fn set_delete_method(&mut self, delete_method: DeleteMethod) {
self.delete_method = delete_method;
}
pub fn get_use_reference(&self) -> bool {
self.common_data.use_reference_folders
}
@@ -953,7 +924,6 @@ impl Default for DuplicateFinder {
}
impl DebugPrint for DuplicateFinder {
/// Debugging printing - only available on debug build
fn debug_print(&self) {
if !cfg!(debug_assertions) {
return;
@@ -987,7 +957,6 @@ impl DebugPrint for DuplicateFinder {
println!("Files list size - {}", self.files_with_identical_size.len());
println!("Hashed Files list size - {}", self.files_with_identical_hashes.len());
println!("Checking Method - {:?}", self.check_method);
println!("Delete Method - {:?}", self.delete_method);
self.debug_print_common();
println!("-----------------------------------------");
}
@@ -1129,8 +1098,6 @@ impl SaveResults for DuplicateFinder {
}
impl PrintResults for DuplicateFinder {
/// Print information's about duplicated entries
/// Only needed for CLI
fn print_results(&self) {
let mut number_of_files: u64 = 0;
let mut number_of_groups: u64 = 0;
@@ -1212,8 +1179,6 @@ impl PrintResults for DuplicateFinder {
}
}
/// Functions to remove slice(vector) of files with provided method
/// Returns size of removed elements, number of deleted and failed to delete files and modified warning list
fn delete_files(vector: &[FileEntry], delete_method: &DeleteMethod, text_messages: &mut Messages, dryrun: bool) -> (u64, usize, usize) {
assert!(vector.len() > 1, "Vector length must be bigger than 1 (this should be ensured in previous steps).");
let mut gained_space: u64 = 0;

@@ -21,7 +21,6 @@ pub struct EmptyFiles {
common_data: CommonToolData,
information: Info,
empty_files: Vec<FileEntry>,
delete_method: DeleteMethod,
}
impl CommonData for EmptyFiles {
@@ -39,7 +38,6 @@ impl EmptyFiles {
common_data: CommonToolData::new(ToolType::EmptyFiles),
information: Info::default(),
empty_files: vec![],
delete_method: DeleteMethod::None,
}
}
@@ -69,28 +67,27 @@ impl EmptyFiles {
.recursive_search(self.common_data.recursive_search)
.build()
.run();
debug!("check_files - collected files to check");
let res = match result {
match result {
DirTraversalResult::SuccessFiles { grouped_file_entries, warnings } => {
if let Some(empty_files) = grouped_file_entries.get(&()) {
self.empty_files = empty_files.clone();
}
self.empty_files = grouped_file_entries.into_values().flatten().collect();
self.information.number_of_empty_files = self.empty_files.len();
self.common_data.text_messages.warnings.extend(warnings);
debug!("Found {} empty files.", self.information.number_of_empty_files);
true
}
DirTraversalResult::SuccessFolders { .. } => {
unreachable!()
}
DirTraversalResult::Stopped => false,
};
res
}
}
#[fun_time(message = "delete_files")]
fn delete_files(&mut self) {
match self.delete_method {
match self.common_data.delete_method {
DeleteMethod::Delete => {
for file_entry in &self.empty_files {
if fs::remove_file(file_entry.path.clone()).is_err() {
@@ -121,7 +118,6 @@ impl DebugPrint for EmptyFiles {
}
println!("---------------DEBUG PRINT---------------");
println!("Empty list size - {}", self.empty_files.len());
println!("Delete Method - {:?}", self.delete_method);
self.debug_print_common();
println!("-----------------------------------------");
}
@@ -187,8 +183,4 @@ impl EmptyFiles {
pub const fn get_information(&self) -> &Info {
&self.information
}
pub fn set_delete_method(&mut self, delete_method: DeleteMethod) {
self.delete_method = delete_method;
}
}
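`check_files` here (and in `InvalidSymlinks` below) previously pulled the single `()`-keyed group out of the traversal result with `get(&())` and cloned it; the rewrite consumes the map and flattens all values. A stand-alone sketch, with `String` standing in for `FileEntry`:

```rust
use std::collections::BTreeMap;

// These tools group all results under a single `()` key, so flattening
// every value is equivalent to the old get(&()) + clone, minus the clone.
fn collect_all(groups: BTreeMap<(), Vec<String>>) -> Vec<String> {
    groups.into_values().flatten().collect()
}

fn main() {
    let mut groups = BTreeMap::new();
    groups.insert((), vec!["a".to_string(), "b".to_string()]);
    assert_eq!(collect_all(groups).len(), 2);
}
```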

@@ -7,6 +7,7 @@ use std::path::PathBuf;
use crossbeam_channel::Receiver;
use fun_time::fun_time;
use futures::channel::mpsc::UnboundedSender;
use log::debug;
use crate::common_dir_traversal::{Collect, DirTraversalBuilder, DirTraversalResult, FolderEmptiness, FolderEntry, ProgressData, ToolType};
use crate::common_tool::{CommonData, CommonToolData};
@@ -101,7 +102,7 @@ impl EmptyFolder {
}
self.common_data.text_messages.warnings.extend(warnings);
debug!("Found {} empty folders.", self.empty_folder_list.len());
true
}
DirTraversalResult::Stopped => false,
@@ -191,6 +192,7 @@ impl SaveResults for EmptyFolder {
}
impl PrintResults for EmptyFolder {
#[fun_time(message = "print_results")]
fn print_results(&self) {
if !self.empty_folder_list.is_empty() {
println!("Found {} empty folders", self.empty_folder_list.len());

@@ -21,7 +21,6 @@ pub struct InvalidSymlinks {
common_data: CommonToolData,
information: Info,
invalid_symlinks: Vec<FileEntry>,
delete_method: DeleteMethod,
}
impl InvalidSymlinks {
pub fn new() -> Self {
@@ -29,7 +28,6 @@ impl InvalidSymlinks {
common_data: CommonToolData::new(ToolType::InvalidSymlinks),
information: Info::default(),
invalid_symlinks: vec![],
delete_method: DeleteMethod::None,
}
}
@@ -58,14 +56,13 @@ impl InvalidSymlinks {
.recursive_search(self.common_data.recursive_search)
.build()
.run();
debug!("check_files - collected files");
match result {
DirTraversalResult::SuccessFiles { grouped_file_entries, warnings } => {
if let Some(((), invalid_symlinks)) = grouped_file_entries.into_iter().next() {
self.invalid_symlinks = invalid_symlinks;
}
self.invalid_symlinks = grouped_file_entries.into_values().flatten().collect();
self.information.number_of_invalid_symlinks = self.invalid_symlinks.len();
self.common_data.text_messages.warnings.extend(warnings);
debug!("Found {} invalid symlinks.", self.information.number_of_invalid_symlinks);
true
}
DirTraversalResult::SuccessFolders { .. } => unreachable!(),
@@ -75,7 +72,7 @@ impl InvalidSymlinks {
#[fun_time(message = "delete_files")]
fn delete_files(&mut self) {
match self.delete_method {
match self.common_data.delete_method {
DeleteMethod::Delete => {
for file_entry in &self.invalid_symlinks {
if fs::remove_file(file_entry.path.clone()).is_err() {
@@ -104,13 +101,13 @@ impl DebugPrint for InvalidSymlinks {
}
println!("---------------DEBUG PRINT---------------");
println!("Invalid symlinks list size - {}", self.invalid_symlinks.len());
println!("Delete Method - {:?}", self.delete_method);
self.debug_print_common();
println!("-----------------------------------------");
}
}
impl SaveResults for InvalidSymlinks {
#[fun_time(message = "save_results_to_file")]
fn save_results_to_file(&mut self, file_name: &str) -> bool {
let file_name: String = match file_name {
"" => "results.txt".to_string(),
@@ -161,8 +158,6 @@ impl SaveResults for InvalidSymlinks {
}
impl PrintResults for InvalidSymlinks {
/// Print information's about duplicated entries
/// Only needed for CLI
fn print_results(&self) {
println!("Found {} invalid symlinks.\n", self.information.number_of_invalid_symlinks);
for file_entry in &self.invalid_symlinks {
@@ -196,8 +191,4 @@ impl InvalidSymlinks {
pub const fn get_information(&self) -> &Info {
&self.information
}
pub fn set_delete_method(&mut self, delete_method: DeleteMethod) {
self.delete_method = delete_method;
}
}

@@ -12,6 +12,8 @@ use anyhow::Context;
use crossbeam_channel::Receiver;
use fun_time::fun_time;
use futures::channel::mpsc::UnboundedSender;
use humansize::format_size;
use humansize::BINARY;
use lofty::{read_from, AudioFile, ItemKey, TaggedFileExt};
use log::debug;
use rayon::prelude::*;
@ -27,7 +29,7 @@ use symphonia::core::probe::Hint;
use crate::common::{create_crash_message, filter_reference_folders_generic, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads, AUDIO_FILES_EXTENSIONS};
use crate::common_cache::{get_similar_music_cache_file, load_cache_from_file_generalized_by_path, save_cache_to_file_generalized};
use crate::common_dir_traversal::{CheckingMethod, DirTraversalBuilder, DirTraversalResult, FileEntry, ProgressData, ToolType};
use crate::common_tool::{CommonData, CommonToolData, DeleteMethod};
use crate::common_tool::{CommonData, CommonToolData};
use crate::common_traits::*;
bitflags! {
@@ -103,7 +105,6 @@ pub struct SameMusic {
music_entries: Vec<MusicEntry>,
duplicated_music_entries: Vec<Vec<MusicEntry>>,
duplicated_music_entries_referenced: Vec<(MusicEntry, Vec<MusicEntry>)>,
delete_method: DeleteMethod,
music_similarity: MusicSimilarity,
approximate_comparison: bool,
check_type: CheckingMethod,
@@ -118,7 +119,6 @@ impl SameMusic {
common_data: CommonToolData::new(ToolType::SameMusic),
information: Info::default(),
music_entries: Vec::with_capacity(2048),
delete_method: DeleteMethod::None,
music_similarity: MusicSimilarity::NONE,
duplicated_music_entries: vec![],
music_to_check: Default::default(),
@@ -195,15 +195,16 @@ impl SameMusic {
.max_stage(2)
.build()
.run();
match result {
DirTraversalResult::SuccessFiles { grouped_file_entries, warnings } => {
if let Some(music_to_check) = grouped_file_entries.get(&()) {
for fe in music_to_check {
self.music_to_check.insert(fe.path.to_string_lossy().to_string(), fe.to_music_entry());
}
}
self.music_to_check = grouped_file_entries
.into_values()
.flatten()
.map(|fe| (fe.path.to_string_lossy().to_string(), fe.to_music_entry()))
.collect();
self.common_data.text_messages.warnings.extend(warnings);
debug!("check_files - Found {} music files.", self.music_to_check.len());
true
}
DirTraversalResult::SuccessFolders { .. } => {
@@ -226,6 +227,7 @@ impl SameMusic {
self.get_text_messages_mut().extend_with_another_messages(messages);
loaded_hash_map = loaded_items.unwrap_or_default();
debug!("load_cache - Starting to check for differences");
for (name, file_entry) in mem::take(&mut self.music_to_check) {
if let Some(cached_file_entry) = loaded_hash_map.get(&name) {
records_already_cached.insert(name.clone(), cached_file_entry.clone());
@ -233,6 +235,13 @@ impl SameMusic {
non_cached_files_to_check.insert(name, file_entry);
}
}
debug!(
"load_cache - completed diff between loaded and prechecked files, {}({}) - non cached, {}({}) - already cached",
non_cached_files_to_check.len(),
format_size(non_cached_files_to_check.values().map(|e| e.size).sum::<u64>(), BINARY),
records_already_cached.len(),
format_size(records_already_cached.values().map(|e| e.size).sum::<u64>(), BINARY),
);
} else {
loaded_hash_map = Default::default();
mem::swap(&mut self.music_to_check, &mut non_cached_files_to_check);
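The new cache-diff log line reports both partitions with their byte sizes via `humansize::format_size` and the 1024-based `BINARY` options, matching the imports added at the top of the file. Usage sketch:

```rust
use humansize::{format_size, BINARY};

fn main() {
    // BINARY renders 1024-based units: KiB, MiB, GiB, ...
    let non_cached: u64 = 5 * 1024 * 1024;
    let cached: u64 = 300 * 1024;
    println!(
        "{} non cached, {} already cached",
        format_size(non_cached, BINARY),
        format_size(cached, BINARY)
    );
}
```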
@@ -264,7 +273,7 @@ impl SameMusic {
prepare_thread_handler_common(progress_sender, 1, 3, non_cached_files_to_check.len(), self.check_type, self.common_data.tool_type);
let configuration = &self.hash_preset_config;
// Clean for duplicate files
debug!("calculate_fingerprint - starting fingerprinting");
let mut vec_file_entry = non_cached_files_to_check
.into_par_iter()
.map(|(path, mut music_entry)| {
@ -285,6 +294,7 @@ impl SameMusic {
.filter(Option::is_some)
.map(Option::unwrap)
.collect::<Vec<_>>();
debug!("calculate_fingerprint - ended fingerprinting");
send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);
@@ -453,7 +463,6 @@ impl SameMusic {
let (progress_thread_handle, progress_thread_run, atomic_counter, check_was_stopped) =
prepare_thread_handler_common(progress_sender, 3, 3, groups_to_check, self.check_type, self.common_data.tool_type);
// TODO is there a way to just run the iterator and not collect any info?
if !self.duplicated_music_entries.is_empty() {
let _: Vec<_> = self
.duplicated_music_entries
@@ -675,10 +684,6 @@ impl SameMusic {
&self.information
}
pub fn set_delete_method(&mut self, delete_method: DeleteMethod) {
self.delete_method = delete_method;
}
pub fn set_approximate_comparison(&mut self, approximate_comparison: bool) {
self.approximate_comparison = approximate_comparison;
}
@@ -897,7 +902,6 @@ impl DebugPrint for SameMusic {
println!("---------------DEBUG PRINT---------------");
println!("Found files music - {}", self.music_entries.len());
println!("Found duplicated files music - {}", self.duplicated_music_entries.len());
println!("Delete Method - {:?}", self.delete_method);
self.debug_print_common();
println!("-----------------------------------------");
}

@@ -273,6 +273,7 @@ impl SimilarImages {
self.get_text_messages_mut().extend_with_another_messages(messages);
loaded_hash_map = loaded_items.unwrap_or_default();
debug!("hash_images-load_cache - starting calculating diff");
for (name, file_entry) in mem::take(&mut self.images_to_check) {
if let Some(cached_file_entry) = loaded_hash_map.get(&name) {
records_already_cached.insert(name.clone(), cached_file_entry.clone());
@@ -280,6 +281,13 @@ impl SimilarImages {
non_cached_files_to_check.insert(name, file_entry);
}
}
debug!(
"hash_images_load_cache - completed diff between loaded and prechecked files, {}({}) - non cached, {}({}) - already cached",
non_cached_files_to_check.len(),
format_size(non_cached_files_to_check.values().map(|e| e.size).sum::<u64>(), BINARY),
records_already_cached.len(),
format_size(records_already_cached.values().map(|e| e.size).sum::<u64>(), BINARY),
);
} else {
loaded_hash_map = Default::default();
mem::swap(&mut self.images_to_check, &mut non_cached_files_to_check);
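The loop this hunk instruments (shared in shape with `SameMusic` above) partitions the prechecked entries by cache membership. A stand-alone sketch, with `u64` standing in for the real entry types:

```rust
use std::collections::BTreeMap;

// Cache-diff step: entries already in the loaded cache are reused, the
// rest are queued for (re)hashing.
fn split_by_cache(
    to_check: BTreeMap<String, u64>,
    loaded: &BTreeMap<String, u64>,
) -> (BTreeMap<String, u64>, BTreeMap<String, u64>) {
    let mut records_already_cached = BTreeMap::new();
    let mut non_cached_files_to_check = BTreeMap::new();
    for (name, entry) in to_check {
        if let Some(cached) = loaded.get(&name) {
            records_already_cached.insert(name, *cached);
        } else {
            non_cached_files_to_check.insert(name, entry);
        }
    }
    (records_already_cached, non_cached_files_to_check)
}

fn main() {
    let mut to_check = BTreeMap::new();
    to_check.insert("img.png".to_string(), 10);
    let loaded = BTreeMap::new();
    let (cached, fresh) = split_by_cache(to_check, &loaded);
    assert!(cached.is_empty());
    assert_eq!(fresh.len(), 1);
}
```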

@@ -47,7 +47,6 @@ pub struct Temporary {
common_data: CommonToolData,
information: Info,
temporary_files: Vec<FileEntry>,
delete_method: DeleteMethod,
}
impl Temporary {
@@ -55,7 +54,6 @@ impl Temporary {
Self {
common_data: CommonToolData::new(ToolType::TemporaryFiles),
information: Info::default(),
delete_method: DeleteMethod::None,
temporary_files: vec![],
}
}
@@ -175,7 +173,7 @@ impl Temporary {
#[fun_time(message = "delete_files")]
fn delete_files(&mut self) {
match self.delete_method {
match self.common_data.delete_method {
DeleteMethod::Delete => {
let mut warnings = Vec::new();
for file_entry in &self.temporary_files {
@@ -258,7 +256,6 @@ impl DebugPrint for Temporary {
}
println!("### Information's");
println!("Temporary list size - {}", self.temporary_files.len());
println!("Delete Method - {:?}", self.delete_method);
self.debug_print_common();
}
}
@@ -280,8 +277,4 @@ impl Temporary {
pub const fn get_information(&self) -> &Info {
&self.information
}
pub fn set_delete_method(&mut self, delete_method: DeleteMethod) {
self.delete_method = delete_method;
}
}
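With the field moved into `CommonToolData`, every tool's `delete_files` dispatches on `self.common_data.delete_method`. A trimmed, self-contained sketch of that dispatch (only the variants visible in this diff; the real enum has more):

```rust
use std::fs;
use std::path::PathBuf;

#[derive(Clone, Copy)]
enum DeleteMethod {
    None,
    Delete,
}

// Remove each file, collecting a warning per failure, mirroring the
// per-tool delete_files bodies above.
fn delete_files(delete_method: DeleteMethod, paths: &[PathBuf]) -> Vec<String> {
    let mut warnings = Vec::new();
    match delete_method {
        DeleteMethod::Delete => {
            for path in paths {
                if fs::remove_file(path).is_err() {
                    warnings.push(format!("Failed to remove file {}", path.display()));
                }
            }
        }
        DeleteMethod::None => {}
    }
    warnings
}

fn main() {
    let warnings = delete_files(DeleteMethod::None, &[PathBuf::from("/tmp/x")]);
    assert!(warnings.is_empty());
}
```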
