pull/1070/head
Rafał Mikrut 9 months ago
parent 4efc452e97
commit 38e5f24b5e

@ -1,3 +1,6 @@
## Version 6.1.0 - ?
## Version 6.0.0 - 11.06.2023r
- Add finding similar audio files by content - [#970](https://github.com/qarmin/czkawka/pull/970)
- Allow to find duplicates by name/size at once - [#956](https://github.com/qarmin/czkawka/pull/956)

@ -9,6 +9,7 @@ use std::sync::Arc;
use crossbeam_channel::Receiver;
use futures::channel::mpsc::UnboundedSender;
use log::{debug, info};
use mime_guess::get_mime_extensions;
use rayon::prelude::*;
@ -72,7 +73,15 @@ const WORKAROUNDS: &[(&str, &str)] = &[
("xml", "mum"),
("xml", "resx"),
("zip", "wmz"),
// Game-specific extensions - common extensions like zip cannot be listed here
("gz", "h3m"), // Heroes 3
("zip", "hashdb"), // Gog
("c2", "zip"), // King of the Dark Age
("c2", "bmp"), // King of the Dark Age
("c2", "avi"), // King of the Dark Age
("c2", "exe"), // King of the Dark Age
// Other
("der", "keystore"), // Godot/Android keystore
("exe", "pyd"), // Python/Mingw
("gz", "blend"), // Blender
("gz", "crate"), // Cargo
@ -81,6 +90,7 @@ const WORKAROUNDS: &[(&str, &str)] = &[
("html", "dtd"), // Mingw
("html", "ent"), // Mingw
("html", "md"), // Markdown
("html", "svelte"), // Svelte
("jpg", "jfif"), // Photo format
("m4v", "mp4"), // m4v and mp4 are interchangeable
("mobi", "azw3"), // Ebook format
@ -91,6 +101,7 @@ const WORKAROUNDS: &[(&str, &str)] = &[
("ods", "ots"), // Libreoffice
("odt", "ott"), // Libreoffice
("ogg", "ogv"), // Audio format
("pem", "key"), // curl, openssl
("pptx", "ppsx"), // Powerpoint
("sh", "bash"), // Linux
("sh", "guess"), // GNU
@ -109,6 +120,7 @@ const WORKAROUNDS: &[(&str, &str)] = &[
("xml", "dae"), // 3D models
("xml", "docbook"), //
("xml", "fb2"), //
("xml", "filters"), // Visual studio
("xml", "gir"), // GTK
("xml", "glade"), // Glade
("xml", "iml"), // Intelij Idea
@ -211,6 +223,7 @@ impl BadExtensions {
}
pub fn find_bad_extensions_files(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&UnboundedSender<ProgressData>>) {
info!("Starting finding extensions files");
self.directories.optimize_directories(self.recursive_search, &mut self.text_messages);
if !self.check_files(stop_receiver, progress_sender) {
self.stopped_search = true;
@ -286,6 +299,7 @@ impl BadExtensions {
}
fn check_files(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&UnboundedSender<ProgressData>>) -> bool {
debug!("check_files - start");
let result = DirTraversalBuilder::new()
.root_dirs(self.directories.included_directories.clone())
.group_by(|_fe| ())
@ -299,7 +313,8 @@ impl BadExtensions {
.recursive_search(self.recursive_search)
.build()
.run();
match result {
debug!("check_files - collected files");
let res = match result {
DirTraversalResult::SuccessFiles { grouped_file_entries, warnings } => {
if let Some(files_to_check) = grouped_file_entries.get(&()) {
self.files_to_check = files_to_check.clone();
@ -312,10 +327,13 @@ impl BadExtensions {
unreachable!()
}
DirTraversalResult::Stopped => false,
}
};
debug!("check_files - end");
res
}
fn look_for_bad_extensions_files(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&UnboundedSender<ProgressData>>) -> bool {
debug!("look_for_bad_extensions_files - start");
let (progress_thread_handle, progress_thread_run, atomic_counter, check_was_stopped) =
prepare_thread_handler_common(progress_sender, 1, 1, self.files_to_check.len(), CheckingMethod::None, self.tool_type);
@ -344,6 +362,7 @@ impl BadExtensions {
// Clean unused data
self.files_to_check = Default::default();
debug!("look_for_bad_extensions_files - end");
true
}
@ -355,7 +374,8 @@ impl BadExtensions {
check_was_stopped: &AtomicBool,
hashmap_workarounds: &HashMap<&str, Vec<&str>>,
) -> Vec<BadFileEntry> {
files_to_check
debug!("verify_extensions - start");
let res = files_to_check
.into_par_iter()
.map(|file_entry| {
atomic_counter.fetch_add(1, Ordering::Relaxed);
@ -404,7 +424,9 @@ impl BadExtensions {
.while_some()
.filter(Option::is_some)
.map(Option::unwrap)
.collect::<Vec<_>>()
.collect::<Vec<_>>();
debug!("verify_extensions - end");
res
}
fn get_and_validate_extension(&self, file_entry: &FileEntry, proper_extension: &str) -> Option<String> {

@ -10,6 +10,7 @@ use std::sync::Arc;
use crossbeam_channel::Receiver;
use futures::channel::mpsc::UnboundedSender;
use humansize::{format_size, BINARY};
use log::{debug, info};
use rayon::prelude::*;
use crate::common::{check_folder_children, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads, split_path};
@ -88,6 +89,7 @@ impl BigFile {
}
pub fn find_big_files(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&UnboundedSender<ProgressData>>) {
info!("Starting finding big files");
self.optimize_directories();
if !self.look_for_big_files(stop_receiver, progress_sender) {
self.stopped_search = true;
@ -141,6 +143,7 @@ impl BigFile {
}
fn look_for_big_files(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&UnboundedSender<ProgressData>>) -> bool {
debug!("look_for_big_files - start");
let mut folders_to_check: Vec<PathBuf> = Vec::with_capacity(1024 * 2); // This should be small enough too not see to big difference and big enough to store most of paths without needing to resize vector
let mut old_map: BTreeMap<u64, Vec<FileEntry>> = Default::default();
@ -210,6 +213,7 @@ impl BigFile {
self.extract_n_biggest_files(old_map);
debug!("look_for_big_files - end");
true
}
@ -251,6 +255,7 @@ impl BigFile {
}
pub fn extract_n_biggest_files(&mut self, old_map: BTreeMap<u64, Vec<FileEntry>>) {
debug!("extract_n_biggest_files - start");
let iter: Box<dyn Iterator<Item = _>>;
if self.search_mode == SearchMode::SmallestFiles {
iter = Box::new(old_map.into_iter());
@ -278,6 +283,7 @@ impl BigFile {
break;
}
}
debug!("extract_n_biggest_files - end");
}
pub fn set_number_of_files_to_check(&mut self, number_of_files_to_check: usize) {

@ -10,6 +10,7 @@ use std::{fs, mem, panic};
use crossbeam_channel::Receiver;
use futures::channel::mpsc::UnboundedSender;
use log::{debug, info};
use pdf::file::FileOptions;
use pdf::object::ParseOptions;
use pdf::PdfError;
@ -119,6 +120,7 @@ impl BrokenFiles {
}
pub fn find_broken_files(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&UnboundedSender<ProgressData>>) {
info!("Starting finding broken files");
self.directories.optimize_directories(self.recursive_search, &mut self.text_messages);
if !self.check_files(stop_receiver, progress_sender) {
self.stopped_search = true;
@ -195,6 +197,7 @@ impl BrokenFiles {
}
fn check_files(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&UnboundedSender<ProgressData>>) -> bool {
debug!("check_files - start");
let mut folders_to_check: Vec<PathBuf> = Vec::with_capacity(1024 * 2); // This should be small enough too not see to big difference and big enough to store most of paths without needing to resize vector
// Add root folders for finding
@ -263,6 +266,7 @@ impl BrokenFiles {
send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);
debug!("check_files - end");
true
}
fn get_file_entry(
@ -406,7 +410,8 @@ impl BrokenFiles {
}
}
fn look_for_broken_files(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&UnboundedSender<ProgressData>>) -> bool {
fn load_cache(&mut self) -> (BTreeMap<String, FileEntry>, BTreeMap<String, FileEntry>, BTreeMap<String, FileEntry>) {
debug!("load_cache - start (using cache {})", self.use_cache);
let loaded_hash_map;
let mut records_already_cached: BTreeMap<String, FileEntry> = Default::default();
@ -440,6 +445,13 @@ impl BrokenFiles {
loaded_hash_map = Default::default();
non_cached_files_to_check = files_to_check;
}
debug!("load_cache - end");
(loaded_hash_map, records_already_cached, non_cached_files_to_check)
}
fn look_for_broken_files(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&UnboundedSender<ProgressData>>) -> bool {
debug!("look_for_broken_files - start");
let (loaded_hash_map, records_already_cached, non_cached_files_to_check) = self.load_cache();
let (progress_thread_handle, progress_thread_run, atomic_counter, _check_was_stopped) =
prepare_thread_handler_common(progress_sender, 1, 1, non_cached_files_to_check.len(), CheckingMethod::None, self.tool_type);
@ -471,18 +483,7 @@ impl BrokenFiles {
// Just connect loaded results with already calculated
vec_file_entry.extend(records_already_cached.into_values());
if self.use_cache {
// Must save all results to file, old loaded from file with all currently counted results
let mut all_results: BTreeMap<String, FileEntry> = Default::default();
for file_entry in vec_file_entry.clone() {
all_results.insert(file_entry.path.to_string_lossy().to_string(), file_entry);
}
for (_name, file_entry) in loaded_hash_map {
all_results.insert(file_entry.path.to_string_lossy().to_string(), file_entry);
}
save_cache_to_file(&all_results, &mut self.text_messages, self.save_also_as_json);
}
self.save_to_cache(&vec_file_entry, loaded_hash_map);
self.broken_files = vec_file_entry
.into_par_iter()
@ -494,8 +495,25 @@ impl BrokenFiles {
// Clean unused data
self.files_to_check = Default::default();
debug!("look_for_broken_files - end");
true
}
fn save_to_cache(&mut self, vec_file_entry: &[FileEntry], loaded_hash_map: BTreeMap<String, FileEntry>) {
    debug!("save_to_cache - start, using cache {}", self.use_cache);
    if self.use_cache {
        // Merge freshly computed entries with the ones previously loaded from the
        // cache file, keyed by the lossy string form of each path, and persist the union.
        let mut all_results: BTreeMap<String, FileEntry> = Default::default();
        for file_entry in vec_file_entry {
            all_results.insert(file_entry.path.to_string_lossy().to_string(), file_entry.clone());
        }
        for (_name, file_entry) in loaded_hash_map {
            all_results.insert(file_entry.path.to_string_lossy().to_string(), file_entry);
        }
        save_cache_to_file(&all_results, &mut self.text_messages, self.save_also_as_json);
    }
    debug!("save_to_cache - end");
}
/// Function to delete files, from filed Vector
fn delete_files(&mut self) {

@ -17,7 +17,7 @@ use image::{DynamicImage, ImageBuffer, Rgb};
use imagepipe::{ImageSource, Pipeline};
#[cfg(feature = "heif")]
use libheif_rs::{ColorSpace, HeifContext, RgbChroma};
use log::{LevelFilter, Record};
use log::{debug, LevelFilter, Record};
// #[cfg(feature = "heif")]
// use libheif_rs::LibHeif;
@ -39,7 +39,7 @@ pub fn get_number_of_threads() -> usize {
fn filtering_messages(record: &Record) -> bool {
if let Some(module_path) = record.module_path() {
!module_path.contains("i18n_embed")
!["symphonia", "i18n_embed"].iter().any(|&x| module_path.contains(x))
} else {
true
}
@ -441,8 +441,10 @@ pub fn prepare_thread_handler_common(
}
/// Signals the progress-reporting thread to stop and blocks until it has exited.
///
/// Sets the shared `progress_thread_run` flag to `false` (the progress thread is
/// expected to poll this flag and terminate) and then joins its `JoinHandle`.
///
/// # Panics
/// Panics (with context) if the progress thread itself panicked, which would
/// indicate a bug in the progress-reporting code.
pub fn send_info_and_wait_for_ending_all_threads(progress_thread_run: &Arc<AtomicBool>, progress_thread_handle: JoinHandle<()>) {
    debug!("Sending info to stop all threads");
    progress_thread_run.store(false, Ordering::Relaxed);
    // expect() instead of bare unwrap(): a join failure means the progress
    // thread panicked, and the message makes the origin obvious in backtraces.
    progress_thread_handle.join().expect("progress thread panicked before it could be joined");
    debug!("All threads stopped");
}
#[cfg(test)]

@ -15,7 +15,7 @@ use std::{fs, mem};
use crossbeam_channel::Receiver;
use futures::channel::mpsc::UnboundedSender;
use humansize::{format_size, BINARY};
use log::debug;
use log::{debug, info};
use rayon::prelude::*;
use xxhash_rust::xxh3::Xxh3;
@ -150,6 +150,7 @@ impl DuplicateFinder {
}
pub fn find_duplicates(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&UnboundedSender<ProgressData>>) {
info!("Starting finding duplicates");
self.directories.optimize_directories(self.recursive_search, &mut self.text_messages);
self.use_reference_folders = !self.directories.reference_directories.is_empty();
@ -341,6 +342,7 @@ impl DuplicateFinder {
}
fn check_files_name(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&UnboundedSender<ProgressData>>) -> bool {
debug!("check_files_name - starting checking for same names");
let group_by_func = if self.case_sensitive_name_comparison {
|fe: &FileEntry| fe.path.file_name().unwrap().to_string_lossy().to_string()
} else {
@ -361,7 +363,8 @@ impl DuplicateFinder {
.maximal_file_size(self.maximal_file_size)
.build()
.run();
match result {
debug!("check_files_name - after finding file sizes");
let res = match result {
DirTraversalResult::SuccessFiles { grouped_file_entries, warnings } => {
self.files_with_identical_names = grouped_file_entries;
self.text_messages.warnings.extend(warnings);
@ -403,7 +406,9 @@ impl DuplicateFinder {
unreachable!()
}
DirTraversalResult::Stopped => false,
}
};
debug!("check_files_name - finished checking for same names");
res
}
fn calculate_name_stats(&mut self) {
@ -421,6 +426,7 @@ impl DuplicateFinder {
}
fn check_files_size_name(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&UnboundedSender<ProgressData>>) -> bool {
debug!("check_files_size_name - starting checking for same size and name");
let group_by_func = if self.case_sensitive_name_comparison {
|fe: &FileEntry| (fe.size, fe.path.file_name().unwrap().to_string_lossy().to_string())
} else {
@ -441,7 +447,8 @@ impl DuplicateFinder {
.maximal_file_size(self.maximal_file_size)
.build()
.run();
match result {
debug!("check_files_size_name - after finding file sizes");
let res = match result {
DirTraversalResult::SuccessFiles { grouped_file_entries, warnings } => {
self.files_with_identical_size_names = grouped_file_entries;
self.text_messages.warnings.extend(warnings);
@ -484,7 +491,9 @@ impl DuplicateFinder {
unreachable!()
}
DirTraversalResult::Stopped => false,
}
};
debug!("check_files_size_name - finished checking for same size and name");
res
}
fn calculate_size_name_stats(&mut self) {
@ -528,7 +537,7 @@ impl DuplicateFinder {
.build()
.run();
debug!("check_file_size - after finding file sizes");
match result {
let res = match result {
DirTraversalResult::SuccessFiles { grouped_file_entries, warnings } => {
self.files_with_identical_size = grouped_file_entries;
self.text_messages.warnings.extend(warnings);
@ -551,14 +560,15 @@ impl DuplicateFinder {
self.filter_reference_folders_by_size();
self.calculate_size_stats();
debug!("check_file_size - after calculating size stats/duplicates");
true
}
DirTraversalResult::SuccessFolders { .. } => {
unreachable!()
}
DirTraversalResult::Stopped => false,
}
};
debug!("check_file_size - after calculating size stats/duplicates");
res
}
fn calculate_size_stats(&mut self) {
@ -639,19 +649,18 @@ impl DuplicateFinder {
}
}
}
debug!("prehash_load_cache_at_start - using prehash cache end");
} else {
debug!("prehash_load_cache_at_start - not using prehash cache start");
loaded_hash_map = Default::default();
mem::swap(&mut self.files_with_identical_size, &mut non_cached_files_to_check);
debug!("prehash_load_cache_at_start - not using prehash cache end");
}
debug!("prehash_load_cache_at_start - end");
(loaded_hash_map, records_already_cached, non_cached_files_to_check)
}
fn prehash_save_cache_at_exit(&mut self, loaded_hash_map: BTreeMap<u64, Vec<FileEntry>>, pre_hash_results: &Vec<(u64, BTreeMap<String, Vec<FileEntry>>, Vec<String>)>) {
debug!("prehash_save_cache_at_exit - start - using prehash cache {}", self.use_prehash_cache);
if self.use_prehash_cache {
debug!("prehash_save_cache_at_exit - saving prehash cache start");
// All results = records already cached + computed results
let mut save_cache_to_hashmap: BTreeMap<String, FileEntry> = Default::default();
@ -723,9 +732,7 @@ impl DuplicateFinder {
.while_some()
.collect();
debug!("prehashing - start sending info to progress thread");
send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);
debug!("prehashing - got info about progress thread end");
// Check if user aborted search(only from GUI)
if check_was_stopped.load(Ordering::Relaxed) {
@ -893,9 +900,7 @@ impl DuplicateFinder {
self.full_hashing_save_cache_at_exit(records_already_cached, &mut full_hash_results, loaded_hash_map);
debug!("full_hashing - starting sending info to progress thread");
send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);
debug!("full_hashing - after sending info to progress thread");
// Break if stop was clicked after saving to cache
if check_was_stopped.load(Ordering::Relaxed) {

@ -6,6 +6,7 @@ use std::path::PathBuf;
use crossbeam_channel::Receiver;
use futures::channel::mpsc::UnboundedSender;
use log::{debug, info};
use crate::common_dir_traversal::{DirTraversalBuilder, DirTraversalResult, FileEntry, ProgressData, ToolType};
use crate::common_directory::Directories;
@ -67,6 +68,7 @@ impl EmptyFiles {
/// Finding empty files, save results to internal struct variables
pub fn find_empty_files(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&UnboundedSender<ProgressData>>) {
info!("Starting finding empty files");
self.directories.optimize_directories(self.recursive_search, &mut self.text_messages);
if !self.check_files(stop_receiver, progress_sender) {
self.stopped_search = true;
@ -128,6 +130,7 @@ impl EmptyFiles {
/// Check files for any with size == 0
fn check_files(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&UnboundedSender<ProgressData>>) -> bool {
debug!("check_files - start");
let result = DirTraversalBuilder::new()
.root_dirs(self.directories.included_directories.clone())
.group_by(|_fe| ())
@ -141,7 +144,8 @@ impl EmptyFiles {
.recursive_search(self.recursive_search)
.build()
.run();
match result {
debug!("check_files - collected files to check");
let res = match result {
DirTraversalResult::SuccessFiles { grouped_file_entries, warnings } => {
if let Some(empty_files) = grouped_file_entries.get(&()) {
self.empty_files = empty_files.clone();
@ -155,7 +159,9 @@ impl EmptyFiles {
unreachable!()
}
DirTraversalResult::Stopped => false,
}
};
debug!("check_files - end");
res
}
/// Function to delete files, from filed Vector

@ -6,6 +6,7 @@ use std::path::PathBuf;
use crossbeam_channel::Receiver;
use futures::channel::mpsc::UnboundedSender;
use log::{debug, info};
use crate::common_dir_traversal::{Collect, DirTraversalBuilder, DirTraversalResult, FolderEmptiness, FolderEntry, ProgressData, ToolType};
use crate::common_directory::Directories;
@ -91,6 +92,7 @@ impl EmptyFolder {
}
/// Public function used by CLI to search for empty folders
pub fn find_empty_folders(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&UnboundedSender<ProgressData>>) {
info!("Starting finding empty folders");
self.directories.optimize_directories(true, &mut self.text_messages);
if !self.check_for_empty_folders(stop_receiver, progress_sender) {
self.stopped_search = true;
@ -131,6 +133,7 @@ impl EmptyFolder {
/// Function to check if folder are empty.
/// Parameter `initial_checking` for second check before deleting to be sure that checked folder is still empty
fn check_for_empty_folders(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&UnboundedSender<ProgressData>>) -> bool {
debug!("check_for_empty_folders - start");
let result = DirTraversalBuilder::new()
.root_dirs(self.directories.included_directories.clone())
.group_by(|_fe| ())
@ -142,7 +145,8 @@ impl EmptyFolder {
.max_stage(0)
.build()
.run();
match result {
debug!("check_for_empty_folders - collected folders to check");
let res = match result {
DirTraversalResult::SuccessFiles { .. } => {
unreachable!()
}
@ -160,7 +164,9 @@ impl EmptyFolder {
true
}
DirTraversalResult::Stopped => false,
}
};
debug!("check_for_empty_folders - end");
res
}
/// Deletes earlier found empty folders

@ -6,6 +6,7 @@ use std::path::PathBuf;
use crossbeam_channel::Receiver;
use futures::channel::mpsc::UnboundedSender;
use log::{debug, info};
use crate::common_dir_traversal::{Collect, DirTraversalBuilder, DirTraversalResult, ErrorType, FileEntry, ProgressData, ToolType};
use crate::common_directory::Directories;
@ -66,6 +67,7 @@ impl InvalidSymlinks {
}
pub fn find_invalid_links(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&UnboundedSender<ProgressData>>) {
info!("Starting finding invalid links");
self.directories.optimize_directories(self.recursive_search, &mut self.text_messages);
if !self.check_files(stop_receiver, progress_sender) {
self.stopped_search = true;
@ -127,6 +129,7 @@ impl InvalidSymlinks {
/// Check files for any with size == 0
fn check_files(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&UnboundedSender<ProgressData>>) -> bool {
debug!("check_files - start");
let result = DirTraversalBuilder::new()
.root_dirs(self.directories.included_directories.clone())
.group_by(|_fe| ())
@ -139,7 +142,8 @@ impl InvalidSymlinks {
.recursive_search(self.recursive_search)
.build()
.run();
match result {
debug!("check_files - collected files");
let res = match result {
DirTraversalResult::SuccessFiles { grouped_file_entries, warnings } => {
if let Some(((), invalid_symlinks)) = grouped_file_entries.into_iter().next() {
self.invalid_symlinks = invalid_symlinks;
@ -150,7 +154,9 @@ impl InvalidSymlinks {
}
DirTraversalResult::SuccessFolders { .. } => unreachable!(),
DirTraversalResult::Stopped => false,
}
};
debug!("check_files - end");
res
}
/// Function to delete files, from filed Vector

@ -12,6 +12,7 @@ use anyhow::Context;
use crossbeam_channel::Receiver;
use futures::channel::mpsc::UnboundedSender;
use lofty::{read_from, AudioFile, ItemKey, TaggedFileExt};
use log::{debug, info};
use rayon::prelude::*;
use rusty_chromaprint::{match_fingerprints, Configuration, Fingerprinter};
use serde::{Deserialize, Serialize};
@ -168,6 +169,7 @@ impl SameMusic {
}
pub fn find_same_music(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&UnboundedSender<ProgressData>>) {
info!("Starting finding same music");
self.directories.optimize_directories(self.recursive_search, &mut self.text_messages);
self.use_reference_folders = !self.directories.reference_directories.is_empty();
if !self.check_files(stop_receiver, progress_sender) {
@ -367,6 +369,7 @@ impl SameMusic {
}
fn load_cache(&mut self, checking_tags: bool) -> (HashMap<String, MusicEntry>, HashMap<String, MusicEntry>, HashMap<String, MusicEntry>) {
debug!("load_cache - start, using cache {}", self.use_cache);
let loaded_hash_map;
let mut records_already_cached: HashMap<String, MusicEntry> = Default::default();
@ -397,10 +400,12 @@ impl SameMusic {
loaded_hash_map = Default::default();
mem::swap(&mut self.music_to_check, &mut non_cached_files_to_check);
}
debug!("load_cache - end");
(loaded_hash_map, records_already_cached, non_cached_files_to_check)
}
fn save_cache(&mut self, vec_file_entry: Vec<MusicEntry>, loaded_hash_map: HashMap<String, MusicEntry>, checking_tags: bool) {
debug!("save_cache - start, using cache {}", self.use_cache);
if !self.use_cache {
return;
}
@ -411,9 +416,11 @@ impl SameMusic {
all_results.insert(file_entry.path.to_string_lossy().to_string(), file_entry);
}
save_cache_to_file(&all_results, &mut self.text_messages, self.save_also_as_json, checking_tags);
debug!("save_cache - end");
}
fn calculate_fingerprint(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&UnboundedSender<ProgressData>>) -> bool {
debug!("calculate_fingerprint - start");
let (loaded_hash_map, records_already_cached, non_cached_files_to_check) = self.load_cache(false);
let (progress_thread_handle, progress_thread_run, atomic_counter, check_was_stopped) =
@ -455,16 +462,18 @@ impl SameMusic {
if check_was_stopped.load(Ordering::Relaxed) {
return false;
}
debug!("calculate_fingerprint - end");
true
}
fn read_tags(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&UnboundedSender<ProgressData>>) -> bool {
debug!("read_tags - start");
let (loaded_hash_map, records_already_cached, non_cached_files_to_check) = self.load_cache(true);
let (progress_thread_handle, progress_thread_run, atomic_counter, check_was_stopped) =
prepare_thread_handler_common(progress_sender, 1, 2, non_cached_files_to_check.len(), self.check_type, self.tool_type);
debug!("read_tags - starting reading tags");
// Clean for duplicate files
let mut vec_file_entry = non_cached_files_to_check
.into_par_iter()
@ -484,6 +493,7 @@ impl SameMusic {
.filter(Option::is_some)
.map(Option::unwrap)
.collect::<Vec<_>>();
debug!("read_tags - ended reading tags");
send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);
@ -499,10 +509,13 @@ impl SameMusic {
return false;
}
debug!("read_tags - end");
true
}
fn check_for_duplicate_tags(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&UnboundedSender<ProgressData>>) -> bool {
debug!("check_for_duplicate_tags - start");
let (progress_thread_handle, progress_thread_run, atomic_counter, _check_was_stopped) =
prepare_thread_handler_common(progress_sender, 2, 2, self.music_to_check.len(), self.check_type, self.tool_type);
@ -598,9 +611,12 @@ impl SameMusic {
// Clear unused data
self.music_entries.clear();
debug!("check_for_duplicate_tags - end");
true
}
fn read_tags_to_files_similar_by_content(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&UnboundedSender<ProgressData>>) -> bool {
debug!("read_tags_to_files_similar_by_content - start");
let groups_to_check = max(self.duplicated_music_entries.len(), self.duplicated_music_entries_referenced.len());
let (progress_thread_handle, progress_thread_run, atomic_counter, check_was_stopped) =
prepare_thread_handler_common(progress_sender, 3, 3, groups_to_check, self.check_type, self.tool_type);
@ -648,6 +664,7 @@ impl SameMusic {
send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);
debug!("read_tags_to_files_similar_by_content - end");
!check_was_stopped.load(Ordering::Relaxed)
}
@ -674,6 +691,7 @@ impl SameMusic {
base_files: Vec<MusicEntry>,
files_to_compare: &[MusicEntry],
) -> Option<Vec<Vec<MusicEntry>>> {
debug!("compare_fingerprints - start");
let mut used_paths: HashSet<String> = Default::default();
let configuration = &self.hash_preset_config;
@ -722,10 +740,12 @@ impl SameMusic {
duplicated_music_entries.push(music_entries);
}
}
debug!("compare_fingerprints - end");
Some(duplicated_music_entries)
}
fn check_for_duplicate_fingerprints(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&UnboundedSender<ProgressData>>) -> bool {
debug!("check_for_duplicate_fingerprints - start");
let (base_files, files_to_compare) = self.split_fingerprints_to_check();
let (progress_thread_handle, progress_thread_run, atomic_counter, _check_was_stopped) =
prepare_thread_handler_common(progress_sender, 2, 3, base_files.len(), self.check_type, self.tool_type);
@ -758,6 +778,7 @@ impl SameMusic {
// Clear unused data
self.music_entries.clear();
debug!("check_for_duplicate_fingerprints - end");
true
}
@ -768,6 +789,7 @@ impl SameMusic {
get_item: fn(&MusicEntry) -> &str,
approximate_comparison: bool,
) -> Vec<Vec<MusicEntry>> {
debug!("check_music_item - start");
let mut new_duplicates: Vec<_> = Default::default();
let old_duplicates_len = old_duplicates.len();
for vec_file_entry in old_duplicates {
@ -789,6 +811,7 @@ impl SameMusic {
}
atomic_counter.fetch_add(old_duplicates_len, Ordering::Relaxed);
debug!("check_music_item - end");
new_duplicates
}

@ -12,6 +12,7 @@ use futures::channel::mpsc::UnboundedSender;
use humansize::{format_size, BINARY};
use image::GenericImageView;
use image_hasher::{FilterType, HashAlg, HasherConfig};
use log::{debug, info};
use rayon::prelude::*;
use serde::{Deserialize, Serialize};
@ -251,6 +252,7 @@ impl SimilarImages {
/// Public function used by CLI to search for empty folders
pub fn find_similar_images(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&UnboundedSender<ProgressData>>) {
info!("Starting finding similar images");
self.directories.optimize_directories(true, &mut self.text_messages);
self.use_reference_folders = !self.directories.reference_directories.is_empty();
if !self.check_for_similar_images(stop_receiver, progress_sender) {
@ -278,6 +280,7 @@ impl SimilarImages {
/// Function to check if folder are empty.
/// Parameter `initial_checking` for second check before deleting to be sure that checked folder is still empty
fn check_for_similar_images(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&UnboundedSender<ProgressData>>) -> bool {
debug!("check_for_similar_images - start");
let mut folders_to_check: Vec<PathBuf> = Vec::with_capacity(1024 * 2); // This should be small enough too not see to big difference and big enough to store most of paths without needing to resize vector
if !self.allowed_extensions.using_custom_extensions() {
@ -357,6 +360,7 @@ impl SimilarImages {
send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);
debug!("check_for_similar_images - end");
true
}
@ -390,6 +394,7 @@ impl SimilarImages {
}
fn hash_images_load_cache(&mut self) -> (HashMap<String, FileEntry>, HashMap<String, FileEntry>, HashMap<String, FileEntry>) {
debug!("hash_images_load_cache - start, use cache: {}", self.use_cache);
let loaded_hash_map;
let mut records_already_cached: HashMap<String, FileEntry> = Default::default();
@ -420,6 +425,7 @@ impl SimilarImages {
loaded_hash_map = Default::default();
mem::swap(&mut self.images_to_check, &mut non_cached_files_to_check);
}
debug!("hash_images_load_cache - end");
(loaded_hash_map, records_already_cached, non_cached_files_to_check)
}
@ -431,11 +437,13 @@ impl SimilarImages {
// - Join all hashes and save it to file
fn hash_images(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&UnboundedSender<ProgressData>>) -> bool {
debug!("hash_images - start");
let (loaded_hash_map, records_already_cached, non_cached_files_to_check) = self.hash_images_load_cache();
let (progress_thread_handle, progress_thread_run, atomic_counter, check_was_stopped) =
prepare_thread_handler_common(progress_sender, 1, 2, non_cached_files_to_check.len(), CheckingMethod::None, self.tool_type);
debug!("hash_images - start hashing images");
let mut vec_file_entry: Vec<(FileEntry, ImHash)> = non_cached_files_to_check
.into_par_iter()
.map(|(_s, file_entry)| {
@ -450,6 +458,7 @@ impl SimilarImages {
.filter(Option::is_some)
.map(Option::unwrap)
.collect::<Vec<(FileEntry, ImHash)>>();
debug!("hash_images - end hashing images");
send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);
@ -466,6 +475,20 @@ impl SimilarImages {
}
}
self.save_to_cache(vec_file_entry, loaded_hash_map);
// Break if stop was clicked after saving to cache
if check_was_stopped.load(Ordering::Relaxed) {
return false;
}
debug!("hash_images - end");
true
}
fn save_to_cache(&mut self, vec_file_entry: Vec<(FileEntry, ImHash)>, loaded_hash_map: HashMap<String, FileEntry>) {
debug!("save_to_cache - start, using cache: {}", self.use_cache);
if self.use_cache {
// Must save all results to file, old loaded from file with all currently counted results
let mut all_results: HashMap<String, FileEntry> = loaded_hash_map;
@ -481,14 +504,9 @@ impl SimilarImages {
self.image_filter,
);
}
// Break if stop was clicked after saving to cache
if check_was_stopped.load(Ordering::Relaxed) {
return false;
}
true
debug!("save_to_cache - end");
}
fn collect_image_file_entry(&self, mut file_entry: FileEntry) -> (FileEntry, ImHash) {
let file_name_lowercase = file_entry.path.to_string_lossy().to_lowercase();
@ -562,6 +580,7 @@ impl SimilarImages {
// Split hashes at 2 parts, base hashes and hashes to compare, 3 argument is set of hashes with multiple images
fn split_hashes(&mut self, all_hashed_images: &HashMap<ImHash, Vec<FileEntry>>) -> (Vec<ImHash>, HashSet<ImHash>) {
debug!("split_hashes - start");
let hashes_with_multiple_images: HashSet<ImHash> = all_hashed_images
.iter()
.filter_map(|(hash, vec_file_entry)| {
@ -599,6 +618,7 @@ impl SimilarImages {
}
base_hashes = all_hashed_images.keys().cloned().collect::<Vec<_>>();
}
debug!("split_hashes - end");
(base_hashes, hashes_with_multiple_images)
}
@ -610,6 +630,7 @@ impl SimilarImages {
collected_similar_images: &mut HashMap<ImHash, Vec<FileEntry>>,
hashes_similarity: HashMap<ImHash, (ImHash, u32)>,
) {
debug!("collect_hash_compare_result - start, use reference: {}", self.use_reference_folders);
if self.use_reference_folders {
// This is same step as without reference folders, but also checks if children are inside/outside reference directories, because may happen, that one file is inside reference folder and other outside
@ -659,6 +680,7 @@ impl SimilarImages {
collected_similar_images.get_mut(&parent_hash).unwrap().append(&mut vec_fe);
}
}
debug!("collect_hash_compare_result - end");
}
fn compare_hashes_with_non_zero_tolerance(
@ -669,6 +691,7 @@ impl SimilarImages {
stop_receiver: Option<&Receiver<()>>,
tolerance: u32,
) -> bool {
debug!("compare_hashes_with_non_zero_tolerance - start");
// Don't use hashes with multiple images in bktree, because they will always be master of group and cannot be find by other hashes
let (base_hashes, hashes_with_multiple_images) = self.split_hashes(all_hashed_images);
@ -733,6 +756,7 @@ impl SimilarImages {
debug_check_for_duplicated_things(self.use_reference_folders, &hashes_parents, &hashes_similarity, all_hashed_images, "LATTER");
self.collect_hash_compare_result(hashes_parents, &hashes_with_multiple_images, all_hashed_images, collected_similar_images, hashes_similarity);
debug!("compare_hashes_with_non_zero_tolerance - end");
true
}
@ -743,6 +767,7 @@ impl SimilarImages {
hashes_similarity: &mut HashMap<ImHash, (ImHash, u32)>,
hashes_with_multiple_images: &HashSet<ImHash>,
) {
debug!("connect_results - start");
for (original_hash, vec_compared_hashes) in partial_results {
let mut number_of_added_child_items = 0;
for (similarity, compared_hash) in vec_compared_hashes {
@ -795,9 +820,11 @@ impl SimilarImages {
hashes_parents.insert((*original_hash).clone(), number_of_added_child_items);
}
}
debug!("connect_results - end");
}
fn find_similar_hashes(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&UnboundedSender<ProgressData>>) -> bool {
debug!("find_similar_hashes - start");
if self.image_hashes.is_empty() {
return true;
}
@ -848,10 +875,12 @@ impl SimilarImages {
self.images_to_check = Default::default();
self.bktree = BKTree::new(Hamming);
debug!("find_similar_hashes - end");
true
}
fn exclude_items_with_same_size(&mut self) {
debug!("exclude_items_with_same_size - start, exclude: {}", self.exclude_images_with_same_size);
if self.exclude_images_with_same_size {
for vec_file_entry in mem::take(&mut self.similar_vectors) {
let mut bt_sizes: BTreeSet<u64> = Default::default();
@ -867,9 +896,11 @@ impl SimilarImages {
}
}
}
debug!("exclude_items_with_same_size - end");
}
fn remove_multiple_records_from_reference_folders(&mut self) {
debug!("remove_multiple_records_from_reference_folders - start, use reference: {}", self.use_reference_folders);
if self.use_reference_folders {
self.similar_referenced_vectors = mem::take(&mut self.similar_vectors)
.into_iter()
@ -885,6 +916,7 @@ impl SimilarImages {
})
.collect::<Vec<(FileEntry, Vec<FileEntry>)>>();
}
debug!("remove_multiple_records_from_reference_folders - end");
}
#[allow(dead_code)]

@ -9,6 +9,7 @@ use crossbeam_channel::Receiver;
use ffmpeg_cmdline_utils::FfmpegErrorKind::FfmpegNotFound;
use futures::channel::mpsc::UnboundedSender;
use humansize::{format_size, BINARY};
use log::{debug, info};
use rayon::prelude::*;
use serde::{Deserialize, Serialize};
use vid_dup_finder_lib::HashCreationErrorKind::DetermineVideo;
@ -210,6 +211,7 @@ impl SimilarVideos {
/// Public function used by CLI to search for empty folders
pub fn find_similar_videos(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&UnboundedSender<ProgressData>>) {
info!("Starting finding similar videos");
if !check_if_ffmpeg_is_installed() {
self.text_messages.errors.push(flc!("core_ffmpeg_not_found"));
#[cfg(target_os = "windows")]
@ -244,6 +246,7 @@ impl SimilarVideos {
/// Function to check if folder are empty.
/// Parameter `initial_checking` for second check before deleting to be sure that checked folder is still empty
fn check_for_similar_videos(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&UnboundedSender<ProgressData>>) -> bool {
debug!("check_for_similar_videos - start");
let mut folders_to_check: Vec<PathBuf> = Vec::with_capacity(1024 * 2); // This should be small enough too not see to big difference and big enough to store most of paths without needing to resize vector
if !self.allowed_extensions.using_custom_extensions() {
@ -320,6 +323,7 @@ impl SimilarVideos {
send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);
debug!("check_for_similar_videos - end");
true
}
@ -353,6 +357,7 @@ impl SimilarVideos {
}
fn load_cache_at_start(&mut self) -> (BTreeMap<String, FileEntry>, BTreeMap<String, FileEntry>, BTreeMap<String, FileEntry>) {
debug!("load_cache_at_start - start, use cache: {}", self.use_cache);
let loaded_hash_map;
let mut records_already_cached: BTreeMap<String, FileEntry> = Default::default();
let mut non_cached_files_to_check: BTreeMap<String, FileEntry> = Default::default();
@ -382,10 +387,12 @@ impl SimilarVideos {
loaded_hash_map = Default::default();
mem::swap(&mut self.videos_to_check, &mut non_cached_files_to_check);
}
debug!("load_cache_at_start - end");
(loaded_hash_map, records_already_cached, non_cached_files_to_check)
}
fn sort_videos(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&UnboundedSender<ProgressData>>) -> bool {
debug!("sort_videos - start");
let (loaded_hash_map, records_already_cached, non_cached_files_to_check) = self.load_cache_at_start();
let (progress_thread_handle, progress_thread_run, atomic_counter, check_was_stopped) =
@ -435,14 +442,7 @@ impl SimilarVideos {
}
}
if self.use_cache {
// Must save all results to file, old loaded from file with all currently counted results
let mut all_results: BTreeMap<String, FileEntry> = loaded_hash_map;
for file_entry in vec_file_entry {
all_results.insert(file_entry.path.to_string_lossy().to_string(), file_entry);
}
save_hashes_to_file(&all_results, &mut self.text_messages, self.save_also_as_json);
}
self.save_cache(vec_file_entry, loaded_hash_map);
// Break if stop was clicked after saving to cache
if check_was_stopped.load(Ordering::Relaxed) {
@ -468,10 +468,24 @@ impl SimilarVideos {
self.videos_hashes = Default::default();
self.videos_to_check = Default::default();
debug!("sort_videos - end");
true
}
fn save_cache(&mut self, vec_file_entry: Vec<FileEntry>, loaded_hash_map: BTreeMap<String, FileEntry>) {
    debug!("save_cache - start, use cache: {}", self.use_cache);
    if self.use_cache {
        // Merge the freshly computed entries into the records previously loaded
        // from the cache file, keyed by full path, then persist the combined
        // set so the next scan can reuse every hash computed so far.
        let mut all_results = loaded_hash_map;
        all_results.extend(
            vec_file_entry
                .into_iter()
                .map(|file_entry| (file_entry.path.to_string_lossy().to_string(), file_entry)),
        );
        save_hashes_to_file(&all_results, &mut self.text_messages, self.save_also_as_json);
    }
    debug!("save_cache - end");
}
fn match_groups_of_videos(&mut self, vector_of_hashes: Vec<VideoHash>, hashmap_with_file_entries: &HashMap<String, FileEntry>) {
debug!("match_groups_of_videos - start");
let match_group = vid_dup_finder_lib::search(vector_of_hashes, NormalizedTolerance::new(self.tolerance as f64 / 100.0f64));
let mut collected_similar_videos: Vec<Vec<FileEntry>> = Default::default();
for i in match_group {
@ -494,9 +508,11 @@ impl SimilarVideos {
}
self.similar_vectors = collected_similar_videos;
debug!("match_groups_of_videos - end");
}
fn remove_from_reference_folders(&mut self) {
debug!("remove_from_reference_folders - start, use reference folders: {}", self.use_reference_folders);
if self.use_reference_folders {
self.similar_referenced_vectors = mem::take(&mut self.similar_vectors)
.into_iter()
@ -512,6 +528,7 @@ impl SimilarVideos {
})
.collect::<Vec<(FileEntry, Vec<FileEntry>)>>();
}
debug!("remove_from_reference_folders - end");
}
/// Set included dir which needs to be relative, exists etc.

@ -8,6 +8,7 @@ use std::sync::Arc;
use crossbeam_channel::Receiver;
use futures::channel::mpsc::UnboundedSender;
use log::{debug, info};
use rayon::prelude::*;
use crate::common::{check_folder_children, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads};
@ -89,6 +90,7 @@ impl Temporary {
/// Finding temporary files, save results to internal struct variables
pub fn find_temporary_files(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&UnboundedSender<ProgressData>>) {
info!("Starting finding temporary files");
self.directories.optimize_directories(self.recursive_search, &mut self.text_messages);
if !self.check_files(stop_receiver, progress_sender) {
self.stopped_search = true;
@ -144,6 +146,7 @@ impl Temporary {
}
fn check_files(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&UnboundedSender<ProgressData>>) -> bool {
debug!("check_files - start");
let mut folders_to_check: Vec<PathBuf> = Vec::with_capacity(1024 * 2); // This should be small enough too not see to big difference and big enough to store most of paths without needing to resize vector
// Add root folders for finding
@ -213,6 +216,7 @@ impl Temporary {
send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);
self.information.number_of_temporary_files = self.temporary_files.len();
debug!("check_files - end");
true
}
pub fn get_file_entry(

Loading…
Cancel
Save