@@ -6,7 +6,7 @@ use std::io::prelude::*;
 use std::io::{self, Error, ErrorKind};
 #[cfg(target_family = "unix")]
 use std::os::unix::fs::MetadataExt;
-use std::path::Path;
+use std::path::{Path, PathBuf};
 use std::sync::atomic::Ordering;
 use std::{fs, mem};
@@ -15,6 +15,7 @@ use fun_time::fun_time;
 use humansize::{format_size, BINARY};
 use log::debug;
 use rayon::prelude::*;
+use serde::{Deserialize, Serialize};
 use xxhash_rust::xxh3::Xxh3;

 use crate::common::{check_if_stop_received, delete_files_custom, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads};
@@ -44,6 +45,36 @@ impl HashType {
     }
 }

+#[derive(Clone, Serialize, Deserialize, Debug, Default)]
+pub struct DuplicateEntry {
+    pub path: PathBuf,
+    pub modified_date: u64,
+    pub size: u64,
+    pub hash: String,
+}
+
+impl ResultEntry for DuplicateEntry {
+    fn get_path(&self) -> &Path {
+        &self.path
+    }
+    fn get_modified_date(&self) -> u64 {
+        self.modified_date
+    }
+    fn get_size(&self) -> u64 {
+        self.size
+    }
+}
+
+impl FileEntry {
+    fn into_duplicate_entry(self) -> DuplicateEntry {
+        DuplicateEntry {
+            size: self.size,
+            path: self.path,
+            modified_date: self.modified_date,
+            hash: String::new(),
+        }
+    }
+}
+
 #[derive(Default)]
 pub struct Info {
     pub number_of_groups_by_size: usize,
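A side note on the new type: `into_duplicate_entry` takes `self` by value, so the `PathBuf` moves into the `DuplicateEntry` without a clone, and `hash` starts empty to be filled in by the later hashing stages. A minimal, self-contained sketch of the pattern (the `FileEntry` below is a stand-in carrying only the fields this diff touches):

```rust
use std::path::PathBuf;

// Stand-ins mirroring only the fields this diff relies on.
struct FileEntry { path: PathBuf, modified_date: u64, size: u64 }
#[derive(Debug)]
struct DuplicateEntry { path: PathBuf, modified_date: u64, size: u64, hash: String }

impl FileEntry {
    // Consuming `self` moves the PathBuf instead of cloning it.
    fn into_duplicate_entry(self) -> DuplicateEntry {
        DuplicateEntry { path: self.path, modified_date: self.modified_date, size: self.size, hash: String::new() }
    }
}

fn main() {
    let files = vec![FileEntry { path: PathBuf::from("/tmp/a"), modified_date: 0, size: 42 }];
    let dups: Vec<DuplicateEntry> = files.into_iter().map(FileEntry::into_duplicate_entry).collect();
    println!("{dups:?}");
}
```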
@@ -62,21 +93,21 @@ pub struct DuplicateFinder {
     common_data: CommonToolData,
     information: Info,
     // File Size, File Entry
-    files_with_identical_names: BTreeMap<String, Vec<FileEntry>>,
+    files_with_identical_names: BTreeMap<String, Vec<DuplicateEntry>>,
     // File (Size, Name), File Entry
-    files_with_identical_size_names: BTreeMap<(u64, String), Vec<FileEntry>>,
+    files_with_identical_size_names: BTreeMap<(u64, String), Vec<DuplicateEntry>>,
     // File Size, File Entry
-    files_with_identical_size: BTreeMap<u64, Vec<FileEntry>>,
+    files_with_identical_size: BTreeMap<u64, Vec<DuplicateEntry>>,
     // File Size, next grouped by file size, next grouped by hash
-    files_with_identical_hashes: BTreeMap<u64, Vec<Vec<FileEntry>>>,
+    files_with_identical_hashes: BTreeMap<u64, Vec<Vec<DuplicateEntry>>>,
     // File Size, File Entry
-    files_with_identical_names_referenced: BTreeMap<String, (FileEntry, Vec<FileEntry>)>,
+    files_with_identical_names_referenced: BTreeMap<String, (DuplicateEntry, Vec<DuplicateEntry>)>,
     // File (Size, Name), File Entry
-    files_with_identical_size_names_referenced: BTreeMap<(u64, String), (FileEntry, Vec<FileEntry>)>,
+    files_with_identical_size_names_referenced: BTreeMap<(u64, String), (DuplicateEntry, Vec<DuplicateEntry>)>,
     // File Size, File Entry
-    files_with_identical_size_referenced: BTreeMap<u64, (FileEntry, Vec<FileEntry>)>,
+    files_with_identical_size_referenced: BTreeMap<u64, (DuplicateEntry, Vec<DuplicateEntry>)>,
     // File Size, next grouped by file size, next grouped by hash
-    files_with_identical_hashes_referenced: BTreeMap<u64, Vec<(FileEntry, Vec<FileEntry>)>>,
+    files_with_identical_hashes_referenced: BTreeMap<u64, Vec<(DuplicateEntry, Vec<DuplicateEntry>)>>,
     check_method: CheckingMethod,
     hash_type: HashType,
     ignore_hard_links: bool,
@@ -158,18 +189,11 @@ impl DuplicateFinder {
         };

         let result = DirTraversalBuilder::new()
-            .root_dirs(self.common_data.directories.included_directories.clone())
+            .common_data(&self.common_data)
             .group_by(group_by_func)
             .stop_receiver(stop_receiver)
             .progress_sender(progress_sender)
             .checking_method(CheckingMethod::Name)
-            .directories(self.common_data.directories.clone())
-            .allowed_extensions(self.common_data.allowed_extensions.clone())
-            .excluded_items(self.common_data.excluded_items.clone())
-            .recursive_search(self.common_data.recursive_search)
-            .minimal_file_size(self.common_data.minimal_file_size)
-            .maximal_file_size(self.common_data.maximal_file_size)
-            .tool_type(self.common_data.tool_type)
             .build()
             .run();
@@ -178,7 +202,16 @@ impl DuplicateFinder {
                 self.common_data.text_messages.warnings.extend(warnings);

                 // Create new BTreeMap without single size entries (files without duplicates)
-                self.files_with_identical_names = grouped_file_entries.into_iter().filter(|(_name, vector)| vector.len() > 1).collect();
+                self.files_with_identical_names = grouped_file_entries
+                    .into_iter()
+                    .filter_map(|(name, vector)| {
+                        if vector.len() > 1 {
+                            Some((name, vector.into_iter().map(FileEntry::into_duplicate_entry).collect()))
+                        } else {
+                            None
+                        }
+                    })
+                    .collect();

                 // Reference - only used for size grouping, because the hash will be computed differently later
                 if self.common_data.use_reference_folders {
@@ -195,7 +228,7 @@ impl DuplicateFinder {
                                 Some((files_from_referenced_folders.pop().unwrap(), normal_files))
                             }
                         })
-                        .collect::<Vec<(FileEntry, Vec<FileEntry>)>>();
+                        .collect::<Vec<(DuplicateEntry, Vec<DuplicateEntry>)>>();
                     for (fe, vec_fe) in vec {
                         self.files_with_identical_names_referenced.insert(fe.path.to_string_lossy().to_string(), (fe, vec_fe));
                     }
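For context, the reference-folder grouping touched by this hunk keeps a group only when it contains at least one file from a reference directory and at least one normal file, then promotes one reference file to act as the group key. A standalone sketch of that partition-and-filter shape; `is_in_reference_folder` is a hypothetical stand-in for the real directories check:

```rust
use std::path::PathBuf;

fn is_in_reference_folder(path: &PathBuf) -> bool {
    // Hypothetical stand-in: the real check consults the configured reference directories.
    path.starts_with("/reference")
}

fn main() {
    let group = vec![PathBuf::from("/reference/a.txt"), PathBuf::from("/data/a.txt")];
    // Split one duplicate group into reference files and normal files.
    let (mut refs, normal): (Vec<_>, Vec<_>) = group.into_iter().partition(is_in_reference_folder);
    // Keep the group only if both sides are non-empty; one reference file becomes the key.
    if let (Some(key), false) = (refs.pop(), normal.is_empty()) {
        println!("reference {key:?} has duplicates: {normal:?}");
    }
}
```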
@@ -231,18 +264,11 @@ impl DuplicateFinder {
         };

         let result = DirTraversalBuilder::new()
-            .root_dirs(self.common_data.directories.included_directories.clone())
+            .common_data(&self.common_data)
             .group_by(group_by_func)
             .stop_receiver(stop_receiver)
             .progress_sender(progress_sender)
             .checking_method(CheckingMethod::Name)
-            .directories(self.common_data.directories.clone())
-            .allowed_extensions(self.common_data.allowed_extensions.clone())
-            .excluded_items(self.common_data.excluded_items.clone())
-            .recursive_search(self.common_data.recursive_search)
-            .minimal_file_size(self.common_data.minimal_file_size)
-            .maximal_file_size(self.common_data.maximal_file_size)
-            .tool_type(self.common_data.tool_type)
             .build()
             .run();
@@ -250,7 +276,16 @@ impl DuplicateFinder {
             DirTraversalResult::SuccessFiles { grouped_file_entries, warnings } => {
                 self.common_data.text_messages.warnings.extend(warnings);
-                self.files_with_identical_size_names = grouped_file_entries.into_iter().filter(|(_name, vector)| vector.len() > 1).collect();
+                self.files_with_identical_size_names = grouped_file_entries
+                    .into_iter()
+                    .filter_map(|(size_name, vector)| {
+                        if vector.len() > 1 {
+                            Some((size_name, vector.into_iter().map(FileEntry::into_duplicate_entry).collect()))
+                        } else {
+                            None
+                        }
+                    })
+                    .collect();

                 // Reference - only used for size grouping, because the hash will be computed differently later
                 if self.common_data.use_reference_folders {
@@ -267,7 +302,7 @@ impl DuplicateFinder {
                                 Some((files_from_referenced_folders.pop().unwrap(), normal_files))
                             }
                         })
-                        .collect::<Vec<(FileEntry, Vec<FileEntry>)>>();
+                        .collect::<Vec<(DuplicateEntry, Vec<DuplicateEntry>)>>();
                     for (fe, vec_fe) in vec {
                         self.files_with_identical_size_names_referenced
                             .insert((fe.size, fe.path.to_string_lossy().to_string()), (fe, vec_fe));
@@ -306,19 +341,12 @@ impl DuplicateFinder {
             _ => panic!(),
         };

         let result = DirTraversalBuilder::new()
-            .root_dirs(self.common_data.directories.included_directories.clone())
+            .common_data(&self.common_data)
             .group_by(|fe| fe.size)
             .stop_receiver(stop_receiver)
             .progress_sender(progress_sender)
             .checking_method(self.check_method)
             .max_stage(max_stage)
-            .directories(self.common_data.directories.clone())
-            .allowed_extensions(self.common_data.allowed_extensions.clone())
-            .excluded_items(self.common_data.excluded_items.clone())
-            .recursive_search(self.common_data.recursive_search)
-            .minimal_file_size(self.common_data.minimal_file_size)
-            .maximal_file_size(self.common_data.maximal_file_size)
-            .tool_type(self.common_data.tool_type)
             .build()
             .run();
@@ -334,7 +362,8 @@ impl DuplicateFinder {
                     let vector = if self.ignore_hard_links { filter_hard_links(&vec) } else { vec };
                     if vector.len() > 1 {
-                        self.files_with_identical_size.insert(size, vector);
+                        self.files_with_identical_size
+                            .insert(size, vector.into_iter().map(FileEntry::into_duplicate_entry).collect());
                     }
                 }
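`filter_hard_links` itself is not part of this diff. Given the `MetadataExt` import at the top, it presumably collapses entries that point at the same inode, so hard links to one file are not reported as duplicates of each other. One plausible shape for such a filter, a sketch only and not the project's actual implementation:

```rust
#[cfg(target_family = "unix")]
fn filter_hard_links(entries: &[std::path::PathBuf]) -> Vec<std::path::PathBuf> {
    use std::collections::HashSet;
    use std::os::unix::fs::MetadataExt;

    // Keep a single path per (device, inode) pair so hard links to the same
    // underlying data survive only once.
    let mut seen: HashSet<(u64, u64)> = HashSet::new();
    let mut kept = Vec::with_capacity(entries.len());
    for path in entries {
        if let Ok(meta) = std::fs::metadata(path) {
            if !seen.insert((meta.dev(), meta.ino())) {
                continue; // same inode already kept
            }
        }
        kept.push(path.clone());
    }
    kept
}
```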
@@ -388,7 +417,7 @@ impl DuplicateFinder {
                                 Some((files_from_referenced_folders.pop().unwrap(), normal_files))
                             }
                         })
-                        .collect::<Vec<(FileEntry, Vec<FileEntry>)>>();
+                        .collect::<Vec<(DuplicateEntry, Vec<DuplicateEntry>)>>();
                     for (fe, vec_fe) in vec {
                         self.files_with_identical_size_referenced.insert(fe.size, (fe, vec_fe));
                     }
@@ -396,17 +425,17 @@ impl DuplicateFinder {
     }

     #[fun_time(message = "prehash_load_cache_at_start", level = "debug")]
-    fn prehash_load_cache_at_start(&mut self) -> (BTreeMap<u64, Vec<FileEntry>>, BTreeMap<u64, Vec<FileEntry>>, BTreeMap<u64, Vec<FileEntry>>) {
+    fn prehash_load_cache_at_start(&mut self) -> (BTreeMap<u64, Vec<DuplicateEntry>>, BTreeMap<u64, Vec<DuplicateEntry>>, BTreeMap<u64, Vec<DuplicateEntry>>) {
         // Cache algorithm
         // - Load data from cache
-        // - Convert from BT<u64, Vec<FileEntry>> to BT<String, FileEntry>
+        // - Convert from BT<u64, Vec<DuplicateEntry>> to BT<String, DuplicateEntry>
         // - Save to proper values
         let loaded_hash_map;
-        let mut records_already_cached: BTreeMap<u64, Vec<FileEntry>> = Default::default();
-        let mut non_cached_files_to_check: BTreeMap<u64, Vec<FileEntry>> = Default::default();
+        let mut records_already_cached: BTreeMap<u64, Vec<DuplicateEntry>> = Default::default();
+        let mut non_cached_files_to_check: BTreeMap<u64, Vec<DuplicateEntry>> = Default::default();

         if self.use_prehash_cache {
-            let (messages, loaded_items) = load_cache_from_file_generalized_by_size::<FileEntry>(
+            let (messages, loaded_items) = load_cache_from_file_generalized_by_size::<DuplicateEntry>(
                 &get_duplicate_cache_file(&self.hash_type, true),
                 self.get_delete_outdated_cache(),
                 &self.files_with_identical_size,
@@ -418,7 +447,7 @@ impl DuplicateFinder {
             for (size, mut vec_file_entry) in mem::take(&mut self.files_with_identical_size) {
                 if let Some(cached_vec_file_entry) = loaded_hash_map.get(&size) {
                     // TODO maybe hashmap is not needed when using < 4 elements
-                    let mut cached_path_entries: HashMap<&Path, FileEntry> = HashMap::new();
+                    let mut cached_path_entries: HashMap<&Path, DuplicateEntry> = HashMap::new();
                     for file_entry in cached_vec_file_entry {
                         cached_path_entries.insert(&file_entry.path, file_entry.clone());
                     }
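The hunks above reconcile the on-disk cache with the current scan: entries whose path matches a cached record (and which look unchanged) go to `records_already_cached`, everything else to `non_cached_files_to_check`. A condensed sketch of that split; `Entry`, `split_by_cache`, and `is_same_file` are hypothetical stand-ins for the real types and freshness check:

```rust
use std::collections::{BTreeMap, HashMap};
use std::path::{Path, PathBuf};

#[derive(Clone)]
struct Entry { path: PathBuf, modified_date: u64, size: u64, hash: String }

// Hypothetical: a cached record is reusable only if the file looks unchanged.
fn is_same_file(current: &Entry, cached: &Entry) -> bool {
    current.modified_date == cached.modified_date && current.size == cached.size
}

fn split_by_cache(
    current: BTreeMap<u64, Vec<Entry>>,
    cache: &BTreeMap<u64, Vec<Entry>>,
) -> (BTreeMap<u64, Vec<Entry>>, BTreeMap<u64, Vec<Entry>>) {
    let (mut cached_out, mut to_check) = (BTreeMap::new(), BTreeMap::new());
    for (size, entries) in current {
        // Index cached entries of this size by path for O(1) lookup.
        let by_path: HashMap<&Path, &Entry> = cache
            .get(&size)
            .map(|v| v.iter().map(|e| (e.path.as_path(), e)).collect())
            .unwrap_or_default();
        for entry in entries {
            match by_path.get(entry.path.as_path()) {
                Some(&cached) if is_same_file(&entry, cached) => {
                    cached_out.entry(size).or_insert_with(Vec::new).push(cached.clone());
                }
                _ => to_check.entry(size).or_insert_with(Vec::new).push(entry),
            }
        }
    }
    (cached_out, to_check)
}
```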
@@ -449,10 +478,14 @@ impl DuplicateFinder {
     }

     #[fun_time(message = "prehash_save_cache_at_exit", level = "debug")]
-    fn prehash_save_cache_at_exit(&mut self, loaded_hash_map: BTreeMap<u64, Vec<FileEntry>>, pre_hash_results: &Vec<(u64, BTreeMap<String, Vec<FileEntry>>, Vec<String>)>) {
+    fn prehash_save_cache_at_exit(
+        &mut self,
+        loaded_hash_map: BTreeMap<u64, Vec<DuplicateEntry>>,
+        pre_hash_results: &Vec<(u64, BTreeMap<String, Vec<DuplicateEntry>>, Vec<String>)>,
+    ) {
         if self.use_prehash_cache {
             // All results = records already cached + computed results
-            let mut save_cache_to_hashmap: BTreeMap<String, FileEntry> = Default::default();
+            let mut save_cache_to_hashmap: BTreeMap<String, DuplicateEntry> = Default::default();

             for (size, vec_file_entry) in loaded_hash_map {
                 if size >= self.minimal_prehash_cache_file_size {
@@ -487,7 +520,7 @@ impl DuplicateFinder {
         &mut self,
         stop_receiver: Option<&Receiver<()>>,
         progress_sender: Option<&Sender<ProgressData>>,
-        pre_checked_map: &mut BTreeMap<u64, Vec<FileEntry>>,
+        pre_checked_map: &mut BTreeMap<u64, Vec<DuplicateEntry>>,
     ) -> Option<()> {
         let check_type = self.hash_type;
         let (progress_thread_handle, progress_thread_run, _atomic_counter, _check_was_stopped) =
@@ -510,10 +543,10 @@ impl DuplicateFinder {
         debug!("Starting calculating prehash");
         #[allow(clippy::type_complexity)]
-        let pre_hash_results: Vec<(u64, BTreeMap<String, Vec<FileEntry>>, Vec<String>)> = non_cached_files_to_check
+        let pre_hash_results: Vec<(u64, BTreeMap<String, Vec<DuplicateEntry>>, Vec<String>)> = non_cached_files_to_check
             .into_par_iter()
             .map(|(size, vec_file_entry)| {
-                let mut hashmap_with_hash: BTreeMap<String, Vec<FileEntry>> = Default::default();
+                let mut hashmap_with_hash: BTreeMap<String, Vec<DuplicateEntry>> = Default::default();
                 let mut errors: Vec<String> = Vec::new();
                 let mut buffer = [0u8; 1024 * 32];
@@ -526,7 +559,7 @@ impl DuplicateFinder {
                     match hash_calculation(&mut buffer, &file_entry, &check_type, 0) {
                         Ok(hash_string) => {
                             file_entry.hash = hash_string.clone();
-                            hashmap_with_hash.entry(hash_string.clone()).or_default().push(file_entry);
+                            hashmap_with_hash.entry(hash_string).or_default().push(file_entry);
                        }
                        Err(s) => errors.push(s),
                    }
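The prehash step passes `limit = 0` to `hash_calculation`, which effectively hashes only the first buffer (32 KiB here) of each file; that cheap pass eliminates most same-size non-duplicates before the expensive full hash. A self-contained sketch of the idea, using the `Xxh3` hasher already imported at the top of the file:

```rust
use std::fs::File;
use std::io::Read;
use std::path::Path;

use xxhash_rust::xxh3::Xxh3;

// Hash only the first chunk of a file; files that differ early get
// different prehashes and never reach the full-hash stage.
fn prehash(path: &Path) -> std::io::Result<String> {
    let mut buffer = [0u8; 1024 * 32];
    let mut file = File::open(path)?;
    let n = file.read(&mut buffer)?; // a single read, at most 32 KiB
    let mut hasher = Xxh3::new();
    hasher.update(&buffer[..n]);
    Ok(format!("{:x}", hasher.digest()))
}
```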
@@ -573,16 +606,16 @@ impl DuplicateFinder {
     #[fun_time(message = "full_hashing_load_cache_at_start", level = "debug")]
     fn full_hashing_load_cache_at_start(
         &mut self,
-        mut pre_checked_map: BTreeMap<u64, Vec<FileEntry>>,
-    ) -> (BTreeMap<u64, Vec<FileEntry>>, BTreeMap<u64, Vec<FileEntry>>, BTreeMap<u64, Vec<FileEntry>>) {
+        mut pre_checked_map: BTreeMap<u64, Vec<DuplicateEntry>>,
+    ) -> (BTreeMap<u64, Vec<DuplicateEntry>>, BTreeMap<u64, Vec<DuplicateEntry>>, BTreeMap<u64, Vec<DuplicateEntry>>) {
         let loaded_hash_map;
-        let mut records_already_cached: BTreeMap<u64, Vec<FileEntry>> = Default::default();
-        let mut non_cached_files_to_check: BTreeMap<u64, Vec<FileEntry>> = Default::default();
+        let mut records_already_cached: BTreeMap<u64, Vec<DuplicateEntry>> = Default::default();
+        let mut non_cached_files_to_check: BTreeMap<u64, Vec<DuplicateEntry>> = Default::default();

         if self.common_data.use_cache {
             debug!("full_hashing_load_cache_at_start - using cache");
             let (messages, loaded_items) =
-                load_cache_from_file_generalized_by_size::<FileEntry>(&get_duplicate_cache_file(&self.hash_type, false), self.get_delete_outdated_cache(), &pre_checked_map);
+                load_cache_from_file_generalized_by_size::<DuplicateEntry>(&get_duplicate_cache_file(&self.hash_type, false), self.get_delete_outdated_cache(), &pre_checked_map);
             self.get_text_messages_mut().extend_with_another_messages(messages);
             loaded_hash_map = loaded_items.unwrap_or_default();
@@ -590,7 +623,7 @@ impl DuplicateFinder {
             for (size, mut vec_file_entry) in pre_checked_map {
                 if let Some(cached_vec_file_entry) = loaded_hash_map.get(&size) {
                     // TODO maybe hashmap is not needed when using < 4 elements
-                    let mut cached_path_entries: HashMap<&Path, FileEntry> = HashMap::new();
+                    let mut cached_path_entries: HashMap<&Path, DuplicateEntry> = HashMap::new();
                     for file_entry in cached_vec_file_entry {
                         cached_path_entries.insert(&file_entry.path, file_entry.clone());
                     }
@@ -624,9 +657,9 @@ impl DuplicateFinder {
     #[fun_time(message = "full_hashing_save_cache_at_exit", level = "debug")]
     fn full_hashing_save_cache_at_exit(
         &mut self,
-        records_already_cached: BTreeMap<u64, Vec<FileEntry>>,
-        full_hash_results: &mut Vec<(u64, BTreeMap<String, Vec<FileEntry>>, Vec<String>)>,
-        loaded_hash_map: BTreeMap<u64, Vec<FileEntry>>,
+        records_already_cached: BTreeMap<u64, Vec<DuplicateEntry>>,
+        full_hash_results: &mut Vec<(u64, BTreeMap<String, Vec<DuplicateEntry>>, Vec<String>)>,
+        loaded_hash_map: BTreeMap<u64, Vec<DuplicateEntry>>,
     ) {
         if !self.common_data.use_cache {
             return;
@@ -642,7 +675,7 @@ impl DuplicateFinder {
                 }
             }
             // Size doesn't exist, add results to files
-            let mut temp_hashmap: BTreeMap<String, Vec<FileEntry>> = Default::default();
+            let mut temp_hashmap: BTreeMap<String, Vec<DuplicateEntry>> = Default::default();
             for file_entry in vec_file_entry {
                 temp_hashmap.entry(file_entry.hash.clone()).or_default().push(file_entry);
             }
@@ -650,7 +683,7 @@ impl DuplicateFinder {
         }

         // Must save all results to file: entries loaded from the cache plus all freshly computed results
-        let mut all_results: BTreeMap<String, FileEntry> = Default::default();
+        let mut all_results: BTreeMap<String, DuplicateEntry> = Default::default();
         for (_size, vec_file_entry) in loaded_hash_map {
             for file_entry in vec_file_entry {
                 all_results.insert(file_entry.path.to_string_lossy().to_string(), file_entry);
@@ -674,7 +707,12 @@ impl DuplicateFinder {
     }

     #[fun_time(message = "full_hashing", level = "debug")]
-    fn full_hashing(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender<ProgressData>>, pre_checked_map: BTreeMap<u64, Vec<FileEntry>>) -> Option<()> {
+    fn full_hashing(
+        &mut self,
+        stop_receiver: Option<&Receiver<()>>,
+        progress_sender: Option<&Sender<ProgressData>>,
+        pre_checked_map: BTreeMap<u64, Vec<DuplicateEntry>>,
+    ) -> Option<()> {
         let (progress_thread_handle, progress_thread_run, _atomic_counter, _check_was_stopped) =
             prepare_thread_handler_common(progress_sender, 4, MAX_STAGE, 0, self.check_method, self.common_data.tool_type);
@@ -696,10 +734,10 @@ impl DuplicateFinder {
         let check_type = self.hash_type;

         debug!("Starting full hashing of {} files", non_cached_files_to_check.values().map(Vec::len).sum::<usize>());
-        let mut full_hash_results: Vec<(u64, BTreeMap<String, Vec<FileEntry>>, Vec<String>)> = non_cached_files_to_check
+        let mut full_hash_results: Vec<(u64, BTreeMap<String, Vec<DuplicateEntry>>, Vec<String>)> = non_cached_files_to_check
             .into_par_iter()
             .map(|(size, vec_file_entry)| {
-                let mut hashmap_with_hash: BTreeMap<String, Vec<FileEntry>> = Default::default();
+                let mut hashmap_with_hash: BTreeMap<String, Vec<DuplicateEntry>> = Default::default();
                 let mut errors: Vec<String> = Vec::new();
                 let mut buffer = [0u8; 1024 * 16];
@@ -769,7 +807,7 @@ impl DuplicateFinder {
                         Some(all_results_with_same_size)
                     }
                 })
-                .collect::<Vec<Vec<(FileEntry, Vec<FileEntry>)>>>();
+                .collect::<Vec<Vec<(DuplicateEntry, Vec<DuplicateEntry>)>>>();
             for vec_of_vec in vec {
                 self.files_with_identical_hashes_referenced.insert(vec_of_vec[0].0.size, vec_of_vec);
             }
@@ -798,7 +836,7 @@ impl DuplicateFinder {
     fn check_files_hash(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender<ProgressData>>) -> bool {
         assert_eq!(self.check_method, CheckingMethod::Hash);

-        let mut pre_checked_map: BTreeMap<u64, Vec<FileEntry>> = Default::default();
+        let mut pre_checked_map: BTreeMap<u64, Vec<DuplicateEntry>> = Default::default();
         let ret = self.prehashing(stop_receiver, progress_sender, &mut pre_checked_map);
         if ret.is_none() {
             return false;
@@ -834,7 +872,7 @@ impl DuplicateFinder {
             }
             CheckingMethod::Hash => {
                 for vec_files in self.files_with_identical_hashes.values() {
-                    let vev: Vec<&Vec<FileEntry>> = vec_files.iter().collect::<Vec<_>>();
+                    let vev: Vec<&Vec<DuplicateEntry>> = vec_files.iter().collect::<Vec<_>>();
                     delete_files_custom(&vev, &self.common_data.delete_method, &mut self.common_data.text_messages, self.common_data.dry_run);
                 }
             }
@@ -864,7 +902,7 @@ impl DuplicateFinder {
         self.minimal_prehash_cache_file_size = minimal_prehash_cache_file_size;
     }

-    pub const fn get_files_sorted_by_names(&self) -> &BTreeMap<String, Vec<FileEntry>> {
+    pub const fn get_files_sorted_by_names(&self) -> &BTreeMap<String, Vec<DuplicateEntry>> {
         &self.files_with_identical_names
     }
@@ -872,15 +910,15 @@ impl DuplicateFinder {
         self.use_prehash_cache = use_prehash_cache;
     }

-    pub const fn get_files_sorted_by_size(&self) -> &BTreeMap<u64, Vec<FileEntry>> {
+    pub const fn get_files_sorted_by_size(&self) -> &BTreeMap<u64, Vec<DuplicateEntry>> {
         &self.files_with_identical_size
     }

-    pub const fn get_files_sorted_by_size_name(&self) -> &BTreeMap<(u64, String), Vec<FileEntry>> {
+    pub const fn get_files_sorted_by_size_name(&self) -> &BTreeMap<(u64, String), Vec<DuplicateEntry>> {
         &self.files_with_identical_size_names
     }

-    pub const fn get_files_sorted_by_hash(&self) -> &BTreeMap<u64, Vec<Vec<FileEntry>>> {
+    pub const fn get_files_sorted_by_hash(&self) -> &BTreeMap<u64, Vec<Vec<DuplicateEntry>>> {
         &self.files_with_identical_hashes
     }
@@ -908,19 +946,19 @@ impl DuplicateFinder {
         self.common_data.use_reference_folders
     }

-    pub fn get_files_with_identical_hashes_referenced(&self) -> &BTreeMap<u64, Vec<(FileEntry, Vec<FileEntry>)>> {
+    pub fn get_files_with_identical_hashes_referenced(&self) -> &BTreeMap<u64, Vec<(DuplicateEntry, Vec<DuplicateEntry>)>> {
         &self.files_with_identical_hashes_referenced
     }

-    pub fn get_files_with_identical_name_referenced(&self) -> &BTreeMap<String, (FileEntry, Vec<FileEntry>)> {
+    pub fn get_files_with_identical_name_referenced(&self) -> &BTreeMap<String, (DuplicateEntry, Vec<DuplicateEntry>)> {
         &self.files_with_identical_names_referenced
     }

-    pub fn get_files_with_identical_size_referenced(&self) -> &BTreeMap<u64, (FileEntry, Vec<FileEntry>)> {
+    pub fn get_files_with_identical_size_referenced(&self) -> &BTreeMap<u64, (DuplicateEntry, Vec<DuplicateEntry>)> {
         &self.files_with_identical_size_referenced
     }

-    pub fn get_files_with_identical_size_names_referenced(&self) -> &BTreeMap<(u64, String), (FileEntry, Vec<FileEntry>)> {
+    pub fn get_files_with_identical_size_names_referenced(&self) -> &BTreeMap<(u64, String), (DuplicateEntry, Vec<DuplicateEntry>)> {
         &self.files_with_identical_size_names_referenced
     }
 }
@@ -1216,7 +1254,7 @@ pub trait MyHasher {
     fn finalize(&self) -> String;
 }

-fn hash_calculation(buffer: &mut [u8], file_entry: &FileEntry, hash_type: &HashType, limit: u64) -> Result<String, String> {
+fn hash_calculation(buffer: &mut [u8], file_entry: &DuplicateEntry, hash_type: &HashType, limit: u64) -> Result<String, String> {
     let mut file_handler = match File::open(&file_entry.path) {
         Ok(t) => t,
         Err(e) => return Err(format!("Unable to check hash of file {:?}, reason {e}", file_entry.path)),
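The body of `hash_calculation` is unchanged by this hunk apart from the parameter type, so it is not shown here. A sketch of how a limit-aware read loop over the `MyHasher` trait above typically looks (with `limit = 0`, as the prehash step passes, the loop stops after the first chunk); `hash_up_to` is a hypothetical name, not the project's function:

```rust
use std::fs::File;
use std::io::Read;
use std::path::Path;

// Mirrors the `MyHasher` trait defined in this file.
pub trait MyHasher {
    fn update(&mut self, bytes: &[u8]);
    fn finalize(&self) -> String;
}

fn hash_up_to(path: &Path, hasher: &mut dyn MyHasher, limit: u64) -> Result<String, String> {
    let mut file = File::open(path).map_err(|e| format!("Unable to open {path:?}, reason {e}"))?;
    let mut buffer = [0u8; 1024 * 16];
    let mut read_bytes: u64 = 0;
    loop {
        let n = file.read(&mut buffer).map_err(|e| format!("Unable to read {path:?}, reason {e}"))?;
        if n == 0 {
            break; // EOF - the whole file was hashed
        }
        read_bytes += n as u64;
        hasher.update(&buffer[..n]);
        if read_bytes >= limit {
            break; // limit reached; limit == 0 stops after the first chunk
        }
    }
    Ok(hasher.finalize())
}
```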
@@ -1376,7 +1414,7 @@ mod tests {
         let src = dir.path().join("a");
         let mut file = File::create(&src)?;
         file.write_all(b"aa")?;
-        let e = FileEntry { path: src, ..Default::default() };
+        let e = DuplicateEntry { path: src, ..Default::default() };
         let r = hash_calculation(&mut buf, &e, &HashType::Blake3, 0).unwrap();
         assert!(!r.is_empty());
         Ok(())
@@ -1389,7 +1427,7 @@ mod tests {
         let src = dir.path().join("a");
         let mut file = File::create(&src)?;
         file.write_all(b"aa")?;
-        let e = FileEntry { path: src, ..Default::default() };
+        let e = DuplicateEntry { path: src, ..Default::default() };
         let r1 = hash_calculation(&mut buf, &e, &HashType::Blake3, 1).unwrap();
         let r2 = hash_calculation(&mut buf, &e, &HashType::Blake3, 2).unwrap();
         let r3 = hash_calculation(&mut buf, &e, &HashType::Blake3, u64::MAX).unwrap();
@@ -1403,7 +1441,7 @@ mod tests {
         let dir = tempfile::Builder::new().tempdir()?;
         let mut buf = [0u8; 1 << 10];
         let src = dir.path().join("a");
-        let e = FileEntry { path: src, ..Default::default() };
+        let e = DuplicateEntry { path: src, ..Default::default() };
         let r = hash_calculation(&mut buf, &e, &HashType::Blake3, 0).unwrap_err();
         assert!(!r.is_empty());
         Ok(())