@ -1,22 +1,30 @@
use std ::collections ::{ BTreeMap , HashMap } ;
use std ::cmp ::max ;
use std ::collections ::{ BTreeMap , HashMap , HashSet } ;
use std ::fs ::File ;
use std ::io ::prelude ::* ;
use std ::io ::{ BufReader , BufWriter } ;
use std ::path ::{ Path , PathBuf } ;
use std ::sync ::atomic ::{ Atomic Bool, Atomic Usize, Ordering } ;
use std ::sync ::atomic ::{ Atomic Usize, Ordering } ;
use std ::sync ::Arc ;
use std ::time ::SystemTime ;
use std ::{ mem , panic } ;
use anyhow ::Context ;
use crossbeam_channel ::Receiver ;
use futures ::channel ::mpsc ::UnboundedSender ;
use lofty ::TaggedFileExt ;
use lofty ::{ read_from , AudioFile , ItemKey } ;
use rayon ::prelude ::* ;
use rusty_chromaprint ::{ match_fingerprints , Configuration , Fingerprinter } ;
use serde ::{ Deserialize , Serialize } ;
use symphonia ::core ::audio ::SampleBuffer ;
use symphonia ::core ::codecs ::{ DecoderOptions , CODEC_TYPE_NULL } ;
use symphonia ::core ::formats ::FormatOptions ;
use symphonia ::core ::io ::MediaSourceStream ;
use symphonia ::core ::meta ::MetadataOptions ;
use symphonia ::core ::probe ::Hint ;
use crate ::common ::{ create_crash_message , prepare_thread_handler_common , send_info_and_wait_for_ending_all_threads , AUDIO_FILES_EXTENSIONS } ;
use crate ::common ::{ open_cache_folder , Common } ;
use crate ::common ::{ filter_reference_folders_generic, open_cache_folder } ;
use crate ::common_dir_traversal ::{ CheckingMethod , DirTraversalBuilder , DirTraversalResult , FileEntry , ProgressData } ;
use crate ::common_directory ::Directories ;
use crate ::common_extensions ::Extensions ;
@ -30,12 +38,6 @@ pub enum DeleteMethod {
Delete ,
}
/// Selects how audio files are compared when searching for duplicates.
#[ derive(Eq, PartialEq, Clone, Debug, Copy) ]
pub enum AudioCheckMethod {
/// Compare files by the tags read from them.
Tags ,
/// Compare files by their audio content (the visible dispatch arm for this variant is `unimplemented!()`).
Content ,
}
bitflags ! {
#[ derive(PartialEq, Copy, Clone, Debug) ]
pub struct MusicSimilarity : u32 {
@ -56,6 +58,7 @@ pub struct MusicEntry {
pub path : PathBuf ,
pub modified_date : u64 ,
pub fingerprint : Vec < u32 > ,
pub track_title : String ,
pub track_artist : String ,
@ -65,6 +68,12 @@ pub struct MusicEntry {
pub bitrate : u32 ,
}
/// Lets generic result-handling code ask a `MusicEntry` for its location on disk.
impl ResultEntry for MusicEntry {
    /// Returns a borrowed view of the path stored in this entry.
    fn get_path(&self) -> &Path {
        self.path.as_path()
    }
}
impl FileEntry {
fn to_music_entry ( & self ) -> MusicEntry {
MusicEntry {
@ -72,6 +81,7 @@ impl FileEntry {
path : self . path . clone ( ) ,
modified_date : self . modified_date ,
fingerprint : vec ! [ ] ,
track_title : String ::new ( ) ,
track_artist : String ::new ( ) ,
year : String ::new ( ) ,
@ -118,7 +128,10 @@ pub struct SameMusic {
delete_outdated_cache : bool , // TODO add this to GUI
use_reference_folders : bool ,
save_also_as_json : bool ,
check_type : AudioCheckMethod ,
check_type : CheckingMethod ,
hash_preset_config : Configuration ,
minimum_segment_duration : f32 ,
maximum_difference : f64 ,
}
impl SameMusic {
@ -145,7 +158,10 @@ impl SameMusic {
use_reference_folders : false ,
duplicated_music_entries_referenced : vec ! [ ] ,
save_also_as_json : false ,
check_type : AudioCheckMethod ::Tags ,
check_type : CheckingMethod ::AudioContent ,
hash_preset_config : Configuration ::preset_test1 ( ) , // TODO allow to change this
minimum_segment_duration : 10.0 ,
maximum_difference : 2.0 ,
}
}
@ -157,7 +173,7 @@ impl SameMusic {
return ;
}
match self . check_type {
Audio CheckMethod:: Tags = > {
Checking Method::Audio Tags = > {
if ! self . read_tags ( stop_receiver , progress_sender ) {
self . stopped_search = true ;
return ;
@ -167,9 +183,21 @@ impl SameMusic {
return ;
}
}
AudioCheckMethod ::Content = > {
unimplemented! ( ) ;
CheckingMethod ::AudioContent = > {
if ! self . calculate_fingerprint ( stop_receiver , progress_sender ) {
self . stopped_search = true ;
return ;
}
if ! self . check_for_duplicate_fingerprints ( stop_receiver , progress_sender ) {
self . stopped_search = true ;
return ;
}
if ! self . read_tags_to_files_similar_by_content ( stop_receiver , progress_sender ) {
self . stopped_search = true ;
return ;
}
}
_ = > panic! ( ) ,
}
self . delete_files ( ) ;
self . debug_print ( ) ;
@ -231,10 +259,27 @@ impl SameMusic {
self . directories . set_included_directory ( included_directory , & mut self . text_messages ) ;
}
/// Stores the upper bound used for fingerprint segment scores.
///
/// When fingerprints are compared, only segments whose score is strictly below this value count as a match.
pub fn set_maximum_difference(&mut self, maximum_difference: f64) {
    self.maximum_difference = maximum_difference;
}
/// Stores the minimal duration a matching fingerprint segment must exceed.
///
/// When fingerprints are compared, only segments strictly longer than this value count as a match.
pub fn set_minimum_segment_duration(&mut self, minimum_segment_duration: f32) {
    self.minimum_segment_duration = minimum_segment_duration;
}
/// Forwards the list of reference directories to the directory configuration of this search.
pub fn set_reference_directory(&mut self, reference_directory: Vec<PathBuf>) {
    let directories = &mut self.directories;
    directories.set_reference_directory(reference_directory);
}
/// Selects the comparison mode used by the search.
///
/// # Panics
/// Panics when `check_type` is neither `CheckingMethod::AudioTags` nor `CheckingMethod::AudioContent`.
pub fn set_check_type(&mut self, check_type: CheckingMethod) {
    // Only the two audio-related modes are accepted here.
    assert!([CheckingMethod::AudioTags, CheckingMethod::AudioContent].contains(&check_type));
    self.check_type = check_type;
}
/// Returns the currently selected comparison mode.
#[must_use]
pub fn get_check_type(&self) -> CheckingMethod {
    self.check_type
}
/// Forwards the list of excluded directories to the directory configuration of this search.
///
/// The shared message store is passed along so the directory configuration can write to it.
pub fn set_excluded_directory(&mut self, excluded_directory: Vec<PathBuf>) {
    let messages = &mut self.text_messages;
    self.directories.set_excluded_directory(excluded_directory, messages);
}
@ -302,18 +347,14 @@ impl SameMusic {
. build ( )
. run ( ) ;
match result {
DirTraversalResult ::SuccessFiles {
start_time ,
grouped_file_entries ,
warnings ,
} = > {
DirTraversalResult ::SuccessFiles { grouped_file_entries , warnings } = > {
if let Some ( music_to_check ) = grouped_file_entries . get ( & ( ) ) {
for fe in music_to_check {
self . music_to_check . insert ( fe . path . to_string_lossy ( ) . to_string ( ) , fe . to_music_entry ( ) ) ;
}
}
self . text_messages . warnings . extend ( warnings ) ;
Common ::print_time ( start_time , SystemTime ::now ( ) , "check_files" ) ;
true
}
DirTraversalResult ::SuccessFolders { .. } = > {
@ -323,29 +364,31 @@ impl SameMusic {
}
}
fn read_tags_ load_cache( & mut self ) -> ( HashMap < String , MusicEntry > , HashMap < String , MusicEntry > , HashMap < String , MusicEntry > ) {
fn load_cache( & mut self , checking_tags : bool ) -> ( HashMap < String , MusicEntry > , HashMap < String , MusicEntry > , HashMap < String , MusicEntry > ) {
let loaded_hash_map ;
let mut records_already_cached : HashMap < String , MusicEntry > = Default ::default ( ) ;
let mut non_cached_files_to_check : HashMap < String , MusicEntry > = Default ::default ( ) ;
if self . use_cache {
loaded_hash_map = match load_cache_from_file ( & mut self . text_messages , self . delete_outdated_cache ) {
loaded_hash_map = match load_cache_from_file ( & mut self . text_messages , self . delete_outdated_cache , checking_tags ) {
Some ( t ) = > t ,
None = > Default ::default ( ) ,
} ;
for ( name , file_entry ) in & self . music_to_check {
#[ allow(clippy::if_same_then_else) ]
if ! loaded_hash_map . contains_key ( name ) {
// If the loaded cache doesn't contain an entry for the current file
non_cached_files_to_check . insert ( name . clone ( ) , file_entry . clone ( ) ) ;
} else if file_entry . size ! = loaded_hash_map . get ( name ) . unwrap ( ) . size | | file_entry . modified_date ! = loaded_hash_map . get ( name ) . unwrap ( ) . modified_date {
// When the size or modification date changed, the file clearly differs from the cached one
non_cached_files_to_check . insert ( name . clone ( ) , file_entry . clone ( ) ) ;
} else {
// Checking may be omitted when already there is entry with same size and modification date
records_already_cached . insert ( name . clone ( ) , loaded_hash_map . get ( name ) . unwrap ( ) . clone ( ) ) ;
let loaded_item = loaded_hash_map . get ( name ) . unwrap ( ) ;
if file_entry . size ! = loaded_item . size | | file_entry . modified_date ! = loaded_item . modified_date {
// When the size or modification date changed, the file clearly differs from the cached one
non_cached_files_to_check . insert ( name . clone ( ) , file_entry . clone ( ) ) ;
} else {
// Checking may be omitted when already there is entry with same size and modification date
records_already_cached . insert ( name . clone ( ) , loaded_item . clone ( ) ) ;
}
}
}
} else {
@ -355,7 +398,7 @@ impl SameMusic {
( loaded_hash_map , records_already_cached , non_cached_files_to_check )
}
fn read_tags_ save_cache( & mut self , vec_file_entry : Vec < MusicEntry > , loaded_hash_map : HashMap < String , MusicEntry > ) {
fn save_cache( & mut self , vec_file_entry : Vec < MusicEntry > , loaded_hash_map : HashMap < String , MusicEntry > , checking_tags : bool ) {
if ! self . use_cache {
return ;
}
@ -365,38 +408,32 @@ impl SameMusic {
for file_entry in vec_file_entry {
all_results . insert ( file_entry . path . to_string_lossy ( ) . to_string ( ) , file_entry ) ;
}
save_cache_to_file ( & all_results , & mut self . text_messages , self . save_also_as_json );
save_cache_to_file ( & all_results , & mut self . text_messages , self . save_also_as_json , checking_tags );
}
fn read_tags ( & mut self , stop_receiver : Option < & Receiver < ( ) > > , progress_sender : Option < & UnboundedSender < ProgressData > > ) -> bool {
let start_time : SystemTime = SystemTime ::now ( ) ;
let ( loaded_hash_map , records_already_cached , non_cached_files_to_check ) = self . read_tags_load_cache ( ) ;
fn calculate_fingerprint ( & mut self , stop_receiver : Option < & Receiver < ( ) > > , progress_sender : Option < & UnboundedSender < ProgressData > > ) -> bool {
let ( loaded_hash_map , records_already_cached , non_cached_files_to_check ) = self . load_cache ( false ) ;
let check_was_stopped = AtomicBool ::new ( false ) ; // Used for breaking from GUI and ending check thread
let progress_thread_run = Arc ::new ( AtomicBool ::new ( true ) ) ;
let atomic_counter = Arc ::new ( AtomicUsize ::new ( 0 ) ) ;
let progress_thread_handle = prepare_thread_handler_common (
progress_sender ,
& progress_thread_run ,
& atomic_counter ,
1 ,
2 ,
non_cached_files_to_check . len ( ) ,
CheckingMethod ::None ,
) ;
let ( progress_thread_handle , progress_thread_run , atomic_counter , check_was_stopped ) =
prepare_thread_handler_common ( progress_sender , 1 , 3 , non_cached_files_to_check . len ( ) , self . check_type ) ;
let configuration = & self . hash_preset_config ;
// Clean for duplicate files
let mut vec_file_entry = non_cached_files_to_check
. into_par_iter ( )
. map ( | ( path , music_entry ) | {
. map ( | ( path , mut music_entry ) | {
atomic_counter . fetch_add ( 1 , Ordering ::Relaxed ) ;
if stop_receiver . is_some ( ) & & stop_receiver . unwrap ( ) . try_recv ( ) . is_ok ( ) {
check_was_stopped . store ( true , Ordering ::Relaxed ) ;
return None ;
}
Some ( self . read_single_file_tag ( & path , music_entry ) )
let Ok ( fingerprint ) = calc_fingerprint_helper ( path , configuration ) else {
return Some ( None ) ;
} ;
music_entry . fingerprint = fingerprint ;
Some ( Some ( music_entry ) )
} )
. while_some ( )
. filter ( Option ::is_some )
@ -406,131 +443,66 @@ impl SameMusic {
send_info_and_wait_for_ending_all_threads ( & progress_thread_run , progress_thread_handle ) ;
// Just connect loaded results with already calculated
for ( _name , file_entry ) in records_already_cached {
vec_file_entry . push ( file_entry . clone ( ) ) ;
}
vec_file_entry . extend ( records_already_cached . into_values ( ) ) ;
self . music_entries = vec_file_entry . clone ( ) ;
self . read_tags_ save_cache( vec_file_entry , loaded_hash_map ) ;
self . save_cache( vec_file_entry , loaded_hash_map , false ) ;
// Break if stop was clicked after saving to cache
if check_was_stopped . load ( Ordering ::Relaxed ) {
return false ;
}
Common ::print_time ( start_time , SystemTime ::now ( ) , "read_tags" ) ;
true
}
fn read_single_file_tag ( & self , path : & str , mut music_entry : MusicEntry ) -> Option < MusicEntry > {
let Ok ( mut file ) = File ::open ( path ) else { return None ; } ;
let result = panic ::catch_unwind ( move | | {
match read_from ( & mut file ) {
Ok ( t ) = > Some ( t ) ,
Err ( _inspected ) = > {
// println!("Failed to open {}", path);
None
}
}
} ) ;
fn read_tags ( & mut self , stop_receiver : Option < & Receiver < ( ) > > , progress_sender : Option < & UnboundedSender < ProgressData > > ) -> bool {
let ( loaded_hash_map , records_already_cached , non_cached_files_to_check ) = self . load_cache ( true ) ;
let tagged_file = if let Ok ( t ) = result {
match t {
Some ( r ) = > r ,
None = > {
return Some ( music_entry ) ;
}
}
} else {
let message = create_crash_message ( "Lofty" , path , "https://github.com/image-rs/image/issues" ) ;
println! ( "{message}" ) ;
return None ;
} ;
let ( progress_thread_handle , progress_thread_run , atomic_counter , check_was_stopped ) =
prepare_thread_handler_common ( progress_sender , 1 , 2 , non_cached_files_to_check . len ( ) , self . check_type ) ;
let properties = tagged_file . properties ( ) ;
// Clean for duplicate files
let mut vec_file_entry = non_cached_files_to_check
. into_par_iter ( )
. map ( | ( path , mut music_entry ) | {
atomic_counter . fetch_add ( 1 , Ordering ::Relaxed ) ;
if stop_receiver . is_some ( ) & & stop_receiver . unwrap ( ) . try_recv ( ) . is_ok ( ) {
check_was_stopped . store ( true , Ordering ::Relaxed ) ;
return None ;
}
if read_single_file_tag ( & path , & mut music_entry ) {
Some ( Some ( music_entry ) )
} else {
Some ( None )
}
} )
. while_some ( )
. filter ( Option ::is_some )
. map ( Option ::unwrap )
. collect ::< Vec < _ > > ( ) ;
let mut track_title = String ::new ( ) ;
let mut track_artist = String ::new ( ) ;
let mut year = String ::new ( ) ;
let mut genre = String ::new ( ) ;
send_info_and_wait_for_ending_all_threads ( & progress_thread_run , progress_thread_handle ) ;
let bitrate = properties . audio_bitrate ( ) . unwrap_or ( 0 ) ;
let mut length = properties . duration ( ) . as_millis ( ) . to_string ( ) ;
// Just connect loaded results with already calculated
vec_file_entry . extend ( records_already_cached . into_values ( ) ) ;
if let Some ( tag ) = tagged_file . primary_tag ( ) {
track_title = tag . get_string ( & ItemKey ::TrackTitle ) . unwrap_or ( "" ) . to_string ( ) ;
track_artist = tag . get_string ( & ItemKey ::TrackArtist ) . unwrap_or ( "" ) . to_string ( ) ;
year = tag . get_string ( & ItemKey ::Year ) . unwrap_or ( "" ) . to_string ( ) ;
genre = tag . get_string ( & ItemKey ::Genre ) . unwrap_or ( "" ) . to_string ( ) ;
}
self . music_entries = vec_file_entry . clone ( ) ;
for tag in tagged_file . tags ( ) {
if track_title . is_empty ( ) {
if let Some ( tag_value ) = tag . get_string ( & ItemKey ::TrackTitle ) {
track_title = tag_value . to_string ( ) ;
}
}
if track_artist . is_empty ( ) {
if let Some ( tag_value ) = tag . get_string ( & ItemKey ::TrackArtist ) {
track_artist = tag_value . to_string ( ) ;
}
}
if year . is_empty ( ) {
if let Some ( tag_value ) = tag . get_string ( & ItemKey ::Year ) {
year = tag_value . to_string ( ) ;
}
}
if genre . is_empty ( ) {
if let Some ( tag_value ) = tag . get_string ( & ItemKey ::Genre ) {
genre = tag_value . to_string ( ) ;
}
}
// println!("{:?}", tag.items());
}
self . save_cache ( vec_file_entry , loaded_hash_map , true ) ;
if let Ok ( old_length_number ) = length . parse ::< u32 > ( ) {
let length_number = old_length_number / 60 ;
let minutes = length_number / 1000 ;
let seconds = ( length_number % 1000 ) * 6 / 100 ;
if minutes ! = 0 | | seconds ! = 0 {
length = format! ( "{minutes}:{seconds:02}" ) ;
} else if old_length_number > 0 {
// That means, that audio have length smaller that second, but length is properly read
length = "0:01" . to_string ( ) ;
} else {
length = String ::new ( ) ;
}
} else {
length = String ::new ( ) ;
// Break if stop was clicked after saving to cache
if check_was_stopped . load ( Ordering ::Relaxed ) {
return false ;
}
music_entry . track_title = track_title ;
music_entry . track_artist = track_artist ;
music_entry . year = year ;
music_entry . length = length ;
music_entry . genre = genre ;
music_entry . bitrate = bitrate ;
Some ( music_entry )
true
}
fn check_for_duplicate_tags ( & mut self , stop_receiver : Option < & Receiver < ( ) > > , progress_sender : Option < & UnboundedSender < ProgressData > > ) -> bool {
assert_ne! ( MusicSimilarity ::NONE , self . music_similarity , "This can't be none" ) ;
let start_time : SystemTime = SystemTime ::now ( ) ;
let progress_thread_run = Arc ::new ( AtomicBool ::new ( true ) ) ;
let atomic_counter = Arc ::new ( AtomicUsize ::new ( 0 ) ) ;
let progress_thread_handle = prepare_thread_handler_common (
progress_sender ,
& progress_thread_run ,
& atomic_counter ,
2 ,
2 ,
self . music_to_check . len ( ) ,
CheckingMethod ::None ,
) ;
let ( progress_thread_handle , progress_thread_run , atomic_counter , _check_was_stopped ) =
prepare_thread_handler_common ( progress_sender , 2 , 2 , self . music_to_check . len ( ) , self . check_type ) ;
let mut old_duplicates : Vec < Vec < MusicEntry > > = vec! [ self . music_entries . clone ( ) ] ;
let mut new_duplicates : Vec < Vec < MusicEntry > > = Vec ::new ( ) ;
@ -580,9 +552,8 @@ impl SameMusic {
send_info_and_wait_for_ending_all_threads ( & progress_thread_run , progress_thread_handle ) ;
return false ;
}
let old_duplicates_len = old_duplicates . len ( ) ;
for vec_file_entry in old_duplicates {
atomic_counter . fetch_add ( 1 , Ordering ::Relaxed ) ;
let mut hash_map : BTreeMap < String , Vec < MusicEntry > > = Default ::default ( ) ;
for file_entry in vec_file_entry {
if file_entry . bitrate ! = 0 {
@ -598,6 +569,7 @@ impl SameMusic {
}
}
}
atomic_counter . fetch_add ( old_duplicates_len , Ordering ::Relaxed ) ;
old_duplicates = new_duplicates ;
}
@ -605,7 +577,9 @@ impl SameMusic {
self . duplicated_music_entries = old_duplicates ;
self . filter_reference_folders ( ) ;
if self . use_reference_folders {
self . duplicated_music_entries_referenced = filter_reference_folders_generic ( mem ::take ( & mut self . duplicated_music_entries ) , & self . directories ) ;
}
if self . use_reference_folders {
for ( _fe , vector ) in & self . duplicated_music_entries_referenced {
@ -619,7 +593,165 @@ impl SameMusic {
}
}
Common ::print_time ( start_time , SystemTime ::now ( ) , "check_for_duplicate_tags" ) ;
// Clear unused data
self . music_entries . clear ( ) ;
true
}
fn read_tags_to_files_similar_by_content ( & mut self , stop_receiver : Option < & Receiver < ( ) > > , progress_sender : Option < & UnboundedSender < ProgressData > > ) -> bool {
let groups_to_check = max ( self . duplicated_music_entries . len ( ) , self . duplicated_music_entries_referenced . len ( ) ) ;
let ( progress_thread_handle , progress_thread_run , atomic_counter , check_was_stopped ) =
prepare_thread_handler_common ( progress_sender , 3 , 3 , groups_to_check , self . check_type ) ;
// TODO is ther a way to just run iterator and not collect any info?
if ! self . duplicated_music_entries . is_empty ( ) {
let _ : Vec < _ > = self
. duplicated_music_entries
. par_iter_mut ( )
. map ( | vec_me | {
atomic_counter . fetch_add ( 1 , Ordering ::Relaxed ) ;
if stop_receiver . is_some ( ) & & stop_receiver . unwrap ( ) . try_recv ( ) . is_ok ( ) {
check_was_stopped . store ( true , Ordering ::Relaxed ) ;
return None ;
}
for me in vec_me {
let me_path = me . path . to_string_lossy ( ) . to_string ( ) ;
read_single_file_tag ( & me_path , me ) ;
}
Some ( ( ) )
} )
. while_some ( )
. collect ( ) ;
} else {
let _ : Vec < _ > = self
. duplicated_music_entries_referenced
. par_iter_mut ( )
. map ( | ( me_o , vec_me ) | {
atomic_counter . fetch_add ( 1 , Ordering ::Relaxed ) ;
if stop_receiver . is_some ( ) & & stop_receiver . unwrap ( ) . try_recv ( ) . is_ok ( ) {
check_was_stopped . store ( true , Ordering ::Relaxed ) ;
return None ;
}
let me_o_path = me_o . path . to_string_lossy ( ) . to_string ( ) ;
read_single_file_tag ( & me_o_path , me_o ) ;
for me in vec_me {
let me_path = me . path . to_string_lossy ( ) . to_string ( ) ;
read_single_file_tag ( & me_path , me ) ;
}
Some ( ( ) )
} )
. while_some ( )
. collect ( ) ;
}
send_info_and_wait_for_ending_all_threads ( & progress_thread_run , progress_thread_handle ) ;
! check_was_stopped . load ( Ordering ::Relaxed )
}
/// Moves the collected entries out of `music_entries` and splits them into `(base_files, files_to_compare)`.
///
/// With reference folders enabled, entries located in a referenced directory become the base files and
/// all remaining entries are the ones compared against them. Without reference folders every entry
/// appears in both lists. In both cases `music_entries` is left empty.
fn split_fingerprints_to_check(&mut self) -> (Vec<MusicEntry>, Vec<MusicEntry>) {
    if self.use_reference_folders {
        let all_entries = mem::take(&mut self.music_entries);
        return all_entries.into_iter().partition(|f| self.directories.is_in_referenced_directory(f.get_path()));
    }

    let files_to_compare = mem::take(&mut self.music_entries);
    let base_files = files_to_compare.clone();
    (base_files, files_to_compare)
}
fn compare_fingerprints (
& mut self ,
stop_receiver : Option < & Receiver < ( ) > > ,
atomic_counter : & Arc < AtomicUsize > ,
base_files : Vec < MusicEntry > ,
files_to_compare : & [ MusicEntry ] ,
) -> Option < Vec < Vec < MusicEntry > > > {
let mut used_paths : HashSet < String > = Default ::default ( ) ;
let configuration = & self . hash_preset_config ;
let minimum_segment_duration = self . minimum_segment_duration ;
let maximum_difference = self . maximum_difference ;
let mut duplicated_music_entries = Vec ::new ( ) ;
for f_entry in base_files {
atomic_counter . fetch_add ( 1 , Ordering ::Relaxed ) ;
if stop_receiver . is_some ( ) & & stop_receiver . unwrap ( ) . try_recv ( ) . is_ok ( ) {
return None ;
}
let f_string = f_entry . path . to_string_lossy ( ) . to_string ( ) ;
if used_paths . contains ( & f_string ) {
continue ;
}
let mut collected_similar_items = files_to_compare
. par_iter ( )
. filter_map ( | e_entry | {
let e_string = e_entry . path . to_string_lossy ( ) . to_string ( ) ;
if used_paths . contains ( & e_string ) | | e_string = = f_string {
return None ;
}
let mut segments = match_fingerprints ( & f_entry . fingerprint , & e_entry . fingerprint , configuration ) . unwrap ( ) ;
segments . retain ( | s | s . duration ( configuration ) > minimum_segment_duration & & s . score < maximum_difference ) ;
if segments . is_empty ( ) {
None
} else {
Some ( ( e_string , e_entry ) )
}
} )
. collect ::< Vec < _ > > ( ) ;
collected_similar_items . retain ( | ( path , _entry ) | ! used_paths . contains ( path ) ) ;
if ! collected_similar_items . is_empty ( ) {
let mut music_entries = Vec ::new ( ) ;
for ( path , entry ) in collected_similar_items {
used_paths . insert ( path ) ;
music_entries . push ( entry . clone ( ) ) ;
}
used_paths . insert ( f_string ) ;
music_entries . push ( f_entry . clone ( ) ) ;
duplicated_music_entries . push ( music_entries ) ;
}
}
Some ( duplicated_music_entries )
}
fn check_for_duplicate_fingerprints ( & mut self , stop_receiver : Option < & Receiver < ( ) > > , progress_sender : Option < & UnboundedSender < ProgressData > > ) -> bool {
let ( base_files , files_to_compare ) = self . split_fingerprints_to_check ( ) ;
let ( progress_thread_handle , progress_thread_run , atomic_counter , _check_was_stopped ) =
prepare_thread_handler_common ( progress_sender , 2 , 3 , base_files . len ( ) , self . check_type ) ;
let Some ( duplicated_music_entries ) = self . compare_fingerprints ( stop_receiver , & atomic_counter , base_files , & files_to_compare ) else {
send_info_and_wait_for_ending_all_threads ( & progress_thread_run , progress_thread_handle ) ;
return false ;
} ;
send_info_and_wait_for_ending_all_threads ( & progress_thread_run , progress_thread_handle ) ;
self . duplicated_music_entries = duplicated_music_entries ;
if self . use_reference_folders {
self . duplicated_music_entries_referenced = filter_reference_folders_generic ( mem ::take ( & mut self . duplicated_music_entries ) , & self . directories ) ;
}
if self . use_reference_folders {
for ( _fe , vector ) in & self . duplicated_music_entries_referenced {
self . information . number_of_duplicates + = vector . len ( ) ;
self . information . number_of_groups + = 1 ;
}
} else {
for vector in & self . duplicated_music_entries {
self . information . number_of_duplicates + = vector . len ( ) - 1 ;
self . information . number_of_groups + = 1 ;
}
}
// Clear unused data
self . music_entries . clear ( ) ;
@ -635,8 +767,8 @@ impl SameMusic {
approximate_comparison : bool ,
) -> Vec < Vec < MusicEntry > > {
let mut new_duplicates : Vec < _ > = Default ::default ( ) ;
let old_duplicates_len = old_duplicates . len ( ) ;
for vec_file_entry in old_duplicates {
atomic_counter . fetch_add ( 1 , Ordering ::Relaxed ) ;
let mut hash_map : BTreeMap < String , Vec < MusicEntry > > = Default ::default ( ) ;
for file_entry in vec_file_entry {
let mut thing = get_item ( & file_entry ) . trim ( ) . to_lowercase ( ) ;
@ -653,40 +785,11 @@ impl SameMusic {
}
}
}
atomic_counter . fetch_add ( old_duplicates_len , Ordering ::Relaxed ) ;
new_duplicates
}
/// Converts the found groups into "reference entry + other files" pairs.
///
/// Does nothing unless reference folders are in use. Otherwise `duplicated_music_entries` is emptied and
/// every group is split into files placed under one of the reference directories and all other files.
/// Groups that miss either kind are dropped; from the remaining ones the last file from the reference
/// directories is kept as the reference entry and paired with the other files.
/// The result is stored in `duplicated_music_entries_referenced`.
fn filter_reference_folders(&mut self) {
    if !self.use_reference_folders {
        return;
    }

    let found_groups = mem::take(&mut self.duplicated_music_entries);
    let reference_directories = self.directories.reference_directories.clone();

    self.duplicated_music_entries_referenced = found_groups
        .into_iter()
        .filter_map(|group| {
            let (mut files_from_referenced_folders, normal_files): (Vec<MusicEntry>, Vec<MusicEntry>) = group
                .into_iter()
                .partition(|file_entry| reference_directories.iter().any(|e| file_entry.path.starts_with(e)));

            if normal_files.is_empty() {
                return None;
            }
            // `pop` yields `None` for a group without any file from the reference directories
            files_from_referenced_folders.pop().map(|reference_entry| (reference_entry, normal_files))
        })
        .collect::<Vec<(MusicEntry, Vec<MusicEntry>)>>();
}
pub fn set_minimal_file_size ( & mut self , minimal_file_size : u64 ) {
self . minimal_file_size = match minimal_file_size {
0 = > 1 ,
@ -696,7 +799,7 @@ impl SameMusic {
/// Function to delete files from the filled vector
fn delete_files ( & mut self ) {
let start_time : SystemTime = SystemTime ::now ( ) ;
// TODO
// match self.delete_method {
// DeleteMethod::Delete => {
@ -710,13 +813,13 @@ impl SameMusic {
// //Just do nothing
// }
// }
Common ::print_time ( start_time , SystemTime ::now ( ) , "delete_files" ) ;
}
}
fn save_cache_to_file ( hashmap : & HashMap < String , MusicEntry > , text_messages : & mut Messages , save_also_as_json : bool ) {
if let Some ( ( ( file_handler , cache_file ) , ( file_handler_json , cache_file_json ) ) ) = open_cache_folder ( & get_cache_file ( ) , true , save_also_as_json , & mut text_messages . warnings ) {
fn save_cache_to_file ( hashmap : & HashMap < String , MusicEntry > , text_messages : & mut Messages , save_also_as_json : bool , checking_tags : bool ) {
if let Some ( ( ( file_handler , cache_file ) , ( file_handler_json , cache_file_json ) ) ) =
open_cache_folder ( get_cache_file ( checking_tags ) , true , save_also_as_json , & mut text_messages . warnings )
{
{
let writer = BufWriter ::new ( file_handler . unwrap ( ) ) ; // Unwrap because cannot fail here
if let Err ( e ) = bincode ::serialize_into ( writer , hashmap ) {
@ -742,8 +845,8 @@ fn save_cache_to_file(hashmap: &HashMap<String, MusicEntry>, text_messages: &mut
}
}
fn load_cache_from_file ( text_messages : & mut Messages , delete_outdated_cache : bool ) -> Option < HashMap < String , MusicEntry > > {
if let Some ( ( ( file_handler , cache_file ) , ( file_handler_json , cache_file_json ) ) ) = open_cache_folder ( & get_cache_file ( ) , false , true , & mut text_messages . warnings ) {
fn load_cache_from_file ( text_messages : & mut Messages , delete_outdated_cache : bool , checking_tags : bool ) -> Option < HashMap < String , MusicEntry > > {
if let Some ( ( ( file_handler , cache_file ) , ( file_handler_json , cache_file_json ) ) ) = open_cache_folder ( get_cache_file ( checking_tags ) , false , true , & mut text_messages . warnings ) {
let mut hashmap_loaded_entries : HashMap < String , MusicEntry > ;
if let Some ( file_handler ) = file_handler {
let reader = BufReader ::new ( file_handler ) ;
@ -781,8 +884,172 @@ fn load_cache_from_file(text_messages: &mut Messages, delete_outdated_cache: boo
None
}
fn get_cache_file ( ) -> String {
"cache_same_music.bin" . to_string ( )
// TODO this should be taken from rusty-chromaprint repo, not reimplemented here
fn calc_fingerprint_helper ( path : impl AsRef < Path > , config : & Configuration ) -> anyhow ::Result < Vec < u32 > > {
let path = path . as_ref ( ) ;
let src = File ::open ( path ) . context ( "failed to open file" ) ? ;
let mss = MediaSourceStream ::new ( Box ::new ( src ) , Default ::default ( ) ) ;
let mut hint = Hint ::new ( ) ;
if let Some ( ext ) = path . extension ( ) . and_then ( std ::ffi ::OsStr ::to_str ) {
hint . with_extension ( ext ) ;
}
let meta_opts : MetadataOptions = Default ::default ( ) ;
let fmt_opts : FormatOptions = Default ::default ( ) ;
let probed = symphonia ::default ::get_probe ( ) . format ( & hint , mss , & fmt_opts , & meta_opts ) . context ( "unsupported format" ) ? ;
let mut format = probed . format ;
let track = format
. tracks ( )
. iter ( )
. find ( | t | t . codec_params . codec ! = CODEC_TYPE_NULL )
. context ( "no supported audio tracks" ) ? ;
let dec_opts : DecoderOptions = Default ::default ( ) ;
let mut decoder = symphonia ::default ::get_codecs ( ) . make ( & track . codec_params , & dec_opts ) . context ( "unsupported codec" ) ? ;
let track_id = track . id ;
let mut printer = Fingerprinter ::new ( config ) ;
let sample_rate = track . codec_params . sample_rate . context ( "missing sample rate" ) ? ;
let channels = track . codec_params . channels . context ( "missing audio channels" ) ? . count ( ) as u32 ;
printer . start ( sample_rate , channels ) . context ( "initializing fingerprinter" ) ? ;
let mut sample_buf = None ;
loop {
let Ok ( packet ) = format . next_packet ( ) else { break } ;
if packet . track_id ( ) ! = track_id {
continue ;
}
match decoder . decode ( & packet ) {
Ok ( audio_buf ) = > {
if sample_buf . is_none ( ) {
let spec = * audio_buf . spec ( ) ;
let duration = audio_buf . capacity ( ) as u64 ;
sample_buf = Some ( SampleBuffer ::< i16 > ::new ( duration , spec ) ) ;
}
if let Some ( buf ) = & mut sample_buf {
buf . copy_interleaved_ref ( audio_buf ) ;
printer . consume ( buf . samples ( ) ) ;
}
}
Err ( symphonia ::core ::errors ::Error ::DecodeError ( _ ) ) = > ( ) ,
Err ( _ ) = > break ,
}
}
printer . finish ( ) ;
Ok ( printer . fingerprint ( ) . to_vec ( ) )
}
fn read_single_file_tag ( path : & str , music_entry : & mut MusicEntry ) -> bool {
let Ok ( mut file ) = File ::open ( path ) else { return false ; } ;
let result = panic ::catch_unwind ( move | | {
match read_from ( & mut file ) {
Ok ( t ) = > Some ( t ) ,
Err ( _inspected ) = > {
// println!("Failed to open {}", path);
None
}
}
} ) ;
let tagged_file = if let Ok ( t ) = result {
match t {
Some ( r ) = > r ,
None = > {
return true ;
}
}
} else {
let message = create_crash_message ( "Lofty" , path , "https://github.com/image-rs/image/issues" ) ;
println! ( "{message}" ) ;
return false ;
} ;
let properties = tagged_file . properties ( ) ;
let mut track_title = String ::new ( ) ;
let mut track_artist = String ::new ( ) ;
let mut year = String ::new ( ) ;
let mut genre = String ::new ( ) ;
let bitrate = properties . audio_bitrate ( ) . unwrap_or ( 0 ) ;
let mut length = properties . duration ( ) . as_millis ( ) . to_string ( ) ;
if let Some ( tag ) = tagged_file . primary_tag ( ) {
track_title = tag . get_string ( & ItemKey ::TrackTitle ) . unwrap_or ( "" ) . to_string ( ) ;
track_artist = tag . get_string ( & ItemKey ::TrackArtist ) . unwrap_or ( "" ) . to_string ( ) ;
year = tag . get_string ( & ItemKey ::Year ) . unwrap_or ( "" ) . to_string ( ) ;
genre = tag . get_string ( & ItemKey ::Genre ) . unwrap_or ( "" ) . to_string ( ) ;
}
for tag in tagged_file . tags ( ) {
if track_title . is_empty ( ) {
if let Some ( tag_value ) = tag . get_string ( & ItemKey ::TrackTitle ) {
track_title = tag_value . to_string ( ) ;
}
}
if track_artist . is_empty ( ) {
if let Some ( tag_value ) = tag . get_string ( & ItemKey ::TrackArtist ) {
track_artist = tag_value . to_string ( ) ;
}
}
if year . is_empty ( ) {
if let Some ( tag_value ) = tag . get_string ( & ItemKey ::Year ) {
year = tag_value . to_string ( ) ;
}
}
if genre . is_empty ( ) {
if let Some ( tag_value ) = tag . get_string ( & ItemKey ::Genre ) {
genre = tag_value . to_string ( ) ;
}
}
// println!("{:?}", tag.items());
}
if let Ok ( old_length_number ) = length . parse ::< u32 > ( ) {
let length_number = old_length_number / 60 ;
let minutes = length_number / 1000 ;
let seconds = ( length_number % 1000 ) * 6 / 100 ;
if minutes ! = 0 | | seconds ! = 0 {
length = format! ( "{minutes}:{seconds:02}" ) ;
} else if old_length_number > 0 {
// That means, that audio have length smaller that second, but length is properly read
length = "0:01" . to_string ( ) ;
} else {
length = String ::new ( ) ;
}
} else {
length = String ::new ( ) ;
}
music_entry . track_title = track_title ;
music_entry . track_artist = track_artist ;
music_entry . year = year ;
music_entry . length = length ;
music_entry . genre = genre ;
music_entry . bitrate = bitrate ;
true
}
/// Returns the name of the cache file that belongs to the selected check mode.
///
/// Tags and fingerprints are kept in different cache files, because loading the cache
/// just for finding duplicated tags would be really slow.
fn get_cache_file(checking_tags: bool) -> &'static str {
    match checking_tags {
        true => "cache_same_music_tags.bin",
        false => "cache_same_music_fingerprints.bin",
    }
}
impl Default for SameMusic {
@ -825,7 +1092,6 @@ impl DebugPrint for SameMusic {
impl SaveResults for SameMusic {
fn save_results_to_file ( & mut self , file_name : & str ) -> bool {
let start_time : SystemTime = SystemTime ::now ( ) ;
let file_name : String = match file_name {
"" = > "results.txt" . to_string ( ) ,
k = > k . to_string ( ) ,
@ -857,7 +1123,7 @@ impl SaveResults for SameMusic {
} else {
write! ( writer , "Not found any empty files." ) . unwrap ( ) ;
}
Common ::print_time ( start_time , SystemTime ::now ( ) , "save_results_to_file" ) ;
true
}
}
@ -866,7 +1132,6 @@ impl PrintResults for SameMusic {
/// Prints information about duplicated entries
/// Only needed for CLI
fn print_results ( & self ) {
let start_time : SystemTime = SystemTime ::now ( ) ;
println! ( "Found {} similar music files.\n" , self . duplicated_music_entries . len ( ) ) ;
for vec_file_entry in & self . duplicated_music_entries {
for file_entry in vec_file_entry {
@ -883,8 +1148,6 @@ impl PrintResults for SameMusic {
}
println! ( ) ;
}
Common ::print_time ( start_time , SystemTime ::now ( ) , "print_entries" ) ;
}
}