mirror of
https://github.com/qarmin/czkawka
synced 2024-11-04 12:00:29 +00:00
Optimize a little image compare algorithm (#528)
* Split checking images into 2 functions * Optimize image finding a little * 1.54.0 farewell
This commit is contained in:
parent d8700f6e78
commit 7da578fa7f
@@ -1,4 +1,4 @@
-use std::collections::{BTreeMap, BTreeSet};
+use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
 use std::fs::OpenOptions;
 use std::fs::{File, Metadata};
 use std::io::Write;
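The widened import pulls in HashMap and HashSet because the rewritten comparison phase below keeps its working sets in hashed collections. Ordering of the hashes is never used there, so the O(log n) byte-vector comparisons of BTreeMap/BTreeSet buy nothing. A minimal sketch of the membership-check pattern (a standalone example, not czkawka's code):

    use std::collections::HashSet;

    fn main() {
        // Hypothetical 8-byte perceptual hash.
        let hash: Vec<u8> = vec![0b1010_1010; 8];

        let mut master_of_group: HashSet<Vec<u8>> = HashSet::new();
        master_of_group.insert(hash.clone());

        // Average O(1) membership test; a BTreeSet would instead walk the
        // tree doing O(log n) byte-vector comparisons.
        assert!(master_of_group.contains(&hash));
    }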
@@ -47,7 +47,6 @@ const LOOP_DURATION: u32 = 200; //ms

 #[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Debug, Serialize, Deserialize)]
 pub enum Similarity {
-    None,
     Similar(u32),
 }

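With the None variant gone, Similarity has exactly one variant, which is what lets the new find_similar_hashes below destructure it with a plain let instead of a match carrying a panicking arm. A minimal sketch of that irrefutable-pattern trick (derives omitted):

    pub enum Similarity {
        Similar(u32),
    }

    fn main() {
        let s = Similarity::Similar(3);
        // A single-variant enum makes this pattern irrefutable, so no
        // `match` and no unreachable panic arm are needed.
        let Similarity::Similar(similarity) = s;
        assert_eq!(similarity, 3);
    }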
@@ -239,7 +238,11 @@ impl SimilarImages {
             self.stopped_search = true;
             return;
         }
-        if !self.sort_images(stop_receiver, progress_sender) {
+        if !self.hash_images(stop_receiver, progress_sender) {
+            self.stopped_search = true;
+            return;
+        }
+        if !self.find_similar_hashes(stop_receiver, progress_sender) {
             self.stopped_search = true;
             return;
         }
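The old monolithic sort_images is split in two here, so hashing and comparing become separately cancellable phases. A rough skeleton of the resulting control flow, using simplified stand-in types and signatures (the real methods also take a stop receiver and a progress channel):

    struct SimilarImages {
        stopped_search: bool,
    }

    impl SimilarImages {
        fn hash_images(&mut self) -> bool {
            // Load cached hashes, hash new files; `false` means aborted.
            true
        }
        fn find_similar_hashes(&mut self) -> bool {
            // Group identical hashes, then search the BK-tree.
            true
        }

        fn find_similar_images(&mut self) {
            // Each phase reports cancellation independently, so an abort
            // during hashing never starts the comparison phase at all.
            if !self.hash_images() {
                self.stopped_search = true;
                return;
            }
            if !self.find_similar_hashes() {
                self.stopped_search = true;
                return;
            }
        }
    }

    fn main() {
        let mut si = SimilarImages { stopped_search: false };
        si.find_similar_images();
        assert!(!si.stopped_search);
    }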
@@ -412,7 +415,7 @@ impl SimilarImages {
                 },

                 hash: Vec::new(),
-                similarity: Similarity::None,
+                similarity: Similarity::Similar(0),
             };

             fe_result.push((current_file_name.to_string_lossy().to_string(), fe));
@@ -450,7 +453,7 @@ impl SimilarImages {
     // - Join already read hashes with hashes which were read from file
     // - Join all hashes and save it to file

-    fn sort_images(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&futures::channel::mpsc::UnboundedSender<ProgressData>>) -> bool {
+    fn hash_images(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&futures::channel::mpsc::UnboundedSender<ProgressData>>) -> bool {
         let hash_map_modification = SystemTime::now();

         let loaded_hash_map;
@@ -579,7 +582,6 @@ impl SimilarImages {
         for (file_entry, buf) in &vec_file_entry {
             // Use only non-broken hashes for comparing (an all-0 or all-255 hash means that the algorithm failed to decode the image, e.g. because it contains a lot of alpha channel)
             if !(buf.iter().all(|e| *e == 0) || buf.iter().all(|e| *e == 255)) {
-                self.bktree.add(buf.clone());
                 self.image_hashes.entry(buf.clone()).or_insert_with(Vec::<FileEntry>::new);
                 self.image_hashes.get_mut(buf).unwrap().push(file_entry.clone());
             }
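The all-0/all-255 guard above matters because a hash the decoder effectively failed on would sit at distance 0 from every other failed hash and pollute the groups. A self-contained restatement of the same check:

    // Degenerate hashes (all 0x00 or all 0xFF) usually mean the image
    // could not really be decoded, e.g. it is almost pure alpha channel.
    fn is_usable_hash(buf: &[u8]) -> bool {
        !(buf.iter().all(|e| *e == 0) || buf.iter().all(|e| *e == 255))
    }

    fn main() {
        assert!(!is_usable_hash(&[0u8; 8]));
        assert!(!is_usable_hash(&[255u8; 8]));
        assert!(is_usable_hash(&[0, 255, 3, 7]));
    }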
@@ -595,18 +597,32 @@ impl SimilarImages {
         }

         Common::print_time(hash_map_modification, SystemTime::now(), "sort_images - saving data to files".to_string());
+        true
+    }
+
+    fn find_similar_hashes(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&futures::channel::mpsc::UnboundedSender<ProgressData>>) -> bool {
         let hash_map_modification = SystemTime::now();
-        let similarity: u32 = match self.similarity {
-            Similarity::Similar(k) => k,
-            _ => panic!(),
-        };
+        let Similarity::Similar(similarity) = self.similarity;

         // Results
         let mut collected_similar_images: BTreeMap<Vec<u8>, Vec<FileEntry>> = Default::default();

-        let mut available_hashes = self.image_hashes.clone();
+        let mut temp_hashes = Default::default();
+        mem::swap(&mut temp_hashes, &mut self.image_hashes);
+
         let mut this_time_check_hashes;
-        let mut master_of_group: BTreeSet<Vec<u8>> = Default::default(); // List of all master hashes that drive the comparisons
+        let mut master_of_group: HashSet<Vec<u8>> = Default::default(); // List of all master hashes that drive the comparisons
+
+        let mut available_hashes: HashMap<Vec<u8>, Vec<FileEntry>> = Default::default();
+        for (hash, vec_file_entry) in temp_hashes {
+            // There exist 2 or more files with the same hash
+            if vec_file_entry.len() >= 2 {
+                collected_similar_images.insert(hash, vec_file_entry);
+            } else {
+                self.bktree.add(hash.clone());
+                available_hashes.insert(hash, vec_file_entry);
+            }
+        }

         //// PROGRESS THREAD START
         let progress_thread_run = Arc::new(AtomicBool::new(true));
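This is the core of the optimization: files whose hashes are byte-identical are grouped straight out of the map, so only unique hashes enter the BK-tree, and mem::swap takes ownership of self.image_hashes without cloning the whole map the way the old available_hashes = self.image_hashes.clone() did. A standalone sketch with String stand-ins for FileEntry:

    use std::collections::{BTreeMap, HashMap};
    use std::mem;

    fn main() {
        let mut image_hashes: BTreeMap<Vec<u8>, Vec<String>> = BTreeMap::new();
        image_hashes.insert(vec![1, 2], vec!["a.jpg".into(), "b.jpg".into()]);
        image_hashes.insert(vec![9, 9], vec!["c.jpg".into()]);

        // Take ownership without cloning; the field is left empty but valid.
        let mut temp_hashes = BTreeMap::new();
        mem::swap(&mut temp_hashes, &mut image_hashes);

        let mut collected: BTreeMap<Vec<u8>, Vec<String>> = BTreeMap::new();
        let mut available: HashMap<Vec<u8>, Vec<String>> = HashMap::new();
        for (hash, entries) in temp_hashes {
            if entries.len() >= 2 {
                collected.insert(hash, entries); // byte-identical images, done
            } else {
                available.insert(hash, entries); // still needs a distance search
            }
        }
        assert_eq!(collected.len(), 1);
        assert_eq!(available.len(), 1);
    }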
@@ -636,37 +652,10 @@ impl SimilarImages {
             thread::spawn(|| {})
         };
         //// PROGRESS THREAD END
-        if similarity >= 1 {
-            for current_similarity in 1..=similarity {
-                this_time_check_hashes = available_hashes.clone();
+        for current_similarity in 0..=similarity {
+            this_time_check_hashes = available_hashes.clone();

             if stop_receiver.is_some() && stop_receiver.unwrap().try_recv().is_ok() {
                 // End thread which send info to gui
                 progress_thread_run.store(false, Ordering::Relaxed);
                 progress_thread_handle.join().unwrap();
                 return false;
             }

-                for (hash, vec_file_entry) in &this_time_check_hashes {
-                    atomic_mode_counter.fetch_add(1, Ordering::Relaxed);
-
-                    let vector_with_found_similar_hashes = self
-                        .bktree
-                        .find(hash, similarity)
-                        .filter(|r| (r.0 == current_similarity) && !master_of_group.contains(r.1) && available_hashes.contains_key(r.1))
-                        .collect::<Vec<_>>();
-
-                    // Not found any hash with specific distance
-                    if vector_with_found_similar_hashes.is_empty() {
-                        continue;
-                    }
-
-                    // This one picture doesn't have similar pictures except self in similarity 0
-                    if current_similarity == 0 && vector_with_found_similar_hashes.len() == 1 {
-                        continue;
-                    }
-
-                    // This shouldn't be executed too much times, so it should be quite fast to check this
-                    if stop_receiver.is_some() && stop_receiver.unwrap().try_recv().is_ok() {
-                        // End thread which send info to gui
-                        progress_thread_run.store(false, Ordering::Relaxed);
@@ -674,43 +663,51 @@ impl SimilarImages {
-                        progress_thread_handle.join().unwrap();
-                        return false;
-                    }
-
-                    // If it was not yet added, add the master group to the already-processed ones now
-                    if !master_of_group.contains(hash) {
-                        master_of_group.insert(hash.clone());
-                        collected_similar_images.insert(hash.clone(), Vec::new());
-
-                        let mut things: Vec<FileEntry> = vec_file_entry
-                            .iter()
-                            .map(|fe| FileEntry {
-                                path: fe.path.clone(),
-                                size: fe.size,
-                                dimensions: fe.dimensions.clone(),
-                                modified_date: fe.modified_date,
-                                hash: fe.hash.clone(),
-                                similarity: Similarity::Similar(0),
-                            })
-                            .collect();
-                        collected_similar_images.get_mut(hash).unwrap().append(&mut things);
-                    }
-
-                    // Since we checked hash, we don't need to check it again
-                    if current_similarity != 0 {
-                        vector_with_found_similar_hashes.iter().for_each(|e| {
-                            let mut things: Vec<FileEntry> = available_hashes
-                                .get_mut(e.1)
-                                .unwrap()
-                                .iter()
-                                .map(|fe| FileEntry {
-                                    path: fe.path.clone(),
-                                    size: fe.size,
-                                    dimensions: fe.dimensions.clone(),
-                                    modified_date: fe.modified_date,
-                                    hash: Vec::new(),
-                                    similarity: Similarity::Similar(current_similarity),
-                                })
-                                .collect();
-                            collected_similar_images.get_mut(hash).unwrap().append(&mut things);
-                            available_hashes.remove(e.1);
-                        });
-                    }
-                }
-            }
-        }
+            for (hash, vec_file_entry) in this_time_check_hashes.into_iter() {
+                atomic_mode_counter.fetch_add(1, Ordering::Relaxed);
+
+                // Finds hashes with specific distance
+                let vector_with_found_similar_hashes = self
+                    .bktree
+                    .find(&hash, similarity)
+                    .filter(|(similarity, hash)| (*similarity == current_similarity) && !master_of_group.contains(*hash) && available_hashes.contains_key(*hash))
+                    .collect::<Vec<_>>();
+
+                // Not found any hash with specific distance
+                if vector_with_found_similar_hashes.is_empty() {
+                    continue;
+                }
+
+                // Current checked hash isn't in any group of similarity, so we create one, because found similar images
+                if !master_of_group.contains(&hash) {
+                    master_of_group.insert(hash.clone());
+                    collected_similar_images.insert(hash.clone(), Vec::new());
+
+                    let mut things: Vec<FileEntry> = vec_file_entry
+                        .into_iter()
+                        .map(|mut fe| {
+                            fe.similarity = Similarity::Similar(0);
+                            fe
+                        })
+                        .collect::<Vec<_>>();
+                    collected_similar_images.get_mut(&hash).unwrap().append(&mut things);
+                }
+
+                // This shouldn't be executed too much times, so it should be quite fast to check this
+                if stop_receiver.is_some() && stop_receiver.unwrap().try_recv().is_ok() {
+                    // End thread which send info to gui
+                    progress_thread_run.store(false, Ordering::Relaxed);
+                    progress_thread_handle.join().unwrap();
+                    return false;
+                }
+
+                vector_with_found_similar_hashes.iter().for_each(|(_similarity, other_hash)| {
+                    let mut vec_fe = available_hashes.remove(*other_hash).unwrap();
+                    for fe in &mut vec_fe {
+                        fe.similarity = Similarity::Similar(current_similarity)
+                    }
+
+                    collected_similar_images.get_mut(&hash).unwrap().append(&mut vec_fe);
+                });
+            }
+        }
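Taken together, the two hunks above replace the clone-heavy FileEntry rebuilding with moves: the master group takes its entries by value from the outer loop, and matched groups are pulled out of available_hashes with remove, mutating only the similarity field. The radius loop also starts at 0 now, so remaining identical hashes are grouped before near misses. A minimal sketch of the find-then-filter BK-tree pattern, using the bk-tree crate's bundled Levenshtein metric on strings for brevity (czkawka's tree actually stores Vec<u8> perceptual hashes under a Hamming metric):

    use bk_tree::{metrics, BKTree};

    fn main() {
        let mut tree = BKTree::new(metrics::Levenshtein);
        tree.add("book");
        tree.add("books");
        tree.add("cake");

        let max_distance = 2;
        for current_distance in 0..=max_distance {
            // `find` yields (distance, &key) pairs within the tolerance;
            // keeping only the pairs at exactly `current_distance` claims
            // the closest matches before more distant ones, like the loop
            // in find_similar_hashes.
            let matches: Vec<_> = tree
                .find("book", max_distance)
                .filter(|(distance, _)| *distance == current_distance)
                .collect();
            println!("distance {}: {:?}", current_distance, matches);
        }
    }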
@@ -719,7 +716,8 @@ impl SimilarImages {
         progress_thread_run.store(false, Ordering::Relaxed);
         progress_thread_handle.join().unwrap();

-        self.similar_vectors = collected_similar_images.values().cloned().collect();
+        // self.similar_vectors = collected_similar_images.into_values().collect(); // TODO use this in Rust 1.54.0
+        self.similar_vectors = collected_similar_images.values().cloned().collect(); // 1.53.0 version

         if self.exclude_images_with_same_size {
             let mut new_vector = Default::default();
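The TODO refers to BTreeMap::into_values, stabilized in Rust 1.54: it consumes the map, so each Vec<FileEntry> is moved out rather than cloned. On the Rust 1.53 toolchain this commit targets, the borrowing values().cloned() form is the fallback. A small illustration of the difference:

    use std::collections::BTreeMap;

    fn main() {
        let mut map: BTreeMap<Vec<u8>, Vec<String>> = BTreeMap::new();
        map.insert(vec![1], vec!["a.jpg".into(), "b.jpg".into()]);

        // Rust 1.53: borrow the map and clone every group out of it.
        let cloned: Vec<Vec<String>> = map.values().cloned().collect();

        // Rust 1.54+: consume the map and move the groups out, clone-free.
        let moved: Vec<Vec<String>> = map.into_values().collect();

        assert_eq!(cloned, moved);
    }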
@@ -1028,9 +1026,9 @@ pub fn get_string_from_similarity(similarity: &Similarity, hash_size: u8) -> String
     };

     match similarity {
-        Similarity::None => {
-            panic!()
-        }
+        // Similarity::None => {
+        //     panic!()
+        // }
         Similarity::Similar(h) => {
             // #[cfg(debug_assertions)]
             // {