commit e9765e1387 (parent 9f0b3e0803, branch pull/1072/head)
Rafał Mikrut, 8 months ago

@@ -17,7 +17,7 @@ use crate::common_dir_traversal::{common_get_entry_data_metadata, common_read_di
use crate::common_tool::{CommonData, CommonToolData};
use crate::common_traits::{DebugPrint, PrintResults, SaveResults};
#[derive(Clone)]
#[derive(Clone, Debug)]
pub struct FileEntry {
pub path: PathBuf,
pub size: u64,

@@ -18,11 +18,10 @@ use rayon::prelude::*;
use serde::{Deserialize, Serialize};
use crate::common::{
check_folder_children, create_crash_message, load_cache_from_file_generalized, open_cache_folder, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads,
AUDIO_FILES_EXTENSIONS, IMAGE_RS_BROKEN_FILES_EXTENSIONS, PDF_FILES_EXTENSIONS, ZIP_FILES_EXTENSIONS,
check_folder_children, create_crash_message, load_cache_from_file_generalized, prepare_thread_handler_common, save_cache_to_file_generalized,
send_info_and_wait_for_ending_all_threads, AUDIO_FILES_EXTENSIONS, IMAGE_RS_BROKEN_FILES_EXTENSIONS, PDF_FILES_EXTENSIONS, ZIP_FILES_EXTENSIONS,
};
use crate::common_dir_traversal::{common_get_entry_data_metadata, common_read_dir, get_lowercase_name, get_modified_time, CheckingMethod, ProgressData, ToolType};
use crate::common_messages::Messages;
use crate::common_tool::{CommonData, CommonToolData};
use crate::common_traits::*;
@@ -44,6 +43,12 @@ impl ResultEntry for FileEntry {
fn get_path(&self) -> &Path {
&self.path
}
fn get_modified_date(&self) -> u64 {
self.modified_date
}
fn get_size(&self) -> u64 {
self.size
}
}
#[derive(Copy, Clone, PartialEq, Eq, Serialize, Deserialize, Debug)]
@@ -444,8 +449,9 @@ impl BrokenFiles {
for (_name, file_entry) in loaded_hash_map {
all_results.insert(file_entry.path.to_string_lossy().to_string(), file_entry);
}
let save_as_json = self.get_save_also_as_json();
save_cache_to_file(&all_results, &mut self.common_data.text_messages, save_as_json);
let messages = save_cache_to_file_generalized(&get_cache_file(), &all_results, self.common_data.save_also_as_json);
self.get_text_messages_mut().extend_with_another_messages(messages);
}
debug!("save_to_cache - end");
}
@@ -541,43 +547,8 @@ impl PrintResults for BrokenFiles {
}
}
fn save_cache_to_file(old_hashmap: &BTreeMap<String, FileEntry>, text_messages: &mut Messages, save_also_as_json: bool) {
let mut hashmap: BTreeMap<String, FileEntry> = Default::default();
for (path, fe) in old_hashmap {
if fe.size > 1024 {
hashmap.insert(path.clone(), fe.clone());
}
}
let hashmap = &hashmap;
if let Some(((file_handler, cache_file), (file_handler_json, cache_file_json))) = open_cache_folder(&get_cache_file(), true, save_also_as_json, &mut text_messages.warnings) {
{
let writer = BufWriter::new(file_handler.unwrap()); // Unwrap because cannot fail here
if let Err(e) = bincode::serialize_into(writer, hashmap) {
text_messages
.warnings
.push(format!("Cannot write data to cache file {}, reason {}", cache_file.display(), e));
return;
}
}
if save_also_as_json {
if let Some(file_handler_json) = file_handler_json {
let writer = BufWriter::new(file_handler_json);
if let Err(e) = serde_json::to_writer(writer, hashmap) {
text_messages
.warnings
.push(format!("Cannot write data to cache file {}, reason {}", cache_file_json.display(), e));
return;
}
}
}
text_messages.messages.push(format!("Properly saved to file {} cache entries.", hashmap.len()));
}
}
fn get_cache_file() -> String {
"cache_broken_files.bin".to_string()
"cache_broken_files_61.bin".to_string()
}
fn check_extension_availability(file_name_lowercase: &str) -> TypeOfFile {

@@ -2,12 +2,12 @@ use rayon::iter::ParallelIterator;
use std::collections::BTreeMap;
use std::ffi::OsString;
use std::fs::{DirEntry, File, OpenOptions};
use std::io::BufReader;
use std::io::{BufReader, BufWriter};
use std::path::{Path, PathBuf};
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
use std::sync::Arc;
use std::thread::{sleep, JoinHandle};
use std::time::{Duration, SystemTime};
use std::time::{Duration, SystemTime, UNIX_EPOCH};
use std::{fs, thread};
#[cfg(feature = "heif")]
@@ -21,7 +21,7 @@ use imagepipe::{ImageSource, Pipeline};
use libheif_rs::{ColorSpace, HeifContext, RgbChroma};
use log::{debug, LevelFilter, Record};
use rayon::prelude::*;
use serde::Deserialize;
use serde::{Deserialize, Serialize};
// #[cfg(feature = "heif")]
// use libheif_rs::LibHeif;
@@ -160,6 +160,45 @@ pub fn open_cache_folder(cache_file_name: &str, save_to_cache: bool, use_json: b
None
}
pub fn save_cache_to_file_generalized<T>(cache_file_name: &str, hashmap: &BTreeMap<String, T>, save_also_as_json: bool) -> Messages
where
T: Serialize + ResultEntry + Sized + Send + Sync,
{
debug!("Saving cache to file {} (or also json alternative) - {} results", cache_file_name, hashmap.len());
let mut text_messages = Messages::new();
if let Some(((file_handler, cache_file), (file_handler_json, cache_file_json))) = open_cache_folder(cache_file_name, true, save_also_as_json, &mut text_messages.warnings) {
{
let writer = BufWriter::new(file_handler.unwrap()); // Unwrap because cannot fail here
if let Err(e) = bincode::serialize_into(writer, &hashmap.values().collect::<Vec<_>>()) {
text_messages
.warnings
.push(format!("Cannot write data to cache file {}, reason {}", cache_file.display(), e));
debug!("Failed to save cache to file {:?}", cache_file);
return text_messages;
}
debug!("Saved binary to file {:?}", cache_file);
}
if save_also_as_json {
if let Some(file_handler_json) = file_handler_json {
let writer = BufWriter::new(file_handler_json);
if let Err(e) = serde_json::to_writer(writer, &hashmap.values().collect::<Vec<_>>()) {
text_messages
.warnings
.push(format!("Cannot write data to cache file {}, reason {}", cache_file_json.display(), e));
debug!("Failed to save cache to file {:?}", cache_file_json);
return text_messages;
}
debug!("Saved json to file {:?}", cache_file_json);
}
}
text_messages.messages.push(format!("Properly saved to file {} cache entries.", hashmap.len()));
} else {
debug!("Failed to save cache to file {cache_file_name} because not exists");
}
text_messages
}
pub fn load_cache_from_file_generalized<T>(cache_file_name: &str, delete_outdated_cache: bool) -> (Messages, Option<BTreeMap<String, T>>)
where
for<'a> T: Deserialize<'a> + ResultEntry + Sized + Send + Sync,
@@ -199,17 +238,46 @@ where
// Don't load cache data if the destination file does not exist
if delete_outdated_cache {
debug!("Starting to removing outdated cache entries");
vec_loaded_entries = vec_loaded_entries.into_par_iter().filter(|file_entry| !file_entry.get_path().exists()).collect();
debug!("Completed removing outdated cache entries");
let initial_number_of_entries = vec_loaded_entries.len();
vec_loaded_entries = vec_loaded_entries
.into_par_iter()
.filter(|file_entry| {
if !file_entry.get_path().exists() {
return false;
}
let Ok(metadata) = file_entry.get_path().metadata() else {
return false;
};
if metadata.len() != file_entry.get_size() {
return false;
}
let Ok(modified) = metadata.modified() else {
return false;
};
let Ok(secs) = modified.duration_since(UNIX_EPOCH) else {
return false;
};
if secs.as_secs() != file_entry.get_modified_date() {
return false;
}
true
})
.collect();
debug!(
"Completed removing outdated cache entries, removed {} out of all {} entries",
initial_number_of_entries - vec_loaded_entries.len(),
initial_number_of_entries
);
}
text_messages.messages.push(format!("Properly loaded {} cache entries.", vec_loaded_entries.len()));
let map_loaded_entries = vec_loaded_entries
let map_loaded_entries: BTreeMap<_, _> = vec_loaded_entries
.into_iter()
.map(|file_entry| (file_entry.get_path().to_string_lossy().into_owned(), file_entry))
.collect();
debug!("Loaded cache from file {cache_file_name} (or json alternative)");
debug!("Loaded cache from file {cache_file_name} (or json alternative) - {} results", map_loaded_entries.len());
return (text_messages, Some(map_loaded_entries));
}
debug!("Failed to load cache from file {cache_file_name} because not exists");

@@ -70,6 +70,12 @@ impl ResultEntry for FileEntry {
fn get_path(&self) -> &Path {
&self.path
}
fn get_modified_date(&self) -> u64 {
self.modified_date
}
fn get_size(&self) -> u64 {
self.size
}
}
// Symlinks

@@ -14,4 +14,6 @@ pub trait PrintResults {
pub trait ResultEntry {
fn get_path(&self) -> &Path;
fn get_modified_date(&self) -> u64;
fn get_size(&self) -> u64;
}
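
The two new accessors let the generalized cache loader in common.rs check file size and modification time without knowing the concrete entry type. A condensed illustration (ExampleEntry is hypothetical; the real implementations are the FileEntry/MusicEntry impls elsewhere in this diff):

use std::path::{Path, PathBuf};

// Hypothetical entry type; every entry struct touched by this commit follows the same pattern.
struct ExampleEntry {
    path: PathBuf,
    size: u64,
    modified_date: u64,
}

impl ResultEntry for ExampleEntry {
    fn get_path(&self) -> &Path {
        &self.path
    }
    fn get_modified_date(&self) -> u64 {
        self.modified_date
    }
    fn get_size(&self) -> u64 {
        self.size
    }
}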

@@ -24,11 +24,10 @@ use symphonia::core::meta::MetadataOptions;
use symphonia::core::probe::Hint;
use crate::common::{
create_crash_message, filter_reference_folders_generic, load_cache_from_file_generalized, open_cache_folder, prepare_thread_handler_common,
create_crash_message, filter_reference_folders_generic, load_cache_from_file_generalized, prepare_thread_handler_common, save_cache_to_file_generalized,
send_info_and_wait_for_ending_all_threads, AUDIO_FILES_EXTENSIONS,
};
use crate::common_dir_traversal::{CheckingMethod, DirTraversalBuilder, DirTraversalResult, FileEntry, ProgressData, ToolType};
use crate::common_messages::Messages;
use crate::common_tool::{CommonData, CommonToolData};
use crate::common_traits::*;
@@ -72,6 +71,12 @@ impl ResultEntry for MusicEntry {
fn get_path(&self) -> &Path {
&self.path
}
fn get_modified_date(&self) -> u64 {
self.modified_date
}
fn get_size(&self) -> u64 {
self.size
}
}
impl FileEntry {
@@ -268,8 +273,9 @@ impl SameMusic {
for file_entry in vec_file_entry {
all_results.insert(file_entry.path.to_string_lossy().to_string(), file_entry);
}
let save_also_as_json = self.get_save_also_as_json();
save_cache_to_file(&all_results, &mut self.common_data.text_messages, save_also_as_json, checking_tags);
let messages = save_cache_to_file_generalized(get_cache_file(checking_tags), &all_results, self.common_data.save_also_as_json);
self.get_text_messages_mut().extend_with_another_messages(messages);
debug!("save_cache - end");
}
@@ -746,35 +752,6 @@ impl SameMusic {
}
}
fn save_cache_to_file(hashmap: &BTreeMap<String, MusicEntry>, text_messages: &mut Messages, save_also_as_json: bool, checking_tags: bool) {
if let Some(((file_handler, cache_file), (file_handler_json, cache_file_json))) =
open_cache_folder(get_cache_file(checking_tags), true, save_also_as_json, &mut text_messages.warnings)
{
{
let writer = BufWriter::new(file_handler.unwrap()); // Unwrap because cannot fail here
if let Err(e) = bincode::serialize_into(writer, hashmap) {
text_messages
.warnings
.push(format!("Cannot write data to cache file {}, reason {}", cache_file.display(), e));
return;
}
}
if save_also_as_json {
if let Some(file_handler_json) = file_handler_json {
let writer = BufWriter::new(file_handler_json);
if let Err(e) = serde_json::to_writer(writer, hashmap) {
text_messages
.warnings
.push(format!("Cannot write data to cache file {}, reason {}", cache_file_json.display(), e));
return;
}
}
}
text_messages.messages.push(format!("Properly saved to file {} cache entries.", hashmap.len()));
}
}
// TODO this should be taken from rusty-chromaprint repo, not reimplemented here
fn calc_fingerprint_helper(path: impl AsRef<Path>, config: &Configuration) -> anyhow::Result<Vec<u32>> {
let path = path.as_ref();
@@ -941,9 +918,9 @@ fn read_single_file_tag(path: &str, music_entry: &mut MusicEntry) -> bool {
// Using different cache folders, because loading cache just for finding duplicated tags would be really slow
fn get_cache_file(checking_tags: bool) -> &'static str {
if checking_tags {
"cache_same_music_tags.bin"
"cache_same_music_tags_61.bin"
} else {
"cache_same_music_fingerprints.bin"
"cache_same_music_fingerprints_61.bin"
}
}

@@ -19,11 +19,10 @@ use serde::{Deserialize, Serialize};
#[cfg(feature = "heif")]
use crate::common::get_dynamic_image_from_heic;
use crate::common::{
check_folder_children, create_crash_message, get_dynamic_image_from_raw_image, load_cache_from_file_generalized, open_cache_folder, prepare_thread_handler_common,
check_folder_children, create_crash_message, get_dynamic_image_from_raw_image, load_cache_from_file_generalized, prepare_thread_handler_common, save_cache_to_file_generalized,
send_info_and_wait_for_ending_all_threads, HEIC_EXTENSIONS, IMAGE_RS_SIMILAR_IMAGES_EXTENSIONS, RAW_IMAGE_EXTENSIONS,
};
use crate::common_dir_traversal::{common_get_entry_data_metadata, common_read_dir, get_lowercase_name, get_modified_time, CheckingMethod, ProgressData, ToolType};
use crate::common_messages::Messages;
use crate::common_tool::{CommonData, CommonToolData};
use crate::common_traits::{DebugPrint, PrintResults, ResultEntry, SaveResults};
use crate::flc;
@@ -51,6 +50,12 @@ impl ResultEntry for FileEntry {
fn get_path(&self) -> &Path {
&self.path
}
fn get_modified_date(&self) -> u64 {
self.modified_date
}
fn get_size(&self) -> u64 {
self.size
}
}
/// Used by CLI tool when we cannot use values directly
@@ -375,15 +380,13 @@ impl SimilarImages {
for (file_entry, _hash) in vec_file_entry {
all_results.insert(file_entry.path.to_string_lossy().to_string(), file_entry);
}
let save_also_as_json = self.get_save_also_as_json();
save_hashes_to_file(
let messages = save_cache_to_file_generalized(
&get_cache_file(&self.hash_size, &self.hash_alg, &self.image_filter),
&all_results,
&mut self.common_data.text_messages,
save_also_as_json,
self.hash_size,
self.hash_alg,
self.image_filter,
self.common_data.save_also_as_json,
);
self.get_text_messages_mut().extend_with_another_messages(messages);
}
debug!("save_to_cache - end");
}
@@ -940,92 +943,9 @@ impl PrintResults for SimilarImages {
}
}
pub fn save_hashes_to_file(
hashmap: &BTreeMap<String, FileEntry>,
text_messages: &mut Messages,
save_also_as_json: bool,
hash_size: u8,
hash_alg: HashAlg,
image_filter: FilterType,
) {
if let Some(((file_handler, cache_file), (file_handler_json, cache_file_json))) =
open_cache_folder(&get_cache_file(&hash_size, &hash_alg, &image_filter), true, save_also_as_json, &mut text_messages.warnings)
{
{
let writer = BufWriter::new(file_handler.unwrap()); // Unwrap because cannot fail here
if let Err(e) = bincode::serialize_into(writer, hashmap) {
text_messages
.warnings
.push(format!("Cannot write data to cache file {}, reason {}", cache_file.display(), e));
return;
}
}
if save_also_as_json {
if let Some(file_handler_json) = file_handler_json {
let writer = BufWriter::new(file_handler_json);
if let Err(e) = serde_json::to_writer(writer, hashmap) {
text_messages
.warnings
.push(format!("Cannot write data to cache file {}, reason {}", cache_file_json.display(), e));
return;
}
}
}
text_messages.messages.push(format!("Properly saved to file {} cache entries.", hashmap.len()));
}
}
pub fn load_hashes_from_file(
text_messages: &mut Messages,
delete_outdated_cache: bool,
hash_size: u8,
hash_alg: HashAlg,
image_filter: FilterType,
) -> Option<HashMap<String, FileEntry>> {
if let Some(((file_handler, cache_file), (file_handler_json, cache_file_json))) =
open_cache_folder(&get_cache_file(&hash_size, &hash_alg, &image_filter), false, true, &mut text_messages.warnings)
{
let mut hashmap_loaded_entries: HashMap<String, FileEntry>;
if let Some(file_handler) = file_handler {
let reader = BufReader::new(file_handler);
hashmap_loaded_entries = match bincode::deserialize_from(reader) {
Ok(t) => t,
Err(e) => {
text_messages
.warnings
.push(format!("Failed to load data from cache file {}, reason {}", cache_file.display(), e));
return None;
}
};
} else {
let reader = BufReader::new(file_handler_json.unwrap()); // Unwrap cannot fail, because at least one file must be valid
hashmap_loaded_entries = match serde_json::from_reader(reader) {
Ok(t) => t,
Err(e) => {
text_messages
.warnings
.push(format!("Failed to load data from cache file {}, reason {}", cache_file_json.display(), e));
return None;
}
};
}
// Don't load cache data if destination file not exists
if delete_outdated_cache {
hashmap_loaded_entries.retain(|src_path, _file_entry| Path::new(src_path).exists());
}
text_messages.messages.push(format!("Properly loaded {} cache entries.", hashmap_loaded_entries.len()));
return Some(hashmap_loaded_entries);
}
None
}
pub fn get_cache_file(hash_size: &u8, hash_alg: &HashAlg, image_filter: &FilterType) -> String {
format!(
"cache_similar_images_{}_{}_{}_50.bin",
"cache_similar_images_{}_{}_{}_61.bin",
hash_size,
convert_algorithm_to_string(hash_alg),
convert_filters_to_string(image_filter),

@@ -16,10 +16,10 @@ use vid_dup_finder_lib::HashCreationErrorKind::DetermineVideo;
use vid_dup_finder_lib::{NormalizedTolerance, VideoHash};
use crate::common::{
check_folder_children, load_cache_from_file_generalized, open_cache_folder, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads, VIDEO_FILES_EXTENSIONS,
check_folder_children, load_cache_from_file_generalized, prepare_thread_handler_common, save_cache_to_file_generalized, send_info_and_wait_for_ending_all_threads,
VIDEO_FILES_EXTENSIONS,
};
use crate::common_dir_traversal::{common_get_entry_data_metadata, common_read_dir, get_lowercase_name, get_modified_time, CheckingMethod, ProgressData, ToolType};
use crate::common_messages::Messages;
use crate::common_tool::{CommonData, CommonToolData};
use crate::common_traits::{DebugPrint, PrintResults, ResultEntry, SaveResults};
use crate::flc;
@@ -40,6 +40,12 @@ impl ResultEntry for FileEntry {
fn get_path(&self) -> &Path {
&self.path
}
fn get_modified_date(&self) -> u64 {
self.modified_date
}
fn get_size(&self) -> u64 {
self.size
}
}
/// Distance metric to use with the BK-tree.
@@ -376,8 +382,9 @@ impl SimilarVideos {
for file_entry in vec_file_entry {
all_results.insert(file_entry.path.to_string_lossy().to_string(), file_entry);
}
let save_also_as_json = self.get_save_also_as_json();
save_hashes_to_file(&all_results, &mut self.common_data.text_messages, save_also_as_json);
let messages = save_cache_to_file_generalized(&get_cache_file(), &all_results, self.common_data.save_also_as_json);
self.get_text_messages_mut().extend_with_another_messages(messages);
}
debug!("save_cache - end");
}
@@ -514,35 +521,8 @@ impl PrintResults for SimilarVideos {
}
}
pub fn save_hashes_to_file(hashmap: &BTreeMap<String, FileEntry>, text_messages: &mut Messages, save_also_as_json: bool) {
if let Some(((file_handler, cache_file), (file_handler_json, cache_file_json))) = open_cache_folder(&get_cache_file(), true, save_also_as_json, &mut text_messages.warnings) {
{
let writer = BufWriter::new(file_handler.unwrap()); // Unwrap because cannot fail here
if let Err(e) = bincode::serialize_into(writer, hashmap) {
text_messages
.warnings
.push(format!("Cannot write data to cache file {}, reason {}", cache_file.display(), e));
return;
}
}
if save_also_as_json {
if let Some(file_handler_json) = file_handler_json {
let writer = BufWriter::new(file_handler_json);
if let Err(e) = serde_json::to_writer(writer, hashmap) {
text_messages
.warnings
.push(format!("Cannot write data to cache file {}, reason {}", cache_file_json.display(), e));
return;
}
}
}
text_messages.messages.push(format!("Properly saved to file {} cache entries.", hashmap.len()));
}
}
pub fn get_cache_file() -> String {
"cache_similar_videos.bin".to_string()
"cache_similar_videos_61.bin".to_string()
}
pub fn check_if_ffmpeg_is_installed() -> bool {

@@ -1,7 +1,7 @@
use std::collections::BTreeMap;
use std::default::Default;
use czkawka_core::common::load_cache_from_file_generalized;
use czkawka_core::common::{load_cache_from_file_generalized, save_cache_to_file_generalized};
use directories_next::ProjectDirs;
use gtk4::prelude::*;
use gtk4::{Label, ResponseType, Window};
@@ -176,7 +176,9 @@ pub fn connect_settings(gui_data: &GuiData) {
);
if let Some(cache_entries) = loaded_items {
czkawka_core::similar_images::save_hashes_to_file(&cache_entries, &mut messages, false, *hash_size, *hash_alg, *image_filter);
let save_messages =
save_cache_to_file_generalized(&czkawka_core::similar_images::get_cache_file(hash_size, hash_alg, image_filter), &cache_entries, false);
messages.extend_with_another_messages(save_messages);
}
}
}
@@ -206,7 +208,8 @@ pub fn connect_settings(gui_data: &GuiData) {
load_cache_from_file_generalized::<czkawka_core::similar_videos::FileEntry>(&czkawka_core::similar_videos::get_cache_file(), true);
if let Some(cache_entries) = loaded_items {
czkawka_core::similar_videos::save_hashes_to_file(&cache_entries, &mut messages, false);
let save_messages = save_cache_to_file_generalized(&czkawka_core::similar_videos::get_cache_file(), &cache_entries, false);
messages.extend_with_another_messages(save_messages);
}
messages.messages.push(flg!("cache_properly_cleared"));
