Audio tags/content

pull/1087/head
Rafał Mikrut 8 months ago
parent f91abb966b
commit 26028b5e92

@ -15,6 +15,7 @@ static CZKAWKA_PATH: state::InitCell<String> = state::InitCell::new();
static COLLECTED_FILES: state::InitCell<CollectedFiles> = state::InitCell::new();
const ATTEMPTS: u32 = 10;
const PRINT_MESSAGES_CZKAWKA: bool = true;
// App runs - ./ci_tester PATH_TO_CZKAWKA
fn main() {
@ -41,10 +42,83 @@ fn main() {
test_remove_duplicates_one_newest();
test_remove_duplicates_all_expect_newest();
test_remove_duplicates_all_expect_oldest();
test_remove_same_music_tags_one_oldest();
test_remove_same_music_tags_one_newest();
test_remove_same_music_tags_all_expect_oldest();
test_remove_same_music_tags_all_expect_newest();
test_remove_same_music_content_one_oldest();
test_remove_same_music_content_all_expect_oldest();
test_remove_same_music_content_one_newest();
test_remove_same_music_content_all_expect_newest();
}
println!("Completed checking");
}
/// Runs the `music` command in `CONTENT` mode (`-l 2.0`) with the `ON` delete method.
///
/// `Music/M2.mp3` is the only expected file difference; the two remaining
/// expectation lists passed to `run_test` are empty.
fn test_remove_same_music_content_one_newest() {
    info!("test_remove_same_music_content_one_newest");
    let arguments = ["music", "-d", "TestFiles", "-s", "CONTENT", "-l", "2.0", "-D", "ON"];
    let expected_files_differences = vec!["Music/M2.mp3"];
    run_test(&arguments, expected_files_differences, Vec::new(), Vec::new());
}
/// Runs the `music` command in `CONTENT` mode (`-l 2.0`) with the `AEN` delete method.
///
/// `Music/M1.mp3`, `Music/M3.flac` and `Music/M5.mp3` are the expected file
/// differences; the two remaining expectation lists passed to `run_test` are empty.
fn test_remove_same_music_content_all_expect_newest() {
    info!("test_remove_same_music_content_all_expect_newest");
    let arguments = ["music", "-d", "TestFiles", "-s", "CONTENT", "-l", "2.0", "-D", "AEN"];
    let expected_files_differences = vec!["Music/M1.mp3", "Music/M3.flac", "Music/M5.mp3"];
    run_test(&arguments, expected_files_differences, Vec::new(), Vec::new());
}
/// Runs the `music` command in `CONTENT` mode (`-l 2.0`) with the `AEO` delete method.
///
/// `Music/M1.mp3`, `Music/M2.mp3` and `Music/M3.flac` are the expected file
/// differences; the two remaining expectation lists passed to `run_test` are empty.
fn test_remove_same_music_content_all_expect_oldest() {
    info!("test_remove_same_music_content_all_expect_oldest");
    let arguments = ["music", "-d", "TestFiles", "-s", "CONTENT", "-l", "2.0", "-D", "AEO"];
    let expected_files_differences = vec!["Music/M1.mp3", "Music/M2.mp3", "Music/M3.flac"];
    run_test(&arguments, expected_files_differences, Vec::new(), Vec::new());
}
/// Runs the `music` command in `CONTENT` mode (`-l 2.0`) with the `OO` delete method.
///
/// `Music/M5.mp3` is the only expected file difference; the two remaining
/// expectation lists passed to `run_test` are empty.
fn test_remove_same_music_content_one_oldest() {
    info!("test_remove_same_music_content_one_oldest");
    let arguments = ["music", "-d", "TestFiles", "-s", "CONTENT", "-l", "2.0", "-D", "OO"];
    let expected_files_differences = vec!["Music/M5.mp3"];
    run_test(&arguments, expected_files_differences, Vec::new(), Vec::new());
}
/// Runs the `music` command without an explicit search method and with the `OO` delete method.
///
/// `Music/M5.mp3` is the only expected file difference; the two remaining
/// expectation lists passed to `run_test` are empty.
fn test_remove_same_music_tags_one_oldest() {
    // The logged label matches the function name so log output can be traced back to this test.
    info!("test_remove_same_music_tags_one_oldest");
    run_test(&["music", "-d", "TestFiles", "-D", "OO"], vec!["Music/M5.mp3"], vec![], vec![]);
}
/// Runs the `music` command without an explicit search method and with the `ON` delete method.
///
/// `Music/M2.mp3` is the only expected file difference; the two remaining
/// expectation lists passed to `run_test` are empty.
fn test_remove_same_music_tags_one_newest() {
    // The logged label matches the function name so log output can be traced back to this test.
    info!("test_remove_same_music_tags_one_newest");
    run_test(&["music", "-d", "TestFiles", "-D", "ON"], vec!["Music/M2.mp3"], vec![], vec![]);
}
/// Runs the `music` command without an explicit search method and with the `AEO` delete method.
///
/// `Music/M1.mp3`, `Music/M2.mp3` and `Music/M3.flac` are the expected file
/// differences; the two remaining expectation lists passed to `run_test` are empty.
fn test_remove_same_music_tags_all_expect_oldest() {
    // The logged label matches the function name so log output can be traced back to this test.
    info!("test_remove_same_music_tags_all_expect_oldest");
    run_test(
        &["music", "-d", "TestFiles", "-D", "AEO"],
        vec!["Music/M1.mp3", "Music/M2.mp3", "Music/M3.flac"],
        vec![],
        vec![],
    );
}
/// Runs the `music` command without an explicit search method and with the `AEN` delete method.
///
/// `Music/M1.mp3`, `Music/M3.flac` and `Music/M5.mp3` are the expected file
/// differences; the two remaining expectation lists passed to `run_test` are empty.
fn test_remove_same_music_tags_all_expect_newest() {
    // The logged label matches the function name so log output can be traced back to this test.
    info!("test_remove_same_music_tags_all_expect_newest");
    run_test(
        &["music", "-d", "TestFiles", "-D", "AEN"],
        vec!["Music/M1.mp3", "Music/M3.flac", "Music/M5.mp3"],
        vec![],
        vec![],
    );
}
fn test_remove_duplicates_all_expect_oldest() {
info!("test_remove_duplicates_all_expect_oldest");
run_test(
@ -93,12 +167,7 @@ fn test_temporary_files() {
}
fn test_empty_folders() {
info!("test_empty_folders");
run_test(
&["empty-folders", "-d", "TestFiles", "-D"],
vec![],
vec!["EmptyFolders/One", "EmptyFolders/Two", "EmptyFolders/Two/TwoInside"],
vec![],
);
run_test(&["empty-folders", "-d", "TestFiles", "-D"], vec![], vec!["EmptyFolders/One", "EmptyFolders/Two"], vec![]);
}
fn test_biggest_files() {
@ -138,7 +207,7 @@ fn run_test(arguments: &[&str], expected_files_differences: Vec<&'static str>, e
let mut all_arguments = vec![];
all_arguments.push(CZKAWKA_PATH.get().as_str());
all_arguments.extend_from_slice(arguments);
run_with_good_status(&all_arguments, true);
run_with_good_status(&all_arguments, PRINT_MESSAGES_CZKAWKA);
file_folder_diffs(
COLLECTED_FILES.get(),
expected_files_differences,

@ -127,7 +127,7 @@ pub struct DuplicatesArgs {
short,
long,
default_value = "HASH",
value_parser = parse_checking_method,
value_parser = parse_checking_method_duplicate,
help = "Search method (NAME, SIZE, HASH)",
long_help = "Methods to search files.\nNAME - Fast but but rarely usable,\nSIZE - Fast but not accurate, checking by the file's size,\nHASH - The slowest method, checking by the hash of the entire file"
)]
@ -368,6 +368,15 @@ pub struct SameMusicArgs {
long_help = "Sets which rows must be equal to set this files as duplicates(may be mixed, but must be divided by commas)."
)]
pub music_similarity: MusicSimilarity,
// Search method of the music command; parsed by `parse_checking_method_same_music`,
// defaults to TAGS. Plain `//` comments are used so clap does not pick them up as help text.
#[clap(
    short,
    long,
    default_value = "TAGS",
    value_parser = parse_checking_method_same_music,
    help = "Search method (CONTENT, TAGS)",
    long_help = "Methods to search files.\nCONTENT - finds similar audio files by content,\nTAGS - finds similar audio files by tags (the tags to compare also need to be set)"
)]
pub search_method: CheckingMethod,
#[clap(flatten)]
pub file_to_save: FileToSave,
#[clap(flatten)]
@ -397,6 +406,53 @@ pub struct SameMusicArgs {
long_help = "Maximum size of checked files in bytes, assigning lower value may speed up searching"
)]
pub maximal_file_size: u64,
// Minimum duration of a matching segment; parsed and validated by
// `parse_minimum_segment_duration`, defaults to 10.0.
// Plain `//` comments are used so clap does not pick them up as help text.
#[clap(
    short = 'l',
    long,
    value_parser = parse_minimum_segment_duration,
    default_value = "10.0",
    help = "Minimum segment duration",
    long_help = "Minimum segment duration, smaller values will also find shorter similar segments, which may increase the number of false positives"
)]
pub minimum_segment_duration: f32,
// Maximum allowed difference between compared segments; the raw value is parsed and
// validated by `parse_maximum_difference` and defaults to 2.0.
// Plain `//` comments are used so clap does not pick them up as help text.
// NOTE(review): the CI tests invoke `music -d TestFiles`, so `-d` presumably already
// selects directories for this command — confirm that `short = 'd'` here does not collide.
#[clap(
short = 'd',
long,
value_parser = parse_maximum_difference,
default_value = "2.0",
help = "Maximum difference between segments",
long_help = "Maximum difference between segments, 0.0 will find only identical segments, 10.0 will find also segments which are almost not similar at all"
)]
pub maximum_difference: f64,
}
/// Parses the `maximum_difference` command line value.
///
/// Accepts any number in the inclusive range `0.0..=10.0`. Both ends are allowed because
/// the option's help text describes `0.0` (only identical segments) and `10.0` as usable values.
///
/// # Errors
///
/// Returns a message when `src` is not a valid floating point number, is `NaN`,
/// or lies outside the `0.0..=10.0` range.
fn parse_maximum_difference(src: &str) -> Result<f64, String> {
    let maximum_difference = src.parse::<f64>().map_err(|e| e.to_string())?;

    // `NaN` parses successfully but fails every ordered comparison, so it must be rejected
    // explicitly or it would slip through the range checks below.
    if maximum_difference.is_nan() {
        Err("Maximum difference must be a number".to_string())
    } else if maximum_difference < 0.0 {
        Err("Maximum difference must not be smaller than 0".to_string())
    } else if maximum_difference > 10.0 {
        Err("Maximum difference must not be bigger than 10.0".to_string())
    } else {
        Ok(maximum_difference)
    }
}
/// Parses the `minimum_segment_duration` command line value.
///
/// Accepts any number strictly between `0` and `3600`.
///
/// # Errors
///
/// Returns a message when `src` is not a valid floating point number, is `NaN`,
/// is not bigger than `0`, or is not smaller than `3600`.
fn parse_minimum_segment_duration(src: &str) -> Result<f32, String> {
    let minimum_segment_duration = src.parse::<f32>().map_err(|e| e.to_string())?;

    // `NaN` parses successfully but fails every ordered comparison, so it must be rejected
    // explicitly or it would slip through the range checks below.
    if minimum_segment_duration.is_nan() {
        Err("Minimum segment duration must be a number".to_string())
    } else if minimum_segment_duration <= 0.0 {
        Err("Minimum segment duration must be bigger than 0".to_string())
    } else if minimum_segment_duration >= 3600.0 {
        Err("Minimum segment duration must be smaller than 3600 (greater values do not make much sense)".to_string())
    } else {
        Ok(minimum_segment_duration)
    }
}
#[derive(Debug, clap::Args)]
@ -697,7 +753,7 @@ fn parse_tolerance(src: &str) -> Result<i32, &'static str> {
}
}
fn parse_checking_method(src: &str) -> Result<CheckingMethod, &'static str> {
fn parse_checking_method_duplicate(src: &str) -> Result<CheckingMethod, &'static str> {
match src.to_ascii_lowercase().as_str() {
"name" => Ok(CheckingMethod::Name),
"size" => Ok(CheckingMethod::Size),
@ -707,6 +763,14 @@ fn parse_checking_method(src: &str) -> Result<CheckingMethod, &'static str> {
}
}
/// Parses the search method of the music command, ignoring ASCII case.
///
/// `tags` maps to `CheckingMethod::AudioTags` and `content` to `CheckingMethod::AudioContent`.
///
/// # Errors
///
/// Returns a static message listing the allowed values for any other input.
fn parse_checking_method_same_music(src: &str) -> Result<CheckingMethod, &'static str> {
    match src.to_ascii_lowercase().as_str() {
        "tags" => Ok(CheckingMethod::AudioTags),
        "content" => Ok(CheckingMethod::AudioContent),
        _ => Err("Couldn't parse the search method (allowed: TAGS, CONTENT)"),
    }
}
fn parse_delete_method(src: &str) -> Result<DeleteMethod, &'static str> {
match src.to_ascii_lowercase().as_str() {
"none" => Ok(DeleteMethod::None),
@ -787,7 +851,7 @@ fn parse_image_hash_size(src: &str) -> Result<u8, String> {
}
fn parse_music_duplicate_type(src: &str) -> Result<MusicSimilarity, String> {
if src.is_empty() {
if src.trim().is_empty() {
return Ok(MusicSimilarity::NONE);
}
@ -795,22 +859,22 @@ fn parse_music_duplicate_type(src: &str) -> Result<MusicSimilarity, String> {
let parts: Vec<String> = src.split(',').map(|e| e.to_lowercase().replace('_', "")).collect();
if parts.iter().any(|e| e.contains("tracktitle")) {
if parts.contains(&"tracktitle".into()) {
similarity |= MusicSimilarity::TRACK_TITLE;
}
if parts.iter().any(|e| e.contains("trackartist")) {
if parts.contains(&"trackartist".into()) {
similarity |= MusicSimilarity::TRACK_ARTIST;
}
if parts.iter().any(|e| e.contains("year")) {
if parts.contains(&"year".into()) {
similarity |= MusicSimilarity::YEAR;
}
if parts.iter().any(|e| e.contains("bitrate")) {
if parts.contains(&"bitrate".into()) {
similarity |= MusicSimilarity::BITRATE;
}
if parts.iter().any(|e| e.contains("genre")) {
if parts.contains(&"genre".into()) {
similarity |= MusicSimilarity::GENRE;
}
if parts.iter().any(|e| e.contains("length")) {
if parts.contains(&"length".into()) {
similarity |= MusicSimilarity::LENGTH;
}

@ -345,6 +345,9 @@ fn same_music(same_music: SameMusicArgs) {
maximal_file_size,
music_similarity,
dry_run,
minimum_segment_duration,
maximum_difference,
search_method,
} = same_music;
set_number_of_threads(thread_number.thread_number);
@ -362,6 +365,9 @@ fn same_music(same_music: SameMusicArgs) {
item.set_music_similarity(music_similarity);
item.set_delete_method(delete_method.delete_method);
item.set_dry_run(dry_run.dry_run);
item.set_minimum_segment_duration(minimum_segment_duration);
item.set_maximum_difference(maximum_difference);
item.set_check_type(search_method);
item.find_same_music(None, None);

@ -1,6 +1,5 @@
use std::collections::BTreeMap;
use std::{fs, mem};
use std::fs;
use std::io::Write;
use std::path::PathBuf;
@ -58,15 +57,15 @@ impl EmptyFolder {
fn optimize_folders(&mut self) {
let mut new_directory_folders: BTreeMap<PathBuf, FolderEntry> = Default::default();
for (name, folder_entry) in mem::take(&mut self.empty_folder_list) {
for (name, folder_entry) in &self.empty_folder_list {
match &folder_entry.parent_path {
Some(t) => {
if !self.empty_folder_list.contains_key(t) {
new_directory_folders.insert(name, folder_entry);
new_directory_folders.insert(name.clone(), folder_entry.clone());
}
}
None => {
new_directory_folders.insert(name, folder_entry);
new_directory_folders.insert(name.clone(), folder_entry.clone());
}
}
}

@ -125,7 +125,7 @@ impl SameMusic {
music_to_check: Default::default(),
approximate_comparison: true,
duplicated_music_entries_referenced: vec![],
check_type: CheckingMethod::AudioContent,
check_type: CheckingMethod::AudioTags,
hash_preset_config: Configuration::preset_test1(), // TODO allow to change this
minimum_segment_duration: 10.0,
maximum_difference: 2.0,

Loading…
Cancel
Save