diff --git a/Cargo.lock b/Cargo.lock index 55406e6..a7edda7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4,161 +4,150 @@ version = 3 [[package]] name = "aho-corasick" -version = "1.0.2" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43f6cb1bf222025340178f382c426f13757b2960e89779dfcb319c32542a5a41" +checksum = "0c378d78423fdad8089616f827526ee33c19f2fddbd5de1629152c9593ba4783" dependencies = [ "memchr", ] [[package]] -name = "atty" -version = "0.2.14" +name = "anstream" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +checksum = "b1f58811cfac344940f1a400b6e6231ce35171f614f26439e80f8c1465c5cc0c" dependencies = [ - "hermit-abi", - "libc", - "winapi", + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "utf8parse", ] [[package]] -name = "bitflags" -version = "1.3.2" +name = "anstyle" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" +checksum = "15c4c2c83f81532e5845a733998b6971faca23490340a418e9b72a3ec9de12ea" [[package]] -name = "clap" -version = "4.0.26" +name = "anstyle-parse" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2148adefda54e14492fb9bddcc600b4344c5d1a3123bd666dcb939c6f0e0e57e" +checksum = "938874ff5980b03a87c5524b3ae5b59cf99b1d6bc836848df7bc5ada9643c333" dependencies = [ - "atty", - "bitflags", - "clap_derive", - "clap_lex", - "once_cell", - "strsim", - "termcolor", + "utf8parse", ] [[package]] -name = "clap_derive" -version = "4.0.21" +name = "anstyle-query" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0177313f9f02afc995627906bbd8967e2be069f5261954222dac78290c2b9014" +checksum = "5ca11d4be1bab0c8bc8734a9aa7bf4ee8316d462a08c6ac5052f888fef5b494b" dependencies = [ - "heck", - "proc-macro-error", - "proc-macro2", - "quote", - "syn", + "windows-sys", ] [[package]] -name = "clap_lex" -version = "0.3.0" +name = "anstyle-wincon" +version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d4198f73e42b4936b35b5bb248d81d2b595ecb170da0bac7655c54eedfa8da8" +checksum = "58f54d10c6dfa51283a066ceab3ec1ab78d13fae00aa49243a45e4571fb79dfd" dependencies = [ - "os_str_bytes", + "anstyle", + "windows-sys", ] [[package]] -name = "colmap" -version = "0.1.0" +name = "clap" +version = "4.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a13b88d2c62ff462f88e4a121f17a82c1af05693a2f192b5c38d14de73c19f6" dependencies = [ - "clap", - "regex", + "clap_builder", + "clap_derive", ] [[package]] -name = "heck" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2540771e65fc8cb83cd6e8a237f70c319bd5c29f78ed1084ba5d50eeac86f7f9" - -[[package]] -name = "hermit-abi" -version = "0.1.19" +name = "clap_builder" +version = "4.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" +checksum = "2bb9faaa7c2ef94b2743a21f5a29e6f0010dff4caa69ac8e9d6cf8b6fa74da08" dependencies = [ - "libc", + "anstream", + "anstyle", + "clap_lex", + "strsim", ] [[package]] -name = "libc" -version = "0.2.137" +name = "clap_derive" +version = "4.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc7fcc620a3bff7cdd7a365be3376c97191aeaccc2a603e600951e452615bf89" +checksum = "0862016ff20d69b84ef8247369fabf5c008a7417002411897d40ee1f4532b873" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] [[package]] -name = "memchr" -version = "2.5.0" +name = "clap_lex" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" +checksum = "cd7cc57abe963c6d3b9d8be5b06ba7c8957a930305ca90304f24ef040aa6f961" [[package]] -name = "once_cell" -version = "1.16.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86f0b0d4bf799edbc74508c1e8bf170ff5f41238e5f8225603ca7caaae2b7860" +name = "colmap" +version = "0.1.0" +dependencies = [ + "clap", + "regex", +] [[package]] -name = "os_str_bytes" -version = "6.4.0" +name = "colorchoice" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b5bf27447411e9ee3ff51186bf7a08e16c341efdde93f4d823e8844429bed7e" +checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" [[package]] -name = "proc-macro-error" -version = "1.0.4" +name = "heck" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" -dependencies = [ - "proc-macro-error-attr", - "proc-macro2", - "quote", - "syn", - "version_check", -] +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" [[package]] -name = "proc-macro-error-attr" -version = "1.0.4" +name = "memchr" +version = "2.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" -dependencies = [ - "proc-macro2", - "quote", - "version_check", -] +checksum = "8f232d6ef707e1956a43342693d2a31e72989554d58299d7a88738cc95b0d35c" [[package]] name = "proc-macro2" -version = "1.0.47" +version = "1.0.66" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ea3d908b0e36316caf9e9e2c4625cdde190a7e6f440d794667ed17a1855e725" +checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9" dependencies = [ "unicode-ident", ] [[package]] name = "quote" -version = "1.0.21" +version = "1.0.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbe448f377a7d6961e30f5955f9b8d106c3f5e449d493ee1b125c1d43c2b5179" +checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" dependencies = [ "proc-macro2", ] [[package]] name = "regex" -version = "1.9.1" +version = "1.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2eae68fc220f7cf2532e4494aded17545fce192d59cd996e0fe7887f4ceb575" +checksum = "697061221ea1b4a94a624f67d0ae2bfe4e22b8a17b6a192afb11046542cc8c47" dependencies = [ "aho-corasick", "memchr", @@ -168,9 +157,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.3.3" +version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39354c10dd07468c2e73926b23bb9c2caca74c5501e38a35da70406f1d923310" +checksum = "c2f401f4955220693b56f8ec66ee9c78abffd8d1c4f23dc41a23839eb88f0795" dependencies = [ "aho-corasick", "memchr", @@ -179,9 +168,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.7.4" +version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5ea92a5b6195c6ef2a0295ea818b312502c6fc94dde986c5553242e18fd4ce2" +checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da" [[package]] name = "strsim" @@ -191,9 +180,9 @@ checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" [[package]] name = "syn" -version = "1.0.103" +version = "2.0.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a864042229133ada95abf3b54fdc62ef5ccabe9515b64717bcb9a1919e59445d" +checksum = "718fa2415bcb8d8bd775917a1bf12a7931b6dfa890753378538118181e0cb398" dependencies = [ "proc-macro2", "quote", @@ -201,53 +190,79 @@ dependencies = [ ] [[package]] -name = "termcolor" -version = "1.1.3" +name = "unicode-ident" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bab24d30b911b2376f3a13cc2cd443142f0c81dda04c118693e35b3835757755" -dependencies = [ - "winapi-util", -] +checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c" [[package]] -name = "unicode-ident" -version = "1.0.5" +name = "utf8parse" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ceab39d59e4c9499d4e5a8ee0e2735b891bb7308ac83dfb4e80cad195c9f6f3" +checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" [[package]] -name = "version_check" -version = "0.9.4" +name = "windows-sys" +version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets", +] [[package]] -name = "winapi" -version = "0.3.9" +name = "windows-targets" +version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", ] [[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" +name = "windows_aarch64_gnullvm" +version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" [[package]] -name = "winapi-util" -version = "0.1.5" +name = "windows_aarch64_msvc" +version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" -dependencies = [ - "winapi", -] +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" [[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" +name = "windows_x86_64_msvc" +version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" diff --git a/src/input.rs b/src/input.rs index abc787a..713c2ec 100644 --- a/src/input.rs +++ b/src/input.rs @@ -1,13 +1,16 @@ //DEBUG: #![allow(dead_code)] -use std::fmt; use regex::Regex; +use std::result; + +pub const DEFAULT_SEP_PATTERN: &str = r"[\t]+"; -const DEFAULT_SEP: &str = r"[\t]+"; type Column = Vec; type Columns = Vec; +type Result = result::Result; + /// split input text into columns based on separator character /// returns a type representing a variable length array of strings (columns) ? @@ -17,32 +20,34 @@ type Columns = Vec; ///  error handling ///  accept &str and String /// -pub fn split_columns(text: &str, sep: &str) -> Result { +pub fn split_columns(text: &str, sep: &str) -> Result { // read the first line stripping empty lines let lines: Vec<&str> = text.trim().lines().collect(); - eprintln!("lines: {:?}", lines); + // eprintln!("lines: {:?}", lines); let re = Regex::new(sep).unwrap(); // count number of columns let n_col = match lines.first() { Some(line) => re.split(line).count(), - None => return Err(std::fmt::Error) + None => return Err(format!("no lines left")) }; // eprintln!("# columns: {n_col}"); let mut columns = vec![Column::new(); n_col]; - for (i, line) in lines.iter().enumerate() { - eprintln!("checking line {}", i); + for (n, line) in lines.iter().enumerate() { + // eprintln!("checking line {}", i); let new_n_col = re.split(line).count(); if new_n_col != n_col { - return Err(fmt::Error) + return Err( + format!("unmatched column: expected {n_col} got {new_n_col} on line {}", n+1) + ) } - eprintln!("number of columns: {}", columns.len()); + // eprintln!("number of columns: {}", columns.len()); for (c_idx, col) in re.split(line).enumerate() { columns[c_idx].push(col.to_string()) @@ -55,25 +60,37 @@ pub fn split_columns(text: &str, sep: &str) -> Result { } #[test] -fn test_split_columns(){ - let coltext1 = r###" -file1.txt title1 -file2.pdf title2 -file3 title3 -file with space title 4 - "###; +fn test_split_columns_default_sep(){ + let coltext1 = " +file1.txt\t\ttitle1 +file2.pdf\t\ttitle2 +file3\t\t\ttitle3 +file with space \textra + "; let columns = split_columns(coltext1, DEFAULT_SEP); // should have two columns assert_eq!(2, columns.clone().unwrap().len()); assert_eq!(vec!["file1.txt", - "file2.pdf", + "file2.pdf", "file3", - "file with space" + "file with space " ], columns.unwrap()[0]); } +#[test] +#[should_panic] +fn test_wrong_ncol_default_sep() { + let coltext1 = " +file1.txt\t\ttitle1 +file2.pdf\t\ttitle2 +file3\t\t\ttitle3 +file with space\ttitle 4\textra + "; + split_columns(coltext1, DEFAULT_SEP).unwrap(); +} + // #[test] fn test_re_split() { let text = "this is two tabs"; diff --git a/src/main.rs b/src/main.rs index 33deffe..2ce2c29 100644 --- a/src/main.rs +++ b/src/main.rs @@ -5,44 +5,32 @@ * . execute arbitrary shell commands to manipulate input * . dynamically generate field parameters ? */ - -use clap::Parser; -use std::process; - mod input; +use clap::Parser; +use input::DEFAULT_SEP_PATTERN; - -/// # Parsing parameters -/// . parsing x-args as field parameters (-f1 'x argument') -/// . detecting number of columns and x-args from positional arguments #[derive(Parser)] +/// colmap - map commands to columns of text input +/// +/// The colmap command reads text from stdin as columns. Each column is then passed to a command +/// specified by the user. Commands are mapped to specific columns using positional arguments. +/// +/// The first command is applied to the first column, the second command to the second column, etc. #[command(name="colmap")] #[command(author="blob42")] #[command(version="0.1")] -#[command(about = "execute commands on columns")] struct Cli { - /// separator character used to split text into columns - #[arg(default_value=" ")] + #[arg(default_value_t=DEFAULT_SEP_PATTERN.to_owned())] #[arg(short, long = "sep")] - separator: Option, - - #[arg(long, help="select field 1")] - f1: Option, - #[arg(long, help="select field 2")] - f2: Option, - #[arg(long, help="select field 3")] - f3: Option, - #[arg(long, help="select field 4")] - f4: Option, - #[arg(long, help="select field 5")] - f5: Option, - #[arg(long, help="select field 6")] - f6: Option, + separator: String, #[arg(short, long, action = clap::ArgAction::Count)] debug: u8, + + /// execute CMD each column of input. 0 < N_CMD < NB_COLUMNS + commands: Vec } fn main() { @@ -55,17 +43,17 @@ fn main() { // [ ] execute a command on first text column // 4. print resulting concatenated columns - - if let None = cli.f1.as_deref() { - eprintln!("no field --fX to operate on"); - process::exit(1); - } + // if let None = cli.f1.as_deref() { + // eprintln!("no field --fX to operate on"); + // process::exit(1); + // } if cli.debug > 0 { - println!("{:?}", cli.separator.unwrap()); - println!("{:?}", cli.f1.unwrap()); + println!("{:?}", cli.separator); } - + for c in cli.commands { + println!("- {}", c); + } }