diff --git a/Cargo.lock b/Cargo.lock index fa1f1f8..2194f24 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -59,11 +59,43 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "anyhow" +version = "1.0.75" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4668cab20f66d8d020e1fbc0ebe47217433c1b6c8f2040faf858554e394ace6" + +[[package]] +name = "assert_cmd" +version = "2.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88903cb14723e4d4003335bb7f8a14f27691649105346a0f0957466c096adfe6" +dependencies = [ + "anstyle", + "bstr", + "doc-comment", + "predicates", + "predicates-core", + "predicates-tree", + "wait-timeout", +] + +[[package]] +name = "bstr" +version = "1.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c2f7349907b712260e64b0afe2f84692af14a454be26187d9df565c7f69266a" +dependencies = [ + "memchr", + "regex-automata", + "serde", +] + [[package]] name = "clap" -version = "4.4.2" +version = "4.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a13b88d2c62ff462f88e4a121f17a82c1af05693a2f192b5c38d14de73c19f6" +checksum = "84ed82781cea27b43c9b106a979fe450a13a31aab0500595fb3fc06616de08e6" dependencies = [ "clap_builder", "clap_derive", @@ -105,18 +137,79 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" +[[package]] +name = "difflib" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6184e33543162437515c2e2b48714794e37845ec9851711914eec9d308f6ebe8" + +[[package]] +name = "doc-comment" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" + +[[package]] +name = "either" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" + [[package]] name = "heck" version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" +[[package]] +name = "itertools" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" +dependencies = [ + "either", +] + +[[package]] +name = "libc" +version = "0.2.148" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9cdc71e17332e86d2e1d38c1f99edcb6288ee11b815fb1a4b049eaa2114d369b" + [[package]] name = "memchr" version = "2.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f232d6ef707e1956a43342693d2a31e72989554d58299d7a88738cc95b0d35c" +[[package]] +name = "predicates" +version = "3.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6dfc28575c2e3f19cb3c73b93af36460ae898d426eba6fc15b9bd2a5220758a0" +dependencies = [ + "anstyle", + "difflib", + "itertools", + "predicates-core", +] + +[[package]] +name = "predicates-core" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b794032607612e7abeb4db69adb4e33590fa6cf1149e95fd7cb00e634b92f174" + +[[package]] +name = "predicates-tree" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "368ba315fb8c5052ab692e68a0eefec6ec57b23a36959c14496f0b0df2c0cecf" +dependencies = [ + "predicates-core", + "termtree", +] + [[package]] name = "proc-macro2" version = "1.0.66" @@ -164,6 +257,26 @@ version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da" +[[package]] +name = "serde" +version = "1.0.188" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf9e0fcba69a370eed61bcf2b728575f726b50b55cba78064753d708ddc7549e" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.188" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4eca7ac642d82aa35b60049a6eccb4be6be75e599bd2e9adb5f875a737654af2" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "strsim" version = "0.10.0" @@ -181,6 +294,12 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "termtree" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3369f5ac52d5eb6ab48c6b4ffdc8efbcad6b89c765749064ba298f2c68a16a76" + [[package]] name = "unicode-ident" version = "1.0.11" @@ -193,6 +312,15 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" +[[package]] +name = "wait-timeout" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f200f5b12eb75f8c1ed65abd4b2db8a6e1b138a20de009dacee265a2498f3f6" +dependencies = [ + "libc", +] + [[package]] name = "windows-sys" version = "0.48.0" @@ -263,6 +391,8 @@ checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" name = "yargs" version = "0.1.0" dependencies = [ + "anyhow", + "assert_cmd", "clap", "regex", ] diff --git a/Cargo.toml b/Cargo.toml index 2e2db2c..0ca558f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,5 +6,9 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +anyhow = "1.0.75" clap = { version = "4.0.26", features = ["derive"] } regex = "1.9.1" + +[dev-dependencies] +assert_cmd = "2.0.12" diff --git a/README.md b/README.md index 6190c86..7fedf50 100644 --- a/README.md +++ b/README.md @@ -25,6 +25,12 @@ foo_cmd | yargs -f1 'basename {}' -f2 'awk { print $1 }' foo_cmd | yargs 'basename {}' 'awk { print $2 }' ``` +3. skipping fields +```shell +foo_cmd | yargs 'basename {}' - 'awk { print $2 }' +# keeps the second field unchanged +``` + ## Example input: diff --git a/TODO.md b/TODO.md index c585e1b..43cb7c9 100644 --- a/TODO.md +++ b/TODO.md @@ -1,6 +1,6 @@ 1. parse cli parameters 2. read from stdin 3. split stdin into columns (column/awk commands) - 3. execute every field command on it's corresponding column - [ ] execute a command on first text column + 3. map (execute) commands to fields + - [ ] execute a command on first text column 4. print resulting concatenated columns diff --git a/examples/clap1.rs b/examples/clap1.rs new file mode 100644 index 0000000..37e7238 --- /dev/null +++ b/examples/clap1.rs @@ -0,0 +1,68 @@ +use std::path::PathBuf; + +use clap::{Parser, Subcommand}; + +#[derive(Parser)] +#[command(author, version, about, long_about = None)] +struct Cli { + /// Optional name to operate on + name: Option, + + /// Sets a custom config file + #[arg(short, long, value_name = "FILE")] + config: Option, + + /// Turn debugging information on + #[arg(short, long, action = clap::ArgAction::Count)] + debug: u8, + + #[command(subcommand)] + command: Option, +} + +#[derive(Subcommand)] +enum Commands { + /// does testing things + Test { + /// lists test values + #[arg(short, long)] + list: bool, + }, +} + +fn main() { + let cli = Cli::parse(); + + // You can check the value provided by positional arguments, or option arguments + if let Some(name) = cli.name.as_deref() { + println!("Value for name: {name}") + } + + if let Some(config_path) = cli.config.as_deref() { + println!("Value for config: {}", config_path.display()); + } + + // You can see how many times a particular flag or argument occurred + // Note, only flags can have multiple occurrences + match cli.debug { + 0 => println!("Debug mode is off"), + 1 => println!("Debug mode is kind of on"), + 2 => println!("Debug mode is on"), + _ => println!("Don't be crazy"), + } + + // You can check for the existence of subcommands, and if found use their + // matches just as you would the top level cmd + match &cli.command { + Some(Commands::Test { list }) => { + if *list { + println!("Printing testing lists..."); + } else { + println!("Not printing testing lists..."); + } + } + None => {} + } + + // Continued program logic goes here... +} diff --git a/examples/multipos.rs b/examples/multipos.rs new file mode 100644 index 0000000..b76bf8e --- /dev/null +++ b/examples/multipos.rs @@ -0,0 +1,53 @@ +// Using multiple positional arguments +// Detect dynamically how many positional arguments where passed and handle them +use std::path::PathBuf; + +use clap::{Parser}; + +#[derive(Parser)] +#[command(author, version, about, long_about = None)] +struct Cli { + + /// Sets a custom config file + #[arg(short, long, value_name = "FILE")] + config: Option, + + /// Turn debugging information on + #[arg(short, long, action = clap::ArgAction::Count)] + debug: u8, + + /// CMD to execute for each column of input text. 0 < N_CMD < NB_COLUMNS + #[arg( last = true )] + commands: Vec + +} + +fn main() { + let cli = Cli::parse(); + + // You can check the value provided by positional arguments, or option arguments + // if let Some(name) = cli.name.as_deref() { + // println!("Value for name: {name}") + // } + + if let Some(config_path) = cli.config.as_deref() { + println!("Value for config: {}", config_path.display()); + } + + // You can see how many times a particular flag or argument occurred + // Note, only flags can have multiple occurrences + match cli.debug { + 0 => println!("Debug mode is off"), + 1 => println!("Debug mode is kind of on"), + 2 => println!("Debug mode is on"), + 3 | 4 | 5 => println!("too much dude !"), + _ => println!("Don't be crazy"), + } + + + for c in cli.commands { + println!("{:?}", c); + } + + // Continued program logic goes here... +} diff --git a/src/errors.rs b/src/errors.rs new file mode 100644 index 0000000..8aa7022 --- /dev/null +++ b/src/errors.rs @@ -0,0 +1,6 @@ + +// struct YargsError; +// +// impl Error for YarrgsError { +// fn +// } diff --git a/src/input.rs b/src/input.rs new file mode 100644 index 0000000..d7f0c1b --- /dev/null +++ b/src/input.rs @@ -0,0 +1,28 @@ +use anyhow::Result; +use std::io::{BufReader, self, Read}; + + +// this will read and validate input from stdin +// TODO: make as iterator, avoid loading all stdin to memroy +pub fn read_stdin() -> Result> { + let mut r = BufReader::new(io::stdin()); + let mut buf = Box::new(String::new()); + r.read_to_string(&mut buf)?; + Ok(buf) +} +// +//NOTE: need deeper understanding of difference with prev implementation +// pub fn read_stdin() -> Result> { +// Ok(Box::new(BufReader::new(io::stdin()))) +// } + + + + +#[cfg(test)] +mod tests { + // #[test] + // fn read_stdin() { + // panic!() + // } +} diff --git a/src/lib.rs b/src/lib.rs index de25050..397fd5a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,18 +2,12 @@ #![allow(dead_code)] use regex::Regex; -use std::result; -use std::ops::Index; -use std::slice::SliceIndex; -use crate::parsing::DEFAULT_SEP_PATTERN; +use std::ops::Deref; +use anyhow::{anyhow, Result}; +pub mod input; -pub mod parsing { - - pub const DEFAULT_SEP_PATTERN: &str = r"[\t]+"; - -} - +pub const DEFAULT_SEP_PATTERN: &str = r"[\t]+"; type Column = Vec; // type Columns = Vec; @@ -21,8 +15,6 @@ type Column = Vec; #[derive(Clone, Debug)] pub struct Columns(Vec); -type Result = result::Result; - /// split input text into columns based on separator character /// returns a type representing a variable length array of strings (columns) ? @@ -32,21 +24,59 @@ type Result = result::Result; ///  error handling ///  accept &str and String /// -pub fn split_columns(text: &str, sep: &str) -> Result { + +pub struct InputText<'a> { + raw: &'a str, + + sep: String, +} + +impl<'a> InputText<'a> { + + pub fn new(raw: &'a str, sep: &str) -> Self { + InputText { + raw: raw.into(), + sep: sep.into() + } + } + + pub fn n_cols(&self) -> Result { + // read the first line stripping empty lines + let lines: Vec<&str> = self.raw.trim().lines().collect(); + // eprintln!("lines: {:?}", lines); + + let re = Regex::new(&self.sep).unwrap(); + + // count number of columns + match lines.first() { + Some(line) => Ok(re.split(line).count()), + None => return Err(anyhow!("no lines left")), + } + } +} + +/// Return the number of columns given input text and a separator +pub fn n_columns(text: &str, sep: &str) -> Result { // read the first line stripping empty lines let lines: Vec<&str> = text.trim().lines().collect(); // eprintln!("lines: {:?}", lines); let re = Regex::new(sep).unwrap(); - // count number of columns - let n_col = match lines.first() { - Some(line) => re.split(line).count(), - None => return Err(format!("no lines left")) - }; + // count number of columns + match lines.first() { + Some(line) => Ok(re.split(line).count()), + None => return Err(anyhow!("no lines left")), + } +} + +pub fn split_columns(text: &str, sep: &str) -> Result { + let re = Regex::new(sep).unwrap(); // eprintln!("# columns: {n_col}"); + let lines: Vec<&str> = text.trim().lines().collect(); + let n_col = n_columns(text, sep)?; let mut columns = vec![Column::new(); n_col]; for (n, line) in lines.iter().enumerate() { @@ -55,9 +85,10 @@ pub fn split_columns(text: &str, sep: &str) -> Result { let new_n_col = re.split(line).count(); if new_n_col != n_col { - return Err( - format!("unmatched column: expected {n_col} got {new_n_col} on line {}", n+1) - ) + return Err(anyhow!( + "unmatched column: expected {n_col} got {new_n_col} on line {}", + n + 1 + )); } // eprintln!("number of columns: {}", columns.len()); @@ -71,78 +102,83 @@ pub fn split_columns(text: &str, sep: &str) -> Result { Ok(Columns(columns)) } -impl Columns { +impl Deref for Columns { + type Target = Vec>; - //NOTE: is there a way to auto implement what's implemented in the wrapped type self.0 ? - fn len(&self) -> usize { - self.0.len() + fn deref(&self) -> &Vec> { + &self.0 } + } // build Columns from &str impl TryFrom<&str> for Columns { - type Error = String; + type Error = anyhow::Error; fn try_from(value: &str) -> Result { split_columns(value, DEFAULT_SEP_PATTERN) } } -// impl Index to allow indexing in our wrapped Vector -impl Index for Columns -where - I: SliceIndex<[Column]>, -{ - type Output = I::Output; +#[cfg(test)] +mod tests { + use crate::{Columns, split_columns, DEFAULT_SEP_PATTERN}; + use crate::Regex; + use std::error::Error; - fn index(&self, index: I) -> &Self::Output { - self.0.index(index) - } -} + type TestResult = Result<(), Box>; -#[test] -fn test_split_columns_default_sep(){ - let coltext1 = " + #[test] + fn test_split_columns_default_sep() -> TestResult { + let coltext1 = " file1.txt\t\ttitle1 file2.pdf\t\ttitle2 file3\t\t\ttitle3 file with space \textra "; - let columns = split_columns(coltext1, DEFAULT_SEP_PATTERN); + let columns = split_columns(coltext1, DEFAULT_SEP_PATTERN)?; - // should have two columns - assert_eq!(2, columns.clone().unwrap().len()); + // should have two columns + assert_eq!(2, columns.clone().len()); - assert_eq!(vec!["file1.txt", - "file2.pdf", - "file3", - "file with space " - ], columns.unwrap()[0]); -} + assert_eq!( + vec!["file1.txt", "file2.pdf", "file3", "file with space "], + columns[0] + ); + Ok(()) + } -#[test] -#[should_panic] -fn test_wrong_ncol_default_sep() { - let coltext1 = " + #[test] + #[should_panic] + fn test_wrong_ncol_default_sep() { + let coltext1 = " file1.txt\t\ttitle1 file2.pdf\t\ttitle2 file3\t\t\ttitle3 file with space\ttitle 4\textra "; - split_columns(coltext1, DEFAULT_SEP_PATTERN).unwrap(); -} + split_columns(coltext1, DEFAULT_SEP_PATTERN).unwrap(); + } -// #[test] -fn test_re_split() { - let text = "this is two tabs"; - let re = Regex::new(r"[\t]+").unwrap(); - let fields: Vec<&str> = re.split(text).collect(); - eprintln!("{:?}", fields); - assert!(false); -} + // #[test] + fn test_re_split() { + let text = "this is two tabs"; + let re = Regex::new(r"[\t]+").unwrap(); + let fields: Vec<&str> = re.split(text).collect(); + eprintln!("{:?}", fields); + assert!(false); + } + + #[test] + fn test_columns_from_str() { + let res: Columns = "first column\tsecond column\t\tthird column" + .try_into() + .unwrap(); + assert_eq!(res.len(), 3); + } -#[test] -fn test_columns_from_str() { - let res: Columns = "first column\tsecond column\t\tthird column".try_into().unwrap(); - assert_eq!(res.len(), 3); + #[test] + fn test_input_text(){ + // it = + } } diff --git a/src/main.rs b/src/main.rs index 345a475..b31e948 100644 --- a/src/main.rs +++ b/src/main.rs @@ -6,8 +6,13 @@ * . dynamically generate field parameters ? */ -use clap::Parser; -use yargs::parsing::DEFAULT_SEP_PATTERN; +#![allow(unused_imports)] +use clap::{Parser,CommandFactory}; +use clap::error::ErrorKind; +use yargs::{DEFAULT_SEP_PATTERN, split_columns, input}; +use anyhow::Result; +use std::io::{BufRead, Read, BufReader, stdin}; + #[derive(Parser)] /// yargs - map commands to columns of text input @@ -18,23 +23,28 @@ use yargs::parsing::DEFAULT_SEP_PATTERN; /// arguments. /// /// The first command is applied to the first column, the second command to the second column, etc. +#[derive(Debug)] #[command(name="yargs")] #[command(author="blob42")] #[command(version="0.1")] struct Cli { /// separator character used to split text into columns - #[arg(default_value_t=DEFAULT_SEP_PATTERN.to_owned())] + #[arg(default_value=DEFAULT_SEP_PATTERN)] #[arg(short)] - delimiter: String, + delimiter: Option, + + //TODO: + // -f --field + // skip fields with `-` #[arg(short, long, action = clap::ArgAction::Count)] - debug: u8, + verbose: u8, /// execute CMD each column of input. 0 < N_CMD < NB_COLUMNS - commands: Vec + yargs: Vec } -fn main() { +fn main() -> Result<()> { let cli = Cli::parse(); // if let None = cli.f1.as_deref() { @@ -42,12 +52,50 @@ fn main() { // process::exit(1); // } - if cli.debug > 0 { - println!("{:?}", cli.delimiter); + if cli.verbose > 0 { + println!("{:?}", cli); + + for cmd in &cli.yargs { + println!("- {}", cmd); + } } - for c in cli.commands { - println!("- {}", c); + + // input validation + // take input text, split_columns, nb yargs <= nb columns + // Validate that the number of positional args <= nb of text columns + // ex: input: hello foo bar + // -- + // possible ways to call the app: + // $ echo 'hello foo bar' | yargs cat rev 'tr -d b' + // $ echo 'hello foo bar' | yargs cat rev + // $ echo 'hello foo bar' | yargs cat + // let mut cmd = Cli::command(); + + // Read commands as positional args + + // Read input from stdin + let raw_input = input::read_stdin()?; + let input_text = yargs::InputText::new(&raw_input, yargs::DEFAULT_SEP_PATTERN); + + // Check that n args <= input cols + if cli.yargs.len() > input_text.n_cols()? { + panic!("too many arguments"); } + // assert_eq!(input_text.n_cols()?, cli.yargs.len()); + + + + + print!("{}", raw_input); + + + + + // + // cmd.error(ErrorKind::ValueValidation, "invalid") + // .exit() + // validate number of + Ok(()) } diff --git a/tests/cli.rs b/tests/cli.rs new file mode 100644 index 0000000..16f8226 --- /dev/null +++ b/tests/cli.rs @@ -0,0 +1,33 @@ +//TODO: + +use std::error::Error; +use assert_cmd::Command; +// use assert_cmd::prelude::*; +use std::path::Path; +use std::fs::read_to_string; + +type TestResult = Result<(), Box>; + +#[test] +fn pass(){ + let mut cmd = Command::cargo_bin("yargs").unwrap(); + cmd.assert().success(); +} + + + +// input with many columns +// no positional arguments +// behaves like cat +#[test] +fn pass_noargs() -> TestResult { + let input = Path::new("tests/inputs/input1"); + + let mut cmd = Command::cargo_bin("yargs").unwrap(); + let assert = cmd + .pipe_stdin(input)? + .assert(); + assert.stdout(read_to_string(input)?); + Ok(()) +} + diff --git a/tests/inputs/input1 b/tests/inputs/input1 new file mode 100644 index 0000000..6fc0b8e --- /dev/null +++ b/tests/inputs/input1 @@ -0,0 +1,20 @@ +.rw-r--r-- 894 root 9 Sep 22:19 coredump.conf +.rw-r--r-- 540 root 20 Sep 2020 homed.conf +.rw-r--r-- 766 root 9 Sep 22:19 homed.conf.pacnew +.rw-r--r-- 894 root 9 Sep 22:19 journal-remote.conf +.rw-r--r-- 822 root 9 Sep 22:19 journal-upload.conf +.rw-r--r-- 1.3k root 9 Sep 22:19 journald.conf +.rw-r--r-- 1.1k root 24 Feb 2022 logind.conf +.rw-r--r-- 1.5k root 9 Sep 22:19 logind.conf.pacnew +drwxr-xr-x - root 25 Jan 18:33 network +.rw-r--r-- 872 root 9 Sep 22:19 networkd.conf +.rw-r--r-- 804 root 9 Sep 22:19 oomd.conf +.rw-r--r-- 670 root 9 Sep 22:19 pstore.conf +.rw-r--r-- 763 root 19 Aug 2020 resolved.conf +.rw-r--r-- 1.6k root 9 Sep 22:19 resolved.conf.pacnew +.rw-r--r-- 953 root 9 Sep 22:19 sleep.conf +drwxr-xr-x - root 2 Feb 18:04 system +.rw-r--r-- 2.2k root 9 Sep 22:19 system.conf +.rw-r--r-- 856 root 9 Sep 22:19 timesyncd.conf +drwxr-xr-x - root 9 Nov 2022 user +.rw-r--r-- 1.6k root 9 Sep 22:19 user.conf