From b6e93bd5cc28f366f11226b4dacae284c5592576 Mon Sep 17 00:00:00 2001 From: blob42 Date: Thu, 5 Oct 2023 01:47:33 +0200 Subject: [PATCH] add cli tests + refactor --- README.md | 7 +- src/lib.rs | 155 +----------------------------------- src/main.rs | 37 +++++++-- src/parse.rs | 159 +++++++++++++++++++++++++++++++++++++ src/{input.rs => stdin.rs} | 0 tests/cli.rs | 27 ++++++- tests/inputs/input1 | 28 ++----- 7 files changed, 227 insertions(+), 186 deletions(-) create mode 100644 src/parse.rs rename src/{input.rs => stdin.rs} (100%) diff --git a/README.md b/README.md index 7fedf50..00a322e 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,4 @@ -Very much work in progress. This is the first project in my Rust learning journey. -Expect a lot of changes and refactoring. +WIP pet project. # yargs @@ -8,7 +7,7 @@ the its ability to work on columns of text and allows for arbitrary commands to be applied per column in a similar way to `xargs`. The columns are called `fields`. The command to execute on each field is called -an `x-arg`. +an `y-arg`. # Usage @@ -40,7 +39,7 @@ input: /long/path/to/some/ebook.pdf | Title Of Ebook ____ | - example usage: | x:arg + example usage: | y-arg -------------- | ---------------- yargs 'basename {}' "awk { print $1 }" diff --git a/src/lib.rs b/src/lib.rs index 397fd5a..d6cbb9f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,106 +1,21 @@ //DEBUG: #![allow(dead_code)] -use regex::Regex; +use anyhow::Result; use std::ops::Deref; -use anyhow::{anyhow, Result}; +use parse::split_columns; -pub mod input; +pub mod stdin; +pub mod parse; pub const DEFAULT_SEP_PATTERN: &str = r"[\t]+"; type Column = Vec; -// type Columns = Vec; #[derive(Clone, Debug)] pub struct Columns(Vec); -/// split input text into columns based on separator character -/// returns a type representing a variable length array of strings (columns) ? 
-/// -/// TODO: -/// -///  error handling -///  accept &str and String -/// - -pub struct InputText<'a> { - raw: &'a str, - - sep: String, -} - -impl<'a> InputText<'a> { - - pub fn new(raw: &'a str, sep: &str) -> Self { - InputText { - raw: raw.into(), - sep: sep.into() - } - } - - pub fn n_cols(&self) -> Result { - // read the first line stripping empty lines - let lines: Vec<&str> = self.raw.trim().lines().collect(); - // eprintln!("lines: {:?}", lines); - - let re = Regex::new(&self.sep).unwrap(); - - // count number of columns - match lines.first() { - Some(line) => Ok(re.split(line).count()), - None => return Err(anyhow!("no lines left")), - } - } -} - -/// Return the number of columns given input text and a separator -pub fn n_columns(text: &str, sep: &str) -> Result { - // read the first line stripping empty lines - let lines: Vec<&str> = text.trim().lines().collect(); - // eprintln!("lines: {:?}", lines); - - let re = Regex::new(sep).unwrap(); - - // count number of columns - match lines.first() { - Some(line) => Ok(re.split(line).count()), - None => return Err(anyhow!("no lines left")), - } -} - -pub fn split_columns(text: &str, sep: &str) -> Result { - let re = Regex::new(sep).unwrap(); - - // eprintln!("# columns: {n_col}"); - let lines: Vec<&str> = text.trim().lines().collect(); - - let n_col = n_columns(text, sep)?; - let mut columns = vec![Column::new(); n_col]; - - for (n, line) in lines.iter().enumerate() { - // eprintln!("checking line {}", i); - - let new_n_col = re.split(line).count(); - - if new_n_col != n_col { - return Err(anyhow!( - "unmatched column: expected {n_col} got {new_n_col} on line {}", - n + 1 - )); - } - // eprintln!("number of columns: {}", columns.len()); - - for (c_idx, col) in re.split(line).enumerate() { - columns[c_idx].push(col.to_string()) - } - } - - eprintln!("{:?}", columns); - - Ok(Columns(columns)) -} impl Deref for Columns { type Target = Vec>; @@ -120,65 +35,3 @@ impl TryFrom<&str> for Columns { } } -#[cfg(test)] 
-mod tests { - use crate::{Columns, split_columns, DEFAULT_SEP_PATTERN}; - use crate::Regex; - use std::error::Error; - - type TestResult = Result<(), Box>; - - #[test] - fn test_split_columns_default_sep() -> TestResult { - let coltext1 = " -file1.txt\t\ttitle1 -file2.pdf\t\ttitle2 -file3\t\t\ttitle3 -file with space \textra - "; - let columns = split_columns(coltext1, DEFAULT_SEP_PATTERN)?; - - // should have two columns - assert_eq!(2, columns.clone().len()); - - assert_eq!( - vec!["file1.txt", "file2.pdf", "file3", "file with space "], - columns[0] - ); - Ok(()) - } - - #[test] - #[should_panic] - fn test_wrong_ncol_default_sep() { - let coltext1 = " -file1.txt\t\ttitle1 -file2.pdf\t\ttitle2 -file3\t\t\ttitle3 -file with space\ttitle 4\textra - "; - split_columns(coltext1, DEFAULT_SEP_PATTERN).unwrap(); - } - - // #[test] - fn test_re_split() { - let text = "this is two tabs"; - let re = Regex::new(r"[\t]+").unwrap(); - let fields: Vec<&str> = re.split(text).collect(); - eprintln!("{:?}", fields); - assert!(false); - } - - #[test] - fn test_columns_from_str() { - let res: Columns = "first column\tsecond column\t\tthird column" - .try_into() - .unwrap(); - assert_eq!(res.len(), 3); - } - - #[test] - fn test_input_text(){ - // it = - } -} diff --git a/src/main.rs b/src/main.rs index b31e948..ae3bd2c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -9,9 +9,11 @@ #![allow(unused_imports)] use clap::{Parser,CommandFactory}; use clap::error::ErrorKind; -use yargs::{DEFAULT_SEP_PATTERN, split_columns, input}; +use yargs::{DEFAULT_SEP_PATTERN, stdin}; +use yargs::parse::InputText; use anyhow::Result; use std::io::{BufRead, Read, BufReader, stdin}; +use std::process; #[derive(Parser)] @@ -28,10 +30,10 @@ use std::io::{BufRead, Read, BufReader, stdin}; #[command(author="blob42")] #[command(version="0.1")] struct Cli { - /// separator character used to split text into columns + /// Regex used to to split input into columns #[arg(default_value=DEFAULT_SEP_PATTERN)] 
#[arg(short)] - delimiter: Option, + delimiter: String, //TODO: // -f --field @@ -52,8 +54,10 @@ fn main() -> Result<()> { // process::exit(1); // } + if cli.verbose > 0 { - println!("{:?}", cli); + eprintln!("======\nDEBUG:\n"); + eprintln!("{:?}", cli); for cmd in &cli.yargs { println!("- {}", cmd); @@ -75,18 +79,35 @@ fn main() -> Result<()> { // Read commands as positional args // Read input from stdin - let raw_input = input::read_stdin()?; - let input_text = yargs::InputText::new(&raw_input, yargs::DEFAULT_SEP_PATTERN); + let raw_input = stdin::read_stdin()?; + let input_text = InputText::new(&raw_input, &cli.delimiter); + + let n_cols = match input_text.n_cols() { + Err(e) => { + eprintln!("error parsing input: {}", e); + process::exit(1) + }, + Ok(n) => n, + }; // Check that n args <= input cols if cli.yargs.len() > input_text.n_cols()? { - panic!("too many arguments"); + // panic!("too many arguments"); + eprint!("too many arguments for delimiter={:?}", input_text.sep); + process::exit(1); + } + + if cli.verbose > 0 { + eprintln!("detected {n_cols} colunms"); + eprintln!("======"); } - // assert_eq!(input_text.n_cols()?, cli.yargs.len()); + + assert!(input_text.n_cols()? >= cli.yargs.len()); + // TODO: RESULT print!("{}", raw_input); diff --git a/src/parse.rs b/src/parse.rs new file mode 100644 index 0000000..fbe5569 --- /dev/null +++ b/src/parse.rs @@ -0,0 +1,159 @@ + +use super::{Columns, Column}; +use anyhow::{anyhow, Result}; +use regex::Regex; + +/// split input text into columns based on separator character +/// returns a type representing a variable length array of strings (columns) ? 
+/// +/// TODO: +/// +///  error handling +///  accept &str and String +/// + +#[derive(Debug)] +pub struct InputText<'a> { + raw: &'a str, + + pub sep: String, +} + +impl<'a> InputText<'a> { + + pub fn new(raw: &'a str, sep: &str) -> Self { + InputText { + raw: raw.into(), + sep: sep.into() + } + } + + pub fn n_cols(&self) -> Result { + // read the first line stripping empty lines + let lines: Vec<&str> = self.raw.trim().lines().collect(); + // eprintln!("lines: {:?}", lines); + + let re = Regex::new(&self.sep).unwrap(); + + // count number of columns + match lines.first() { + Some(line) => Ok(re.split(line).count()), + None => Ok(0) + } + } + + pub fn len(self) -> usize { + self.raw.len() + } +} + +/// Return the number of columns given input text and a separator +pub fn n_columns(text: &str, sep: &str) -> Result { + // read the first line stripping empty lines + let lines: Vec<&str> = text.trim().lines().collect(); + // eprintln!("lines: {:?}", lines); + + let re = Regex::new(sep).unwrap(); + + // count number of columns + match lines.first() { + Some(line) => Ok(re.split(line).count()), + None => return Err(anyhow!("no lines left")), + } +} + +pub fn split_columns(text: &str, sep: &str) -> Result { + let re = Regex::new(sep).unwrap(); + + // eprintln!("# columns: {n_col}"); + let lines: Vec<&str> = text.trim().lines().collect(); + + let n_col = n_columns(text, sep)?; + let mut columns = vec![Column::new(); n_col]; + + for (n, line) in lines.iter().enumerate() { + // eprintln!("checking line {}", i); + + let new_n_col = re.split(line).count(); + + if new_n_col != n_col { + return Err(anyhow!( + "unmatched column: expected {n_col} got {new_n_col} on line {}", + n + 1 + )); + } + // eprintln!("number of columns: {}", columns.len()); + + for (c_idx, col) in re.split(line).enumerate() { + columns[c_idx].push(col.to_string()) + } + } + + eprintln!("{:?}", columns); + + Ok(Columns(columns)) +} + +#[cfg(test)] +mod tests { + use super::*; + use 
crate::DEFAULT_SEP_PATTERN; + use regex::Regex; + use std::error::Error; + + type TestResult = Result<(), Box>; + + #[test] + fn test_split_columns_default_sep() -> TestResult { + let coltext1 = " +file1.txt\t\ttitle1 +file2.pdf\t\ttitle2 +file3\t\t\ttitle3 +file with space \textra + "; + let columns = split_columns(coltext1, DEFAULT_SEP_PATTERN)?; + + // should have two columns + assert_eq!(2, columns.clone().len()); + + assert_eq!( + vec!["file1.txt", "file2.pdf", "file3", "file with space "], + columns[0] + ); + Ok(()) + } + + #[test] + #[should_panic] + fn test_wrong_ncol_default_sep() { + let coltext1 = " +file1.txt\t\ttitle1 +file2.pdf\t\ttitle2 +file3\t\t\ttitle3 +file with space\ttitle 4\textra + "; + split_columns(coltext1, DEFAULT_SEP_PATTERN).unwrap(); + } + + // #[test] + fn test_re_split() { + let text = "this is two tabs"; + let re = Regex::new(r"[\t]+").unwrap(); + let fields: Vec<&str> = re.split(text).collect(); + eprintln!("{:?}", fields); + assert!(false); + } + + #[test] + fn test_columns_from_str() { + let res: Columns = "first column\tsecond column\t\tthird column" + .try_into() + .unwrap(); + assert_eq!(res.len(), 3); + } + + #[test] + fn test_input_text(){ + // it = + } +} diff --git a/src/input.rs b/src/stdin.rs similarity index 100% rename from src/input.rs rename to src/stdin.rs diff --git a/tests/cli.rs b/tests/cli.rs index 16f8226..2267127 100644 --- a/tests/cli.rs +++ b/tests/cli.rs @@ -8,19 +8,23 @@ use std::fs::read_to_string; type TestResult = Result<(), Box>; +// empty stdin should return an empty line #[test] fn pass(){ let mut cmd = Command::cargo_bin("yargs").unwrap(); - cmd.assert().success(); + let assert = cmd + .write_stdin("") + .assert(); + assert.stdout(""); } +#[test] // input with many columns // no positional arguments // behaves like cat -#[test] -fn pass_noargs() -> TestResult { +fn pass_columns_no_args() -> TestResult { let input = Path::new("tests/inputs/input1"); let mut cmd = Command::cargo_bin("yargs").unwrap(); 
@@ -31,3 +35,20 @@ fn pass_noargs() -> TestResult { Ok(()) } + +#[test] +// should fail if more yargs are provided than detected columns +fn fail_yargs_mismatch1() -> TestResult { + let input = Path::new("tests/inputs/input1"); + + let mut cmd = Command::cargo_bin("yargs").unwrap(); + + let assert = cmd + .args(&["one", "two"]) + .pipe_stdin(input)? + .assert(); + assert.failure(); + Ok(()) +} + + diff --git a/tests/inputs/input1 b/tests/inputs/input1 index 6fc0b8e..aaac8ed 100644 --- a/tests/inputs/input1 +++ b/tests/inputs/input1 @@ -1,20 +1,8 @@ -.rw-r--r-- 894 root 9 Sep 22:19 coredump.conf -.rw-r--r-- 540 root 20 Sep 2020 homed.conf -.rw-r--r-- 766 root 9 Sep 22:19 homed.conf.pacnew -.rw-r--r-- 894 root 9 Sep 22:19 journal-remote.conf -.rw-r--r-- 822 root 9 Sep 22:19 journal-upload.conf -.rw-r--r-- 1.3k root 9 Sep 22:19 journald.conf -.rw-r--r-- 1.1k root 24 Feb 2022 logind.conf -.rw-r--r-- 1.5k root 9 Sep 22:19 logind.conf.pacnew -drwxr-xr-x - root 25 Jan 18:33 network -.rw-r--r-- 872 root 9 Sep 22:19 networkd.conf -.rw-r--r-- 804 root 9 Sep 22:19 oomd.conf -.rw-r--r-- 670 root 9 Sep 22:19 pstore.conf -.rw-r--r-- 763 root 19 Aug 2020 resolved.conf -.rw-r--r-- 1.6k root 9 Sep 22:19 resolved.conf.pacnew -.rw-r--r-- 953 root 9 Sep 22:19 sleep.conf -drwxr-xr-x - root 2 Feb 18:04 system -.rw-r--r-- 2.2k root 9 Sep 22:19 system.conf -.rw-r--r-- 856 root 9 Sep 22:19 timesyncd.conf -drwxr-xr-x - root 9 Nov 2022 user -.rw-r--r-- 1.6k root 9 Sep 22:19 user.conf +.rw-rw-r-- 11k blob42 21 Sep 23:25 Cargo.lock +.rw-rw-r-- 303 blob42 21 Sep 23:25 Cargo.toml +drwxrwxr-x - blob42 10 Sep 12:27 examples +.rw-rw-r-- 1.6k blob42 1 Oct 17:24 README.md +drwxrwxr-x - blob42 21 Sep 23:31 src +drwxrwxr-x - blob42 1 Oct 17:26 target +drwxrwxr-x - blob42 21 Sep 00:44 tests +.rw-rw-r-- 222 blob42 17 Sep 19:03 TODO.md