From b6e93bd5cc28f366f11226b4dacae284c5592576 Mon Sep 17 00:00:00 2001
From: blob42 <contact@blob42.xyz>
Date: Thu, 5 Oct 2023 01:47:33 +0200
Subject: [PATCH] add cli tests + refactor

---
 README.md                  |   7 +-
 src/lib.rs                 | 155 +-----------------------------------
 src/main.rs                |  37 +++++++--
 src/parse.rs               | 159 +++++++++++++++++++++++++++++++++++++
 src/{input.rs => stdin.rs} |   0
 tests/cli.rs               |  27 ++++++-
 tests/inputs/input1        |  28 ++-----
 7 files changed, 227 insertions(+), 186 deletions(-)
 create mode 100644 src/parse.rs
 rename src/{input.rs => stdin.rs} (100%)
diff --git a/README.md b/README.md
index 7fedf50..00a322e 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,4 @@
-Very much work in progress. This is the first project in my Rust learning journey. 
-Expect a lot of changes and refactoring.
+WIP pet project.
 
 # yargs
 
@@ -8,7 +7,7 @@ the its ability to work on columns of text and allows for arbitrary
 commands to be applied per column in a similar way to `xargs`.
 
 The columns are called `fields`. The command to execute on each field is called
-an `x-arg`. 
+a `y-arg`. 
 
 # Usage
 
@@ -40,7 +39,7 @@ input:
     /long/path/to/some/ebook.pdf   | Title Of Ebook
                                      ____
                                        |
-    example usage:                     | x:arg
+    example usage:                     | y-arg
     --------------                     |                     
                           ----------------                       
     yargs 'basename {}' "awk { print $1 }"
diff --git a/src/lib.rs b/src/lib.rs
index 397fd5a..d6cbb9f 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,106 +1,21 @@
 //DEBUG:
 #![allow(dead_code)]
 
-use regex::Regex;
+use anyhow::Result;
 use std::ops::Deref;
-use anyhow::{anyhow, Result};
+use parse::split_columns;
 
-pub mod input;
+pub mod stdin;
+pub mod parse;
 
 pub const DEFAULT_SEP_PATTERN: &str = r"[\t]+";
 
 type Column = Vec<String>;
-// type Columns = Vec<Column>;
 
 #[derive(Clone, Debug)]
 pub struct Columns(Vec<Column>);
 
 
-/// split input text into columns based on separator character
-/// returns a type representing a variable length array of strings (columns) ?
-///
-/// TODO:
-///
-///   error handling
-///   accept &str and String
-///
-
-pub struct InputText<'a> {
-    raw: &'a str,
-
-    sep: String,
-}
-
-impl<'a> InputText<'a> {
-
-    pub fn new(raw: &'a str, sep: &str) -> Self {
-       InputText {
-           raw: raw.into(),
-           sep: sep.into()
-       } 
-    }
-
-    pub fn n_cols(&self) -> Result<usize> {
-        // read the first line stripping empty lines
-        let lines: Vec<&str> = self.raw.trim().lines().collect();
-        // eprintln!("lines: {:?}", lines);
-
-        let re = Regex::new(&self.sep).unwrap();
-
-        // count number of columns
-        match lines.first() {
-            Some(line) => Ok(re.split(line).count()),
-            None => return Err(anyhow!("no lines left")),
-        }
-    }
-}
-
-/// Return the number of columns given input text and a separator
-pub fn n_columns(text: &str, sep: &str) -> Result<usize> {
-    // read the first line stripping empty lines
-    let lines: Vec<&str> = text.trim().lines().collect();
-    // eprintln!("lines: {:?}", lines);
-
-    let re = Regex::new(sep).unwrap();
-
-    // count number of columns
-    match lines.first() {
-        Some(line) => Ok(re.split(line).count()),
-        None => return Err(anyhow!("no lines left")),
-    }
-}
-
-pub fn split_columns(text: &str, sep: &str) -> Result<Columns> {
-    let re = Regex::new(sep).unwrap();
-
-    // eprintln!("# columns: {n_col}");
-    let lines: Vec<&str> = text.trim().lines().collect();
-
-    let n_col = n_columns(text, sep)?;
-    let mut columns = vec![Column::new(); n_col];
-
-    for (n, line) in lines.iter().enumerate() {
-        // eprintln!("checking line {}", i);
-
-        let new_n_col = re.split(line).count();
-
-        if new_n_col != n_col {
-            return Err(anyhow!(
-                "unmatched column: expected {n_col} got {new_n_col} on line {}",
-                n + 1
-            ));
-        }
-        // eprintln!("number of columns: {}", columns.len());
-
-        for (c_idx, col) in re.split(line).enumerate() {
-            columns[c_idx].push(col.to_string())
-        }
-    }
-
-    eprintln!("{:?}", columns);
-
-    Ok(Columns(columns))
-}
 
 impl Deref for Columns {
     type Target = Vec<Vec<String>>;
@@ -120,65 +35,3 @@ impl TryFrom<&str> for Columns {
     }
 }
 
-#[cfg(test)]
-mod tests {
-    use crate::{Columns, split_columns, DEFAULT_SEP_PATTERN};
-    use crate::Regex;
-    use std::error::Error;
-
-    type TestResult = Result<(), Box<dyn Error>>;
-
-    #[test]
-    fn test_split_columns_default_sep() -> TestResult {
-        let coltext1 = "
-file1.txt\t\ttitle1
-file2.pdf\t\ttitle2
-file3\t\t\ttitle3
-file with space \textra
-        ";
-        let columns = split_columns(coltext1, DEFAULT_SEP_PATTERN)?;
-
-        // should have two columns
-        assert_eq!(2, columns.clone().len());
-
-        assert_eq!(
-            vec!["file1.txt", "file2.pdf", "file3", "file with space "],
-            columns[0]
-        );
-        Ok(())
-    }
-
-    #[test]
-    #[should_panic]
-    fn test_wrong_ncol_default_sep() {
-        let coltext1 = "
-file1.txt\t\ttitle1
-file2.pdf\t\ttitle2
-file3\t\t\ttitle3
-file with space\ttitle 4\textra
-        ";
-        split_columns(coltext1, DEFAULT_SEP_PATTERN).unwrap();
-    }
-
-    // #[test]
-    fn test_re_split() {
-        let text = "this is		two tabs";
-        let re = Regex::new(r"[\t]+").unwrap();
-        let fields: Vec<&str> = re.split(text).collect();
-        eprintln!("{:?}", fields);
-        assert!(false);
-    }
-
-    #[test]
-    fn test_columns_from_str() {
-        let res: Columns = "first column\tsecond column\t\tthird column"
-            .try_into()
-            .unwrap();
-        assert_eq!(res.len(), 3);
-    }
-
-    #[test]
-    fn test_input_text(){
-        // it =
-    }
-}
diff --git a/src/main.rs b/src/main.rs
index b31e948..ae3bd2c 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -9,9 +9,11 @@
 #![allow(unused_imports)]
 use clap::{Parser,CommandFactory};
 use clap::error::ErrorKind;
-use yargs::{DEFAULT_SEP_PATTERN, split_columns, input};
+use yargs::{DEFAULT_SEP_PATTERN, stdin};
+use yargs::parse::InputText;
 use anyhow::Result;
 use std::io::{BufRead, Read, BufReader, stdin};
+use std::process;
 
 
 #[derive(Parser)]
@@ -28,10 +30,10 @@ use std::io::{BufRead, Read, BufReader, stdin};
 #[command(author="blob42")]
 #[command(version="0.1")]
 struct Cli {
-    /// separator character used to split text into columns
+    /// Regex used to split input into columns
     #[arg(default_value=DEFAULT_SEP_PATTERN)]
     #[arg(short)]
-    delimiter: Option<String>,
+    delimiter: String,
 
     //TODO:
     // -f --field
@@ -52,8 +54,10 @@ fn main() -> Result<()> {
     //     process::exit(1);
     // }
 
+
     if cli.verbose > 0 {
-        println!("{:?}", cli);
+        eprintln!("======\nDEBUG:\n");
+        eprintln!("{:?}", cli);
 
         for cmd in &cli.yargs {
             println!("- {}", cmd);
@@ -75,18 +79,35 @@ fn main() -> Result<()> {
     // Read commands as positional args
 
     // Read input from stdin
-    let raw_input = input::read_stdin()?;
-    let input_text = yargs::InputText::new(&raw_input, yargs::DEFAULT_SEP_PATTERN);
+    let raw_input = stdin::read_stdin()?;
+    let input_text = InputText::new(&raw_input, &cli.delimiter);
+
+    let n_cols = match input_text.n_cols() {
+        Err(e) => {
+            eprintln!("error parsing input: {}", e);
+            process::exit(1)
+        },
+        Ok(n) => n,
+    };
 
     // Check that n args <= input cols
     if cli.yargs.len() > input_text.n_cols()? {
-        panic!("too many arguments");
+        // panic!("too many arguments");
+        eprint!("too many arguments for delimiter={:?}", input_text.sep);
+        process::exit(1);
+    }
+
+    if cli.verbose > 0 {
+        eprintln!("detected {n_cols} colunms");
+        eprintln!("======");
     }
-    // assert_eq!(input_text.n_cols()?, cli.yargs.len());
+
+    assert!(input_text.n_cols()? >= cli.yargs.len());
     
 
 
 
+    // TODO: RESULT
     print!("{}", raw_input);
 
 
diff --git a/src/parse.rs b/src/parse.rs
new file mode 100644
index 0000000..fbe5569
--- /dev/null
+++ b/src/parse.rs
@@ -0,0 +1,159 @@
+
+use super::{Columns, Column};
+use anyhow::{anyhow, Result};
+use regex::Regex;
+
+/// Raw input text paired with the separator pattern used to split it into
+/// columns.
+///
+/// `raw` borrows the caller's text for `'a`; `sep` is an owned copy of the
+/// separator pattern, which `n_cols` compiles as a regular expression.
+///
+/// TODO: error handling
+/// TODO: accept both `&str` and `String`
+
+#[derive(Debug)]
+pub struct InputText<'a> {
+    raw: &'a str,
+
+    pub sep: String,
+}
+
+impl<'a> InputText<'a> {
+    /// Wraps `raw` together with an owned copy of the separator pattern `sep`.
+    pub fn new(raw: &'a str, sep: &str) -> Self {
+        InputText { raw, sep: sep.into() }
+    }
+
+    /// Returns the number of columns on the first line of the trimmed input,
+    /// or `0` when the input holds no lines at all.
+    ///
+    /// # Errors
+    ///
+    /// Fails when `sep` is not a valid regular expression; the pattern is
+    /// user supplied, so this is reported instead of panicking.
+    pub fn n_cols(&self) -> Result<usize> {
+        let re = Regex::new(&self.sep)?;
+
+        // Only the first line is inspected, so there is no need to collect
+        // every line of the input.
+        let first_line = self.raw.trim().lines().next();
+        Ok(first_line.map_or(0, |line| re.split(line).count()))
+    }
+
+    /// Returns the length of the raw input text in bytes.
+    pub fn len(&self) -> usize {
+        self.raw.len()
+    }
+}
+
+/// Returns the number of columns found on the first line of `text`.
+///
+/// `text` is trimmed first, so leading blank lines are ignored. `sep` is a
+/// regular expression matching the column separator.
+///
+/// # Errors
+///
+/// Fails when `sep` is not a valid regular expression (instead of panicking
+/// on this caller-supplied pattern) or when `text` contains no lines.
+pub fn n_columns(text: &str, sep: &str) -> Result<usize> {
+    let re = Regex::new(sep)?;
+    let first_line = text.trim().lines().next().ok_or_else(|| anyhow!("no lines left"))?;
+    Ok(re.split(first_line).count())
+}
+
+/// Splits `text` into columns, using the regular expression `sep` as the
+/// column separator.
+///
+/// The first line of the trimmed `text` fixes the expected column count;
+/// column `i` of the result collects field `i` of every line, in order.
+///
+/// # Errors
+///
+/// Fails when `sep` is not a valid regular expression, when `text` has no
+/// lines, or when a line's field count differs from the first line's.
+pub fn split_columns(text: &str, sep: &str) -> Result<Columns> {
+    let re = Regex::new(sep)?;
+    let lines: Vec<&str> = text.trim().lines().collect();
+
+    let first = lines.first().ok_or_else(|| anyhow!("no lines left"))?;
+    let n_col = re.split(first).count();
+    let mut columns = vec![Column::new(); n_col];
+
+    for (n, line) in lines.iter().enumerate() {
+        // Split once and reuse the fields for both the check and the copy.
+        let fields: Vec<&str> = re.split(line).collect();
+        let (new_n_col, line_no) = (fields.len(), n + 1);
+        if new_n_col != n_col {
+            return Err(anyhow!("unmatched column: expected {n_col} got {new_n_col} on line {line_no}"));
+        }
+        for (column, field) in columns.iter_mut().zip(fields) {
+            column.push(field.to_string());
+        }
+    }
+    Ok(Columns(columns))
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::DEFAULT_SEP_PATTERN;
+    use regex::Regex;
+    use std::error::Error;
+
+    type TestResult = Result<(), Box<dyn Error>>;
+
+    /// Runs of tabs separate columns; spaces inside a field are preserved.
+    #[test]
+    fn test_split_columns_default_sep() -> TestResult {
+        let coltext1 = "
+file1.txt\t\ttitle1
+file2.pdf\t\ttitle2
+file3\t\t\ttitle3
+file with space \textra
+        ";
+        let columns = split_columns(coltext1, DEFAULT_SEP_PATTERN)?;
+        // should have two columns
+        assert_eq!(2, columns.len());
+        assert_eq!(
+            vec!["file1.txt", "file2.pdf", "file3", "file with space "],
+            columns[0]
+        );
+        Ok(())
+    }
+
+    /// A line with more fields than the first line must be rejected.
+    #[test]
+    #[should_panic]
+    fn test_wrong_ncol_default_sep() {
+        let coltext1 = "
+file1.txt\t\ttitle1
+file2.pdf\t\ttitle2
+file3\t\t\ttitle3
+file with space\ttitle 4\textra
+        ";
+        split_columns(coltext1, DEFAULT_SEP_PATTERN).unwrap();
+    }
+
+    /// Consecutive tabs count as a single separator.
+    #[test]
+    fn test_re_split() {
+        let re = Regex::new(r"[\t]+").unwrap();
+        let fields: Vec<&str> = re.split("this is\t\ttwo tabs").collect();
+        assert_eq!(vec!["this is", "two tabs"], fields);
+    }
+
+    #[test]
+    fn test_columns_from_str() {
+        let res: Columns = "first column\tsecond column\t\tthird column".try_into().unwrap();
+        assert_eq!(res.len(), 3);
+    }
+
+    /// `n_cols` counts the fields of the first line; empty input has none.
+    #[test]
+    fn test_input_text() {
+        let it = InputText::new("a\tb\nc\td", DEFAULT_SEP_PATTERN);
+        assert_eq!(2, it.n_cols().unwrap());
+        assert_eq!(0, InputText::new("", DEFAULT_SEP_PATTERN).n_cols().unwrap());
+    }
+}
diff --git a/src/input.rs b/src/stdin.rs
similarity index 100%
rename from src/input.rs
rename to src/stdin.rs
diff --git a/tests/cli.rs b/tests/cli.rs
index 16f8226..2267127 100644
--- a/tests/cli.rs
+++ b/tests/cli.rs
@@ -8,19 +8,23 @@ use std::fs::read_to_string;
 
 type TestResult = Result<(), Box<dyn Error>>;
 
+// empty stdin should succeed and produce no output
 #[test]
 fn pass(){
     let mut cmd = Command::cargo_bin("yargs").unwrap();
-    cmd.assert().success();
+    let assert = cmd
+        .write_stdin("")
+        .assert();
+    assert.success().stdout("");
 }
 
 
 
+#[test]
 // input with many columns
 // no positional arguments
 // behaves like cat
-#[test]
-fn pass_noargs() -> TestResult {
+fn pass_columns_no_args() -> TestResult {
     let input = Path::new("tests/inputs/input1");
 
     let mut cmd = Command::cargo_bin("yargs").unwrap();
@@ -31,3 +35,20 @@ fn pass_noargs() -> TestResult {
     Ok(())
 }
 
+
+#[test]
+// should fail if more yargs are provided than detected columns
+fn fail_yargs_mismatch1() -> TestResult {
+    let input = Path::new("tests/inputs/input1");
+
+    // input1 looks space-separated: one column with the default tab delimiter
+    let assert = Command::cargo_bin("yargs")?
+        .args(["one", "two"])
+        .pipe_stdin(input)?
+        .assert();
+    // exit status 1 is the reported mismatch; a panic would exit with 101
+    assert.failure().code(1);
+    Ok(())
+}
+
+
diff --git a/tests/inputs/input1 b/tests/inputs/input1
index 6fc0b8e..aaac8ed 100644
--- a/tests/inputs/input1
+++ b/tests/inputs/input1
@@ -1,20 +1,8 @@
-.rw-r--r--  894 root  9 Sep 22:19 coredump.conf
-.rw-r--r--  540 root 20 Sep  2020 homed.conf
-.rw-r--r--  766 root  9 Sep 22:19 homed.conf.pacnew
-.rw-r--r--  894 root  9 Sep 22:19 journal-remote.conf
-.rw-r--r--  822 root  9 Sep 22:19 journal-upload.conf
-.rw-r--r-- 1.3k root  9 Sep 22:19 journald.conf
-.rw-r--r-- 1.1k root 24 Feb  2022 logind.conf
-.rw-r--r-- 1.5k root  9 Sep 22:19 logind.conf.pacnew
-drwxr-xr-x    - root 25 Jan 18:33 network
-.rw-r--r--  872 root  9 Sep 22:19 networkd.conf
-.rw-r--r--  804 root  9 Sep 22:19 oomd.conf
-.rw-r--r--  670 root  9 Sep 22:19 pstore.conf
-.rw-r--r--  763 root 19 Aug  2020 resolved.conf
-.rw-r--r-- 1.6k root  9 Sep 22:19 resolved.conf.pacnew
-.rw-r--r--  953 root  9 Sep 22:19 sleep.conf
-drwxr-xr-x    - root  2 Feb 18:04 system
-.rw-r--r-- 2.2k root  9 Sep 22:19 system.conf
-.rw-r--r--  856 root  9 Sep 22:19 timesyncd.conf
-drwxr-xr-x    - root  9 Nov  2022 user
-.rw-r--r-- 1.6k root  9 Sep 22:19 user.conf
+.rw-rw-r--  11k blob42 21 Sep 23:25 Cargo.lock
+.rw-rw-r--  303 blob42 21 Sep 23:25 Cargo.toml
+drwxrwxr-x    - blob42 10 Sep 12:27 examples
+.rw-rw-r-- 1.6k blob42  1 Oct 17:24 README.md
+drwxrwxr-x    - blob42 21 Sep 23:31 src
+drwxrwxr-x    - blob42  1 Oct 17:26 target
+drwxrwxr-x    - blob42 21 Sep 00:44 tests
+.rw-rw-r--  222 blob42 17 Sep 19:03 TODO.md