add cli tests + refactor

main
blob42 7 months ago
parent 564147daae
commit b6e93bd5cc

@ -1,5 +1,4 @@
Very much work in progress. This is the first project in my Rust learning journey.
Expect a lot of changes and refactoring.
WIP pet project.
# yargs
@ -8,7 +7,7 @@ the its ability to work on columns of text and allows for arbitrary
commands to be applied per column in a similar way to `xargs`.
The columns are called `fields`. The command to execute on each field is called
an `x-arg`.
a `y-arg`.
# Usage
@ -40,7 +39,7 @@ input:
/long/path/to/some/ebook.pdf | Title Of Ebook
____
|
example usage: | x:arg
example usage: | y-arg
-------------- |
----------------
yargs 'basename {}' "awk { print $1 }"

@ -1,106 +1,21 @@
//DEBUG:
#![allow(dead_code)]
use regex::Regex;
use anyhow::Result;
use std::ops::Deref;
use anyhow::{anyhow, Result};
use parse::split_columns;
pub mod input;
pub mod stdin;
pub mod parse;
/// Default separator pattern: a regex matching a run of one or more tab characters.
pub const DEFAULT_SEP_PATTERN: &str = r"[\t]+";
/// A single column of text: one `String` entry per input line.
type Column = Vec<String>;
// type Columns = Vec<Column>;
/// The columns parsed from an input text, stored as a list of [`Column`]s.
#[derive(Clone, Debug)]
pub struct Columns(Vec<Column>);
/// Raw input text paired with the regex pattern used to split it into columns.
///
/// The text is borrowed for the lifetime `'a`; the separator pattern is stored
/// as an owned `String` and compiled each time it is needed.
///
/// TODO: accept both `&str` and `String` as input.
pub struct InputText<'a> {
    raw: &'a str,
    sep: String,
}

impl<'a> InputText<'a> {
    /// Wraps `raw` together with the separator pattern `sep`.
    ///
    /// `sep` is not validated here; an invalid pattern is reported by
    /// [`InputText::n_cols`].
    pub fn new(raw: &'a str, sep: &str) -> Self {
        InputText {
            raw,
            sep: sep.into(),
        }
    }

    /// Returns the number of columns found on the first line of the input.
    ///
    /// Leading and trailing whitespace of the whole input is trimmed before
    /// the first line is read.
    ///
    /// # Errors
    ///
    /// Returns an error if the separator is not a valid regex, or if the
    /// trimmed input contains no lines.
    pub fn n_cols(&self) -> Result<usize> {
        // Report an invalid pattern to the caller instead of panicking.
        let re = Regex::new(&self.sep)?;
        // Only the first line is needed, so there is no reason to collect them all.
        let first = self
            .raw
            .trim()
            .lines()
            .next()
            .ok_or_else(|| anyhow!("no lines left"))?;
        Ok(re.split(first).count())
    }
}
/// Return the number of columns given input text and a separator
///
/// The whole text is trimmed first; the count is taken from the first
/// remaining line, split on the regex pattern `sep`.
///
/// # Errors
///
/// Returns an error if `sep` is not a valid regex, or if the trimmed text
/// contains no lines.
pub fn n_columns(text: &str, sep: &str) -> Result<usize> {
    // Report an invalid pattern to the caller instead of panicking.
    let re = Regex::new(sep)?;
    // Only the first line is needed, so there is no reason to collect them all.
    let first = text
        .trim()
        .lines()
        .next()
        .ok_or_else(|| anyhow!("no lines left"))?;
    Ok(re.split(first).count())
}
/// Splits `text` into columns using the regex pattern `sep`.
///
/// The text is trimmed, then every line is split on `sep`. The first line
/// fixes the expected number of columns; field `i` of each line is appended
/// to column `i` of the result.
///
/// # Errors
///
/// Returns an error if `sep` is not a valid regex, if the trimmed text has no
/// lines, or if any line has a different number of fields than the first one
/// (the message carries the 1-based line number).
pub fn split_columns(text: &str, sep: &str) -> Result<Columns> {
    // Report an invalid pattern to the caller instead of panicking.
    let re = Regex::new(sep)?;
    let n_col = n_columns(text, sep)?;
    let mut columns = vec![Column::new(); n_col];

    for (n, line) in text.trim().lines().enumerate() {
        // Split once and reuse the fields for both the check and the fill.
        let fields: Vec<&str> = re.split(line).collect();
        let new_n_col = fields.len();
        if new_n_col != n_col {
            return Err(anyhow!(
                "unmatched column: expected {n_col} got {new_n_col} on line {}",
                n + 1
            ));
        }
        // Lengths are equal at this point, so zip visits every field.
        for (column, field) in columns.iter_mut().zip(fields) {
            column.push(field.to_string());
        }
    }

    Ok(Columns(columns))
}
impl Deref for Columns {
type Target = Vec<Vec<String>>;
@ -120,65 +35,3 @@ impl TryFrom<&str> for Columns {
}
}
#[cfg(test)]
mod tests {
    use crate::Regex;
    use crate::{split_columns, Columns, DEFAULT_SEP_PATTERN};
    use std::error::Error;

    type TestResult = Result<(), Box<dyn Error>>;

    /// Runs of tabs of any length separate exactly two columns.
    #[test]
    fn test_split_columns_default_sep() -> TestResult {
        let coltext1 = "
file1.txt\t\ttitle1
file2.pdf\t\ttitle2
file3\t\t\ttitle3
file with space \textra
";
        let columns = split_columns(coltext1, DEFAULT_SEP_PATTERN)?;

        // should have two columns
        assert_eq!(2, columns.len());
        assert_eq!(
            vec!["file1.txt", "file2.pdf", "file3", "file with space "],
            columns[0]
        );
        Ok(())
    }

    /// A line whose field count differs from the first line must be rejected
    /// with an error rather than accepted.
    #[test]
    fn test_wrong_ncol_default_sep() {
        let coltext1 = "
file1.txt\t\ttitle1
file2.pdf\t\ttitle2
file3\t\t\ttitle3
file with space\ttitle 4\textra
";
        assert!(split_columns(coltext1, DEFAULT_SEP_PATTERN).is_err());
    }

    // Scratch helper for inspecting how the regex splits a line. It is not
    // registered with the test harness and always fails if called.
    // #[test]
    fn test_re_split() {
        let text = "this is two tabs";
        let re = Regex::new(r"[\t]+").unwrap();
        let fields: Vec<&str> = re.split(text).collect();
        eprintln!("{:?}", fields);
        assert!(false);
    }

    /// Converting a single tab-separated line yields one column per field.
    #[test]
    fn test_columns_from_str() {
        let res: Columns = "first column\tsecond column\t\tthird column"
            .try_into()
            .unwrap();
        assert_eq!(res.len(), 3);
    }

    // TODO: placeholder, no assertions yet
    #[test]
    fn test_input_text() {
        // it =
    }
}

@ -9,9 +9,11 @@
#![allow(unused_imports)]
use clap::{Parser,CommandFactory};
use clap::error::ErrorKind;
use yargs::{DEFAULT_SEP_PATTERN, split_columns, input};
use yargs::{DEFAULT_SEP_PATTERN, stdin};
use yargs::parse::InputText;
use anyhow::Result;
use std::io::{BufRead, Read, BufReader, stdin};
use std::process;
#[derive(Parser)]
@ -28,10 +30,10 @@ use std::io::{BufRead, Read, BufReader, stdin};
#[command(author="blob42")]
#[command(version="0.1")]
struct Cli {
/// separator character used to split text into columns
/// Regex used to split input into columns
#[arg(default_value=DEFAULT_SEP_PATTERN)]
#[arg(short)]
delimiter: Option<String>,
delimiter: String,
//TODO:
// -f --field
@ -52,8 +54,10 @@ fn main() -> Result<()> {
// process::exit(1);
// }
if cli.verbose > 0 {
println!("{:?}", cli);
eprintln!("======\nDEBUG:\n");
eprintln!("{:?}", cli);
for cmd in &cli.yargs {
println!("- {}", cmd);
@ -75,18 +79,35 @@ fn main() -> Result<()> {
// Read commands as positional args
// Read input from stdin
let raw_input = input::read_stdin()?;
let input_text = yargs::InputText::new(&raw_input, yargs::DEFAULT_SEP_PATTERN);
let raw_input = stdin::read_stdin()?;
let input_text = InputText::new(&raw_input, &cli.delimiter);
let n_cols = match input_text.n_cols() {
Err(e) => {
eprintln!("error parsing input: {}", e);
process::exit(1)
},
Ok(n) => n,
};
// Check that n args <= input cols
if cli.yargs.len() > input_text.n_cols()? {
panic!("too many arguments");
// panic!("too many arguments");
eprint!("too many arguments for delimiter={:?}", input_text.sep);
process::exit(1);
}
if cli.verbose > 0 {
eprintln!("detected {n_cols} colunms");
eprintln!("======");
}
// assert_eq!(input_text.n_cols()?, cli.yargs.len());
assert!(input_text.n_cols()? >= cli.yargs.len());
// TODO: RESULT
print!("{}", raw_input);

@ -0,0 +1,159 @@
use super::{Columns, Column};
use anyhow::{anyhow, Result};
use regex::Regex;
/// Raw input text paired with the regex pattern used to split it into columns.
///
/// The text is borrowed for the lifetime `'a`; the separator pattern is stored
/// as an owned `String` and compiled each time it is needed.
///
/// TODO: accept both `&str` and `String` as input.
#[derive(Debug)]
pub struct InputText<'a> {
    raw: &'a str,
    /// Regex pattern that separates columns.
    pub sep: String,
}

impl<'a> InputText<'a> {
    /// Wraps `raw` together with the separator pattern `sep`.
    ///
    /// `sep` is not validated here; an invalid pattern is reported by
    /// [`InputText::n_cols`].
    pub fn new(raw: &'a str, sep: &str) -> Self {
        InputText {
            raw,
            sep: sep.into(),
        }
    }

    /// Returns the number of columns found on the first line of the input.
    ///
    /// Leading and trailing whitespace of the whole input is trimmed before
    /// the first line is read. Input with no lines yields `Ok(0)`.
    ///
    /// # Errors
    ///
    /// Returns an error if the separator is not a valid regex.
    pub fn n_cols(&self) -> Result<usize> {
        // Report an invalid pattern to the caller instead of panicking.
        let re = Regex::new(&self.sep)?;
        // Only the first line is needed, so there is no reason to collect them all.
        Ok(self
            .raw
            .trim()
            .lines()
            .next()
            .map_or(0, |first| re.split(first).count()))
    }

    /// Length in bytes of the raw, untrimmed input.
    ///
    /// Borrows `self` so the value stays usable after the call.
    pub fn len(&self) -> usize {
        self.raw.len()
    }

    /// Returns `true` when the raw input is zero bytes long.
    pub fn is_empty(&self) -> bool {
        self.raw.is_empty()
    }
}
/// Return the number of columns given input text and a separator
///
/// The whole text is trimmed first; the count is taken from the first
/// remaining line, split on the regex pattern `sep`.
///
/// # Errors
///
/// Returns an error if `sep` is not a valid regex, or if the trimmed text
/// contains no lines.
pub fn n_columns(text: &str, sep: &str) -> Result<usize> {
    // Report an invalid pattern to the caller instead of panicking.
    let re = Regex::new(sep)?;
    // Only the first line is needed, so there is no reason to collect them all.
    let first = text
        .trim()
        .lines()
        .next()
        .ok_or_else(|| anyhow!("no lines left"))?;
    Ok(re.split(first).count())
}
/// Splits `text` into columns using the regex pattern `sep`.
///
/// The text is trimmed, then every line is split on `sep`. The first line
/// fixes the expected number of columns; field `i` of each line is appended
/// to column `i` of the result.
///
/// # Errors
///
/// Returns an error if `sep` is not a valid regex, if the trimmed text has no
/// lines, or if any line has a different number of fields than the first one
/// (the message carries the 1-based line number).
pub fn split_columns(text: &str, sep: &str) -> Result<Columns> {
    // Report an invalid pattern to the caller instead of panicking.
    let re = Regex::new(sep)?;
    let n_col = n_columns(text, sep)?;
    let mut columns = vec![Column::new(); n_col];

    for (n, line) in text.trim().lines().enumerate() {
        // Split once and reuse the fields for both the check and the fill.
        let fields: Vec<&str> = re.split(line).collect();
        let new_n_col = fields.len();
        if new_n_col != n_col {
            return Err(anyhow!(
                "unmatched column: expected {n_col} got {new_n_col} on line {}",
                n + 1
            ));
        }
        // Lengths are equal at this point, so zip visits every field.
        for (column, field) in columns.iter_mut().zip(fields) {
            column.push(field.to_string());
        }
    }

    Ok(Columns(columns))
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::DEFAULT_SEP_PATTERN;
    use regex::Regex;
    use std::error::Error;

    type TestResult = Result<(), Box<dyn Error>>;

    /// Runs of tabs of any length separate exactly two columns.
    #[test]
    fn test_split_columns_default_sep() -> TestResult {
        let coltext1 = "
file1.txt\t\ttitle1
file2.pdf\t\ttitle2
file3\t\t\ttitle3
file with space \textra
";
        let columns = split_columns(coltext1, DEFAULT_SEP_PATTERN)?;

        // should have two columns
        assert_eq!(2, columns.len());
        assert_eq!(
            vec!["file1.txt", "file2.pdf", "file3", "file with space "],
            columns[0]
        );
        Ok(())
    }

    /// A line whose field count differs from the first line must be rejected
    /// with an error rather than accepted.
    #[test]
    fn test_wrong_ncol_default_sep() {
        let coltext1 = "
file1.txt\t\ttitle1
file2.pdf\t\ttitle2
file3\t\t\ttitle3
file with space\ttitle 4\textra
";
        assert!(split_columns(coltext1, DEFAULT_SEP_PATTERN).is_err());
    }

    // Scratch helper for inspecting how the regex splits a line. It is not
    // registered with the test harness and always fails if called.
    // #[test]
    fn test_re_split() {
        let text = "this is two tabs";
        let re = Regex::new(r"[\t]+").unwrap();
        let fields: Vec<&str> = re.split(text).collect();
        eprintln!("{:?}", fields);
        assert!(false);
    }

    /// Converting a single tab-separated line yields one column per field.
    #[test]
    fn test_columns_from_str() {
        let res: Columns = "first column\tsecond column\t\tthird column"
            .try_into()
            .unwrap();
        assert_eq!(res.len(), 3);
    }

    // TODO: placeholder, no assertions yet
    #[test]
    fn test_input_text() {
        // it =
    }
}

@ -8,19 +8,23 @@ use std::fs::read_to_string;
type TestResult = Result<(), Box<dyn Error>>;
// Empty stdin: the binary is expected to write nothing to stdout.
// NOTE(review): the two runs below look like an older and a newer form of
// the same check — confirm both are meant to stay.
#[test]
fn pass(){
let mut cmd = Command::cargo_bin("yargs").unwrap();
// First run, before any stdin is configured: must exit successfully.
cmd.assert().success();
// Second run, with an explicitly empty stdin.
let assert = cmd
.write_stdin("")
.assert();
// stdout must be exactly the empty string
assert.stdout("");
}
#[test]
// input with many columns
// no positional arguments
// behaves like cat
#[test]
fn pass_noargs() -> TestResult {
fn pass_columns_no_args() -> TestResult {
let input = Path::new("tests/inputs/input1");
let mut cmd = Command::cargo_bin("yargs").unwrap();
@ -31,3 +35,20 @@ fn pass_noargs() -> TestResult {
Ok(())
}
/// Should fail when more yargs are provided than columns are detected.
#[test]
fn fail_yargs_mismatch1() -> TestResult {
    let fixture = Path::new("tests/inputs/input1");

    let mut yargs = Command::cargo_bin("yargs").unwrap();
    yargs.args(&["one", "two"]);
    yargs.pipe_stdin(fixture)?;

    // Two positional commands against the fixture must be rejected.
    yargs.assert().failure();
    Ok(())
}

@ -1,20 +1,8 @@
.rw-r--r-- 894 root 9 Sep 22:19 coredump.conf
.rw-r--r-- 540 root 20 Sep 2020 homed.conf
.rw-r--r-- 766 root 9 Sep 22:19 homed.conf.pacnew
.rw-r--r-- 894 root 9 Sep 22:19 journal-remote.conf
.rw-r--r-- 822 root 9 Sep 22:19 journal-upload.conf
.rw-r--r-- 1.3k root 9 Sep 22:19 journald.conf
.rw-r--r-- 1.1k root 24 Feb 2022 logind.conf
.rw-r--r-- 1.5k root 9 Sep 22:19 logind.conf.pacnew
drwxr-xr-x - root 25 Jan 18:33 network
.rw-r--r-- 872 root 9 Sep 22:19 networkd.conf
.rw-r--r-- 804 root 9 Sep 22:19 oomd.conf
.rw-r--r-- 670 root 9 Sep 22:19 pstore.conf
.rw-r--r-- 763 root 19 Aug 2020 resolved.conf
.rw-r--r-- 1.6k root 9 Sep 22:19 resolved.conf.pacnew
.rw-r--r-- 953 root 9 Sep 22:19 sleep.conf
drwxr-xr-x - root 2 Feb 18:04 system
.rw-r--r-- 2.2k root 9 Sep 22:19 system.conf
.rw-r--r-- 856 root 9 Sep 22:19 timesyncd.conf
drwxr-xr-x - root 9 Nov 2022 user
.rw-r--r-- 1.6k root 9 Sep 22:19 user.conf
.rw-rw-r-- 11k blob42 21 Sep 23:25 Cargo.lock
.rw-rw-r-- 303 blob42 21 Sep 23:25 Cargo.toml
drwxrwxr-x - blob42 10 Sep 12:27 examples
.rw-rw-r-- 1.6k blob42 1 Oct 17:24 README.md
drwxrwxr-x - blob42 21 Sep 23:31 src
drwxrwxr-x - blob42 1 Oct 17:26 target
drwxrwxr-x - blob42 21 Sep 00:44 tests
.rw-rw-r-- 222 blob42 17 Sep 19:03 TODO.md

Loading…
Cancel
Save