add assert_cmd, wip cli tests, capture stdin

main
blob42 7 months ago
parent 7d073c6eda
commit 564147daae

134
Cargo.lock generated

@ -59,11 +59,43 @@ dependencies = [
"windows-sys",
]
[[package]]
name = "anyhow"
version = "1.0.75"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a4668cab20f66d8d020e1fbc0ebe47217433c1b6c8f2040faf858554e394ace6"
[[package]]
name = "assert_cmd"
version = "2.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "88903cb14723e4d4003335bb7f8a14f27691649105346a0f0957466c096adfe6"
dependencies = [
"anstyle",
"bstr",
"doc-comment",
"predicates",
"predicates-core",
"predicates-tree",
"wait-timeout",
]
[[package]]
name = "bstr"
version = "1.6.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c2f7349907b712260e64b0afe2f84692af14a454be26187d9df565c7f69266a"
dependencies = [
"memchr",
"regex-automata",
"serde",
]
[[package]]
name = "clap"
version = "4.4.2"
version = "4.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6a13b88d2c62ff462f88e4a121f17a82c1af05693a2f192b5c38d14de73c19f6"
checksum = "84ed82781cea27b43c9b106a979fe450a13a31aab0500595fb3fc06616de08e6"
dependencies = [
"clap_builder",
"clap_derive",
@ -105,18 +137,79 @@ version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7"
[[package]]
name = "difflib"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6184e33543162437515c2e2b48714794e37845ec9851711914eec9d308f6ebe8"
[[package]]
name = "doc-comment"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10"
[[package]]
name = "either"
version = "1.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07"
[[package]]
name = "heck"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
[[package]]
name = "itertools"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57"
dependencies = [
"either",
]
[[package]]
name = "libc"
version = "0.2.148"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9cdc71e17332e86d2e1d38c1f99edcb6288ee11b815fb1a4b049eaa2114d369b"
[[package]]
name = "memchr"
version = "2.6.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f232d6ef707e1956a43342693d2a31e72989554d58299d7a88738cc95b0d35c"
[[package]]
name = "predicates"
version = "3.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6dfc28575c2e3f19cb3c73b93af36460ae898d426eba6fc15b9bd2a5220758a0"
dependencies = [
"anstyle",
"difflib",
"itertools",
"predicates-core",
]
[[package]]
name = "predicates-core"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b794032607612e7abeb4db69adb4e33590fa6cf1149e95fd7cb00e634b92f174"
[[package]]
name = "predicates-tree"
version = "1.0.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "368ba315fb8c5052ab692e68a0eefec6ec57b23a36959c14496f0b0df2c0cecf"
dependencies = [
"predicates-core",
"termtree",
]
[[package]]
name = "proc-macro2"
version = "1.0.66"
@ -164,6 +257,26 @@ version = "0.7.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da"
[[package]]
name = "serde"
version = "1.0.188"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf9e0fcba69a370eed61bcf2b728575f726b50b55cba78064753d708ddc7549e"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.188"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4eca7ac642d82aa35b60049a6eccb4be6be75e599bd2e9adb5f875a737654af2"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "strsim"
version = "0.10.0"
@ -181,6 +294,12 @@ dependencies = [
"unicode-ident",
]
[[package]]
name = "termtree"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3369f5ac52d5eb6ab48c6b4ffdc8efbcad6b89c765749064ba298f2c68a16a76"
[[package]]
name = "unicode-ident"
version = "1.0.11"
@ -193,6 +312,15 @@ version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a"
[[package]]
name = "wait-timeout"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9f200f5b12eb75f8c1ed65abd4b2db8a6e1b138a20de009dacee265a2498f3f6"
dependencies = [
"libc",
]
[[package]]
name = "windows-sys"
version = "0.48.0"
@ -263,6 +391,8 @@ checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538"
name = "yargs"
version = "0.1.0"
dependencies = [
"anyhow",
"assert_cmd",
"clap",
"regex",
]

@ -6,5 +6,9 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
anyhow = "1.0.75"
clap = { version = "4.0.26", features = ["derive"] }
regex = "1.9.1"
[dev-dependencies]
assert_cmd = "2.0.12"

@ -25,6 +25,12 @@ foo_cmd | yargs -f1 'basename {}' -f2 'awk { print $1 }'
foo_cmd | yargs 'basename {}' 'awk { print $2 }'
```
3. skipping fields
```shell
foo_cmd | yargs 'basename {}' - 'awk { print $2 }'
# keeps the second field unchanged
```
## Example
input:

@ -1,6 +1,6 @@
1. parse cli parameters
2. read from stdin
3. split stdin into columns (column/awk commands)
3. execute every field command on it's corresponding column
[ ] execute a command on first text column
3. map (execute) commands to fields
- [ ] execute a command on first text column
4. print resulting concatenated columns

@ -0,0 +1,68 @@
use std::path::PathBuf;
use clap::{Parser, Subcommand};
// Command-line interface for this example; the `Parser` derive builds the argument parser
// from the fields and attributes below.
#[derive(Parser)]
#[command(author, version, about, long_about = None)]
struct Cli {
/// Optional name to operate on
name: Option<String>,
/// Sets a custom config file
#[arg(short, long, value_name = "FILE")]
config: Option<PathBuf>,
/// Turn debugging information on
// Counted flag: `debug` holds how many times the flag was given on the command line.
#[arg(short, long, action = clap::ArgAction::Count)]
debug: u8,
// Optional subcommand; `None` when the user did not supply one.
#[command(subcommand)]
command: Option<Commands>,
}
// Subcommands accepted by `Cli`; each variant is one subcommand and its fields are that
// subcommand's own arguments.
#[derive(Subcommand)]
enum Commands {
/// does testing things
Test {
/// lists test values
// Boolean switch: `main` prints a different message depending on whether it is set.
#[arg(short, long)]
list: bool,
},
}
/// Parses the command line and reports back every value that was supplied.
fn main() {
    let args = Cli::parse();

    // Positional argument: only reported when present.
    if let Some(name) = &args.name {
        println!("Value for name: {name}");
    }

    // `--config` option: only reported when present.
    if let Some(path) = &args.config {
        println!("Value for config: {}", path.display());
    }

    // The flag is counted, so its value is the number of times it occurred.
    // Only flags can occur more than once.
    let debug_status = match args.debug {
        0 => "Debug mode is off",
        1 => "Debug mode is kind of on",
        2 => "Debug mode is on",
        _ => "Don't be crazy",
    };
    println!("{debug_status}");

    // Subcommands are matched just like top-level arguments; nothing is printed
    // when no subcommand was given.
    match &args.command {
        Some(Commands::Test { list: true }) => println!("Printing testing lists..."),
        Some(Commands::Test { list: false }) => println!("Not printing testing lists..."),
        None => {}
    }

    // Continued program logic goes here...
}

@ -0,0 +1,53 @@
// Using multiple positional arguments
// Detect dynamically how many positional arguments were passed and handle them
use std::path::PathBuf;
use clap::{Parser};
// Command-line interface for the multiple-positional-arguments example.
#[derive(Parser)]
#[command(author, version, about, long_about = None)]
struct Cli {
/// Sets a custom config file
#[arg(short, long, value_name = "FILE")]
config: Option<PathBuf>,
/// Turn debugging information on
// Counted flag: `debug` holds how many times the flag was given on the command line.
#[arg(short, long, action = clap::ArgAction::Count)]
debug: u8,
/// CMD to execute for each column of input text. 0 < N_CMD < NB_COLUMNS
// NOTE(review): `last = true` presumably means these values must follow a `--`
// separator on the command line — confirm against the clap documentation.
#[arg( last = true )]
commands: Vec<String>
}
/// Parses the command line, reports the options that were set, then lists
/// every positional command that was passed.
fn main() {
    let args = Cli::parse();

    // `--config` option: only reported when present.
    if let Some(path) = &args.config {
        println!("Value for config: {}", path.display());
    }

    // The flag is counted, so its value is the number of times it occurred.
    // Only flags can occur more than once.
    let debug_status = match args.debug {
        0 => "Debug mode is off",
        1 => "Debug mode is kind of on",
        2 => "Debug mode is on",
        3..=5 => "too much dude !",
        _ => "Don't be crazy",
    };
    println!("{debug_status}");

    // One line per positional command, in the order given, using `Debug` formatting.
    for command in &args.commands {
        println!("{command:?}");
    }

    // Continued program logic goes here...
}

@ -0,0 +1,6 @@
// struct YargsError;
//
// impl Error for YargsError {
// fn
// }

@ -0,0 +1,28 @@
use anyhow::Result;
use std::io::{BufReader, self, Read};
/// Reads everything available on standard input into a single string.
///
/// # Errors
///
/// Returns an error when reading from stdin fails, which includes input that
/// is not valid UTF-8.
// TODO: expose this as an iterator to avoid loading all of stdin into memory
pub fn read_stdin() -> Result<Box<String>> {
    let mut contents = Box::new(String::new());
    BufReader::new(io::stdin()).read_to_string(&mut contents)?;
    Ok(contents)
}
//
//NOTE: need deeper understanding of difference with prev implementation
// pub fn read_stdin() -> Result<Box<dyn BufRead>> {
// Ok(Box::new(BufReader::new(io::stdin())))
// }
#[cfg(test)]
mod tests {
// Placeholder module: its only test is commented out, so nothing runs from here yet.
// #[test]
// fn read_stdin() {
// panic!()
// }
}

@ -2,18 +2,12 @@
#![allow(dead_code)]
use regex::Regex;
use std::result;
use std::ops::Index;
use std::slice::SliceIndex;
use crate::parsing::DEFAULT_SEP_PATTERN;
use std::ops::Deref;
use anyhow::{anyhow, Result};
pub mod input;
pub mod parsing {
pub const DEFAULT_SEP_PATTERN: &str = r"[\t]+";
}
pub const DEFAULT_SEP_PATTERN: &str = r"[\t]+";
type Column = Vec<String>;
// type Columns = Vec<Column>;
@ -21,8 +15,6 @@ type Column = Vec<String>;
#[derive(Clone, Debug)]
pub struct Columns(Vec<Column>);
type Result<T> = result::Result<T, String>;
/// split input text into columns based on separator character
/// returns a type representing a variable length array of strings (columns) ?
@ -32,21 +24,59 @@ type Result<T> = result::Result<T, String>;
///  error handling
///  accept &str and String
///
pub fn split_columns(text: &str, sep: &str) -> Result<Columns> {
/// Borrowed input text paired with the separator pattern used to split it into columns.
pub struct InputText<'a> {
/// The raw input text, borrowed from the caller.
raw: &'a str,
/// Separator pattern; compiled as a regular expression when columns are counted.
sep: String,
}
impl<'a> InputText<'a> {
    /// Wraps `raw` together with the separator pattern `sep`.
    ///
    /// The pattern is stored as given; it is only compiled as a regular
    /// expression when the text is inspected, so an invalid pattern is
    /// reported by [`InputText::n_cols`] rather than here.
    pub fn new(raw: &'a str, sep: &str) -> Self {
        InputText {
            raw,
            sep: sep.to_owned(),
        }
    }

    /// Returns the number of columns found on the first line of the input.
    ///
    /// Surrounding whitespace is trimmed from the whole text first, so leading
    /// blank lines are skipped. The first remaining line is split on the
    /// separator pattern and the resulting fields are counted.
    ///
    /// # Errors
    ///
    /// Returns an error when the separator is not a valid regular expression,
    /// or when the input contains no lines after trimming.
    pub fn n_cols(&self) -> Result<usize> {
        // Report a bad pattern to the caller instead of panicking: the
        // separator can originate from user input.
        let re = Regex::new(&self.sep)
            .map_err(|err| anyhow!("invalid separator pattern {:?}: {err}", self.sep))?;

        // Only the first line is needed, so take it lazily instead of
        // collecting every line.
        self.raw
            .trim()
            .lines()
            .next()
            .map(|line| re.split(line).count())
            .ok_or_else(|| anyhow!("no lines left"))
    }
}
/// Return the number of columns given input text and a separator
///
/// Surrounding whitespace is trimmed from `text` first, so leading blank lines
/// are skipped. The first remaining line is split on `sep`, which is compiled
/// as a regular expression, and the resulting fields are counted.
///
/// # Errors
///
/// Returns an error when `sep` is not a valid regular expression, or when
/// `text` contains no lines after trimming.
pub fn n_columns(text: &str, sep: &str) -> Result<usize> {
    // Report a bad pattern to the caller instead of panicking: the separator
    // can originate from user input.
    let re = Regex::new(sep).map_err(|err| anyhow!("invalid separator pattern {sep:?}: {err}"))?;

    // Only the first line is needed, so take it lazily instead of collecting
    // every line.
    text.trim()
        .lines()
        .next()
        .map(|line| re.split(line).count())
        .ok_or_else(|| anyhow!("no lines left"))
}
pub fn split_columns(text: &str, sep: &str) -> Result<Columns> {
let re = Regex::new(sep).unwrap();
// eprintln!("# columns: {n_col}");
let lines: Vec<&str> = text.trim().lines().collect();
let n_col = n_columns(text, sep)?;
let mut columns = vec![Column::new(); n_col];
for (n, line) in lines.iter().enumerate() {
@ -55,9 +85,10 @@ pub fn split_columns(text: &str, sep: &str) -> Result<Columns> {
let new_n_col = re.split(line).count();
if new_n_col != n_col {
return Err(
format!("unmatched column: expected {n_col} got {new_n_col} on line {}", n+1)
)
return Err(anyhow!(
"unmatched column: expected {n_col} got {new_n_col} on line {}",
n + 1
));
}
// eprintln!("number of columns: {}", columns.len());
@ -71,78 +102,83 @@ pub fn split_columns(text: &str, sep: &str) -> Result<Columns> {
Ok(Columns(columns))
}
impl Columns {
impl Deref for Columns {
type Target = Vec<Vec<String>>;
//NOTE: is there a way to auto implement what's implemented in the wrapped type self.0 ?
fn len(&self) -> usize {
self.0.len()
fn deref(&self) -> &Vec<Vec<String>> {
&self.0
}
}
/// Builds `Columns` from text by splitting it with `DEFAULT_SEP_PATTERN`.
///
/// The conversion fails with the same errors as `split_columns`.
impl TryFrom<&str> for Columns {
    // Must agree with the `anyhow::Result` returned by `try_from` below.
    type Error = anyhow::Error;

    fn try_from(value: &str) -> Result<Self> {
        split_columns(value, DEFAULT_SEP_PATTERN)
    }
}
// impl Index to allow indexing in our wrapped Vector
impl <I> Index<I> for Columns
where
I: SliceIndex<[Column]>,
{
type Output = I::Output;
#[cfg(test)]
mod tests {
use crate::{Columns, split_columns, DEFAULT_SEP_PATTERN};
use crate::Regex;
use std::error::Error;
fn index(&self, index: I) -> &Self::Output {
self.0.index(index)
}
}
type TestResult = Result<(), Box<dyn Error>>;
#[test]
fn test_split_columns_default_sep(){
let coltext1 = "
#[test]
fn test_split_columns_default_sep() -> TestResult {
let coltext1 = "
file1.txt\t\ttitle1
file2.pdf\t\ttitle2
file3\t\t\ttitle3
file with space \textra
";
let columns = split_columns(coltext1, DEFAULT_SEP_PATTERN);
let columns = split_columns(coltext1, DEFAULT_SEP_PATTERN)?;
// should have two columns
assert_eq!(2, columns.clone().unwrap().len());
// should have two columns
assert_eq!(2, columns.clone().len());
assert_eq!(vec!["file1.txt",
"file2.pdf",
"file3",
"file with space "
], columns.unwrap()[0]);
}
assert_eq!(
vec!["file1.txt", "file2.pdf", "file3", "file with space "],
columns[0]
);
Ok(())
}
#[test]
#[should_panic]
fn test_wrong_ncol_default_sep() {
let coltext1 = "
#[test]
#[should_panic]
fn test_wrong_ncol_default_sep() {
let coltext1 = "
file1.txt\t\ttitle1
file2.pdf\t\ttitle2
file3\t\t\ttitle3
file with space\ttitle 4\textra
";
split_columns(coltext1, DEFAULT_SEP_PATTERN).unwrap();
}
split_columns(coltext1, DEFAULT_SEP_PATTERN).unwrap();
}
// #[test]
fn test_re_split() {
let text = "this is two tabs";
let re = Regex::new(r"[\t]+").unwrap();
let fields: Vec<&str> = re.split(text).collect();
eprintln!("{:?}", fields);
assert!(false);
}
// #[test]
fn test_re_split() {
let text = "this is two tabs";
let re = Regex::new(r"[\t]+").unwrap();
let fields: Vec<&str> = re.split(text).collect();
eprintln!("{:?}", fields);
assert!(false);
}
#[test]
fn test_columns_from_str() {
let res: Columns = "first column\tsecond column\t\tthird column"
.try_into()
.unwrap();
assert_eq!(res.len(), 3);
}
#[test]
fn test_columns_from_str() {
let res: Columns = "first column\tsecond column\t\tthird column".try_into().unwrap();
assert_eq!(res.len(), 3);
#[test]
fn test_input_text(){
// it =
}
}

@ -6,8 +6,13 @@
* . dynamically generate field parameters ?
*/
use clap::Parser;
use yargs::parsing::DEFAULT_SEP_PATTERN;
#![allow(unused_imports)]
use clap::{Parser,CommandFactory};
use clap::error::ErrorKind;
use yargs::{DEFAULT_SEP_PATTERN, split_columns, input};
use anyhow::Result;
use std::io::{BufRead, Read, BufReader, stdin};
#[derive(Parser)]
/// yargs - map commands to columns of text input
@ -18,23 +23,28 @@ use yargs::parsing::DEFAULT_SEP_PATTERN;
/// arguments.
///
/// The first command is applied to the first column, the second command to the second column, etc.
#[derive(Debug)]
#[command(name="yargs")]
#[command(author="blob42")]
#[command(version="0.1")]
struct Cli {
/// separator character used to split text into columns
#[arg(default_value_t=DEFAULT_SEP_PATTERN.to_owned())]
#[arg(default_value=DEFAULT_SEP_PATTERN)]
#[arg(short)]
delimiter: String,
delimiter: Option<String>,
//TODO:
// -f --field
// skip fields with `-`
#[arg(short, long, action = clap::ArgAction::Count)]
debug: u8,
verbose: u8,
/// execute CMD each column of input. 0 < N_CMD < NB_COLUMNS
commands: Vec<String>
yargs: Vec<String>
}
fn main() {
fn main() -> Result<()> {
let cli = Cli::parse();
// if let None = cli.f1.as_deref() {
@ -42,12 +52,50 @@ fn main() {
// process::exit(1);
// }
if cli.debug > 0 {
println!("{:?}", cli.delimiter);
if cli.verbose > 0 {
println!("{:?}", cli);
for cmd in &cli.yargs {
println!("- {}", cmd);
}
}
for c in cli.commands {
println!("- {}", c);
// input validation
// take input text, split_columns, nb yargs <= nb columns
// Validate that the number of positional args <= nb of text columns
// ex: input: hello foo bar
// --
// possible ways to call the app:
// $ echo 'hello foo bar' | yargs cat rev 'tr -d b'
// $ echo 'hello foo bar' | yargs cat rev
// $ echo 'hello foo bar' | yargs cat
// let mut cmd = Cli::command();
// Read commands as positional args
// Read input from stdin
let raw_input = input::read_stdin()?;
let input_text = yargs::InputText::new(&raw_input, yargs::DEFAULT_SEP_PATTERN);
// Check that n args <= input cols
if cli.yargs.len() > input_text.n_cols()? {
panic!("too many arguments");
}
// assert_eq!(input_text.n_cols()?, cli.yargs.len());
print!("{}", raw_input);
//
// cmd.error(ErrorKind::ValueValidation, "invalid")
// .exit()
// validate number of
Ok(())
}

@ -0,0 +1,33 @@
//TODO:
use std::error::Error;
use assert_cmd::Command;
// use assert_cmd::prelude::*;
use std::path::Path;
use std::fs::read_to_string;
type TestResult = Result<(), Box<dyn Error>>;
/// Runs the `yargs` binary without configuring any arguments or stdin and
/// expects a successful exit status.
#[test]
fn pass() {
    Command::cargo_bin("yargs").unwrap().assert().success();
}
/// Input with many columns and no positional arguments: the program is
/// expected to behave like `cat`, so stdout must equal the input file.
#[test]
fn pass_noargs() -> TestResult {
    let fixture = Path::new("tests/inputs/input1");

    // Feed the fixture file to the binary on stdin and capture the run.
    let outcome = Command::cargo_bin("yargs")
        .unwrap()
        .pipe_stdin(fixture)?
        .assert();

    // Stdout must be the fixture's contents, unchanged.
    outcome.stdout(read_to_string(fixture)?);
    Ok(())
}

@ -0,0 +1,20 @@
.rw-r--r-- 894 root 9 Sep 22:19 coredump.conf
.rw-r--r-- 540 root 20 Sep 2020 homed.conf
.rw-r--r-- 766 root 9 Sep 22:19 homed.conf.pacnew
.rw-r--r-- 894 root 9 Sep 22:19 journal-remote.conf
.rw-r--r-- 822 root 9 Sep 22:19 journal-upload.conf
.rw-r--r-- 1.3k root 9 Sep 22:19 journald.conf
.rw-r--r-- 1.1k root 24 Feb 2022 logind.conf
.rw-r--r-- 1.5k root 9 Sep 22:19 logind.conf.pacnew
drwxr-xr-x - root 25 Jan 18:33 network
.rw-r--r-- 872 root 9 Sep 22:19 networkd.conf
.rw-r--r-- 804 root 9 Sep 22:19 oomd.conf
.rw-r--r-- 670 root 9 Sep 22:19 pstore.conf
.rw-r--r-- 763 root 19 Aug 2020 resolved.conf
.rw-r--r-- 1.6k root 9 Sep 22:19 resolved.conf.pacnew
.rw-r--r-- 953 root 9 Sep 22:19 sleep.conf
drwxr-xr-x - root 2 Feb 18:04 system
.rw-r--r-- 2.2k root 9 Sep 22:19 system.conf
.rw-r--r-- 856 root 9 Sep 22:19 timesyncd.conf
drwxr-xr-x - root 9 Nov 2022 user
.rw-r--r-- 1.6k root 9 Sep 22:19 user.conf
Loading…
Cancel
Save