From 3fa4559117c69992c4bff54a3175b0329a56e461 Mon Sep 17 00:00:00 2001 From: blob42 Date: Mon, 20 Feb 2023 23:52:37 +0100 Subject: [PATCH] wip column splitting --- README.md | 41 +++++++++++++++++++++++++++++---------- src/input.rs | 54 ++++++++++++++++++++++++++++++++++++++++------------ src/main.rs | 14 ++++++++++---- 3 files changed, 83 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index 3bdd9da..bc0195a 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,19 @@ +Very much work in progress. This is the first project in my Rust learning journey. +Expect a lot of changes and refactoring. + # Colmap -CLI program that takes tabular data as input and maps a random -command to each input column. It works in a similar way to `awk` or `xargs` in -that the input text is converted into fields and the user supplied -commands are executed on each matched column (field) +Colmap is a hybrid between `awk` and `xargs`. It borrows from the former its ability +to work on tabular columns of text and allows for arbitrary commands to be +applied per column in a similar way to `xargs`. + +The columns are called `fields`. The command to execute on each field is called +an `x-arg`. ## Example -INPUT: ------ +input: field #1 field #2 |--------------------------| |--------------| @@ -20,16 +24,33 @@ INPUT: -------------- \__________________ ___\_ colmap --field-1='basename {}' --field-2="awk { print $1 }" + _________________|: x-arg + + #OR colmap -f1 'basename {}' -f2 'awk { print $1 }' +would output: `ebook.pdf | Title` + + - use colon as delimiter - colmap -d':' +`colmap -d':' -f1 '...'` + +### Ways of passing x-args + +1. passing column x-args as fields + +```shell +foo_cmd | colmap --field-1='basename {}' --field-2="awk { print $1 }" +foo_cmd | colmap -f1 'basename {}' -f2 'awk { print $1 }' +``` + +2. 
Passing an `xarg template` -WILL OUPUT: ----------- +```shell +foo_cmd | colmap -t 'basename {}' 'awk { print $2 }' +``` -ebook.pdf | Title diff --git a/src/input.rs b/src/input.rs index 3d6c1f1..57be3b7 100644 --- a/src/input.rs +++ b/src/input.rs @@ -1,18 +1,49 @@ -use std::io; +//DEBUG: +#![allow(dead_code)] + +use std::fmt; type Column = Vec<String>; type Columns = Vec<Column>; -// split input text into columns based on separator character -// returns a type representing a variable length array of strings (columns) ? -pub fn split_columns(text: &str, sep: char) -> Result<Columns, io::Error> { +/// split input text into columns based on separator character +/// returns a type representing a variable length array of strings (columns) ? +/// TODO: +/// +///  accept &str and String +///  error handling +pub fn split_columns(text: &str, sep: char) -> Result<Columns, fmt::Error> { // read the first line stripping empty lines let lines: Vec<&str> = text.trim().lines().collect(); + eprintln!("lines: {:?}", lines); + + // count number of columns + let n_col = match lines.first() { + Some(line) => line.split(sep).count(), + None => return Err(std::fmt::Error) + }; + // eprintln!("first line: {:?}", lines.first().unwrap()); + // eprintln!("# columns: {n_col}"); + + let mut columns = vec![Column::new(); n_col]; + for (_l_idx, line) in lines.iter().enumerate() { - // count number of columns - let n_col = lines.iter().next().unwrap(); + let new_n_col = line.split(sep).count(); - eprintln!("{:?}", lines); + // HACK: I should handle repeating separators with a glob or regex library + // TIP: usek + if new_n_col != n_col { + return Err(std::fmt::Error) + } + eprintln!("number of columns: {}", columns.len()); + for (c_idx, col) in line.split(sep).enumerate() { + columns[c_idx].push(col.to_string()) + } + } + + eprintln!("{:?}", columns); + + // let n_col = *lines.first().unwrap(); // detect number of columns Ok(Columns::new()) @@ -21,15 +52,14 @@ pub fn split_columns(text: &str, sep: char) -> Result<Columns, io::Error> { #[test] fn 
test_split_columns(){ let coltext1 = r###" -file1.txt title1 -file2.pdf title2 -file3 title3 +file1.txt title1 +file2.pdf title2 +file3 title3 "###; let columns = split_columns(coltext1, '\t'); // should have two columns - assert_eq!(columns.unwrap().len(), 2); - + assert_eq!(2, columns.unwrap().len()); // println!("columns:\n{:?}", columns); } diff --git a/src/main.rs b/src/main.rs index 1ee1178..c918eba 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,8 +1,9 @@ /* TODO: - * 1. add clap for CLI flags - * 2. read input as column field1920s - * 3. test splitting input into fields - * 4. execute arbitrary shell commands to manipulate input + * . add clap for CLI flags + * . read input as column field1920s + * . test splitting input into fields + * . execute arbitrary shell commands to manipulate input + * . dynamically generate field parameters ? */ use clap::{Parser}; @@ -11,6 +12,10 @@ use std::process; mod input; + +/// # Parsing parameters +/// . parsing x-args as field parameters (-f1 'x argument') +/// . detecting number of columns and x-args from positional arguments #[derive(Parser)] #[command(name="colmap")] #[command(author="blob42")] @@ -18,6 +23,7 @@ mod input; #[command(about = "execute commands on columns")] struct Cli { + /// separator character used to split text into columns #[arg(default_value=" ")] #[arg(short, long = "sep")] separator: Option,