wip column splitting

main
blob42 1 year ago
parent ad9404dd32
commit 3fa4559117

@ -1,15 +1,19 @@
Very much work in progress. This is the first project in my Rust learning journey.
Expect a lot of changes and refactoring.
# Colmap
CLI program that takes tabular data as input and maps an arbitrary
command to each input column. It works in a similar way to `awk` or `xargs` in
that the input text is converted into fields and the user supplied
commands are executed on each matched column (field).
Colmap is a hybrid between `awk` and `xargs`. It borrows from the former its ability
to work on tabular columns of text and allows for arbitrary commands to be
applied per column in a similar way to `xargs`.
The columns are called `fields`. The command to execute on each field is called
an `x-arg`.
## Example
INPUT:
-----
input:
field #1 field #2
|--------------------------| |--------------|
@ -20,16 +24,33 @@ INPUT:
-------------- \__________________
___\_
colmap --field-1='basename {}' --field-2="awk { print $1 }"
_________________|: x-arg
#OR
colmap -f1 'basename {}' -f2 'awk { print $1 }'
would output: `ebook.pdf | Title`
- use colon as delimiter
colmap -d':'
`colmap -d':' -f1 '...'`
### Ways of passing x-args
1. passing column x-args as fields
```shell
foo_cmd | colmap --field-1='basename {}' --field-2="awk { print $1 }"
foo_cmd | colmap -f1 'basename {}' -f2 'awk { print $1 }'
```
2. Passing an `xarg template`
WILL OUTPUT:
----------
```shell
foo_cmd | colmap -t 'basename {}' 'awk { print $2 }'
```
ebook.pdf | Title

@ -1,18 +1,49 @@
use std::io;
//DEBUG:
#![allow(dead_code)]
use std::fmt;
type Column = Vec<String>;
type Columns = Vec<Column>;
// split input text into columns based on separator character
// returns a type representing a variable length array of strings (columns) ?
pub fn split_columns(text: &str, sep: char) -> Result<Columns, io::Error> {
/// split input text into columns based on separator character
/// returns a type representing a variable length array of strings (columns) ?
/// TODO:
///
///  accept &str and String
///  error handling
pub fn split_columns(text: &str, sep: char) -> Result<Columns, fmt::Error> {
// read the first line stripping empty lines
let lines: Vec<&str> = text.trim().lines().collect();
eprintln!("lines: {:?}", lines);
// count number of columns
let n_col = match lines.first() {
Some(line) => line.split(sep).count(),
None => return Err(std::fmt::Error)
};
// eprintln!("first line: {:?}", lines.first().unwrap());
// eprintln!("# columns: {n_col}");
let mut columns = vec![Column::new(); n_col];
for (_l_idx, line) in lines.iter().enumerate() {
// count number of columns
let n_col = lines.iter().next().unwrap();
let new_n_col = line.split(sep).count();
eprintln!("{:?}", lines);
// HACK: I should handle repeating separators with a glob or regex library
// TIP: usek
if new_n_col != n_col {
return Err(std::fmt::Error)
}
eprintln!("number of columns: {}", columns.len());
for (c_idx, col) in line.split(sep).enumerate() {
columns[c_idx].push(col.to_string())
}
}
eprintln!("{:?}", columns);
// let n_col = *lines.first().unwrap();
// detect number of columns
Ok(Columns::new())
@ -21,15 +52,14 @@ pub fn split_columns(text: &str, sep: char) -> Result<Columns, io::Error> {
#[test]
fn test_split_columns(){
let coltext1 = r###"
file1.txt title1
file2.pdf title2
file3 title3
file1.txt title1
file2.pdf title2
file3 title3
"###;
let columns = split_columns(coltext1, '\t');
// should have two columns
assert_eq!(columns.unwrap().len(), 2);
assert_eq!(2, columns.unwrap().len());
// println!("columns:\n{:?}", columns);
}

@ -1,8 +1,9 @@
/* TODO:
* 1. add clap for CLI flags
* 2. read input as column fields
* 3. test splitting input into fields
* 4. execute arbitrary shell commands to manipulate input
* . add clap for CLI flags
* . read input as column fields
* . test splitting input into fields
* . execute arbitrary shell commands to manipulate input
* . dynamically generate field parameters ?
*/
use clap::{Parser};
@ -11,6 +12,10 @@ use std::process;
mod input;
/// # Parsing parameters
/// . parsing x-args as field parameters (-f1 'x argument')
/// . detecting number of columns and x-args from positional arguments
#[derive(Parser)]
#[command(name="colmap")]
#[command(author="blob42")]
@ -18,6 +23,7 @@ mod input;
#[command(about = "execute commands on columns")]
struct Cli {
/// separator character used to split text into columns
#[arg(default_value=" ")]
#[arg(short, long = "sep")]
separator: Option<char>,

Loading…
Cancel
Save