You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

85 lines
2.0 KiB
Rust

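// DEBUG: dead-code warnings are silenced crate-wide by the attribute below;
// presumably a development aid -- remove it once everything is wired up.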
#![allow(dead_code)]
use std::fmt;
use regex::Regex;
/// Default column separator: a regex pattern matching a run of one or more
/// tab characters.
const DEFAULT_SEP: &str = r"[\t]+";
/// One column of a table: the cell values gathered from each line, in line order.
type Column = Vec<String>;
/// All columns of a table, ordered by field position within a line.
type Columns = Vec<Column>;
/// split input text into columns based on separator character
/// returns a type representing a variable length array of strings (columns) ?
///
/// TODO:
///
///  error handling
///  accept &str and String
///
pub fn split_columns(text: &str, sep: &str) -> Result<Columns, fmt::Error> {
// read the first line stripping empty lines
let lines: Vec<&str> = text.trim().lines().collect();
eprintln!("lines: {:?}", lines);
let re = Regex::new(sep).unwrap();
// count number of columns
let n_col = match lines.first() {
Some(line) => re.split(line).count(),
None => return Err(std::fmt::Error)
};
// eprintln!("# columns: {n_col}");
let mut columns = vec![Column::new(); n_col];
for (i, line) in lines.iter().enumerate() {
eprintln!("checking line {}", i);
let new_n_col = re.split(line).count();
if new_n_col != n_col {
return Err(fmt::Error)
}
eprintln!("number of columns: {}", columns.len());
for (c_idx, col) in re.split(line).enumerate() {
columns[c_idx].push(col.to_string())
}
}
eprintln!("{:?}", columns);
Ok(columns)
}
/// Checks that tab-separated rows are turned into columns: the blank lines
/// around the fixture are ignored and spaces inside a field are preserved.
#[test]
fn test_split_columns() {
    // Tabs are spelled as `\t` escapes so the fixture cannot be silently
    // broken by an editor or formatter converting tabs to spaces.
    let coltext1 = concat!(
        "\n",
        "file1.txt\ttitle1\n",
        "file2.pdf\ttitle2\n",
        "file3\ttitle3\n",
        "file with space\ttitle 4\n",
    );
    let columns = split_columns(coltext1, DEFAULT_SEP)
        .expect("every fixture row has exactly two tab-separated fields");
    // should have two columns
    assert_eq!(2, columns.len());
    assert_eq!(
        vec!["file1.txt", "file2.pdf", "file3", "file with space"],
        columns[0]
    );
}
/// Checks that the `[\t]+` pattern treats a run of consecutive tabs as a
/// single separator and leaves spaces inside the fields untouched.
///
/// Previously disabled because it ended in an unconditional `assert!(false)`;
/// it now asserts the actual split result, so it is enabled again.
#[test]
fn test_re_split() {
    // Two consecutive tabs, written as escapes so they stay visible.
    let text = "this is\t\ttwo tabs";
    let re = Regex::new(r"[\t]+").unwrap();
    let fields: Vec<&str> = re.split(text).collect();
    assert_eq!(vec!["this is", "two tabs"], fields);
}