Решение на CSV Filter от Пламен Николов

Обратно към всички решения

Към профила на Пламен Николов

Резултати

  • 15 точки от тестове
  • 0 бонус точки
  • 15 точки общо
  • 15 успешни тест(а)
  • 0 неуспешни тест(а)

Код

use std::collections::HashMap;
use std::io::{BufRead, Write};
/// Strips a single leading `target` character from `input`.
///
/// Returns the remainder of the string after that character (possibly empty),
/// or `None` when `input` is empty or starts with a different character.
pub fn skip_next(input: &str, target: char) -> Option<&str> {
    let mut remaining = input.chars();
    match remaining.next() {
        // `as_str` yields whatever the iterator has not consumed yet, i.e.
        // everything after the first character.
        Some(first) if first == target => Some(remaining.as_str()),
        _ => None,
    }
}
/// Splits `input` at the first occurrence of `target`.
///
/// Returns `(before, rest)` where `rest` still begins with `target`. When
/// `target` does not occur, the whole input is returned as `before` and `rest`
/// is the empty string.
pub fn take_until(input: &str, target: char) -> (&str, &str) {
    match input.find(target) {
        // `find` reports a byte offset on a character boundary, so the split is safe.
        Some(boundary) => input.split_at(boundary),
        None => (input, ""),
    }
}
/// Splits `input` around the first occurrence of `target`, dropping the
/// `target` character itself.
///
/// Returns `Some((before, after))`, or `None` when `target` does not occur in
/// `input`.
pub fn take_and_skip(input: &str, target: char) -> Option<(&str, &str)> {
    let (head, tail) = take_until(input, target);
    // `tail` is empty when the target was never found, which makes `skip_next`
    // fail and the whole call return `None`.
    skip_next(tail, target).map(|remainder| (head, remainder))
}
/// Errors reported by the CSV reader, parser, selection callbacks and writer.
///
/// Implements [`std::fmt::Display`] and [`std::error::Error`] so that it can be
/// shown to users and boxed as `Box<dyn std::error::Error>`.
#[derive(Debug)]
pub enum CsvError {
    /// The underlying reader or writer failed.
    IO(std::io::Error),
    /// A value could not be parsed. Not constructed anywhere in this file;
    /// presumably intended for selection callbacks.
    ParseError(String),
    /// The header line was blank or contained a duplicate column name.
    InvalidHeader(String),
    /// A data line did not match the `"value", "value", ...` shape expected
    /// for the header's columns.
    InvalidRow(String),
    /// A problem related to a specific column. Not constructed by the library
    /// code in this file; presumably intended for selection callbacks.
    InvalidColumn(String),
}

impl std::fmt::Display for CsvError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            CsvError::IO(err) => write!(f, "I/O error: {}", err),
            CsvError::ParseError(message) => write!(f, "parse error: {}", message),
            CsvError::InvalidHeader(message) => write!(f, "invalid header: {}", message),
            CsvError::InvalidRow(message) => write!(f, "invalid row: {}", message),
            CsvError::InvalidColumn(message) => write!(f, "invalid column: {}", message),
        }
    }
}

impl std::error::Error for CsvError {
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            // Only the I/O variant wraps another error value.
            CsvError::IO(err) => Some(err),
            CsvError::ParseError(_)
            | CsvError::InvalidHeader(_)
            | CsvError::InvalidRow(_)
            | CsvError::InvalidColumn(_) => None,
        }
    }
}
impl From<std::io::Error> for CsvError {
fn from(error: std::io::Error) -> Self {
CsvError::IO(error)
}
}
/// One parsed data line: maps each column name to that line's value for it.
type Row = HashMap<String, String>;
/// A CSV source whose header has already been read.
///
/// Data rows are read from `reader` one line at a time as the value is
/// iterated, optionally filtered through `selection`.
pub struct Csv<R: BufRead> {
/// Column names from the header line, trimmed, in header order.
pub columns: Vec<String>,
/// Source of the data lines that follow the header.
reader: R,
/// Optional row filter; when set, iteration skips rows for which it returns
/// `Ok(false)` and yields any error it returns.
selection: Option<Box<dyn Fn(&Row) -> Result<bool, CsvError>>>,
}
impl<R: BufRead> Csv<R> {
    /// Creates a `Csv` by reading the header line from `reader`.
    ///
    /// The header is split on `,` and each column name is trimmed.
    ///
    /// # Errors
    ///
    /// * [`CsvError::IO`] if reading the header line fails.
    /// * [`CsvError::InvalidHeader`] if the header line is blank or a column
    ///   name appears more than once.
    pub fn new(mut reader: R) -> Result<Self, CsvError> {
        let mut header = String::new();
        reader.read_line(&mut header)?;
        if header.trim().is_empty() {
            return Err(CsvError::InvalidHeader(String::from("Empty header row")));
        }

        let columns: Vec<String> = header
            .split(',')
            .map(|name| name.trim().to_string())
            .collect();

        // Scoped so the borrow of `columns` ends before it is moved into `Csv`.
        {
            let mut seen = std::collections::HashSet::with_capacity(columns.len());
            for col in &columns {
                if !seen.insert(col.as_str()) {
                    return Err(CsvError::InvalidHeader(format!(
                        "Duplicate column '{}'",
                        col
                    )));
                }
            }
        }

        Ok(Csv {
            columns,
            reader,
            selection: None,
        })
    }

    /// Parses one data line into a [`Row`] keyed by column name.
    ///
    /// The line must contain exactly one double-quoted value per column,
    /// separated by commas; whitespace around values and commas is ignored.
    /// Commas inside the quotes belong to the value. There is no escape
    /// syntax for a double quote inside a value.
    ///
    /// # Errors
    ///
    /// [`CsvError::InvalidRow`] if a quote or comma is missing, or if anything
    /// other than whitespace follows the last value.
    pub fn parse_line(&mut self, line: &str) -> Result<Row, CsvError> {
        let mut rest = line.trim();
        let mut row = Row::with_capacity(self.columns.len());

        for (index, col) in self.columns.iter().enumerate() {
            // The error messages are built lazily so that successful parsing
            // does not allocate a message per column.
            rest = skip_next(rest, '"').ok_or_else(|| {
                CsvError::InvalidRow(format!("Missing starting \" on column '{}'", col))
            })?;
            let (content, after_value) = take_and_skip(rest, '"').ok_or_else(|| {
                CsvError::InvalidRow(format!("Missing ending \" on column '{}'", col))
            })?;
            row.insert(col.clone(), String::from(content));
            rest = after_value.trim();

            // Every value except the last one must be followed by a comma.
            if index + 1 != self.columns.len() {
                rest = skip_next(rest, ',')
                    .ok_or_else(|| {
                        CsvError::InvalidRow(format!("Missing , after column '{}'", col))
                    })?
                    .trim();
            }
        }

        if !rest.is_empty() {
            return Err(CsvError::InvalidRow(String::from(
                "Line does not end after last value",
            )));
        }
        Ok(row)
    }

    /// Installs a filter applied to every row during iteration.
    ///
    /// Rows for which `callback` returns `Ok(false)` are skipped; an `Err` is
    /// yielded to the consumer of the iterator. A later call replaces the
    /// previously installed filter.
    pub fn apply_selection<F>(&mut self, callback: F)
    where
        F: Fn(&Row) -> Result<bool, CsvError> + 'static,
    {
        self.selection = Some(Box::new(callback));
    }

    /// Consumes the `Csv` and writes the header plus all selected rows to
    /// `writer`.
    ///
    /// The header is the column names joined with `", "`. Each row is written
    /// as its values wrapped in double quotes and joined with `", "`. Every
    /// line is terminated by `\n`. Values are written as-is, without escaping.
    ///
    /// # Errors
    ///
    /// Stops at, and returns, the first error produced while reading, parsing
    /// or filtering a row, or while writing to `writer`.
    pub fn write_to<W: Write>(self, mut writer: W) -> Result<(), CsvError> {
        // `write_all` rather than `write`: a single `write` call may accept
        // only part of the buffer, which would silently truncate the output.
        writer.write_all(self.columns.join(", ").as_bytes())?;
        writer.write_all(b"\n")?;

        // Iterating consumes `self`, so the column order is copied out first.
        let columns = self.columns.clone();
        for row in self {
            let row = row?;
            let values: Vec<String> = columns
                .iter()
                .map(|name| {
                    let value = row
                        .get(name)
                        .expect("parse_line inserts a value for every column");
                    format!("\"{}\"", value)
                })
                .collect();
            writer.write_all(values.join(", ").as_bytes())?;
            writer.write_all(b"\n")?;
        }
        Ok(())
    }
}
/// Yields the data rows one by one, reading a line from the reader per step.
///
/// Rows rejected by the selection are skipped. Read, parse and selection
/// errors are yielded as `Some(Err(..))`; iteration ends (`None`) once the
/// reader reports end of input.
impl<R: BufRead> Iterator for Csv<R> {
    type Item = Result<Row, CsvError>;

    fn next(&mut self) -> Option<Self::Item> {
        loop {
            let mut buffer = String::new();
            match self.reader.read_line(&mut buffer) {
                // Zero bytes read means end of input.
                Ok(0) => return None,
                Ok(_) => {}
                Err(io_err) => return Some(Err(CsvError::IO(io_err))),
            }

            let parsed = match self.parse_line(&buffer) {
                Ok(parsed) => parsed,
                Err(parse_err) => return Some(Err(parse_err)),
            };

            // Without a selection every row is kept.
            let verdict = match self.selection.as_ref() {
                Some(predicate) => predicate(&parsed),
                None => Ok(true),
            };
            match verdict {
                Ok(true) => return Some(Ok(parsed)),
                // Rejected row: move on to the next line.
                Ok(false) => continue,
                Err(selection_err) => return Some(Err(selection_err)),
            }
        }
    }
}
// Unit tests for the string-parsing helpers and for `Csv` construction,
// line parsing, iteration (with and without a selection) and writing.
#[cfg(test)]
mod tests {
use super::*;
use std::io::{self, Read, BufRead, BufReader};
// For testing that some result matches some pattern.
// Note: `$expr` is expanded a second time in the failure branch to build the message.
macro_rules! assert_match {
($expr:expr, $pat:pat) => {
if let $pat = $expr {
// all good
} else {
assert!(false, "Expression {:?} does not match the pattern {:?}", $expr, stringify!($pat));
}
}
}
// For testing IO errors: a reader whose every read attempt fails.
struct ErroringReader {}
impl Read for ErroringReader {
fn read(&mut self, _buf: &mut [u8]) -> io::Result<usize> {
Err(io::Error::new(io::ErrorKind::Other, "read error!"))
}
}
impl BufRead for ErroringReader {
fn fill_buf(&mut self) -> io::Result<&[u8]> {
Err(io::Error::new(io::ErrorKind::Other, "fill_buf error!"))
}
fn consume(&mut self, _amt: usize) { }
}
// Happy path for the three helpers on ASCII input.
#[test]
fn test_string_parsing() {
assert_eq!(skip_next("[test]", '['), Some("test]"));
assert_eq!(take_until("one/two", '/'), ("one", "/two"));
assert_eq!(take_and_skip("one/two", '/'), Some(("one", "two")));
}
// Same as above with multi-byte (Cyrillic) characters, to check that slicing
// happens on character boundaries.
#[test]
fn test_utf8_string_parsing() {
assert_eq!(skip_next("тест", 'т'), Some("ест"));
assert_eq!(take_until("едноЮдве", 'Ю'), ("едно", "Юдве"));
assert_eq!(take_and_skip("едноЮдве", 'Ю'), Some(("едно", "две")));
}
// The target character does not occur in the input.
#[test]
fn test_string_parsing_with_missing_target() {
assert_eq!(skip_next("[test]", ','), None);
assert_eq!(take_until("one/two", ','), ("one/two", ""));
assert_eq!(take_and_skip("one/two", ','), None);
}
// Empty input.
#[test]
fn test_empty_string_parsing() {
assert_eq!(skip_next("", '['), None);
assert_eq!(take_until("", '/'), ("", ""));
assert_eq!(take_and_skip("", '/'), None);
}
// Input consisting of the target character only.
#[test]
fn test_single_char_parsing() {
assert_eq!(skip_next("[", '['), Some(""));
assert_eq!(take_until("/", '/'), ("", "/"));
assert_eq!(take_and_skip("/", '/'), Some(("", "")));
}
// A failing reader must surface as `CsvError::IO` from the constructor.
#[test]
fn test_new_io_error() {
assert_match!(Csv::new(ErroringReader {}).err(), Some(CsvError::IO(_)));
}
// Helper: builds a `Csv` over an in-memory string.
fn build_csv(csv_file: &str) -> Result<Csv<BufReader<&[u8]>>, CsvError> {
let reader = BufReader::new(csv_file.as_bytes());
Csv::new(reader)
}
// The fixture starts with a newline, so the line read as the header is blank.
#[test]
fn test_new_missing_header() {
let csv_file =
r#"
"Adam", "32", "yes"
"Susan", "29", "yes""#;
let err = build_csv(csv_file).err();
assert_match!(err, Some(CsvError::InvalidHeader(_)));
}
// `Name` appears twice in the header.
#[test]
fn test_new_duplicate_columns() {
let csv_file =
r#"Name, Age, Name
"Adam", "32", "yes"
"Susan", "29", "yes""#;
let err = build_csv(csv_file).err();
assert_match!(err, Some(CsvError::InvalidHeader(_)));
}
// Valid headers: plain, with irregular spacing around names, and with a space
// inside a column name.
#[test]
fn test_new_ok_columns() {
let csv_file =
r#"Name, Age, Eyes
"Adam", "32", "yes"
"Susan", "29", "yes""#;
let expected = vec![String::from("Name"), String::from("Age"), String::from("Eyes")];
let columns = build_csv(csv_file).unwrap().columns;
assert_eq!(columns, expected);
let csv_file =
r#" Name , Age ,Eyes
"Adam", "32", "yes"
"Susan", "29", "yes""#;
let columns = build_csv(csv_file).unwrap().columns;
assert_eq!(columns, expected);
let csv_file =
r#"Name, Age, Eye Color
"Adam", "32", "brown"
"Susan", "29", "blue""#;
let expected = vec![String::from("Name"), String::from("Age"), String::from("Eye Color")];
let columns = build_csv(csv_file).unwrap().columns;
assert_eq!(columns, expected);
}
// Rows with missing opening or closing quotes are rejected.
#[test]
fn test_parse_line_invalid_quotes() {
let mut csv = build_csv("Name, Age, Eyes").unwrap();
let test_str = "Adam, 32, yes";
assert_match!(csv.parse_line(test_str).err(), Some(CsvError::InvalidRow(_)));
let test_str = r#""Adam", 32, "yes""#;
assert_match!(csv.parse_line(test_str).err(), Some(CsvError::InvalidRow(_)));
let test_str = r#""Adam", "32", "yes"#;
assert_match!(csv.parse_line(test_str).err(), Some(CsvError::InvalidRow(_)));
let test_str = r#"Adam", "32", "yes""#;
assert_match!(csv.parse_line(test_str).err(), Some(CsvError::InvalidRow(_)));
}
// Rows with missing, leading, trailing or doubled commas are rejected.
#[test]
fn test_parse_line_invalid_commas() {
let mut csv = build_csv("Name, Age, Eyes").unwrap();
let test_str = r#""Adam" "32" "yes""#;
assert_match!(csv.parse_line(test_str).err(), Some(CsvError::InvalidRow(_)));
let test_str = r#""Adam", "32" "yes""#;
assert_match!(csv.parse_line(test_str).err(), Some(CsvError::InvalidRow(_)));
// NOTE(review): same input as the previous case; possibly meant to cover
// a different comma position.
let test_str = r#""Adam", "32" "yes""#;
assert_match!(csv.parse_line(test_str).err(), Some(CsvError::InvalidRow(_)));
let test_str = r#", "Adam", "32", "yes""#;
assert_match!(csv.parse_line(test_str).err(), Some(CsvError::InvalidRow(_)));
let test_str = r#""Adam", "32", "yes", "#;
assert_match!(csv.parse_line(test_str).err(), Some(CsvError::InvalidRow(_)));
let test_str = r#""Adam",, "32", "yes""#;
assert_match!(csv.parse_line(test_str).err(), Some(CsvError::InvalidRow(_)));
}
// Rows with fewer or more values than there are columns are rejected.
#[test]
fn test_parse_line_key_value_count_mismatch() {
let mut csv = build_csv("Name, Age, Eyes").unwrap();
let test_str = r#""Adam", "32""#;
assert_match!(csv.parse_line(test_str).err(), Some(CsvError::InvalidRow(_)));
let test_str = r#""Adam", "32", "yes", "brown""#;
assert_match!(csv.parse_line(test_str).err(), Some(CsvError::InvalidRow(_)));
}
// Valid rows, including values that contain spaces and commas.
#[test]
fn test_parse_line_ok() {
let mut csv = build_csv("Name, Age, Eyes").unwrap();
let test_str = r#""Adam", "32", "yes""#;
let row = csv.parse_line(test_str).unwrap();
let expected = ("Adam", "32", "yes");
let actual: (&str, &str, &str) = (&row["Name"], &row["Age"], &row["Eyes"]);
assert_eq!(actual, expected);
let test_str = r#""Adam Man Evans", "32", "yes""#;
let row = csv.parse_line(test_str).unwrap();
let expected = ("Adam Man Evans", "32", "yes");
let actual: (&str, &str, &str) = (&row["Name"], &row["Age"], &row["Eyes"]);
assert_eq!(actual, expected);
let test_str = r#""Adam", "32", "yes, brown""#;
let row = csv.parse_line(test_str).unwrap();
let expected = ("Adam", "32", "yes, brown");
let actual: (&str, &str, &str) = (&row["Name"], &row["Age"], &row["Eyes"]);
assert_eq!(actual, expected);
}
// Iteration without a selection, with a real selection, with a selection
// that rejects everything, and with a selection that returns an error.
#[test]
fn test_iteration() {
let csv_file =
r#"Name, Age, Eyes
"Adam", "32", "yes"
"Susan", "29", "yes"
"Stone", "1000000", "no""#;
// Converts a row into a comparable tuple of its three values.
let mapper = |row: Row| (row["Name"].clone(), row["Age"].clone(), row["Eyes"].clone());
let adam = (String::from("Adam"), String::from("32"), String::from("yes"));
let susan = (String::from("Susan"), String::from("29"), String::from("yes"));
let stone = (String::from("Stone"), String::from("1000000"), String::from("no"));
// No filter
let csv = build_csv(csv_file).unwrap();
let expected = vec![adam.clone(), susan.clone(), stone.clone()];
let actual: Vec<_> = csv.map(Result::unwrap).map(mapper).collect();
assert_eq!(actual, expected);
// With filter
let mut csv = build_csv(csv_file).unwrap();
csv.apply_selection(|row| Ok(&row["Eyes"] == "yes"));
let expected = vec![adam.clone(), susan.clone()];
let actual: Vec<_> = csv.map(Result::unwrap).map(mapper).collect();
assert_eq!(actual, expected);
// With impossible filter
let mut csv = build_csv(csv_file).unwrap();
csv.apply_selection(|_row| Ok(false));
let expected = vec![];
let actual: Vec<_> = csv.map(Result::unwrap).map(mapper).collect();
assert_eq!(actual, expected);
// With erroring filter
let mut csv = build_csv(csv_file).unwrap();
csv.apply_selection(|row| if &row["Eyes"] == "yes" {Ok(true)} else {Err(CsvError::InvalidColumn(String::from("csv error")))});
assert_eq!(mapper(csv.next().unwrap().unwrap()), adam.clone());
assert_eq!(mapper(csv.next().unwrap().unwrap()), susan.clone());
assert_match!(csv.next().unwrap(), Err(CsvError::InvalidColumn(_)));
assert_match!(csv.next(), None);
}
// For testing write errors: a writer whose every write fails.
struct ErroringWriter {}
impl Write for ErroringWriter {
fn write(&mut self, _buf: &[u8]) -> io::Result<usize> {
Err(io::Error::new(io::ErrorKind::Other, "write error!"))
}
fn flush(&mut self) -> io::Result<()> {
Ok(())
}
}
// A failing writer must surface as `CsvError::IO` from `write_to`.
#[test]
fn test_write_to_io_error() {
let csv_file =
r#"Name, Age, Eyes
"Adam", "32", "yes"
"Susan", "29", "yes""#;
let csv = build_csv(csv_file).unwrap();
let err = csv.write_to(ErroringWriter {}).err();
assert_match!(err, Some(CsvError::IO(_)));
}
// A malformed data row makes `write_to` return the parse error.
#[test]
fn test_write_to_with_invalid_input_file() {
let csv_file =
r#"Name, Age, Eyes
"Adam", "32""#;
let mut output = Vec::new();
let csv = build_csv(csv_file).unwrap();
let err = csv.write_to(&mut output).err();
assert_match!(err, Some(CsvError::InvalidRow(_)));
}
// Round trip: valid input is written back line for line.
#[test]
fn test_write_to_ok() {
let csv_file =
r#"Name, Age, Eyes
"Adam", "32", "yes"
"Susan", "29", "yes""#;
let mut output = Vec::new();
let csv = build_csv(csv_file).unwrap();
assert!(csv.write_to(&mut output).is_ok());
let actual = output.lines().
map(Result::unwrap).
collect::<Vec<String>>();
let expected = &[
"Name, Age, Eyes",
"\"Adam\", \"32\", \"yes\"",
"\"Susan\", \"29\", \"yes\"",
];
assert_eq!(actual, expected);
}
// End-to-end example: parse a single line, iterate, and write.
#[test]
fn test_basic_csv() {
let data = r#"
name, age, birth date
"Gen Z. Person", "20", "2000-01-01"
"#.trim().as_bytes();
let mut csv = Csv::new(BufReader::new(data)).unwrap();
csv.apply_selection(|_row| Ok(true));
// Parsing a single row:
let row = csv.parse_line(r#""Basic Name","13","2020-01-01""#).unwrap();
assert_eq! {
(row["name"].as_str(), row["age"].as_str(), row["birth date"].as_str()),
("Basic Name", "13", "2020-01-01"),
};
// Usage as an iterator:
let filtered_names = csv.map(|row| row.unwrap()["name"].clone()).collect::<Vec<_>>();
assert_eq!(filtered_names, &["Gen Z. Person"]);
// Writing to some output:
let mut csv = Csv::new(BufReader::new(data)).unwrap();
csv.apply_selection(|_row| Ok(true));
let mut output = Vec::new();
csv.write_to(&mut output).unwrap();
let output_lines = output.lines().
map(Result::unwrap).
collect::<Vec<String>>();
assert_eq!(output_lines, &[
"name, age, birth date",
"\"Gen Z. Person\", \"20\", \"2000-01-01\"",
]);
}
}

Лог от изпълнението

Compiling solution v0.1.0 (/tmp/d20210111-1538662-1c7ihn5/solution)
    Finished test [unoptimized + debuginfo] target(s) in 4.50s
     Running target/debug/deps/solution_test-8916805fc40a2dab

running 15 tests
test solution_test::test_csv_basic ... ok
test solution_test::test_csv_duplicate_columns ... ok
test solution_test::test_csv_empty ... ok
test solution_test::test_csv_iterating_with_a_selection ... ok
test solution_test::test_csv_iterating_with_no_selection ... ok
test solution_test::test_csv_parse_line ... ok
test solution_test::test_csv_parse_line_with_commas ... ok
test solution_test::test_csv_selection_and_writing ... ok
test solution_test::test_csv_single_column_no_data ... ok
test solution_test::test_csv_writing_without_a_selection ... ok
test solution_test::test_csv_writing_without_any_rows ... ok
test solution_test::test_parsing_helpers_for_unicode ... ok
test solution_test::test_skip_next ... ok
test solution_test::test_take_and_skip ... ok
test solution_test::test_take_until ... ok

test result: ok. 15 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out

История (1 версия и 0 коментара)

Пламен качи първо решение на 23.12.2020 17:10 (преди 7 месеца)