diff --git a/Cargo.lock b/Cargo.lock index 76df4ec..4ae4da7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -458,6 +458,7 @@ dependencies = [ "camt053", "chrono", "kash", + "ktf", "quick-xml", "serde", "serde_json", @@ -494,6 +495,14 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "ktf" +version = "0.1.0" +dependencies = [ + "kash", + "serde", +] + [[package]] name = "lazy_static" version = "1.4.0" diff --git a/cli/src/kash/args.rs b/cli/src/kash/args.rs index 7b2cc11..c749984 100644 --- a/cli/src/kash/args.rs +++ b/cli/src/kash/args.rs @@ -7,6 +7,7 @@ pub enum InputFormat { Toml, Json, Camt053, + Ktf, } /// command-line interface to kash diff --git a/cli/src/kash/main.rs b/cli/src/kash/main.rs index 99c851a..27106a8 100644 --- a/cli/src/kash/main.rs +++ b/cli/src/kash/main.rs @@ -3,6 +3,7 @@ mod args; use self::args::{Args, InputFormat}; use clap::Parser; use kash_cli::output::OutputOptions; +use kash_convert::input::ktf::KtfInput; use kash_convert::input::toml::TomlInput; use kash_convert::input::{camt053::Camt053Input, json::JsonInput, Input}; use std::fs::File; @@ -29,6 +30,7 @@ fn main() { InputFormat::Json => JsonInput::new().from_read(reader), InputFormat::Toml => TomlInput::new().from_read(reader), InputFormat::Camt053 => Camt053Input::new().from_read(reader), + InputFormat::Ktf => KtfInput::new().from_read(reader), } .unwrap(), ); diff --git a/convert/Cargo.toml b/convert/Cargo.toml index e6347bf..d6ce651 100644 --- a/convert/Cargo.toml +++ b/convert/Cargo.toml @@ -10,6 +10,7 @@ serde_json = { version = "1.0.82", optional = true } toml = { version = "0.5.9", optional = true } kash = { path = "../lib" } camt053 = { path = "../camt053", optional = true } +ktf = { path = "../ktf", optional = true } [dependencies.quick-xml] version = "0.23.0" @@ -27,11 +28,13 @@ all = ["all-inputs", "all-outputs"] all-inputs = [ "input-json", "input-toml", - "input-camt053" + "input-camt053", + "input-ktf" ] all-outputs = ["output-json"] input-json = ["dep:serde_json"] 
input-toml = ["dep:toml"] input-camt053 = ["dep:camt053", "dep:quick-xml", "dep:chrono"] +input-ktf = ["dep:ktf"] output-json = ["dep:serde_json"] diff --git a/convert/src/input/ktf.rs b/convert/src/input/ktf.rs new file mode 100644 index 0000000..e104bd1 --- /dev/null +++ b/convert/src/input/ktf.rs @@ -0,0 +1,50 @@ +use super::{Input, InputError}; +use kash::statements::Statement; +use serde::Deserialize; +use std::io::Read; + +pub struct KtfInput; + +impl KtfInput { + pub fn new() -> Self { + Self + } +} + +#[derive(Debug, Deserialize)] +#[serde(tag = "type", rename_all = "camelCase")] +pub enum KtfInputData { + Test(TestData), +} + +#[derive(Debug, Deserialize)] +pub struct TestData { + pub a: i32, + pub b: i32, +} + +impl Input for KtfInput { + fn from_read(&self, mut reader: R) -> Result, InputError> + where + R: Read, + { + let statements = Vec::new(); + + // FIXME: actually implement from_read instead of this + // memory-hogging garbage. + let input_data = ktf::from_str::>( + { + let mut input = String::new(); + reader + .read_to_string(&mut input) + .map_err(|_| InputError::Read)?; + input + } + .as_str(), + ) + .map_err(|e| InputError::Invalid(e.to_string()))?; + println!("{:#?}", input_data); + + Ok(statements) + } +} diff --git a/convert/src/input/mod.rs b/convert/src/input/mod.rs index e2987d2..29a9ddd 100644 --- a/convert/src/input/mod.rs +++ b/convert/src/input/mod.rs @@ -2,6 +2,8 @@ pub mod camt053; #[cfg(feature = "input-json")] pub mod json; +#[cfg(feature = "input-ktf")] +pub mod ktf; #[cfg(feature = "input-toml")] pub mod toml; diff --git a/ktf/Cargo.toml b/ktf/Cargo.toml new file mode 100644 index 0000000..24d1d7b --- /dev/null +++ b/ktf/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "ktf" +version = "0.1.0" +edition = "2021" + +[dependencies] +serde = { version = "1.0.139", optional = true } +kash = { path = "../lib" } + +[features] +default = ["serde"] +serde = ["dep:serde"] diff --git a/ktf/README.md b/ktf/README.md new file mode 100644 index 
/// Error and result types shared by the ktf deserializer.
pub mod error {
    use std::error;
    use std::fmt::{self, Display};
    use std::result;

    /// Result alias whose error type is always [`Error`].
    pub type Result<T> = result::Result<T, Error>;

    /// Everything that can go wrong while reading ktf text.
    #[derive(Debug, Clone, PartialEq, Eq)]
    pub enum Error {
        /// Free-form message (used for errors raised by consumers).
        Message(String),
        /// The input does not start with a `>` header row.
        ExpectedHeader,
        /// The current cell is not a valid float.
        ExpectedFloat,
        /// A data row (introduced by `|`) was expected.
        ExpectedMap,
        /// The current row has fewer cells than the header has columns.
        ExpectedMapValue,
        /// All header columns have been visited.
        MapEnd,
        /// The input contains something that is neither header nor data row.
        Syntax,
        /// The input ended.
        Eof,
    }

    impl error::Error for Error {}

    impl Display for Error {
        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
            f.write_str(match self {
                Error::Message(m) => m.as_str(),
                Error::ExpectedHeader => "expected header",
                Error::ExpectedFloat => "expected float",
                Error::ExpectedMap => "expected map",
                Error::ExpectedMapValue => "expected map value",
                Error::MapEnd => "unexpected end of map",
                Error::Syntax => "syntax error",
                Error::Eof => "unexpected EOF",
            })
        }
    }
}

/// Low-level, row-oriented reader for ktf text.
pub mod de {
    use super::error::{Error, Result};

    /// Cursor over ktf text that remembers the header and the current row.
    #[derive(Debug)]
    pub struct Deserializer<'a> {
        /// Input that has not been consumed yet.
        input: &'a str,
        /// Index of the column the next key/value lookup refers to.
        pub col_index: usize,
        /// Column names taken from the header row.
        pub header: Vec<String>,
        /// The row most recently consumed by [`Deserializer::next_row`].
        pub row: Row,
    }

    impl<'a> Deserializer<'a> {
        /// Creates a deserializer positioned at the start of `input`.
        pub fn from_str(input: &'a str) -> Self {
            Deserializer {
                input,
                col_index: 0,
                header: Vec::new(),
                row: Row {
                    cols: Vec::new(),
                    len: 0,
                },
            }
        }

        /// Skips `n` bytes of input.
        ///
        /// An offset past the end of the input (or one that does not fall on
        /// a character boundary) leaves the input empty instead of panicking.
        pub fn advance(&mut self, n: usize) {
            self.input = self.input.get(n..).unwrap_or("");
        }

        /// Returns the next character without consuming it.
        pub fn peek_char(&self) -> Result<char> {
            self.input.chars().next().ok_or(Error::Eof)
        }

        /// Consumes and returns the next character.
        pub fn next_char(&mut self) -> Result<char> {
            let c = self.peek_char()?;
            self.advance(c.len_utf8());
            Ok(c)
        }

        /// Consumes the rest of the current line (including its terminator),
        /// makes it the current row and resets the column index.
        pub fn next_row(&mut self) -> Result<Row> {
            let (text, consumed) = split_first_line(self.input).ok_or(Error::Eof)?;
            let row = Row::new(text);
            // `consumed` covers the terminator that is really present (none,
            // `\n` or `\r\n`), so a final row without a trailing newline no
            // longer slices past the end and CRLF input leaves no stray `\n`.
            self.advance(consumed);
            self.row = row.clone();
            self.col_index = 0;
            Ok(row)
        }

        /// Parses the rest of the current line without consuming it.
        pub fn peek_row(&self) -> Result<Row> {
            split_first_line(self.input)
                .map(|(text, _)| Row::new(text))
                .ok_or(Error::Eof)
        }

        /// Returns the header name of the current column, or
        /// [`Error::MapEnd`] once every column has been visited.
        pub fn peek_key(&self) -> Result<String> {
            self.header
                .get(self.col_index)
                .cloned()
                .ok_or(Error::MapEnd)
        }

        /// Returns the cell of the current row at the current column, or
        /// [`Error::ExpectedMapValue`] when the row is too short.
        pub fn peek_value(&self) -> Result<String> {
            self.row
                .cols
                .get(self.col_index)
                .cloned()
                .ok_or(Error::ExpectedMapValue)
        }

        /// Returns the current column name and moves on to the next column.
        pub fn next_key(&mut self) -> Result<String> {
            let key = self.peek_key()?;
            self.col_index += 1;
            Ok(key)
        }

        /// Consumes the `>`-prefixed header row and stores its column names.
        pub fn parse_header(&mut self) -> Result<Vec<String>> {
            if self.next_char()? != '>' {
                return Err(Error::ExpectedHeader);
            }
            let header = self.next_row()?.cols;
            self.header = header.clone();
            Ok(header)
        }

        /// Parses the current cell as `f32`.
        #[inline]
        pub fn parse_f32(&self) -> Result<f32> {
            self.peek_value()?.parse().map_err(|_| Error::ExpectedFloat)
        }

        /// Returns the current cell as an owned string.
        #[inline]
        pub fn parse_string(&self) -> Result<String> {
            self.peek_value()
        }
    }

    /// Splits off the first line of `input`.
    ///
    /// Returns the line without its terminator together with the number of
    /// bytes the line *and* its terminator occupy, or `None` for empty input.
    fn split_first_line(input: &str) -> Option<(&str, usize)> {
        if input.is_empty() {
            return None;
        }
        match input.find('\n') {
            Some(idx) => {
                let line = &input[..idx];
                Some((line.strip_suffix('\r').unwrap_or(line), idx + 1))
            }
            None => Some((input, input.len())),
        }
    }

    /// One line of ktf text split into trimmed cells.
    #[derive(Debug, Clone, PartialEq, Eq)]
    pub struct Row {
        /// Cells of the row, split at `|` and trimmed.
        pub cols: Vec<String>,
        /// Byte length of the raw row text (without line terminator).
        pub len: usize,
    }

    impl Row {
        /// Splits `row` at every `|` and trims each resulting cell.
        pub fn new(row: &str) -> Self {
            Self {
                cols: row.split('|').map(|cell| cell.trim().to_owned()).collect(),
                len: row.len(),
            }
        }
    }
}
des.parse_header()?)) + } + + #[test] + fn de_header_multi_col() -> Result<()> { + let mut des = Deserializer::from_str(">col1|col2|col3\n"); + Ok(assert_eq!( + vec!["col1", "col2", "col3"], + des.parse_header()? + )) + } +} diff --git a/ktf/src/serde/de.rs b/ktf/src/serde/de.rs new file mode 100644 index 0000000..0ab116e --- /dev/null +++ b/ktf/src/serde/de.rs @@ -0,0 +1,142 @@ +use crate::de::Deserializer; +use crate::error::{Error, Result}; +use serde::de::value::StrDeserializer; +use serde::de::{self, Deserialize, MapAccess, SeqAccess, Visitor}; +use serde::forward_to_deserialize_any; + +pub fn from_str<'a, T>(s: &'a str) -> Result +where + T: Deserialize<'a>, +{ + let mut des = Deserializer::from_str(s); + des.parse_header()?; + + let t = T::deserialize(&mut des)?; + Ok(t) +} + +impl<'a, 'de> de::Deserializer<'de> for &'a mut Deserializer<'de> { + type Error = Error; + + fn deserialize_any(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + match self.peek_char()? { + '|' => self.deserialize_map(visitor), + _ => Err(Error::Syntax), + } + } + + fn deserialize_map(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + match self.next_char()? { + '|' => self.next_row().map(|_| ()), + _ => Err(Error::ExpectedMap), + }?; + + visitor.visit_map(RowMap::new(self)) + } + + fn deserialize_seq(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_seq(LineSeq::new(self)) + } + + fn deserialize_identifier(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_str(visitor) + } + + fn deserialize_f32(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_f32(self.parse_f32()?) + } + + fn deserialize_str(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_str(&self.parse_string()?) + } + + fn deserialize_string(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_str(visitor) + } + + forward_to_deserialize_any! 
{ + bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f64 char + bytes byte_buf option unit unit_struct newtype_struct tuple + tuple_struct struct enum ignored_any + } +} + +struct RowMap<'a, 'de> { + de: &'a mut Deserializer<'de>, +} + +impl<'a, 'de> RowMap<'a, 'de> { + pub fn new(de: &'a mut Deserializer<'de>) -> Self { + Self { de } + } +} + +impl<'a, 'de> MapAccess<'de> for RowMap<'a, 'de> { + type Error = Error; + + fn next_key_seed(&mut self, seed: K) -> Result> + where + K: de::DeserializeSeed<'de>, + { + match self.de.peek_key() { + Err(Error::MapEnd) => Ok(None), + Err(e) => Err(e), + Ok(key) => seed.deserialize(StrDeserializer::new(&key)).map(Some), + } + } + + fn next_value_seed(&mut self, seed: V) -> Result + where + V: de::DeserializeSeed<'de>, + { + let des = seed.deserialize(&mut *self.de); + self.de.next_key()?; + des + } +} + +struct LineSeq<'a, 'de> { + de: &'a mut Deserializer<'de>, +} + +impl<'a, 'de> LineSeq<'a, 'de> { + pub fn new(de: &'a mut Deserializer<'de>) -> Self { + Self { de } + } +} + +impl<'a, 'de> SeqAccess<'de> for LineSeq<'a, 'de> { + type Error = Error; + + fn next_element_seed(&mut self, seed: T) -> Result> + where + T: de::DeserializeSeed<'de>, + { + match self.de.peek_row() { + Err(Error::Eof) => Ok(None), + Err(e) => Err(e), + _ => seed.deserialize(&mut *self.de).map(Some), + } + } +} diff --git a/ktf/src/serde/error.rs b/ktf/src/serde/error.rs new file mode 100644 index 0000000..5be32e5 --- /dev/null +++ b/ktf/src/serde/error.rs @@ -0,0 +1,9 @@ +use crate::error::Error; +use serde::de; +use std::fmt::Display; + +impl de::Error for Error { + fn custom(msg: T) -> Self { + Error::Message(msg.to_string()) + } +} diff --git a/ktf/src/serde/mod.rs b/ktf/src/serde/mod.rs new file mode 100644 index 0000000..fce2d9b --- /dev/null +++ b/ktf/src/serde/mod.rs @@ -0,0 +1,5 @@ +mod de; +mod error; + +pub use de::*; +pub use error::*; diff --git a/repo/src/fs.rs b/repo/src/fs.rs index 650e500..fb58fb8 100644 --- a/repo/src/fs.rs +++ 
b/repo/src/fs.rs @@ -6,7 +6,7 @@ use kash::{ }, }; use kash_convert::input::{ - camt053::Camt053Input, json::JsonInput, toml::TomlInput, Input, + camt053::Camt053Input, json::JsonInput, ktf::KtfInput, toml::TomlInput, Input, }; use std::{ fs::{self, File}, @@ -33,6 +33,7 @@ impl FsRepo { Some("json") => Ok(JsonInput::new().from_read(input_file)), Some("toml") => Ok(TomlInput::new().from_read(input_file)), Some("xml") => Ok(Camt053Input::new().from_read(input_file)), + Some("ktf") => Ok(KtfInput::new().from_read(input_file)), Some(ext) => Err(Error::Message(format!("unknown format '{ext}'"))), None => Err(Error::Message("extension parse error".into())), }?