summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDaniel Silverstone <dsilvers@digital-scurf.org>2018-12-04 23:33:00 +0000
committerDaniel Silverstone <dsilvers@digital-scurf.org>2018-12-04 23:33:00 +0000
commit4d0b76c935d9198a297f9594bd10e309b42af1bf (patch)
tree4c321a6276720ef2f73e3e329bb75ba13e3251d7
downloadparsebyregex-4d0b76c935d9198a297f9594bd10e309b42af1bf.tar.bz2
Initial version, yuck
-rw-r--r--.gitignore2
-rw-r--r--.vscode/settings.json3
-rw-r--r--COPYING26
-rw-r--r--Cargo.toml15
-rw-r--r--parsebyregex-derive/Cargo.toml17
-rw-r--r--parsebyregex-derive/src/lib.rs338
-rw-r--r--parsebyregex-derive/tests/basic.rs15
-rw-r--r--parsebyregex-derive/tests/enum.rs42
-rw-r--r--parsebyregex-example/Cargo.toml10
-rw-r--r--parsebyregex-example/src/main.rs64
-rw-r--r--rust-toolchain1
-rw-r--r--src/lib.rs40
12 files changed, 573 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..fa8d85a
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+Cargo.lock
+target
diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 0000000..6c49489
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,3 @@
+{
+ "rust.clippy_preference": "on"
+} \ No newline at end of file
diff --git a/COPYING b/COPYING
new file mode 100644
index 0000000..0a8103f
--- /dev/null
+++ b/COPYING
@@ -0,0 +1,26 @@
+Copyright 2018 Daniel Silverstone <dsilvers@digital-scurf.org>
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+3. Neither the name of the author nor the names of their contributors
+ may be used to endorse or promote products derived from this software
+ without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGE.
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..e1f3cc8
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,15 @@
+[package]
+name = "parsebyregex"
+version = "0.1.0"
+authors = ["Daniel Silverstone <dsilvers@digital-scurf.org>"]
+edition = "2018"
+
+[workspace]
+members = [
+ "parsebyregex-derive",
+ "parsebyregex-example"
+]
+
+[dependencies]
+regex = "1.1"
+lazy_static = "1.2"
diff --git a/parsebyregex-derive/Cargo.toml b/parsebyregex-derive/Cargo.toml
new file mode 100644
index 0000000..ea58f9f
--- /dev/null
+++ b/parsebyregex-derive/Cargo.toml
@@ -0,0 +1,17 @@
+[package]
+name = "parsebyregex-derive"
+version = "0.1.0"
+authors = ["Daniel Silverstone <dsilvers@digital-scurf.org>"]
+edition = "2018"
+
+[lib]
+proc-macro = true
+
+[dependencies]
+syn = {version="0.15", features=["derive","parsing","printing"]}
+quote = "0.6"
+regex = "1.1"
+proc-macro2 = "0.4"
+
+[dev-dependencies]
+parsebyregex = {path = ".."} \ No newline at end of file
diff --git a/parsebyregex-derive/src/lib.rs b/parsebyregex-derive/src/lib.rs
new file mode 100644
index 0000000..12f94e6
--- /dev/null
+++ b/parsebyregex-derive/src/lib.rs
@@ -0,0 +1,338 @@
+#![recursion_limit = "128"]
+use quote::quote;
+use syn::{parse_macro_input, DeriveInput};
+
+extern crate proc_macro;
+
+use proc_macro::TokenStream;
+
+use regex::Regex;
+
+/// Entry point for `#[derive(ParseByRegex)]`.
+///
+/// Parses the annotated item into a `syn::DeriveInput` and passes it to
+/// `impl_pbr_macro`, which produces the generated `impl` block. The
+/// `regex` helper attribute is registered here so that `#[regex = "..."]`
+/// is accepted on the item and on enum variants.
+#[proc_macro_derive(ParseByRegex, attributes(regex))]
+pub fn derive_parse_by_regex(input: TokenStream) -> TokenStream {
+    let derive_input: DeriveInput = parse_macro_input!(input as DeriveInput);
+    impl_pbr_macro(&derive_input)
+}
+
+/// Builds the `impl ParseByRegex for ...` block for a derive input.
+///
+/// * Structs need a `#[regex = "..."]` attribute on the item itself. The
+///   pattern is checked against the fields by `confirm_fields`, compiled
+///   once in a `lazy_static!` inside the generated function, and the
+///   struct is built from its captures by the code from `gen_struct_body`.
+/// * Enums carry one `#[regex = "..."]` per variant. Variants may be unit,
+///   tuple or struct-like; the generated code tries the patterns in
+///   declaration order and the first one that matches wins.
+/// * Unions produce no output at all.
+///
+/// # Panics
+///
+/// Panics (reported as a compile error at the derive site) when a struct
+/// has no regex attribute, or when `confirm_fields` / `check_variants`
+/// reject the item.
+fn impl_pbr_macro(ast: &syn::DeriveInput) -> TokenStream {
+    let name = &ast.ident;
+
+    let expanded = match &ast.data {
+        syn::Data::Struct(sdata) => {
+            let pattern =
+                find_regex(&ast.attrs).expect(r#"Unable to find #[regex = r"somesuch"]"#);
+            confirm_fields(&pattern, sdata);
+            let construct = gen_struct_body(name, &pattern);
+            quote! {
+                impl ParseByRegex for #name {
+                    fn parse_by_regex<T: AsRef<str>>(input: T) -> Result<Self, Box<std::error::Error>> {
+                        use parsebyregex::lazy_static;
+                        use parsebyregex::Regex;
+                        lazy_static! {
+                            static ref PARSER: Regex = Regex::new(#pattern).expect("Unable to parse regular expression");
+                        }
+                        if let Some(captures) = PARSER.captures(input.as_ref()) {
+                            #construct
+                        } else {
+                            Err(format!("Unable to parse `{}` by `{}`", input.as_ref(), #pattern))?
+                        }
+                    }
+                }
+            }
+        }
+        syn::Data::Enum(edata) => {
+            check_variants(name, edata);
+            let statics = gen_enum_parsers(name, edata);
+            let attempts = gen_enum_body(name, edata);
+            quote! {
+                impl ParseByRegex for #name {
+                    fn parse_by_regex<T: AsRef<str>>(input: T) -> Result<Self, Box<std::error::Error>> {
+                        use parsebyregex::lazy_static;
+                        use parsebyregex::Regex;
+                        lazy_static! {
+                            #statics
+                        }
+                        #attempts
+                    }
+                }
+            }
+        }
+        // Nothing is generated for unions.
+        syn::Data::Union(_) => quote! {},
+    };
+
+    expanded.into()
+}
+
+/// Checks, at macro-expansion time, that a struct's regex and fields agree.
+///
+/// The pattern must compile, every capture group in it must be named, the
+/// struct must use named fields, and the sorted capture names must equal
+/// the sorted field names.
+///
+/// # Panics
+///
+/// Panics with a descriptive message on the first rule that is violated.
+fn confirm_fields(rex: &syn::LitStr, body: &syn::DataStruct) {
+    let compiled = Regex::new(&rex.value()).expect("Unable to parse regex for struct");
+
+    let mut capture_labels: Vec<&str> = compiled.capture_names().flatten().collect();
+    // `captures_len` also counts the implicit whole-match group 0.
+    if capture_labels.len() + 1 != compiled.captures_len() {
+        panic!("There are unlabelled captures in this regex!");
+    }
+    capture_labels.sort();
+
+    let named = match body.fields {
+        syn::Fields::Named(ref named) => named,
+        _ => panic!("The body of the struct uses unnamed fields or is a unit!"),
+    };
+
+    let mut field_names: Vec<String> = named
+        .named
+        .iter()
+        .map(|field| {
+            field
+                .ident
+                .as_ref()
+                .expect("Named fields should be named")
+                .to_string()
+        })
+        .collect();
+    field_names.sort();
+
+    if capture_labels.len() != field_names.len() {
+        panic!("Mismatch between capture count and field count");
+    }
+    // Both lists are sorted, so a pairwise walk finds the first name that
+    // appears in one list but not the other.
+    for (label, field) in capture_labels.iter().zip(&field_names) {
+        if label != field {
+            panic!(
+                "Mismatch, expected field {} but found field {}",
+                label, field
+            );
+        }
+    }
+}
+
+/// Generates the success path of a derived struct parser.
+///
+/// For every named capture group in `rex` the output contains
+/// `let <label> = ParseByRegex::parse_by_regex(&captures[<label>])?;`,
+/// followed by `Ok(<name> { <label>, ... })`. The generated code expects a
+/// `captures` binding to be in scope at the point where it is spliced in.
+///
+/// # Panics
+///
+/// Panics if the pattern does not compile.
+fn gen_struct_body(name: &syn::Ident, rex: &syn::LitStr) -> proc_macro2::TokenStream {
+    let compiled = Regex::new(&rex.value()).expect("Unable to parse regex for struct");
+
+    let mut bindings = proc_macro2::TokenStream::default();
+    let mut members = proc_macro2::TokenStream::default();
+    for label in compiled.capture_names().flatten() {
+        let ident = syn::Ident::new(label, proc_macro2::Span::call_site());
+
+        let binding = quote! {
+            let #ident = ParseByRegex::parse_by_regex(&captures[#label])?;
+        };
+        bindings.extend(binding.into_iter());
+
+        let member = quote! {
+            #ident,
+        };
+        members.extend(member.into_iter());
+    }
+
+    quote! {
+        #bindings
+
+        Ok(#name {
+            #members
+        })
+    }
+}
+/// Finds the `#[regex = "..."]` attribute in `attrs` and returns its
+/// string literal.
+///
+/// Only name-value attributes whose name is exactly `regex` are considered.
+/// Without that check any other string-valued attribute would be mistaken
+/// for the pattern, most notably `#[doc = "..."]`, which is what a `///`
+/// comment on the item turns into. Attributes that do not parse as a meta
+/// item, and `regex` attributes whose value is not a string literal, are
+/// skipped.
+///
+/// Returns `None` when no suitable attribute is present.
+fn find_regex(attrs: &[syn::Attribute]) -> Option<syn::LitStr> {
+    for attr in attrs {
+        if let Ok(syn::Meta::NameValue(nv)) = attr.parse_meta() {
+            if nv.ident != "regex" {
+                continue;
+            }
+            if let syn::Lit::Str(s) = nv.lit {
+                return Some(s);
+            }
+        }
+    }
+    None
+}
+
+/// Validates every variant of an enum before any code is generated.
+///
+/// Each variant must carry a `#[regex = "..."]` attribute holding a pattern
+/// that compiles, and the pattern's capture groups must fit the variant:
+///
+/// * unit variants: no capture groups at all;
+/// * struct-like variants: only named groups, whose names are exactly the
+///   field names;
+/// * tuple variants: only unnamed groups, one per field.
+///
+/// # Panics
+///
+/// Panics on the first violation, naming the offending variant.
+fn check_variants(name: &syn::Ident, edata: &syn::DataEnum) {
+    for var in edata.variants.iter() {
+        let variant = &var.ident;
+
+        let pattern = match find_regex(&var.attrs) {
+            Some(lit) => lit,
+            None => panic!(
+                r#"Unable to find #[regex = "somesuch"] for variant {}::{}"#,
+                name, variant
+            ),
+        };
+        let compiled = match Regex::new(&pattern.value()) {
+            Ok(compiled) => compiled,
+            Err(e) => panic!(
+                "Unable to parse regex for enum variant {}::{} ({:?})",
+                name, variant, e
+            ),
+        };
+        // Number of groups including the implicit whole-match group 0.
+        let group_count = compiled.captures_len();
+
+        match var.fields {
+            syn::Fields::Unit => {
+                if group_count != 1 {
+                    panic!(
+                        "Regular expression with captures against unit enum variant {}::{}",
+                        name, variant
+                    )
+                }
+            }
+            syn::Fields::Named(ref named) => {
+                let mut labels: Vec<&str> = compiled.capture_names().flatten().collect();
+                if labels.len() + 1 != group_count {
+                    panic!(
+                        "There are unlabelled captures in regex for struct-type enum variant {}::{}!",
+                        name, variant
+                    );
+                }
+                labels.sort();
+
+                let mut field_names: Vec<String> = named
+                    .named
+                    .iter()
+                    .map(|field| {
+                        field
+                            .ident
+                            .as_ref()
+                            .expect("Unnamed field in named fields?")
+                            .to_string()
+                    })
+                    .collect();
+                field_names.sort();
+
+                if labels.len() != field_names.len() {
+                    panic!(
+                        "Mismatch between captures and field count in enum variant {}::{}",
+                        name, variant
+                    );
+                }
+                // Both lists are sorted, so compare them position by position.
+                for (label, field) in labels.iter().zip(&field_names) {
+                    if label != field {
+                        panic!(
+                            "Mismatch in field names, saw {} expected {}, in enum variant {}::{}",
+                            label, field, name, variant
+                        )
+                    }
+                }
+            }
+            syn::Fields::Unnamed(ref unamed) => {
+                // Tuple fields have no names to match against, so all that
+                // can be checked is that no group is labelled and that the
+                // number of groups equals the number of fields.
+                let unlabelled = compiled.capture_names().filter(Option::is_none).count();
+                if unlabelled != group_count {
+                    panic!(
+                        "Discovered labelled capture in enum variant {}::{}",
+                        name, variant
+                    )
+                }
+                // `unlabelled` includes group 0, hence the `+ 1`.
+                if unlabelled != unamed.unnamed.len() + 1 {
+                    panic!(
+                        "Capture count mismatch in enum variant {}::{}",
+                        name, variant
+                    )
+                }
+            }
+        }
+    }
+}
+
+/// Returns the identifier of the static holding a variant's compiled regex:
+/// `PARSER_` followed by the upper-cased variant name.
+fn get_named_variant_parser(var: &syn::Ident) -> syn::Ident {
+    let upper = var.to_string().to_uppercase();
+    let static_name = format!("PARSER_{}", upper);
+    syn::Ident::new(&static_name, proc_macro2::Span::call_site())
+}
+
+/// Generates the contents of the `lazy_static!` block for an enum: one
+/// `static ref PARSER_<VARIANT>: Regex = ...` declaration per variant,
+/// separated and terminated by semicolons.
+///
+/// # Panics
+///
+/// Panics if a variant has no `#[regex = "..."]` attribute.
+fn gen_enum_parsers(name: &syn::Ident, edata: &syn::DataEnum) -> proc_macro2::TokenStream {
+    let mut statics = Vec::with_capacity(edata.variants.len());
+    for var in edata.variants.iter() {
+        let pattern = match find_regex(&var.attrs) {
+            Some(lit) => lit,
+            None => panic!(
+                r#"Unable to find #[regex = "somesuch"] for variant {}::{}"#,
+                name, var.ident
+            ),
+        };
+        let static_name = get_named_variant_parser(&var.ident);
+        statics.push(quote! {
+            static ref #static_name: Regex = Regex::new(#pattern).expect("Unable to parse regular expression")
+        });
+    }
+
+    quote! {
+        #(#statics);*;
+    }
+}
+
+/// Generates the matching logic of a derived enum parser.
+///
+/// The output is an `if let ... else if let ... else { Err(..)? }` chain
+/// with one branch per variant, in declaration order. Each branch runs the
+/// variant's `PARSER_<VARIANT>` regex against `input` and, on a match,
+/// builds the variant:
+///
+/// * unit variants are returned directly;
+/// * tuple variants parse capture groups `1..=n` positionally into locals
+///   `CAP_1..CAP_n`;
+/// * struct-like variants parse each field from the capture group that
+///   bears the field's name.
+///
+/// The final `else` reports that the input could not be parsed as the enum.
+fn gen_enum_body(name: &syn::Ident, edata: &syn::DataEnum) -> proc_macro2::TokenStream {
+    let strname = name.to_string();
+
+    let mut attempts = Vec::with_capacity(edata.variants.len());
+    for var in edata.variants.iter() {
+        let parsername = get_named_variant_parser(&var.ident);
+        let varname = &var.ident;
+
+        let construct = match var.fields {
+            syn::Fields::Unit => quote! {
+                Ok(#name :: #varname)
+            },
+            syn::Fields::Unnamed(ref unnamed) => {
+                let mut capnames: Vec<syn::Ident> = Vec::new();
+                let mut caps = Vec::new();
+                // Group 0 is the whole match, so fields start at group 1.
+                for index in 1..=unnamed.unnamed.len() {
+                    let capname = syn::Ident::new(
+                        &format!("CAP_{}", index),
+                        proc_macro2::Span::call_site(),
+                    );
+                    caps.push(quote! {
+                        let #capname = ParseByRegex::parse_by_regex(captures.get(#index).expect("Confusing, missing capture").as_str())?
+                    });
+                    capnames.push(capname);
+                }
+
+                quote! {
+                    #(#caps);*;
+                    Ok(#name :: #varname (#(#capnames),*))
+                }
+            }
+            syn::Fields::Named(ref named) => {
+                let fields: Vec<&syn::Ident> = named
+                    .named
+                    .iter()
+                    .map(|f| f.ident.as_ref().expect("Named fields should be named"))
+                    .collect();
+                let caps: Vec<_> = fields
+                    .iter()
+                    .map(|ident| {
+                        let sident = ident.to_string();
+                        quote! {
+                            let #ident = ParseByRegex::parse_by_regex(&captures[#sident])?
+                        }
+                    })
+                    .collect();
+
+                quote! {
+                    #(#caps);*;
+                    Ok(#name :: #varname {#(#fields),*})
+                }
+            }
+        };
+
+        attempts.push(quote! {
+            if let Some(captures) = #parsername .captures(input.as_ref()) {
+                #construct
+            }
+        });
+    }
+
+    quote! {
+        #(#attempts else)* {
+            Err(format!("Unable to parse `{}` as {}", input.as_ref(), #strname))?
+        }
+    }
+}
diff --git a/parsebyregex-derive/tests/basic.rs b/parsebyregex-derive/tests/basic.rs
new file mode 100644
index 0000000..16cdefd
--- /dev/null
+++ b/parsebyregex-derive/tests/basic.rs
@@ -0,0 +1,15 @@
+use parsebyregex::*;
+
+use parsebyregex_derive::ParseByRegex;
+
+// Smallest struct case for the derive: the regex has a single named
+// capture, `age`, which has to match the struct's only field by name.
+// The captured text is converted to `u32` through `ParseByRegex`.
+#[derive(ParseByRegex)]
+#[regex = r"age=(?P<age>\d+)"]
+struct Person {
+ pub age: u32,
+}
+
+/// Parses `age=14` into a `Person` and checks the captured age.
+///
+/// This has to be a `#[test]` function: integration test files are built
+/// with the test harness, which supplies its own entry point, so a plain
+/// `fn main` in this file would be dead code and the check would never run.
+#[test]
+fn parses_person() {
+    let person =
+        Person::parse_by_regex("age=14").expect("`age=14` should parse into a Person");
+    assert_eq!(person.age, 14);
+}
diff --git a/parsebyregex-derive/tests/enum.rs b/parsebyregex-derive/tests/enum.rs
new file mode 100644
index 0000000..9335ef4
--- /dev/null
+++ b/parsebyregex-derive/tests/enum.rs
@@ -0,0 +1,42 @@
+use parsebyregex::*;
+
+use parsebyregex_derive::ParseByRegex;
+
+// Exercises all three variant shapes the derive supports on enums.
+#[derive(ParseByRegex, Debug)]
+enum Thingy {
+ // Unit variant: the pattern must not contain any capture group.
+ #[regex = r"nada"]
+ Nada,
+ // Tuple variant: one unnamed capture group per field, taken in order.
+ #[regex = r"stuff\((\d+)\)"]
+ Stuff(u32),
+ // Struct-like variant: captures are matched to fields by name, so their
+ // order in the pattern (`age`, `name`) need not follow the field order.
+ #[regex = r"things\((?P<age>\d+), *(?P<name>[^\)]+)\)"]
+ Things { name: String, age: u32 },
+}
+
+// One input per variant, in the same order as the variants above.
+static TESTS: [&str; 3] = ["nada", "stuff(4)", "things(14, Susan)"];
+
+/// Parses every entry of `TESTS` and checks that each one produced the
+/// expected variant with the expected payload.
+#[test]
+fn check_enums() {
+    let parsed: Result<Vec<Thingy>, _> =
+        TESTS.iter().map(ParseByRegex::parse_by_regex).collect();
+    let parsed = parsed.unwrap();
+    assert_eq!(parsed.len(), 3);
+
+    match &parsed[0] {
+        Thingy::Nada => {}
+        other => panic!("Unexpected {:?}, expecting Thingy::Nada", other),
+    }
+
+    match &parsed[1] {
+        Thingy::Stuff(n) => assert_eq!(*n, 4),
+        other => panic!("Unexpected {:?}, expecting Thingy::Stuff(4)", other),
+    }
+
+    match &parsed[2] {
+        Thingy::Things { name, age } => {
+            assert_eq!(*age, 14);
+            assert_eq!(name, "Susan");
+        }
+        other => panic!(
+            "Unexpected {:?}, expecting Thingy::Things{{name:\"Susan\", age=14}}",
+            other
+        ),
+    }
+}
diff --git a/parsebyregex-example/Cargo.toml b/parsebyregex-example/Cargo.toml
new file mode 100644
index 0000000..1a9b85f
--- /dev/null
+++ b/parsebyregex-example/Cargo.toml
@@ -0,0 +1,10 @@
+[package]
+name = "parsebyregex-example"
+version = "0.1.0"
+authors = ["Daniel Silverstone <dsilvers@digital-scurf.org>"]
+edition = "2018"
+
+[dependencies]
+parsebyregex = {path=".."}
+parsebyregex-derive = {path="../parsebyregex-derive"}
+chrono="0.4"
diff --git a/parsebyregex-example/src/main.rs b/parsebyregex-example/src/main.rs
new file mode 100644
index 0000000..b30a3ca
--- /dev/null
+++ b/parsebyregex-example/src/main.rs
@@ -0,0 +1,64 @@
+use parsebyregex::ParseByRegex;
+
+use parsebyregex_derive::ParseByRegex;
+
+use chrono::prelude::*;
+
+/// Newtype around `DateTime<Utc>` with a hand-written `ParseByRegex`
+/// implementation, so that it can be used as a field of a derived type.
+#[derive(Debug)]
+struct MyDateTime(DateTime<Utc>);
+
+impl ParseByRegex for MyDateTime {
+    /// Parses a `YYYY-MM-DD HH:MM` timestamp and interprets it as UTC.
+    fn parse_by_regex<T: AsRef<str>>(input: T) -> Result<MyDateTime, Box<std::error::Error>> {
+        static DATEFMT: &str = "%Y-%m-%d %H:%M";
+        let parsed = Utc.datetime_from_str(input.as_ref(), DATEFMT)?;
+        Ok(MyDateTime(parsed))
+    }
+}
+
+// What happened in a log entry. The derived parser tries the patterns in
+// declaration order and returns the first variant whose pattern matches.
+#[derive(Debug, ParseByRegex)]
+enum Action {
+ #[regex = r"^falls *asleep$"]
+ FallsAsleep,
+ #[regex = r"^wakes *up$"]
+ WakesUp,
+ // The single unnamed capture group (the guard number) fills the tuple field.
+ #[regex = r"^Guard #(\d+) begins shift"]
+ GuardShift(usize),
+}
+
+// One log line of the form `[<when>] <action>`. Each named capture is
+// handed to the `ParseByRegex` implementation of the field with the same
+// name: the hand-written one for `MyDateTime`, the derived one for `Action`.
+#[derive(ParseByRegex, Debug)]
+#[regex = r#"^\[(?P<when>[^\]]+)\] (?P<action>.*)$"#]
+struct Entry {
+ when: MyDateTime,
+ action: Action,
+}
+
+// Sample log, one `[timestamp] action` entry per line. The leading and
+// trailing newlines of the raw string are trimmed away by `main` before
+// the text is split into lines.
+static TEST_INPUT: &str = r#"
+[1518-11-01 00:00] Guard #10 begins shift
+[1518-11-01 00:05] falls asleep
+[1518-11-01 00:25] wakes up
+[1518-11-01 00:30] falls asleep
+[1518-11-01 00:55] wakes up
+[1518-11-01 23:58] Guard #99 begins shift
+[1518-11-02 00:40] falls asleep
+[1518-11-02 00:50] wakes up
+[1518-11-03 00:05] Guard #10 begins shift
+[1518-11-03 00:24] falls asleep
+[1518-11-03 00:29] wakes up
+[1518-11-04 00:02] Guard #99 begins shift
+[1518-11-04 00:36] falls asleep
+[1518-11-04 00:46] wakes up
+[1518-11-05 00:03] Guard #99 begins shift
+[1518-11-05 00:45] falls asleep
+[1518-11-05 00:55] wakes up
+"#;
+
+/// Parses every line of `TEST_INPUT` into an `Entry` and prints the
+/// entries in order. If any line fails to parse, the error is returned
+/// before anything is printed.
+fn main() -> Result<(), Box<std::error::Error>> {
+    let entries = TEST_INPUT
+        .trim()
+        .lines()
+        .map(ParseByRegex::parse_by_regex)
+        .collect::<Result<Vec<Entry>, _>>()?;
+
+    for entry in &entries {
+        println!("{:?}", entry);
+    }
+    Ok(())
+}
diff --git a/rust-toolchain b/rust-toolchain
new file mode 100644
index 0000000..65b2df8
--- /dev/null
+++ b/rust-toolchain
@@ -0,0 +1 @@
+beta
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..39f26ca
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,40 @@
+//! Parsing by regular expression
+
+use std::error::Error;
+use std::str::FromStr;
+
+pub use lazy_static::lazy_static;
+pub use regex::Regex;
+
+/// Parse a value out of a string, typically by matching a regular
+/// expression.
+///
+/// This plays the same role as [`FromStr`], but it is a separate trait so
+/// that the `ParseByRegex` derive macro has something of its own to
+/// implement for your types.
+///
+/// Every type that implements `FromStr` with an error type that implements
+/// [`Error`] gets an implementation for free through the blanket impl
+/// below, so integers, `String` and the like can be used directly as
+/// fields of derived types.
+pub trait ParseByRegex: Sized {
+ /// Parses `input` into `Self`, returning a boxed error describing the
+ /// failure when the input cannot be parsed.
+ fn parse_by_regex<T: AsRef<str>>(input: T) -> Result<Self, Box<Error>>;
+}
+
+/// Blanket implementation: any type that can be parsed with `FromStr`, and
+/// whose parse error implements `Error`, is parsed by delegating to
+/// `FromStr` with the parse error boxed.
+impl<TY> ParseByRegex for TY
+where
+    TY: FromStr,
+    <TY as FromStr>::Err: Error + 'static,
+{
+    fn parse_by_regex<T: AsRef<str>>(input: T) -> Result<Self, Box<Error>> {
+        match input.as_ref().parse::<Self>() {
+            Ok(value) => Ok(value),
+            Err(err) => Err(err.into()),
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::ParseByRegex;
+
+    /// The blanket impl lets a plain `FromStr` type such as `u32` be parsed
+    /// through `ParseByRegex`.
+    #[test]
+    fn basic_check() {
+        let parsed = u32::parse_by_regex("1");
+        assert!(parsed.is_ok());
+        assert_eq!(parsed.unwrap(), 1);
+    }
+}