Switch to lalrpop for parsing/lexing

author: David Li <li.davidm96@gmail.com> 2017-11-03 23:37:17 -0400
committer: David Li <li.davidm96@gmail.com> 2017-11-03 23:37:37 -0400
commit: d5b0224a398810a2e34713d72c6710f4f54df420 (patch)
tree: f56bf6cd72292bfab30f2bcd75e41c921fe089fd /src
parent: 169870428a9074d2429e6ec3dac64e84bb0392e3 (diff)
4 files changed, 24 insertions, 111 deletions
diff --git a/src/ast.rs b/src/ast.rs
new file mode 100644
index 0000000..b5f2049
--- /dev/null
+++ b/src/ast.rs
@@ -0,0 +1,7 @@
+#[derive(Debug)]
+pub struct Program(pub Box<Expression>);
+
+#[derive(Debug)]
+pub enum Expression {
+    Number(u64),
+}
diff --git a/src/lexer.rs b/src/lexer.rs
deleted file mode 100644
index dd89fe5..0000000
--- a/src/lexer.rs
+++ /dev/null
@@ -1,101 +0,0 @@
-use regex::Regex;
-
-#[derive(Clone,Debug)]
-pub enum TokenKind {
-    Let,
-    In,
-    End,
-    Equals,
-    Integer(u64),
-    Name(String),
-}
-
-#[derive(Debug)]
-pub struct Token {
-    pub kind: TokenKind,
-    pub start: usize,
-    pub end: usize,
-}
-
-pub struct Lexer<'a> {
-    input: &'a str,
-    position: usize,
-}
-
-lazy_static! {
-    static ref WHITESPACE: Regex = Regex::new(r"^\s+").unwrap();
-    static ref LET: Regex        = Regex::new(r"^let").unwrap();
-    static ref IN: Regex         = Regex::new(r"^in").unwrap();
-    static ref END: Regex        = Regex::new(r"^end").unwrap();
-    static ref EQUALS: Regex     = Regex::new(r"^=").unwrap();
-    static ref INTEGER: Regex    = Regex::new(r"^[0-9]+").unwrap();
-    static ref NAME: Regex       = Regex::new(r"^[_[:alpha:]][_[:alpha:]0-9]*").unwrap();
-
-    static ref TOKENS: [(&'static Regex, fn(&str) -> TokenKind); 6] = [
-        (&LET, {fn temp(_: &str) -> TokenKind {
-            TokenKind::Let
-        } temp}),
-        (&IN, {fn temp(_: &str) -> TokenKind {
-            TokenKind::In
-        } temp}),
-        (&END, {fn temp(_: &str) -> TokenKind {
-            TokenKind::End
-        } temp}),
-        (&EQUALS, {fn temp(_: &str) -> TokenKind {
-            TokenKind::Equals
-        } temp}),
-        (&INTEGER, {fn temp(num: &str) -> TokenKind {
-            TokenKind::Integer(num.parse::<u64>().unwrap())
-        } temp}),
-        (&NAME, {fn temp(name: &str) -> TokenKind {
-            TokenKind::Name(name.to_owned())
-        } temp}),
-    ];
-}
-
-impl<'a> Lexer<'a> {
-    pub fn new(input: &'a str) -> Lexer<'a> {
-        // blog.matthewcheok.com/writing-a-lexer-in-swift
-        Lexer {
-            input: input,
-            position: 0,
-        }
-    }
-
-    fn skip_whitespace(&mut self) {
-        if let Some(result) = WHITESPACE.find(&self.input[self.position..]) {
-            self.position += result.end();
-        }
-    }
-}
-
-impl<'a> Iterator for Lexer<'a> {
-    type Item = Token;
-
-    fn next(&mut self) -> Option<Self::Item> {
-        if self.position >= self.input.len() {
-            None
-        }
-        else {
-            self.skip_whitespace();
-            if self.position >= self.input.len() {
-                return None;
-            }
-
-            for &(regexp, builder) in TOKENS.iter() {
-                if let Some(result) = regexp.find(&self.input[self.position..]) {
-                    let position = self.position;
-                    self.position += result.end();
-                    return Some(Token {
-                        kind: builder(result.as_str()),
-                        start: position + result.start(),
-                        end: position + result.end(),
-                    });
-                }
-            }
-
-            self.position = self.input.len();
-            None
-        }
-    }
-}
diff --git a/src/main.rs b/src/main.rs
index 5e071b0..5b343ec 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,13 +1,6 @@
-#[macro_use] extern crate lazy_static;
-extern crate regex;
-
-pub mod lexer;
+pub mod ast;
+pub mod taiga;
 
 fn main() {
-    println!("Hello, world!");
-    let s = "let x = 5 in x end   end ";
-    let lex = lexer::Lexer::new(s);
-    for token in lex {
-        println!("{:?}", token);
-    }
+    println!("{:?}", taiga::parse_Program("5"));
 }
diff --git a/src/taiga.lalrpop b/src/taiga.lalrpop
new file mode 100644
index 0000000..b60d68f
--- /dev/null
+++ b/src/taiga.lalrpop
@@ -0,0 +1,14 @@
+use std::str::FromStr;
+use ast;
+
+grammar;
+
+pub Program: ast::Program = {
+    Expression => ast::Program(<>),
+};
+
+Expression: Box<ast::Expression> = {
+    Num => Box::new(ast::Expression::Number(<>)),
+};
+
+Num: u64 = r"[0-9]+" => u64::from_str(<>).unwrap();
author: David Li <li.davidm96@gmail.com> 2017-11-03 23:37:17 -0400
committer: David Li <li.davidm96@gmail.com> 2017-11-03 23:37:37 -0400
commit: d5b0224a398810a2e34713d72c6710f4f54df420 (patch)
tree: f56bf6cd72292bfab30f2bcd75e41c921fe089fd /src
parent: 169870428a9074d2429e6ec3dac64e84bb0392e3 (diff)