From d5b0224a398810a2e34713d72c6710f4f54df420 Mon Sep 17 00:00:00 2001
From: David Li
Date: Fri, 3 Nov 2017 23:37:17 -0400
Subject: Switch to lalrpop for parsing/lexing

---
 src/ast.rs        |   7 ++++
 src/lexer.rs      | 101 ------------------------------------------------------
 src/main.rs       |  13 ++-----
 src/taiga.lalrpop |  14 ++++++++
 4 files changed, 24 insertions(+), 111 deletions(-)
 create mode 100644 src/ast.rs
 delete mode 100644 src/lexer.rs
 create mode 100644 src/taiga.lalrpop

diff --git a/src/ast.rs b/src/ast.rs
new file mode 100644
index 0000000..b5f2049
--- /dev/null
+++ b/src/ast.rs
@@ -0,0 +1,7 @@
+#[derive(Debug)]
+pub struct Program(pub Box<Expression>);
+
+#[derive(Debug)]
+pub enum Expression {
+    Number(u64),
+}
diff --git a/src/lexer.rs b/src/lexer.rs
deleted file mode 100644
index dd89fe5..0000000
--- a/src/lexer.rs
+++ /dev/null
@@ -1,101 +0,0 @@
-use regex::Regex;
-
-#[derive(Clone,Debug)]
-pub enum TokenKind {
-    Let,
-    In,
-    End,
-    Equals,
-    Integer(u64),
-    Name(String),
-}
-
-#[derive(Debug)]
-pub struct Token {
-    pub kind: TokenKind,
-    pub start: usize,
-    pub end: usize,
-}
-
-pub struct Lexer<'a> {
-    input: &'a str,
-    position: usize,
-}
-
-lazy_static! {
-    static ref WHITESPACE: Regex = Regex::new(r"^\s+").unwrap();
-    static ref LET: Regex = Regex::new(r"^let").unwrap();
-    static ref IN: Regex = Regex::new(r"^in").unwrap();
-    static ref END: Regex = Regex::new(r"^end").unwrap();
-    static ref EQUALS: Regex = Regex::new(r"^=").unwrap();
-    static ref INTEGER: Regex = Regex::new(r"^[0-9]+").unwrap();
-    static ref NAME: Regex = Regex::new(r"^[_[:alpha:]][_[:alpha:]0-9]*").unwrap();
-
-    static ref TOKENS: [(&'static Regex, fn(&str) -> TokenKind); 6] = [
-        (&LET, {fn temp(_: &str) -> TokenKind {
-            TokenKind::Let
-        } temp}),
-        (&IN, {fn temp(_: &str) -> TokenKind {
-            TokenKind::In
-        } temp}),
-        (&END, {fn temp(_: &str) -> TokenKind {
-            TokenKind::End
-        } temp}),
-        (&EQUALS, {fn temp(_: &str) -> TokenKind {
-            TokenKind::Equals
-        } temp}),
-        (&INTEGER, {fn temp(num: &str) -> TokenKind {
-            TokenKind::Integer(num.parse::<u64>().unwrap())
-        } temp}),
-        (&NAME, {fn temp(name: &str) -> TokenKind {
-            TokenKind::Name(name.to_owned())
-        } temp}),
-    ];
-}
-
-impl<'a> Lexer<'a> {
-    pub fn new(input: &'a str) -> Lexer<'a> {
-        // blog.matthewcheok.com/writing-a-lexer-in-swift
-        Lexer {
-            input: input,
-            position: 0,
-        }
-    }
-
-    fn skip_whitespace(&mut self) {
-        if let Some(result) = WHITESPACE.find(&self.input[self.position..]) {
-            self.position += result.end();
-        }
-    }
-}
-
-impl<'a> Iterator for Lexer<'a> {
-    type Item = Token;
-
-    fn next(&mut self) -> Option<Token> {
-        if self.position >= self.input.len() {
-            None
-        }
-        else {
-            self.skip_whitespace();
-            if self.position >= self.input.len() {
-                return None;
-            }
-
-            for &(regexp, builder) in TOKENS.iter() {
-                if let Some(result) = regexp.find(&self.input[self.position..]) {
-                    let position = self.position;
-                    self.position += result.end();
-                    return Some(Token {
-                        kind: builder(result.as_str()),
-                        start: position + result.start(),
-                        end: position + result.end(),
-                    });
-                }
-            }
-
-            self.position = self.input.len();
-            None
-        }
-    }
-}
diff --git a/src/main.rs b/src/main.rs
index 5e071b0..5b343ec 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,13 +1,6 @@
-#[macro_use] extern crate lazy_static;
-extern crate regex;
-
-pub mod lexer;
+pub mod ast;
+pub mod taiga;
 
 fn main() {
-    println!("Hello, world!");
-    let s = "let x = 5 in x end end ";
-    let lex = lexer::Lexer::new(s);
-    for token in lex {
-        println!("{:?}", token);
-    }
+    println!("{:?}", taiga::parse_Program("5"));
 }
diff --git a/src/taiga.lalrpop b/src/taiga.lalrpop
new file mode 100644
index 0000000..b60d68f
--- /dev/null
+++ b/src/taiga.lalrpop
@@ -0,0 +1,14 @@
+use std::str::FromStr;
+use ast;
+
+grammar;
+
+pub Program: ast::Program = {
+    Expression => ast::Program(<>),
+};
+
+Expression: Box<ast::Expression> = {
+    Num => Box::new(ast::Expression::Number(<>)),
+};
+
+Num: u64 = r"[0-9]+" => u64::from_str(<>).unwrap();
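
Note: every file in this patch sits under src/, so the build-side pieces lalrpop relies on are not shown here (they may live in a separate commit). The grammar in src/taiga.lalrpop has to be run through the lalrpop preprocessor before "pub mod taiga;" in main.rs can resolve to a generated module. A minimal sketch of that assumed setup is a build.rs at the crate root, with lalrpop listed as a build-dependency and lalrpop-util as a regular dependency in Cargo.toml:

    // build.rs -- assumed, not part of this diff.
    // Runs the lalrpop preprocessor over the crate so that
    // src/taiga.lalrpop is compiled into the generated taiga
    // module that main.rs declares with "pub mod taiga;".
    extern crate lalrpop;

    fn main() {
        lalrpop::process_root().unwrap();
    }

With that in place, cargo run should print something like Ok(Program(Number(5))), since the generated parse_Program entry point returns a Result wrapping the ast::Program built by the grammar.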