diff options
author | David Li <li.davidm96@gmail.com> | 2017-11-03 23:37:17 -0400 |
---|---|---|
committer | David Li <li.davidm96@gmail.com> | 2017-11-03 23:37:37 -0400 |
commit | d5b0224a398810a2e34713d72c6710f4f54df420 (patch) | |
tree | f56bf6cd72292bfab30f2bcd75e41c921fe089fd /src | |
parent | 169870428a9074d2429e6ec3dac64e84bb0392e3 (diff) |
Switch to lalrpop for parsing/lexing
Diffstat (limited to 'src')
-rw-r--r-- | src/ast.rs | 7 | ||||
-rw-r--r-- | src/lexer.rs | 101 | ||||
-rw-r--r-- | src/main.rs | 13 | ||||
-rw-r--r-- | src/taiga.lalrpop | 14 |
4 files changed, 24 insertions, 111 deletions
diff --git a/src/ast.rs b/src/ast.rs new file mode 100644 index 0000000..b5f2049 --- /dev/null +++ b/src/ast.rs @@ -0,0 +1,7 @@ +#[derive(Debug)] +pub struct Program(pub Box<Expression>); + +#[derive(Debug)] +pub enum Expression { + Number(u64), +} diff --git a/src/lexer.rs b/src/lexer.rs deleted file mode 100644 index dd89fe5..0000000 --- a/src/lexer.rs +++ /dev/null @@ -1,101 +0,0 @@ -use regex::Regex; - -#[derive(Clone,Debug)] -pub enum TokenKind { - Let, - In, - End, - Equals, - Integer(u64), - Name(String), -} - -#[derive(Debug)] -pub struct Token { - pub kind: TokenKind, - pub start: usize, - pub end: usize, -} - -pub struct Lexer<'a> { - input: &'a str, - position: usize, -} - -lazy_static! { - static ref WHITESPACE: Regex = Regex::new(r"^\s+").unwrap(); - static ref LET: Regex = Regex::new(r"^let").unwrap(); - static ref IN: Regex = Regex::new(r"^in").unwrap(); - static ref END: Regex = Regex::new(r"^end").unwrap(); - static ref EQUALS: Regex = Regex::new(r"^=").unwrap(); - static ref INTEGER: Regex = Regex::new(r"^[0-9]+").unwrap(); - static ref NAME: Regex = Regex::new(r"^[_[:alpha:]][_[:alpha:]0-9]*").unwrap(); - - static ref TOKENS: [(&'static Regex, fn(&str) -> TokenKind); 6] = [ - (&LET, {fn temp(_: &str) -> TokenKind { - TokenKind::Let - } temp}), - (&IN, {fn temp(_: &str) -> TokenKind { - TokenKind::In - } temp}), - (&END, {fn temp(_: &str) -> TokenKind { - TokenKind::End - } temp}), - (&EQUALS, {fn temp(_: &str) -> TokenKind { - TokenKind::Equals - } temp}), - (&INTEGER, {fn temp(num: &str) -> TokenKind { - TokenKind::Integer(num.parse::<u64>().unwrap()) - } temp}), - (&NAME, {fn temp(name: &str) -> TokenKind { - TokenKind::Name(name.to_owned()) - } temp}), - ]; -} - -impl<'a> Lexer<'a> { - pub fn new(input: &'a str) -> Lexer<'a> { - // blog.matthewcheok.com/writing-a-lexer-in-swift - Lexer { - input: input, - position: 0, - } - } - - fn skip_whitespace(&mut self) { - if let Some(result) = WHITESPACE.find(&self.input[self.position..]) { - self.position += result.end(); - } - } -} - -impl<'a> Iterator for Lexer<'a> { - type Item = Token; - - fn next(&mut self) -> Option<Self::Item> { - if self.position >= self.input.len() { - None - } - else { - self.skip_whitespace(); - if self.position >= self.input.len() { - return None; - } - - for &(regexp, builder) in TOKENS.iter() { - if let Some(result) = regexp.find(&self.input[self.position..]) { - let position = self.position; - self.position += result.end(); - return Some(Token { - kind: builder(result.as_str()), - start: position + result.start(), - end: position + result.end(), - }); - } - } - - self.position = self.input.len(); - None - } - } -} diff --git a/src/main.rs b/src/main.rs index 5e071b0..5b343ec 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,13 +1,6 @@ -#[macro_use] extern crate lazy_static; -extern crate regex; - -pub mod lexer; +pub mod ast; +pub mod taiga; fn main() { - println!("Hello, world!"); - let s = "let x = 5 in x end end "; - let lex = lexer::Lexer::new(s); - for token in lex { - println!("{:?}", token); - } + println!("{:?}", taiga::parse_Program("5")); } diff --git a/src/taiga.lalrpop b/src/taiga.lalrpop new file mode 100644 index 0000000..b60d68f --- /dev/null +++ b/src/taiga.lalrpop @@ -0,0 +1,14 @@ +use std::str::FromStr; +use ast; + +grammar; + +pub Program: ast::Program = { + Expression => ast::Program(<>), +}; + +Expression: Box<ast::Expression> = { + Num => Box::new(ast::Expression::Number(<>)), +}; + +Num: u64 = r"[0-9]+" => u64::from_str(<>).unwrap(); |