From d5b0224a398810a2e34713d72c6710f4f54df420 Mon Sep 17 00:00:00 2001 From: David Li Date: Fri, 3 Nov 2017 23:37:17 -0400 Subject: Switch to lalrpop for parsing/lexing --- Cargo.lock | 208 +++++++++++++++++++++++++++++++++++++++++++++++++++++- Cargo.toml | 11 ++- build.rs | 5 ++ src/ast.rs | 7 ++ src/lexer.rs | 101 -------------------------- src/main.rs | 13 +--- src/taiga.lalrpop | 14 ++++ 7 files changed, 244 insertions(+), 115 deletions(-) create mode 100644 build.rs create mode 100644 src/ast.rs delete mode 100644 src/lexer.rs create mode 100644 src/taiga.lalrpop diff --git a/Cargo.lock b/Cargo.lock index 7a58190..175e5e5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6,6 +6,140 @@ dependencies = [ "memchr 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "ascii-canvas" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "term 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "atty" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.33 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "bit-set" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "bit-vec 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "bit-vec" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "diff" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "docopt" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "lazy_static 0.2.9 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", + "rustc-serialize 0.3.24 (registry+https://github.com/rust-lang/crates.io-index)", + "strsim 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "either" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "ena" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "fixedbitset" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "itertools" +version = "0.5.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "either 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "kernel32-sys" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "lalrpop" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "ascii-canvas 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", + "atty 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", + "bit-set 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "diff 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)", + "docopt 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", + "ena 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", + "itertools 0.5.10 (registry+https://github.com/rust-lang/crates.io-index)", + "lalrpop-intern 0.13.1 (registry+https://github.com/rust-lang/crates.io-index)", + "lalrpop-snap 0.13.1 (registry+https://github.com/rust-lang/crates.io-index)", + "lalrpop-util 0.13.1 (registry+https://github.com/rust-lang/crates.io-index)", + "petgraph 0.4.10 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", + "regex-syntax 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)", + "rustc-serialize 0.3.24 (registry+https://github.com/rust-lang/crates.io-index)", + "term 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", + "unicode-xid 0.0.4 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "lalrpop-intern" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "lalrpop-snap" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "ascii-canvas 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", + "atty 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", + "bit-set 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "diff 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)", + "docopt 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", + "ena 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", + "itertools 0.5.10 (registry+https://github.com/rust-lang/crates.io-index)", + "lalrpop-intern 0.13.1 (registry+https://github.com/rust-lang/crates.io-index)", + "lalrpop-util 0.13.1 (registry+https://github.com/rust-lang/crates.io-index)", + "petgraph 0.4.10 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", + "regex-syntax 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)", + "rustc-serialize 0.3.24 (registry+https://github.com/rust-lang/crates.io-index)", + "term 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", + "unicode-xid 0.0.4 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "lalrpop-util" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "lazy_static" version = "0.2.9" @@ -24,6 +158,20 @@ dependencies = [ "libc 0.2.33 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "ordermap" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "petgraph" +version = "0.4.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "fixedbitset 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", + "ordermap 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "regex" version = "0.2.2" @@ -41,14 +189,34 @@ name = "regex-syntax" version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "rustc-serialize" +version = "0.3.24" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "strsim" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "taiga" version = "0.1.0" dependencies = [ - "lazy_static 0.2.9 (registry+https://github.com/rust-lang/crates.io-index)", + "lalrpop 0.13.1 (registry+https://github.com/rust-lang/crates.io-index)", + "lalrpop-util 0.13.1 (registry+https://github.com/rust-lang/crates.io-index)", "regex 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "term" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "thread_local" version = "0.3.4" @@ -58,6 +226,11 @@ dependencies = [ "unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "unicode-xid" +version = "0.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "unreachable" version = "1.0.0" @@ -76,14 +249,47 @@ name = "void" version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "winapi" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "winapi-build" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" + [metadata] "checksum aho-corasick 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)" = "500909c4f87a9e52355b26626d890833e9e1d53ac566db76c36faa984b889699" +"checksum ascii-canvas 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b385d69402821a1c254533a011a312531cbcc0e3e24f19bbb4747a5a2daf37e2" +"checksum atty 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "d0fd4c0631f06448cc45a6bbb3b710ebb7ff8ccb96a0800c994afe23a70d5df2" +"checksum bit-set 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d9bf6104718e80d7b26a68fdbacff3481cfc05df670821affc7e9cbc1884400c" +"checksum bit-vec 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "02b4ff8b16e6076c3e14220b39fbc1fabb6737522281a388998046859400895f" +"checksum diff 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)" = "0a515461b6c8c08419850ced27bc29e86166dcdcde8fbe76f8b1f0589bb49472" +"checksum docopt 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ab32ea6e284d87987066f21a9e809a73c14720571ef34516f0890b3d355ccfd8" +"checksum either 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e311a7479512fbdf858fb54d91ec59f3b9f85bc0113659f46bba12b199d273ce" +"checksum ena 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "cabe5a5078ac8c506d3e4430763b1ba9b609b1286913e7d08e581d1c2de9b7e5" +"checksum fixedbitset 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "85cb8fec437468d86dc7c83ca7cfc933341d561873275f22dd5eedefa63a6478" +"checksum itertools 0.5.10 (registry+https://github.com/rust-lang/crates.io-index)" = "4833d6978da405305126af4ac88569b5d71ff758581ce5a987dbfa3755f694fc" +"checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d" +"checksum lalrpop 0.13.1 (registry+https://github.com/rust-lang/crates.io-index)" = "8ebe5a5c90d5edeecb7f62f6ebec0a3d0f6faf4759a052708348cda99fd311a0" +"checksum lalrpop-intern 0.13.1 (registry+https://github.com/rust-lang/crates.io-index)" = "05410c1e4aff497bdea1ccb274ac35536fda0ee858600df36966502d4f7acbe3" +"checksum lalrpop-snap 0.13.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3f866ece35287f5223a1a022c5d86417c260cda2ca9c8a156af9959404ce5313" +"checksum lalrpop-util 0.13.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7c7743f235fc17f5f50f3b1e64a8690ee154f17f86bd68cbb78787c5b37907f7" "checksum lazy_static 0.2.9 (registry+https://github.com/rust-lang/crates.io-index)" = "c9e5e58fa1a4c3b915a561a78a22ee0cac6ab97dca2504428bc1cb074375f8d5" "checksum libc 0.2.33 (registry+https://github.com/rust-lang/crates.io-index)" = "5ba3df4dcb460b9dfbd070d41c94c19209620c191b0340b929ce748a2bcd42d2" "checksum memchr 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "148fab2e51b4f1cfc66da2a7c32981d1d3c083a803978268bb11fe4b86925e7a" +"checksum ordermap 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8c7790b1bc9bf27776cd5cdeaae1263758c2c597d4ae02b58aa63c320f94d778" +"checksum petgraph 0.4.10 (registry+https://github.com/rust-lang/crates.io-index)" = "28d0872a49ce3ee71b345f4fa675afe394d9e0d077f8eeeb3d04081724065d67" "checksum regex 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "1731164734096285ec2a5ec7fea5248ae2f5485b3feeb0115af4fda2183b2d1b" "checksum regex-syntax 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "ad890a5eef7953f55427c50575c680c42841653abd2b028b68cd223d157f62db" +"checksum rustc-serialize 0.3.24 (registry+https://github.com/rust-lang/crates.io-index)" = "dcf128d1287d2ea9d80910b5f1120d0b8eede3fbf1abe91c40d39ea7d51e6fda" +"checksum strsim 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b4d15c810519a91cf877e7e36e63fe068815c678181439f2f29e2562147c3694" +"checksum term 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)" = "fa63644f74ce96fbeb9b794f66aff2a52d601cbd5e80f4b97123e3899f4570f1" "checksum thread_local 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "1697c4b57aeeb7a536b647165a2825faddffb1d3bad386d507709bd51a90bb14" +"checksum unicode-xid 0.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "8c1f860d7d29cf02cb2f3f359fd35991af3d30bac52c57d265a3c461074cb4dc" "checksum unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "382810877fe448991dfc7f0dd6e3ae5d58088fd0ea5e35189655f84e6814fa56" "checksum utf8-ranges 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "662fab6525a98beff2921d7f61a39e7d59e0b425ebc7d0d9e66d316e55124122" "checksum void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d" +"checksum winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a" +"checksum winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc" diff --git a/Cargo.toml b/Cargo.toml index 14d5b5a..e209fce 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,6 +3,11 @@ name = "taiga" version = "0.1.0" authors = ["David Li "] -[dependencies] -lazy_static = "0.2.9" -regex = "0.2" \ No newline at end of file +[dependencies.regex] +version = "0.2.0" + +[dependencies.lalrpop-util] +version = "0.13.1" + +[build-dependencies.lalrpop] +version = "0.13.1" \ No newline at end of file diff --git a/build.rs b/build.rs new file mode 100644 index 0000000..23c7d3f --- /dev/null +++ b/build.rs @@ -0,0 +1,5 @@ +extern crate lalrpop; + +fn main() { + lalrpop::process_root().unwrap(); +} diff --git a/src/ast.rs b/src/ast.rs new file mode 100644 index 0000000..b5f2049 --- /dev/null +++ b/src/ast.rs @@ -0,0 +1,7 @@ +#[derive(Debug)] +pub struct Program(pub Box); + +#[derive(Debug)] +pub enum Expression { + Number(u64), +} diff --git a/src/lexer.rs b/src/lexer.rs deleted file mode 100644 index dd89fe5..0000000 --- a/src/lexer.rs +++ /dev/null @@ -1,101 +0,0 @@ -use regex::Regex; - -#[derive(Clone,Debug)] -pub enum TokenKind { - Let, - In, - End, - Equals, - Integer(u64), - Name(String), -} - -#[derive(Debug)] -pub struct Token { - pub kind: TokenKind, - pub start: usize, - pub end: usize, -} - -pub struct Lexer<'a> { - input: &'a str, - position: usize, -} - -lazy_static! { - static ref WHITESPACE: Regex = Regex::new(r"^\s+").unwrap(); - static ref LET: Regex = Regex::new(r"^let").unwrap(); - static ref IN: Regex = Regex::new(r"^in").unwrap(); - static ref END: Regex = Regex::new(r"^end").unwrap(); - static ref EQUALS: Regex = Regex::new(r"^=").unwrap(); - static ref INTEGER: Regex = Regex::new(r"^[0-9]+").unwrap(); - static ref NAME: Regex = Regex::new(r"^[_[:alpha:]][_[:alpha:]0-9]*").unwrap(); - - static ref TOKENS: [(&'static Regex, fn(&str) -> TokenKind); 6] = [ - (&LET, {fn temp(_: &str) -> TokenKind { - TokenKind::Let - } temp}), - (&IN, {fn temp(_: &str) -> TokenKind { - TokenKind::In - } temp}), - (&END, {fn temp(_: &str) -> TokenKind { - TokenKind::End - } temp}), - (&EQUALS, {fn temp(_: &str) -> TokenKind { - TokenKind::Equals - } temp}), - (&INTEGER, {fn temp(num: &str) -> TokenKind { - TokenKind::Integer(num.parse::().unwrap()) - } temp}), - (&NAME, {fn temp(name: &str) -> TokenKind { - TokenKind::Name(name.to_owned()) - } temp}), - ]; -} - -impl<'a> Lexer<'a> { - pub fn new(input: &'a str) -> Lexer<'a> { - // blog.matthewcheok.com/writing-a-lexer-in-swift - Lexer { - input: input, - position: 0, - } - } - - fn skip_whitespace(&mut self) { - if let Some(result) = WHITESPACE.find(&self.input[self.position..]) { - self.position += result.end(); - } - } -} - -impl<'a> Iterator for Lexer<'a> { - type Item = Token; - - fn next(&mut self) -> Option { - if self.position >= self.input.len() { - None - } - else { - self.skip_whitespace(); - if self.position >= self.input.len() { - return None; - } - - for &(regexp, builder) in TOKENS.iter() { - if let Some(result) = regexp.find(&self.input[self.position..]) { - let position = self.position; - self.position += result.end(); - return Some(Token { - kind: builder(result.as_str()), - start: position + result.start(), - end: position + result.end(), - }); - } - } - - self.position = self.input.len(); - None - } - } -} diff --git a/src/main.rs b/src/main.rs index 5e071b0..5b343ec 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,13 +1,6 @@ -#[macro_use] extern crate lazy_static; -extern crate regex; - -pub mod lexer; +pub mod ast; +pub mod taiga; fn main() { - println!("Hello, world!"); - let s = "let x = 5 in x end end "; - let lex = lexer::Lexer::new(s); - for token in lex { - println!("{:?}", token); - } + println!("{:?}", taiga::parse_Program("5")); } diff --git a/src/taiga.lalrpop b/src/taiga.lalrpop new file mode 100644 index 0000000..b60d68f --- /dev/null +++ b/src/taiga.lalrpop @@ -0,0 +1,14 @@ +use std::str::FromStr; +use ast; + +grammar; + +pub Program: ast::Program = { + Expression => ast::Program(<>), +}; + +Expression: Box = { + Num => Box::new(ast::Expression::Number(<>)), +}; + +Num: u64 = r"[0-9]+" => u64::from_str(<>).unwrap(); -- cgit v1.2.3