summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: David Li <li.davidm96@gmail.com> 2017-11-03 23:37:17 -0400
committer: David Li <li.davidm96@gmail.com> 2017-11-03 23:37:37 -0400
commit: d5b0224a398810a2e34713d72c6710f4f54df420 (patch)
tree: f56bf6cd72292bfab30f2bcd75e41c921fe089fd
parent: 169870428a9074d2429e6ec3dac64e84bb0392e3 (diff)
Switch to lalrpop for parsing/lexing
-rw-r--r-- Cargo.lock 208
-rw-r--r-- Cargo.toml 11
-rw-r--r-- build.rs 5
-rw-r--r-- src/ast.rs 7
-rw-r--r-- src/lexer.rs 101
-rw-r--r-- src/main.rs 13
-rw-r--r-- src/taiga.lalrpop 14
7 files changed, 244 insertions, 115 deletions
diff --git a/Cargo.lock b/Cargo.lock
index 7a58190..175e5e5 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -7,6 +7,140 @@ dependencies = [
]
[[package]]
+name = "ascii-canvas"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "term 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "atty"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
+ "libc 0.2.33 (registry+https://github.com/rust-lang/crates.io-index)",
+ "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "bit-set"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "bit-vec 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "bit-vec"
+version = "0.4.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
+name = "diff"
+version = "0.1.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
+name = "docopt"
+version = "0.7.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "lazy_static 0.2.9 (registry+https://github.com/rust-lang/crates.io-index)",
+ "regex 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
+ "rustc-serialize 0.3.24 (registry+https://github.com/rust-lang/crates.io-index)",
+ "strsim 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "either"
+version = "1.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
+name = "ena"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
+name = "fixedbitset"
+version = "0.1.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
+name = "itertools"
+version = "0.5.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "either 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "kernel32-sys"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
+ "winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "lalrpop"
+version = "0.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "ascii-canvas 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "atty 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
+ "bit-set 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "diff 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)",
+ "docopt 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "ena 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "itertools 0.5.10 (registry+https://github.com/rust-lang/crates.io-index)",
+ "lalrpop-intern 0.13.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "lalrpop-snap 0.13.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "lalrpop-util 0.13.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "petgraph 0.4.10 (registry+https://github.com/rust-lang/crates.io-index)",
+ "regex 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
+ "regex-syntax 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "rustc-serialize 0.3.24 (registry+https://github.com/rust-lang/crates.io-index)",
+ "term 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
+ "unicode-xid 0.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "lalrpop-intern"
+version = "0.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
+name = "lalrpop-snap"
+version = "0.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "ascii-canvas 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "atty 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
+ "bit-set 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "diff 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)",
+ "docopt 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "ena 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "itertools 0.5.10 (registry+https://github.com/rust-lang/crates.io-index)",
+ "lalrpop-intern 0.13.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "lalrpop-util 0.13.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "petgraph 0.4.10 (registry+https://github.com/rust-lang/crates.io-index)",
+ "regex 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
+ "regex-syntax 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "rustc-serialize 0.3.24 (registry+https://github.com/rust-lang/crates.io-index)",
+ "term 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
+ "unicode-xid 0.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "lalrpop-util"
+version = "0.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
name = "lazy_static"
version = "0.2.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -25,6 +159,20 @@ dependencies = [
]
[[package]]
+name = "ordermap"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
+name = "petgraph"
+version = "0.4.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "fixedbitset 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)",
+ "ordermap 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
name = "regex"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -42,14 +190,34 @@ version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
+name = "rustc-serialize"
+version = "0.3.24"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
+name = "strsim"
+version = "0.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
name = "taiga"
version = "0.1.0"
dependencies = [
- "lazy_static 0.2.9 (registry+https://github.com/rust-lang/crates.io-index)",
+ "lalrpop 0.13.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "lalrpop-util 0.13.1 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
+name = "term"
+version = "0.4.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
+ "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
name = "thread_local"
version = "0.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -59,6 +227,11 @@ dependencies = [
]
[[package]]
+name = "unicode-xid"
+version = "0.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
name = "unreachable"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -76,14 +249,47 @@ name = "void"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
+[[package]]
+name = "winapi"
+version = "0.2.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
+name = "winapi-build"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
[metadata]
"checksum aho-corasick 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)" = "500909c4f87a9e52355b26626d890833e9e1d53ac566db76c36faa984b889699"
+"checksum ascii-canvas 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b385d69402821a1c254533a011a312531cbcc0e3e24f19bbb4747a5a2daf37e2"
+"checksum atty 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "d0fd4c0631f06448cc45a6bbb3b710ebb7ff8ccb96a0800c994afe23a70d5df2"
+"checksum bit-set 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d9bf6104718e80d7b26a68fdbacff3481cfc05df670821affc7e9cbc1884400c"
+"checksum bit-vec 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "02b4ff8b16e6076c3e14220b39fbc1fabb6737522281a388998046859400895f"
+"checksum diff 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)" = "0a515461b6c8c08419850ced27bc29e86166dcdcde8fbe76f8b1f0589bb49472"
+"checksum docopt 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ab32ea6e284d87987066f21a9e809a73c14720571ef34516f0890b3d355ccfd8"
+"checksum either 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e311a7479512fbdf858fb54d91ec59f3b9f85bc0113659f46bba12b199d273ce"
+"checksum ena 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "cabe5a5078ac8c506d3e4430763b1ba9b609b1286913e7d08e581d1c2de9b7e5"
+"checksum fixedbitset 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "85cb8fec437468d86dc7c83ca7cfc933341d561873275f22dd5eedefa63a6478"
+"checksum itertools 0.5.10 (registry+https://github.com/rust-lang/crates.io-index)" = "4833d6978da405305126af4ac88569b5d71ff758581ce5a987dbfa3755f694fc"
+"checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d"
+"checksum lalrpop 0.13.1 (registry+https://github.com/rust-lang/crates.io-index)" = "8ebe5a5c90d5edeecb7f62f6ebec0a3d0f6faf4759a052708348cda99fd311a0"
+"checksum lalrpop-intern 0.13.1 (registry+https://github.com/rust-lang/crates.io-index)" = "05410c1e4aff497bdea1ccb274ac35536fda0ee858600df36966502d4f7acbe3"
+"checksum lalrpop-snap 0.13.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3f866ece35287f5223a1a022c5d86417c260cda2ca9c8a156af9959404ce5313"
+"checksum lalrpop-util 0.13.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7c7743f235fc17f5f50f3b1e64a8690ee154f17f86bd68cbb78787c5b37907f7"
"checksum lazy_static 0.2.9 (registry+https://github.com/rust-lang/crates.io-index)" = "c9e5e58fa1a4c3b915a561a78a22ee0cac6ab97dca2504428bc1cb074375f8d5"
"checksum libc 0.2.33 (registry+https://github.com/rust-lang/crates.io-index)" = "5ba3df4dcb460b9dfbd070d41c94c19209620c191b0340b929ce748a2bcd42d2"
"checksum memchr 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "148fab2e51b4f1cfc66da2a7c32981d1d3c083a803978268bb11fe4b86925e7a"
+"checksum ordermap 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8c7790b1bc9bf27776cd5cdeaae1263758c2c597d4ae02b58aa63c320f94d778"
+"checksum petgraph 0.4.10 (registry+https://github.com/rust-lang/crates.io-index)" = "28d0872a49ce3ee71b345f4fa675afe394d9e0d077f8eeeb3d04081724065d67"
"checksum regex 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "1731164734096285ec2a5ec7fea5248ae2f5485b3feeb0115af4fda2183b2d1b"
"checksum regex-syntax 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "ad890a5eef7953f55427c50575c680c42841653abd2b028b68cd223d157f62db"
+"checksum rustc-serialize 0.3.24 (registry+https://github.com/rust-lang/crates.io-index)" = "dcf128d1287d2ea9d80910b5f1120d0b8eede3fbf1abe91c40d39ea7d51e6fda"
+"checksum strsim 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b4d15c810519a91cf877e7e36e63fe068815c678181439f2f29e2562147c3694"
+"checksum term 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)" = "fa63644f74ce96fbeb9b794f66aff2a52d601cbd5e80f4b97123e3899f4570f1"
"checksum thread_local 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "1697c4b57aeeb7a536b647165a2825faddffb1d3bad386d507709bd51a90bb14"
+"checksum unicode-xid 0.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "8c1f860d7d29cf02cb2f3f359fd35991af3d30bac52c57d265a3c461074cb4dc"
"checksum unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "382810877fe448991dfc7f0dd6e3ae5d58088fd0ea5e35189655f84e6814fa56"
"checksum utf8-ranges 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "662fab6525a98beff2921d7f61a39e7d59e0b425ebc7d0d9e66d316e55124122"
"checksum void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d"
+"checksum winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a"
+"checksum winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc"
diff --git a/Cargo.toml b/Cargo.toml
index 14d5b5a..e209fce 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -3,6 +3,11 @@ name = "taiga"
version = "0.1.0"
authors = ["David Li <li.davidm96@gmail.com>"]
-[dependencies]
-lazy_static = "0.2.9"
-regex = "0.2"
\ No newline at end of file
+[dependencies.regex]
+version = "0.2.0"
+
+[dependencies.lalrpop-util]
+version = "0.13.1"
+
+[build-dependencies.lalrpop]
+version = "0.13.1"
\ No newline at end of file
diff --git a/build.rs b/build.rs
new file mode 100644
index 0000000..23c7d3f
--- /dev/null
+++ b/build.rs
@@ -0,0 +1,5 @@
+extern crate lalrpop;
+
+fn main() {
+ lalrpop::process_root().unwrap();
+}
diff --git a/src/ast.rs b/src/ast.rs
new file mode 100644
index 0000000..b5f2049
--- /dev/null
+++ b/src/ast.rs
@@ -0,0 +1,7 @@
+#[derive(Debug)]
+pub struct Program(pub Box<Expression>);
+
+#[derive(Debug)]
+pub enum Expression {
+ Number(u64),
+}
diff --git a/src/lexer.rs b/src/lexer.rs
deleted file mode 100644
index dd89fe5..0000000
--- a/src/lexer.rs
+++ /dev/null
@@ -1,101 +0,0 @@
-use regex::Regex;
-
-#[derive(Clone,Debug)]
-pub enum TokenKind {
- Let,
- In,
- End,
- Equals,
- Integer(u64),
- Name(String),
-}
-
-#[derive(Debug)]
-pub struct Token {
- pub kind: TokenKind,
- pub start: usize,
- pub end: usize,
-}
-
-pub struct Lexer<'a> {
- input: &'a str,
- position: usize,
-}
-
-lazy_static! {
- static ref WHITESPACE: Regex = Regex::new(r"^\s+").unwrap();
- static ref LET: Regex = Regex::new(r"^let").unwrap();
- static ref IN: Regex = Regex::new(r"^in").unwrap();
- static ref END: Regex = Regex::new(r"^end").unwrap();
- static ref EQUALS: Regex = Regex::new(r"^=").unwrap();
- static ref INTEGER: Regex = Regex::new(r"^[0-9]+").unwrap();
- static ref NAME: Regex = Regex::new(r"^[_[:alpha:]][_[:alpha:]0-9]*").unwrap();
-
- static ref TOKENS: [(&'static Regex, fn(&str) -> TokenKind); 6] = [
- (&LET, {fn temp(_: &str) -> TokenKind {
- TokenKind::Let
- } temp}),
- (&IN, {fn temp(_: &str) -> TokenKind {
- TokenKind::In
- } temp}),
- (&END, {fn temp(_: &str) -> TokenKind {
- TokenKind::End
- } temp}),
- (&EQUALS, {fn temp(_: &str) -> TokenKind {
- TokenKind::Equals
- } temp}),
- (&INTEGER, {fn temp(num: &str) -> TokenKind {
- TokenKind::Integer(num.parse::<u64>().unwrap())
- } temp}),
- (&NAME, {fn temp(name: &str) -> TokenKind {
- TokenKind::Name(name.to_owned())
- } temp}),
- ];
-}
-
-impl<'a> Lexer<'a> {
- pub fn new(input: &'a str) -> Lexer<'a> {
- // blog.matthewcheok.com/writing-a-lexer-in-swift
- Lexer {
- input: input,
- position: 0,
- }
- }
-
- fn skip_whitespace(&mut self) {
- if let Some(result) = WHITESPACE.find(&self.input[self.position..]) {
- self.position += result.end();
- }
- }
-}
-
-impl<'a> Iterator for Lexer<'a> {
- type Item = Token;
-
- fn next(&mut self) -> Option<Self::Item> {
- if self.position >= self.input.len() {
- None
- }
- else {
- self.skip_whitespace();
- if self.position >= self.input.len() {
- return None;
- }
-
- for &(regexp, builder) in TOKENS.iter() {
- if let Some(result) = regexp.find(&self.input[self.position..]) {
- let position = self.position;
- self.position += result.end();
- return Some(Token {
- kind: builder(result.as_str()),
- start: position + result.start(),
- end: position + result.end(),
- });
- }
- }
-
- self.position = self.input.len();
- None
- }
- }
-}
diff --git a/src/main.rs b/src/main.rs
index 5e071b0..5b343ec 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,13 +1,6 @@
-#[macro_use] extern crate lazy_static;
-extern crate regex;
-
-pub mod lexer;
+pub mod ast;
+pub mod taiga;
fn main() {
- println!("Hello, world!");
- let s = "let x = 5 in x end end ";
- let lex = lexer::Lexer::new(s);
- for token in lex {
- println!("{:?}", token);
- }
+ println!("{:?}", taiga::parse_Program("5"));
}
diff --git a/src/taiga.lalrpop b/src/taiga.lalrpop
new file mode 100644
index 0000000..b60d68f
--- /dev/null
+++ b/src/taiga.lalrpop
@@ -0,0 +1,14 @@
+use std::str::FromStr;
+use ast;
+
+grammar;
+
+pub Program: ast::Program = {
+ Expression => ast::Program(<>),
+};
+
+Expression: Box<ast::Expression> = {
+ Num => Box::new(ast::Expression::Number(<>)),
+};
+
+Num: u64 = r"[0-9]+" => u64::from_str(<>).unwrap();