summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorDavid Li <li.davidm96@gmail.com>2017-11-03 23:37:17 -0400
committerDavid Li <li.davidm96@gmail.com>2017-11-03 23:37:37 -0400
commitd5b0224a398810a2e34713d72c6710f4f54df420 (patch)
treef56bf6cd72292bfab30f2bcd75e41c921fe089fd /src
parent169870428a9074d2429e6ec3dac64e84bb0392e3 (diff)
Switch to lalrpop for parsing/lexing
Diffstat (limited to 'src')
-rw-r--r--src/ast.rs7
-rw-r--r--src/lexer.rs101
-rw-r--r--src/main.rs13
-rw-r--r--src/taiga.lalrpop14
4 files changed, 24 insertions, 111 deletions
diff --git a/src/ast.rs b/src/ast.rs
new file mode 100644
index 0000000..b5f2049
--- /dev/null
+++ b/src/ast.rs
@@ -0,0 +1,7 @@
+#[derive(Debug)]
+pub struct Program(pub Box<Expression>);
+
+#[derive(Debug)]
+pub enum Expression {
+ Number(u64),
+}
diff --git a/src/lexer.rs b/src/lexer.rs
deleted file mode 100644
index dd89fe5..0000000
--- a/src/lexer.rs
+++ /dev/null
@@ -1,101 +0,0 @@
-use regex::Regex;
-
-#[derive(Clone,Debug)]
-pub enum TokenKind {
- Let,
- In,
- End,
- Equals,
- Integer(u64),
- Name(String),
-}
-
-#[derive(Debug)]
-pub struct Token {
- pub kind: TokenKind,
- pub start: usize,
- pub end: usize,
-}
-
-pub struct Lexer<'a> {
- input: &'a str,
- position: usize,
-}
-
-lazy_static! {
- static ref WHITESPACE: Regex = Regex::new(r"^\s+").unwrap();
- static ref LET: Regex = Regex::new(r"^let").unwrap();
- static ref IN: Regex = Regex::new(r"^in").unwrap();
- static ref END: Regex = Regex::new(r"^end").unwrap();
- static ref EQUALS: Regex = Regex::new(r"^=").unwrap();
- static ref INTEGER: Regex = Regex::new(r"^[0-9]+").unwrap();
- static ref NAME: Regex = Regex::new(r"^[_[:alpha:]][_[:alpha:]0-9]*").unwrap();
-
- static ref TOKENS: [(&'static Regex, fn(&str) -> TokenKind); 6] = [
- (&LET, {fn temp(_: &str) -> TokenKind {
- TokenKind::Let
- } temp}),
- (&IN, {fn temp(_: &str) -> TokenKind {
- TokenKind::In
- } temp}),
- (&END, {fn temp(_: &str) -> TokenKind {
- TokenKind::End
- } temp}),
- (&EQUALS, {fn temp(_: &str) -> TokenKind {
- TokenKind::Equals
- } temp}),
- (&INTEGER, {fn temp(num: &str) -> TokenKind {
- TokenKind::Integer(num.parse::<u64>().unwrap())
- } temp}),
- (&NAME, {fn temp(name: &str) -> TokenKind {
- TokenKind::Name(name.to_owned())
- } temp}),
- ];
-}
-
-impl<'a> Lexer<'a> {
- pub fn new(input: &'a str) -> Lexer<'a> {
- // blog.matthewcheok.com/writing-a-lexer-in-swift
- Lexer {
- input: input,
- position: 0,
- }
- }
-
- fn skip_whitespace(&mut self) {
- if let Some(result) = WHITESPACE.find(&self.input[self.position..]) {
- self.position += result.end();
- }
- }
-}
-
-impl<'a> Iterator for Lexer<'a> {
- type Item = Token;
-
- fn next(&mut self) -> Option<Self::Item> {
- if self.position >= self.input.len() {
- None
- }
- else {
- self.skip_whitespace();
- if self.position >= self.input.len() {
- return None;
- }
-
- for &(regexp, builder) in TOKENS.iter() {
- if let Some(result) = regexp.find(&self.input[self.position..]) {
- let position = self.position;
- self.position += result.end();
- return Some(Token {
- kind: builder(result.as_str()),
- start: position + result.start(),
- end: position + result.end(),
- });
- }
- }
-
- self.position = self.input.len();
- None
- }
- }
-}
diff --git a/src/main.rs b/src/main.rs
index 5e071b0..5b343ec 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,13 +1,6 @@
-#[macro_use] extern crate lazy_static;
-extern crate regex;
-
-pub mod lexer;
+pub mod ast;
+pub mod taiga;
fn main() {
- println!("Hello, world!");
- let s = "let x = 5 in x end end ";
- let lex = lexer::Lexer::new(s);
- for token in lex {
- println!("{:?}", token);
- }
+ println!("{:?}", taiga::parse_Program("5"));
}
diff --git a/src/taiga.lalrpop b/src/taiga.lalrpop
new file mode 100644
index 0000000..b60d68f
--- /dev/null
+++ b/src/taiga.lalrpop
@@ -0,0 +1,14 @@
+use std::str::FromStr;
+use ast;
+
+grammar;
+
+pub Program: ast::Program = {
+ Expression => ast::Program(<>),
+};
+
+Expression: Box<ast::Expression> = {
+ Num => Box::new(ast::Expression::Number(<>)),
+};
+
+Num: u64 = r"[0-9]+" => u64::from_str(<>).unwrap();