summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorDavid Li <li.davidm96@gmail.com>2017-11-03 23:00:54 -0400
committerDavid Li <li.davidm96@gmail.com>2017-11-03 23:00:54 -0400
commit169870428a9074d2429e6ec3dac64e84bb0392e3 (patch)
tree0f069451157aab3c02ddc42de38cf677146ff942 /src
Implement skeleton of lexer
Diffstat (limited to 'src')
-rw-r--r--src/lexer.rs101
-rw-r--r--src/main.rs13
2 files changed, 114 insertions, 0 deletions
diff --git a/src/lexer.rs b/src/lexer.rs
new file mode 100644
index 0000000..dd89fe5
--- /dev/null
+++ b/src/lexer.rs
@@ -0,0 +1,101 @@
+use regex::Regex;
+
/// The kinds of tokens produced by the lexer, carrying any literal payload.
///
/// `PartialEq`/`Eq` are derived in addition to `Clone`/`Debug` so token kinds
/// can be compared directly (e.g. in a parser's expectation checks or tests)
/// instead of matching on every variant by hand.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum TokenKind {
    /// Keyword `let`.
    Let,
    /// Keyword `in`.
    In,
    /// Keyword `end`.
    End,
    /// The `=` sign.
    Equals,
    /// An unsigned integer literal, already parsed.
    Integer(u64),
    /// An identifier, owned so the token outlives the source string.
    Name(String),
}
+
+#[derive(Debug)]
+pub struct Token {
+ pub kind: TokenKind,
+ pub start: usize,
+ pub end: usize,
+}
+
/// A regex-driven lexer over a borrowed source string.
///
/// Implements `Iterator<Item = Token>`; tokens are produced lazily as the
/// caller advances the iterator.
pub struct Lexer<'a> {
    // The full source text being tokenized; borrowed for the lexer's lifetime.
    input: &'a str,
    // Current scan offset into `input`; only ever advanced by regex match
    // lengths, so it stays on valid boundaries.
    position: usize,
}
+
+lazy_static! {
+ static ref WHITESPACE: Regex = Regex::new(r"^\s+").unwrap();
+ static ref LET: Regex = Regex::new(r"^let").unwrap();
+ static ref IN: Regex = Regex::new(r"^in").unwrap();
+ static ref END: Regex = Regex::new(r"^end").unwrap();
+ static ref EQUALS: Regex = Regex::new(r"^=").unwrap();
+ static ref INTEGER: Regex = Regex::new(r"^[0-9]+").unwrap();
+ static ref NAME: Regex = Regex::new(r"^[_[:alpha:]][_[:alpha:]0-9]*").unwrap();
+
+ static ref TOKENS: [(&'static Regex, fn(&str) -> TokenKind); 6] = [
+ (&LET, {fn temp(_: &str) -> TokenKind {
+ TokenKind::Let
+ } temp}),
+ (&IN, {fn temp(_: &str) -> TokenKind {
+ TokenKind::In
+ } temp}),
+ (&END, {fn temp(_: &str) -> TokenKind {
+ TokenKind::End
+ } temp}),
+ (&EQUALS, {fn temp(_: &str) -> TokenKind {
+ TokenKind::Equals
+ } temp}),
+ (&INTEGER, {fn temp(num: &str) -> TokenKind {
+ TokenKind::Integer(num.parse::<u64>().unwrap())
+ } temp}),
+ (&NAME, {fn temp(name: &str) -> TokenKind {
+ TokenKind::Name(name.to_owned())
+ } temp}),
+ ];
+}
+
+impl<'a> Lexer<'a> {
+ pub fn new(input: &'a str) -> Lexer<'a> {
+ // blog.matthewcheok.com/writing-a-lexer-in-swift
+ Lexer {
+ input: input,
+ position: 0,
+ }
+ }
+
+ fn skip_whitespace(&mut self) {
+ if let Some(result) = WHITESPACE.find(&self.input[self.position..]) {
+ self.position += result.end();
+ }
+ }
+}
+
+impl<'a> Iterator for Lexer<'a> {
+ type Item = Token;
+
+ fn next(&mut self) -> Option<Self::Item> {
+ if self.position >= self.input.len() {
+ None
+ }
+ else {
+ self.skip_whitespace();
+ if self.position >= self.input.len() {
+ return None;
+ }
+
+ for &(regexp, builder) in TOKENS.iter() {
+ if let Some(result) = regexp.find(&self.input[self.position..]) {
+ let position = self.position;
+ self.position += result.end();
+ return Some(Token {
+ kind: builder(result.as_str()),
+ start: position + result.start(),
+ end: position + result.end(),
+ });
+ }
+ }
+
+ self.position = self.input.len();
+ None
+ }
+ }
+}
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..5e071b0
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,13 @@
+#[macro_use] extern crate lazy_static;
+extern crate regex;
+
+pub mod lexer;
+
+fn main() {
+ println!("Hello, world!");
+ let s = "let x = 5 in x end end ";
+ let lex = lexer::Lexer::new(s);
+ for token in lex {
+ println!("{:?}", token);
+ }
+}