summary refs log tree commit diff
diff options
context:
space:
mode:
author David Li <li.davidm96@gmail.com> 2017-11-03 23:00:54 -0400
committer David Li <li.davidm96@gmail.com> 2017-11-03 23:00:54 -0400
commit 169870428a9074d2429e6ec3dac64e84bb0392e3 (patch)
tree 0f069451157aab3c02ddc42de38cf677146ff942
Implement skeleton of lexer
-rw-r--r-- .gitignore 3
-rw-r--r-- Cargo.lock 89
-rw-r--r-- Cargo.toml 8
-rw-r--r-- src/lexer.rs 101
-rw-r--r-- src/main.rs 13
5 files changed, 214 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..fa50122
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+target/
+**/*.rs.bk
+*~
diff --git a/Cargo.lock b/Cargo.lock
new file mode 100644
index 0000000..7a58190
--- /dev/null
+++ b/Cargo.lock
@@ -0,0 +1,89 @@
+[[package]]
+name = "aho-corasick"
+version = "0.6.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "memchr 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "lazy_static"
+version = "0.2.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
+name = "libc"
+version = "0.2.33"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
+name = "memchr"
+version = "1.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "libc 0.2.33 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "regex"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "aho-corasick 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)",
+ "memchr 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
+ "regex-syntax 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "thread_local 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)",
+ "utf8-ranges 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "regex-syntax"
+version = "0.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
+name = "taiga"
+version = "0.1.0"
+dependencies = [
+ "lazy_static 0.2.9 (registry+https://github.com/rust-lang/crates.io-index)",
+ "regex 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "thread_local"
+version = "0.3.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "lazy_static 0.2.9 (registry+https://github.com/rust-lang/crates.io-index)",
+ "unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "unreachable"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "utf8-ranges"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
+name = "void"
+version = "1.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[metadata]
+"checksum aho-corasick 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)" = "500909c4f87a9e52355b26626d890833e9e1d53ac566db76c36faa984b889699"
+"checksum lazy_static 0.2.9 (registry+https://github.com/rust-lang/crates.io-index)" = "c9e5e58fa1a4c3b915a561a78a22ee0cac6ab97dca2504428bc1cb074375f8d5"
+"checksum libc 0.2.33 (registry+https://github.com/rust-lang/crates.io-index)" = "5ba3df4dcb460b9dfbd070d41c94c19209620c191b0340b929ce748a2bcd42d2"
+"checksum memchr 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "148fab2e51b4f1cfc66da2a7c32981d1d3c083a803978268bb11fe4b86925e7a"
+"checksum regex 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "1731164734096285ec2a5ec7fea5248ae2f5485b3feeb0115af4fda2183b2d1b"
+"checksum regex-syntax 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "ad890a5eef7953f55427c50575c680c42841653abd2b028b68cd223d157f62db"
+"checksum thread_local 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "1697c4b57aeeb7a536b647165a2825faddffb1d3bad386d507709bd51a90bb14"
+"checksum unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "382810877fe448991dfc7f0dd6e3ae5d58088fd0ea5e35189655f84e6814fa56"
+"checksum utf8-ranges 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "662fab6525a98beff2921d7f61a39e7d59e0b425ebc7d0d9e66d316e55124122"
+"checksum void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d"
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..14d5b5a
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,8 @@
+[package]
+name = "taiga"
+version = "0.1.0"
+authors = ["David Li <li.davidm96@gmail.com>"]
+
+[dependencies]
+lazy_static = "0.2.9"
+regex = "0.2"
\ No newline at end of file
diff --git a/src/lexer.rs b/src/lexer.rs
new file mode 100644
index 0000000..dd89fe5
--- /dev/null
+++ b/src/lexer.rs
@@ -0,0 +1,101 @@
+use regex::Regex;
+
+#[derive(Clone,Debug)]
+pub enum TokenKind { // The kind of a lexed token, with its payload where it has one.
+ Let, // the `let` keyword
+ In, // the `in` keyword
+ End, // the `end` keyword
+ Equals, // the `=` sign
+ Integer(u64), // a run of decimal digits, parsed into a u64
+ Name(String), // an identifier; holds a copy of the matched text
+}
+
+#[derive(Debug)]
+pub struct Token { // One token produced by the lexer, with its location in the input.
+ pub kind: TokenKind, // what was matched
+ pub start: usize, // byte offset in the input where the match begins
+ pub end: usize, // byte offset in the input just past the end of the match
+}
+
+pub struct Lexer<'a> { // Lexer state; tokens are pulled from it through its `Iterator` impl.
+ input: &'a str, // the full source text being lexed
+ position: usize, // byte offset into `input` where the next match is attempted
+}
+
+lazy_static! {
+    // Every pattern is anchored with `^`, so it can only match at the very
+    // start of the slice the lexer hands to it.
+    static ref WHITESPACE: Regex = Regex::new(r"^\s+").unwrap();
+
+    // Keywords end with a word boundary (`\b`). Without it, `letter` would lex
+    // as `Let` followed by `Name("ter")`, and `input` as `In` + `Name("put")`.
+    static ref LET: Regex = Regex::new(r"^let\b").unwrap();
+    static ref IN: Regex = Regex::new(r"^in\b").unwrap();
+    static ref END: Regex = Regex::new(r"^end\b").unwrap();
+    static ref EQUALS: Regex = Regex::new(r"^=").unwrap();
+    static ref INTEGER: Regex = Regex::new(r"^[0-9]+").unwrap();
+    static ref NAME: Regex = Regex::new(r"^[_[:alpha:]][_[:alpha:]0-9]*").unwrap();
+
+    // Lexing rules as (pattern, builder) pairs. The lexer tries them in order
+    // and takes the first pattern that matches, so the keyword rules must stay
+    // ahead of NAME. Each builder receives the matched text and returns the
+    // TokenKind for it.
+    static ref TOKENS: [(&'static Regex, fn(&str) -> TokenKind); 6] = [
+        (&LET, {fn build(_: &str) -> TokenKind {
+            TokenKind::Let
+        } build}),
+        (&IN, {fn build(_: &str) -> TokenKind {
+            TokenKind::In
+        } build}),
+        (&END, {fn build(_: &str) -> TokenKind {
+            TokenKind::End
+        } build}),
+        (&EQUALS, {fn build(_: &str) -> TokenKind {
+            TokenKind::Equals
+        } build}),
+        // NOTE(review): INTEGER only matches ASCII digits, so the parse can
+        // fail only on overflow -- but that still panics. Reporting it as an
+        // error needs a fallible builder signature.
+        (&INTEGER, {fn build(digits: &str) -> TokenKind {
+            TokenKind::Integer(digits.parse::<u64>().expect("integer literal does not fit in u64"))
+        } build}),
+        (&NAME, {fn build(text: &str) -> TokenKind {
+            TokenKind::Name(text.to_owned())
+        } build}),
+    ];
+}
+
+impl<'a> Lexer<'a> {
+    /// Creates a lexer that starts reading at the beginning of `input`.
+    pub fn new(input: &'a str) -> Lexer<'a> {
+        // blog.matthewcheok.com/writing-a-lexer-in-swift
+        Lexer { input, position: 0 }
+    }
+
+    /// Moves `position` past any whitespace found at the current position.
+    /// Does nothing when the remaining input does not start with whitespace.
+    fn skip_whitespace(&mut self) {
+        let rest = &self.input[self.position..];
+        let skipped = WHITESPACE.find(rest).map_or(0, |found| found.end());
+        self.position += skipped;
+    }
+}
+
+impl<'a> Iterator for Lexer<'a> {
+    type Item = Token;
+
+    /// Returns the next token, or `None` once the input is used up.
+    ///
+    /// Whitespace in front of the token is skipped first. If no rule in
+    /// `TOKENS` matches what remains, the rest of the input is dropped and
+    /// iteration ends.
+    fn next(&mut self) -> Option<Self::Item> {
+        let total = self.input.len();
+        if self.position >= total {
+            return None;
+        }
+
+        self.skip_whitespace();
+        if self.position >= total {
+            return None;
+        }
+
+        let origin = self.position;
+        let rest = &self.input[origin..];
+        for &(pattern, make_kind) in TOKENS.iter() {
+            if let Some(found) = pattern.find(rest) {
+                self.position = origin + found.end();
+                return Some(Token {
+                    kind: make_kind(found.as_str()),
+                    start: origin + found.start(),
+                    end: origin + found.end(),
+                });
+            }
+        }
+
+        // Nothing matched: jump to the end so every later call returns `None`.
+        self.position = total;
+        None
+    }
+}
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..5e071b0
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,13 @@
+#[macro_use] extern crate lazy_static;
+extern crate regex;
+
+pub mod lexer;
+
+/// Demo entry point: prints a greeting, then lexes a fixed sample program and
+/// prints the `Debug` form of each token on its own line.
+fn main() {
+    println!("Hello, world!");
+    let source = "let x = 5 in x end end ";
+    for token in lexer::Lexer::new(source) {
+        println!("{:?}", token);
+    }
+}