1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
|
use regex::Regex;
/// The category of a lexed token, together with any payload it carries.
#[derive(Clone, Debug)]
pub enum TokenKind {
    /// The `let` keyword.
    Let,
    /// The `in` keyword.
    In,
    /// The `end` keyword.
    End,
    /// The `=` sign.
    Equals,
    /// An unsigned decimal integer literal, already parsed into its value.
    Integer(u64),
    /// An identifier; the payload is the identifier's text.
    Name(String),
}
/// A single token together with the span of input it was read from.
#[derive(Debug)]
pub struct Token {
    /// What kind of token this is (and its payload, if any).
    pub kind: TokenKind,
    /// Byte offset into the lexer input where the token begins.
    pub start: usize,
    /// Byte offset into the lexer input just past the token's last byte.
    pub end: usize,
}
/// A lexer over a borrowed string that tracks how far it has read.
pub struct Lexer<'a> {
    /// The complete text being tokenized.
    input: &'a str,
    /// Byte offset into `input` of the next unread character.
    position: usize,
}
// Lazily compiled patterns used by the lexer. Every pattern is anchored with
// `^` because it is always matched against the unread remainder of the input.
lazy_static! {
    static ref WHITESPACE: Regex = Regex::new(r"^\s+").unwrap();
    // Keywords end with a word boundary (`\b`) so that an identifier which
    // merely starts with a keyword (`letter`, `index`, `ending`, `let_x`) is
    // lexed as a single name instead of a keyword followed by a name.
    static ref LET: Regex = Regex::new(r"^let\b").unwrap();
    static ref IN: Regex = Regex::new(r"^in\b").unwrap();
    static ref END: Regex = Regex::new(r"^end\b").unwrap();
    static ref EQUALS: Regex = Regex::new(r"^=").unwrap();
    static ref INTEGER: Regex = Regex::new(r"^[0-9]+").unwrap();
    static ref NAME: Regex = Regex::new(r"^[_[:alpha:]][_[:alpha:]0-9]*").unwrap();
    // Ordered table of (pattern, builder) pairs. Order is significant: the
    // keyword patterns must come before `NAME`, which would otherwise match
    // the keywords as ordinary identifiers. Each builder receives the matched
    // text and produces the corresponding `TokenKind`.
    static ref TOKENS: [(&'static Regex, fn(&str) -> TokenKind); 6] = {
        fn keyword_let(_: &str) -> TokenKind {
            TokenKind::Let
        }
        fn keyword_in(_: &str) -> TokenKind {
            TokenKind::In
        }
        fn keyword_end(_: &str) -> TokenKind {
            TokenKind::End
        }
        fn equals_sign(_: &str) -> TokenKind {
            TokenKind::Equals
        }
        // Panics if the literal is too large for `u64`; the pattern only
        // guarantees that the text consists of decimal digits.
        fn integer(digits: &str) -> TokenKind {
            TokenKind::Integer(
                digits
                    .parse::<u64>()
                    .expect("integer literal does not fit in u64"),
            )
        }
        fn name(text: &str) -> TokenKind {
            TokenKind::Name(text.to_owned())
        }
        [
            (&LET, keyword_let),
            (&IN, keyword_in),
            (&END, keyword_end),
            (&EQUALS, equals_sign),
            (&INTEGER, integer),
            (&NAME, name),
        ]
    };
}
impl<'a> Lexer<'a> {
    /// Creates a lexer positioned at the beginning of `input`.
    pub fn new(input: &'a str) -> Lexer<'a> {
        // blog.matthewcheok.com/writing-a-lexer-in-swift
        Lexer { input, position: 0 }
    }

    /// Advances `position` past any whitespace at the current position.
    ///
    /// Does nothing when the unread input does not start with whitespace.
    fn skip_whitespace(&mut self) {
        let remaining = &self.input[self.position..];
        let skipped = WHITESPACE.find(remaining).map_or(0, |run| run.end());
        self.position += skipped;
    }
}
impl<'a> Iterator for Lexer<'a> {
    type Item = Token;

    /// Produces the next token, or `None` once the input is exhausted.
    ///
    /// Leading whitespace is skipped first. The patterns in `TOKENS` are then
    /// tried in order and the first one that matches wins. If nothing matches,
    /// the lexer jumps to the end of the input and returns `None`, so
    /// unrecognised text silently ends iteration rather than reporting an
    /// error.
    fn next(&mut self) -> Option<Self::Item> {
        if self.position >= self.input.len() {
            return None;
        }

        self.skip_whitespace();
        if self.position >= self.input.len() {
            return None;
        }

        // Copy the shared reference out so slicing does not borrow `self`.
        let input: &'a str = self.input;
        let start = self.position;
        let remaining = &input[start..];

        for &(pattern, make_kind) in TOKENS.iter() {
            if let Some(found) = pattern.find(remaining) {
                // Match offsets are relative to `remaining`; rebase them onto
                // the full input.
                self.position = start + found.end();
                return Some(Token {
                    kind: make_kind(found.as_str()),
                    start: start + found.start(),
                    end: start + found.end(),
                });
            }
        }

        // No pattern matched: give up on the rest of the input.
        self.position = self.input.len();
        None
    }
}
|