summaryrefslogtreecommitdiff
path: root/src/lexer.rs
blob: dd89fe5fa35df7f93501e49f2b9c8243c4b3e6fa (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
use regex::Regex;

/// The kind of a lexed token, carrying the payload for literals and names.
///
/// `PartialEq`/`Eq` are derived so the parser and tests can compare token
/// kinds directly; both payload types (`u64`, `String`) support full equality.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum TokenKind {
    /// The `let` keyword.
    Let,
    /// The `in` keyword.
    In,
    /// The `end` keyword.
    End,
    /// The `=` sign.
    Equals,
    /// An unsigned integer literal, e.g. `42`.
    Integer(u64),
    /// An identifier: `_` or letter, followed by `_`, letters, or digits.
    Name(String),
}

/// A single token plus its byte span in the original input.
///
/// `Clone` is derived for consistency with `TokenKind`, so callers can
/// keep a copy of a token (e.g. for lookahead) without re-lexing.
#[derive(Clone, Debug)]
pub struct Token {
    /// What was matched (and any literal payload).
    pub kind: TokenKind,
    /// Byte offset of the first character of the token in the input.
    pub start: usize,
    /// Byte offset one past the last character of the token.
    pub end: usize,
}

/// A cursor-based lexer over a borrowed input string; produces `Token`s
/// via its `Iterator` implementation.
pub struct Lexer<'a> {
    // The full source text being lexed.
    input: &'a str,
    // Current byte offset into `input`; always on a char boundary because
    // it only ever advances by the byte length of a regex match.
    position: usize,
}

lazy_static! {
    // Every pattern is anchored with `^` so it can only match at the current
    // cursor position (the slice handed to `Regex::find`).
    static ref WHITESPACE: Regex = Regex::new(r"^\s+").unwrap();
    // Keyword patterns end with `\b` so an identifier that merely starts
    // with a keyword (e.g. `letter`, `index`, `into`, `endpoint`) is lexed
    // as a single Name rather than keyword + Name remainder.
    static ref LET: Regex        = Regex::new(r"^let\b").unwrap();
    static ref IN: Regex         = Regex::new(r"^in\b").unwrap();
    static ref END: Regex        = Regex::new(r"^end\b").unwrap();
    static ref EQUALS: Regex     = Regex::new(r"^=").unwrap();
    static ref INTEGER: Regex    = Regex::new(r"^[0-9]+").unwrap();
    static ref NAME: Regex       = Regex::new(r"^[_[:alpha:]][_[:alpha:]0-9]*").unwrap();

    // Recognizers tried in order by `Lexer::next`. Keywords must precede
    // NAME, since NAME would also match every keyword. Non-capturing
    // closures coerce to the `fn(&str) -> TokenKind` pointer type.
    static ref TOKENS: [(&'static Regex, fn(&str) -> TokenKind); 6] = [
        (&LET, |_| TokenKind::Let),
        (&IN, |_| TokenKind::In),
        (&END, |_| TokenKind::End),
        (&EQUALS, |_| TokenKind::Equals),
        // The match is all ASCII digits, but a long literal can still
        // overflow u64; that case panics via unwrap, as before.
        (&INTEGER, |text| TokenKind::Integer(text.parse::<u64>().unwrap())),
        (&NAME, |text| TokenKind::Name(text.to_owned())),
    ];
}

impl<'a> Lexer<'a> {
    /// Creates a lexer over `input` with the cursor at the first byte.
    // Approach based on blog.matthewcheok.com/writing-a-lexer-in-swift
    pub fn new(input: &'a str) -> Lexer<'a> {
        Lexer { input, position: 0 }
    }

    /// Advances the cursor past any whitespace at the current position.
    fn skip_whitespace(&mut self) {
        let rest = &self.input[self.position..];
        if let Some(ws) = WHITESPACE.find(rest) {
            self.position += ws.end();
        }
    }
}

impl<'a> Iterator for Lexer<'a> {
    type Item = Token;

    /// Returns the next token, or `None` at end of input. Unrecognized
    /// input silently terminates the stream (the cursor jumps to the end),
    /// exactly as before.
    fn next(&mut self) -> Option<Self::Item> {
        // Skipping whitespace first is safe even at end of input: the
        // `^\s+` pattern cannot match an empty slice, so the cursor stays
        // put and the bounds check below returns None.
        self.skip_whitespace();
        if self.position >= self.input.len() {
            return None;
        }

        let remaining = &self.input[self.position..];
        for &(pattern, build) in TOKENS.iter() {
            if let Some(found) = pattern.find(remaining) {
                let base = self.position;
                self.position += found.end();
                return Some(Token {
                    kind: build(found.as_str()),
                    start: base + found.start(),
                    end: base + found.end(),
                });
            }
        }

        // Nothing matched: give up on the rest of the input.
        self.position = self.input.len();
        None
    }
}