use regex::Regex; #[derive(Clone,Debug)] pub enum TokenKind { Let, In, End, Equals, Integer(u64), Name(String), } #[derive(Debug)] pub struct Token { pub kind: TokenKind, pub start: usize, pub end: usize, } pub struct Lexer<'a> { input: &'a str, position: usize, } lazy_static! { static ref WHITESPACE: Regex = Regex::new(r"^\s+").unwrap(); static ref LET: Regex = Regex::new(r"^let").unwrap(); static ref IN: Regex = Regex::new(r"^in").unwrap(); static ref END: Regex = Regex::new(r"^end").unwrap(); static ref EQUALS: Regex = Regex::new(r"^=").unwrap(); static ref INTEGER: Regex = Regex::new(r"^[0-9]+").unwrap(); static ref NAME: Regex = Regex::new(r"^[_[:alpha:]][_[:alpha:]0-9]*").unwrap(); static ref TOKENS: [(&'static Regex, fn(&str) -> TokenKind); 6] = [ (&LET, {fn temp(_: &str) -> TokenKind { TokenKind::Let } temp}), (&IN, {fn temp(_: &str) -> TokenKind { TokenKind::In } temp}), (&END, {fn temp(_: &str) -> TokenKind { TokenKind::End } temp}), (&EQUALS, {fn temp(_: &str) -> TokenKind { TokenKind::Equals } temp}), (&INTEGER, {fn temp(num: &str) -> TokenKind { TokenKind::Integer(num.parse::().unwrap()) } temp}), (&NAME, {fn temp(name: &str) -> TokenKind { TokenKind::Name(name.to_owned()) } temp}), ]; } impl<'a> Lexer<'a> { pub fn new(input: &'a str) -> Lexer<'a> { // blog.matthewcheok.com/writing-a-lexer-in-swift Lexer { input: input, position: 0, } } fn skip_whitespace(&mut self) { if let Some(result) = WHITESPACE.find(&self.input[self.position..]) { self.position += result.end(); } } } impl<'a> Iterator for Lexer<'a> { type Item = Token; fn next(&mut self) -> Option { if self.position >= self.input.len() { None } else { self.skip_whitespace(); if self.position >= self.input.len() { return None; } for &(regexp, builder) in TOKENS.iter() { if let Some(result) = regexp.find(&self.input[self.position..]) { let position = self.position; self.position += result.end(); return Some(Token { kind: builder(result.as_str()), start: position + result.start(), end: position + result.end(), }); } } self.position = self.input.len(); None } } }