Categories
Tags
algorithms APIT arm assembly asynchronous base64 Blogging box c clang-format cmake compiler concurrency const_fn contravariant cos covariant cpp Customization cybersecurity DataStructure db Demo deserialization discrete doc DP Dynamic Example FFI flat_map FP Functional functions futures Fuwari GATs gccrs generics gitignore GUI hacking hashmap haskell heap interop invariant iterator justfile kernel LaTeX LFU linux MachineLearning Markdown math ML OnceLock optimization OS parallels perf physics pin postgresql release RPIT rust science Science serialization shift sin SmallProjects std String surrealdb swisstable synchronous tan traits triangulation utf16 utf8 Video x86_64 xilem zig
572 words
3 minutes
260109_kor_rust_interpreter001
link
KAIST 카이스트 강연자료
2023년 고려대학교 MatKor 스터디 - Rust 기초 프로그래밍 + 인터프리터 만들기
lexer
use crate::token::token::Token;
/// Byte-oriented lexer over a borrowed source string.
///
/// Works on raw bytes (ASCII operators/identifiers); `ch == 0` is the
/// end-of-input sentinel.
pub struct Lexer<'a> {
/// Source text being tokenized; outlives the lexer via `'a`.
input: &'a str,
/// Index of the byte currently held in `ch`.
position: usize,
/// Index of the next byte to read (one past `position`).
read_position: usize,
/// Byte under examination; 0 once input is exhausted.
ch: u8,
}
impl<'a> Lexer<'a> {
    /// Creates a lexer over `input` and primes `ch` with the first byte.
    pub fn new(input: &'a str) -> Self {
        let mut lexer = Lexer {
            input,
            position: 0,
            read_position: 0,
            ch: 0,
        };
        lexer.read_char();
        lexer
    }

    /// Advances one byte; at end of input `ch` becomes 0 (NUL sentinel).
    fn read_char(&mut self) {
        self.ch = if self.read_position >= self.input.len() {
            0
        } else {
            self.input.as_bytes()[self.read_position]
        };
        self.position = self.read_position;
        self.read_position += 1;
    }

    /// Scans and returns the next token, consuming it from the input.
    /// Returns `Token::Eof` once exhausted and `Token::Illegal` for any
    /// unrecognized byte.
    pub fn next_token(&mut self) -> Token {
        self.skip_whitespace();
        let tok = match self.ch {
            b'=' => {
                if self.peek_char() == b'=' {
                    self.read_char(); // consume second '=' of "=="
                    Token::Equal
                } else {
                    Token::Assign
                }
            }
            b'+' => Token::Plus,
            b'-' => Token::Minus,
            b'!' => {
                if self.peek_char() == b'=' {
                    self.read_char(); // consume '=' of "!="
                    Token::NotEqual
                } else {
                    Token::Bang
                }
            }
            b'*' => Token::Asterisk,
            b'/' => Token::Slash,
            b'<' => Token::LessThan,
            b'>' => Token::GreaterThan,
            b',' => Token::Comma,
            b';' => Token::Semicolon,
            b'(' => Token::Lparen,
            b')' => Token::Rparen,
            b'{' => Token::Lbrace,
            b'}' => Token::Rbrace,
            // Identifier/number helpers already leave the cursor on the
            // byte AFTER the literal, so return directly and skip the
            // trailing read_char() below.
            b'a'..=b'z' | b'A'..=b'Z' | b'_' => return self.read_identifier(),
            b'0'..=b'9' => return self.read_number(),
            0 => Token::Eof,
            _ => Token::Illegal,
        };
        self.read_char();
        tok
    }

    /// Skips ASCII whitespace (space, tab, CR, LF) between tokens.
    fn skip_whitespace(&mut self) {
        while matches!(self.ch, b' ' | b'\t' | b'\n' | b'\r') {
            self.read_char();
        }
    }

    /// Looks at the next byte without consuming it; 0 at end of input.
    /// (Read-only, so `&self` suffices — callers with `&mut self` still work.)
    fn peek_char(&self) -> u8 {
        if self.read_position >= self.input.len() {
            0
        } else {
            self.input.as_bytes()[self.read_position]
        }
    }

    /// Reads an identifier starting at the current position and maps
    /// reserved keywords to their dedicated token variants.
    fn read_identifier(&mut self) -> Token {
        let start = self.position;
        while matches!(self.ch, b'a'..=b'z' | b'A'..=b'Z' | b'_') {
            self.read_char();
        }
        match &self.input[start..self.position] {
            "fn" => Token::Function,
            "let" => Token::Let,
            "true" => Token::Bool(true),
            "false" => Token::Bool(false),
            "if" => Token::If,
            "else" => Token::Else,
            "return" => Token::Return,
            ident => Token::Ident(ident.to_string()),
        }
    }

    /// Reads a run of ASCII digits as an `i64` literal.
    ///
    /// A literal that overflows `i64` yields `Token::Illegal` instead of
    /// panicking — the original `parse().unwrap()` aborted the whole
    /// process on e.g. a 20-digit number.
    fn read_number(&mut self) -> Token {
        let start = self.position;
        while self.ch.is_ascii_digit() {
            self.read_char();
        }
        self.input[start..self.position]
            .parse::<i64>()
            .map_or(Token::Illegal, Token::Int)
    }
}
#[cfg(test)]
mod tests {
    use crate::lexer::lexer::Lexer;
    use crate::token::token::Token;

    /// Lexes a small Monkey program end-to-end and checks the complete
    /// token stream: keywords, identifiers, integers, one- and
    /// two-character operators, delimiters, and the final EOF.
    #[test]
    fn test_next_token() {
        let input = r#"let five = 5;
let ten = 10;
let add = fn(x, y) {
x + y;
};
let result = add(five, ten);
!-/*5;
5 < 10 > 5;
if (5 < 10) {
return true;
} else {
return false;
}
10 == 10;
10 != 9;
"#;
        let expected = [
            Token::Let,
            Token::Ident("five".to_string()),
            Token::Assign,
            Token::Int(5),
            Token::Semicolon,
            Token::Let,
            Token::Ident("ten".to_string()),
            Token::Assign,
            Token::Int(10),
            Token::Semicolon,
            Token::Let,
            Token::Ident("add".to_string()),
            Token::Assign,
            Token::Function,
            Token::Lparen,
            Token::Ident("x".to_string()),
            Token::Comma,
            Token::Ident("y".to_string()),
            Token::Rparen,
            Token::Lbrace,
            Token::Ident("x".to_string()),
            Token::Plus,
            Token::Ident("y".to_string()),
            Token::Semicolon,
            Token::Rbrace,
            Token::Semicolon,
            Token::Let,
            Token::Ident("result".to_string()),
            Token::Assign,
            Token::Ident("add".to_string()),
            Token::Lparen,
            Token::Ident("five".to_string()),
            Token::Comma,
            Token::Ident("ten".to_string()),
            Token::Rparen,
            Token::Semicolon,
            Token::Bang,
            Token::Minus,
            Token::Slash,
            Token::Asterisk,
            Token::Int(5),
            Token::Semicolon,
            Token::Int(5),
            Token::LessThan,
            Token::Int(10),
            Token::GreaterThan,
            Token::Int(5),
            Token::Semicolon,
            Token::If,
            Token::Lparen,
            Token::Int(5),
            Token::LessThan,
            Token::Int(10),
            Token::Rparen,
            Token::Lbrace,
            Token::Return,
            Token::Bool(true),
            Token::Semicolon,
            Token::Rbrace,
            Token::Else,
            Token::Lbrace,
            Token::Return,
            Token::Bool(false),
            Token::Semicolon,
            Token::Rbrace,
            Token::Int(10),
            Token::Equal,
            Token::Int(10),
            Token::Semicolon,
            Token::Int(10),
            Token::NotEqual,
            Token::Int(9),
            Token::Semicolon,
            Token::Eof,
        ];

        let mut lexer = Lexer::new(input);
        for (i, want) in expected.iter().enumerate() {
            let got = lexer.next_token();
            assert_eq!(*want, got, "token #{i} mismatch");
        }
    }
}
token
/// All token kinds produced by the lexer.
///
/// `Clone` and `Eq` are derived in addition to the original
/// `Debug, PartialEq`: every payload (`String`, `i64`, `bool`) supports
/// them, and parsers typically need to clone/compare lookahead tokens.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Token {
    /// A byte the lexer does not recognize.
    Illegal,
    /// End of input.
    Eof,

    // Identifiers + Literals
    Ident(String),
    Int(i64),
    Bool(bool),

    // Operators
    Assign,
    Plus,
    Minus,
    Bang,
    Asterisk,
    Slash,
    Equal,
    NotEqual,
    LessThan,
    GreaterThan,

    // Delimiters
    Comma,
    Semicolon,
    Lparen,
    Rparen,
    Lbrace,
    Rbrace,

    // Reserved Keywords
    Function,
    Let,
    If,
    Else,
    Return,
}
https://younghakim7.github.io/blog/posts/260109_kor_rust_interpreter001/