Categories
Tags
algorithms APIT Arc arm assembly asynchronous base64 BitHacks Blogging box c clang-format client cmake compiler concat concurrency const_fn contravariant cos covariant cpp Customization cybersecurity DataStructure db debugging Demo deserialization discrete doc DP dtruss Dynamic Example FFI flat_map format FP fsanitize Functional functions futures Fuwari GATs gccrs generics gitignore glibc GUI hacking hashmap haskell heap interop invariant iterator join justfile kernel LaTeX leak LFU linux lto MachineLearning macOS Markdown math ML mmap nc OnceLock optimization OS panic parallels perf physics pin postgresql radare2 release reverse RPIT rust sanitizer science Science serialization server shift sin SmallProjects socket std strace String StringView strip strlen surrealdb SWAR swisstable synchronous tan toml traits triangulation UnsafeRust utf16 utf8 Video wsl x86_64 xilem zig
572 words
3 minutes
260109_kor_rust_interpreter001
link
KAIST 카이스트 강연자료
2023년 고려대학교 MatKor 스터디 - Rust 기초 프로그래밍 + 인터프리터 만들기
lexer
use crate::token::token::Token;
/// A byte-oriented lexer over a borrowed source string.
///
/// Operates on raw bytes (`u8`), so it assumes ASCII source; `ch == 0`
/// is used as an end-of-input sentinel.
#[derive(Debug)]
pub struct Lexer<'a> {
    input: &'a str,
    position: usize,      // index of the byte currently in `ch`
    read_position: usize, // index of the next byte to read (position + 1)
    ch: u8,               // byte under examination; 0 means end of input
}
impl<'a> Lexer<'a> {
    /// Creates a lexer over `input` and primes the first byte so that
    /// `next_token` can be called immediately.
    pub fn new(input: &'a str) -> Self {
        let mut lexer = Lexer {
            input,
            position: 0,
            read_position: 0,
            ch: 0,
        };
        // Load the first byte; on empty input `ch` stays 0 (EOF sentinel).
        lexer.read_char();
        lexer
    }

    /// Advances one byte. Past the end of input, `ch` is set to the
    /// NUL sentinel (0) instead of panicking.
    fn read_char(&mut self) {
        self.ch = if self.read_position >= self.input.len() {
            0
        } else {
            self.input.as_bytes()[self.read_position]
        };
        self.position = self.read_position;
        self.read_position += 1;
    }

    /// Consumes and returns the next token. Returns `Token::Eof` forever
    /// once the input is exhausted.
    pub fn next_token(&mut self) -> Token {
        self.skip_whitespace();
        let tok = match self.ch {
            b'=' => {
                // Two-byte operator `==` vs single `=`.
                if self.peek_char() == b'=' {
                    self.read_char();
                    Token::Equal
                } else {
                    Token::Assign
                }
            }
            b'+' => Token::Plus,
            b'-' => Token::Minus,
            b'!' => {
                // Two-byte operator `!=` vs single `!`.
                if self.peek_char() == b'=' {
                    self.read_char();
                    Token::NotEqual
                } else {
                    Token::Bang
                }
            }
            b'*' => Token::Asterisk,
            b'/' => Token::Slash,
            b'<' => Token::LessThan,
            b'>' => Token::GreaterThan,
            b',' => Token::Comma,
            b';' => Token::Semicolon,
            b'(' => Token::Lparen,
            b')' => Token::Rparen,
            b'{' => Token::Lbrace,
            b'}' => Token::Rbrace,
            b'a'..=b'z' | b'A'..=b'Z' | b'_' => {
                // Identifier/keyword readers leave `ch` on the byte after
                // the literal, so skip the trailing `read_char` below.
                return self.read_identifier();
            }
            b'0'..=b'9' => {
                return self.read_number();
            }
            0 => Token::Eof,
            _ => Token::Illegal,
        };
        self.read_char();
        tok
    }

    /// Skips ASCII whitespace (space, tab, newline, carriage return).
    fn skip_whitespace(&mut self) {
        while matches!(self.ch, b' ' | b'\t' | b'\n' | b'\r') {
            self.read_char();
        }
    }

    /// Returns the next byte without consuming it; 0 at end of input.
    /// Takes `&self` — peeking never mutates lexer state.
    fn peek_char(&self) -> u8 {
        self.input
            .as_bytes()
            .get(self.read_position)
            .copied()
            .unwrap_or(0)
    }

    /// Reads a run of `[a-zA-Z_]` bytes and classifies it as a keyword
    /// or a user identifier.
    fn read_identifier(&mut self) -> Token {
        let start = self.position;
        while matches!(self.ch, b'a'..=b'z' | b'A'..=b'Z' | b'_') {
            self.read_char();
        }
        let literal = &self.input[start..self.position];
        match literal {
            "fn" => Token::Function,
            "let" => Token::Let,
            "true" => Token::Bool(true),
            "false" => Token::Bool(false),
            "if" => Token::If,
            "else" => Token::Else,
            "return" => Token::Return,
            _ => Token::Ident(String::from(literal)),
        }
    }

    /// Reads a run of ASCII digits as an `i64` integer literal.
    fn read_number(&mut self) -> Token {
        let start = self.position;
        while self.ch.is_ascii_digit() {
            self.read_char();
        }
        let literal = &self.input[start..self.position];
        // The slice is digits-only, so parsing can fail only on i64 overflow.
        Token::Int(
            literal
                .parse::<i64>()
                .expect("integer literal overflows i64"),
        )
    }
}
#[cfg(test)]
mod tests {
    use crate::lexer::lexer::Lexer;
    use crate::token::token::Token;

    /// Lexes a representative Monkey program and checks every produced
    /// token against the expected sequence, reporting the index of the
    /// first mismatch.
    #[test]
    fn test_next_token() {
        let input = r#"let five = 5;
let ten = 10;
let add = fn(x, y) {
x + y;
};
let result = add(five, ten);
!-/*5;
5 < 10 > 5;
if (5 < 10) {
return true;
} else {
return false;
}
10 == 10;
10 != 9;
"#;
        let expected = [
            Token::Let,
            Token::Ident(String::from("five")),
            Token::Assign,
            Token::Int(5),
            Token::Semicolon,
            Token::Let,
            Token::Ident(String::from("ten")),
            Token::Assign,
            Token::Int(10),
            Token::Semicolon,
            Token::Let,
            Token::Ident(String::from("add")),
            Token::Assign,
            Token::Function,
            Token::Lparen,
            Token::Ident(String::from("x")),
            Token::Comma,
            Token::Ident(String::from("y")),
            Token::Rparen,
            Token::Lbrace,
            Token::Ident(String::from("x")),
            Token::Plus,
            Token::Ident(String::from("y")),
            Token::Semicolon,
            Token::Rbrace,
            Token::Semicolon,
            Token::Let,
            Token::Ident(String::from("result")),
            Token::Assign,
            Token::Ident(String::from("add")),
            Token::Lparen,
            Token::Ident(String::from("five")),
            Token::Comma,
            Token::Ident(String::from("ten")),
            Token::Rparen,
            Token::Semicolon,
            Token::Bang,
            Token::Minus,
            Token::Slash,
            Token::Asterisk,
            Token::Int(5),
            Token::Semicolon,
            Token::Int(5),
            Token::LessThan,
            Token::Int(10),
            Token::GreaterThan,
            Token::Int(5),
            Token::Semicolon,
            Token::If,
            Token::Lparen,
            Token::Int(5),
            Token::LessThan,
            Token::Int(10),
            Token::Rparen,
            Token::Lbrace,
            Token::Return,
            Token::Bool(true),
            Token::Semicolon,
            Token::Rbrace,
            Token::Else,
            Token::Lbrace,
            Token::Return,
            Token::Bool(false),
            Token::Semicolon,
            Token::Rbrace,
            Token::Int(10),
            Token::Equal,
            Token::Int(10),
            Token::Semicolon,
            Token::Int(10),
            Token::NotEqual,
            Token::Int(9),
            Token::Semicolon,
            Token::Eof,
        ];
        let mut lexer = Lexer::new(input);
        for (i, want) in expected.into_iter().enumerate() {
            let got = lexer.next_token();
            assert_eq!(want, got, "token #{i} mismatch");
        }
    }
}
token
/// The complete set of lexical tokens produced by the lexer.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Token {
    /// A byte the lexer does not recognize.
    Illegal,
    /// End of input.
    Eof,

    // Identifiers + Literals
    Ident(String),
    Int(i64),
    Bool(bool),

    // Operators
    Assign,
    Plus,
    Minus,
    Bang,
    Asterisk,
    Slash,
    Equal,
    NotEqual,
    LessThan,
    GreaterThan,

    // Delimiters
    Comma,
    Semicolon,
    Lparen,
    Rparen,
    Lbrace,
    Rbrace,

    // Reserved Keywords
    Function,
    Let,
    If,
    Else,
    Return,
}
260109_kor_rust_interpreter001
https://younghakim7.github.io/blog/posts/260109_kor_rust_interpreter001/