Merge branch 'feat/lexer'

2026-05-09 11:17:00 +08:00
parent e8b50ae0d7 63a2990826
commit a0855755f8
12 changed files with 525 additions and 120 deletions
@@ -17,6 +17,17 @@ version = "1.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
 [[package]]
 name = "codespan-reporting"
 version = "0.13.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "af491d569909a7e4dee0ad7db7f5341fef5c614d5b8ec8cf765732aba3cff681"
 dependencies = [
 "serde",
 "termcolor",
 "unicode-width",
 ]
 [[package]]
 name = "heck"
 version = "0.5.0"
@@ -153,12 +164,43 @@ checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a"
 name = "rusty-minic"
 version = "0.1.0"
 dependencies = [
 "codespan-reporting",
 "num",
 "regex",
 "strum",
 "thiserror",
 ]
 [[package]]
 name = "serde"
 version = "1.0.228"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e"
 dependencies = [
 "serde_core",
 "serde_derive",
 ]
 [[package]]
 name = "serde_core"
 version = "1.0.228"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad"
 dependencies = [
 "serde_derive",
 ]
 [[package]]
 name = "serde_derive"
 version = "1.0.228"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
 dependencies = [
 "proc-macro2",
 "quote",
 "syn",
 ]
 [[package]]
 name = "strum"
 version = "0.28.0"
@@ -191,6 +233,15 @@ dependencies = [
 "unicode-ident",
 ]
 [[package]]
 name = "termcolor"
 version = "1.4.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755"
 dependencies = [
 "winapi-util",
 ]
 [[package]]
 name = "thiserror"
 version = "2.0.18"
@@ -216,3 +267,33 @@ name = "unicode-ident"
 version = "1.0.24"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
 [[package]]
 name = "unicode-width"
 version = "0.2.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254"
 [[package]]
 name = "winapi-util"
 version = "0.1.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
 dependencies = [
 "windows-sys",
 ]
 [[package]]
 name = "windows-link"
 version = "0.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
 [[package]]
 name = "windows-sys"
 version = "0.61.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc"
 dependencies = [
 "windows-link",
 ]
@@ -4,6 +4,7 @@ version = "0.1.0"
 edition = "2024"
 [dependencies]
 codespan-reporting = "0.13.1"
 num = "0.4.3"
 regex = "1.12.3"
 strum = { version = "0.28.0", features = ["derive"] }
@@ -0,0 +1 @@
 pub mod types;
@@ -0,0 +1,75 @@
 /// Source range attached to an AST node.
 ///
 /// NOTE(review): this has the same fields as `crate::diagnostic::span::Span`;
 /// presumably both use the same `start..end` convention. Consider reusing that
 /// type so AST spans can be handed to diagnostics directly.
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub struct Span {
    /// Offset where the node starts.
    pub start: usize,
    /// Offset where the node ends.
    pub end: usize,
 }
 /// Root of the AST: every global declaration of one compile unit.
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub struct CompileUnit {
    pub global_decls: Vec<GlobalDeclStmt>,
 }
 /// A declaration that may appear at global scope.
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub enum GlobalDeclStmt {
    VarDecl(VarDeclStmt),
    FuncDecl(FuncDeclStmt),
 }
 /// Declaration of a variable with its name and type.
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub struct VarDeclStmt {
    pub name: String,
    pub var_type: Type,
    pub span: Span,
 }
 /// Declaration of a function: signature plus body block.
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub struct FuncDeclStmt {
    pub name: String,
    pub return_type: Type,
    pub params: Vec<Param>,
    pub body: BlockStmt,
    pub span: Span,
 }
 /// A sequence of statements.
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub struct BlockStmt {
    pub statements: Vec<Statement>,
    pub span: Span,
 }
 /// Any statement that may appear inside a block.
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub enum Statement {
    Return(ReturnStmt),
    Block(BlockStmt),
    Expr(Expr),
    VarDecl(VarDeclStmt),
 }
 /// A `return` statement; `value` is `None` when nothing is returned.
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub struct ReturnStmt {
    pub value: Option<Expr>,
    pub span: Span,
 }
 /// An expression together with the source range it was parsed from.
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub struct Expr {
    pub value: ExprValue,
    pub span: Span,
 }
 /// The different shapes an expression can take.
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub enum ExprValue {
    /// Integer literal.
    IntLit(i64),
    /// Reference to a variable by name.
    Var(String),
    /// Binary operation `lhs op rhs`.
    BinaryOp {
        lhs: Box<Expr>,
        op: BinaryOp,
        rhs: Box<Expr>,
    },
    /// Function call: callee name and argument expressions.
    FuncCall(String, Vec<Expr>),
    /// Assignment of `rvalue` to `lvalue`.
    Assign {
        lvalue: Box<Expr>,
        rvalue: Box<Expr>,
    },
 }
 /// Binary operators: arithmetic first, then comparisons.
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub enum BinaryOp {
    Add, Sub, Mul, Div, Mod,
    Equal, NotEqual, Less, LessEqual, Greater, GreaterEqual,
 }
 /// Types known to the AST.
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub enum Type {
    Int,
    Void,
 }
 /// A single function parameter.
 ///
 /// The fields are public, like those of every other node, so code outside this
 /// module can construct and inspect parameters.
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub struct Param {
    pub name: String,
    pub param_type: Type,
    pub span: Span,
 }
@@ -0,0 +1,94 @@
 use crate::{diagnostic::span::Span, err::CompileError, frontend::err::FrontendError};
 pub mod span;
 /// Collection of diagnostics, kept in the order they were added.
 ///
 /// `Default` yields the same empty collection as `Diagnositics::new`.
 #[derive(Default)]
 pub struct Diagnositics {
    diagnostics: Vec<Diagnostic>,
 }
 /// Severity of a diagnostic.
 ///
 /// The level is a plain tag, so it is `Copy` and comparable; that lets callers
 /// filter or assert on it without pattern matching.
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub enum DiagnosticLevel {
    /// A problem that makes the input invalid.
    Error,
    /// A suspicious construct that does not invalidate the input.
    Warning,
    /// Purely informational output.
    Info,
 }
 /// One message tied to a range of the source text.
 ///
 /// The fields are public so that code outside this module can build a value to
 /// pass to `Diagnositics::add`; with private fields and no constructor that
 /// public method could only be used from inside the module.
 pub struct Diagnostic {
    /// Severity the message is reported with.
    pub level: DiagnosticLevel,
    /// Human-readable description of the problem.
    pub message: String,
    /// Range of the source text the message refers to.
    pub span: Span,
 }
 impl Diagnositics {
    pub fn new() -> Self {
        Self { diagnostics: vec![] }
    }
    pub fn add(&mut self, diagnostic: Diagnostic) {
        self.diagnostics.push(diagnostic);
    }
    pub fn add_from_error(&mut self, error: impl Into<CompileError>, span: Span) {
        self.diagnostics.push(Diagnostic {
            level: DiagnosticLevel::Error,
            message: Into::<CompileError>::into(error).to_string(),
            span,
        });
    }
    pub fn add_from_frontend_error(&mut self, error: impl Into<FrontendError>, span: Span) {
        self.diagnostics.push(Diagnostic {
            level: DiagnosticLevel::Error,
            message: Into::<FrontendError>::into(error).to_string(),
            span,
        });
    }
    pub fn is_empty(&self) -> bool {
        self.diagnostics.is_empty()
    }
    pub fn print(&self, name: &str, source: &str) {
        use codespan_reporting::diagnostic::Diagnostic as CodespanDiagnostic;
        use codespan_reporting::files::SimpleFile;
        use codespan_reporting::diagnostic::{Severity, Label};
        use std::io::IsTerminal;
        use codespan_reporting::term::{self, termcolor::{ColorChoice, StandardStream}};
        let mut choice = ColorChoice::Auto;
        if !std::io::stdin().is_terminal() {
            choice = ColorChoice::Never;
        }
        let stdout = StandardStream::stdout(choice);
        let source_file = SimpleFile::new(name, source);
        let output_config = codespan_reporting::term::Config::default();
        for diagnostic in &self.diagnostics {
            let output_level = match diagnostic.level {
                DiagnosticLevel::Error => Severity::Error,
                DiagnosticLevel::Warning => Severity::Warning,
                DiagnosticLevel::Info => Severity::Note,
            };
            let output_diagnostic = CodespanDiagnostic::new(output_level)
                .with_message(&diagnostic.message)
                .with_label(
                    Label::primary((), diagnostic.span.start..diagnostic.span.end)
                );
            term::emit_to_write_style(&mut stdout.lock(), &output_config, &source_file, &output_diagnostic);
        }
    }
 }
 #[cfg(test)]
 mod tests {
    /// Smoke test: renders two error diagnostics for a three-line C snippet.
    ///
    /// It makes no assertions; it only checks that printing runs through.
    #[test]
    fn test_diagnostics() {
        use crate::diagnostic::{Diagnostic, DiagnosticLevel, Diagnositics};
        let mut diagnostics = Diagnositics::new();
        // 0..3 covers `int` at the very start of the snippet below.
        diagnostics.add(Diagnostic {
            level: DiagnosticLevel::Error,
            message: "test error".to_string(),
            span: crate::diagnostic::span::Span { start: 0, end: 3 },
        });
        // 16..22 covers `return` on the second line (11 characters, a newline
        // and four spaces of indentation come before it).
        diagnostics.add(Diagnostic {
            level: DiagnosticLevel::Error,
            message: "test error".to_string(),
            span: crate::diagnostic::span::Span { start: 16, end: 22 },
        });
        diagnostics.print("main.c", 
 r#"int main(){
    return 1;
 }"#);
    }
 }
@@ -0,0 +1,6 @@
 /// Range of the source text, given by its two end points.
 ///
 /// When a diagnostic is printed the span is turned into the range
 /// `start..end`, so `end` is exclusive there.
 #[derive(Clone, Copy, Debug, Eq, PartialEq)]
 pub struct Span {
    /// First position covered by the span.
    pub start: usize,
    /// Position just past the covered text.
    pub end: usize,
 }
@@ -0,0 +1,9 @@
 use thiserror::Error;
 use crate::frontend::err::FrontendError;
 /// Error type of the compiler; its only variant wraps a `FrontendError`.
 #[derive(Debug, Clone, PartialEq, Eq, Error)]
 pub enum CompileError {
    /// Error raised by the frontend. `transparent` forwards the message of the
    /// wrapped error and `#[from]` provides the `From<FrontendError>` conversion.
    #[error(transparent)]
    Frontend(#[from] FrontendError),
 }
@@ -0,0 +1,28 @@
 use thiserror::Error;
 // #[derive(Debug, Clone, PartialEq, Eq, Error)]
 // pub enum ParseError {
 //     BlockStmt(#[from] BlockStmtError)
 // }
 // #[derive(Debug, Clone, PartialEq, Eq, Error)]
 // pub enum BlockStmtError {
 //     MissingLBrace,
 //     MissingRBrace,
 // }
 /// Errors the lexer can report; the `#[error]` text is the diagnostic message.
 #[derive(Debug, Clone, PartialEq, Eq, Error)]
 pub enum LexingError {
    /// An integer literal has no digits or contains a character that is not
    /// valid for it.
    #[error("invalid int literal")]
    InvalidIntLiteral,
    /// An identifier is followed directly by a character that is neither part
    /// of an identifier nor a delimiter or whitespace.
    #[error("invalid ident")]
    InvalidIdent,
    /// A block comment was still open when lexing finished.
    #[error("comment unterminated")]
    UnterminatedComment,
    /// No token kind matched; the payload is the text that was skipped.
    #[error("unrecognized token: {0}")]
    UnrecognizedToken(String),
 }
 /// Errors of the frontend; currently only lexing errors exist.
 #[derive(Debug, Clone, PartialEq, Eq, Error)]
 pub enum FrontendError {
    /// Error raised by the lexer. `transparent` forwards the message of the
    /// wrapped error and `#[from]` provides the `From<LexingError>` conversion.
    #[error(transparent)]
    Lexing(#[from] LexingError),
 }
@@ -1,21 +1,32 @@
-use std::{io::BufRead, iter::Peekable, str::FromStr};
+use std::{io::BufRead, str::FromStr};
 use codespan_reporting::diagnostic;
 use thiserror::Error;
-use crate::frontend::types::{Span, TokenValue, TypeIdent};
+use crate::{diagnostic::{Diagnositics, span::{self, Span}}, frontend::{err::LexingError, types::{TokenValue, TypeIdent}}};
 use super::types::Token;
 pub struct Lexer {
-    tokens: Vec<Token>,
+    pub tokens: Vec<Token>,
-    errors: Vec<usize>, // every entry points to the index of unrecognized tokens
+    pub diagnostics: Diagnositics,
    old_char_count: usize,
    block_comment_span: Option<Span>,
    in_skip_line: bool
 }
 /// Characters that are skipped between tokens.
 const WHITESPACE_CHARS: &[char] = &[' ', '\t', '\n', '\r'];
 /// Characters that may directly follow an identifier or integer literal and
 /// that stop the scan of an unrecognized run.
 ///
 /// The braces belong here as well: `parse_delimiter` lexes them as tokens, and
 /// without them input such as `else{` or `1}` was rejected as an invalid
 /// identifier or integer literal.
 const DELIMITER_CHARS: &[char] = &[
    '+', '-', '*', '/', '%', '=', '!', '<', '>', '(', ')', '{', '}', ',', ';'
 ];
 /// Position-tracking view over one chunk of input.
 struct Cursor {
    // The chunk decoded into `char`s (filled by `Cursor::new` from `s.chars()`).
    chars: Vec<char>,
    // Current index into `chars`; starts at 0, so it counts characters, not bytes.
    pos: usize,
 }
 /// Outcome of a sub-lexer that did not produce a token.
 enum LexParseError {
    // The input does not start with this kind of token; `try_parse_as` then
    // returns `false` so the next sub-lexer can be tried.
    NotMatched,
    // The token kind matched but its text is malformed; `try_parse_as` records
    // the error as a diagnostic and skips ahead to a delimiter or whitespace.
    InvalidInMatch(LexingError)
 }
 impl Cursor {
    pub fn new(s: &str) -> Self {
        Self { chars: s.chars().collect(), pos: 0 }
@@ -47,20 +58,35 @@ impl Cursor {
        self.pos
    }
 }
 /// Tries to parse one token with the given function and returns whether the caller should continue with the next token.
 fn try_parse_as(
-    f: fn(&mut Cursor) -> Option<TokenValue>,
+    f: fn(&mut Cursor) -> Result<TokenValue, LexParseError>,
    tokens: &mut Vec<Token>,
    str_iter: &mut Cursor,
-    line: &mut usize,
+    diagnostics: &mut Diagnositics,
-    column: &mut usize,
+    last_char_count: usize,
 ) -> bool {
-    let last_pos = str_iter.pos();
+    let last_pos = str_iter.pos() + last_char_count;
-    if let Some(token) = f(str_iter) {
+    match f(str_iter) {
-        let span = Span { line: *line, column: *column, length: str_iter.pos() - last_pos };
+        Ok(token_value) => {
-        tokens.push(Token { value: token, span });
+            let span = Span { start: last_pos, end: str_iter.pos() + last_char_count };
-        return true;
+            tokens.push(Token { value: token_value, span });
            return true;
        }
        Err(LexParseError::NotMatched) => false,
        Err(LexParseError::InvalidInMatch(err)) => {
            // try recover from delimiter char or whitespace char
            while let Some(c) = str_iter.peek() {
                if DELIMITER_CHARS.contains(&c) || WHITESPACE_CHARS.contains(&c) {
                    break;
                }
                str_iter.advance(1);
            }
            let span = Span { start: last_pos, end: str_iter.pos() + last_char_count };
            diagnostics.add_from_frontend_error(err, span);
            return true;
        }
    }
    false
 }
 macro_rules! if_true_then_continue {
    ($e: expr) => {
@@ -77,99 +103,106 @@ pub enum LexerError {
    TooManyErrors,
 }
 impl Lexer {
-    pub fn has_errors(&self) -> bool {
+    pub fn new() -> Self {
-        !self.errors.is_empty()
+        Self { tokens: vec![], diagnostics: Diagnositics::new(), old_char_count: 0, block_comment_span: None, in_skip_line: false }
    }
-    pub fn parse(reader: &mut impl BufRead) -> Result<Self, LexerError> {
+    pub fn finish(mut self) -> (Vec<Token>, Diagnositics) {
-        let mut tokens = Vec::new();
+        if let Some(span) = self.block_comment_span.take() {
-        let mut errors = Vec::new();
+            self.diagnostics.add_from_frontend_error(LexingError::UnterminatedComment, span);
        let mut line = 1;
        let mut column = 1;
        let mut in_block_comment = false;
        for line_str in reader.lines() {
            let line_str = line_str?;
            let mut cursor = Cursor::new(&line_str);
                loop {
                    if let Some(c) = cursor.peek() {
                        // check white space first, if it's white space, skip it and continue to the next character
                        if WHITESPACE_CHARS.contains(&c) {
                            column += 1;
                            cursor.advance(1);
                            continue;
                        }
                        // check comment
                        match cursor.peek_multiple(2) {
                            Some(['/', '/']) => {
                                // skip the rest of the line
                                line += 1;
                                column = 1;
                                break;
                            }
                            Some(['/', '*']) => {
                                in_block_comment = true;
                                cursor.advance(2);
                                column += 2;
                                continue;
                            }
                            Some(['*', '/']) => {
                                in_block_comment = false;
                                cursor.advance(2);
                                column += 2;
                                continue;
                            }
                            _ => {}
                        }
                    } else {
                        break;
                    }
                    if in_block_comment {
                        cursor.advance(1);
                        column += 1;
                    }
                    if_true_then_continue!(try_parse_as(parse_litint, &mut tokens, &mut cursor, &mut line, &mut column));
                    if_true_then_continue!(try_parse_as(parse_delimiter, &mut tokens, &mut cursor, &mut line, &mut column));
                    if_true_then_continue!(try_parse_as(parse_puncuation, &mut tokens, &mut cursor, &mut line, &mut column));
                    if_true_then_continue!(try_parse_as(parse_ident, &mut tokens, &mut cursor, &mut line, &mut column));
                    // unrecognized token
                    errors.push(tokens.len());
                    let c = cursor.next().unwrap();
                    tokens.push(Token {
                        value: TokenValue::Unrecognized(c),
                        span: Span { line, column, length: 1 },
                    });
                    if errors.len() > 20 {
                        return Err(LexerError::TooManyErrors);
                    }
                    column += 1;
                }
            line += 1;
            column = 1;
        }
-        Ok(Self { tokens, errors })
+        (self.tokens, self.diagnostics)
    }
    /// Lexes the next chunk of input, continuing from the state left by earlier calls.
    ///
    /// The comment state (`in_skip_line`, `block_comment_span`) and the running
    /// offset (`old_char_count`) survive between calls, so the input can be fed
    /// piece by piece, for example line by line. Pass whitespace and newlines
    /// through unchanged: they count towards the positions stored in spans.
    ///
    /// Tokens are appended to `self.tokens` and problems to `self.diagnostics`.
    ///
    /// NOTE(review): comment markers are looked up with `peek_multiple(2)` on the
    /// current chunk only, so a marker split across two chunks is presumably not
    /// recognized; feeding whole lines avoids that.
    pub fn parse_next_str(&mut self, s: &str) {
        let mut cursor = Cursor::new(s);
        loop {
            if let Some(c) = cursor.peek() {
                if self.in_skip_line && c != '\n' {
                    cursor.advance(1);
                    continue;
                }
                // Inside a block comment only `*/` is meaningful. Handling this
                // first keeps `//` from starting a line skip that would swallow
                // a closing `*/` on the same line, and keeps a nested `/*` from
                // moving the recorded start of the comment.
                if self.block_comment_span.is_some() {
                    if let Some(['*', '/']) = cursor.peek_multiple(2) {
                        self.block_comment_span = None;
                        cursor.advance(2);
                    } else {
                        cursor.advance(1);
                    }
                    continue;
                }
                // Whitespace separates tokens; a newline also ends a line comment.
                if WHITESPACE_CHARS.contains(&c) {
                    if c == '\n' {
                        self.in_skip_line = false;
                    }
                    cursor.advance(1);
                    continue;
                }
                // Comment openers. A `*/` outside of a comment is not special and
                // falls through to the token parsers below.
                match cursor.peek_multiple(2) {
                    Some(['/', '/']) => {
                        // skip the rest of the line
                        self.in_skip_line = true;
                        cursor.advance(2);
                        continue;
                    }
                    Some(['/', '*']) => {
                        // Remember where the comment opened so `finish` can
                        // point at it if it is never closed.
                        let start = cursor.pos() + self.old_char_count;
                        self.block_comment_span = Some(Span { start, end: start + 2 });
                        cursor.advance(2);
                        continue;
                    }
                    _ => {}
                }
            } else {
                break;
            }
            if_true_then_continue!(try_parse_as(parse_litint, &mut self.tokens, &mut cursor, &mut self.diagnostics, self.old_char_count));
            if_true_then_continue!(try_parse_as(parse_delimiter, &mut self.tokens, &mut cursor, &mut self.diagnostics, self.old_char_count));
            if_true_then_continue!(try_parse_as(parse_puncuation, &mut self.tokens, &mut cursor, &mut self.diagnostics, self.old_char_count));
            if_true_then_continue!(try_parse_as(parse_ident, &mut self.tokens, &mut cursor, &mut self.diagnostics, self.old_char_count));
            // unrecognized token
            let last_pos = cursor.pos() + self.old_char_count;
            let mut unrecognized = String::new();
            while let Some(c) = cursor.peek() {
                let at_boundary = DELIMITER_CHARS.contains(&c) || WHITESPACE_CHARS.contains(&c);
                // The first character is always consumed: if no parser accepted
                // a delimiter, stopping in front of it would never advance the
                // cursor and the outer loop would not terminate.
                if at_boundary && !unrecognized.is_empty() {
                    break;
                }
                unrecognized.push(c);
                cursor.advance(1);
            }
            let span = Span { start: last_pos, end: cursor.pos() + self.old_char_count };
            self.diagnostics.add_from_frontend_error(LexingError::UnrecognizedToken(unrecognized), span);
            self.tokens.push(Token { value: TokenValue::Unrecognized, span });
        }
        // NOTE(review): `s.len()` counts bytes while `cursor.pos()` counts
        // characters. Spans are therefore exact for ASCII input; for non-ASCII
        // text the cursor would have to report byte offsets as well.
        self.old_char_count += s.len();
    }
 }
 fn parse_litint(
    str_iter: &mut Cursor,
-) -> Option<TokenValue> {
+) -> Result<TokenValue, LexParseError> {
-    let mut c1 = str_iter.peek()?;
+    let mut c1 = str_iter.peek().ok_or(LexParseError::NotMatched)?;
    // c1 is the peek value from here
    let mut sign_base: i64 = 1;
    let mut base: i64 = 10;
    if !(c1.is_ascii_digit() || c1 == '-') {
-        return None;
+        return Err(LexParseError::NotMatched);
    }
    if c1 == '-' {
        sign_base = -1;
        str_iter.advance(1);
-        c1 = str_iter.peek()?;
+        c1 = str_iter.peek().ok_or(LexParseError::NotMatched)?;
        if !c1.is_ascii_digit() {
            // only a minus sign, not a number
            // back one so cursor still points to the minus sign
            str_iter.back(1);
-            return None;
+            return Err(LexParseError::NotMatched);
        }
    }
    let mut number = 0i64;
    let mut has_digits = false;
    if c1 == '0' {
        str_iter.advance(1);
        match str_iter.peek() {
@@ -181,12 +214,13 @@ fn parse_litint(
                base = 8;
            }
            _ => {
                has_digits = true;
                // only zero
            }
        }
    }
    // from here, the cursor points to:
-    // 0x1234 -> cursor at 'x'
+    // 0x1234 -> cursor at '1'
    // 0123 -> cursor at '1'
    // 0 -> cursor at end
    // 1234 -> cursor at '1'
@@ -199,32 +233,42 @@ fn parse_litint(
            '0'..='9' if (c as u8 - b'0') < base as u8 => c as i64 - '0' as i64 ,
            'a'..='f' if base == 16 => c as i64 - 'a' as i64 + 10 ,
            'A'..='F' if base == 16 => c as i64 - 'A' as i64 + 10,
-            _ => break,
+            c => if WHITESPACE_CHARS.contains(&c) || DELIMITER_CHARS.contains(&c) {
                break;
            } else {
                // unrecognized character in number literal
                return Err(LexParseError::InvalidInMatch(LexingError::InvalidIntLiteral));
            }
        };
        has_digits = true;
        number = number * base + digit;
        str_iter.advance(1);
    }
    if !has_digits {
        // No valid digits found, add a diagnostic
        return Err(LexParseError::InvalidInMatch(LexingError::InvalidIntLiteral));
    }
    number *= sign_base;
-    Some(TokenValue::IntLit(number))
+    Ok(TokenValue::IntLit(number))
 }
 fn parse_delimiter(
    str_iter: &mut Cursor,
-) -> Option<TokenValue> {
+) -> Result<TokenValue, LexParseError> {
-    let c = str_iter.peek()?;
+    let c = str_iter.peek().ok_or(LexParseError::NotMatched)?;
    let token_value = match c {
        '(' => TokenValue::LParen,
        ')' => TokenValue::RParen,
        '{' => TokenValue::LBrace,
        '}' => TokenValue::RBrace,
-        _ => return None,
+        _ => return Err(LexParseError::NotMatched),
    };
    str_iter.advance(1);
-    Some(token_value)
+    Ok(token_value)
 }
 fn parse_puncuation(
    str_iter: &mut Cursor,
-) -> Option<TokenValue> {
+) -> Result<TokenValue, LexParseError> {
    let get_value_by_next_char = 
        |str_iter: &mut Cursor, not_equal_value: TokenValue, equal_value: TokenValue| {
            str_iter.advance(1);
@@ -235,7 +279,7 @@ fn parse_puncuation(
                not_equal_value
            }
    };
-    let c = str_iter.peek()?;
+    let c = str_iter.peek().ok_or(LexParseError::NotMatched)?;
    let token_value = match c {
        '+' => TokenValue::Plus,
        '-' => TokenValue::Minus,
@@ -249,9 +293,7 @@ fn parse_puncuation(
            if let Some('=') = str_iter.peek() {
                TokenValue::NotEqual
            } else {
-                // only '!' is not a valid token, back one so cursor still points to '!'
+                TokenValue::Not
                str_iter.back(1);
                return None;
            }
        },
        '<' => get_value_by_next_char(str_iter, TokenValue::Less, TokenValue::LessEqual),
@@ -260,33 +302,35 @@ fn parse_puncuation(
        ',' => TokenValue::Comma,
        ';' => TokenValue::Semicolon,
-        _ => return None,
+        _ => return Err(LexParseError::NotMatched),
    };
    str_iter.advance(1);
-    Some(token_value)
+    Ok(token_value)
 }
 fn parse_ident(
    str_iter: &mut Cursor,
-) -> Option<TokenValue> {
+) -> Result<TokenValue, LexParseError> {
-    let c = str_iter.peek()?;
+    let c = str_iter.peek().ok_or(LexParseError::NotMatched)?;
    if !c.is_ascii_alphabetic() && c != '_' {
-        return None;
+        return Err(LexParseError::NotMatched);
    }
    let mut name = Vec::new();
    while let Some(c) = str_iter.peek() {
        if c.is_ascii_alphanumeric() || c == '_' {
            name.push(c);
            str_iter.advance(1);
-        } else {
+        } else if DELIMITER_CHARS.contains(&c) || WHITESPACE_CHARS.contains(&c) {
            break;
        } else {
            return Err(LexParseError::InvalidInMatch(LexingError::InvalidIdent));
        }
    }
    let name = name.into_iter().collect::<String>();
    if let Some(type_ident) = TypeIdent::from_str(&name).ok() {
-        return Some(TokenValue::TypeIdent(type_ident));
+        return Ok(TokenValue::TypeIdent(type_ident));
    }
-    Some(TokenValue::Ident(name))
+    Ok(TokenValue::Ident(name))
 }
 #[cfg(test)]
 mod tests {
@@ -303,11 +347,22 @@ mod tests {
        for case_no in case_sequence {
            let case_path = case_list.get_case_path(case_no).unwrap();
            println!("{}", case_path.display());
-            let file = File::open(case_path).unwrap();
+            let file = File::open(&case_path).unwrap();
            let mut buf_reader = std::io::BufReader::new(file);
-            let lexer = Lexer::parse(&mut buf_reader).unwrap();
+            let mut lexer = Lexer::new();
-            if lexer.has_errors() {
+            let mut full_text = String::new();
-                eprintln!("Case {} has error", case_list.get_case_name(case_no).unwrap());
+            loop {
                let mut line = String::new();
                let bytes_read = buf_reader.read_line(&mut line).unwrap();
                if bytes_read == 0 {
                    break;
                }
                full_text.push_str(&line);
                lexer.parse_next_str(&line);
            }
            let (_tokens, diagnostics) = lexer.finish();
            if !diagnostics.is_empty() {
                diagnostics.print(&format!("{}", case_path.display()), &full_text);
                error_case_cnt += 1;
            }
        }
@@ -1,2 +1,4 @@
 pub mod types;
-mod lexer;
+mod lexer;
 // pub mod parser;
 pub mod err;
@@ -1,4 +1,6 @@
-use strum::EnumString;
+use strum::{AsRefStr, EnumString};
 use crate::diagnostic::span::Span;
 #[derive(Debug, Clone, PartialEq, Eq)]
@@ -7,12 +9,6 @@ pub struct Token {
    pub span: Span,
 }
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub struct Span {
    pub line: usize,
    pub column: usize,
    pub length: usize,
 }
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub enum TokenValue {
@@ -20,6 +16,61 @@ pub enum TokenValue {
    Ident(String),
    TypeIdent(TypeIdent),
    Plus, Minus, Star, Slash, Percent,
    Equal, DoubleEqual, Not, NotEqual, Less, LessEqual, Greater, GreaterEqual,
    LParen, RParen,
    LBrace, RBrace,
    Comma, Semicolon,
    If, Else, While, Return, Break, Continue,
    // Eof,
    Unrecognized,
 }
 /// Human-readable rendering of a token.
 ///
 /// Tokens that carry data are printed with a short description of their
 /// payload; every other token is printed as its fixed spelling.
 impl std::fmt::Display for TokenValue {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let spelling = match self {
            // Payload-carrying tokens need formatting and return right away.
            TokenValue::IntLit(number) => return write!(f, "literal int: {}", number),
            TokenValue::Ident(name) => return write!(f, "identifier: {}", name),
            TokenValue::TypeIdent(type_ident) => return write!(f, "type {}", type_ident.as_ref()),
            // Everything else has exactly one spelling.
            TokenValue::Plus => "+",
            TokenValue::Minus => "-",
            TokenValue::Star => "*",
            TokenValue::Slash => "/",
            TokenValue::Percent => "%",
            TokenValue::Equal => "=",
            TokenValue::DoubleEqual => "==",
            TokenValue::Not => "!",
            TokenValue::NotEqual => "!=",
            TokenValue::Less => "<",
            TokenValue::LessEqual => "<=",
            TokenValue::Greater => ">",
            TokenValue::GreaterEqual => ">=",
            TokenValue::LParen => "(",
            TokenValue::RParen => ")",
            TokenValue::LBrace => "{",
            TokenValue::RBrace => "}",
            TokenValue::Comma => ",",
            TokenValue::Semicolon => ";",
            TokenValue::If => "if",
            TokenValue::Else => "else",
            TokenValue::While => "while",
            TokenValue::Return => "return",
            TokenValue::Break => "break",
            TokenValue::Continue => "continue",
            TokenValue::Unrecognized => "unrecognized",
        };
        f.write_str(spelling)
    }
 }
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub enum TokenKind {
    IntLit,
    Ident,
    TypeIdent,
    Plus, Minus, Star, Slash, Percent,
    Equal, DoubleEqual, NotEqual, Less, LessEqual, Greater, GreaterEqual,
@@ -29,10 +80,10 @@ pub enum TokenValue {
    If, Else, While, Return, Break, Continue,
-    Eof,
+    // Eof,
-    Unrecognized(char),
+    Unrecognized,
 }
-#[derive(Debug, Clone, PartialEq, Eq, EnumString)]
+#[derive(Debug, Clone, PartialEq, Eq, EnumString, AsRefStr)]
 pub enum TypeIdent {
    #[strum(serialize = "int")]
    Int,
@@ -1,6 +1,8 @@
 mod frontend;
 mod ast;
 mod utils;
 mod diagnostic;
 mod err;
 /// Binary entry point; for now it only prints a greeting.
 fn main() {
    println!("Hello, world!");
 }