feat(lexer): Add diagnostics and implement error recovery

2026-05-08 22:54:46 +08:00
parent e8b50ae0d7
commit 63a2990826
12 changed files with 525 additions and 120 deletions
@@ -17,6 +17,17 @@ version = "1.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"

+[[package]]
+name = "codespan-reporting"
+version = "0.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "af491d569909a7e4dee0ad7db7f5341fef5c614d5b8ec8cf765732aba3cff681"
+dependencies = [
+ "serde",
+ "termcolor",
+ "unicode-width",
+]
+
 [[package]]
 name = "heck"
 version = "0.5.0"
@@ -153,12 +164,43 @@ checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a"
 name = "rusty-minic"
 version = "0.1.0"
 dependencies = [
+ "codespan-reporting",
 "num",
 "regex",
 "strum",
 "thiserror",
 ]

+[[package]]
+name = "serde"
+version = "1.0.228"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e"
+dependencies = [
+ "serde_core",
+ "serde_derive",
+]
+
+[[package]]
+name = "serde_core"
+version = "1.0.228"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad"
+dependencies = [
+ "serde_derive",
+]
+
+[[package]]
+name = "serde_derive"
+version = "1.0.228"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
 [[package]]
 name = "strum"
 version = "0.28.0"
@@ -191,6 +233,15 @@ dependencies = [
 "unicode-ident",
 ]

+[[package]]
+name = "termcolor"
+version = "1.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755"
+dependencies = [
+ "winapi-util",
+]
+
 [[package]]
 name = "thiserror"
 version = "2.0.18"
@@ -216,3 +267,33 @@ name = "unicode-ident"
 version = "1.0.24"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
+
+[[package]]
+name = "unicode-width"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254"
+
+[[package]]
+name = "winapi-util"
+version = "0.1.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
+dependencies = [
+ "windows-sys",
+]
+
+[[package]]
+name = "windows-link"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
+
+[[package]]
+name = "windows-sys"
+version = "0.61.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc"
+dependencies = [
+ "windows-link",
+]
@@ -4,6 +4,7 @@ version = "0.1.0"
 edition = "2024"

 [dependencies]
+codespan-reporting = "0.13.1"
 num = "0.4.3"
 regex = "1.12.3"
 strum = { version = "0.28.0", features = ["derive"] }
@@ -0,0 +1 @@
+pub mod types;
@@ -0,0 +1,75 @@
+/// A `start`/`end` pair locating an AST node in the source text.
+///
+/// The fields are public so that code outside this module (for example a
+/// parser) can actually construct AST nodes; every node type below carries a
+/// `Span`, and with private fields and no constructor none of them could be
+/// built elsewhere.
+///
+/// NOTE(review): this has the same shape as `crate::diagnostic::span::Span`;
+/// consider re-using that type instead of keeping two definitions.
+#[derive(Debug, Clone, Copy)]
+pub struct Span {
+    pub start: usize,
+    pub end: usize,
+}
+/// Root container of the AST: the list of global declarations.
+pub struct CompileUnit {
+    /// Global declarations held by this unit (variables and functions, see [`GlobalDeclStmt`]).
+    pub global_decls: Vec<GlobalDeclStmt>,
+}
+/// A declaration stored in [`CompileUnit::global_decls`].
+pub enum GlobalDeclStmt {
+    /// A variable declaration.
+    VarDecl(VarDeclStmt),
+    /// A function declaration.
+    FuncDecl(FuncDeclStmt),
+}
+
+/// Declaration of a variable: a name, a type and the source location.
+pub struct VarDeclStmt {
+    /// Name of the declared variable.
+    pub name: String,
+    /// Declared type of the variable.
+    pub var_type: Type,
+    /// Source location of the declaration.
+    pub span: Span,
+}
+
+/// Declaration of a function together with its body.
+pub struct FuncDeclStmt {
+    /// Name of the function.
+    pub name: String,
+    /// Return type of the function.
+    pub return_type: Type,
+    /// Parameters of the function.
+    pub params: Vec<Param>,
+    /// Block forming the function body.
+    pub body: BlockStmt,
+    /// Source location of the declaration.
+    pub span: Span,
+}
+/// A block: a list of statements with a source location.
+pub struct BlockStmt {
+    /// Statements contained in the block.
+    pub statements: Vec<Statement>,
+    /// Source location of the block.
+    pub span: Span,
+}
+
+/// A statement that can appear inside a [`BlockStmt`].
+pub enum Statement {
+    /// A `return` statement.
+    Return(ReturnStmt),
+    /// A nested block.
+    Block(BlockStmt),
+    /// An expression used as a statement.
+    Expr(Expr),
+    /// A variable declaration.
+    VarDecl(VarDeclStmt),
+}
+/// A `return` statement with an optional value.
+pub struct ReturnStmt {
+    /// Returned expression, or `None` when no value is given.
+    pub value: Option<Expr>,
+    /// Source location of the statement.
+    pub span: Span,
+}
+/// An expression: its content ([`ExprValue`]) plus the source location.
+pub struct Expr {
+    /// What kind of expression this is, including its operands.
+    pub value: ExprValue,
+    /// Source location of the expression.
+    pub span: Span,
+}
+/// The different forms an [`Expr`] can take.
+pub enum ExprValue {
+    /// Integer literal.
+    IntLit(i64),
+    /// Variable reference (the string is presumably the variable name).
+    Var(String),
+    /// Binary operation with a left operand, an operator and a right operand.
+    BinaryOp {
+        lhs: Box<Expr>, 
+        op: BinaryOp, 
+        rhs: Box<Expr>
+    },
+    /// Function call (presumably the callee name followed by the arguments).
+    FuncCall(String, Vec<Expr>),
+    /// Assignment with a target (`lvalue`) and a value (`rvalue`) expression.
+    Assign {
+        lvalue: Box<Expr>, 
+        rvalue: Box<Expr>
+    },
+}
+
+/// Operators usable in [`ExprValue::BinaryOp`].
+pub enum BinaryOp {
+    // arithmetic operators
+    Add, Sub, Mul, Div, Mod,
+    // comparison operators
+    Equal, NotEqual, Less, LessEqual, Greater, GreaterEqual,
+}
+
+/// Types that declarations in the AST can refer to.
+pub enum Type {
+    /// The integer type.
+    Int,
+    /// The `void` type.
+    Void,
+}
+
+/// A single function parameter, as stored in `FuncDeclStmt::params`.
+///
+/// The fields are public like those of every other AST node in this file;
+/// with private fields and no constructor a `Param` could not be created or
+/// inspected outside this module.
+pub struct Param {
+    /// Name of the parameter.
+    pub name: String,
+    /// Declared type of the parameter.
+    pub param_type: Type,
+    /// Source location of the parameter.
+    pub span: Span,
+}
@@ -0,0 +1,94 @@
+use crate::{diagnostic::span::Span, err::CompileError, frontend::err::FrontendError};
+
+pub mod span;
+
+/// An ordered collection of [`Diagnostic`]s gathered during compilation.
+///
+/// NOTE(review): the type name is misspelled ("Diagnositics"); it is kept
+/// because other modules refer to it by this name.
+pub struct Diagnositics {
+    // diagnostics in the order in which they were added
+    diagnostics: Vec<Diagnostic>,
+}
+/// Severity of a [`Diagnostic`].
+///
+/// When printed, `Error` and `Warning` map to the codespan severities of the
+/// same name and `Info` maps to `Severity::Note`.
+pub enum DiagnosticLevel {
+    Error,
+    Warning,
+    Info,
+}
+/// A single message attached to a range of the source text.
+///
+/// The fields are public so that callers outside this module can build a
+/// value to hand to the public `Diagnositics::add`; with private fields that
+/// method could only be used from inside this module.
+pub struct Diagnostic {
+    /// Severity of the message.
+    pub level: DiagnosticLevel,
+    /// Text shown to the user.
+    pub message: String,
+    /// Source range the message refers to.
+    pub span: Span,
+}
+impl Diagnositics {
+    /// Creates an empty collection.
+    pub fn new() -> Self {
+        Self { diagnostics: Vec::new() }
+    }
+    /// Appends an already constructed diagnostic.
+    pub fn add(&mut self, diagnostic: Diagnostic) {
+        self.diagnostics.push(diagnostic);
+    }
+    /// Records `error` as an error-level diagnostic at `span`.
+    ///
+    /// The message is the `Display` text of the error after conversion into
+    /// [`CompileError`].
+    pub fn add_from_error(&mut self, error: impl Into<CompileError>, span: Span) {
+        let error: CompileError = error.into();
+        self.add(Diagnostic {
+            level: DiagnosticLevel::Error,
+            message: error.to_string(),
+            span,
+        });
+    }
+    /// Records `error` as an error-level diagnostic at `span`.
+    ///
+    /// The message is the `Display` text of the error after conversion into
+    /// [`FrontendError`].
+    pub fn add_from_frontend_error(&mut self, error: impl Into<FrontendError>, span: Span) {
+        let error: FrontendError = error.into();
+        self.add(Diagnostic {
+            level: DiagnosticLevel::Error,
+            message: error.to_string(),
+            span,
+        });
+    }
+    /// Returns `true` when no diagnostic has been recorded.
+    pub fn is_empty(&self) -> bool {
+        self.diagnostics.is_empty()
+    }
+    /// Renders every diagnostic to stdout using `codespan_reporting`.
+    ///
+    /// `name` is the file name shown in the output and `source` is the text
+    /// that the spans index into. Colors are only used when stdout is a
+    /// terminal. A diagnostic that cannot be rendered is reported on stderr
+    /// and the remaining ones are still printed.
+    pub fn print(&self, name: &str, source: &str) {
+        use codespan_reporting::diagnostic::{Diagnostic as CodespanDiagnostic, Label, Severity};
+        use codespan_reporting::files::SimpleFile;
+        use codespan_reporting::term::{self, termcolor::{ColorChoice, StandardStream}};
+        use std::io::IsTerminal;
+
+        // The output goes to stdout, so stdout (not stdin) decides whether
+        // escape sequences are appropriate, e.g. when the output is piped.
+        let choice = if std::io::stdout().is_terminal() {
+            ColorChoice::Auto
+        } else {
+            ColorChoice::Never
+        };
+        let stdout = StandardStream::stdout(choice);
+        let mut writer = stdout.lock();
+        let source_file = SimpleFile::new(name, source);
+        let output_config = term::Config::default();
+        for diagnostic in &self.diagnostics {
+            let output_level = match diagnostic.level {
+                DiagnosticLevel::Error => Severity::Error,
+                DiagnosticLevel::Warning => Severity::Warning,
+                DiagnosticLevel::Info => Severity::Note,
+            };
+            let output_diagnostic = CodespanDiagnostic::new(output_level)
+                .with_message(&diagnostic.message)
+                .with_label(
+                    Label::primary((), diagnostic.span.start..diagnostic.span.end)
+                );
+            // Rendering can fail (I/O error, span outside of `source`); do not
+            // drop that silently.
+            if let Err(err) = term::emit_to_write_style(&mut writer, &output_config, &source_file, &output_diagnostic) {
+                eprintln!("failed to render diagnostic: {err}");
+            }
+        }
+    }
+}
+#[cfg(test)]
+mod tests {
+    /// Checks that added diagnostics are recorded and that printing two
+    /// spans of a small source text does not panic.
+    #[test]
+    fn test_diagnostics() {
+        use crate::diagnostic::{Diagnostic, DiagnosticLevel, Diagnositics};
+        let mut diagnostics = Diagnositics::new();
+        assert!(diagnostics.is_empty());
+        // covers `int` on the first line
+        diagnostics.add(Diagnostic {
+            level: DiagnosticLevel::Error,
+            message: "test error".to_string(),
+            span: crate::diagnostic::span::Span { start: 0, end: 3 },
+        });
+        // covers `return` on the second line
+        diagnostics.add(Diagnostic {
+            level: DiagnosticLevel::Error,
+            message: "test error".to_string(),
+            span: crate::diagnostic::span::Span { start: 16, end: 22 },
+        });
+        assert!(!diagnostics.is_empty());
+        diagnostics.print("main.c",
+r#"int main(){
+    return 1;
+}"#);
+    }
+}
@@ -0,0 +1,6 @@
+
+/// A `start..end` range into the source text.
+///
+/// When diagnostics are printed the range is passed on unchanged as the
+/// label range.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub struct Span {
+    /// Offset where the range begins.
+    pub start: usize,
+    /// Offset where the range ends (used as the exclusive end of `start..end`).
+    pub end: usize,
+}
@@ -0,0 +1,9 @@
+use thiserror::Error;
+
+use crate::frontend::err::FrontendError;
+
+/// Top-level error type of the compiler.
+///
+/// Currently it only wraps [`FrontendError`]; the wrapped error provides the
+/// message (`transparent`) and converts automatically via `From`.
+#[derive(Debug, Clone, PartialEq, Eq, Error)]
+pub enum CompileError {
+    #[error(transparent)]
+    Frontend(#[from] FrontendError),
+}
@@ -0,0 +1,28 @@
+use thiserror::Error;
+
+// #[derive(Debug, Clone, PartialEq, Eq, Error)]
+// pub enum ParseError {
+//     BlockStmt(#[from] BlockStmtError)
+// }
+// #[derive(Debug, Clone, PartialEq, Eq, Error)]
+// pub enum BlockStmtError {
+//     MissingLBrace,
+//     MissingRBrace,
+// }
+/// Errors reported by the lexer; the `#[error]` text is the message shown
+/// in the diagnostic.
+#[derive(Debug, Clone, PartialEq, Eq, Error)]
+pub enum LexingError {
+    /// An integer literal contains an unexpected character or has no digits.
+    #[error("invalid int literal")]
+    InvalidIntLiteral,
+    /// An identifier contains a character that may not appear in it.
+    #[error("invalid ident")]
+    InvalidIdent,
+    /// A block comment was still open when lexing finished.
+    #[error("comment unterminated")]
+    UnterminatedComment,
+    /// Input that no token rule matched; carries the offending text.
+    #[error("unrecognized token: {0}")]
+    UnrecognizedToken(String),
+}
+/// Errors produced by the frontend.
+///
+/// Currently it only wraps [`LexingError`]; the wrapped error provides the
+/// message (`transparent`) and converts automatically via `From`.
+#[derive(Debug, Clone, PartialEq, Eq, Error)]
+pub enum FrontendError {
+    #[error(transparent)]
+    Lexing(#[from] LexingError),
+    
+}
@@ -1,21 +1,32 @@
-use std::{io::BufRead, iter::Peekable, str::FromStr};
+use std::{io::BufRead, str::FromStr};

+use codespan_reporting::diagnostic;
 use thiserror::Error;

-use crate::frontend::types::{Span, TokenValue, TypeIdent};
+use crate::{diagnostic::{Diagnositics, span::{self, Span}}, frontend::{err::LexingError, types::{TokenValue, TypeIdent}}};

 use super::types::Token;

+/// Incremental lexer: input is fed in chunks via `parse_next_str` and the
+/// result is collected with `finish`.
 pub struct Lexer {
-    tokens: Vec<Token>,
-    errors: Vec<usize>, // every entry points to the index of unrecognized tokens
+    /// Tokens produced so far.
+    pub tokens: Vec<Token>,
+    /// Diagnostics recorded so far.
+    pub diagnostics: Diagnositics,
+    // Sum of `s.len()` over all chunks already passed to `parse_next_str`;
+    // added to cursor positions to form spans.
+    old_char_count: usize,
+    // Span of the `/*` that opened the block comment currently being
+    // skipped, or `None` when not inside a block comment.
+    block_comment_span: Option<Span>,
+    // `true` while the rest of the line after `//` is being skipped.
+    in_skip_line: bool
 }

 const WHITESPACE_CHARS: &[char] = &[' ', '\t', '\n', '\r'];
+/// Characters that may directly follow an identifier or integer literal and
+/// therefore end it; error recovery also stops skipping at these characters.
+///
+/// Braces are included because they are tokens of their own (see
+/// `parse_delimiter`): without them input such as `else{` or `1}` is reported
+/// as an invalid identifier / literal and recovery skips past the brace.
+const DELIMITER_CHARS: &[char] = &[
+    '+', '-', '*', '/', '%', '=', '!', '<', '>', '(', ')', '{', '}', ',', ';',
+];
 struct Cursor {
    chars: Vec<char>,
    pos: usize,
 }
+/// Outcome of a token parser that did not produce a token.
+enum LexParseError {
+    /// The input does not start with this kind of token; another parser
+    /// should be tried.
+    NotMatched,
+    /// The input started like this kind of token but turned out to be
+    /// malformed; carries the error to report.
+    InvalidInMatch(LexingError)
+}
 impl Cursor {
    pub fn new(s: &str) -> Self {
        Self { chars: s.chars().collect(), pos: 0 }
@@ -47,20 +58,35 @@ impl Cursor {
        self.pos
    }
 }
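+/// Tries to parse a token with the given function; returns `true` when input was consumed (a token or a recovered error), so the caller should continue with the next token.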
 fn try_parse_as(
-    f: fn(&mut Cursor) -> Option<TokenValue>,
+    f: fn(&mut Cursor) -> Result<TokenValue, LexParseError>,
    tokens: &mut Vec<Token>,
    str_iter: &mut Cursor,
-    line: &mut usize,
-    column: &mut usize,
+    diagnostics: &mut Diagnositics,
+    last_char_count: usize,
 ) -> bool {
-    let last_pos = str_iter.pos();
-    if let Some(token) = f(str_iter) {
-        let span = Span { line: *line, column: *column, length: str_iter.pos() - last_pos };
-        tokens.push(Token { value: token, span });
-        return true;
+    let last_pos = str_iter.pos() + last_char_count;
+    match f(str_iter) {
+        Ok(token_value) => {
+            let span = Span { start: last_pos, end: str_iter.pos() + last_char_count };
+            tokens.push(Token { value: token_value, span });
+            return true;
+        }
+        Err(LexParseError::NotMatched) => false,
+        Err(LexParseError::InvalidInMatch(err)) => {
+            // try recover from delimiter char or whitespace char
+            while let Some(c) = str_iter.peek() {
+                if DELIMITER_CHARS.contains(&c) || WHITESPACE_CHARS.contains(&c) {
+                    break;
+                }
+                str_iter.advance(1);
+            }
+            let span = Span { start: last_pos, end: str_iter.pos() + last_char_count };
+            diagnostics.add_from_frontend_error(err, span);
+            return true;
+        }
    }
-    false
 }
 macro_rules! if_true_then_continue {
    ($e: expr) => {
@@ -77,99 +103,106 @@ pub enum LexerError {
    TooManyErrors,
 }
 impl Lexer {
-    pub fn has_errors(&self) -> bool {
-        !self.errors.is_empty()
+    pub fn new() -> Self {
+        Self { tokens: vec![], diagnostics: Diagnositics::new(), old_char_count: 0, block_comment_span: None, in_skip_line: false }
    }
-    pub fn parse(reader: &mut impl BufRead) -> Result<Self, LexerError> {
-        let mut tokens = Vec::new();
-        let mut errors = Vec::new();
-        let mut line = 1;
-        let mut column = 1;
-        let mut in_block_comment = false;
-        for line_str in reader.lines() {
-            let line_str = line_str?;
-            let mut cursor = Cursor::new(&line_str);
-                loop {
-                    if let Some(c) = cursor.peek() {
-                        // check white space first, if it's white space, skip it and continue to the next character
-                        if WHITESPACE_CHARS.contains(&c) {
-                            column += 1;
-                            cursor.advance(1);
-                            continue;
-                        }
-                        // check comment
-                        match cursor.peek_multiple(2) {
-                            Some(['/', '/']) => {
-                                // skip the rest of the line
-                                line += 1;
-                                column = 1;
-                                break;
-                            }
-                            Some(['/', '*']) => {
-                                in_block_comment = true;
-                                cursor.advance(2);
-                                column += 2;
-                                continue;
-                            }
-                            Some(['*', '/']) => {
-                                in_block_comment = false;
-                                cursor.advance(2);
-                                column += 2;
-                                continue;
-                            }
-                            _ => {}
-                        }
-                    } else {
-                        break;
-                    }
-                    if in_block_comment {
-                        cursor.advance(1);
-                        column += 1;
-                    }
-                    if_true_then_continue!(try_parse_as(parse_litint, &mut tokens, &mut cursor, &mut line, &mut column));
-                    if_true_then_continue!(try_parse_as(parse_delimiter, &mut tokens, &mut cursor, &mut line, &mut column));
-                    if_true_then_continue!(try_parse_as(parse_puncuation, &mut tokens, &mut cursor, &mut line, &mut column));
-                    if_true_then_continue!(try_parse_as(parse_ident, &mut tokens, &mut cursor, &mut line, &mut column));
-                    // unrecognized token
-                    errors.push(tokens.len());
-                    let c = cursor.next().unwrap();
-                    tokens.push(Token {
-                        value: TokenValue::Unrecognized(c),
-                        span: Span { line, column, length: 1 },
-                    });
-                    if errors.len() > 20 {
-                        return Err(LexerError::TooManyErrors);
-                    }
-                    column += 1;
-                }
-            line += 1;
-            column = 1;
+    pub fn finish(mut self) -> (Vec<Token>, Diagnositics) {
+        if let Some(span) = self.block_comment_span.take() {
+            self.diagnostics.add_from_frontend_error(LexingError::UnterminatedComment, span);
        }
-        Ok(Self { tokens, errors })
+        (self.tokens, self.diagnostics)
+    }
+    /// Lexes the next chunk of input, continuing from the state left by
+    /// earlier calls.
+    ///
+    /// Comment state (`//` and `/* */`) and the running offset carry over
+    /// between calls, so the source may be fed piecewise (e.g. line by line).
+    /// Pass every character of the source, including whitespace and newlines,
+    /// otherwise the positions reported in diagnostics are shifted.
+    ///
+    /// NOTE(review): positions inside a chunk come from `Cursor::pos` while
+    /// finished chunks are accounted for with `str::len` (bytes); these
+    /// presumably only agree for ASCII input - confirm.
+    pub fn parse_next_str(&mut self, s: &str) {
+        let mut cursor = Cursor::new(s);
+        while let Some(c) = cursor.peek() {
+            // inside a `//` comment: drop everything up to and including the newline
+            if self.in_skip_line {
+                if c == '\n' {
+                    self.in_skip_line = false;
+                }
+                cursor.advance(1);
+                continue;
+            }
+            // inside a `/* */` comment only the terminator is significant; `//`
+            // or `/*` in the comment text must not change the lexer state,
+            // otherwise a `*/` later on the same line would be missed
+            if self.block_comment_span.is_some() {
+                match cursor.peek_multiple(2) {
+                    Some(['*', '/']) => {
+                        self.block_comment_span = None;
+                        cursor.advance(2);
+                    }
+                    _ => {
+                        cursor.advance(1);
+                    }
+                }
+                continue;
+            }
+            // whitespace between tokens
+            if WHITESPACE_CHARS.contains(&c) {
+                cursor.advance(1);
+                continue;
+            }
+            // comment openers; a `*/` outside of a comment is not special and
+            // is left to the token parsers (so `2*/*c*/3` keeps its `*`)
+            match cursor.peek_multiple(2) {
+                Some(['/', '/']) => {
+                    // skip the rest of the line
+                    self.in_skip_line = true;
+                    cursor.advance(2);
+                    continue;
+                }
+                Some(['/', '*']) => {
+                    // remember where the comment started for the
+                    // "unterminated comment" diagnostic
+                    let start = cursor.pos() + self.old_char_count;
+                    self.block_comment_span = Some(Span { start, end: start + 2 });
+                    cursor.advance(2);
+                    continue;
+                }
+                _ => {}
+            }
+            if_true_then_continue!(try_parse_as(parse_litint, &mut self.tokens, &mut cursor, &mut self.diagnostics, self.old_char_count));
+            if_true_then_continue!(try_parse_as(parse_delimiter, &mut self.tokens, &mut cursor, &mut self.diagnostics, self.old_char_count));
+            if_true_then_continue!(try_parse_as(parse_puncuation, &mut self.tokens, &mut cursor, &mut self.diagnostics, self.old_char_count));
+            if_true_then_continue!(try_parse_as(parse_ident, &mut self.tokens, &mut cursor, &mut self.diagnostics, self.old_char_count));
+            // unrecognized token: collect up to the next delimiter or
+            // whitespace, but always take the first character so the loop is
+            // guaranteed to make progress
+            let start = cursor.pos() + self.old_char_count;
+            let mut unrecognized = String::new();
+            while let Some(c) = cursor.peek() {
+                let at_boundary = DELIMITER_CHARS.contains(&c) || WHITESPACE_CHARS.contains(&c);
+                if at_boundary && !unrecognized.is_empty() {
+                    break;
+                }
+                unrecognized.push(c);
+                cursor.advance(1);
+            }
+            let span = Span { start, end: cursor.pos() + self.old_char_count };
+            self.diagnostics.add_from_frontend_error(LexingError::UnrecognizedToken(unrecognized), span);
+            self.tokens.push(Token { value: TokenValue::Unrecognized, span });
+        }
+        self.old_char_count += s.len();
    }
 }
 fn parse_litint(
    str_iter: &mut Cursor,
-) -> Option<TokenValue> {
-    let mut c1 = str_iter.peek()?;
+) -> Result<TokenValue, LexParseError> {
+    let mut c1 = str_iter.peek().ok_or(LexParseError::NotMatched)?;
    // c1 is the peek value from here
    let mut sign_base: i64 = 1;
    let mut base: i64 = 10;
    if !(c1.is_ascii_digit() || c1 == '-') {
-        return None;
+        return Err(LexParseError::NotMatched);
    }
    if c1 == '-' {
        sign_base = -1;
        str_iter.advance(1);
-        c1 = str_iter.peek()?;
+        c1 = str_iter.peek().ok_or(LexParseError::NotMatched)?;
        if !c1.is_ascii_digit() {
            // only a minus sign, not a number
            // back one so cursor still points to the minus sign
            str_iter.back(1);
-            return None;
+            return Err(LexParseError::NotMatched);
        }
    }
    let mut number = 0i64;
+    let mut has_digits = false;
    if c1 == '0' {
        str_iter.advance(1);
        match str_iter.peek() {
@@ -181,12 +214,13 @@ fn parse_litint(
                base = 8;
            }
            _ => {
+                has_digits = true;
                // only zero
            }
        }
    }
    // from here, the cursor points to:
-    // 0x1234 -> cursor at 'x'
+    // 0x1234 -> cursor at '1'
    // 0123 -> cursor at '1'
    // 0 -> cursor at end
    // 1234 -> cursor at '1'
@@ -199,32 +233,42 @@ fn parse_litint(
            '0'..='9' if (c as u8 - b'0') < base as u8 => c as i64 - '0' as i64 ,
            'a'..='f' if base == 16 => c as i64 - 'a' as i64 + 10 ,
            'A'..='F' if base == 16 => c as i64 - 'A' as i64 + 10,
-            _ => break,
+            c => if WHITESPACE_CHARS.contains(&c) || DELIMITER_CHARS.contains(&c) {
+                break;
+            } else {
+                // unrecognized character in number literal
+                return Err(LexParseError::InvalidInMatch(LexingError::InvalidIntLiteral));
+            }
        };
+        has_digits = true;
        number = number * base + digit;
        str_iter.advance(1);
    }
+    if !has_digits {
+        // No valid digits found, add a diagnostic
+        return Err(LexParseError::InvalidInMatch(LexingError::InvalidIntLiteral));
+    }
    number *= sign_base;
-    Some(TokenValue::IntLit(number))
+    Ok(TokenValue::IntLit(number))
 }

 fn parse_delimiter(
    str_iter: &mut Cursor,
-) -> Option<TokenValue> {
-    let c = str_iter.peek()?;
+) -> Result<TokenValue, LexParseError> {
+    let c = str_iter.peek().ok_or(LexParseError::NotMatched)?;
    let token_value = match c {
        '(' => TokenValue::LParen,
        ')' => TokenValue::RParen,
        '{' => TokenValue::LBrace,
        '}' => TokenValue::RBrace,
-        _ => return None,
+        _ => return Err(LexParseError::NotMatched),
    };
    str_iter.advance(1);
-    Some(token_value)
+    Ok(token_value)
 }
 fn parse_puncuation(
    str_iter: &mut Cursor,
-) -> Option<TokenValue> {
+) -> Result<TokenValue, LexParseError> {
    let get_value_by_next_char = 
        |str_iter: &mut Cursor, not_equal_value: TokenValue, equal_value: TokenValue| {
            str_iter.advance(1);
@@ -235,7 +279,7 @@ fn parse_puncuation(
                not_equal_value
            }
    };
-    let c = str_iter.peek()?;
+    let c = str_iter.peek().ok_or(LexParseError::NotMatched)?;
    let token_value = match c {
        '+' => TokenValue::Plus,
        '-' => TokenValue::Minus,
@@ -249,9 +293,7 @@ fn parse_puncuation(
            if let Some('=') = str_iter.peek() {
                TokenValue::NotEqual
            } else {
-                // only '!' is not a valid token, back one so cursor still points to '!'
-                str_iter.back(1);
-                return None;
+                TokenValue::Not
            }
        },
        '<' => get_value_by_next_char(str_iter, TokenValue::Less, TokenValue::LessEqual),
@@ -260,33 +302,35 @@ fn parse_puncuation(
        ',' => TokenValue::Comma,
        ';' => TokenValue::Semicolon,

-        _ => return None,
+        _ => return Err(LexParseError::NotMatched),
    };
    str_iter.advance(1);
-    Some(token_value)
+    Ok(token_value)
 }

 fn parse_ident(
    str_iter: &mut Cursor,
-) -> Option<TokenValue> {
-    let c = str_iter.peek()?;
+) -> Result<TokenValue, LexParseError> {
+    let c = str_iter.peek().ok_or(LexParseError::NotMatched)?;
    if !c.is_ascii_alphabetic() && c != '_' {
-        return None;
+        return Err(LexParseError::NotMatched);
    }
    let mut name = Vec::new();
    while let Some(c) = str_iter.peek() {
        if c.is_ascii_alphanumeric() || c == '_' {
            name.push(c);
            str_iter.advance(1);
-        } else {
+        } else if DELIMITER_CHARS.contains(&c) || WHITESPACE_CHARS.contains(&c) {
            break;
+        } else {
+            return Err(LexParseError::InvalidInMatch(LexingError::InvalidIdent));
        }
    }
    let name = name.into_iter().collect::<String>();
    if let Some(type_ident) = TypeIdent::from_str(&name).ok() {
-        return Some(TokenValue::TypeIdent(type_ident));
+        return Ok(TokenValue::TypeIdent(type_ident));
    }
-    Some(TokenValue::Ident(name))
+    Ok(TokenValue::Ident(name))
 }
 #[cfg(test)]
 mod tests {
@@ -303,11 +347,22 @@ mod tests {
        for case_no in case_sequence {
            let case_path = case_list.get_case_path(case_no).unwrap();
            println!("{}", case_path.display());
-            let file = File::open(case_path).unwrap();
+            let file = File::open(&case_path).unwrap();
            let mut buf_reader = std::io::BufReader::new(file);
-            let lexer = Lexer::parse(&mut buf_reader).unwrap();
-            if lexer.has_errors() {
-                eprintln!("Case {} has error", case_list.get_case_name(case_no).unwrap());
+            let mut lexer = Lexer::new();
+            let mut full_text = String::new();
+            loop {
+                let mut line = String::new();
+                let bytes_read = buf_reader.read_line(&mut line).unwrap();
+                if bytes_read == 0 {
+                    break;
+                }
+                full_text.push_str(&line);
+                lexer.parse_next_str(&line);
+            }
+            let (_tokens, diagnostics) = lexer.finish();
+            if !diagnostics.is_empty() {
+                diagnostics.print(&format!("{}", case_path.display()), &full_text);
                error_case_cnt += 1;
            }
        }
@@ -1,2 +1,4 @@
 pub mod types;
-mod lexer;
+mod lexer;
+// pub mod parser;
+pub mod err;
@@ -1,4 +1,6 @@
-use strum::EnumString;
+use strum::{AsRefStr, EnumString};
+
+use crate::diagnostic::span::Span;


 #[derive(Debug, Clone, PartialEq, Eq)]
@@ -7,12 +9,6 @@ pub struct Token {
    pub span: Span,
 }

-#[derive(Debug, Clone, PartialEq, Eq)]
-pub struct Span {
-    pub line: usize,
-    pub column: usize,
-    pub length: usize,
-}

 #[derive(Debug, Clone, PartialEq, Eq)]
 pub enum TokenValue {
@@ -20,6 +16,61 @@ pub enum TokenValue {
    Ident(String),
    TypeIdent(TypeIdent),
    
+    Plus, Minus, Star, Slash, Percent,
+    Equal, DoubleEqual, Not, NotEqual, Less, LessEqual, Greater, GreaterEqual,
+
+    LParen, RParen,
+    LBrace, RBrace,
+    Comma, Semicolon,
+
+    If, Else, While, Return, Break, Continue,
+
+    // Eof,
+    Unrecognized,
+}
+/// Human-readable rendering of a token: payload-carrying variants are
+/// prefixed with their kind, all other variants print their source text.
+impl std::fmt::Display for TokenValue {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        // Variants with a payload are formatted right away; every other
+        // variant maps to a fixed piece of text written once at the end.
+        let fixed = match self {
+            TokenValue::IntLit(value) => return write!(f, "literal int: {}", value),
+            TokenValue::Ident(name) => return write!(f, "identifier: {}", name),
+            TokenValue::TypeIdent(ty) => return write!(f, "type {}", ty.as_ref()),
+            TokenValue::Plus => "+",
+            TokenValue::Minus => "-",
+            TokenValue::Star => "*",
+            TokenValue::Slash => "/",
+            TokenValue::Percent => "%",
+            TokenValue::Equal => "=",
+            TokenValue::DoubleEqual => "==",
+            TokenValue::Not => "!",
+            TokenValue::NotEqual => "!=",
+            TokenValue::Less => "<",
+            TokenValue::LessEqual => "<=",
+            TokenValue::Greater => ">",
+            TokenValue::GreaterEqual => ">=",
+            TokenValue::LParen => "(",
+            TokenValue::RParen => ")",
+            TokenValue::LBrace => "{",
+            TokenValue::RBrace => "}",
+            TokenValue::Comma => ",",
+            TokenValue::Semicolon => ";",
+            TokenValue::If => "if",
+            TokenValue::Else => "else",
+            TokenValue::While => "while",
+            TokenValue::Return => "return",
+            TokenValue::Break => "break",
+            TokenValue::Continue => "continue",
+            // the `Eof` variant is currently disabled
+            TokenValue::Unrecognized => "unrecognized",
+        };
+        f.write_str(fixed)
+    }
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum TokenKind {
+    IntLit,
+    Ident,
+    TypeIdent,
+    
    Plus, Minus, Star, Slash, Percent,
    Equal, DoubleEqual, NotEqual, Less, LessEqual, Greater, GreaterEqual,

@@ -29,10 +80,10 @@ pub enum TokenValue {

    If, Else, While, Return, Break, Continue,

-    Eof,
-    Unrecognized(char),
+    // Eof,
+    Unrecognized,
 }
-#[derive(Debug, Clone, PartialEq, Eq, EnumString)]
+#[derive(Debug, Clone, PartialEq, Eq, EnumString, AsRefStr)]
 pub enum TypeIdent {
    #[strum(serialize = "int")]
    Int,
@@ -1,6 +1,8 @@
 mod frontend;
 mod ast;
 mod utils;
+mod diagnostic;
+mod err;
 fn main() {
    println!("Hello, world!");
 }