From 63a29908261169742cb9d5528ba383a504d290f5 Mon Sep 17 00:00:00 2001
From: Hydrostic <hydrostic@outlook.com>
Date: Fri, 8 May 2026 22:54:46 +0800
Subject: [PATCH] feat(lexer): Add diagnostics and implement error recovery

---
 Cargo.lock             |  81 ++++++++++++
 Cargo.toml             |   1 +
 src/ast/mod.rs         |   1 +
 src/ast/types.rs       |  75 +++++++++++
 src/diagnostic/mod.rs  |  94 ++++++++++++++
 src/diagnostic/span.rs |   6 +
 src/err.rs             |   9 ++
 src/frontend/err.rs    |  28 +++++
 src/frontend/lexer.rs  | 273 +++++++++++++++++++++++++----------------
 src/frontend/mod.rs    |   4 +-
 src/frontend/types.rs  |  71 +++++++++--
 src/main.rs            |   2 +
 12 files changed, 525 insertions(+), 120 deletions(-)
 create mode 100644 src/ast/mod.rs
 create mode 100644 src/ast/types.rs
 create mode 100644 src/diagnostic/mod.rs
 create mode 100644 src/diagnostic/span.rs
 create mode 100644 src/err.rs
 create mode 100644 src/frontend/err.rs

diff --git a/Cargo.lock b/Cargo.lock
index 1fd0f62..db97c61 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -17,6 +17,17 @@ version = "1.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
 
+[[package]]
+name = "codespan-reporting"
+version = "0.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "af491d569909a7e4dee0ad7db7f5341fef5c614d5b8ec8cf765732aba3cff681"
+dependencies = [
+ "serde",
+ "termcolor",
+ "unicode-width",
+]
+
 [[package]]
 name = "heck"
 version = "0.5.0"
@@ -153,12 +164,43 @@ checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a"
 name = "rusty-minic"
 version = "0.1.0"
 dependencies = [
+ "codespan-reporting",
  "num",
  "regex",
  "strum",
  "thiserror",
 ]
 
+[[package]]
+name = "serde"
+version = "1.0.228"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e"
+dependencies = [
+ "serde_core",
+ "serde_derive",
+]
+
+[[package]]
+name = "serde_core"
+version = "1.0.228"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad"
+dependencies = [
+ "serde_derive",
+]
+
+[[package]]
+name = "serde_derive"
+version = "1.0.228"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
 [[package]]
 name = "strum"
 version = "0.28.0"
@@ -191,6 +233,15 @@ dependencies = [
  "unicode-ident",
 ]
 
+[[package]]
+name = "termcolor"
+version = "1.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755"
+dependencies = [
+ "winapi-util",
+]
+
 [[package]]
 name = "thiserror"
 version = "2.0.18"
@@ -216,3 +267,33 @@ name = "unicode-ident"
 version = "1.0.24"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
+
+[[package]]
+name = "unicode-width"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254"
+
+[[package]]
+name = "winapi-util"
+version = "0.1.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
+dependencies = [
+ "windows-sys",
+]
+
+[[package]]
+name = "windows-link"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
+
+[[package]]
+name = "windows-sys"
+version = "0.61.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc"
+dependencies = [
+ "windows-link",
+]
diff --git a/Cargo.toml b/Cargo.toml
index e2c6a11..61533d4 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -4,6 +4,7 @@ version = "0.1.0"
 edition = "2024"
 
 [dependencies]
+codespan-reporting = "0.13.1"
 num = "0.4.3"
 regex = "1.12.3"
 strum = { version = "0.28.0", features = ["derive"] }
diff --git a/src/ast/mod.rs b/src/ast/mod.rs
new file mode 100644
index 0000000..cd40856
--- /dev/null
+++ b/src/ast/mod.rs
@@ -0,0 +1 @@
+pub mod types;
diff --git a/src/ast/types.rs b/src/ast/types.rs
new file mode 100644
index 0000000..dbe2293
--- /dev/null
+++ b/src/ast/types.rs
@@ -0,0 +1,75 @@
+// Re-export the span type carried by lexer tokens and diagnostics instead of
+// declaring a second, unrelated `Span` here: a module-local struct with
+// private fields and no constructor could not be built by code outside this
+// module, so the `span` fields of the AST nodes below could never be filled in.
+pub use crate::diagnostic::span::Span;
+pub struct CompileUnit {
+    pub global_decls: Vec<GlobalDeclStmt>,
+}
+pub enum GlobalDeclStmt {
+    VarDecl(VarDeclStmt),
+    FuncDecl(FuncDeclStmt),
+}
+
+pub struct VarDeclStmt {
+    pub name: String,
+    pub var_type: Type,
+    pub span: Span,
+}
+
+pub struct FuncDeclStmt {
+    pub name: String,
+    pub return_type: Type,
+    pub params: Vec<Param>,
+    pub body: BlockStmt,
+    pub span: Span,
+}
+pub struct BlockStmt {
+    pub statements: Vec<Statement>,
+    pub span: Span,
+}
+
+pub enum Statement {
+    Return(ReturnStmt),
+    Block(BlockStmt),
+    Expr(Expr),
+    VarDecl(VarDeclStmt),
+}
+pub struct ReturnStmt {
+    pub value: Option<Expr>,
+    pub span: Span,
+}
+pub struct Expr {
+    pub value: ExprValue,
+    pub span: Span,
+}
+pub enum ExprValue {
+    IntLit(i64),
+    Var(String),
+    BinaryOp {
+        lhs: Box<Expr>, 
+        op: BinaryOp, 
+        rhs: Box<Expr>
+    },
+    FuncCall(String, Vec<Expr>),
+    Assign {
+        lvalue: Box<Expr>, 
+        rvalue: Box<Expr>
+    },
+}
+
+pub enum BinaryOp {
+    Add, Sub, Mul, Div, Mod,
+    Equal, NotEqual, Less, LessEqual, Greater, GreaterEqual,
+}
+
+pub enum Type {
+    Int,
+    Void,
+}
+
+pub struct Param { // one entry of `FuncDeclStmt::params`; fields are `pub` like every other AST node here
+    pub name: String,
+    pub param_type: Type,
+    pub span: Span,
+}
\ No newline at end of file
diff --git a/src/diagnostic/mod.rs b/src/diagnostic/mod.rs
new file mode 100644
index 0000000..685dbd6
--- /dev/null
+++ b/src/diagnostic/mod.rs
@@ -0,0 +1,94 @@
+use crate::{diagnostic::span::Span, err::CompileError, frontend::err::FrontendError};
+
+pub mod span;
+
+pub struct Diagnositics {
+    diagnostics: Vec<Diagnostic>,
+}
+pub enum DiagnosticLevel {
+    Error,
+    Warning,
+    Info,
+}
+pub struct Diagnostic {
+    level: DiagnosticLevel,
+    message: String,
+    span: Span
+}
+impl Diagnositics {
+    /// Creates an empty diagnostics collection.
+    pub fn new() -> Self {
+        Self { diagnostics: vec![] }
+    }
+    /// Appends an already-built diagnostic.
+    pub fn add(&mut self, diagnostic: Diagnostic) {
+        self.diagnostics.push(diagnostic);
+    }
+    /// Records `error` (converted into `CompileError`) as an error-level diagnostic at `span`.
+    pub fn add_from_error(&mut self, error: impl Into<CompileError>, span: Span) {
+        self.diagnostics.push(Diagnostic {
+            level: DiagnosticLevel::Error,
+            message: Into::<CompileError>::into(error).to_string(),
+            span,
+        });
+    }
+    /// Records `error` (converted into `FrontendError`) as an error-level diagnostic at `span`.
+    pub fn add_from_frontend_error(&mut self, error: impl Into<FrontendError>, span: Span) {
+        self.diagnostics.push(Diagnostic {
+            level: DiagnosticLevel::Error,
+            message: Into::<FrontendError>::into(error).to_string(),
+            span,
+        });
+    }
+    /// Returns `true` when no diagnostics have been recorded.
+    pub fn is_empty(&self) -> bool {
+        self.diagnostics.is_empty()
+    }
+    /// Renders every diagnostic to stdout against `source` (shown as file `name`); write failures go to stderr.
+    pub fn print(&self, name: &str, source: &str) {
+        use codespan_reporting::diagnostic::{Diagnostic as CodespanDiagnostic, Label, Severity};
+        use codespan_reporting::files::SimpleFile;
+        use codespan_reporting::term::{self, termcolor::{ColorChoice, StandardStream}};
+        use std::io::IsTerminal;
+        // Colour must follow the stream being written (stdout), not stdin: `prog > out.txt` keeps stdin a tty.
+        let choice = if std::io::stdout().is_terminal() { ColorChoice::Auto } else { ColorChoice::Never };
+        let stdout = StandardStream::stdout(choice);
+        let source_file = SimpleFile::new(name, source);
+        let output_config = codespan_reporting::term::Config::default();
+        for diagnostic in &self.diagnostics {
+            let output_level = match diagnostic.level {
+                DiagnosticLevel::Error => Severity::Error,
+                DiagnosticLevel::Warning => Severity::Warning,
+                DiagnosticLevel::Info => Severity::Note,
+            };
+            let output_diagnostic = CodespanDiagnostic::new(output_level)
+                .with_message(&diagnostic.message)
+                .with_label(Label::primary((), diagnostic.span.start..diagnostic.span.end));
+            if let Err(err) = term::emit_to_write_style(&mut stdout.lock(), &output_config, &source_file, &output_diagnostic) {
+                eprintln!("failed to render diagnostic: {err}");
+            }
+        }
+    }
+}
+#[cfg(test)]
+mod tests {
+    #[test]
+    fn test_diagnostics() {
+        use crate::diagnostic::{Diagnostic, DiagnosticLevel, Diagnositics};
+        let mut diagnostics = Diagnositics::new();
+        diagnostics.add(Diagnostic {
+            level: DiagnosticLevel::Error,
+            message: "test error".to_string(),
+            span: crate::diagnostic::span::Span { start: 0, end: 3 },
+        });
+        diagnostics.add(Diagnostic {
+            level: DiagnosticLevel::Error,
+            message: "test error".to_string(),
+            span: crate::diagnostic::span::Span { start: 16, end: 22 },
+        });
+        diagnostics.print("main.c", 
+r#"int main(){
+    return 1;
+}"#);
+    }
+}
\ No newline at end of file
diff --git a/src/diagnostic/span.rs b/src/diagnostic/span.rs
new file mode 100644
index 0000000..4a0820c
--- /dev/null
+++ b/src/diagnostic/span.rs
@@ -0,0 +1,6 @@
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub struct Span {
+    pub start: usize,
+    pub end: usize,
+}
\ No newline at end of file
diff --git a/src/err.rs b/src/err.rs
new file mode 100644
index 0000000..9f4869f
--- /dev/null
+++ b/src/err.rs
@@ -0,0 +1,9 @@
+use thiserror::Error;
+
+use crate::frontend::err::FrontendError;
+
+#[derive(Debug, Clone, PartialEq, Eq, Error)]
+pub enum CompileError {
+    #[error(transparent)]
+    Frontend(#[from] FrontendError),
+}
\ No newline at end of file
diff --git a/src/frontend/err.rs b/src/frontend/err.rs
new file mode 100644
index 0000000..c733c7b
--- /dev/null
+++ b/src/frontend/err.rs
@@ -0,0 +1,28 @@
+use thiserror::Error;
+
+// #[derive(Debug, Clone, PartialEq, Eq, Error)]
+// pub enum ParseError {
+//     BlockStmt(#[from] BlockStmtError)
+// }
+// #[derive(Debug, Clone, PartialEq, Eq, Error)]
+// pub enum BlockStmtError {
+//     MissingLBrace,
+//     MissingRBrace,
+// }
+#[derive(Debug, Clone, PartialEq, Eq, Error)]
+pub enum LexingError {
+    #[error("invalid int literal")]
+    InvalidIntLiteral,
+    #[error("invalid ident")]
+    InvalidIdent,
+    #[error("comment unterminated")]
+    UnterminatedComment,
+    #[error("unrecognized token: {0}")]
+    UnrecognizedToken(String),
+}
+#[derive(Debug, Clone, PartialEq, Eq, Error)]
+pub enum FrontendError {
+    #[error(transparent)]
+    Lexing(#[from] LexingError),
+    
+}
\ No newline at end of file
diff --git a/src/frontend/lexer.rs b/src/frontend/lexer.rs
index c7476da..c378ed7 100644
--- a/src/frontend/lexer.rs
+++ b/src/frontend/lexer.rs
@@ -1,21 +1,32 @@
-use std::{io::BufRead, iter::Peekable, str::FromStr};
+use std::{io::BufRead, str::FromStr};
 
+use codespan_reporting::diagnostic;
 use thiserror::Error;
 
-use crate::frontend::types::{Span, TokenValue, TypeIdent};
+use crate::{diagnostic::{Diagnositics, span::{self, Span}}, frontend::{err::LexingError, types::{TokenValue, TypeIdent}}};
 
 use super::types::Token;
 
 pub struct Lexer {
-    tokens: Vec<Token>,
-    errors: Vec<usize>, // every entry points to the index of unrecognized tokens
+    pub tokens: Vec<Token>,
+    pub diagnostics: Diagnositics,
+    old_char_count: usize,
+    block_comment_span: Option<Span>,
+    in_skip_line: bool
 }
 
 const WHITESPACE_CHARS: &[char] = &[' ', '\t', '\n', '\r'];
+const DELIMITER_CHARS: &[char] = &[ // chars that end an int literal/identifier and stop error recovery
+    '+', '-', '*', '/', '%', '=', '!', '<', '>', '(', ')', '{', '}', ',', ';'
+];
 struct Cursor {
     chars: Vec<char>,
     pos: usize,
 }
+enum LexParseError {
+    NotMatched,
+    InvalidInMatch(LexingError)
+}
 impl Cursor {
     pub fn new(s: &str) -> Self {
         Self { chars: s.chars().collect(), pos: 0 }
@@ -47,20 +58,35 @@ impl Cursor {
         self.pos
     }
 }
+/// Runs parser `f` at the cursor; returns `true` if it consumed input (token pushed or error recorded) so the caller should restart its loop.
 fn try_parse_as(
-    f: fn(&mut Cursor) -> Option<TokenValue>,
+    f: fn(&mut Cursor) -> Result<TokenValue, LexParseError>,
     tokens: &mut Vec<Token>,
     str_iter: &mut Cursor,
-    line: &mut usize,
-    column: &mut usize,
+    diagnostics: &mut Diagnositics,
+    last_char_count: usize,
 ) -> bool {
-    let last_pos = str_iter.pos();
-    if let Some(token) = f(str_iter) {
-        let span = Span { line: *line, column: *column, length: str_iter.pos() - last_pos };
-        tokens.push(Token { value: token, span });
-        return true;
+    let last_pos = str_iter.pos() + last_char_count;
+    match f(str_iter) {
+        Ok(token_value) => {
+            let span = Span { start: last_pos, end: str_iter.pos() + last_char_count };
+            tokens.push(Token { value: token_value, span });
+            return true;
+        }
+        Err(LexParseError::NotMatched) => false,
+        Err(LexParseError::InvalidInMatch(err)) => {
+            // try recover from delimiter char or whitespace char
+            while let Some(c) = str_iter.peek() {
+                if DELIMITER_CHARS.contains(&c) || WHITESPACE_CHARS.contains(&c) {
+                    break;
+                }
+                str_iter.advance(1);
+            }
+            let span = Span { start: last_pos, end: str_iter.pos() + last_char_count };
+            diagnostics.add_from_frontend_error(err, span);
+            return true;
+        }
     }
-    false
 }
 macro_rules! if_true_then_continue {
     ($e: expr) => {
@@ -77,99 +103,106 @@ pub enum LexerError {
     TooManyErrors,
 }
 impl Lexer {
-    pub fn has_errors(&self) -> bool {
-        !self.errors.is_empty()
+    pub fn new() -> Self {
+        Self { tokens: vec![], diagnostics: Diagnositics::new(), old_char_count: 0, block_comment_span: None, in_skip_line: false }
     }
-    pub fn parse(reader: &mut impl BufRead) -> Result<Self, LexerError> {
-        let mut tokens = Vec::new();
-        let mut errors = Vec::new();
-        let mut line = 1;
-        let mut column = 1;
-        let mut in_block_comment = false;
-        for line_str in reader.lines() {
-            let line_str = line_str?;
-            let mut cursor = Cursor::new(&line_str);
-                loop {
-                    if let Some(c) = cursor.peek() {
-                        // check white space first, if it's white space, skip it and continue to the next character
-                        if WHITESPACE_CHARS.contains(&c) {
-                            column += 1;
-                            cursor.advance(1);
-                            continue;
-                        }
-                        // check comment
-                        match cursor.peek_multiple(2) {
-                            Some(['/', '/']) => {
-                                // skip the rest of the line
-                                line += 1;
-                                column = 1;
-                                break;
-                            }
-                            Some(['/', '*']) => {
-                                in_block_comment = true;
-                                cursor.advance(2);
-                                column += 2;
-                                continue;
-                            }
-                            Some(['*', '/']) => {
-                                in_block_comment = false;
-                                cursor.advance(2);
-                                column += 2;
-                                continue;
-                            }
-                            _ => {}
-                        }
-                    } else {
-                        break;
-                    }
-                    if in_block_comment {
-                        cursor.advance(1);
-                        column += 1;
-                    }
-                    if_true_then_continue!(try_parse_as(parse_litint, &mut tokens, &mut cursor, &mut line, &mut column));
-                    if_true_then_continue!(try_parse_as(parse_delimiter, &mut tokens, &mut cursor, &mut line, &mut column));
-                    if_true_then_continue!(try_parse_as(parse_puncuation, &mut tokens, &mut cursor, &mut line, &mut column));
-                    if_true_then_continue!(try_parse_as(parse_ident, &mut tokens, &mut cursor, &mut line, &mut column));
-                    // unrecognized token
-                    errors.push(tokens.len());
-                    let c = cursor.next().unwrap();
-                    tokens.push(Token {
-                        value: TokenValue::Unrecognized(c),
-                        span: Span { line, column, length: 1 },
-                    });
-                    if errors.len() > 20 {
-                        return Err(LexerError::TooManyErrors);
-                    }
-                    column += 1;
-                }
-            line += 1;
-            column = 1;
+    pub fn finish(mut self) -> (Vec<Token>, Diagnositics) {
+        if let Some(span) = self.block_comment_span.take() {
+            self.diagnostics.add_from_frontend_error(LexingError::UnterminatedComment, span);
         }
-        Ok(Self { tokens, errors })
+        (self.tokens, self.diagnostics)
+    }
+    /// Lexes `s` as the next chunk of input, resuming from the state left by earlier calls
+    /// (pass whitespace/newlines too: spans are offsets accumulated across all chunks).
+    pub fn parse_next_str(&mut self, s: &str) {
+        let mut cursor = Cursor::new(s);
+        loop {
+            if let Some(c) = cursor.peek() {
+                if self.in_skip_line && c != '\n' {
+                    cursor.advance(1);
+                    continue;
+                }
+                // check white space first, if it's white space, skip it and continue to the next character
+                if WHITESPACE_CHARS.contains(&c) {
+                    if c == '\n' {
+                        self.in_skip_line = false;
+                    }
+                    cursor.advance(1);
+                    continue;
+                }
+                // comment markers; the guards keep `//` and `/*` inert inside a block comment and `*/` inert outside one
+                match cursor.peek_multiple(2) {
+                    Some(['/', '/']) if self.block_comment_span.is_none() => {
+                        // skip the rest of the line
+                        self.in_skip_line = true;
+                        cursor.advance(2);
+                        continue;
+                    }
+                    Some(['/', '*']) if self.block_comment_span.is_none() => {
+                        let start = cursor.pos() + self.old_char_count;
+                        self.block_comment_span = Some(Span { start, end: start + 2 });
+                        cursor.advance(2);
+                        continue;
+                    }
+                    Some(['*', '/']) if self.block_comment_span.is_some() => {
+                        self.block_comment_span = None;
+                        cursor.advance(2);
+                        continue;
+                    }
+                    _ => {}
+                }
+                if self.block_comment_span.is_some() {
+                    cursor.advance(1);
+                    continue;
+                }
+            } else {
+                break;
+            }
+            if_true_then_continue!(try_parse_as(parse_litint, &mut self.tokens, &mut cursor, &mut self.diagnostics, self.old_char_count));
+            if_true_then_continue!(try_parse_as(parse_delimiter, &mut self.tokens, &mut cursor, &mut self.diagnostics, self.old_char_count));
+            if_true_then_continue!(try_parse_as(parse_puncuation, &mut self.tokens, &mut cursor, &mut self.diagnostics, self.old_char_count));
+            if_true_then_continue!(try_parse_as(parse_ident, &mut self.tokens, &mut cursor, &mut self.diagnostics, self.old_char_count));
+            // unrecognized token
+            let last_pos = cursor.pos() + self.old_char_count;
+            let mut unrecognized = Vec::new();
+            while let Some(c) = cursor.peek() {
+                if DELIMITER_CHARS.contains(&c) || WHITESPACE_CHARS.contains(&c) {
+                    break;
+                }
+                unrecognized.push(c);
+                cursor.advance(1);
+            }
+            let span = Span { start: last_pos, end: cursor.pos() + self.old_char_count };
+            let unrecognized = unrecognized.into_iter().collect::<String>();
+            self.diagnostics.add_from_frontend_error(LexingError::UnrecognizedToken(unrecognized), span);
+            self.tokens.push(Token { value: TokenValue::Unrecognized, span });
+        }
+        self.old_char_count += s.len(); // NOTE(review): byte length, but cursor positions are char indices; spans may drift after non-ASCII text
     }
 }
 fn parse_litint(
     str_iter: &mut Cursor,
-) -> Option<TokenValue> {
-    let mut c1 = str_iter.peek()?;
+) -> Result<TokenValue, LexParseError> {
+    let mut c1 = str_iter.peek().ok_or(LexParseError::NotMatched)?;
     // c1 is the peek value from here
     let mut sign_base: i64 = 1;
     let mut base: i64 = 10;
     if !(c1.is_ascii_digit() || c1 == '-') {
-        return None;
+        return Err(LexParseError::NotMatched);
     }
     if c1 == '-' {
         sign_base = -1;
         str_iter.advance(1);
-        c1 = str_iter.peek()?;
+        c1 = str_iter.peek().ok_or(LexParseError::NotMatched)?;
         if !c1.is_ascii_digit() {
             // only a minus sign, not a number
             // back one so cursor still points to the minus sign
             str_iter.back(1);
-            return None;
+            return Err(LexParseError::NotMatched);
         }
     }
     let mut number = 0i64;
+    let mut has_digits = false;
     if c1 == '0' {
         str_iter.advance(1);
         match str_iter.peek() {
@@ -181,12 +214,13 @@ fn parse_litint(
                 base = 8;
             }
             _ => {
+                has_digits = true;
                 // only zero
             }
         }
     }
     // from here, the cursor points to:
-    // 0x1234 -> cursor at 'x'
+    // 0x1234 -> cursor at '1'
     // 0123 -> cursor at '1'
     // 0 -> cursor at end
     // 1234 -> cursor at '1'
@@ -199,32 +233,42 @@ fn parse_litint(
             '0'..='9' if (c as u8 - b'0') < base as u8 => c as i64 - '0' as i64 ,
             'a'..='f' if base == 16 => c as i64 - 'a' as i64 + 10 ,
             'A'..='F' if base == 16 => c as i64 - 'A' as i64 + 10,
-            _ => break,
+            c => if WHITESPACE_CHARS.contains(&c) || DELIMITER_CHARS.contains(&c) {
+                break;
+            } else {
+                // unrecognized character in number literal
+                return Err(LexParseError::InvalidInMatch(LexingError::InvalidIntLiteral));
+            }
         };
+        has_digits = true;
         number = number * base + digit;
         str_iter.advance(1);
     }
+    if !has_digits {
+        // No valid digits found, add a diagnostic
+        return Err(LexParseError::InvalidInMatch(LexingError::InvalidIntLiteral));
+    }
     number *= sign_base;
-    Some(TokenValue::IntLit(number))
+    Ok(TokenValue::IntLit(number))
 }
 
 fn parse_delimiter(
     str_iter: &mut Cursor,
-) -> Option<TokenValue> {
-    let c = str_iter.peek()?;
+) -> Result<TokenValue, LexParseError> {
+    let c = str_iter.peek().ok_or(LexParseError::NotMatched)?;
     let token_value = match c {
         '(' => TokenValue::LParen,
         ')' => TokenValue::RParen,
         '{' => TokenValue::LBrace,
         '}' => TokenValue::RBrace,
-        _ => return None,
+        _ => return Err(LexParseError::NotMatched),
     };
     str_iter.advance(1);
-    Some(token_value)
+    Ok(token_value)
 }
 fn parse_puncuation(
     str_iter: &mut Cursor,
-) -> Option<TokenValue> {
+) -> Result<TokenValue, LexParseError> {
     let get_value_by_next_char = 
         |str_iter: &mut Cursor, not_equal_value: TokenValue, equal_value: TokenValue| {
             str_iter.advance(1);
@@ -235,7 +279,7 @@ fn parse_puncuation(
                 not_equal_value
             }
     };
-    let c = str_iter.peek()?;
+    let c = str_iter.peek().ok_or(LexParseError::NotMatched)?;
     let token_value = match c {
         '+' => TokenValue::Plus,
         '-' => TokenValue::Minus,
@@ -249,9 +293,7 @@ fn parse_puncuation(
             if let Some('=') = str_iter.peek() {
                 TokenValue::NotEqual
             } else {
-                // only '!' is not a valid token, back one so cursor still points to '!'
-                str_iter.back(1);
-                return None;
+                TokenValue::Not
             }
         },
         '<' => get_value_by_next_char(str_iter, TokenValue::Less, TokenValue::LessEqual),
@@ -260,33 +302,35 @@ fn parse_puncuation(
         ',' => TokenValue::Comma,
         ';' => TokenValue::Semicolon,
 
-        _ => return None,
+        _ => return Err(LexParseError::NotMatched),
     };
     str_iter.advance(1);
-    Some(token_value)
+    Ok(token_value)
 }
 
 fn parse_ident(
     str_iter: &mut Cursor,
-) -> Option<TokenValue> {
-    let c = str_iter.peek()?;
+) -> Result<TokenValue, LexParseError> {
+    let c = str_iter.peek().ok_or(LexParseError::NotMatched)?;
     if !c.is_ascii_alphabetic() && c != '_' {
-        return None;
+        return Err(LexParseError::NotMatched);
     }
     let mut name = Vec::new();
     while let Some(c) = str_iter.peek() {
         if c.is_ascii_alphanumeric() || c == '_' {
             name.push(c);
             str_iter.advance(1);
-        } else {
+        } else if DELIMITER_CHARS.contains(&c) || WHITESPACE_CHARS.contains(&c) {
             break;
+        } else {
+            return Err(LexParseError::InvalidInMatch(LexingError::InvalidIdent));
         }
     }
     let name = name.into_iter().collect::<String>();
     if let Some(type_ident) = TypeIdent::from_str(&name).ok() {
-        return Some(TokenValue::TypeIdent(type_ident));
+        return Ok(TokenValue::TypeIdent(type_ident));
     }
-    Some(TokenValue::Ident(name))
+    Ok(TokenValue::Ident(name))
 }
 #[cfg(test)]
 mod tests {
@@ -303,11 +347,22 @@ mod tests {
         for case_no in case_sequence {
             let case_path = case_list.get_case_path(case_no).unwrap();
             println!("{}", case_path.display());
-            let file = File::open(case_path).unwrap();
+            let file = File::open(&case_path).unwrap();
             let mut buf_reader = std::io::BufReader::new(file);
-            let lexer = Lexer::parse(&mut buf_reader).unwrap();
-            if lexer.has_errors() {
-                eprintln!("Case {} has error", case_list.get_case_name(case_no).unwrap());
+            let mut lexer = Lexer::new();
+            let mut full_text = String::new();
+            loop {
+                let mut line = String::new();
+                let bytes_read = buf_reader.read_line(&mut line).unwrap();
+                if bytes_read == 0 {
+                    break;
+                }
+                full_text.push_str(&line);
+                lexer.parse_next_str(&line);
+            }
+            let (_tokens, diagnostics) = lexer.finish();
+            if !diagnostics.is_empty() {
+                diagnostics.print(&format!("{}", case_path.display()), &full_text);
                 error_case_cnt += 1;
             }
         }
diff --git a/src/frontend/mod.rs b/src/frontend/mod.rs
index c7108a2..a82d1d8 100644
--- a/src/frontend/mod.rs
+++ b/src/frontend/mod.rs
@@ -1,2 +1,4 @@
 pub mod types;
-mod lexer;
\ No newline at end of file
+mod lexer;
+// pub mod parser;
+pub mod err;
\ No newline at end of file
diff --git a/src/frontend/types.rs b/src/frontend/types.rs
index 1899d24..b82fd6b 100644
--- a/src/frontend/types.rs
+++ b/src/frontend/types.rs
@@ -1,4 +1,6 @@
-use strum::EnumString;
+use strum::{AsRefStr, EnumString};
+
+use crate::diagnostic::span::Span;
 
 
 #[derive(Debug, Clone, PartialEq, Eq)]
@@ -7,12 +9,6 @@ pub struct Token {
     pub span: Span,
 }
 
-#[derive(Debug, Clone, PartialEq, Eq)]
-pub struct Span {
-    pub line: usize,
-    pub column: usize,
-    pub length: usize,
-}
 
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub enum TokenValue {
@@ -20,6 +16,61 @@ pub enum TokenValue {
     Ident(String),
     TypeIdent(TypeIdent),
     
+    Plus, Minus, Star, Slash, Percent,
+    Equal, DoubleEqual, Not, NotEqual, Less, LessEqual, Greater, GreaterEqual,
+
+    LParen, RParen,
+    LBrace, RBrace,
+    Comma, Semicolon,
+
+    If, Else, While, Return, Break, Continue,
+
+    // Eof,
+    Unrecognized,
+}
+impl std::fmt::Display for TokenValue {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            TokenValue::IntLit(i) => write!(f, "literal int: {}", i),
+            TokenValue::Ident(s) => write!(f, "identifier: {}", s),
+            TokenValue::TypeIdent(t) => write!(f, "type {}", t.as_ref()),
+            TokenValue::Plus => write!(f, "+"),
+            TokenValue::Minus => write!(f, "-"),
+            TokenValue::Star => write!(f, "*"),
+            TokenValue::Slash => write!(f, "/"),
+            TokenValue::Percent => write!(f, "%"),
+            TokenValue::Equal => write!(f, "="),
+            TokenValue::DoubleEqual => write!(f, "=="),
+            TokenValue::Not => write!(f, "!"),
+            TokenValue::NotEqual => write!(f, "!="),
+            TokenValue::Less => write!(f, "<"),
+            TokenValue::LessEqual => write!(f, "<="),
+            TokenValue::Greater => write!(f, ">"),
+            TokenValue::GreaterEqual => write!(f, ">="),
+            TokenValue::LParen => write!(f, "("),
+            TokenValue::RParen => write!(f, ")"),
+            TokenValue::LBrace => write!(f, "{{"),
+            TokenValue::RBrace => write!(f, "}}"),
+            TokenValue::Comma => write!(f, ","),
+            TokenValue::Semicolon => write!(f, ";"),
+            TokenValue::If => write!(f, "if"),
+            TokenValue::Else => write!(f, "else"),
+            TokenValue::While => write!(f, "while"),
+            TokenValue::Return => write!(f, "return"),
+            TokenValue::Break => write!(f, "break"),
+            TokenValue::Continue => write!(f, "continue"),
+            // TokenValue::Eof => write!(f, "<EOF>"),
+            TokenValue::Unrecognized => write!(f, "unrecognized"),
+        }
+    }
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum TokenKind {
+    IntLit,
+    Ident,
+    TypeIdent,
+    
     Plus, Minus, Star, Slash, Percent,
     Equal, DoubleEqual, NotEqual, Less, LessEqual, Greater, GreaterEqual,
 
@@ -29,10 +80,10 @@ pub enum TokenValue {
 
     If, Else, While, Return, Break, Continue,
 
-    Eof,
-    Unrecognized(char),
+    // Eof,
+    Unrecognized,
 }
-#[derive(Debug, Clone, PartialEq, Eq, EnumString)]
+#[derive(Debug, Clone, PartialEq, Eq, EnumString, AsRefStr)]
 pub enum TypeIdent {
     #[strum(serialize = "int")]
     Int,
diff --git a/src/main.rs b/src/main.rs
index 1f94806..9540caf 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,6 +1,8 @@
 mod frontend;
 mod ast;
 mod utils;
+mod diagnostic;
+mod err;
 fn main() {
     println!("Hello, world!");
 }