From 3cb698cc5dcca5de0abc5b8eab0203ee90ca08a8 Mon Sep 17 00:00:00 2001 From: Hydrostic Date: Sat, 9 May 2026 11:19:50 +0800 Subject: [PATCH 1/3] fix(util): x-y case not correctly handled --- src/utils/num_sequence.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils/num_sequence.rs b/src/utils/num_sequence.rs index 9666b04..bae5a89 100644 --- a/src/utils/num_sequence.rs +++ b/src/utils/num_sequence.rs @@ -22,7 +22,7 @@ impl NumberSequence { } else if let Some((start_str, end_str)) = group.split_once('-') { if let (Ok(start), Ok(end)) = (start_str.parse::(), end_str.parse::()) { - ranges.push((start, end)); + ranges.push((start, end + T::one())); } else { return None; } From 567057fd7695e0ff6be65740706ec7c14ad11177 Mon Sep 17 00:00:00 2001 From: Hydrostic Date: Sat, 9 May 2026 11:20:26 +0800 Subject: [PATCH 2/3] feat(parser): Impl parser for basic functionality --- src/ast/types.rs | 55 +++- src/diagnostic/span.rs | 9 + src/frontend/err.rs | 14 +- src/frontend/mod.rs | 2 +- src/frontend/parser.rs | 708 +++++++++++++++++++++++++++++++++++++++++ src/frontend/types.rs | 60 ++-- 6 files changed, 815 insertions(+), 33 deletions(-) create mode 100644 src/frontend/parser.rs diff --git a/src/ast/types.rs b/src/ast/types.rs index dbe2293..6c15b08 100644 --- a/src/ast/types.rs +++ b/src/ast/types.rs @@ -1,8 +1,5 @@ -#[derive(Debug, Clone, Copy)] -pub struct Span { - start: usize, - end: usize, -} +use crate::{diagnostic::span::Span, frontend::types::{TokenValue, TypeIdent}}; + pub struct CompileUnit { pub global_decls: Vec, } @@ -12,9 +9,14 @@ pub enum GlobalDeclStmt { } pub struct VarDeclStmt { + pub values: Vec, + pub span: Span, +} +pub struct VarDeclStmtValue { pub name: String, pub var_type: Type, pub span: Span, + } pub struct FuncDeclStmt { @@ -35,6 +37,16 @@ pub enum Statement { Expr(Expr), VarDecl(VarDeclStmt), } +impl Statement { + pub fn span(&self) -> Span { + match self { + Statement::Return(s) => s.span, + Statement::Block(s) => s.span, + Statement::Expr(s) => s.span, + Statement::VarDecl(s) => s.span, + } + } +} pub struct ReturnStmt { pub value: Option, pub span: Span, @@ -63,13 +75,38 @@ pub enum BinaryOp { Equal, NotEqual, Less, LessEqual, Greater, GreaterEqual, } +impl BinaryOp { + pub fn from_token_value(token_value: &TokenValue) -> Option { + match token_value { + TokenValue::Plus => Some(BinaryOp::Add), + TokenValue::Minus => Some(BinaryOp::Sub), + TokenValue::Star => Some(BinaryOp::Mul), + TokenValue::Slash => Some(BinaryOp::Div), + TokenValue::Percent => Some(BinaryOp::Mod), + TokenValue::DoubleEqual => Some(BinaryOp::Equal), + TokenValue::NotEqual => Some(BinaryOp::NotEqual), + TokenValue::Less => Some(BinaryOp::Less), + TokenValue::LessEqual => Some(BinaryOp::LessEqual), + TokenValue::Greater => Some(BinaryOp::Greater), + TokenValue::GreaterEqual => Some(BinaryOp::GreaterEqual), + _ => None, + } + } +} pub enum Type { Int, Void, } - +impl From for Type { + fn from(value: TypeIdent) -> Self { + match value { + TypeIdent::Int => Type::Int, + TypeIdent::Void => Type::Void, + } + } +} pub struct Param { - name: String, - param_type: Type, - span: Span, + pub name: String, + pub param_type: Type, + pub span: Span, } \ No newline at end of file diff --git a/src/diagnostic/span.rs b/src/diagnostic/span.rs index 4a0820c..09ce7c8 100644 --- a/src/diagnostic/span.rs +++ b/src/diagnostic/span.rs @@ -3,4 +3,13 @@ pub struct Span { pub start: usize, pub end: usize, +} +impl Span { + pub fn from_two(start: Span, end: Span) -> Self { + assert!(start.start <= end.end); + Self { + start: start.start, + end: end.end, + } + } } \ No newline at end of file diff --git a/src/frontend/err.rs b/src/frontend/err.rs index c733c7b..523e35a 100644 --- a/src/frontend/err.rs +++ b/src/frontend/err.rs @@ -1,5 +1,7 @@ use thiserror::Error; +use crate::frontend::types::{Token, TokenValue}; + // #[derive(Debug, Clone, PartialEq, Eq, Error)] // pub enum ParseError { // BlockStmt(#[from] BlockStmtError) @@ -21,8 +23,18 @@ pub enum LexingError { UnrecognizedToken(String), } #[derive(Debug, Clone, PartialEq, Eq, Error)] +pub enum ParseError { + #[error("unexpected token {}, expect {}", .0, .1)] + UnexpectedToken(TokenValue, &'static str), + #[error("cannot combine with previous {}", .0)] + CantCombineWith(TokenValue), + #[error("expect {0}")] + ExpectButEof(&'static str), +} +#[derive(Debug, Clone, PartialEq, Eq, Error)] pub enum FrontendError { #[error(transparent)] Lexing(#[from] LexingError), - + #[error(transparent)] + Parse(#[from] ParseError), } \ No newline at end of file diff --git a/src/frontend/mod.rs b/src/frontend/mod.rs index a82d1d8..565d9e9 100644 --- a/src/frontend/mod.rs +++ b/src/frontend/mod.rs @@ -1,4 +1,4 @@ pub mod types; mod lexer; -// pub mod parser; +pub mod parser; pub mod err; \ No newline at end of file diff --git a/src/frontend/parser.rs b/src/frontend/parser.rs new file mode 100644 index 0000000..c96ec70 --- /dev/null +++ b/src/frontend/parser.rs @@ -0,0 +1,708 @@ +use crate::{ + ast::types::{ + BinaryOp, BlockStmt, CompileUnit, Expr, ExprValue, FuncDeclStmt, GlobalDeclStmt, Param, + ReturnStmt, Statement, VarDeclStmt, VarDeclStmtValue, + }, + diagnostic::{Diagnositics, span::Span}, + frontend::{ + err::ParseError, + types::{Token, TokenValue, TypeIdent}, + }, +}; + +pub struct Parser { + tokens: Vec, + diagnostics: Diagnositics, + pos: usize, +} +#[derive(Debug, Clone, PartialEq, Eq)] +enum ParseType { + MustParse, + TryParse, +} +// const FUNC_OR_VAR_DECL_AT_TOP_LEVEL: &str = "function or variable declaration at top level"; +// const PARAM_DECL: &str = "parameter declaration"; +// const BODY_DECL: &str = "function body"; +impl Parser { + pub fn new(tokens: Vec, diagnostics: Diagnositics) -> Self { + Self { + tokens, + diagnostics, + pos: 0, + } + } + pub fn parse(&mut self) {} + fn peek(&self) -> Option<&Token> { + self.tokens.get(self.pos) + } + fn next(&mut self) -> Option<&Token> { + let token = self.tokens.get(self.pos); + if token.is_some() { + self.pos += 1; + } + token + } + fn advance(&mut self, n: usize) { + self.pos += n; + assert!(self.pos <= self.tokens.len()); + } + fn back(&mut self, n: usize) { + assert!(self.pos >= n); + self.pos -= n; + } + fn parse_compile_unit(&mut self) -> CompileUnit { + let mut global_decls = vec![]; + while self.peek().is_some() { + if let Some(decl) = self.parse_global_decl_stmt() { + global_decls.push(decl); + } + } + CompileUnit { global_decls } + } + + fn parse_global_decl_stmt(&mut self) -> Option { + assert!(self.peek().is_some()); + if let Some(func_decl) = self.parse_func_decl_stmt() { + return Some(GlobalDeclStmt::FuncDecl(func_decl)); + } + if let Some(var_decl) = self.parse_var_decl_stmt() { + return Some(GlobalDeclStmt::VarDecl(var_decl)); + } + let token = self.next().unwrap().clone(); + self.diagnostics.add_from_frontend_error( + ParseError::UnexpectedToken(token.value, "function or variable declaration at top level"), + token.span, + ); + None + } + fn parse_type_and_name(&mut self) -> Option<(TypeIdent, String, Span)> { + assert!(self.peek().is_some()); + let start_span = self.peek().unwrap().span; + let type_ident = self.peek().unwrap().value.as_type_ident()?; + self.advance(1); + let name = match self.peek().map(|t| t.value.as_ident()).flatten() { + None => { + let span = self.next().unwrap().span; + self.diagnostics.add_from_frontend_error( + ParseError::CantCombineWith(TokenValue::TypeIdent(type_ident)), + span, + ); + return None; + } + Some(ident) => ident, + }; + let end_span = self.peek().unwrap().span; + self.advance(1); + Some((type_ident, name, Span::from_two(start_span, end_span))) + } + fn parse_func_decl_stmt(&mut self) -> Option { + assert!(self.peek().is_some()); + let start_span = self.peek().unwrap().span; + let (return_type, name, _) = self.parse_type_and_name()?; + if self + .peek() + .is_some_and(|t| matches!(t.value, TokenValue::LParen)) + { + self.advance(1); + } else { + self.back(2); + return None; + } + // from here we can be sure it's a function declaration, so we can report error if the syntax is wrong + let params = match self.peek().map(|t| &t.value) { + Some(TokenValue::RParen) => { + self.advance(1); + vec![] + } + Some(_) => self.parse_param_list()?, + None => { + let span = self.next().unwrap().span; + self.diagnostics + .add_from_frontend_error(ParseError::ExpectButEof("parameter declaration"), span); + return None; + } + }; + let body = match self.peek().map(|t| &t.value) { + Some(_) => self.parse_block_stmt(ParseType::MustParse)?, + None => { + let span = self.next().unwrap().span; + self.diagnostics + .add_from_frontend_error(ParseError::ExpectButEof("function body"), span); + return None; + } + }; + let end_span = body.span; + Some(FuncDeclStmt { + return_type: return_type.into(), + name, + params, + body, + span: Span::from_two(start_span, end_span), + }) + } + fn parse_param_list(&mut self) -> Option> { + assert!(self.peek().is_some()); + let mut params = vec![]; + + while self.peek().is_some() { + if !params.is_empty() { + if self.peek().map(|t| &t.value) == Some(&TokenValue::Comma) { + self.advance(1); + } else { + let token = self.next().unwrap().clone(); + self.diagnostics.add_from_frontend_error( + ParseError::UnexpectedToken(token.value, "`,`"), + token.span, + ); + return None; + } + } + if let Some(param) = self.parse_param() { + params.push(param); + } else { + return None; + } + } + Some(params) + } + fn parse_param(&mut self) -> Option { + assert!(self.peek().is_some()); + let (param_type, name, span) = self.parse_type_and_name()?; + Some(Param { + param_type: param_type.into(), + name, + span, + }) + } + fn must_match_semicolon(&mut self) -> Option<()> { + if self + .peek() + .is_some_and(|t| matches!(t.value, TokenValue::Semicolon)) + { + self.advance(1); + Some(()) + } else { + let token = self.next().unwrap().clone(); + self.diagnostics + .add_from_frontend_error(ParseError::UnexpectedToken(token.value, "`;`"), token.span); + while let Some(t) = self.peek() { + if matches!(t.value, TokenValue::Semicolon) { + self.advance(1); + break; + } + if matches!(t.value, TokenValue::RBrace) { + break; + } + self.advance(1); + } + None + } + } + fn parse_var_decl_stmt(&mut self) -> Option { + assert!(self.peek().is_some()); + let mut values = vec![]; + let (var_type, name, span) = self.parse_type_and_name()?; + values.push(VarDeclStmtValue { name, var_type: var_type.into(), span }); + while let Some(t) = self.peek() { + if matches!(t.value, TokenValue::Semicolon) { + break; + } + if matches!(t.value, TokenValue::Comma) { + self.advance(1); + if let Some(ident) = self.peek().map(|t| t.value.as_ident()).flatten() { + let span = self.peek().unwrap().span; + self.advance(1); + values.push(VarDeclStmtValue { name: ident, var_type: var_type.into(), span }); + } else { + let token = self.next().unwrap().clone(); + self.diagnostics.add_from_frontend_error( + ParseError::CantCombineWith(TokenValue::TypeIdent(var_type)), + token.span, + ); + return None; + } + continue; + } + let token = self.next().unwrap().clone(); + self.diagnostics.add_from_frontend_error( + ParseError::UnexpectedToken(token.value, "variable declaration"), + token.span, + ); + } + self.must_match_semicolon()?; + let span = Span::from_two(values.first().unwrap().span, values.last().unwrap().span); + Some(VarDeclStmt { values, span }) + } + + fn parse_block_stmt(&mut self, parse_type: ParseType) -> Option { + assert!(self.peek().is_some()); + let start_span = self.peek().unwrap().span; + if !self + .peek() + .map(|t| matches!(t.value, TokenValue::LBrace)) + .unwrap_or(false) + { + if parse_type == ParseType::MustParse { + let token = self.next().unwrap().clone(); + self.diagnostics.add_from_frontend_error( + ParseError::UnexpectedToken(token.value, "`{`"), + token.span, + ); + } + return None; + } + self.advance(1); + let mut statements = vec![]; + println!("parse block stmt"); + loop { + if self.peek().is_none() { + let span = self.next().unwrap().span; + self.diagnostics + .add_from_frontend_error(ParseError::ExpectButEof("`}`"), span); + return None; + } + if self + .peek() + .map(|t| matches!(t.value, TokenValue::Semicolon)) + .unwrap() + { + self.advance(1); + continue; + } + if self + .peek() + .map(|t| matches!(t.value, TokenValue::RBrace)) + .unwrap() + { + self.advance(1); + break; + } + // parse statement here + statements.push(self.parse_stmt()?); + } + println!("finish parse block stmt"); + let end_span = statements.last().map(|s| s.span()).unwrap_or(start_span); + Some(BlockStmt { + statements, + span: Span::from_two(start_span, end_span), + }) + } + + fn parse_stmt(&mut self) -> Option { + assert!(self.peek().is_some()); + if let Some(var_decl) = self.parse_var_decl_stmt() { + return Some(Statement::VarDecl(var_decl)); + } + if let Some(return_stmt) = self.parse_return_stmt() { + return Some(Statement::Return(return_stmt)); + } + if let Some(block_stmt) = self.parse_block_stmt(ParseType::TryParse) { + return Some(Statement::Block(block_stmt)); + } + if let Some(expr) = self.parse_expr() { + return Some(Statement::Expr(expr)); + } + let token = self.next().unwrap().clone(); + self.diagnostics.add_from_frontend_error( + ParseError::UnexpectedToken(token.value, "statement"), + token.span, + ); + while let Some(t) = self.peek() { + if matches!(t.value, TokenValue::Semicolon) { + self.advance(1); + break; + } + if matches!(t.value, TokenValue::RBrace) { + break; + } + self.advance(1); + } + None + } + + fn parse_return_stmt(&mut self) -> Option { + assert!(self.peek().is_some()); + let start_span = self.peek().unwrap().span; + if !self + .peek() + .map(|t| matches!(t.value, TokenValue::Return)) + .unwrap_or(false) + { + return None; + } + self.advance(1); + let value = if self + .peek() + .map(|t| matches!(t.value, TokenValue::Semicolon)) + .unwrap_or(false) + { + None + } else { + Some(self.parse_expr()?) + }; + self.must_match_semicolon()?; + let end_span = self.peek().unwrap().span; + Some(ReturnStmt { + value, + span: Span::from_two(start_span, end_span), + }) + } + // fn parse_expr_tail(&mut self, left: Expr) -> Option { + // match self.peek() { + // None => Some(left), + // Some(t1) => { + // // TODO: add delimiter judge to support better error recovery + // let op = BinaryOp::from_token_value(&t1.value)?; + // match op { + // BinaryOp::Add + // } + // let right = self.parse_term()?; + // let expr = Expr { value: ExprValue::BinaryOp { lhs: Box::new(left), op, rhs: Box::new(right) }, span: Span::from_two(left.span, right.span) }; + // self.parse_expr_tail(expr) + // } + // } + // } + fn parse_primary(&mut self) -> Option { + assert!(self.peek().is_some()); + let token = self.next().unwrap().clone(); + match token.value { + TokenValue::Ident(name) => Some(Expr { + value: ExprValue::Var(name), + span: token.span, + }), + TokenValue::IntLit(value) => Some(Expr { + value: ExprValue::IntLit(value), + span: token.span, + }), + TokenValue::LParen => { + let expr = match self.peek() { + Some(_) => self.parse_expr()?, + None => { + self.diagnostics.add_from_frontend_error( + ParseError::ExpectButEof("expression"), + token.span, + ); + return None; + } + }; + match self.peek() { + Some(t) if matches!(t.value, TokenValue::RParen) => { + let end_span = t.span; + self.advance(1); + Some(Expr { + span: Span::from_two(token.span, end_span), + ..expr + }) + } + Some(_) => { + let token = self.next().unwrap().clone(); + self.diagnostics.add_from_frontend_error( + ParseError::UnexpectedToken(token.value, "`)`"), + token.span, + ); + None + } + None => { + self.diagnostics + .add_from_frontend_error(ParseError::ExpectButEof("`)`"), expr.span); + None + } + } + } + _ => { + self.diagnostics.add_from_frontend_error( + ParseError::UnexpectedToken(token.value, "expression"), + token.span, + ); + None + } + } + } + fn parse_unary(&mut self) -> Option { + assert!(self.peek().is_some()); + let token = self.peek().unwrap().clone(); + match token.value { + TokenValue::Plus => { + self.advance(1); + let expr = match self.peek() { + Some(_) => self.parse_unary()?, + None => { + self.diagnostics.add_from_frontend_error( + ParseError::ExpectButEof("expression"), + token.span, + ); + return None; + } + }; + Some(Expr { + span: Span::from_two(token.span, expr.span), + ..expr + }) + } + TokenValue::Minus => { + self.advance(1); + let rhs = match self.peek() { + Some(_) => self.parse_unary()?, + None => { + self.diagnostics.add_from_frontend_error( + ParseError::ExpectButEof("expression"), + token.span, + ); + return None; + } + }; + let lhs = Expr { + value: ExprValue::IntLit(0), + span: token.span, + }; + let span = Span::from_two(token.span, rhs.span); + Some(Expr { + value: ExprValue::BinaryOp { + lhs: Box::new(lhs), + op: BinaryOp::Sub, + rhs: Box::new(rhs), + }, + span, + }) + } + _ => self.parse_primary(), + } + } + fn parse_multiplicative(&mut self) -> Option { + assert!(self.peek().is_some()); + let mut left = self.parse_unary()?; + while let Some(t) = self.peek() { + let op = match t.value { + TokenValue::Star => BinaryOp::Mul, + TokenValue::Slash => BinaryOp::Div, + TokenValue::Percent => BinaryOp::Mod, + _ => break, + }; + self.advance(1); + let right = match self.peek() { + Some(_) => self.parse_unary()?, + None => { + self.diagnostics + .add_from_frontend_error(ParseError::ExpectButEof("expression"), left.span); + return None; + } + }; + let span = Span::from_two(left.span, right.span); + left = Expr { + value: ExprValue::BinaryOp { + lhs: Box::new(left), + op, + rhs: Box::new(right), + }, + span, + }; + } + Some(left) + } + fn parse_additive(&mut self) -> Option { + assert!(self.peek().is_some()); + let mut left = self.parse_multiplicative()?; + while let Some(t) = self.peek() { + let op = match t.value { + TokenValue::Plus => BinaryOp::Add, + TokenValue::Minus => BinaryOp::Sub, + _ => break, + }; + self.advance(1); + let right = match self.peek() { + Some(_) => self.parse_multiplicative()?, + None => { + self.diagnostics + .add_from_frontend_error(ParseError::ExpectButEof("expression"), left.span); + return None; + } + }; + let span = Span::from_two(left.span, right.span); + left = Expr { + value: ExprValue::BinaryOp { + lhs: Box::new(left), + op, + rhs: Box::new(right), + }, + span, + }; + } + Some(left) + } + fn parse_relational(&mut self) -> Option { + assert!(self.peek().is_some()); + let mut left = self.parse_additive()?; + while let Some(t) = self.peek() { + let op = match t.value { + TokenValue::Less => BinaryOp::Less, + TokenValue::Greater => BinaryOp::Greater, + TokenValue::LessEqual => BinaryOp::LessEqual, + TokenValue::GreaterEqual => BinaryOp::GreaterEqual, + TokenValue::DoubleEqual => BinaryOp::Equal, + TokenValue::NotEqual => BinaryOp::NotEqual, + _ => break, + }; + self.advance(1); + let right = match self.peek() { + Some(_) => self.parse_additive()?, + None => { + self.diagnostics + .add_from_frontend_error(ParseError::ExpectButEof("expression"), left.span); + return None; + } + }; + let span = Span::from_two(left.span, right.span); + left = Expr { + value: ExprValue::BinaryOp { + lhs: Box::new(left), + op, + rhs: Box::new(right), + }, + span, + }; + } + Some(left) + } + fn parse_assign(&mut self) -> Option { + assert!(self.peek().is_some()); + let is_assign = matches!( + (self.tokens.get(self.pos), self.tokens.get(self.pos + 1)), + ( + Some(Token { + value: TokenValue::Ident(_), + .. + }), + Some(Token { + value: TokenValue::Equal, + .. + }) + ) + ); + if !is_assign { + return self.parse_relational(); + } + + let lvalue_token = self.next().unwrap().clone(); + let name = lvalue_token.value.as_ident().unwrap(); + self.advance(1); + let rvalue = match self.peek() { + Some(_) => self.parse_assign()?, + None => { + self.diagnostics.add_from_frontend_error( + ParseError::ExpectButEof("expression"), + lvalue_token.span, + ); + return None; + } + }; + let lvalue = Expr { + value: ExprValue::Var(name), + span: lvalue_token.span, + }; + let span = Span::from_two(lvalue.span, rvalue.span); + Some(Expr { + value: ExprValue::Assign { + lvalue: Box::new(lvalue), + rvalue: Box::new(rvalue), + }, + span, + }) + } + /* + expr + := assign + + assign + := relational + | IDENT "=" assign + + relational + := additive + | relational "<" additive + | relational ">" additive + | relational "<=" additive + | relational ">=" additive + + additive + := multiplicative + | additive "+" multiplicative + | additive "-" multiplicative + + multiplicative + := unary + | multiplicative "*" unary + | multiplicative "/" unary + + unary + := primary + | "+" unary + | "-" unary + + primary + := IDENT + | NUMBER + | "(" expr ")" + */ + fn parse_expr(&mut self) -> Option { + assert!(self.peek().is_some()); + self.parse_assign() + } +} + +#[cfg(test)] +mod tests { + use std::io::BufRead; + use std::path::Path; + use std::fs::File; + use crate::frontend::lexer::Lexer; + use crate::utils::case_list::CaseList; + use crate::utils::num_sequence::NumberSequence; + + pub use super::*; + fn test_case(case_str: &str) { + let case_sequence = NumberSequence::from_str(case_str).unwrap(); + let case_list = CaseList::from_dir(&Path::new("./testcases")).unwrap(); + let mut error_case_cnt = 0; + for case_no in case_sequence { + let case_path = case_list.get_case_path(case_no).unwrap(); + println!("{}", case_path.display()); + let file = File::open(&case_path).unwrap(); + let mut buf_reader = std::io::BufReader::new(file); + let mut lexer = Lexer::new(); + let mut full_text = String::new(); + loop { + let mut line = String::new(); + let bytes_read = buf_reader.read_line(&mut line).unwrap(); + if bytes_read == 0 { + break; + } + full_text.push_str(&line); + lexer.parse_next_str(&line); + } + let (tokens, diagnostics) = lexer.finish(); + let mut is_error = false; + if !diagnostics.is_empty() { + diagnostics.print(&format!("{}", case_path.display()), &full_text); + is_error = true; + } + let mut parser = Parser::new(tokens, diagnostics); + let _compile_unit = parser.parse_compile_unit(); + if !parser.diagnostics.is_empty() { + parser.diagnostics.print(&format!("{}", case_path.display()), &full_text); + is_error = true; + } + if is_error { + error_case_cnt += 1; + } + } + if error_case_cnt > 0 { + panic!("Found {} cases with errors", error_case_cnt); + } + + } + #[test] + fn test_expr() { + test_case("0-3,14-25"); + // test_case("0-3,14-25"); + } +} \ No newline at end of file diff --git a/src/frontend/types.rs b/src/frontend/types.rs index b82fd6b..dff97d8 100644 --- a/src/frontend/types.rs +++ b/src/frontend/types.rs @@ -28,31 +28,47 @@ pub enum TokenValue { // Eof, Unrecognized, } +impl TokenValue { + pub fn as_type_ident(&self) -> Option { + if let TokenValue::TypeIdent(t) = self { + Some(t.clone()) + } else { + None + } + } + pub fn as_ident(&self) -> Option { + if let TokenValue::Ident(s) = self { + Some(s.clone()) + } else { + None + } + } +} impl std::fmt::Display for TokenValue { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - TokenValue::IntLit(i) => write!(f, "literal int: {}", i), - TokenValue::Ident(s) => write!(f, "identifier: {}", s), + TokenValue::IntLit(i) => write!(f, "literal int {}", i), + TokenValue::Ident(s) => write!(f, "identifier {}", s), TokenValue::TypeIdent(t) => write!(f, "type {}", t.as_ref()), - TokenValue::Plus => write!(f, "+"), - TokenValue::Minus => write!(f, "-"), - TokenValue::Star => write!(f, "*"), - TokenValue::Slash => write!(f, "/"), - TokenValue::Percent => write!(f, "%"), - TokenValue::Equal => write!(f, "="), - TokenValue::DoubleEqual => write!(f, "=="), - TokenValue::Not => write!(f, "!"), - TokenValue::NotEqual => write!(f, "!="), - TokenValue::Less => write!(f, "<"), - TokenValue::LessEqual => write!(f, "<="), - TokenValue::Greater => write!(f, ">"), - TokenValue::GreaterEqual => write!(f, ">="), - TokenValue::LParen => write!(f, "("), - TokenValue::RParen => write!(f, ")"), - TokenValue::LBrace => write!(f, "{{"), - TokenValue::RBrace => write!(f, "}}"), - TokenValue::Comma => write!(f, ","), - TokenValue::Semicolon => write!(f, ";"), + TokenValue::Plus => write!(f, "`+`"), + TokenValue::Minus => write!(f, "`-`"), + TokenValue::Star => write!(f, "`*`"), + TokenValue::Slash => write!(f, "`/`"), + TokenValue::Percent => write!(f, "`%`"), + TokenValue::Equal => write!(f, "`=`"), + TokenValue::DoubleEqual => write!(f, "`==`"), + TokenValue::Not => write!(f, "`!`"), + TokenValue::NotEqual => write!(f, "`!=`"), + TokenValue::Less => write!(f, "`<`"), + TokenValue::LessEqual => write!(f, "`<=`"), + TokenValue::Greater => write!(f, "`>`"), + TokenValue::GreaterEqual => write!(f, "`>=`"), + TokenValue::LParen => write!(f, "`(`"), + TokenValue::RParen => write!(f, "`)`"), + TokenValue::LBrace => write!(f, "`{{`"), + TokenValue::RBrace => write!(f, "`}}`"), + TokenValue::Comma => write!(f, "`,`"), + TokenValue::Semicolon => write!(f, "`;`"), TokenValue::If => write!(f, "if"), TokenValue::Else => write!(f, "else"), TokenValue::While => write!(f, "while"), @@ -83,7 +99,7 @@ pub enum TokenKind { // Eof, Unrecognized, } -#[derive(Debug, Clone, PartialEq, Eq, EnumString, AsRefStr)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, EnumString, AsRefStr)] pub enum TypeIdent { #[strum(serialize = "int")] Int, From 3c728fb2b8b5106144f93f3b4fcdb11aa553c790 Mon Sep 17 00:00:00 2001 From: Hydrostic Date: Sat, 9 May 2026 12:29:59 +0800 Subject: [PATCH 3/3] feat(ast): Add graph output --- .gitignore | 3 +- Cargo.lock | 56 +++++++++++++++ Cargo.toml | 1 + src/ast/graph.rs | 155 +++++++++++++++++++++++++++++++++++++++++ src/ast/mod.rs | 1 + src/ast/types.rs | 111 ++++++++++++++++++++++++++++- src/frontend/parser.rs | 6 +- 7 files changed, 330 insertions(+), 3 deletions(-) create mode 100644 src/ast/graph.rs diff --git a/.gitignore b/.gitignore index 26ceea5..8774c22 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ /target -/testcases \ No newline at end of file +/testcases +/output \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index db97c61..037fc18 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -28,12 +28,55 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "fixedbitset" +version = "0.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" + +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" +dependencies = [ + "foldhash", +] + +[[package]] +name = "hashbrown" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f467dd6dccf739c208452f8014c75c18bb8301b050ad1cfb27153803edb0f51" + [[package]] name = "heck" version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "indexmap" +version = "2.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d466e9454f08e4a911e14806c24e16fba1b4c121d1ea474396f396069cf949d9" +dependencies = [ + "equivalent", + "hashbrown 0.17.0", +] + [[package]] name = "memchr" version = "2.8.0" @@ -113,6 +156,18 @@ dependencies = [ "autocfg", ] +[[package]] +name = "petgraph" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8701b58ea97060d5e5b155d383a69952a60943f0e6dfe30b04c287beb0b27455" +dependencies = [ + "fixedbitset", + "hashbrown 0.15.5", + "indexmap", + "serde", +] + [[package]] name = "proc-macro2" version = "1.0.106" @@ -166,6 +221,7 @@ version = "0.1.0" dependencies = [ "codespan-reporting", "num", + "petgraph", "regex", "strum", "thiserror", diff --git a/Cargo.toml b/Cargo.toml index 61533d4..71ecf51 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,6 +6,7 @@ edition = "2024" [dependencies] codespan-reporting = "0.13.1" num = "0.4.3" +petgraph = "0.8.3" regex = "1.12.3" strum = { version = "0.28.0", features = ["derive"] } thiserror = "2.0.18" diff --git a/src/ast/graph.rs b/src/ast/graph.rs new file mode 100644 index 0000000..8a0e302 --- /dev/null +++ b/src/ast/graph.rs @@ -0,0 +1,155 @@ +use petgraph::dot::{Config, Dot}; +use petgraph::graph::{Graph, NodeIndex}; + +use crate::ast::types::{ + BlockStmt, CompileUnit, Expr, ExprValue, FuncDeclStmt, GlobalDeclStmt, Param, ReturnStmt, + Statement, VarDeclStmt, VarDeclStmtValue, +}; + +pub type AstGraph = Graph; + +pub trait AstGraphExt { + fn to_graph(&self) -> AstGraph; + fn to_dot(&self) -> String { + format!("{}", Dot::with_config(&self.to_graph(), &[Config::EdgeNoLabel])) + } +} + +impl AstGraphExt for CompileUnit { + fn to_graph(&self) -> AstGraph { + let mut builder = AstGraphBuilder::new(); + builder.add_compile_unit(self); + builder.graph + } +} + +struct AstGraphBuilder { + graph: AstGraph, +} + +impl AstGraphBuilder { + fn new() -> Self { + Self { graph: Graph::new() } + } + + fn node(&mut self, label: impl Into) -> NodeIndex { + self.graph.add_node(label.into()) + } + + fn child(&mut self, parent: NodeIndex, label: impl Into) -> NodeIndex { + let child = self.node(label); + self.graph.add_edge(parent, child, String::new()); + child + } + + fn add_compile_unit(&mut self, compile_unit: &CompileUnit) -> NodeIndex { + let root = self.node(compile_unit.to_string()); + for decl in &compile_unit.global_decls { + self.add_global_decl(root, decl); + } + root + } + + fn add_global_decl(&mut self, parent: NodeIndex, decl: &GlobalDeclStmt) -> NodeIndex { + match decl { + GlobalDeclStmt::VarDecl(var_decl) => { + let node = self.child(parent, decl.to_string()); + self.add_var_decl(node, var_decl); + node + } + GlobalDeclStmt::FuncDecl(func_decl) => { + let node = self.child(parent, decl.to_string()); + self.add_func_decl(node, func_decl); + node + } + } + } + + fn add_var_decl(&mut self, parent: NodeIndex, var_decl: &VarDeclStmt) -> NodeIndex { + let node = self.child(parent, var_decl.to_string()); + for value in &var_decl.values { + self.add_var_decl_value(node, value); + } + node + } + + fn add_var_decl_value(&mut self, parent: NodeIndex, value: &VarDeclStmtValue) -> NodeIndex { + self.child(parent, value.to_string()) + } + + fn add_func_decl(&mut self, parent: NodeIndex, func_decl: &FuncDeclStmt) -> NodeIndex { + let node = self.child(parent, func_decl.to_string()); + let params = self.child(node, "Params"); + for param in &func_decl.params { + self.add_param(params, param); + } + self.add_block_stmt(node, &func_decl.body); + node + } + + fn add_param(&mut self, parent: NodeIndex, param: &Param) -> NodeIndex { + self.child(parent, param.to_string()) + } + + fn add_block_stmt(&mut self, parent: NodeIndex, block_stmt: &BlockStmt) -> NodeIndex { + let node = self.child(parent, block_stmt.to_string()); + for stmt in &block_stmt.statements { + self.add_statement(node, stmt); + } + node + } + + fn add_statement(&mut self, parent: NodeIndex, stmt: &Statement) -> NodeIndex { + match stmt { + Statement::Return(return_stmt) => { + let node = self.child(parent, stmt.to_string()); + self.add_return_stmt(node, return_stmt); + node + } + Statement::Block(block_stmt) => self.add_block_stmt(parent, block_stmt), + Statement::Expr(expr) => { + let node = self.child(parent, stmt.to_string()); + self.add_expr(node, expr); + node + } + Statement::VarDecl(var_decl) => { + let node = self.child(parent, stmt.to_string()); + self.add_var_decl(node, var_decl); + node + } + } + } + + fn add_return_stmt(&mut self, parent: NodeIndex, return_stmt: &ReturnStmt) -> NodeIndex { + match &return_stmt.value { + Some(expr) => self.add_expr(parent, expr), + None => self.child(parent, "Void"), + } + } + + fn add_expr(&mut self, parent: NodeIndex, expr: &Expr) -> NodeIndex { + match &expr.value { + ExprValue::IntLit(_) | ExprValue::Var(_) => self.child(parent, expr.value.to_string()), + ExprValue::BinaryOp { lhs, op: _, rhs } => { + let node = self.child(parent, expr.value.to_string()); + self.add_expr(node, lhs); + self.add_expr(node, rhs); + node + } + ExprValue::FuncCall(_, args) => { + let node = self.child(parent, expr.value.to_string()); + let args_node = self.child(node, "Args"); + for arg in args { + self.add_expr(args_node, arg); + } + node + } + ExprValue::Assign { lvalue, rvalue } => { + let node = self.child(parent, expr.value.to_string()); + self.add_expr(node, lvalue); + self.add_expr(node, rvalue); + node + } + } + } +} diff --git a/src/ast/mod.rs b/src/ast/mod.rs index cd40856..6d20066 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -1 +1,2 @@ +pub mod graph; pub mod types; diff --git a/src/ast/types.rs b/src/ast/types.rs index 6c15b08..623432f 100644 --- a/src/ast/types.rs +++ b/src/ast/types.rs @@ -1,4 +1,5 @@ use crate::{diagnostic::span::Span, frontend::types::{TokenValue, TypeIdent}}; +use std::fmt; pub struct CompileUnit { pub global_decls: Vec, @@ -109,4 +110,112 @@ pub struct Param { pub name: String, pub param_type: Type, pub span: Span, -} \ No newline at end of file +} + +impl fmt::Display for CompileUnit { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "CompileUnit") + } +} + +impl fmt::Display for GlobalDeclStmt { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + GlobalDeclStmt::VarDecl(_) => write!(f, "GlobalVarDecl"), + GlobalDeclStmt::FuncDecl(_) => write!(f, "FuncDecl"), + } + } +} + +impl fmt::Display for VarDeclStmt { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "VarDecl") + } +} + +impl fmt::Display for VarDeclStmtValue { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{} {}", self.var_type, self.name) + } +} + +impl fmt::Display for FuncDeclStmt { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{} {}", self.return_type, self.name) + } +} + +impl fmt::Display for BlockStmt { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "Block") + } +} + +impl fmt::Display for Statement { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Statement::Return(_) => write!(f, "ReturnStmt"), + Statement::Block(_) => write!(f, "BlockStmt"), + Statement::Expr(_) => write!(f, "ExprStmt"), + Statement::VarDecl(_) => write!(f, "VarDeclStmt"), + } + } +} + +impl fmt::Display for ReturnStmt { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "ReturnStmt") + } +} + +impl fmt::Display for Expr { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.value) + } +} + +impl fmt::Display for ExprValue { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + ExprValue::IntLit(value) => write!(f, "IntLit({})", value), + ExprValue::Var(name) => write!(f, "Var({})", name), + ExprValue::BinaryOp { op, .. } => write!(f, "BinaryOp({})", op), + ExprValue::FuncCall(name, _) => write!(f, "FuncCall({})", name), + ExprValue::Assign { .. } => write!(f, "Assign"), + } + } +} + +impl fmt::Display for Param { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{} {}", self.param_type, self.name) + } +} + +impl fmt::Display for BinaryOp { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let op = match self { + BinaryOp::Add => "+", + BinaryOp::Sub => "-", + BinaryOp::Mul => "*", + BinaryOp::Div => "/", + BinaryOp::Mod => "%", + BinaryOp::Equal => "==", + BinaryOp::NotEqual => "!=", + BinaryOp::Less => "<", + BinaryOp::LessEqual => "<=", + BinaryOp::Greater => ">", + BinaryOp::GreaterEqual => ">=", + }; + write!(f, "{}", op) + } +} + +impl fmt::Display for Type { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Type::Int => write!(f, "int"), + Type::Void => write!(f, "void"), + } + } +} diff --git a/src/frontend/parser.rs b/src/frontend/parser.rs index c96ec70..7359101 100644 --- a/src/frontend/parser.rs +++ b/src/frontend/parser.rs @@ -654,6 +654,7 @@ mod tests { use std::io::BufRead; use std::path::Path; use std::fs::File; + use crate::ast::graph::AstGraphExt; use crate::frontend::lexer::Lexer; use crate::utils::case_list::CaseList; use crate::utils::num_sequence::NumberSequence; @@ -686,7 +687,10 @@ mod tests { is_error = true; } let mut parser = Parser::new(tokens, diagnostics); - let _compile_unit = parser.parse_compile_unit(); + let compile_unit = parser.parse_compile_unit(); + let dot = compile_unit.to_dot(); + let case_name = case_list.get_case_name(case_no).unwrap().strip_suffix(".c").unwrap(); + std::fs::write(format!("output/{}.dot", case_name), dot).unwrap(); if !parser.diagnostics.is_empty() { parser.diagnostics.print(&format!("{}", case_path.display()), &full_text); is_error = true;