From 7b6eede961d87d430cec9e3cfcab6db97f241cd1 Mon Sep 17 00:00:00 2001 From: Hydrostic Date: Thu, 14 May 2026 15:54:31 +0800 Subject: [PATCH] feat(ir, parser): Support if/while/break/logical expr/cmp in parser and ir generator --- src/ast/graph.rs | 36 +- src/ast/types.rs | 126 ++++++- src/backend/generator.rs | 7 +- src/frontend/err.rs | 2 +- src/frontend/lexer.rs | 52 ++- src/frontend/parser.rs | 757 +++++++++++++++++++++++++++------------ src/frontend/types.rs | 4 +- src/ir/err.rs | 8 + src/ir/generator.rs | 521 +++++++++++++++++++++++---- src/ir/types.rs | 104 +++++- 10 files changed, 1260 insertions(+), 357 deletions(-) diff --git a/src/ast/graph.rs b/src/ast/graph.rs index 8a0e302..3bebcf2 100644 --- a/src/ast/graph.rs +++ b/src/ast/graph.rs @@ -2,8 +2,7 @@ use petgraph::dot::{Config, Dot}; use petgraph::graph::{Graph, NodeIndex}; use crate::ast::types::{ - BlockStmt, CompileUnit, Expr, ExprValue, FuncDeclStmt, GlobalDeclStmt, Param, ReturnStmt, - Statement, VarDeclStmt, VarDeclStmtValue, + BlockStmt, BreakStmt, CompileUnit, ContinueStmt, Expr, ExprValue, FuncDeclStmt, GlobalDeclStmt, IfStmt, Param, ReturnStmt, Statement, VarDeclStmt, VarDeclStmtValue, WhileStmt }; pub type AstGraph = Graph; @@ -116,7 +115,11 @@ impl AstGraphBuilder { let node = self.child(parent, stmt.to_string()); self.add_var_decl(node, var_decl); node - } + }, + Statement::If(if_stmt) => self.add_if_stmt(parent, if_stmt), + Statement::While(while_stmt) => self.add_while_stmt(parent, while_stmt), + Statement::Break(break_stmt) => self.add_break_stmt(parent, break_stmt), + Statement::Continue(continue_stmt) => self.add_continue_stmt(parent, continue_stmt), } } @@ -126,7 +129,27 @@ impl AstGraphBuilder { None => self.child(parent, "Void"), } } - + fn add_if_stmt(&mut self, parent: NodeIndex, if_stmt: &IfStmt) -> NodeIndex { + let node = self.child(parent, if_stmt.to_string()); + self.add_expr(node, &if_stmt.condition); + self.add_block_stmt(node, &if_stmt.then_branch); + if let Some(else_branch) = &if_stmt.else_branch { + self.add_block_stmt(node, else_branch); + } + node + } + fn add_while_stmt(&mut self, parent: NodeIndex, while_stmt: &WhileStmt) -> NodeIndex { + let node = self.child(parent, while_stmt.to_string()); + self.add_expr(node, &while_stmt.condition); + self.add_block_stmt(node, &while_stmt.body); + node + } + fn add_break_stmt(&mut self, parent: NodeIndex, break_stmt: &BreakStmt) -> NodeIndex { + self.child(parent, break_stmt.to_string()) + } + fn add_continue_stmt(&mut self, parent: NodeIndex, continue_stmt: &ContinueStmt) -> NodeIndex { + self.child(parent, continue_stmt.to_string()) + } fn add_expr(&mut self, parent: NodeIndex, expr: &Expr) -> NodeIndex { match &expr.value { ExprValue::IntLit(_) | ExprValue::Var(_) => self.child(parent, expr.value.to_string()), @@ -149,6 +172,11 @@ impl AstGraphBuilder { self.add_expr(node, lvalue); self.add_expr(node, rvalue); node + }, + ExprValue::UnaryOp { op: _, operand } => { + let node = self.child(parent, expr.value.to_string()); + self.add_expr(node, operand); + node } } } diff --git a/src/ast/types.rs b/src/ast/types.rs index d90314d..15c19bb 100644 --- a/src/ast/types.rs +++ b/src/ast/types.rs @@ -1,4 +1,4 @@ -use crate::{diagnostic::span::Span, frontend::types::{TokenValue, TypeIdent}}; +use crate::{diagnostic::span::Span, frontend::types::{Token, TokenValue, TypeIdent}}; use std::fmt; pub struct CompileUnit { @@ -11,12 +11,12 @@ pub enum GlobalDeclStmt { pub struct VarDeclStmt { pub values: Vec, - pub span: Span, + pub data_type: Type, + pub type_span: Span, } pub struct VarDeclStmtValue { pub name: String, - pub var_type: Type, - pub span: Span, + pub name_span: Span, } @@ -25,11 +25,11 @@ pub struct FuncDeclStmt { pub return_type: Type, pub params: Vec, pub body: BlockStmt, - pub span: Span, + pub ret_type_span: Span, + pub name_span: Span, } pub struct BlockStmt { pub statements: Vec, - pub span: Span, } pub enum Statement { @@ -37,16 +37,46 @@ pub enum Statement { Block(BlockStmt), Expr(Expr), VarDecl(VarDeclStmt), + If(IfStmt), + While(WhileStmt), + Break(BreakStmt), + Continue(ContinueStmt), } -impl Statement { - pub fn span(&self) -> Span { - match self { - Statement::Return(s) => s.span, - Statement::Block(s) => s.span, - Statement::Expr(s) => s.span, - Statement::VarDecl(s) => s.span, - } - } +// impl Statement { +// pub fn span(&self) -> Span { +// match self { +// Statement::Return(s) => s.span, +// Statement::Block(s) => s.span, +// Statement::Expr(s) => s.span, +// Statement::VarDecl(s) => s.span, +// Statement::If(s) => s.span, +// Statement::While(s) => s.span, +// Statement::Break(s) => s.span, +// Statement::Continue(s) => s.span, +// } +// } +// } +pub struct IfStmt { + pub condition: Expr, + pub then_branch: BlockStmt, + pub ifelse_branch: Vec, + pub else_branch: Option, + // pub span: Span, +} +pub struct IfElseBranch { + pub condition: Expr, + pub then_branch: BlockStmt, +} +pub struct WhileStmt { + pub condition: Expr, + pub body: BlockStmt, + // pub span: Span, +} +pub struct BreakStmt { + pub span: Span, +} +pub struct ContinueStmt { + pub span: Span, } pub struct ReturnStmt { pub value: Option, @@ -64,6 +94,10 @@ pub enum ExprValue { op: BinaryOp, rhs: Box }, + UnaryOp { + op: UnaryOp, + operand: Box, + }, FuncCall(String, Vec), Assign { lvalue: Box, @@ -74,8 +108,13 @@ pub enum ExprValue { pub enum BinaryOp { Add, Sub, Mul, Div, Mod, Equal, NotEqual, Less, LessEqual, Greater, GreaterEqual, + And, Or, } +#[derive(Clone, Copy)] +pub enum UnaryOp { + Add, Sub, Not, +} impl BinaryOp { pub fn from_token_value(token_value: &TokenValue) -> Option { match token_value { @@ -90,10 +129,20 @@ impl BinaryOp { TokenValue::LessEqual => Some(BinaryOp::LessEqual), TokenValue::Greater => Some(BinaryOp::Greater), TokenValue::GreaterEqual => Some(BinaryOp::GreaterEqual), + TokenValue::And => Some(BinaryOp::And), + TokenValue::Or => Some(BinaryOp::Or), _ => None, } } + pub fn is_logical(&self) -> bool { + matches!(self, BinaryOp::And | BinaryOp::Or) + } + pub fn is_cmp(&self) -> bool { + matches!(self, BinaryOp::Equal | BinaryOp::NotEqual | BinaryOp::Less | BinaryOp::LessEqual | BinaryOp::Greater | BinaryOp::GreaterEqual) + } } + + #[derive(Clone, Copy)] pub enum Type { Int, @@ -110,7 +159,8 @@ impl From for Type { pub struct Param { pub name: String, pub param_type: Type, - pub span: Span, + pub name_span: Span, + pub type_span: Span, } impl fmt::Display for CompileUnit { @@ -136,7 +186,7 @@ impl fmt::Display for VarDeclStmt { impl fmt::Display for VarDeclStmtValue { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{} {}", self.var_type, self.name) + write!(f, "{}", self.name) } } @@ -159,16 +209,42 @@ impl fmt::Display for Statement { Statement::Block(_) => write!(f, "BlockStmt"), Statement::Expr(_) => write!(f, "ExprStmt"), Statement::VarDecl(_) => write!(f, "VarDeclStmt"), + Statement::If(_) => write!(f, "IfStmt"), + Statement::While(_) => write!(f, "WhileStmt"), + Statement::Break(_) => write!(f, "BreakStmt"), + Statement::Continue(_) => write!(f, "ContinueStmt"), } } } - +impl fmt::Display for IfStmt { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "IfStmt") + } +} impl fmt::Display for ReturnStmt { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "ReturnStmt") } } +impl fmt::Display for WhileStmt { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "WhileStmt") + } +} + +impl fmt::Display for BreakStmt { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "BreakStmt") + } +} + +impl fmt::Display for ContinueStmt { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "ContinueStmt") + } +} + impl fmt::Display for Expr { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{}", self.value) @@ -183,6 +259,7 @@ impl fmt::Display for ExprValue { ExprValue::BinaryOp { op, .. } => write!(f, "BinaryOp({})", op), ExprValue::FuncCall(name, _) => write!(f, "FuncCall({})", name), ExprValue::Assign { .. } => write!(f, "Assign"), + ExprValue::UnaryOp { op, .. } => write!(f, "UnaryOp({})", op), } } } @@ -207,11 +284,22 @@ impl fmt::Display for BinaryOp { BinaryOp::LessEqual => "<=", BinaryOp::Greater => ">", BinaryOp::GreaterEqual => ">=", + BinaryOp::And => "&&", + BinaryOp::Or => "||", + }; + write!(f, "{}", op) + } +} +impl fmt::Display for UnaryOp { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let op = match self { + UnaryOp::Add => "+", + UnaryOp::Sub => "-", + UnaryOp::Not => "!", }; write!(f, "{}", op) } } - impl fmt::Display for Type { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { diff --git a/src/backend/generator.rs b/src/backend/generator.rs index 3cb4900..07ec23f 100644 --- a/src/backend/generator.rs +++ b/src/backend/generator.rs @@ -98,6 +98,7 @@ impl Generator { self.instrs.push(SubInstr::new_sp(stack_size_needed as i32)); }, IRInstr::DefineFunc(_, _, _) => unreachable!(), + _ => unimplemented!(), } } } @@ -189,12 +190,6 @@ impl Generator { self.instrs.push(MulInstr::new(temp_reg, temp_reg, RegisterOrImm::Reg(right_reg))); self.instrs.push(SubInstr::new(dest_reg, left_reg, RegisterOrImm::Reg(temp_reg))); }, - IRBinaryOp::Le => todo!(), - IRBinaryOp::Lt => todo!(), - IRBinaryOp::Gt => todo!(), - IRBinaryOp::Ge => todo!(), - IRBinaryOp::Ne => todo!(), - IRBinaryOp::Eq => todo!(), } let dest_stack_offset = var_index_to_stack_offset.get(&dest.index).expect("Variable not declared"); self.instrs.push(StoreInstr::new_stack(dest_reg, *dest_stack_offset as i32)); diff --git a/src/frontend/err.rs b/src/frontend/err.rs index 523e35a..2752a84 100644 --- a/src/frontend/err.rs +++ b/src/frontend/err.rs @@ -28,7 +28,7 @@ pub enum ParseError { UnexpectedToken(TokenValue, &'static str), #[error("cannot combine with previous {}", .0)] CantCombineWith(TokenValue), - #[error("expect {0}")] + #[error("expect {0} after")] ExpectButEof(&'static str), } #[derive(Debug, Clone, PartialEq, Eq, Error)] diff --git a/src/frontend/lexer.rs b/src/frontend/lexer.rs index 56d46ec..7c9d01c 100644 --- a/src/frontend/lexer.rs +++ b/src/frontend/lexer.rs @@ -17,7 +17,7 @@ pub struct Lexer { const WHITESPACE_CHARS: &[char] = &[' ', '\t', '\n', '\r']; const DELIMITER_CHARS: &[char] = &[ - '+', '-', '*', '/', '%', '=', '!', '<', '>', '(', ')', ',', ';' + '+', '-', '*', '/', '%', '=', '!', '<', '>', '(', ')', ',', ';', '{', '|', '&' ]; struct Cursor { chars: Vec, @@ -185,22 +185,10 @@ fn parse_litint( ) -> Result { let mut c1 = str_iter.peek().ok_or(LexParseError::NotMatched)?; // c1 is the peek value from here - let mut sign_base: i64 = 1; let mut base: i64 = 10; - if !(c1.is_ascii_digit() || c1 == '-') { + if !(c1.is_ascii_digit()) { return Err(LexParseError::NotMatched); } - if c1 == '-' { - sign_base = -1; - str_iter.advance(1); - c1 = str_iter.peek().ok_or(LexParseError::NotMatched)?; - if !c1.is_ascii_digit() { - // only a minus sign, not a number - // back one so cursor still points to the minus sign - str_iter.back(1); - return Err(LexParseError::NotMatched); - } - } let mut number = 0i64; let mut has_digits = false; if c1 == '0' { @@ -248,7 +236,6 @@ fn parse_litint( // No valid digits found, add a diagnostic return Err(LexParseError::InvalidInMatch(LexingError::InvalidIntLiteral)); } - number *= sign_base; Ok(TokenValue::IntLit(number)) } @@ -293,6 +280,7 @@ fn parse_puncuation( if let Some('=') = str_iter.peek() { TokenValue::NotEqual } else { + str_iter.back(1); TokenValue::Not } }, @@ -301,7 +289,24 @@ fn parse_puncuation( ',' => TokenValue::Comma, ';' => TokenValue::Semicolon, - + '|' => { + str_iter.advance(1); + if let Some('|') = str_iter.peek() { + TokenValue::Or + } else { + // unrecognized token starting with '|' + return Err(LexParseError::InvalidInMatch(LexingError::UnrecognizedToken("|".to_string()))); + } + }, + '&' => { + str_iter.advance(1); + if let Some('&') = str_iter.peek() { + TokenValue::And + } else { + // unrecognized token starting with '&' + return Err(LexParseError::InvalidInMatch(LexingError::UnrecognizedToken("&".to_string()))); + } + }, _ => return Err(LexParseError::NotMatched), }; str_iter.advance(1); @@ -330,6 +335,21 @@ fn parse_ident( if name.eq("return") { return Ok(TokenValue::Return); } + if name.eq("if") { + return Ok(TokenValue::If); + } + if name.eq("else") { + return Ok(TokenValue::Else); + } + if name.eq("while") { + return Ok(TokenValue::While); + } + if name.eq("break") { + return Ok(TokenValue::Break); + } + if name.eq("continue") { + return Ok(TokenValue::Continue); + } if let Some(type_ident) = TypeIdent::from_str(&name).ok() { return Ok(TokenValue::TypeIdent(type_ident)); } diff --git a/src/frontend/parser.rs b/src/frontend/parser.rs index 915cad2..fda17d4 100644 --- a/src/frontend/parser.rs +++ b/src/frontend/parser.rs @@ -1,7 +1,6 @@ use crate::{ ast::types::{ - BinaryOp, BlockStmt, CompileUnit, Expr, ExprValue, FuncDeclStmt, GlobalDeclStmt, Param, - ReturnStmt, Statement, VarDeclStmt, VarDeclStmtValue, + BinaryOp, BlockStmt, BreakStmt, CompileUnit, ContinueStmt, Expr, ExprValue, FuncDeclStmt, GlobalDeclStmt, IfElseBranch, IfStmt, Param, ReturnStmt, Statement, UnaryOp, VarDeclStmt, VarDeclStmtValue, WhileStmt }, diagnostic::{Diagnositics, span::Span}, frontend::{ @@ -15,14 +14,15 @@ pub struct Parser { pub diagnostics: Diagnositics, pos: usize, } -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] enum ParseType { - MustParse, + MustParse, TryParse, } -// const FUNC_OR_VAR_DECL_AT_TOP_LEVEL: &str = "function or variable declaration at top level"; -// const PARAM_DECL: &str = "parameter declaration"; -// const BODY_DECL: &str = "function body"; +enum ParseProcessError { + TryNext, + ErrorInMatch +} impl Parser { pub fn new(tokens: Vec, diagnostics: Diagnositics) -> Self { Self { @@ -52,11 +52,60 @@ impl Parser { assert!(self.pos >= n); self.pos -= n; } + fn last(&self) -> Option<&Token> { + if self.pos == 0 { + None + } else { + self.tokens.get(self.pos - 1) + } + } + fn must_match_token(&mut self, expected: &TokenValue, diagnostic_text: &'static str) -> Result<(), ParseProcessError> { + if let Some(t) = self.peek() { + if &t.value == expected { + self.advance(1); + Ok(()) + } else { + let token = self.next().unwrap().clone(); + self.diagnostics.add_from_frontend_error( + ParseError::UnexpectedToken(token.value, diagnostic_text), + token.span, + ); + Err(ParseProcessError::ErrorInMatch) + } + + } else { + let span = self.next().unwrap().span; + self.diagnostics + .add_from_frontend_error(ParseError::ExpectButEof(diagnostic_text), span); + Err(ParseProcessError::ErrorInMatch) + } + } + fn must_have_some(&mut self, diagnostic_text: &'static str) -> Result<(), ParseProcessError> { + if self.peek().is_none() { + let span = self.last().unwrap().span; + self.diagnostics + .add_from_frontend_error(ParseError::ExpectButEof(diagnostic_text), span); + return Err(ParseProcessError::ErrorInMatch); + } + Ok(()) + + } + fn until_next_token(&mut self, expected: &[TokenValue]) { + while let Some(t) = self.peek() { + if expected.contains(&t.value) { + self.advance(1); + break; + } + self.advance(1); + } + } fn parse_compile_unit(&mut self) -> CompileUnit { let mut global_decls = vec![]; while self.peek().is_some() { if let Some(decl) = self.parse_global_decl_stmt() { global_decls.push(decl); + } else { + self.until_next_token(&[TokenValue::Semicolon, TokenValue::RBrace]); } } CompileUnit { global_decls } @@ -64,185 +113,232 @@ impl Parser { fn parse_global_decl_stmt(&mut self) -> Option { assert!(self.peek().is_some()); - if let Some(func_decl) = self.parse_func_decl_stmt() { - return Some(GlobalDeclStmt::FuncDecl(func_decl)); - } - if let Some(var_decl) = self.parse_var_decl_stmt() { - return Some(GlobalDeclStmt::VarDecl(var_decl)); + match self.parse_func_decl_stmt() { + Ok(func_decl) => return Some(GlobalDeclStmt::FuncDecl(func_decl)), + Err(ParseProcessError::ErrorInMatch) => { + return None + }, + _ => {} + }; + match self.parse_var_decl_stmt(ParseType::MustParse) { + Ok(var_decl) => return Some(GlobalDeclStmt::VarDecl(var_decl)), + Err(ParseProcessError::ErrorInMatch) => { + return None + }, + _ => {} } let token = self.next().unwrap().clone(); self.diagnostics.add_from_frontend_error( - ParseError::UnexpectedToken(token.value, "function or variable declaration at top level"), + ParseError::UnexpectedToken(token.value, "ident"), token.span, ); None } - fn parse_type_and_name(&mut self) -> Option<(TypeIdent, String, Span)> { + // fn until_next_stmt(&mut self) { + // // skip tokens until we find a semicolon or a right brace, which may indicate the end of the declaration + // while let Some(t) = self.peek() { + // if matches!(t.value, TokenValue::Semicolon | TokenValue::RBrace) { + // self.advance(1); + // break; + // } + // self.advance(1); + // } + // } + fn parse_type_and_name(&mut self, parse_type: ParseType) -> Result<(TypeIdent, String, Span, Span), ParseProcessError> { assert!(self.peek().is_some()); - let start_span = self.peek().unwrap().span; - let type_ident = self.peek().unwrap().value.as_type_ident()?; - self.advance(1); - let name = match self.peek().map(|t| t.value.as_ident()).flatten() { + let type_token = self.peek().unwrap().clone(); + let type_ident = match self.peek().unwrap().value.as_type_ident() { + Some(ti) => ti, None => { - let span = self.next().unwrap().span; - self.diagnostics.add_from_frontend_error( - ParseError::CantCombineWith(TokenValue::TypeIdent(type_ident)), - span, - ); - return None; - } - Some(ident) => ident, + if matches!(parse_type, ParseType::MustParse) { + let token = self.next().unwrap().clone(); + self.diagnostics.add_from_frontend_error( + ParseError::UnexpectedToken(token.value, "type ident"), + token.span, + ); + return Err(ParseProcessError::ErrorInMatch); + } + return Err(ParseProcessError::TryNext); + }, }; - let end_span = self.peek().unwrap().span; + let type_span = type_token.span; self.advance(1); - Some((type_ident, name, Span::from_two(start_span, end_span))) + let name = match self.peek().map(|t| t.value.as_ident()) { + None => { + self.diagnostics.add_from_frontend_error( + ParseError::ExpectButEof("ident"), + type_span, + ); + return Err(ParseProcessError::ErrorInMatch); + }, + Some(None) => { + let next_span = self.peek().unwrap().span; + self.diagnostics.add_from_frontend_error( + ParseError::CantCombineWith(type_token.value), + next_span + ); + return Err(ParseProcessError::ErrorInMatch); + } + Some(Some(ident)) => ident, + }; + let name_span = self.peek().unwrap().span; + self.advance(1); + Ok((type_ident, name, type_span, name_span)) } - fn parse_func_decl_stmt(&mut self) -> Option { + fn parse_func_decl_stmt(&mut self) -> Result { assert!(self.peek().is_some()); - let start_span = self.peek().unwrap().span; - let (return_type, name, _) = self.parse_type_and_name()?; + let (return_type, name, ret_type_span, name_span) = self.parse_type_and_name(ParseType::MustParse)?; if self .peek() .is_some_and(|t| matches!(t.value, TokenValue::LParen)) { - self.advance(1); } else { self.back(2); - return None; + return Err(ParseProcessError::TryNext); } // from here we can be sure it's a function declaration, so we can report error if the syntax is wrong - let params = match self.peek().map(|t| &t.value) { - Some(TokenValue::RParen) => { - self.advance(1); - vec![] - } - Some(_) => self.parse_param_list()?, - None => { - let span = self.next().unwrap().span; - self.diagnostics - .add_from_frontend_error(ParseError::ExpectButEof("parameter declaration"), span); - return None; - } - }; + let params = self.parse_param_list()?; let body = match self.peek().map(|t| &t.value) { Some(_) => self.parse_block_stmt(ParseType::MustParse)?, None => { let span = self.next().unwrap().span; self.diagnostics .add_from_frontend_error(ParseError::ExpectButEof("function body"), span); - return None; + return Err(ParseProcessError::ErrorInMatch); } }; - let end_span = body.span; - Some(FuncDeclStmt { + Ok(FuncDeclStmt { return_type: return_type.into(), name, params, body, - span: Span::from_two(start_span, end_span), + ret_type_span, + name_span, }) } - fn parse_param_list(&mut self) -> Option> { + fn parse_param_list(&mut self) -> Result, ParseProcessError> { assert!(self.peek().is_some()); - let mut params = vec![]; - - while self.peek().is_some() { - if !params.is_empty() { - if self.peek().map(|t| &t.value) == Some(&TokenValue::Comma) { - self.advance(1); - } else { - let token = self.next().unwrap().clone(); - self.diagnostics.add_from_frontend_error( - ParseError::UnexpectedToken(token.value, "`,`"), - token.span, - ); - return None; - } - } - if let Some(param) = self.parse_param() { - params.push(param); - } else { - return None; - } - } - Some(params) - } - fn parse_param(&mut self) -> Option { - assert!(self.peek().is_some()); - let (param_type, name, span) = self.parse_type_and_name()?; - Some(Param { - param_type: param_type.into(), - name, - span, - }) - } - fn must_match_semicolon(&mut self) -> Option<()> { - if self - .peek() - .is_some_and(|t| matches!(t.value, TokenValue::Semicolon)) - { - self.advance(1); - Some(()) - } else { - let token = self.next().unwrap().clone(); - self.diagnostics - .add_from_frontend_error(ParseError::UnexpectedToken(token.value, "`;`"), token.span); - while let Some(t) = self.peek() { - if matches!(t.value, TokenValue::Semicolon) { - self.advance(1); - break; - } - if matches!(t.value, TokenValue::RBrace) { - break; - } - self.advance(1); - } - None - } - } - fn parse_var_decl_stmt(&mut self) -> Option { - assert!(self.peek().is_some()); - let mut values = vec![]; - let (var_type, name, span) = self.parse_type_and_name()?; - values.push(VarDeclStmtValue { name, var_type: var_type.into(), span }); - while let Some(t) = self.peek() { - if matches!(t.value, TokenValue::Semicolon) { - break; - } - if matches!(t.value, TokenValue::Comma) { - self.advance(1); - if let Some(ident) = self.peek().map(|t| t.value.as_ident()).flatten() { - let span = self.peek().unwrap().span; - self.advance(1); - values.push(VarDeclStmtValue { name: ident, var_type: var_type.into(), span }); - } else { - let token = self.next().unwrap().clone(); - self.diagnostics.add_from_frontend_error( - ParseError::CantCombineWith(TokenValue::TypeIdent(var_type)), - token.span, - ); - return None; - } - continue; - } + if self.peek().unwrap().value != TokenValue::LParen { let token = self.next().unwrap().clone(); self.diagnostics.add_from_frontend_error( - ParseError::UnexpectedToken(token.value, "variable declaration"), + ParseError::UnexpectedToken(token.value, "`(`"), token.span, ); + return Err(ParseProcessError::ErrorInMatch); } - self.must_match_semicolon()?; - let span = Span::from_two(values.first().unwrap().span, values.last().unwrap().span); - Some(VarDeclStmt { values, span }) + self.advance(1); + let mut params = vec![]; + let mut last_is_var = false; + while self.peek().is_some() { + if self.peek().map(|t| &t.value) == Some(&TokenValue::RParen) { + self.advance(1); + break; + } + if last_is_var { + self.must_match_token(&TokenValue::Comma, "`,` or `)`") + .inspect_err(|_| self.until_next_token(&[TokenValue::RBrace]))?; + } + match self.parse_param() { + Ok(param) => { + params.push(param); + last_is_var = true; + } + Err(_e) => { + self.until_next_token(&[TokenValue::RParen]); + if self.last().map(|t| &t.value) == Some(&TokenValue::RParen) { + break; + } + } + } + } + Ok(params) + } + fn parse_param(&mut self) -> Result { + assert!(self.peek().is_some()); + let (param_type, name, type_span, name_span) = self.parse_type_and_name(ParseType::MustParse)?; + Ok(Param { + param_type: param_type.into(), + name, + name_span, + type_span, + }) + } + // fn must_match_semicolon(&mut self) -> Option<()> { + // if self + // .peek() + // .is_some_and(|t| matches!(t.value, TokenValue::Semicolon)) + // { + // self.advance(1); + // Some(()) + // } else { + // let token = self.next().unwrap().clone(); + // self.diagnostics + // .add_from_frontend_error(ParseError::UnexpectedToken(token.value, "`;`"), token.span); + // while let Some(t) = self.peek() { + // if matches!(t.value, TokenValue::Semicolon) { + // self.advance(1); + // break; + // } + // if matches!(t.value, TokenValue::RBrace) { + // break; + // } + // self.advance(1); + // } + // None + // } + // } + fn parse_var_decl_stmt(&mut self, parse_type: ParseType) -> Result { + assert!(self.peek().is_some()); + let mut values = vec![]; + let (var_type, name, type_span, name_span) = match self.parse_type_and_name(parse_type) { + Ok(res) => res, + Err(_e) => { + if matches!(parse_type, ParseType::TryParse) { + return Err(ParseProcessError::TryNext); + } else { + return Err(ParseProcessError::ErrorInMatch); + } + } + }; + values.push(VarDeclStmtValue { name, name_span }); + let mut last_name = true; // indicate whether the last parsed token is a variable name + while let Some(t) = self.peek() { + if matches!(t.value, TokenValue::Semicolon) { // statement end + break; + } + if last_name { // expect a comma after a variable name + self.must_match_token(&TokenValue::Comma, "`,` or `;`")?; + last_name = false; + } + // check eof again + if self.peek().is_none() { + let span = self.last().unwrap().span; + self.diagnostics + .add_from_frontend_error(ParseError::ExpectButEof("`,` or `;`"), span); + break; + } + if let Some(ident) = self.peek().unwrap().value.as_ident() { + let span = self.next().unwrap().span; + values.push(VarDeclStmtValue { name: ident, name_span: span }); + last_name = true; + } else { + let token = self.next().unwrap().clone(); + self.diagnostics.add_from_frontend_error( + ParseError::CantCombineWith(TokenValue::TypeIdent(var_type)), + token.span, + ); + return Err(ParseProcessError::ErrorInMatch); + } + } + self.must_match_token(&TokenValue::Semicolon, "`;`")?; + Ok(VarDeclStmt { values, type_span, data_type: var_type.into() }) } - fn parse_block_stmt(&mut self, parse_type: ParseType) -> Option { + fn parse_block_stmt(&mut self, parse_type: ParseType) -> Result { assert!(self.peek().is_some()); - let start_span = self.peek().unwrap().span; - if !self + if self .peek() - .map(|t| matches!(t.value, TokenValue::LBrace)) - .unwrap_or(false) + .unwrap().value != TokenValue::LBrace { if parse_type == ParseType::MustParse { let token = self.next().unwrap().clone(); @@ -250,24 +346,26 @@ impl Parser { ParseError::UnexpectedToken(token.value, "`{`"), token.span, ); + return Err(ParseProcessError::ErrorInMatch); } - return None; + return Err(ParseProcessError::TryNext); } self.advance(1); let mut statements = vec![]; // println!("parse block stmt"); loop { if self.peek().is_none() { - let span = self.next().unwrap().span; + let span = self.last().unwrap().span; self.diagnostics .add_from_frontend_error(ParseError::ExpectButEof("`}`"), span); - return None; + return Err(ParseProcessError::ErrorInMatch); } if self .peek() .map(|t| matches!(t.value, TokenValue::Semicolon)) .unwrap() { + // like a();; self.advance(1); continue; } @@ -280,59 +378,77 @@ impl Parser { break; } // parse statement here - statements.push(self.parse_stmt()?); + match self.parse_stmt() { + Ok(stmt) => statements.push(stmt), + Err(_) => { + self.until_next_token(&[TokenValue::Semicolon, TokenValue::RBrace]); + if self.last().unwrap().value == TokenValue::RBrace { + break; + } + } + } } // println!("finish parse block stmt"); - let end_span = statements.last().map(|s| s.span()).unwrap_or(start_span); - Some(BlockStmt { - statements, - span: Span::from_two(start_span, end_span), + Ok(BlockStmt { + statements }) } - fn parse_stmt(&mut self) -> Option { + fn parse_stmt(&mut self) -> Result { assert!(self.peek().is_some()); - if let Some(var_decl) = self.parse_var_decl_stmt() { - return Some(Statement::VarDecl(var_decl)); + match self.parse_var_decl_stmt(ParseType::TryParse) { + Ok(var_decl) => return Ok(Statement::VarDecl(var_decl)), + Err(ParseProcessError::ErrorInMatch) => return Err(ParseProcessError::ErrorInMatch), + Err(_) => {}, + }; + match self.parse_return_stmt() { + Ok(return_stmt) => return Ok(Statement::Return(return_stmt)), + Err(ParseProcessError::ErrorInMatch) => return Err(ParseProcessError::ErrorInMatch), + Err(_) => {}, + }; + match self.parse_if_stmt() { + Ok(if_stmt) => return Ok(Statement::If(if_stmt)), + Err(ParseProcessError::ErrorInMatch) => return Err(ParseProcessError::ErrorInMatch), + Err(_) => {}, } - if let Some(return_stmt) = self.parse_return_stmt() { - return Some(Statement::Return(return_stmt)); + match self.parse_while_stmt() { + Ok(while_stmt) => return Ok(Statement::While(while_stmt)), + Err(ParseProcessError::ErrorInMatch) => return Err(ParseProcessError::ErrorInMatch), + Err(_) => {}, } - if let Some(block_stmt) = self.parse_block_stmt(ParseType::TryParse) { - return Some(Statement::Block(block_stmt)); + match self.parse_break_stmt() { + Ok(break_stmt) => return Ok(Statement::Break(break_stmt)), + Err(ParseProcessError::ErrorInMatch) => return Err(ParseProcessError::ErrorInMatch), + Err(_) => {}, } - if let Some(expr) = self.parse_expr() { - return Some(Statement::Expr(expr)); + match self.parse_continue_stmt() { + Ok(continue_stmt) => return Ok(Statement::Continue(continue_stmt)), + Err(ParseProcessError::ErrorInMatch) => return Err(ParseProcessError::ErrorInMatch), + Err(_) => {}, } - let token = self.next().unwrap().clone(); - self.diagnostics.add_from_frontend_error( - ParseError::UnexpectedToken(token.value, "statement"), - token.span, - ); - while let Some(t) = self.peek() { - if matches!(t.value, TokenValue::Semicolon) { - self.advance(1); - break; - } - if matches!(t.value, TokenValue::RBrace) { - break; - } - self.advance(1); + match self.parse_block_stmt(ParseType::TryParse) { + Ok(block_stmt) => return Ok(Statement::Block(block_stmt)), + Err(ParseProcessError::ErrorInMatch) => return Err(ParseProcessError::ErrorInMatch), + Err(_) => {}, } - None + match self.parse_expr() { + Ok(expr) => { + self.must_match_token(&TokenValue::Semicolon, "`;`")?; + return Ok(Statement::Expr(expr)) + }, + Err(ParseProcessError::ErrorInMatch) => return Err(ParseProcessError::ErrorInMatch), + Err(_) => {}, + } + self.until_next_token(&[TokenValue::Semicolon]); + Err(ParseProcessError::ErrorInMatch) } - fn parse_return_stmt(&mut self) -> Option { + fn parse_return_stmt(&mut self) -> Result { assert!(self.peek().is_some()); - let start_span = self.peek().unwrap().span; - if !self - .peek() - .map(|t| matches!(t.value, TokenValue::Return)) - .unwrap_or(false) - { - return None; + if self.peek().unwrap().value != TokenValue::Return { + return Err(ParseProcessError::TryNext); } - self.advance(1); + let span = self.next().unwrap().span; let value = if self .peek() .map(|t| matches!(t.value, TokenValue::Semicolon)) @@ -342,13 +458,131 @@ impl Parser { } else { Some(self.parse_expr()?) }; - self.must_match_semicolon()?; - let end_span = self.peek().unwrap().span; - Some(ReturnStmt { + self.must_match_token(&TokenValue::Semicolon, "`;`")?; + Ok(ReturnStmt { value, - span: Span::from_two(start_span, end_span), + span, }) } + fn parse_if_stmt(&mut self) -> Result { + assert!(self.peek().is_some()); + if self.peek().unwrap().value != TokenValue::If { + return Err(ParseProcessError::TryNext); + } + self.advance(1); + self.must_match_token(&TokenValue::LParen, "`(`")?; + self.must_have_some("if condition expr")?; + let condition = self.parse_expr()?; + self.must_match_token(&TokenValue::RParen, "`)`")?; + let then_branch; + let mut ifelse_branch = vec![]; + let mut else_branch = None; + self.must_have_some("if statement body")?; + if self.peek().unwrap().value != TokenValue::LBrace { + let stmt = self.parse_stmt()?; + then_branch = BlockStmt { statements: vec![stmt] }; + } else { + then_branch = self.parse_block_stmt(ParseType::MustParse)?; + } + loop { + if self.peek().is_none() { + break; + } + if self.peek().unwrap().value == TokenValue::Else { + self.advance(1); + self.must_have_some("else body")?; + if self.peek().unwrap().value != TokenValue::If { + self.back(1); + break; + } + // else if + self.advance(1); + self.must_match_token(&TokenValue::LParen, "`(`")?; + self.must_have_some("else if condition expr")?; + let condition = self.parse_expr()?; + self.must_match_token(&TokenValue::RParen, "`)`")?; + let then_branch; + if self.peek().unwrap().value != TokenValue::LBrace { + let stmt = self.parse_stmt()?; + then_branch = BlockStmt { statements: vec![stmt] }; + } else { + then_branch = self.parse_block_stmt(ParseType::MustParse)?; + } + ifelse_branch.push(IfElseBranch { condition, then_branch }); + } else { + // if end ? + break; + } + } + if self.peek().is_some_and(|t| t.value == TokenValue::Else) { + // Parse else branch + self.advance(1); + self.must_have_some("else body")?; + if self.peek().unwrap().value != TokenValue::LBrace { + let stmt = self.parse_stmt()?; + else_branch = Some(BlockStmt { statements: vec![stmt] }); + } else { + else_branch = Some(self.parse_block_stmt(ParseType::MustParse)?); + } + } + Ok(IfStmt { + condition, + then_branch, + ifelse_branch, + else_branch, + }) + } + + fn parse_while_stmt(&mut self) -> Result { + assert!(self.peek().is_some()); + if self.peek().unwrap().value != TokenValue::While { + return Err(ParseProcessError::TryNext); + } + self.advance(1); + self.must_match_token(&TokenValue::LParen, "`(`")?; + self.must_have_some("while condition expr")?; + let condition = self.parse_expr()?; + self.must_match_token(&TokenValue::RParen, "`)`")?; + let body; + if self.peek().unwrap().value != TokenValue::LBrace { + let stmt = self.parse_stmt()?; + body = BlockStmt { statements: vec![stmt] }; + } else { + body = self.parse_block_stmt(ParseType::MustParse)?; + } + Ok(WhileStmt { + condition, + body, + }) + } + + fn parse_break_stmt(&mut self) -> Result { + assert!(self.peek().is_some()); + let start_span = self.peek().unwrap().span; + if self.peek().unwrap().value == TokenValue::Break { + self.advance(1); + self.must_match_token(&TokenValue::Semicolon, "`;`")?; + Ok(BreakStmt { + span: start_span, + }) + } else { + Err(ParseProcessError::TryNext) + } + } + + fn parse_continue_stmt(&mut self) -> Result { + assert!(self.peek().is_some()); + let start_span = self.peek().unwrap().span; + if self.peek().unwrap().value == TokenValue::Continue { + self.advance(1); + self.must_match_token(&TokenValue::Semicolon, "`;`")?; + Ok(ContinueStmt { + span: start_span, + }) + } else { + Err(ParseProcessError::TryNext) + } + } // fn parse_expr_tail(&mut self, left: Expr) -> Option { // match self.peek() { // None => Some(left), @@ -364,7 +598,7 @@ impl Parser { // } // } // } - fn parse_primary(&mut self) -> Option { + fn parse_primary(&mut self) -> Result { assert!(self.peek().is_some()); let token = self.next().unwrap().clone(); match token.value { @@ -380,7 +614,7 @@ impl Parser { Some(t) if matches!(t.value, TokenValue::RParen) => { let end_span = t.span; self.advance(1); - return Some(Expr { + return Ok(Expr { value: ExprValue::FuncCall(name, args), span: Span::from_two(token.span, end_span), }); @@ -391,7 +625,7 @@ impl Parser { ParseError::ExpectButEof("`)`"), token.span, ); - return None; + return Err(ParseProcessError::ErrorInMatch); } } args.push(self.parse_expr()?); @@ -402,7 +636,7 @@ impl Parser { Some(t) if matches!(t.value, TokenValue::RParen) => { let end_span = t.span; self.advance(1); - return Some(Expr { + return Ok(Expr { value: ExprValue::FuncCall(name, args), span: Span::from_two(token.span, end_span), }); @@ -413,24 +647,24 @@ impl Parser { ParseError::UnexpectedToken(token.value, "`,` or `)`"), token.span, ); - return None; + return Err(ParseProcessError::ErrorInMatch); } None => { self.diagnostics.add_from_frontend_error( ParseError::ExpectButEof("`)`"), token.span, ); - return None; + return Err(ParseProcessError::ErrorInMatch); } } } } - Some(Expr { + Ok(Expr { value: ExprValue::Var(name), span: token.span, }) }, - TokenValue::IntLit(value) => Some(Expr { + TokenValue::IntLit(value) => Ok(Expr { value: ExprValue::IntLit(value), span: token.span, }), @@ -442,14 +676,14 @@ impl Parser { ParseError::ExpectButEof("expression"), token.span, ); - return None; + return Err(ParseProcessError::ErrorInMatch); } }; match self.peek() { Some(t) if matches!(t.value, TokenValue::RParen) => { let end_span = t.span; self.advance(1); - Some(Expr { + Ok(Expr { span: Span::from_two(token.span, end_span), ..expr }) @@ -460,12 +694,12 @@ impl Parser { ParseError::UnexpectedToken(token.value, "`)`"), token.span, ); - None + Err(ParseProcessError::ErrorInMatch) } None => { self.diagnostics .add_from_frontend_error(ParseError::ExpectButEof("`)`"), expr.span); - None + Err(ParseProcessError::ErrorInMatch) } } } @@ -474,11 +708,11 @@ impl Parser { ParseError::UnexpectedToken(token.value, "expression"), token.span, ); - None + Err(ParseProcessError::ErrorInMatch) } } } - fn parse_unary(&mut self) -> Option { + fn parse_unary(&mut self) -> Result { assert!(self.peek().is_some()); let token = self.peek().unwrap().clone(); match token.value { @@ -491,12 +725,16 @@ impl Parser { ParseError::ExpectButEof("expression"), token.span, ); - return None; + return Err(ParseProcessError::ErrorInMatch); } }; - Some(Expr { - span: Span::from_two(token.span, expr.span), - ..expr + let span = Span::from_two(token.span, expr.span); + Ok(Expr { + value: ExprValue::UnaryOp { + op: UnaryOp::Add, + operand: Box::new(expr), + }, + span, }) } TokenValue::Minus => { @@ -508,19 +746,35 @@ impl Parser { ParseError::ExpectButEof("expression"), token.span, ); - return None; + return Err(ParseProcessError::ErrorInMatch); } }; - let lhs = Expr { - value: ExprValue::IntLit(0), - span: token.span, + let span = Span::from_two(token.span, rhs.span); + Ok(Expr { + value: ExprValue::UnaryOp { + op: UnaryOp::Sub, + operand: Box::new(rhs), + }, + span, + }) + } + TokenValue::Not => { + self.advance(1); + let rhs = match self.peek() { + Some(_) => self.parse_unary()?, + None => { + self.diagnostics.add_from_frontend_error( + ParseError::ExpectButEof("expression"), + token.span, + ); + return Err(ParseProcessError::ErrorInMatch); + } }; let span = Span::from_two(token.span, rhs.span); - Some(Expr { - value: ExprValue::BinaryOp { - lhs: Box::new(lhs), - op: BinaryOp::Sub, - rhs: Box::new(rhs), + Ok(Expr { + value: ExprValue::UnaryOp { + op: UnaryOp::Not, + operand: Box::new(rhs), }, span, }) @@ -528,7 +782,7 @@ impl Parser { _ => self.parse_primary(), } } - fn parse_multiplicative(&mut self) -> Option { + fn parse_multiplicative(&mut self) -> Result { assert!(self.peek().is_some()); let mut left = self.parse_unary()?; while let Some(t) = self.peek() { @@ -544,7 +798,7 @@ impl Parser { None => { self.diagnostics .add_from_frontend_error(ParseError::ExpectButEof("expression"), left.span); - return None; + return Err(ParseProcessError::ErrorInMatch); } }; let span = Span::from_two(left.span, right.span); @@ -557,9 +811,9 @@ impl Parser { span, }; } - Some(left) + Ok(left) } - fn parse_additive(&mut self) -> Option { + fn parse_additive(&mut self) -> Result { assert!(self.peek().is_some()); let mut left = self.parse_multiplicative()?; while let Some(t) = self.peek() { @@ -574,7 +828,7 @@ impl Parser { None => { self.diagnostics .add_from_frontend_error(ParseError::ExpectButEof("expression"), left.span); - return None; + return Err(ParseProcessError::ErrorInMatch); } }; let span = Span::from_two(left.span, right.span); @@ -587,9 +841,9 @@ impl Parser { span, }; } - Some(left) + Ok(left) } - fn parse_relational(&mut self) -> Option { + fn parse_relational(&mut self) -> Result { assert!(self.peek().is_some()); let mut left = self.parse_additive()?; while let Some(t) = self.peek() { @@ -608,7 +862,7 @@ impl Parser { None => { self.diagnostics .add_from_frontend_error(ParseError::ExpectButEof("expression"), left.span); - return None; + return Err(ParseProcessError::ErrorInMatch); } }; let span = Span::from_two(left.span, right.span); @@ -621,9 +875,39 @@ impl Parser { span, }; } - Some(left) + Ok(left) } - fn parse_assign(&mut self) -> Option { + fn parse_logical(&mut self) -> Result { + assert!(self.peek().is_some()); + let mut left = self.parse_relational()?; + while let Some(t) = self.peek() { + let op = match t.value { + TokenValue::Or => BinaryOp::Or, + TokenValue::And => BinaryOp::And, + _ => break, + }; + self.advance(1); + let right = match self.peek() { + Some(_) => self.parse_relational()?, + None => { + self.diagnostics + .add_from_frontend_error(ParseError::ExpectButEof("expression"), left.span); + return Err(ParseProcessError::ErrorInMatch); + } + }; + let span = Span::from_two(left.span, right.span); + left = Expr { + value: ExprValue::BinaryOp { + lhs: Box::new(left), + op, + rhs: Box::new(right), + }, + span, + }; + } + Ok(left) + } + fn parse_assign(&mut self) -> Result { assert!(self.peek().is_some()); let is_assign = matches!( (self.tokens.get(self.pos), self.tokens.get(self.pos + 1)), @@ -639,7 +923,7 @@ impl Parser { ) ); if !is_assign { - return self.parse_relational(); + return self.parse_logical(); } let lvalue_token = self.next().unwrap().clone(); @@ -652,7 +936,7 @@ impl Parser { ParseError::ExpectButEof("expression"), lvalue_token.span, ); - return None; + return Err(ParseProcessError::ErrorInMatch); } }; let lvalue = Expr { @@ -660,7 +944,7 @@ impl Parser { span: lvalue_token.span, }; let span = Span::from_two(lvalue.span, rvalue.span); - Some(Expr { + Ok(Expr { value: ExprValue::Assign { lvalue: Box::new(lvalue), rvalue: Box::new(rvalue), @@ -673,15 +957,20 @@ impl Parser { := assign assign - := relational + := logical | IDENT "=" assign - + logical + := relational + | logical "||" relational + | logical "&&" relational relational := additive | relational "<" additive | relational ">" additive | relational "<=" additive | relational ">=" additive + | relational "==" additive + | relational "!=" additive additive := multiplicative @@ -703,7 +992,7 @@ impl Parser { | NUMBER | "(" expr ")" */ - fn parse_expr(&mut self) -> Option { + fn parse_expr(&mut self) -> Result { assert!(self.peek().is_some()); self.parse_assign() } @@ -769,4 +1058,14 @@ mod tests { test_case("0-3,14-25"); // test_case("0-3,14-25"); } + + #[test] + fn test_if_while() { + test_case("26-32,34-41,46-51,57"); + } + + #[test] + fn test_error() { + test_case("999"); + } } diff --git a/src/frontend/types.rs b/src/frontend/types.rs index dff97d8..1ad2ea8 100644 --- a/src/frontend/types.rs +++ b/src/frontend/types.rs @@ -17,7 +17,7 @@ pub enum TokenValue { TypeIdent(TypeIdent), Plus, Minus, Star, Slash, Percent, - Equal, DoubleEqual, Not, NotEqual, Less, LessEqual, Greater, GreaterEqual, + Equal, DoubleEqual, Not, NotEqual, Less, LessEqual, Greater, GreaterEqual, And, Or, LParen, RParen, LBrace, RBrace, @@ -58,6 +58,8 @@ impl std::fmt::Display for TokenValue { TokenValue::Equal => write!(f, "`=`"), TokenValue::DoubleEqual => write!(f, "`==`"), TokenValue::Not => write!(f, "`!`"), + TokenValue::And => write!(f, "`&&`"), + TokenValue::Or => write!(f, "`||`"), TokenValue::NotEqual => write!(f, "`!=`"), TokenValue::Less => write!(f, "`<`"), TokenValue::LessEqual => write!(f, "`<=`"), diff --git a/src/ir/err.rs b/src/ir/err.rs index 71d9f02..3ae12eb 100644 --- a/src/ir/err.rs +++ b/src/ir/err.rs @@ -24,4 +24,12 @@ pub enum IRError { TypeMismatch(IRType, IRType), #[error("invalid assignment target")] InvalidAssignmentTarget, + #[error("break statement outside of loop")] + BreakOutsideLoop, + #[error("continue statement outside of loop")] + ContinueOutsideLoop, + #[error("invalid parameter type: {0}")] + InvalidParameterType(IRType), + #[error("return expression on void function")] + ReturnExpressionOnVoidFunction, } \ No newline at end of file diff --git a/src/ir/generator.rs b/src/ir/generator.rs index f931826..41404df 100644 --- a/src/ir/generator.rs +++ b/src/ir/generator.rs @@ -1,12 +1,22 @@ -use std::{collections::{BTreeMap, BTreeSet}, result, vec}; - -use crate::{ast::types::{BlockStmt, CompileUnit, Expr, FuncDeclStmt, GlobalDeclStmt, ReturnStmt, Statement, VarDeclStmt}, diagnostic::Diagnositics, ir::{err::IRError, types::{BinaryOp, Function, IRInstr, IRType, MoveRValue, Variable, VariableType}}}; +use std::{collections::{BTreeMap, BTreeSet}, vec}; +use crate::{ast::types::{BlockStmt, BreakStmt, CompileUnit, ContinueStmt, Expr, ExprValue, FuncDeclStmt, GlobalDeclStmt, IfElseBranch, IfStmt, ReturnStmt, Statement, VarDeclStmt, WhileStmt}, diagnostic::Diagnositics, ir::{err::IRError, types::{BinaryOp, CmpOp, Function, IRInstr, IRType, MoveRValue, UnaryOp, Variable, VariableOrIntLit, VariableType}}}; +use crate::ast::types::BinaryOp as AstBinaryOp; +use crate::ast::types::UnaryOp as AstUnaryOp; pub struct Generator { var_manager: VariableManager, function_map: BTreeMap, current_func_return_type: Option, diagnostic: Diagnositics, + current_exit_label: Vec>, // true exit, false exit + // About exit label passing: + // for non-logical parent expr, current_exit_label is None + // this means we need to create label for logical child expr, and calculate the value + // for logical parent expr, we can directly use the label for logical child expr, and no need to calculate + // if child expr isn't logical, we need to do cmp to decide which label to goto + while_exit_label: Vec<(usize, usize)>, // continue exit, break exit + func_exit: Option<(usize, Option)>, // (label, return_var) + label_counter: usize } impl Generator { @@ -17,14 +27,27 @@ impl Generator { parameter_types: vec![IRType::I32], return_type: IRType::Void, }); + function_map.insert("getint".to_string(), Function { + name: "getint".to_string(), + parameter_types: vec![], + return_type: IRType::I32, + }); Self { var_manager: VariableManager::new(), current_func_return_type: None, diagnostic: Diagnositics::new(), function_map, + current_exit_label: vec![], + while_exit_label: vec![], + func_exit: None, + label_counter: 0, } } - + fn request_label(&mut self) -> usize { + let label = self.label_counter; + self.label_counter += 1; + label + } pub fn emit(&mut self, compile_unit: CompileUnit) -> Vec { self.generate_compile_unit(compile_unit) } @@ -51,12 +74,12 @@ impl Generator { let mut instrs = vec![]; let var_type = if is_global { VariableType::Global } else { VariableType::Local }; for value in var_decl.values { - match self.var_manager.declare_variable(&value.name, var_type, value.var_type.into()) { + match self.var_manager.declare_variable(&value.name, var_type, var_decl.data_type.into()) { Ok(var) => { if is_global { instrs.push(IRInstr::Declare(var)); } } Err(e) => { - self.diagnostic.add_from_ir_error(e, value.span); + self.diagnostic.add_from_ir_error(e, value.name_span); } } } @@ -66,7 +89,7 @@ impl Generator { fn generate_func_decl(&mut self, func_decl: FuncDeclStmt) -> Vec { if self.function_map.contains_key(&func_decl.name) { - self.diagnostic.add_from_ir_error(IRError::FunctionHasBeenDefined(func_decl.name.clone()), func_decl.span); + self.diagnostic.add_from_ir_error(IRError::FunctionHasBeenDefined(func_decl.name.clone()), func_decl.name_span); return vec![]; } self.current_func_return_type = Some(func_decl.return_type.into()); @@ -75,7 +98,7 @@ impl Generator { match self.var_manager.declare_variable(¶m.name, VariableType::Local, param.param_type.into()) { Ok(var) => Ok(var), Err(e) => { - self.diagnostic.add_from_ir_error(e, param.span); + self.diagnostic.add_from_ir_error(e, param.name_span); Err(()) } } @@ -85,6 +108,14 @@ impl Generator { }; let temp_parameters = parameters.iter().map(|param| self.var_manager.declare_param_temp(param.data_type)).collect::>(); let mut body_instrs = vec![]; + self.func_exit = Some((self.request_label(), { + let ret_type = func_decl.return_type.into(); + if ret_type != IRType::Void { + Some(self.var_manager.declare_unamed_local(ret_type)) + } else { + None + } + })); let block_instrs = self.generate_block_stmt(func_decl.body); for var in self.var_manager.get_cur_func_variables() { if matches!(var.var_type, VariableType::ParamTemp) { @@ -97,7 +128,9 @@ impl Generator { body_instrs.push(IRInstr::Move(*temp_param, MoveRValue::Var(*param))); }); body_instrs.extend(block_instrs); - + let func_exit = self.func_exit.take().unwrap(); + body_instrs.push(IRInstr::Label(func_exit.0)); + body_instrs.push(IRInstr::Exit(func_exit.1)); self.var_manager.exit_scope(); self.current_func_return_type = None; self.var_manager.clear_local_counter(); @@ -121,13 +154,28 @@ impl Generator { use Statement::*; let instrs = match stmt { Return(return_stmt) => self.generate_return_stmt(return_stmt), + If(if_stmt) => self.generate_if_stmt(if_stmt), + While(while_stmt) => self.generate_while_stmt(while_stmt), + Break(break_stmt) => self.generate_break_stmt(break_stmt), + Continue(continue_stmt) => self.generate_continue_stmt(continue_stmt), Block(block_stmt) => { self.var_manager.enter_scope(); let block_instrs = self.generate_block_stmt(block_stmt); self.var_manager.exit_scope(); block_instrs }, - Expr(expr) => self.generate_expr(expr).0, + Expr(expr) => { + self.current_exit_label.push(None); + let (instrs, _) = match self.generate_expr(expr) { + Some(res) => res, + None => { + self.current_exit_label.pop(); + return vec![]; + } + }; + self.current_exit_label.pop(); + instrs + }, VarDecl(var_decl) => self.generate_var_decl(var_decl, false), }; instrs @@ -135,22 +183,160 @@ impl Generator { fn generate_return_stmt(&mut self, return_stmt: ReturnStmt) -> Vec { let mut instrs = vec![]; + let func_exit = self.func_exit.unwrap(); match return_stmt.value { Some(expr) => { - let (value_instrs, value_var) = self.generate_expr(expr); - instrs.extend(value_instrs); - if value_var.is_some() { - instrs.push(IRInstr::Exit(value_var)); + if func_exit.1.is_none() { + // shouldn't return value but return stmt has expr; + self.diagnostic.add_from_ir_error(IRError::ReturnExpressionOnVoidFunction, return_stmt.span); + return vec![]; } + self.current_exit_label.push(None); + let (value_instrs, value_var) = match self.generate_expr(expr) { + Some(res) => res, + None => { + self.current_exit_label.pop(); + return vec![]; + } + }; + self.current_exit_label.pop(); + if value_var.is_none() { + self.diagnostic.add_from_ir_error(IRError::InvalidOperand(IRType::Void), return_stmt.span); + self.current_exit_label.pop(); + return vec![]; + } + instrs.extend(value_instrs); + instrs.push(IRInstr::Move(func_exit.1.unwrap(), MoveRValue::Var(value_var.unwrap()))); + // if value_var.is_none() { + // self.diagnostic.add_from_ir_error(IRError::InvalidOperand(IRType::Void), return_stmt.span); + // return vec![]; + // } + // let value_var = value_var.unwrap(); } - None => instrs.push(IRInstr::Exit(None)), + None => { + if func_exit.1.is_some() { + // should return value but return stmt has no expr; + self.diagnostic.add_from_ir_error(IRError::TypeMismatch(self.current_func_return_type.unwrap(), IRType::Void), return_stmt.span); + } + }, } + instrs.push(IRInstr::Goto(func_exit.0)); instrs } + fn generate_while_stmt(&mut self, while_stmt: WhileStmt) -> Vec { + let mut instrs = vec![]; + let cond_label = self.request_label(); + let body_label = self.request_label(); + let exit_label = self.request_label(); + instrs.push(IRInstr::Label(cond_label)); + self.current_exit_label.push(Some((body_label, exit_label))); + let while_cond_span = while_stmt.condition.span; + let (cond_instrs, cond_var) = match self.generate_expr(while_stmt.condition) { + Some(res) => res, + None => { + self.current_exit_label.pop(); + return vec![]; + } + }; + self.current_exit_label.pop(); + if cond_var.is_none() { + self.diagnostic.add_from_ir_error(IRError::InvalidOperand(IRType::Void), while_cond_span); + return vec![]; + } + instrs.extend(cond_instrs); + instrs.push(IRInstr::Label(body_label)); + self.while_exit_label.push((cond_label, exit_label)); + instrs.extend(self.generate_block_stmt(while_stmt.body)); + self.while_exit_label.pop(); + instrs.push(IRInstr::Goto(cond_label)); + instrs.push(IRInstr::Label(exit_label)); + instrs + } + fn generate_if_stmt(&mut self, if_stmt: IfStmt) -> Vec { + let mut instrs = vec![]; + let then_label = self.request_label(); + let exit_label = self.request_label(); + let mut labels = vec![then_label]; + for _ in &if_stmt.ifelse_branch { + let else_if_label = self.request_label(); + let else_if_body_label = self.request_label(); - fn generate_expr(&mut self, expr: Expr) -> (Vec, Option) { - use crate::ast::types::ExprValue; - match expr.value { + labels.push(else_if_label); + labels.push(else_if_body_label); + } + if if_stmt.else_branch.is_some() { + let else_label = self.request_label(); + labels.push(else_label); + } + labels.push(exit_label); + // now generate if expr, true exit to labels[0], false exit to labels[1] + self.current_exit_label.push(Some((labels[0], labels[1]))); + let cond_span = if_stmt.condition.span; + let (cond_instrs, cond_var) = match self.generate_expr(if_stmt.condition) { + Some(res) => res, + None => { + self.current_exit_label.pop(); + return vec![]; + } + }; + self.current_exit_label.pop(); + if cond_var.is_none() { + self.diagnostic.add_from_ir_error(IRError::InvalidOperand(IRType::Void), cond_span); + return vec![]; + } + instrs.extend(cond_instrs); + instrs.push(IRInstr::Label(labels[0])); + instrs.extend(self.generate_block_stmt(if_stmt.then_branch)); + instrs.push(IRInstr::Goto(exit_label)); + for (i, else_if_branch) in if_stmt.ifelse_branch.into_iter().enumerate() { + let IfElseBranch { condition: else_if_cond, then_branch: else_if_block } = else_if_branch; + instrs.push(IRInstr::Label(labels[i * 2 + 1])); + self.current_exit_label.push(Some((labels[i * 2 + 2], labels[i * 2 + 3]))); + let else_if_cond_span = else_if_cond.span; + let (else_if_cond_instrs, else_if_cond_var) = match self.generate_expr(else_if_cond) { + Some(res) => res, + None => { + self.current_exit_label.pop(); + continue; + } + }; + self.current_exit_label.pop(); + if else_if_cond_var.is_none() { + self.diagnostic.add_from_ir_error(IRError::InvalidOperand(IRType::Void), else_if_cond_span); + return vec![]; + } + instrs.extend(else_if_cond_instrs); + instrs.push(IRInstr::Label(labels[i * 2 + 2])); + instrs.extend(self.generate_block_stmt(else_if_block)); + instrs.push(IRInstr::Goto(exit_label)); + } + if let Some(else_block) = if_stmt.else_branch { + instrs.push(IRInstr::Label(labels[labels.len() - 2])); + instrs.extend(self.generate_block_stmt(else_block)); + instrs.push(IRInstr::Goto(exit_label)); + } + instrs.push(IRInstr::Label(exit_label)); + instrs + } + fn generate_continue_stmt(&mut self, stmt: ContinueStmt) -> Vec { + if let Some((continue_label, _)) = self.while_exit_label.last() { + vec![IRInstr::Goto(*continue_label)] + } else { + self.diagnostic.add_from_ir_error(IRError::ContinueOutsideLoop, stmt.span); + vec![] + } + } + fn generate_break_stmt(&mut self, stmt: BreakStmt) -> Vec { + if let Some((_, break_label)) = self.while_exit_label.last() { + vec![IRInstr::Goto(*break_label)] + } else { + self.diagnostic.add_from_ir_error(IRError::BreakOutsideLoop, stmt.span); + vec![] + } + } + fn generate_expr(&mut self, expr: Expr) -> Option<(Vec, Option)> { + // there may be some expr that doesn't produce value, like void func call + let (mut instrs, var) = match expr.value { ExprValue::IntLit(i) => { // TODO: convert check let var = self.var_manager.declare_temp(IRType::I32); @@ -161,7 +347,7 @@ impl Generator { (vec![], Some(var)) } else { self.diagnostic.add_from_ir_error(IRError::VariableNotFound(name.clone()), expr.span); - (vec![], None) + return None; } }, ExprValue::Assign { lvalue, rvalue } => { @@ -171,69 +357,249 @@ impl Generator { Some(var) => var, None => { self.diagnostic.add_from_ir_error(IRError::VariableNotFound(name.clone()), lvalue.span); - return (vec![], None); + return None; } }; - let (mut instrs, rvalue_var) = self.generate_expr(*rvalue); + self.current_exit_label.push(None); + let rvalue_span = rvalue.span; + let (mut instrs, rvalue_var) = self.generate_expr(*rvalue)?; + self.current_exit_label.pop(); + // TODO: further check + // if var.data_type != rvalue_var.data_type { + // self.diagnostic.add_from_ir_error(IRError::TypeMismatch(var.data_type, rvalue_var.data_type), lvalue.span); + // return (vec![], None); + // } if rvalue_var.is_none() { - return (vec![], None); + self.diagnostic.add_from_ir_error(IRError::InvalidOperand(IRType::Void), rvalue_span); + return None; } - let rvalue_var = rvalue_var.unwrap(); - if var.data_type != rvalue_var.data_type { - self.diagnostic.add_from_ir_error(IRError::TypeMismatch(var.data_type, rvalue_var.data_type), lvalue.span); - return (vec![], None); - } - instrs.push(IRInstr::Move(var, MoveRValue::Var(rvalue_var))); + instrs.push(IRInstr::Move(var, MoveRValue::Var(rvalue_var.unwrap()))); let temp_var = self.var_manager.declare_temp(var.data_type); instrs.push(IRInstr::Move(temp_var, MoveRValue::Var(var))); (instrs, Some(temp_var)) } else { self.diagnostic.add_from_ir_error(IRError::InvalidAssignmentTarget, lvalue.span); - return (vec![], None); + return None; } }, + ExprValue::UnaryOp { op, operand } => { + let mut parent_is_logical = true; + let exit_passdown = if matches!(op, AstUnaryOp::Not) { + Some(self.current_exit_label.last().cloned().flatten().map_or_else(|| { + parent_is_logical = false; + (self.request_label(), self.request_label()) + }, |(true_exit, false_exit)| (false_exit, true_exit))) + } else { + None + }; + // NOT: + // $x = cmp eq left_var, 0 + // bc, $x, true_exit, false_exit + // false_exit: #only when parent isn't logical + // true_exit: + // $dest = $x + + // +--------+-------------------------+-------------------------+----------------------------+ + // | parent | (true_exit, false_exit) | (true_exit, false_exit) | None | + // | self | not | add/sub | not(true_exit, false_exit) | + // | child | (false_exit, true_exit) | None | (true_exit, false_exit) | + // +--------+-------------------------+-------------------------+----------------------------+ + + + self.current_exit_label.push(exit_passdown); + let operand_span = operand.span; + let (mut instrs, operand_var) = self.generate_expr(*operand)?; + self.current_exit_label.pop(); + if operand_var.is_none() { + self.diagnostic.add_from_ir_error(IRError::InvalidOperand(IRType::Void), operand_span); + return None; + } + let operand_var = operand_var.unwrap(); + let dest_var = match op { + AstUnaryOp::Add => { + let dest_var = self.var_manager.declare_temp(operand_var.data_type); + instrs.push(IRInstr::Move(dest_var, MoveRValue::Var(operand_var))); + dest_var + }, + AstUnaryOp::Sub => { + let dest_var = self.var_manager.declare_temp(operand_var.data_type); + instrs.push(IRInstr::Unary(dest_var, UnaryOp::Neg, operand_var)); + dest_var + }, + AstUnaryOp::Not => { + let dest_var = self.var_manager.declare_unamed_local(operand_var.data_type); + // child will do the cmp + if !parent_is_logical { + let exit = exit_passdown.unwrap(); // (false_exit, true_exit) (consider `not`) + let final_exit = self.request_label(); + instrs.push(IRInstr::Label(exit.1)); + instrs.push(IRInstr::Move(dest_var, MoveRValue::ConstInt(1))); + instrs.push(IRInstr::Goto(final_exit)); + instrs.push(IRInstr::Label(exit.0)); + instrs.push(IRInstr::Move(dest_var, MoveRValue::ConstInt(0))); + instrs.push(IRInstr::Goto(final_exit)); + instrs.push(IRInstr::Label(final_exit)); + + } + // return directly since it's logical expr + return Some((instrs, Some(dest_var))); + } + }; + (instrs, Some(dest_var)) + }, ExprValue::BinaryOp { lhs, op, rhs } => { let lhs_span = lhs.span; let rhs_span = rhs.span; - let (mut instrs, left_var) = self.generate_expr(*lhs); + + // +--------+-------------------------+-------------------------+-------------------------+ + // | parent | (true_exit, false_exit) | (true_exit, false_exit) | (true_exit, false_exit) | + // | self | and(next) | or(next) | others | + // | lhs | (next, false_exit) | (true_exit, next) | None | + // | rhs | (true_exit, false_exit) | (next, false_exit) | None | + // +--------+-------------------------+-------------------------+-------------------------+ + + + // +--------+----------------------------------+-------------------------------+--------+ + // | parent | None | None | None | + // | self | and(true_exit, next, false_exit) | or(true_exit,next,false_exit) | others | + // | lhs | (next, false_exit) | (true_exit, next) | None | + // | rhs | (true_exit, false_exit) | (next, false_exit) | None | + // +--------+----------------------------------+-------------------------------+--------+ + + + let mut parent_exit = None; + let exit = if op.is_logical() { + Some(self.current_exit_label.last().cloned().flatten().map_or( + (self.request_label(), self.request_label(), self.request_label()), + |(true_exit, false_exit)| { + parent_exit = Some((true_exit, false_exit)); + (true_exit, self.request_label(), false_exit) + } + )) + } else { + None + }; // (true_exit, next_label, false_exit) + let lhs_exit_passdown = exit.map(|(true_exit, next_label, false_exit)| match op { + AstBinaryOp::And => (next_label, false_exit), + AstBinaryOp::Or => (true_exit, next_label), + _ => unreachable!(), + }); + let rhs_exit_passdown = exit.map(|(true_exit, _, false_exit)| match op { + AstBinaryOp::And => (true_exit, false_exit), + AstBinaryOp::Or => (true_exit, false_exit), + _ => unreachable!(), + }); + self.current_exit_label.push(lhs_exit_passdown); + let (mut instrs, left_var) = self.generate_expr(*lhs)?; + self.current_exit_label.pop(); if left_var.is_none() { - return (vec![], None); - } - let left_var = left_var.unwrap(); - let (right_instrs, right_var) = self.generate_expr(*rhs); - if right_var.is_none() { - return (vec![], None); - } - let right_var = right_var.unwrap(); - instrs.extend(right_instrs); - let mut has_void = false; - if matches!(left_var.data_type, IRType::Void) { self.diagnostic.add_from_ir_error(IRError::InvalidOperand(IRType::Void), lhs_span); - has_void = true; + return None; } - if matches!(right_var.data_type, IRType::Void) { + let mut left_var = left_var.unwrap(); + self.current_exit_label.push(rhs_exit_passdown); + let (right_instrs, right_var) = self.generate_expr(*rhs)?; + self.current_exit_label.pop(); + if right_var.is_none() { self.diagnostic.add_from_ir_error(IRError::InvalidOperand(IRType::Void), rhs_span); - has_void = true; + return None; } - if has_void { - return (vec![], None); + let mut right_var = right_var.unwrap(); + // check implicit convert + let convert_to; + if op.is_logical() { + // we dont really care since we use cmp + convert_to = left_var.data_type; + } else { + if let Some(ty) = IRType::get_elevate_result(left_var.data_type, right_var.data_type) { + convert_to = ty; + } else { + self.diagnostic.add_from_ir_error(IRError::IncompatiableOperand(left_var.data_type, right_var.data_type), lhs_span); + self.diagnostic.add_from_ir_error(IRError::IncompatiableOperand(left_var.data_type, right_var.data_type), rhs_span); + return None; + } } - if left_var.data_type != right_var.data_type { - self.diagnostic.add_from_ir_error(IRError::IncompatiableOperand(left_var.data_type, right_var.data_type), lhs_span); - self.diagnostic.add_from_ir_error(IRError::InvalidOperand(left_var.data_type), rhs_span); - return (vec![], None); + // do implicit convert if needed + // TODO: further check + if !op.is_logical() && convert_to != left_var.data_type { + let temp_var = self.var_manager.declare_temp(convert_to); + instrs.push(IRInstr::Move(temp_var, MoveRValue::Var(left_var))); + left_var = temp_var; } let result_type; - match Into::::into(op) { - BinaryOp::Add | BinaryOp::Sub | BinaryOp::Mul | BinaryOp::Div | BinaryOp::Mod => { + match op { + AstBinaryOp::Add | AstBinaryOp::Sub | AstBinaryOp::Mul | AstBinaryOp::Div | AstBinaryOp::Mod => { result_type = left_var.data_type; }, - BinaryOp::Eq | BinaryOp::Ne | BinaryOp::Lt | BinaryOp::Gt | BinaryOp::Le | BinaryOp::Ge => { + AstBinaryOp::Equal | AstBinaryOp::NotEqual | AstBinaryOp::Less | AstBinaryOp::Greater | AstBinaryOp::LessEqual | AstBinaryOp::GreaterEqual + | AstBinaryOp::And | AstBinaryOp::Or => { result_type = IRType::I1; } } let dest_var = self.var_manager.declare_temp(result_type); - instrs.push(IRInstr::Binary(dest_var, left_var, op.into(), right_var)); + match op { + AstBinaryOp::And | AstBinaryOp::Or => { + instrs.push(IRInstr::Label(exit.unwrap().1)); + }, + _ => {} + } + // for logical operator, considering short-circuit + // AND: + // $x = cmp ne left_var, 0 + // bc, $x, next, false_exit + // next: + // $x = cmp eq right_var, 0 + // bc, $x, true_exit, false_exit + // false_exit: #only when parent isn't logical + // true_exit: + // OR: + // $x = cmp ne left_var, 0 + // bc, $x, true_exit, next + // next: + // $x = cmp ne right_var, 0 + // bc, $x, true_exit, false_exit + // false_exit: #only when parent isn't logical + // true_exit: + instrs.extend(right_instrs); + if !op.is_logical() && convert_to != right_var.data_type { + let temp_var = self.var_manager.declare_temp(convert_to); + instrs.push(IRInstr::Move(temp_var, MoveRValue::Var(right_var))); + right_var = temp_var; + } + if !op.is_logical() && let Some((true_exit, false_exit)) = parent_exit { + let final_exit = self.request_label(); + instrs.push(IRInstr::Label(true_exit)); + instrs.push(IRInstr::Move(dest_var, MoveRValue::ConstInt(1))); + instrs.push(IRInstr::Goto(final_exit)); + instrs.push(IRInstr::Label(false_exit)); + instrs.push(IRInstr::Move(dest_var, MoveRValue::ConstInt(0))); + instrs.push(IRInstr::Label(final_exit)); + + + } + if op.is_logical() { + return Some((instrs, Some(dest_var))); + } else { + if op.is_cmp() { + instrs.push(IRInstr::Cmp(dest_var, VariableOrIntLit::Var(left_var), op.into(), VariableOrIntLit::Var(right_var))); + } else { + instrs.push(IRInstr::Binary(dest_var, left_var, op.into(), right_var)); + } + } + // if !op.is_logical() { + // if let Some((true_exit, false_exit)) = self.current_exit_label.last().cloned().flatten() { + // // parent is logical + // instrs.push(IRInstr::Cmp(dest_var, VariableOrIntLit::Var(dest_var), CmpOp::Ne, VariableOrIntLit::IntLit(0))); + // instrs.push(IRInstr::CondGoto(dest_var, true_exit, false_exit)); + // } else { + // // parent isn't logical + // if op.is_cmp() { + // instrs.push(IRInstr::Cmp(dest_var, VariableOrIntLit::Var(left_var), op.into(), VariableOrIntLit::Var(right_var))); + // } else { + // instrs.push(IRInstr::Binary(dest_var, left_var, op.into(), right_var)); + // } + // } + // } (instrs, Some(dest_var)) }, ExprValue::FuncCall(func_name, args) => { @@ -243,35 +609,40 @@ impl Generator { func_def } else { self.diagnostic.add_from_ir_error(IRError::FunctionNotFound(func_name.clone()), expr.span); - return (vec![], None); + return None; }.clone(); + if args.len() < func_def.parameter_types.len() { self.diagnostic.add_from_ir_error(IRError::TooFewArguments(func_def.parameter_types.len(), args.len()), expr.span); - return (vec![], None); + return None; } if args.len() > func_def.parameter_types.len() { self.diagnostic.add_from_ir_error(IRError::TooManyArguments(func_def.parameter_types.len(), args.len()), expr.span); - return (vec![], None); + return None; } let mut has_error = false; - for (i, arg) in args.into_iter().enumerate() { - let (arg_instrs, arg_var) = self.generate_expr(arg); - if arg_var.is_none() { + for parameter_type in &func_def.parameter_types { + if matches!(parameter_type, IRType::Void) { + self.diagnostic.add_from_ir_error(IRError::InvalidParameterType(IRType::Void), expr.span); has_error = true; - continue; } - let arg_var = arg_var.unwrap(); + } + if has_error { + return None; + } + for (i, arg) in args.into_iter().enumerate() { + let (arg_instrs, arg_var) = self.generate_expr(arg)?; let parameter_type = func_def.parameter_types.get(i).unwrap(); - if *parameter_type != arg_var.data_type { - self.diagnostic.add_from_ir_error(IRError::TypeMismatch(*parameter_type, arg_var.data_type), expr.span); + if *parameter_type != arg_var.map_or(IRType::Void, |v| v.data_type) { + self.diagnostic.add_from_ir_error(IRError::TypeMismatch(*parameter_type, arg_var.map_or(IRType::Void, |v| v.data_type)), expr.span); has_error = true; continue; } instrs.extend(arg_instrs); - arg_vars.push(arg_var); + arg_vars.push(arg_var.unwrap()); } if has_error { - return (vec![], None); + return None; } let ret_variable = if matches!(func_def.return_type, IRType::Void) { None @@ -281,6 +652,14 @@ impl Generator { instrs.push(IRInstr::FuncCall(func_def.clone(), arg_vars, ret_variable)); (instrs, ret_variable) } + }; + if let Some((true_exit, false_exit)) = self.current_exit_label.last().cloned().flatten() { + let cmp_var = self.var_manager.declare_temp(IRType::I1); + instrs.push(IRInstr::Cmp(cmp_var, VariableOrIntLit::Var(var.unwrap()), CmpOp::Ne, VariableOrIntLit::IntLit(0))); + instrs.push(IRInstr::CondGoto(cmp_var, true_exit, false_exit)); + Some((instrs, var)) + } else { + Some((instrs, var)) } } } @@ -350,6 +729,12 @@ impl VariableManager { self.local_var_type.push(var); var } + pub fn declare_unamed_local(&mut self, var_data_type: IRType) -> Variable { + let var = Variable { index: self.local_counter, var_type: VariableType::Local, data_type: var_data_type }; + self.local_counter += 1; + self.local_var_type.push(var); + var + } pub fn declare_param_temp(&mut self, var_data_type: IRType) -> Variable { let var = Variable { index: self.local_counter, var_type: VariableType::ParamTemp, data_type: var_data_type }; self.local_counter += 1; @@ -439,4 +824,8 @@ mod tests { test_case("0-3,14-25"); // test_case("0-3,14-25"); } + #[test] + fn test_if_while() { + test_case("26-32,34-41,46-51,57"); + } } \ No newline at end of file diff --git a/src/ir/types.rs b/src/ir/types.rs index 648dc00..5260214 100644 --- a/src/ir/types.rs +++ b/src/ir/types.rs @@ -3,15 +3,32 @@ use std::fmt::Display; use crate::ast::types::Type as AstType; use crate::ast::types::BinaryOp as AstBinaryOp; use crate::ir::err::IRError; + +pub enum VariableOrIntLit { + Var(Variable), + IntLit(i32), +} + +impl Display for VariableOrIntLit { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + VariableOrIntLit::Var(v) => write!(f, "{}", v), + VariableOrIntLit::IntLit(i) => write!(f, "{}", i), + } + } +} pub enum IRInstr { Declare(Variable), DefineFunc(Function, Vec, Vec), Entry, Binary(Variable, Variable, BinaryOp, Variable), + Cmp(Variable, VariableOrIntLit, CmpOp, VariableOrIntLit), + Unary(Variable, UnaryOp, Variable), Exit(Option), FuncCall(Function, Vec, Option), - // Goto, - // Label, + Goto(usize), + CondGoto(Variable, usize, usize), // condition, true label, false label + Label(usize), Move(Variable, MoveRValue), } @@ -20,6 +37,7 @@ impl Display for IRInstr { match self { IRInstr::Entry => write!(f, "entry"), IRInstr::Binary(dest, left, op, right) => write!(f, "{} = {} {},{}", dest, op, left, right), + IRInstr::Cmp(dest, left, op, right) => write!(f, "{} = icmp {} {},{}", dest, op, left, right), IRInstr::Exit(v) => if let Some(v) = v { write!(f, "exit {}", v) } else { write!(f, "exit") }, IRInstr::FuncCall(func, args, dest) => { if let Some(dest) = dest { @@ -33,7 +51,11 @@ impl Display for IRInstr { IRInstr::DefineFunc(func, args, body) => { let body_str = body.iter().map(|instr| format!(" {}", instr)).collect::>().join("\n"); write!(f, "define {} {{\n{}\n}}", func.to_decl_string(args), body_str) - } + }, + IRInstr::Goto(label) => write!(f, "br label .L{}", label), + IRInstr::CondGoto(cond, true_label, false_label) => write!(f, "bc {}, .L{}, .L{}", cond, true_label, false_label), + IRInstr::Label(label) => write!(f, ".L{}:", label), + IRInstr::Unary(dest, op, src) => write!(f, "{} = {} {}", dest, op, src), } } } @@ -51,6 +73,16 @@ impl IRType { IRType::Void => 0, } } + + pub fn get_elevate_result(lhs: IRType, rhs: IRType) -> Option { + if lhs == rhs { + Some(lhs) + } else if (lhs == IRType::I32 && rhs == IRType::I1) || (lhs == IRType::I1 && rhs == IRType::I32) { + Some(IRType::I32) + } else { + None + } + } } impl Display for IRType { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { @@ -153,6 +185,8 @@ pub enum BinaryOp { Mul, Div, Mod, +} +pub enum CmpOp { Le, Lt, Gt, @@ -160,6 +194,9 @@ pub enum BinaryOp { Ne, Eq, } +pub enum UnaryOp { + Neg +} impl Display for BinaryOp { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { @@ -169,12 +206,35 @@ impl Display for BinaryOp { BinaryOp::Mul => "mul", BinaryOp::Div => "div", BinaryOp::Mod => "mod", - BinaryOp::Le => "le", - BinaryOp::Lt => "lt", - BinaryOp::Gt => "gt", - BinaryOp::Ge => "ge", - BinaryOp::Ne => "ne", - BinaryOp::Eq => "eq", + // BinaryOp::Le => "cmp le", + // BinaryOp::Lt => "cmp lt", + // BinaryOp::Gt => "cmp gt", + // BinaryOp::Ge => "cmp ge", + // BinaryOp::Ne => "cmp ne", + // BinaryOp::Eq => "cmp eq", + }; + write!(f, "{}", op_str) + } +} + +impl Display for CmpOp { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let op_str = match self { + CmpOp::Le => "le", + CmpOp::Lt => "lt", + CmpOp::Gt => "gt", + CmpOp::Ge => "ge", + CmpOp::Ne => "ne", + CmpOp::Eq => "eq", + }; + write!(f, "{}", op_str) + } +} + +impl Display for UnaryOp { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let op_str = match self { + UnaryOp::Neg => "neg", }; write!(f, "{}", op_str) } @@ -187,12 +247,26 @@ impl From for BinaryOp { AstBinaryOp::Mul => BinaryOp::Mul, AstBinaryOp::Div => BinaryOp::Div, AstBinaryOp::Mod => BinaryOp::Mod, - AstBinaryOp::Equal => BinaryOp::Eq, - AstBinaryOp::NotEqual => BinaryOp::Ne, - AstBinaryOp::Less => BinaryOp::Lt, - AstBinaryOp::LessEqual => BinaryOp::Le, - AstBinaryOp::Greater => BinaryOp::Gt, - AstBinaryOp::GreaterEqual => BinaryOp::Ge, + // AstBinaryOp::Equal => BinaryOp::Eq, + // AstBinaryOp::NotEqual => BinaryOp::Ne, + // AstBinaryOp::Less => BinaryOp::Lt, + // AstBinaryOp::LessEqual => BinaryOp::Le, + // AstBinaryOp::Greater => BinaryOp::Gt, + // AstBinaryOp::GreaterEqual => BinaryOp::Ge, + _ => unreachable!(), + } + } +} +impl From for CmpOp { + fn from(ast_op: AstBinaryOp) -> Self { + match ast_op { + AstBinaryOp::Equal => CmpOp::Eq, + AstBinaryOp::NotEqual => CmpOp::Ne, + AstBinaryOp::Less => CmpOp::Lt, + AstBinaryOp::LessEqual => CmpOp::Le, + AstBinaryOp::Greater => CmpOp::Gt, + AstBinaryOp::GreaterEqual => CmpOp::Ge, + _ => unreachable!(), } } } \ No newline at end of file