Merge branch 'feat/parser'

This commit is contained in:
2026-05-09 12:32:58 +08:00
12 changed files with 1145 additions and 36 deletions
+2 -1
View File
@@ -1,2 +1,3 @@
/target
/testcases
/testcases
/output
Generated
+56
View File
@@ -28,12 +28,55 @@ dependencies = [
"unicode-width",
]
[[package]]
name = "equivalent"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
[[package]]
name = "fixedbitset"
version = "0.5.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99"
[[package]]
name = "foldhash"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
[[package]]
name = "hashbrown"
version = "0.15.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1"
dependencies = [
"foldhash",
]
[[package]]
name = "hashbrown"
version = "0.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4f467dd6dccf739c208452f8014c75c18bb8301b050ad1cfb27153803edb0f51"
[[package]]
name = "heck"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
[[package]]
name = "indexmap"
version = "2.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d466e9454f08e4a911e14806c24e16fba1b4c121d1ea474396f396069cf949d9"
dependencies = [
"equivalent",
"hashbrown 0.17.0",
]
[[package]]
name = "memchr"
version = "2.8.0"
@@ -113,6 +156,18 @@ dependencies = [
"autocfg",
]
[[package]]
name = "petgraph"
version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8701b58ea97060d5e5b155d383a69952a60943f0e6dfe30b04c287beb0b27455"
dependencies = [
"fixedbitset",
"hashbrown 0.15.5",
"indexmap",
"serde",
]
[[package]]
name = "proc-macro2"
version = "1.0.106"
@@ -166,6 +221,7 @@ version = "0.1.0"
dependencies = [
"codespan-reporting",
"num",
"petgraph",
"regex",
"strum",
"thiserror",
+1
View File
@@ -6,6 +6,7 @@ edition = "2024"
[dependencies]
codespan-reporting = "0.13.1"
num = "0.4.3"
petgraph = "0.8.3"
regex = "1.12.3"
strum = { version = "0.28.0", features = ["derive"] }
thiserror = "2.0.18"
+155
View File
@@ -0,0 +1,155 @@
use petgraph::dot::{Config, Dot};
use petgraph::graph::{Graph, NodeIndex};
use crate::ast::types::{
BlockStmt, CompileUnit, Expr, ExprValue, FuncDeclStmt, GlobalDeclStmt, Param, ReturnStmt,
Statement, VarDeclStmt, VarDeclStmtValue,
};
/// Graph representation of an AST: node weights are display labels, edge weights are
/// (currently empty) string labels.
pub type AstGraph = Graph<String, String>;
/// Conversion of an AST value into a graph and its Graphviz DOT rendering.
pub trait AstGraphExt {
    /// Builds the graph for `self`.
    fn to_graph(&self) -> AstGraph;

    /// Renders the graph produced by [`AstGraphExt::to_graph`] as DOT text, with edge
    /// labels suppressed.
    fn to_dot(&self) -> String {
        let graph = self.to_graph();
        Dot::with_config(&graph, &[Config::EdgeNoLabel]).to_string()
    }
}
impl AstGraphExt for CompileUnit {
fn to_graph(&self) -> AstGraph {
let mut builder = AstGraphBuilder::new();
builder.add_compile_unit(self);
builder.graph
}
}
/// Accumulates the nodes and edges of an [`AstGraph`] while the AST is traversed.
struct AstGraphBuilder {
/// The graph built so far.
graph: AstGraph,
}
impl AstGraphBuilder {
    /// Starts with an empty graph.
    fn new() -> Self {
        Self { graph: Graph::new() }
    }

    /// Adds an unconnected node carrying `label`.
    fn node(&mut self, label: impl Into<String>) -> NodeIndex {
        self.graph.add_node(label.into())
    }

    /// Adds a node carrying `label` and an empty-labelled edge `parent -> node`.
    fn child(&mut self, parent: NodeIndex, label: impl Into<String>) -> NodeIndex {
        let index = self.node(label);
        self.graph.add_edge(parent, index, String::new());
        index
    }

    /// Adds the root node and every global declaration beneath it.
    fn add_compile_unit(&mut self, compile_unit: &CompileUnit) -> NodeIndex {
        let root = self.node(compile_unit.to_string());
        for global in &compile_unit.global_decls {
            self.add_global_decl(root, global);
        }
        root
    }

    /// Adds a wrapper node for the global declaration, then the declaration itself
    /// as its child.
    fn add_global_decl(&mut self, parent: NodeIndex, decl: &GlobalDeclStmt) -> NodeIndex {
        let wrapper = self.child(parent, decl.to_string());
        match decl {
            GlobalDeclStmt::VarDecl(var_decl) => {
                self.add_var_decl(wrapper, var_decl);
            }
            GlobalDeclStmt::FuncDecl(func_decl) => {
                self.add_func_decl(wrapper, func_decl);
            }
        }
        wrapper
    }

    /// Adds a variable declaration node with one child per declared variable.
    fn add_var_decl(&mut self, parent: NodeIndex, var_decl: &VarDeclStmt) -> NodeIndex {
        let decl_node = self.child(parent, var_decl.to_string());
        for declared in &var_decl.values {
            self.add_var_decl_value(decl_node, declared);
        }
        decl_node
    }

    /// Adds a leaf node for a single declared variable.
    fn add_var_decl_value(&mut self, parent: NodeIndex, value: &VarDeclStmtValue) -> NodeIndex {
        self.child(parent, value.to_string())
    }

    /// Adds a function node with a `Params` subtree followed by the body block.
    fn add_func_decl(&mut self, parent: NodeIndex, func_decl: &FuncDeclStmt) -> NodeIndex {
        let func_node = self.child(parent, func_decl.to_string());
        let params_node = self.child(func_node, "Params");
        for param in &func_decl.params {
            self.add_param(params_node, param);
        }
        self.add_block_stmt(func_node, &func_decl.body);
        func_node
    }

    /// Adds a leaf node for a single parameter.
    fn add_param(&mut self, parent: NodeIndex, param: &Param) -> NodeIndex {
        self.child(parent, param.to_string())
    }

    /// Adds a block node with one subtree per contained statement.
    fn add_block_stmt(&mut self, parent: NodeIndex, block_stmt: &BlockStmt) -> NodeIndex {
        let block_node = self.child(parent, block_stmt.to_string());
        for inner in &block_stmt.statements {
            self.add_statement(block_node, inner);
        }
        block_node
    }

    /// Adds a statement. Nested blocks are attached directly to `parent`; every other
    /// statement kind gets its own wrapper node first.
    fn add_statement(&mut self, parent: NodeIndex, stmt: &Statement) -> NodeIndex {
        match stmt {
            Statement::Block(inner_block) => self.add_block_stmt(parent, inner_block),
            Statement::Return(ret) => {
                let wrapper = self.child(parent, stmt.to_string());
                self.add_return_stmt(wrapper, ret);
                wrapper
            }
            Statement::Expr(expression) => {
                let wrapper = self.child(parent, stmt.to_string());
                self.add_expr(wrapper, expression);
                wrapper
            }
            Statement::VarDecl(declaration) => {
                let wrapper = self.child(parent, stmt.to_string());
                self.add_var_decl(wrapper, declaration);
                wrapper
            }
        }
    }

    /// Adds the returned expression, or a `Void` leaf when the return has no value.
    fn add_return_stmt(&mut self, parent: NodeIndex, return_stmt: &ReturnStmt) -> NodeIndex {
        if let Some(returned) = &return_stmt.value {
            self.add_expr(parent, returned)
        } else {
            self.child(parent, "Void")
        }
    }

    /// Adds an expression node and, recursively, its operands.
    fn add_expr(&mut self, parent: NodeIndex, expr: &Expr) -> NodeIndex {
        // The node for the expression itself always precedes its operands.
        let expr_node = self.child(parent, expr.value.to_string());
        match &expr.value {
            ExprValue::IntLit(_) | ExprValue::Var(_) => {}
            ExprValue::BinaryOp { lhs, op: _, rhs } => {
                self.add_expr(expr_node, lhs);
                self.add_expr(expr_node, rhs);
            }
            ExprValue::FuncCall(_, args) => {
                let args_node = self.child(expr_node, "Args");
                for argument in args {
                    self.add_expr(args_node, argument);
                }
            }
            ExprValue::Assign { lvalue, rvalue } => {
                self.add_expr(expr_node, lvalue);
                self.add_expr(expr_node, rvalue);
            }
        }
        expr_node
    }
}
+1
View File
@@ -1 +1,2 @@
pub mod graph;
pub mod types;
+156 -10
View File
@@ -1,8 +1,6 @@
#[derive(Debug, Clone, Copy)]
pub struct Span {
start: usize,
end: usize,
}
use crate::{diagnostic::span::Span, frontend::types::{TokenValue, TypeIdent}};
use std::fmt;
pub struct CompileUnit {
pub global_decls: Vec<GlobalDeclStmt>,
}
@@ -12,9 +10,14 @@ pub enum GlobalDeclStmt {
}
pub struct VarDeclStmt {
pub values: Vec<VarDeclStmtValue>,
pub span: Span,
}
pub struct VarDeclStmtValue {
pub name: String,
pub var_type: Type,
pub span: Span,
}
pub struct FuncDeclStmt {
@@ -35,6 +38,16 @@ pub enum Statement {
Expr(Expr),
VarDecl(VarDeclStmt),
}
impl Statement {
pub fn span(&self) -> Span {
match self {
Statement::Return(s) => s.span,
Statement::Block(s) => s.span,
Statement::Expr(s) => s.span,
Statement::VarDecl(s) => s.span,
}
}
}
pub struct ReturnStmt {
pub value: Option<Expr>,
pub span: Span,
@@ -63,13 +76,146 @@ pub enum BinaryOp {
Equal, NotEqual, Less, LessEqual, Greater, GreaterEqual,
}
impl BinaryOp {
    /// Maps an operator token to its binary operator, or `None` when the token is not
    /// one of the arithmetic/comparison operators listed below.
    pub fn from_token_value(token_value: &TokenValue) -> Option<Self> {
        let op = match token_value {
            TokenValue::Plus => Self::Add,
            TokenValue::Minus => Self::Sub,
            TokenValue::Star => Self::Mul,
            TokenValue::Slash => Self::Div,
            TokenValue::Percent => Self::Mod,
            TokenValue::DoubleEqual => Self::Equal,
            TokenValue::NotEqual => Self::NotEqual,
            TokenValue::Less => Self::Less,
            TokenValue::LessEqual => Self::LessEqual,
            TokenValue::Greater => Self::Greater,
            TokenValue::GreaterEqual => Self::GreaterEqual,
            _ => return None,
        };
        Some(op)
    }
}
/// Types that can appear in declarations in the AST.
pub enum Type {
/// Displayed as `int`.
Int,
/// Displayed as `void`.
Void,
}
impl From<TypeIdent> for Type {
    /// Converts the lexer-level type keyword into the AST type of the same name.
    fn from(ident: TypeIdent) -> Self {
        match ident {
            TypeIdent::Int => Self::Int,
            TypeIdent::Void => Self::Void,
        }
    }
}
pub struct Param {
name: String,
param_type: Type,
span: Span,
}
pub name: String,
pub param_type: Type,
pub span: Span,
}
impl fmt::Display for CompileUnit {
    /// Writes the fixed node label `CompileUnit`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("CompileUnit")
    }
}
impl fmt::Display for GlobalDeclStmt {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
GlobalDeclStmt::VarDecl(_) => write!(f, "GlobalVarDecl"),
GlobalDeclStmt::FuncDecl(_) => write!(f, "FuncDecl"),
}
}
}
impl fmt::Display for VarDeclStmt {
    /// Writes the fixed node label `VarDecl`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("VarDecl")
    }
}
impl fmt::Display for VarDeclStmtValue {
    /// Writes `<type> <name>` for the declared variable.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { var_type, name, .. } = self;
        write!(f, "{var_type} {name}")
    }
}
impl fmt::Display for FuncDeclStmt {
    /// Writes `<return type> <name>` for the function.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { return_type, name, .. } = self;
        write!(f, "{return_type} {name}")
    }
}
impl fmt::Display for BlockStmt {
    /// Writes the fixed node label `Block`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("Block")
    }
}
impl fmt::Display for Statement {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Statement::Return(_) => write!(f, "ReturnStmt"),
Statement::Block(_) => write!(f, "BlockStmt"),
Statement::Expr(_) => write!(f, "ExprStmt"),
Statement::VarDecl(_) => write!(f, "VarDeclStmt"),
}
}
}
impl fmt::Display for ReturnStmt {
    /// Writes the fixed node label `ReturnStmt`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("ReturnStmt")
    }
}
impl fmt::Display for Expr {
    /// Writes the label of the wrapped expression value.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let value = &self.value;
        write!(f, "{value}")
    }
}
impl fmt::Display for ExprValue {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
ExprValue::IntLit(value) => write!(f, "IntLit({})", value),
ExprValue::Var(name) => write!(f, "Var({})", name),
ExprValue::BinaryOp { op, .. } => write!(f, "BinaryOp({})", op),
ExprValue::FuncCall(name, _) => write!(f, "FuncCall({})", name),
ExprValue::Assign { .. } => write!(f, "Assign"),
}
}
}
impl fmt::Display for Param {
    /// Writes `<type> <name>` for the parameter.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { param_type, name, .. } = self;
        write!(f, "{param_type} {name}")
    }
}
impl fmt::Display for BinaryOp {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let op = match self {
BinaryOp::Add => "+",
BinaryOp::Sub => "-",
BinaryOp::Mul => "*",
BinaryOp::Div => "/",
BinaryOp::Mod => "%",
BinaryOp::Equal => "==",
BinaryOp::NotEqual => "!=",
BinaryOp::Less => "<",
BinaryOp::LessEqual => "<=",
BinaryOp::Greater => ">",
BinaryOp::GreaterEqual => ">=",
};
write!(f, "{}", op)
}
}
impl fmt::Display for Type {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Type::Int => write!(f, "int"),
Type::Void => write!(f, "void"),
}
}
}
+9
View File
@@ -3,4 +3,13 @@
pub struct Span {
pub start: usize,
pub end: usize,
}
impl Span {
/// Builds the span running from the start of `start` to the end of `end`.
///
/// # Panics
///
/// Panics if `start.start` is greater than `end.end`.
pub fn from_two(start: Span, end: Span) -> Self {
assert!(start.start <= end.end);
Self {
start: start.start,
end: end.end,
}
}
}
+13 -1
View File
@@ -1,5 +1,7 @@
use thiserror::Error;
use crate::frontend::types::{Token, TokenValue};
// #[derive(Debug, Clone, PartialEq, Eq, Error)]
// pub enum ParseError {
// BlockStmt(#[from] BlockStmtError)
@@ -21,8 +23,18 @@ pub enum LexingError {
UnrecognizedToken(String),
}
/// Errors reported by the parser.
#[derive(Debug, Clone, PartialEq, Eq, Error)]
pub enum ParseError {
/// A token was found where something else was required: the offending token and a
/// description of what was expected.
#[error("unexpected token {}, expect {}", .0, .1)]
UnexpectedToken(TokenValue, &'static str),
/// The current token cannot follow the given previous token.
#[error("cannot combine with previous {}", .0)]
CantCombineWith(TokenValue),
/// Input ended where the described construct was required.
#[error("expect {0}")]
ExpectButEof(&'static str),
}
/// Any error produced by the frontend; each variant forwards the message of the
/// wrapped error unchanged.
#[derive(Debug, Clone, PartialEq, Eq, Error)]
pub enum FrontendError {
/// Error raised while lexing.
#[error(transparent)]
Lexing(#[from] LexingError),
/// Error raised while parsing.
#[error(transparent)]
Parse(#[from] ParseError),
}
+1 -1
View File
@@ -1,4 +1,4 @@
pub mod types;
mod lexer;
// pub mod parser;
pub mod parser;
pub mod err;
+712
View File
@@ -0,0 +1,712 @@
use crate::{
ast::types::{
BinaryOp, BlockStmt, CompileUnit, Expr, ExprValue, FuncDeclStmt, GlobalDeclStmt, Param,
ReturnStmt, Statement, VarDeclStmt, VarDeclStmtValue,
},
diagnostic::{Diagnositics, span::Span},
frontend::{
err::ParseError,
types::{Token, TokenValue, TypeIdent},
},
};
/// Recursive-descent parser over an already lexed token list.
pub struct Parser {
/// Tokens being parsed.
tokens: Vec<Token>,
/// Sink that parse errors are added to.
diagnostics: Diagnositics,
/// Index into `tokens` of the next token to read.
pos: usize,
}
/// Controls whether a failed match is reported as an error (see `parse_block_stmt`).
#[derive(Debug, Clone, PartialEq, Eq)]
enum ParseType {
/// The construct is required: a mismatch is reported as a diagnostic.
MustParse,
/// The construct is optional: a mismatch silently yields `None`.
TryParse,
}
// const FUNC_OR_VAR_DECL_AT_TOP_LEVEL: &str = "function or variable declaration at top level";
// const PARAM_DECL: &str = "parameter declaration";
// const BODY_DECL: &str = "function body";
impl Parser {
/// Creates a parser positioned at the first token of `tokens`; parse errors are added
/// to the supplied `diagnostics`.
pub fn new(tokens: Vec<Token>, diagnostics: Diagnositics) -> Self {
Self {
tokens,
diagnostics,
pos: 0,
}
}
/// Public entry point. NOTE(review): the body is empty, so this is currently a no-op;
/// the tests call `parse_compile_unit` directly instead.
pub fn parse(&mut self) {}
/// Returns the token at the current position without consuming it, or `None` when
/// the position is past the last token.
fn peek(&self) -> Option<&Token> {
self.tokens.get(self.pos)
}
/// Returns the token at the current position and moves past it; at end of input
/// returns `None` and leaves the position untouched.
fn next(&mut self) -> Option<&Token> {
    let current = self.tokens.get(self.pos)?;
    self.pos += 1;
    Some(current)
}
/// Moves the position forward by `n` tokens.
///
/// # Panics
///
/// Panics if the new position lies beyond the end of the token list (one past the
/// last token is allowed).
fn advance(&mut self, n: usize) {
self.pos += n;
assert!(self.pos <= self.tokens.len());
}
/// Moves the position backward by `n` tokens (used to undo a speculative parse).
///
/// # Panics
///
/// Panics if fewer than `n` tokens have been consumed.
fn back(&mut self, n: usize) {
assert!(self.pos >= n);
self.pos -= n;
}
/// Parses global declarations until the input is exhausted. Declarations that fail
/// to parse are dropped from the result.
fn parse_compile_unit(&mut self) -> CompileUnit {
    let mut global_decls = Vec::new();
    loop {
        if self.peek().is_none() {
            break;
        }
        match self.parse_global_decl_stmt() {
            Some(decl) => global_decls.push(decl),
            None => {}
        }
    }
    CompileUnit { global_decls }
}
/// Parses one top-level declaration: a function declaration is tried first, then a
/// variable declaration. If neither matches, the current token is consumed and
/// reported as unexpected.
///
/// Returns `None` when nothing could be parsed.
///
/// # Panics
///
/// Panics if called at end of input.
fn parse_global_decl_stmt(&mut self) -> Option<GlobalDeclStmt> {
    assert!(self.peek().is_some());
    if let Some(func_decl) = self.parse_func_decl_stmt() {
        return Some(GlobalDeclStmt::FuncDecl(func_decl));
    }
    // A failed attempt may already have consumed the rest of the input (and reported
    // why); the next sub-parser requires a current token, so stop here.
    if self.peek().is_none() {
        return None;
    }
    if let Some(var_decl) = self.parse_var_decl_stmt() {
        return Some(GlobalDeclStmt::VarDecl(var_decl));
    }
    // Same guard as above: never unwrap a token that may not exist.
    let token = self.next()?.clone();
    self.diagnostics.add_from_frontend_error(
        ParseError::UnexpectedToken(token.value, "function or variable declaration at top level"),
        token.span,
    );
    None
}
/// Parses `<type> <identifier>` and returns the type, the name and the span covering
/// both tokens.
///
/// Returns `None` without consuming anything and without a diagnostic when the
/// current token is not a type. When a type is followed by a non-identifier, both
/// tokens are consumed and `CantCombineWith` is reported. When the type is the last
/// token of the input, `ExpectButEof` is reported at the type token (previously this
/// case panicked).
///
/// # Panics
///
/// Panics if called at end of input.
fn parse_type_and_name(&mut self) -> Option<(TypeIdent, String, Span)> {
    assert!(self.peek().is_some());
    let type_token = self.peek()?;
    let start_span = type_token.span;
    let type_ident = type_token.value.as_type_ident()?;
    self.advance(1);

    // Copy what is needed out of the next token so no borrow of `self` is held.
    let (name, end_span) = match self.peek().map(|t| (t.value.as_ident(), t.span)) {
        Some((Some(name), span)) => (name, span),
        Some((None, span)) => {
            self.advance(1);
            self.diagnostics.add_from_frontend_error(
                ParseError::CantCombineWith(TokenValue::TypeIdent(type_ident)),
                span,
            );
            return None;
        }
        None => {
            self.diagnostics
                .add_from_frontend_error(ParseError::ExpectButEof("identifier"), start_span);
            return None;
        }
    };
    self.advance(1);
    Some((type_ident, name, Span::from_two(start_span, end_span)))
}
/// Parses a function declaration: `<type> <name> ( <params> ) <block>`.
///
/// If `<type> <name>` is not followed by `(`, the two tokens are un-consumed and
/// `None` is returned without a diagnostic so the caller can try another production.
/// Past the `(` the input is committed to being a function declaration and syntax
/// errors are reported.
///
/// Reaching end of input after `(` or after the parameter list reports
/// `ExpectButEof` at the last token of the input (previously these paths panicked).
///
/// # Panics
///
/// Panics if called at end of input.
fn parse_func_decl_stmt(&mut self) -> Option<FuncDeclStmt> {
    assert!(self.peek().is_some());
    let start_span = self.peek()?.span;
    let (return_type, name, _) = self.parse_type_and_name()?;
    if !self
        .peek()
        .is_some_and(|t| matches!(t.value, TokenValue::LParen))
    {
        // Not a function after all: give back the type and the name.
        self.back(2);
        return None;
    }
    self.advance(1);

    // from here we can be sure it's a function declaration, so we can report error if the syntax is wrong
    let params = match self.peek().map(|t| &t.value) {
        Some(TokenValue::RParen) => {
            self.advance(1);
            vec![]
        }
        Some(_) => self.parse_param_list()?,
        None => {
            // At end of input there is no current token; point at the last one.
            let span = self.tokens.last().map_or(start_span, |t| t.span);
            self.diagnostics
                .add_from_frontend_error(ParseError::ExpectButEof("parameter declaration"), span);
            return None;
        }
    };

    let body = match self.peek() {
        Some(_) => self.parse_block_stmt(ParseType::MustParse)?,
        None => {
            let span = self.tokens.last().map_or(start_span, |t| t.span);
            self.diagnostics
                .add_from_frontend_error(ParseError::ExpectButEof("function body"), span);
            return None;
        }
    };

    let end_span = body.span;
    Some(FuncDeclStmt {
        return_type: return_type.into(),
        name,
        params,
        body,
        span: Span::from_two(start_span, end_span),
    })
}
/// Parses a non-empty, comma-separated parameter list and the `)` that closes it.
///
/// The caller has already consumed `(` and handles the empty `()` case itself.
/// Previously the loop never stopped at `)`, so every non-empty list failed with
/// "expect `,`"; the closing parenthesis is now recognised and consumed here.
///
/// Returns `None` after reporting a diagnostic when a separator is missing or the
/// input ends before `)`. A parameter that fails to parse also yields `None`.
///
/// # Panics
///
/// Panics if called at end of input.
fn parse_param_list(&mut self) -> Option<Vec<Param>> {
    assert!(self.peek().is_some());
    let mut params = vec![];
    loop {
        if !params.is_empty() {
            // After a parameter only `,` (more follow) or `)` (done) is valid.
            match self.peek().map(|t| &t.value) {
                Some(TokenValue::RParen) => {
                    self.advance(1);
                    return Some(params);
                }
                Some(TokenValue::Comma) => self.advance(1),
                Some(_) => {
                    let token = self.next()?.clone();
                    self.diagnostics.add_from_frontend_error(
                        ParseError::UnexpectedToken(token.value, "`,` or `)`"),
                        token.span,
                    );
                    return None;
                }
                None => {
                    let span = self.tokens.last()?.span;
                    self.diagnostics
                        .add_from_frontend_error(ParseError::ExpectButEof("`)`"), span);
                    return None;
                }
            }
        }
        // `parse_param` requires a current token; a trailing `,` at end of input
        // must be reported rather than trip its assertion.
        if self.peek().is_none() {
            let span = self.tokens.last()?.span;
            self.diagnostics
                .add_from_frontend_error(ParseError::ExpectButEof("parameter declaration"), span);
            return None;
        }
        params.push(self.parse_param()?);
    }
}
/// Parses a single `<type> <name>` parameter; `None` when `parse_type_and_name`
/// does not match.
///
/// # Panics
///
/// Panics if called at end of input.
fn parse_param(&mut self) -> Option<Param> {
    assert!(self.peek().is_some());
    self.parse_type_and_name()
        .map(|(type_ident, name, span)| Param {
            param_type: type_ident.into(),
            name,
            span,
        })
}
/// Requires the current token to be `;` and consumes it.
///
/// On a mismatch the offending token is consumed and reported, then tokens are
/// skipped up to and including the next `;`, or up to (not including) the next `}`,
/// and `None` is returned. At end of input `ExpectButEof` is reported at the last
/// token (previously this panicked).
fn must_match_semicolon(&mut self) -> Option<()> {
    let Some(token) = self.peek().cloned() else {
        if let Some(span) = self.tokens.last().map(|t| t.span) {
            self.diagnostics
                .add_from_frontend_error(ParseError::ExpectButEof("`;`"), span);
        }
        return None;
    };
    // Exactly one token is consumed whether or not it is the semicolon.
    self.advance(1);
    if matches!(token.value, TokenValue::Semicolon) {
        return Some(());
    }

    self.diagnostics
        .add_from_frontend_error(ParseError::UnexpectedToken(token.value, "`;`"), token.span);
    // Error recovery: resynchronise at the end of the statement or of the block.
    while let Some(t) = self.peek() {
        if matches!(t.value, TokenValue::Semicolon) {
            self.advance(1);
            break;
        }
        if matches!(t.value, TokenValue::RBrace) {
            break;
        }
        self.advance(1);
    }
    None
}
/// Parses `<type> <name> {, <name>} ;`.
///
/// Returns `None` without a diagnostic when the statement does not start with a
/// type. Unexpected tokens between declarators are reported and skipped. A `,` that
/// is not followed by an identifier reports `CantCombineWith`; a `,` at the very end
/// of the input reports `ExpectButEof` at the comma (previously this panicked).
///
/// The resulting span runs from the first declarator to the last one.
///
/// # Panics
///
/// Panics if called at end of input.
fn parse_var_decl_stmt(&mut self) -> Option<VarDeclStmt> {
    assert!(self.peek().is_some());
    let mut values = vec![];
    let (var_type, name, span) = self.parse_type_and_name()?;
    values.push(VarDeclStmtValue { name, var_type: var_type.into(), span });

    while let Some(token) = self.peek() {
        match token.value {
            TokenValue::Semicolon => break,
            TokenValue::Comma => {
                let comma_span = token.span;
                self.advance(1);
                match self.peek().map(|t| (t.value.as_ident(), t.span)) {
                    Some((Some(name), span)) => {
                        self.advance(1);
                        values.push(VarDeclStmtValue { name, var_type: var_type.into(), span });
                    }
                    Some((None, span)) => {
                        self.advance(1);
                        self.diagnostics.add_from_frontend_error(
                            ParseError::CantCombineWith(TokenValue::TypeIdent(var_type)),
                            span,
                        );
                        return None;
                    }
                    None => {
                        self.diagnostics.add_from_frontend_error(
                            ParseError::ExpectButEof("identifier"),
                            comma_span,
                        );
                        return None;
                    }
                }
            }
            _ => {
                // Report the stray token and keep scanning for `,` or `;`.
                let token = token.clone();
                self.advance(1);
                self.diagnostics.add_from_frontend_error(
                    ParseError::UnexpectedToken(token.value, "variable declaration"),
                    token.span,
                );
            }
        }
    }

    self.must_match_semicolon()?;
    // `values` always holds at least the first declarator pushed above.
    let span = Span::from_two(values.first()?.span, values.last()?.span);
    Some(VarDeclStmt { values, span })
}
/// Parses `{ <statement>* }`. Stray `;` tokens between statements are skipped.
///
/// If the current token is not `{`, returns `None`; with `ParseType::MustParse` the
/// token is additionally consumed and reported. A statement that fails to parse
/// aborts the whole block with `None`.
///
/// Changes from the previous version: reaching end of input before `}` reports
/// `ExpectButEof` at the last token instead of panicking, the debug `println!`
/// output is gone, and the block's span now ends at the closing `}` rather than at
/// the last statement.
///
/// # Panics
///
/// Panics if called at end of input.
fn parse_block_stmt(&mut self, parse_type: ParseType) -> Option<BlockStmt> {
    assert!(self.peek().is_some());
    let opening = self.peek()?;
    let start_span = opening.span;
    if !matches!(opening.value, TokenValue::LBrace) {
        if parse_type == ParseType::MustParse {
            let token = opening.clone();
            self.advance(1);
            self.diagnostics.add_from_frontend_error(
                ParseError::UnexpectedToken(token.value, "`{`"),
                token.span,
            );
        }
        return None;
    }
    self.advance(1);

    let mut statements = vec![];
    let end_span = loop {
        let Some(token) = self.peek() else {
            let span = self.tokens.last().map_or(start_span, |t| t.span);
            self.diagnostics
                .add_from_frontend_error(ParseError::ExpectButEof("`}`"), span);
            return None;
        };
        match token.value {
            // Empty statement.
            TokenValue::Semicolon => self.advance(1),
            TokenValue::RBrace => {
                let span = token.span;
                self.advance(1);
                break span;
            }
            _ => statements.push(self.parse_stmt()?),
        }
    };

    Some(BlockStmt {
        statements,
        span: Span::from_two(start_span, end_span),
    })
}
/// Parses one statement by trying, in order: variable declaration, return statement,
/// nested block, expression.
///
/// If none matches, the current token is consumed and reported, and tokens are
/// skipped up to and including the next `;` or up to (not including) the next `}`.
///
/// A failed attempt may consume tokens, possibly up to the end of the input. The
/// sub-parsers require a current token, so parsing stops with `None` as soon as the
/// input is exhausted (previously this panicked).
///
/// # Panics
///
/// Panics if called at end of input.
fn parse_stmt(&mut self) -> Option<Statement> {
    assert!(self.peek().is_some());
    if let Some(var_decl) = self.parse_var_decl_stmt() {
        return Some(Statement::VarDecl(var_decl));
    }
    if self.peek().is_none() {
        return None;
    }
    if let Some(return_stmt) = self.parse_return_stmt() {
        return Some(Statement::Return(return_stmt));
    }
    if self.peek().is_none() {
        return None;
    }
    if let Some(block_stmt) = self.parse_block_stmt(ParseType::TryParse) {
        return Some(Statement::Block(block_stmt));
    }
    if self.peek().is_none() {
        return None;
    }
    if let Some(expr) = self.parse_expr() {
        return Some(Statement::Expr(expr));
    }

    let token = self.next()?.clone();
    self.diagnostics.add_from_frontend_error(
        ParseError::UnexpectedToken(token.value, "statement"),
        token.span,
    );
    // Error recovery: resynchronise at the end of the statement or of the block.
    while let Some(t) = self.peek() {
        if matches!(t.value, TokenValue::Semicolon) {
            self.advance(1);
            break;
        }
        if matches!(t.value, TokenValue::RBrace) {
            break;
        }
        self.advance(1);
    }
    None
}
fn parse_return_stmt(&mut self) -> Option<ReturnStmt> {
assert!(self.peek().is_some());
let start_span = self.peek().unwrap().span;
if !self
.peek()
.map(|t| matches!(t.value, TokenValue::Return))
.unwrap_or(false)
{
return None;
}
self.advance(1);
let value = if self
.peek()
.map(|t| matches!(t.value, TokenValue::Semicolon))
.unwrap_or(false)
{
None
} else {
Some(self.parse_expr()?)
};
self.must_match_semicolon()?;
let end_span = self.peek().unwrap().span;
Some(ReturnStmt {
value,
span: Span::from_two(start_span, end_span),
})
}
// fn parse_expr_tail(&mut self, left: Expr) -> Option<Expr> {
// match self.peek() {
// None => Some(left),
// Some(t1) => {
// // TODO: add delimiter judge to support better error recovery
// let op = BinaryOp::from_token_value(&t1.value)?;
// match op {
// BinaryOp::Add
// }
// let right = self.parse_term()?;
// let expr = Expr { value: ExprValue::BinaryOp { lhs: Box::new(left), op, rhs: Box::new(right) }, span: Span::from_two(left.span, right.span) };
// self.parse_expr_tail(expr)
// }
// }
// }
/// Parses a primary expression: an identifier, an integer literal, or a
/// parenthesised expression. The first token is always consumed.
///
/// A parenthesised expression keeps the inner expression's value but takes the span
/// from `(` to `)`. Any other leading token, a missing `)`, or end of input inside
/// the parentheses is reported and yields `None`.
///
/// # Panics
///
/// Panics if called at end of input.
fn parse_primary(&mut self) -> Option<Expr> {
    assert!(self.peek().is_some());
    let token = self.next().unwrap().clone();
    let first_span = token.span;

    match token.value {
        TokenValue::Ident(name) => {
            return Some(Expr {
                value: ExprValue::Var(name),
                span: first_span,
            });
        }
        TokenValue::IntLit(value) => {
            return Some(Expr {
                value: ExprValue::IntLit(value),
                span: first_span,
            });
        }
        TokenValue::LParen => {}
        other => {
            self.diagnostics.add_from_frontend_error(
                ParseError::UnexpectedToken(other, "expression"),
                first_span,
            );
            return None;
        }
    }

    // Only the parenthesised form reaches this point.
    if self.peek().is_none() {
        self.diagnostics
            .add_from_frontend_error(ParseError::ExpectButEof("expression"), first_span);
        return None;
    }
    let inner = self.parse_expr()?;

    let Some(closing) = self.peek().cloned() else {
        self.diagnostics
            .add_from_frontend_error(ParseError::ExpectButEof("`)`"), inner.span);
        return None;
    };
    // The token after the inner expression is consumed whether or not it is `)`.
    self.advance(1);
    if matches!(closing.value, TokenValue::RParen) {
        Some(Expr {
            span: Span::from_two(first_span, closing.span),
            ..inner
        })
    } else {
        self.diagnostics.add_from_frontend_error(
            ParseError::UnexpectedToken(closing.value, "`)`"),
            closing.span,
        );
        None
    }
}
/// Parses a unary expression: `+ <unary>`, `- <unary>`, or a primary expression.
///
/// Unary plus returns its operand with the span widened to include the `+`. Unary
/// minus is represented as the binary subtraction `0 - <operand>`, where the
/// synthetic `0` literal carries the span of the `-` token.
///
/// # Panics
///
/// Panics if called at end of input.
fn parse_unary(&mut self) -> Option<Expr> {
    assert!(self.peek().is_some());
    let sign = self.peek().unwrap().clone();
    let negate = match sign.value {
        TokenValue::Plus => false,
        TokenValue::Minus => true,
        _ => return self.parse_primary(),
    };
    self.advance(1);

    if self.peek().is_none() {
        self.diagnostics
            .add_from_frontend_error(ParseError::ExpectButEof("expression"), sign.span);
        return None;
    }
    let operand = self.parse_unary()?;
    let span = Span::from_two(sign.span, operand.span);

    let value = if negate {
        let zero = Expr {
            value: ExprValue::IntLit(0),
            span: sign.span,
        };
        ExprValue::BinaryOp {
            lhs: Box::new(zero),
            op: BinaryOp::Sub,
            rhs: Box::new(operand),
        }
    } else {
        operand.value
    };
    Some(Expr { value, span })
}
fn parse_multiplicative(&mut self) -> Option<Expr> {
assert!(self.peek().is_some());
let mut left = self.parse_unary()?;
while let Some(t) = self.peek() {
let op = match t.value {
TokenValue::Star => BinaryOp::Mul,
TokenValue::Slash => BinaryOp::Div,
TokenValue::Percent => BinaryOp::Mod,
_ => break,
};
self.advance(1);
let right = match self.peek() {
Some(_) => self.parse_unary()?,
None => {
self.diagnostics
.add_from_frontend_error(ParseError::ExpectButEof("expression"), left.span);
return None;
}
};
let span = Span::from_two(left.span, right.span);
left = Expr {
value: ExprValue::BinaryOp {
lhs: Box::new(left),
op,
rhs: Box::new(right),
},
span,
};
}
Some(left)
}
fn parse_additive(&mut self) -> Option<Expr> {
assert!(self.peek().is_some());
let mut left = self.parse_multiplicative()?;
while let Some(t) = self.peek() {
let op = match t.value {
TokenValue::Plus => BinaryOp::Add,
TokenValue::Minus => BinaryOp::Sub,
_ => break,
};
self.advance(1);
let right = match self.peek() {
Some(_) => self.parse_multiplicative()?,
None => {
self.diagnostics
.add_from_frontend_error(ParseError::ExpectButEof("expression"), left.span);
return None;
}
};
let span = Span::from_two(left.span, right.span);
left = Expr {
value: ExprValue::BinaryOp {
lhs: Box::new(left),
op,
rhs: Box::new(right),
},
span,
};
}
Some(left)
}
fn parse_relational(&mut self) -> Option<Expr> {
assert!(self.peek().is_some());
let mut left = self.parse_additive()?;
while let Some(t) = self.peek() {
let op = match t.value {
TokenValue::Less => BinaryOp::Less,
TokenValue::Greater => BinaryOp::Greater,
TokenValue::LessEqual => BinaryOp::LessEqual,
TokenValue::GreaterEqual => BinaryOp::GreaterEqual,
TokenValue::DoubleEqual => BinaryOp::Equal,
TokenValue::NotEqual => BinaryOp::NotEqual,
_ => break,
};
self.advance(1);
let right = match self.peek() {
Some(_) => self.parse_additive()?,
None => {
self.diagnostics
.add_from_frontend_error(ParseError::ExpectButEof("expression"), left.span);
return None;
}
};
let span = Span::from_two(left.span, right.span);
left = Expr {
value: ExprValue::BinaryOp {
lhs: Box::new(left),
op,
rhs: Box::new(right),
},
span,
};
}
Some(left)
}
/// Parses `IDENT "=" assign` (right-associative) or falls through to a relational
/// expression.
///
/// The assignment form is chosen by looking at the current and the following token;
/// nothing is consumed before that decision is made.
///
/// # Panics
///
/// Panics if called at end of input.
fn parse_assign(&mut self) -> Option<Expr> {
    assert!(self.peek().is_some());
    let current = self.tokens.get(self.pos).map(|t| &t.value);
    let following = self.tokens.get(self.pos + 1).map(|t| &t.value);
    let starts_assignment = matches!(
        (current, following),
        (Some(TokenValue::Ident(_)), Some(TokenValue::Equal))
    );
    if !starts_assignment {
        return self.parse_relational();
    }

    let target = self.next().unwrap().clone();
    let name = target.value.as_ident().unwrap();
    // Skip the `=`.
    self.advance(1);

    if self.peek().is_none() {
        self.diagnostics
            .add_from_frontend_error(ParseError::ExpectButEof("expression"), target.span);
        return None;
    }
    let rvalue = self.parse_assign()?;

    let lvalue = Expr {
        value: ExprValue::Var(name),
        span: target.span,
    };
    let span = Span::from_two(lvalue.span, rvalue.span);
    Some(Expr {
        value: ExprValue::Assign {
            lvalue: Box::new(lvalue),
            rvalue: Box::new(rvalue),
        },
        span,
    })
}
/*
Expression grammar implemented by the parse_* methods
(`==`/`!=` share the relational level):

expr
:= assign
assign
:= relational
| IDENT "=" assign
relational
:= additive
| relational "<" additive
| relational ">" additive
| relational "<=" additive
| relational ">=" additive
| relational "==" additive
| relational "!=" additive
additive
:= multiplicative
| additive "+" multiplicative
| additive "-" multiplicative
multiplicative
:= unary
| multiplicative "*" unary
| multiplicative "/" unary
| multiplicative "%" unary
unary
:= primary
| "+" unary
| "-" unary
primary
:= IDENT
| NUMBER
| "(" expr ")"
*/
/// Parses a full expression; this is the entry to the grammar above and simply
/// delegates to `parse_assign`. Panics if called at end of input.
fn parse_expr(&mut self) -> Option<Expr> {
assert!(self.peek().is_some());
self.parse_assign()
}
}
#[cfg(test)]
mod tests {
    use std::fs::File;
    use std::io::BufRead;
    use std::path::Path;

    use crate::ast::graph::AstGraphExt;
    use crate::frontend::lexer::Lexer;
    use crate::utils::case_list::CaseList;
    use crate::utils::num_sequence::NumberSequence;

    pub use super::*;

    /// Lexes and parses every test case selected by `case_str` (a number-sequence
    /// expression such as `"0-3,14-25"`) from `./testcases`, writes each AST as
    /// `output/<case>.dot`, prints any diagnostics, and panics at the end if at least
    /// one case produced diagnostics.
    fn test_case(case_str: &str) {
        let case_sequence = NumberSequence::from_str(case_str).unwrap();
        let case_list = CaseList::from_dir(&Path::new("./testcases")).unwrap();

        // `/output` is git-ignored, so it does not exist in a fresh checkout; without
        // this the first `fs::write` below fails.
        std::fs::create_dir_all("output").unwrap();

        let mut error_case_cnt = 0;
        for case_no in case_sequence {
            let case_path = case_list.get_case_path(case_no).unwrap();
            println!("{}", case_path.display());
            let file = File::open(&case_path).unwrap();
            let mut buf_reader = std::io::BufReader::new(file);
            let mut lexer = Lexer::new();
            let mut full_text = String::new();

            // Feed the lexer line by line while keeping the full text for diagnostics.
            loop {
                let mut line = String::new();
                let bytes_read = buf_reader.read_line(&mut line).unwrap();
                if bytes_read == 0 {
                    break;
                }
                full_text.push_str(&line);
                lexer.parse_next_str(&line);
            }

            let (tokens, diagnostics) = lexer.finish();
            let mut is_error = false;
            if !diagnostics.is_empty() {
                diagnostics.print(&format!("{}", case_path.display()), &full_text);
                is_error = true;
            }

            let mut parser = Parser::new(tokens, diagnostics);
            let compile_unit = parser.parse_compile_unit();
            let dot = compile_unit.to_dot();
            let case_name = case_list
                .get_case_name(case_no)
                .unwrap()
                .strip_suffix(".c")
                .unwrap();
            std::fs::write(format!("output/{}.dot", case_name), dot).unwrap();

            if !parser.diagnostics.is_empty() {
                parser
                    .diagnostics
                    .print(&format!("{}", case_path.display()), &full_text);
                is_error = true;
            }
            if is_error {
                error_case_cnt += 1;
            }
        }
        if error_case_cnt > 0 {
            panic!("Found {} cases with errors", error_case_cnt);
        }
    }

    #[test]
    fn test_expr() {
        test_case("0-3,14-25");
        // test_case("0-3,14-25");
    }
}
+38 -22
View File
@@ -28,31 +28,47 @@ pub enum TokenValue {
// Eof,
Unrecognized,
}
impl TokenValue {
    /// Returns the contained type keyword when this is a `TypeIdent` token.
    pub fn as_type_ident(&self) -> Option<TypeIdent> {
        match self {
            TokenValue::TypeIdent(type_ident) => Some(type_ident.clone()),
            _ => None,
        }
    }

    /// Returns a copy of the identifier text when this is an `Ident` token.
    pub fn as_ident(&self) -> Option<String> {
        match self {
            TokenValue::Ident(name) => Some(name.clone()),
            _ => None,
        }
    }
}
impl std::fmt::Display for TokenValue {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
TokenValue::IntLit(i) => write!(f, "literal int: {}", i),
TokenValue::Ident(s) => write!(f, "identifier: {}", s),
TokenValue::IntLit(i) => write!(f, "literal int {}", i),
TokenValue::Ident(s) => write!(f, "identifier {}", s),
TokenValue::TypeIdent(t) => write!(f, "type {}", t.as_ref()),
TokenValue::Plus => write!(f, "+"),
TokenValue::Minus => write!(f, "-"),
TokenValue::Star => write!(f, "*"),
TokenValue::Slash => write!(f, "/"),
TokenValue::Percent => write!(f, "%"),
TokenValue::Equal => write!(f, "="),
TokenValue::DoubleEqual => write!(f, "=="),
TokenValue::Not => write!(f, "!"),
TokenValue::NotEqual => write!(f, "!="),
TokenValue::Less => write!(f, "<"),
TokenValue::LessEqual => write!(f, "<="),
TokenValue::Greater => write!(f, ">"),
TokenValue::GreaterEqual => write!(f, ">="),
TokenValue::LParen => write!(f, "("),
TokenValue::RParen => write!(f, ")"),
TokenValue::LBrace => write!(f, "{{"),
TokenValue::RBrace => write!(f, "}}"),
TokenValue::Comma => write!(f, ","),
TokenValue::Semicolon => write!(f, ";"),
TokenValue::Plus => write!(f, "`+`"),
TokenValue::Minus => write!(f, "`-`"),
TokenValue::Star => write!(f, "`*`"),
TokenValue::Slash => write!(f, "`/`"),
TokenValue::Percent => write!(f, "`%`"),
TokenValue::Equal => write!(f, "`=`"),
TokenValue::DoubleEqual => write!(f, "`==`"),
TokenValue::Not => write!(f, "`!`"),
TokenValue::NotEqual => write!(f, "`!=`"),
TokenValue::Less => write!(f, "`<`"),
TokenValue::LessEqual => write!(f, "`<=`"),
TokenValue::Greater => write!(f, "`>`"),
TokenValue::GreaterEqual => write!(f, "`>=`"),
TokenValue::LParen => write!(f, "`(`"),
TokenValue::RParen => write!(f, "`)`"),
TokenValue::LBrace => write!(f, "`{{`"),
TokenValue::RBrace => write!(f, "`}}`"),
TokenValue::Comma => write!(f, "`,`"),
TokenValue::Semicolon => write!(f, "`;`"),
TokenValue::If => write!(f, "if"),
TokenValue::Else => write!(f, "else"),
TokenValue::While => write!(f, "while"),
@@ -83,7 +99,7 @@ pub enum TokenKind {
// Eof,
Unrecognized,
}
#[derive(Debug, Clone, PartialEq, Eq, EnumString, AsRefStr)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, EnumString, AsRefStr)]
pub enum TypeIdent {
#[strum(serialize = "int")]
Int,
+1 -1
View File
@@ -22,7 +22,7 @@ impl<T: Integer + Copy + FromStr> NumberSequence<T> {
}
else if let Some((start_str, end_str)) = group.split_once('-') {
if let (Ok(start), Ok(end)) = (start_str.parse::<T>(), end_str.parse::<T>()) {
ranges.push((start, end));
ranges.push((start, end + T::one()));
} else {
return None;
}