Merge branch 'feat/backend'

This commit is contained in:
2026-05-13 08:52:22 +08:00
10 changed files with 1140 additions and 13 deletions
+3 -1
View File
@@ -1,3 +1,5 @@
/target
/testcases
/output
/output
*.pyc
*.zip
+287
View File
@@ -0,0 +1,287 @@
use std::{fmt::Display, ops::Add};
use crate::backend::register_allocator::{REG_FP, REG_LR, REG_PC, REG_R0, REG_R1, REG_R2, REG_R3, REG_R12, REG_SP, Register};
/// A single unit of ARM assembly output produced by the backend.
///
/// Every variant except [`ARMInstr::FunctionHead`] wraps one of the instruction structs of this
/// module and is rendered through that struct's own `Display` implementation.
pub enum ARMInstr {
    /// `mov`, optionally with a condition suffix.
    Move(MoveInstr),
    /// `ldr` from `[base]` or `[base, offset]`.
    Load(LoadInstr),
    /// `ldr reg, =name` pseudo-instruction.
    LoadPseudo(LoadPseudoInstr),
    /// `str` to `[base]` or `[base, offset]`.
    Store(StoreInstr),
    /// `mul`.
    Mul(MulInstr),
    /// `sdiv`.
    SDiv(SDivInstr),
    /// `add`.
    Add(AddInstr),
    /// `sub`.
    Sub(SubInstr),
    /// `cmp`.
    Cmp(CmpInstr),
    /// `push {…}`.
    Push(PushInstr),
    /// `pop {…}`.
    Pop(PopInstr),
    /// Function header directives: the symbol name and the operand printed after `.align`.
    FunctionHead(String, usize),
    /// `bl`.
    Bl(BlInstr),
}

impl Display for ARMInstr {
    /// Writes the assembly text of this item without a trailing newline.
    ///
    /// `FunctionHead` expands to four lines (`.align`, `.global`, `.type … %function` and the
    /// label); every other variant is a single line.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            ARMInstr::FunctionHead(name, align_size) => write!(
                f,
                ".align {align}\n.global {symbol}\n.type {symbol}, %function\n{symbol}:",
                align = align_size,
                symbol = name
            ),
            ARMInstr::Move(inner) => Display::fmt(inner, f),
            ARMInstr::Load(inner) => Display::fmt(inner, f),
            ARMInstr::LoadPseudo(inner) => Display::fmt(inner, f),
            ARMInstr::Store(inner) => Display::fmt(inner, f),
            ARMInstr::Mul(inner) => Display::fmt(inner, f),
            ARMInstr::SDiv(inner) => Display::fmt(inner, f),
            ARMInstr::Add(inner) => Display::fmt(inner, f),
            ARMInstr::Sub(inner) => Display::fmt(inner, f),
            ARMInstr::Cmp(inner) => Display::fmt(inner, f),
            ARMInstr::Push(inner) => Display::fmt(inner, f),
            ARMInstr::Pop(inner) => Display::fmt(inner, f),
            ARMInstr::Bl(inner) => Display::fmt(inner, f),
        }
    }
}
/// A flexible instruction operand: either a register or an immediate integer.
pub enum RegisterOrImm {
    /// A machine register, printed by its name.
    Reg(Register),
    /// An immediate value, printed with a leading `#`.
    Imm(i32),
}

impl Display for RegisterOrImm {
    /// Renders the operand in assembler syntax (`r0`, `#42`, `#-8`, …).
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Imm(value) => write!(f, "#{}", value),
            Self::Reg(register) => write!(f, "{}", register),
        }
    }
}
/// Condition suffix that can be appended to a conditional instruction mnemonic.
pub enum ConditionCode {
    /// Rendered as `eq`.
    Eq,
    /// Rendered as `ne`.
    Ne,
    /// Rendered as `lt`.
    Lt,
    /// Rendered as `le`.
    Le,
    /// Rendered as `gt`.
    Gt,
    /// Rendered as `ge`.
    Ge,
}

impl Display for ConditionCode {
    /// Writes the two-letter lowercase suffix for this condition.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(match self {
            Self::Eq => "eq",
            Self::Ne => "ne",
            Self::Lt => "lt",
            Self::Le => "le",
            Self::Gt => "gt",
            Self::Ge => "ge",
        })
    }
}
// pub enum RegisterOrMemory {
// Reg(Register),
// Mem(Register, Option<RegisterOrImm>), // Base register and optional offset
// MemPesudoName(String),
// }
// impl Display for RegisterOrMemory {
// fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
// match self {
// RegisterOrMemory::Reg(reg) => write!(f, "{}", reg),
// RegisterOrMemory::Mem(base, Some(offset)) => write!(f, "[{}, {}]", base, offset),
// RegisterOrMemory::Mem(base, None) => write!(f, "[{}]", base),
// RegisterOrMemory::MemPesudoName(name) => write!(f, "{}", name),
// }
// }
// }
/// `mov{cond} dest, src`: optional condition, destination register, source operand.
pub struct MoveInstr(Option<ConditionCode>, Register, RegisterOrImm);

impl MoveInstr {
    /// Builds an unconditional `mov dest, src`.
    pub fn new_uncond(dest: Register, src: RegisterOrImm) -> ARMInstr {
        ARMInstr::Move(Self(None, dest, src))
    }

    /// Builds `mov fp, sp`.
    pub fn new_sp_to_fp() -> ARMInstr {
        Self::new_uncond(REG_FP, RegisterOrImm::Reg(REG_SP))
    }

    /// Builds `mov sp, fp`.
    pub fn new_fp_to_sp() -> ARMInstr {
        Self::new_uncond(REG_SP, RegisterOrImm::Reg(REG_FP))
    }
}

impl Display for MoveInstr {
    /// Writes `mov` (with the condition suffix glued on when present) and both operands.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match &self.0 {
            Some(cond) => write!(f, "mov{} {}, {}", cond, self.1, self.2),
            None => write!(f, "mov {}, {}", self.1, self.2),
        }
    }
}
/// `ldr dest, [base{, offset}]`: destination register, base register, optional offset.
pub struct LoadInstr(Register, Register, Option<RegisterOrImm>);

impl LoadInstr {
    /// Builds a load of `dest` from `[base]` or `[base, offset]`.
    pub fn new(dest: Register, base: Register, offset: Option<RegisterOrImm>) -> ARMInstr {
        ARMInstr::Load(Self(dest, base, offset))
    }

    /// Builds a load of `dest` from the frame slot `[fp, #-offset]`.
    pub fn new_stack(dest: Register, offset: i32) -> ARMInstr {
        Self::new(dest, REG_FP, Some(RegisterOrImm::Imm(-offset)))
    }
}

impl Display for LoadInstr {
    /// Writes the `ldr` line, omitting the offset operand when there is none.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match &self.2 {
            Some(offset) => write!(f, "ldr {}, [{}, {}]", self.0, self.1, offset),
            None => write!(f, "ldr {}, [{}]", self.0, self.1),
        }
    }
}
/// `ldr dest, =name`: destination register and the text printed after `=`.
pub struct LoadPseudoInstr(Register, String);

impl LoadPseudoInstr {
    /// Builds `ldr dest, =name`.
    pub fn new(dest: Register, name: String) -> ARMInstr {
        ARMInstr::LoadPseudo(Self(dest, name))
    }
}

impl Display for LoadPseudoInstr {
    /// Writes the pseudo-instruction with `name` copied verbatim after the `=`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "ldr {}, ={}", self.0, self.1)
    }
}
/// `str src, [base{, offset}]`: register to store, base address register, optional offset.
pub struct StoreInstr(Register, Register, Option<RegisterOrImm>);

impl StoreInstr {
    /// Builds a store of `src` to `[dest]` or `[dest, offset]`; `dest` is the base address
    /// register.
    pub fn new(src: Register, dest: Register, offset: Option<RegisterOrImm>) -> ARMInstr {
        ARMInstr::Store(Self(src, dest, offset))
    }

    /// Builds a store to the frame slot `[fp, #-offset]`.
    ///
    /// Despite its name, `dest` is the register whose value is written to the slot.
    pub fn new_stack(dest: Register, offset: i32) -> ARMInstr {
        Self::new(dest, REG_FP, Some(RegisterOrImm::Imm(-offset)))
    }
}

impl Display for StoreInstr {
    /// Writes the `str` line, omitting the offset operand when there is none.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match &self.2 {
            Some(offset) => write!(f, "str {}, [{}, {}]", self.0, self.1, offset),
            None => write!(f, "str {}, [{}]", self.0, self.1),
        }
    }
}
/// `mul dest, left, right`.
// NOTE(review): the type admits an immediate as `right`; confirm the target's `mul` accepts one.
pub struct MulInstr(Register, Register, RegisterOrImm);

impl MulInstr {
    /// Builds `mul dest, left, right`.
    pub fn new(dest: Register, left: Register, right: RegisterOrImm) -> ARMInstr {
        ARMInstr::Mul(Self(dest, left, right))
    }
}

impl Display for MulInstr {
    /// Writes the three-operand `mul` line.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "mul {}, {}, {}", self.0, self.1, self.2)
    }
}
/// `sdiv dest, left, right`.
// NOTE(review): the type admits an immediate as `right`; confirm the target's `sdiv` accepts one.
pub struct SDivInstr(Register, Register, RegisterOrImm);

impl SDivInstr {
    /// Builds `sdiv dest, left, right`.
    pub fn new(dest: Register, left: Register, right: RegisterOrImm) -> ARMInstr {
        ARMInstr::SDiv(Self(dest, left, right))
    }
}

impl Display for SDivInstr {
    /// Writes the three-operand `sdiv` line.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "sdiv {}, {}, {}", self.0, self.1, self.2)
    }
}
/// `add dest, left, right`.
pub struct AddInstr(Register, Register, RegisterOrImm);

impl AddInstr {
    /// Builds `add dest, left, right`.
    pub fn new(dest: Register, left: Register, right: RegisterOrImm) -> ARMInstr {
        ARMInstr::Add(Self(dest, left, right))
    }

    /// Builds `add sp, sp, #offset`.
    pub fn new_sp(offset: i32) -> ARMInstr {
        Self::new(REG_SP, REG_SP, RegisterOrImm::Imm(offset))
    }
}

impl Display for AddInstr {
    /// Writes the three-operand `add` line.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "add {}, {}, {}", self.0, self.1, self.2)
    }
}
/// `sub dest, left, right`.
pub struct SubInstr(Register, Register, RegisterOrImm);

impl SubInstr {
    /// Builds `sub dest, left, right`.
    pub fn new(dest: Register, left: Register, right: RegisterOrImm) -> ARMInstr {
        ARMInstr::Sub(Self(dest, left, right))
    }

    /// Builds `sub sp, sp, #offset`.
    pub fn new_sp(offset: i32) -> ARMInstr {
        Self::new(REG_SP, REG_SP, RegisterOrImm::Imm(offset))
    }
}

impl Display for SubInstr {
    /// Writes the three-operand `sub` line.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "sub {}, {}, {}", self.0, self.1, self.2)
    }
}
/// `cmp left, right`, comparing two registers.
///
/// No constructor is defined for this type in this file yet.
pub struct CmpInstr(Register, Register);

impl Display for CmpInstr {
    /// Writes the two-operand `cmp` line.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "cmp {}, {}", self.0, self.1)
    }
}
/// `push {r, …}` with the registers listed in the stored order.
pub struct PushInstr(Vec<Register>);

impl PushInstr {
    /// Builds `push {fp, lr}`.
    pub fn new_push_fp_lr() -> ARMInstr {
        ARMInstr::Push(Self(vec![REG_FP, REG_LR]))
    }

    /// Builds `push {r0, r1, r2, r3, r12}`.
    // NOTE(review): five words are pushed, so the stack pointer moves by 20 bytes; confirm
    // callers do not rely on 8-byte stack alignment while these are on the stack.
    pub fn new_push_caller_save() -> ARMInstr {
        ARMInstr::Push(Self(vec![REG_R0, REG_R1, REG_R2, REG_R3, REG_R12]))
    }
}

impl Display for PushInstr {
    /// Writes the register list separated by `", "` inside braces.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("push {")?;
        for (position, reg) in self.0.iter().enumerate() {
            if position != 0 {
                f.write_str(", ")?;
            }
            write!(f, "{}", reg)?;
        }
        f.write_str("}")
    }
}
/// `pop {r, …}` with the registers listed in the stored order.
pub struct PopInstr(Vec<Register>);

impl PopInstr {
    /// Builds `pop {fp, pc}`.
    pub fn new_pop_fp_pc() -> ARMInstr {
        ARMInstr::Pop(Self(vec![REG_FP, REG_PC]))
    }

    /// Builds `pop {r0, r1, r2, r3, r12}`, the counterpart of
    /// `PushInstr::new_push_caller_save`.
    pub fn new_pop_caller_save() -> ARMInstr {
        ARMInstr::Pop(Self(vec![REG_R0, REG_R1, REG_R2, REG_R3, REG_R12]))
    }
}

impl Display for PopInstr {
    /// Writes the register list separated by `", "` inside braces.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("pop {")?;
        for (position, reg) in self.0.iter().enumerate() {
            if position != 0 {
                f.write_str(", ")?;
            }
            write!(f, "{}", reg)?;
        }
        f.write_str("}")
    }
}
/// `bl target`: branch-with-link to the symbol stored in the tuple field.
pub struct BlInstr(String);

impl BlInstr {
    /// Builds `bl func_name`.
    pub fn new(func_name: String) -> ARMInstr {
        ARMInstr::Bl(Self(func_name))
    }
}

impl Display for BlInstr {
    /// Writes `bl` followed by the target symbol.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "bl {}", self.0)
    }
}
+203
View File
@@ -0,0 +1,203 @@
use std::collections::BTreeMap;
use crate::{backend::{arm_instr::{ARMInstr, AddInstr, BlInstr, LoadInstr, LoadPseudoInstr, MoveInstr, MulInstr, PopInstr, PushInstr, RegisterOrImm, SDivInstr, StoreInstr, SubInstr}, register_allocator::{REG_R0, REG_R1, REG_R2, REG_R3, Register, RegisterAlloc, RegisterAllocator}, types::ARMAsmVar}, ir::types::{Function, IRInstr, MoveRValue, Variable, VariableType}};
use crate::ir::types::BinaryOp as IRBinaryOp;
pub const ARM_STACK_ALIGNMENT: usize = 8;
/// Translates IR into ARM assembly text.
///
/// `emit` accumulates instructions and global-variable records; `to_text` renders them.
pub struct Generator {
/// Instructions emitted so far; rendered after the `.text` directive.
instrs: Vec<ARMInstr>,
/// Globals rendered in a `.data` section. Nothing in this file pushes to this list yet.
var_inited: Vec<ARMAsmVar>,
/// Globals rendered as `.comm` symbols.
var_uninited: Vec<ARMAsmVar>,
/// Hands out machine registers for IR variables while instructions are emitted.
register_allocator: RegisterAllocator,
}
const DEFAULT_VAR_ALIGN: usize = 4;
impl Generator {
pub fn new() -> Self {
Self {
instrs: Vec::new(),
var_inited: Vec::new(),
var_uninited: Vec::new(),
register_allocator: RegisterAllocator::new(),
}
}
/// Emits assembly for a whole translation unit.
///
/// Only function definitions and global declarations are accepted at the top level.
///
/// # Panics
/// Panics (`unreachable!`) on any other top-level IR instruction.
pub fn emit(&mut self, ir_instrs: Vec<IRInstr>) {
    ir_instrs.into_iter().for_each(|top_level| match top_level {
        IRInstr::Declare(global) => self.emit_global_decl(global),
        IRInstr::DefineFunc(func, args, body) => self.emit_func_def(func, args, body),
        _ => unreachable!(),
    });
}
/// Renders everything emitted so far as one assembly source file.
///
/// Layout: the architecture header, one `.comm` line per uninitialised global, one `.data`
/// record per initialised global, then `.text` followed by one line per emitted instruction.
pub fn to_text(&self) -> String {
    let mut text = String::from(".arch armv7ve\n.arm\n.fpu vfpv4\n");
    for var in &self.var_uninited {
        text.push_str(&format!(".comm {}, {}, {}\n", var.name, var.size, var.align));
    }
    for var in &self.var_inited {
        // The symbol type is spelled `%object` because `@` starts a comment in ARM GNU
        // assembler syntax (the function header already uses `%function`), and the label is
        // `name:`, not `:name`.
        text.push_str(&format!(
            ".data\n.align {}\n.global {}\n.type {}, %object\n{}:\n",
            var.align, var.name, var.name, var.name
        ));
        // Initialisers are not tracked yet, so every initialised global is emitted as zero.
        text.push_str(".word 0\n");
    }
    text.push_str(".text\n");
    for instr in &self.instrs {
        text.push_str(&format!("{}\n", instr));
    }
    text
}
/// Records a global variable as an uninitialised `.comm` symbol named `global_var_<index>`,
/// sized from its data type and aligned to `DEFAULT_VAR_ALIGN`.
fn emit_global_decl(&mut self, var: Variable) {
    let name = format!("global_var_{}", var.index);
    let size = var.data_type.size_in_bytes();
    self.var_uninited.push(ARMAsmVar { name, size, align: DEFAULT_VAR_ALIGN });
}
fn emit_func_def(&mut self, func: Function, _args: Vec<Variable>, body: Vec<IRInstr>) {
self.instrs.push(ARMInstr::FunctionHead(func.name.clone(), 4)); // Assuming 4-byte alignment for simplicity
self.instrs.push(PushInstr::new_push_fp_lr());
self.instrs.push(MoveInstr::new_sp_to_fp());
self.emit_func(body);
}
/// Emits the body of one function.
///
/// `Declare` instructions seen before `Entry` are given frame slots: after each variable the
/// running frame size is rounded up to `ARM_STACK_ALIGNMENT` and that value becomes the
/// variable's offset (its slot is addressed as `[fp, #-offset]`). `Entry` then emits the
/// `sub sp, sp, #size` that reserves the frame. Every other instruction is forwarded to the
/// matching `emit_*` helper.
///
/// # Panics
/// Panics if a `Declare` follows `Entry`, if `Entry` appears twice, if a nested `DefineFunc`
/// is encountered, if a returned variable must be loaded but has no frame slot, or if no
/// register can be allocated.
fn emit_func(&mut self, instrs: Vec<IRInstr>) {
let mut encounter_entry = false;
// Bytes of frame reserved so far.
let mut stack_size_needed = 0;
// NOTE(review): keyed by `Variable::index` only, while `Variable` equality also compares
// `var_type`; confirm indices are unique within a function.
let mut var_index_to_stack_offset = BTreeMap::new();
for ir_instr in instrs {
match ir_instr {
IRInstr::Binary(dest, left, op, right) => self.emit_binary(dest, left, op, right, &var_index_to_stack_offset),
IRInstr::Exit(v) => {
if let Some(v) = v {
// Reserve r0 for the return value, then bring the returned variable into a register.
let ret_alloc = self.register_allocator.alloc_reg(REG_R0).expect("Ran out of registers");
let ret_reg = ret_alloc.reg;
let v_alloc = self.register_allocator.alloc(v).expect("Ran out of registers");
let v_reg = v_alloc.reg;
// A freshly assigned register does not hold the value yet; reload it from the frame.
if !v_alloc.is_reused {
let v_stack_offset = var_index_to_stack_offset.get(&v.index).expect("Variable not declared");
self.instrs.push(LoadInstr::new_stack(v_reg, *v_stack_offset as i32));
}
self.instrs.push(MoveInstr::new_uncond(ret_reg, RegisterOrImm::Reg(v_reg)));
}
// Epilogue: `mov sp, fp` then `pop {fp, pc}`.
self.instrs.push(MoveInstr::new_fp_to_sp());
self.instrs.push(PopInstr::new_pop_fp_pc());
},
IRInstr::FuncCall(func, args, ret) => self.emit_func_call(func, args, ret, &var_index_to_stack_offset),
IRInstr::Move(dest, src) => self.emit_move(dest, src, &var_index_to_stack_offset),
IRInstr::Declare(variable) => {
assert!(!encounter_entry, "Variable declarations must come before entry instruction");
let size = variable.data_type.size_in_bytes();
// Grow the frame by this variable, rounded up; the new total is the variable's offset.
stack_size_needed = (stack_size_needed + size).next_multiple_of(ARM_STACK_ALIGNMENT);
var_index_to_stack_offset.insert(variable.index, stack_size_needed);
},
IRInstr::Entry => {
assert!(!encounter_entry, "Multiple entry instructions are not allowed");
encounter_entry = true;
// Reserve the whole frame at once.
self.instrs.push(SubInstr::new_sp(stack_size_needed as i32));
},
IRInstr::DefineFunc(_, _, _) => unreachable!(),
}
}
}
fn emit_func_call(&mut self, func: Function, args: Vec<Variable>, ret: Option<Variable>, var_index_to_stack_offset: &BTreeMap<usize, usize>) {
self.instrs.push(PushInstr::new_push_caller_save());
if args.len() > 4 {
todo!("More than 4 arguments not supported yet");
}
const ARG_REGS: [Register; 4] = [REG_R0, REG_R1, REG_R2, REG_R3];
for (i, arg) in args.into_iter().enumerate() {
let arg_alloc = self.register_allocator.alloc(arg).expect("Ran out of registers");
let arg_reg = arg_alloc.reg;
if !arg_alloc.is_reused {
let arg_stack_offset = var_index_to_stack_offset.get(&arg.index).expect("Variable not declared");
self.instrs.push(LoadInstr::new_stack(arg_reg, *arg_stack_offset as i32));
}
self.instrs.push(MoveInstr::new_uncond(ARG_REGS[i], RegisterOrImm::Reg(arg_reg)));
}
self.instrs.push(BlInstr::new(func.name.clone()));
if let Some(ret) = ret {
let ret_alloc = self.register_allocator.alloc(ret).expect("Ran out of registers");
let ret_reg = ret_alloc.reg;
self.instrs.push(MoveInstr::new_uncond(ret_reg, RegisterOrImm::Reg(REG_R0)));
}
self.instrs.push(PopInstr::new_pop_caller_save());
}
/// Emits `dest = src`.
///
/// The source value is first brought into the register assigned to `dest` (a load for a
/// variable, a `mov` for a constant) and then written through to the home location of `dest`:
/// its `global_var_<index>` symbol for globals, its frame slot otherwise.
///
/// # Panics
/// Panics if a non-global variable has no frame slot, if no register is available, or if
/// `dest` is a `ParamTemp` (`todo!`).
fn emit_move(&mut self, dest: Variable, src: MoveRValue, var_index_to_stack_offset: &BTreeMap<usize, usize>) {
    let dest_alloc = self.register_allocator.alloc(dest).expect("Ran out of registers");
    let dest_register = dest_alloc.reg;
    match src {
        // The source must be loaded no matter whether `dest` already had a register: whatever
        // that register held before is the old value of `dest`, not the value of `variable`.
        MoveRValue::Var(variable) => match variable.var_type {
            VariableType::Global => {
                self.instrs.push(LoadPseudoInstr::new(dest_register, format!("global_var_{}", variable.index)));
                self.instrs.push(LoadInstr::new(dest_register, dest_register, None));
            }
            _ => {
                let var_stack_offset = *var_index_to_stack_offset.get(&variable.index).expect("Variable not found");
                self.instrs.push(LoadInstr::new_stack(dest_register, var_stack_offset as i32));
            }
        },
        // NOTE(review): the constant is emitted as a `mov` immediate; confirm the assembler
        // accepts every `i32` value in that position.
        MoveRValue::ConstInt(literal_int) => self.instrs.push(MoveInstr::new_uncond(dest_register, RegisterOrImm::Imm(literal_int))),
    };
    match dest.var_type {
        VariableType::Global => {
            // NOTE(review): `alloc_any` returns a bare register with no release handle, so this
            // scratch register stays reserved afterwards.
            let address_reg = self.register_allocator.alloc_any().expect("Ran out of registers");
            self.instrs.push(LoadPseudoInstr::new(address_reg, format!("global_var_{}", dest.index)));
            self.instrs.push(StoreInstr::new(dest_register, address_reg, None));
        },
        VariableType::ParamTemp => {
            todo!()
        },
        _ => {
            let offset = *var_index_to_stack_offset.get(&dest.index).expect("Variable not declared");
            self.instrs.push(StoreInstr::new_stack(dest_register, offset as i32));
        }
    }
}
/// Emits `dest = left <op> right` for the arithmetic operators and stores the result to the
/// frame slot of `dest`.
///
/// Operands that were not already held in a register are loaded from their frame slots first.
/// `Mod` is computed as `left - (left / right) * right` using a scratch register. The
/// comparison operators are not implemented yet (`todo!`).
///
/// # Panics
/// Panics if no register is available or if a variable that must be loaded or stored has no
/// frame slot.
// NOTE(review): when two of `dest`, `left` and `right` are the same variable, `alloc` returns
// several handles for one register; dropping the second one reaches the panic branch of
// `mark_unused`. Confirm the IR never produces such an instruction.
fn emit_binary(&mut self, dest: Variable, left: Variable, op: IRBinaryOp, right: Variable, var_index_to_stack_offset: &BTreeMap<usize, usize>) {
// All three handles stay alive until the end of the function so the registers cannot be
// handed out again while the instruction is being emitted.
let left_alloc = self.register_allocator.alloc(left).expect("Ran out of registers");
let right_alloc = self.register_allocator.alloc(right).expect("Ran out of registers");
let dest_alloc = self.register_allocator.alloc(dest).expect("Ran out of registers");
let left_reg = left_alloc.reg;
let right_reg = right_alloc.reg;
let dest_reg = dest_alloc.reg;
// A freshly assigned register does not hold the operand yet; reload it from the frame.
if !left_alloc.is_reused {
let left_offset = var_index_to_stack_offset.get(&left.index).expect("Variable not declared");
self.instrs.push(LoadInstr::new_stack(left_reg, *left_offset as i32));
}
if !right_alloc.is_reused {
let right_offset = var_index_to_stack_offset.get(&right.index).expect("Variable not declared");
self.instrs.push(LoadInstr::new_stack(right_reg, *right_offset as i32));
}
match op {
IRBinaryOp::Add => {
self.instrs.push(AddInstr::new(dest_reg, left_reg, RegisterOrImm::Reg(right_reg)));
},
IRBinaryOp::Sub => {
self.instrs.push(SubInstr::new(dest_reg, left_reg, RegisterOrImm::Reg(right_reg)));
},
IRBinaryOp::Mul => {
self.instrs.push(MulInstr::new(dest_reg, left_reg, RegisterOrImm::Reg(right_reg)));
},
IRBinaryOp::Div => {
self.instrs.push(SDivInstr::new(dest_reg, left_reg, RegisterOrImm::Reg(right_reg)));
},
IRBinaryOp::Mod => {
// temp = left / right; temp = temp * right; dest = left - temp
// NOTE(review): `alloc_any` returns a bare register with no release handle, so this
// scratch register stays reserved afterwards.
let temp_reg = self.register_allocator.alloc_any().expect("Ran out of registers");
self.instrs.push(SDivInstr::new(temp_reg, left_reg, RegisterOrImm::Reg(right_reg)));
self.instrs.push(MulInstr::new(temp_reg, temp_reg, RegisterOrImm::Reg(right_reg)));
self.instrs.push(SubInstr::new(dest_reg, left_reg, RegisterOrImm::Reg(temp_reg)));
},
IRBinaryOp::Le => todo!(),
IRBinaryOp::Lt => todo!(),
IRBinaryOp::Gt => todo!(),
IRBinaryOp::Ge => todo!(),
IRBinaryOp::Ne => todo!(),
IRBinaryOp::Eq => todo!(),
}
// Write the result through to the frame slot of `dest`.
let dest_stack_offset = var_index_to_stack_offset.get(&dest.index).expect("Variable not declared");
self.instrs.push(StoreInstr::new_stack(dest_reg, *dest_stack_offset as i32));
}
}
+77
View File
@@ -0,0 +1,77 @@
mod register_allocator;
pub mod generator;
mod arm_instr;
pub mod types;
#[cfg(test)]
mod tests {
use std::io::BufRead;
use std::path::Path;
use std::fs::File;
use std::io::Write;
use crate::frontend::lexer::Lexer;
use crate::frontend::parser::Parser;
use crate::utils::case_list::CaseList;
use crate::utils::num_sequence::NumberSequence;
use crate::ir::generator::Generator as IRGenerator;
pub use super::generator::Generator as ASMGenerator;
/// Runs every selected test case through the lexer, parser, IR generator and ARM backend and
/// writes the resulting assembly to `output/<case name>.s`.
///
/// `case_str` is parsed by `NumberSequence::from_str`; each resulting number is looked up in
/// the case list built from `./testcases`.
///
/// # Panics
/// Panics on any I/O failure, on an unknown case number, or, after all cases have been
/// processed, when at least one case produced lexer or parser diagnostics.
fn test_case(case_str: &str) {
    let case_sequence = NumberSequence::from_str(case_str).unwrap();
    let case_list = CaseList::from_dir(&Path::new("./testcases")).unwrap();
    // `/output` is git-ignored, so it does not exist in a fresh checkout; create it on demand
    // instead of failing in `File::create` below.
    std::fs::create_dir_all("output").unwrap();
    let mut error_case_cnt = 0;
    for case_no in case_sequence {
        let case_path = case_list.get_case_path(case_no).unwrap();
        println!("{}", case_path.display());
        let file = File::open(&case_path).unwrap();
        let mut buf_reader = std::io::BufReader::new(file);
        let mut lexer = Lexer::new();
        // The full text is kept alongside the incremental lexing so diagnostics can quote it.
        let mut full_text = String::new();
        loop {
            let mut line = String::new();
            let bytes_read = buf_reader.read_line(&mut line).unwrap();
            if bytes_read == 0 {
                break;
            }
            full_text.push_str(&line);
            lexer.parse_next_str(&line);
        }
        let (tokens, diagnostics) = lexer.finish();
        let mut is_error = false;
        if !diagnostics.is_empty() {
            diagnostics.print(&format!("{}", case_path.display()), &full_text);
            is_error = true;
        }
        let mut parser = Parser::new(tokens, diagnostics);
        let compile_unit = parser.parse();
        let case_name = case_list.get_case_name(case_no).unwrap().strip_suffix(".c").unwrap();
        if !parser.diagnostics.is_empty() {
            parser.diagnostics.print(&format!("{}", case_path.display()), &full_text);
            is_error = true;
        }
        // Code generation runs even for cases with diagnostics, so an assembly file is written
        // for every case.
        let mut generator = IRGenerator::new();
        let ir = generator.emit(compile_unit);
        let mut asm_generator = ASMGenerator::new();
        asm_generator.emit(ir);
        let asm_text = asm_generator.to_text();
        let mut output_file = File::create(format!("output/{}.s", case_name)).unwrap();
        output_file.write_all(asm_text.as_bytes()).unwrap();
        if is_error {
            error_case_cnt += 1;
        }
    }
    if error_case_cnt > 0 {
        panic!("Found {} cases with errors", error_case_cnt);
    }
}
#[test]
fn test_expr() {
    // Case selection handed to `test_case`; presumably cases 0 to 3 and 14 to 25 (the syntax
    // is defined by `NumberSequence::from_str`).
    const EXPRESSION_CASES: &str = "0-3,14-25";
    test_case(EXPRESSION_CASES);
}
}
+202
View File
@@ -0,0 +1,202 @@
use std::{cell::RefCell, collections::BTreeMap, fmt::Display, rc::{Rc, Weak}};
use crate::ir::types::Variable;
/// A machine register, identified solely by its assembler name.
///
/// Equality and ordering are derived from the name string, so registers sort
/// lexicographically by name.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct Register {
    /// Name as written in assembly source.
    name: &'static str,
}

impl Display for Register {
    /// Writes the register name exactly as stored.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(self.name)
    }
}
/// Declares one `pub const` [`Register`] for every `IDENT => "name"` pair.
///
/// Pairs are separated by commas; a trailing comma is not accepted.
macro_rules! register_declare {
    ($($const_name:ident => $asm_name:expr),*) => {
        $(
            pub const $const_name: Register = Register { name: $asm_name };
        )*
    };
}

// r0–r12 followed by the specially named registers.
register_declare! {
    REG_R0 => "r0",
    REG_R1 => "r1",
    REG_R2 => "r2",
    REG_R3 => "r3",
    REG_R4 => "r4",
    REG_R5 => "r5",
    REG_R6 => "r6",
    REG_R7 => "r7",
    REG_R8 => "r8",
    REG_R9 => "r9",
    REG_R10 => "r10",
    REG_R11 => "r11",
    REG_R12 => "r12",
    REG_SP => "sp",
    REG_LR => "lr",
    REG_PC => "pc",
    REG_FP => "fp"
}
pub const REGISTERS: &[Register] = &[
REG_R0, REG_R1, REG_R2, REG_R3, REG_R4, REG_R5, REG_R6, REG_R7, REG_R8, REG_R9, REG_R10, REG_R11, REG_R12, REG_SP, REG_LR, REG_PC, REG_FP
];
pub const REGISTERS_CAN_ALLOC: &[Register] = &[
REG_R0, REG_R1, REG_R2, REG_R3, REG_R4, REG_R5, REG_R6, REG_R7, REG_R8, REG_R9, REG_R10, REG_R12
];
/// Handle for a register handed out by [`RegisterAllocator`].
///
/// Dropping the handle tells the allocator that the register is no longer actively used.
pub struct RegisterAlloc {
    /// Back-reference to the allocator state; weak so a handle never keeps the state alive.
    allocator: Weak<RefCell<RegisterAllocatorInner>>,
    /// The register this handle refers to.
    pub reg: Register,
    /// `true` when the variable was already mapped to `reg` before this allocation.
    pub is_reused: bool,
}

impl Drop for RegisterAlloc {
    /// Releases the register if the allocator still exists; otherwise does nothing.
    fn drop(&mut self) {
        let Some(shared_state) = self.allocator.upgrade() else {
            return;
        };
        shared_state.borrow_mut().mark_unused(self.reg);
    }
}
/// Allocation state of one register.
pub enum RegisterUseKind {
/// Reserved by `alloc_reg` or `alloc_any` without being tied to a variable.
Designated,
/// Mapped to the variable and handed out through `alloc`; not eligible for eviction.
UsedByVariable(Variable),
/// Still mapped to the variable, but its handle has been dropped; may be reused for the same
/// variable or taken over by another allocation.
AllocatedToVariable(Variable),
/// Not in use.
Free,
}
/// Allocator state shared between [`RegisterAllocator`] and the handles it gives out.
struct RegisterAllocatorInner {
/// State of every allocatable register.
register_map: BTreeMap<Register, RegisterUseKind>,
/// Reverse lookup: the register currently mapped to each variable.
variable_to_register: BTreeMap<Variable, Register>,
}
/// Assigns machine registers to IR variables.
pub struct RegisterAllocator {
// The state lives behind `Rc<RefCell<…>>` so that `RegisterAlloc` handles can hold a weak
// reference to it and release their register when dropped.
inner: Rc<RefCell<RegisterAllocatorInner>>,
}
impl RegisterAllocatorInner {
    /// Releases `reg` after a [`RegisterAlloc`] handle has been dropped.
    ///
    /// * `Designated` becomes `Free`.
    /// * `UsedByVariable(v)` becomes `AllocatedToVariable(v)`: the mapping is kept so the value
    ///   can be reused, but the register may now be evicted.
    /// * A register that is already released is left untouched.
    ///
    /// Registers that are not tracked by the allocator are ignored.
    ///
    /// Releasing is deliberately idempotent. `alloc` hands out an additional handle when a
    /// variable that is already in use is requested again (for example both operands of
    /// `a + a`), so the same register can legitimately be released more than once. This method
    /// runs inside `Drop`, where a panic would abort the process if it happened during
    /// unwinding. The register becomes evictable as soon as the first of such handles is
    /// dropped.
    fn mark_unused(&mut self, reg: Register) {
        if let Some(use_kind) = self.register_map.get_mut(&reg) {
            match use_kind {
                RegisterUseKind::Designated => {
                    *use_kind = RegisterUseKind::Free;
                }
                RegisterUseKind::UsedByVariable(var) => {
                    let owner = *var;
                    *use_kind = RegisterUseKind::AllocatedToVariable(owner);
                }
                RegisterUseKind::AllocatedToVariable(_) | RegisterUseKind::Free => {}
            }
        }
    }
}
impl RegisterAllocator {
pub fn new() -> Self {
let mut register_map = BTreeMap::new();
for &reg in REGISTERS_CAN_ALLOC {
register_map.insert(reg, RegisterUseKind::Free);
}
Self {
inner: Rc::new(RefCell::new(RegisterAllocatorInner {
register_map,
variable_to_register: BTreeMap::new(),
})),
}
}
/// Returns a register for `var`, or `None` when every register is actively in use.
///
/// Search order:
/// 1. the register already mapped to `var` (`is_reused` is `true`);
/// 2. the first `Free` register;
/// 3. the first register whose variable is no longer actively used, evicting that variable's
///    mapping.
///
/// In cases 2 and 3 `is_reused` is `false`, so the register's contents are not the value of
/// `var`.
///
/// # Panics
/// Panics if the variable and register maps disagree with each other.
pub fn alloc(&mut self, var: Variable) -> Option<RegisterAlloc> {
let mut inner = self.inner.borrow_mut();
if let Some(&reg) = inner.variable_to_register.get(&var) {
// Variable already has a register allocated
let use_kind = inner.register_map.get_mut(&reg).expect("Inconsistent state: variable has a register but it's not in the register map");
assert!(matches!(use_kind, RegisterUseKind::UsedByVariable(v) | RegisterUseKind::AllocatedToVariable(v) if *v == var));
*use_kind = RegisterUseKind::UsedByVariable(var);
// NOTE(review): if the variable was already `UsedByVariable`, this creates a second handle
// for the same register.
return Some(RegisterAlloc {
allocator: Rc::downgrade(&self.inner),
reg,
is_reused: true,
});
}
// Find a free register
for (&reg, use_kind) in inner.register_map.iter_mut() {
// Find free register first
if let RegisterUseKind::Free = use_kind {
*use_kind = RegisterUseKind::UsedByVariable(var);
inner.variable_to_register.insert(var, reg);
return Some(RegisterAlloc {
allocator: Rc::downgrade(&self.inner),
reg,
is_reused: false,
});
}
}
// Split the borrow so both maps can be modified while iterating over one of them.
let RegisterAllocatorInner{register_map, variable_to_register} = &mut *inner;
for (&reg, use_kind) in register_map.iter_mut() {
// Find allocated register then
if let RegisterUseKind::AllocatedToVariable(ori_var) = use_kind {
// Evict the previous owner; nothing is emitted here to save its value.
assert!(variable_to_register.remove(&ori_var).is_some());
*use_kind = RegisterUseKind::UsedByVariable(var);
variable_to_register.insert(var, reg);
return Some(RegisterAlloc {
allocator: Rc::downgrade(&self.inner),
reg,
is_reused: false,
});
}
}
// No free register available
None
}
/// Reserves the specific register `reg`.
///
/// Succeeds when the register is free or only holds a variable that is no longer actively
/// used (that variable's mapping is dropped). Returns `None` when the register is actively
/// used by a variable or already reserved.
///
/// # Panics
/// Panics if `reg` is not one of the allocatable registers.
pub fn alloc_reg(&mut self, reg: Register) -> Option<RegisterAlloc> {
    let mut state = self.inner.borrow_mut();
    let RegisterAllocatorInner { register_map, variable_to_register } = &mut *state;
    let slot = register_map.get_mut(&reg).expect("Trying to allocate a register that is not in the register map");
    match slot {
        RegisterUseKind::UsedByVariable(_) | RegisterUseKind::Designated => return None,
        RegisterUseKind::AllocatedToVariable(previous_owner) => {
            variable_to_register.remove(previous_owner);
        }
        RegisterUseKind::Free => {}
    }
    *slot = RegisterUseKind::Designated;
    Some(RegisterAlloc {
        allocator: Rc::downgrade(&self.inner),
        reg,
        is_reused: false,
    })
}
/// Reserves any available register as scratch space and returns it, or `None` when every
/// register is actively in use.
///
/// A `Free` register is preferred; otherwise the first register whose variable is no longer
/// actively used is taken over and that variable's mapping is dropped.
// NOTE(review): unlike `alloc` and `alloc_reg`, this returns a bare `Register` rather than a
// `RegisterAlloc` handle, and nothing in this file moves a register reserved here out of the
// `Designated` state again; confirm how callers are expected to release it.
pub fn alloc_any(&mut self) -> Option<Register> {
let mut inner = self.inner.borrow_mut();
for (&reg, use_kind) in inner.register_map.iter_mut() {
if let RegisterUseKind::Free = use_kind {
*use_kind = RegisterUseKind::Designated;
return Some(reg);
}
}
// Split the borrow so both maps can be modified while iterating over one of them.
let RegisterAllocatorInner{register_map, variable_to_register} = &mut *inner;
for (&reg, use_kind) in register_map.iter_mut() {
if let RegisterUseKind::AllocatedToVariable(ori_var) = use_kind {
variable_to_register.remove(&ori_var);
*use_kind = RegisterUseKind::Designated;
return Some(reg);
}
}
None
}
}
+5
View File
@@ -0,0 +1,5 @@
/// Description of one global variable to be written into the assembly output.
pub struct ARMAsmVar {
/// Symbol name of the variable.
pub name: String,
/// Size of the variable in bytes.
pub size: usize,
/// Alignment value printed in the variable's `.comm` or `.align` directive.
pub align: usize,
}
+32 -1
View File
@@ -43,6 +43,15 @@ pub enum IRType {
I1,
Void,
}
impl IRType {
pub fn size_in_bytes(&self) -> usize {
match self {
IRType::I32 => 4,
IRType::I1 => 1,
IRType::Void => 0,
}
}
}
impl Display for IRType {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
@@ -72,7 +81,7 @@ impl Display for MoveRValue {
}
}
}
#[derive(Clone, Copy)]
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug, Hash)]
pub enum VariableType {
Global,
ParamTemp,
@@ -86,6 +95,28 @@ pub struct Variable {
pub var_type: VariableType,
pub data_type: IRType,
}
/// Two variables are the same when both their index and their kind match; `data_type` does
/// not take part in the comparison.
impl PartialEq for Variable {
    fn eq(&self, other: &Self) -> bool {
        self.index == other.index && self.var_type == other.var_type
    }
}

impl Eq for Variable {}

impl PartialOrd for Variable {
    /// Delegates to [`Ord::cmp`] so the two orderings can never drift apart.
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}

/// Orders by index first and by variable kind second, consistent with `PartialEq`.
impl Ord for Variable {
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        match self.index.cmp(&other.index) {
            std::cmp::Ordering::Equal => self.var_type.cmp(&other.var_type),
            ord => ord,
        }
    }
}
impl Display for Variable {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let prefix = match self.var_type {
+33 -11
View File
@@ -1,6 +1,7 @@
mod frontend;
mod ast;
mod ir;
mod backend;
mod utils;
mod diagnostic;
mod err;
@@ -10,7 +11,7 @@ use std::{fs::File, io::BufRead};
use clap::Parser as ArgParser;
use crate::{frontend::{lexer::Lexer, parser::Parser}, ir::generator::Generator};
use crate::backend::generator::Generator as ASMGerenerator;
/// Simple minic compiler built by Rust
#[derive(ArgParser, Debug)]
#[command(version, about, long_about = None)]
@@ -18,6 +19,10 @@ struct Args {
/// Output the generated IR code
#[arg(short = 'I', long = "ir")]
output_ir: bool,
#[arg(skip)]
output_asm: bool,
#[arg(short = 't', long = "target", default_value = "ARM32")]
target: String,
/// Use recursive descent parsing
#[arg(short = 'D', long = "recursive-descent")]
recursive_descent: bool,
@@ -32,15 +37,18 @@ struct Args {
}
fn main() {
let args = Args::parse();
let mut args = Args::parse();
if !args.output_ir {
eprintln!("Currently only IR generation is supported. Use -I to enable it.");
return;
args.output_asm = true;
}
if !args.recursive_descent {
eprintln!("Currently only recursive descent parsing is supported. Use -D to enable it.");
return;
}
if args.target != "ARM32" {
eprintln!("Currently only ARM32 assembly output is supported. Use -t ARM32 to specify the target architecture.");
return;
}
let source_path = std::path::Path::new(&args.source);
let file = match File::open(&args.source) {
Ok(f) => f,
@@ -75,14 +83,28 @@ fn main() {
if !generator.get_diagnostics().is_empty() {
generator.get_diagnostics().print(&format!("{}", source_path.display()), &full_text);
}
if let Some(output_path) = args.output {
match std::fs::write(&output_path, ir.iter().map(|instr| instr.to_string()).collect::<Vec<_>>().join("\n")) {
Ok(_) => println!("IR code written to {}", output_path),
Err(e) => eprintln!("Failed to write IR code to {}: {}", output_path, e),
if args.output_ir {
if let Some(output_path) = args.output {
match std::fs::write(&output_path, ir.iter().map(|instr| instr.to_string()).collect::<Vec<_>>().join("\n")) {
Ok(_) => println!("IR code written to {}", output_path),
Err(e) => eprintln!("Failed to write IR code to {}: {}", output_path, e),
}
} else {
for instr in ir {
println!("{}", instr);
}
}
} else {
for instr in ir {
println!("{}", instr);
} else if args.output_asm {
let mut asm_generator = ASMGerenerator::new();
asm_generator.emit(ir);
let asm_text = asm_generator.to_text();
if let Some(output_path) = args.output {
match std::fs::write(&output_path, asm_text) {
Ok(_) => println!("Assembly code written to {}", output_path),
Err(e) => eprintln!("Failed to write assembly code to {}: {}", output_path, e),
}
} else {
println!("{}", asm_text);
}
}
}
+121
View File
@@ -0,0 +1,121 @@
///
/// @file std.c
/// @brief Implementation of external / built-in runtime functions
/// @author zenglj (zenglj@live.com)
/// @version 1.0
/// @date 2024-09-29
///
/// @copyright Copyright (c) 2024
///
/// @par Change log:
/// <table>
/// <tr><th>Date <th>Version <th>Author <th>Description
/// <tr><td>2024-09-29 <td>1.0 <td>zenglj <td>Initial version
/// </table>
///
#include <stdio.h>
#include <stdarg.h>
/// Reads one decimal integer from standard input.
///
/// @return the value read, or 0 when no integer could be read (end of input or text that is
///         not a number), instead of returning an uninitialised value.
int getint()
{
    int d = 0;
    if (scanf("%d", &d) != 1) {
        return 0;
    }
    return d;
}
/// Reads one character from standard input (whitespace is not skipped).
///
/// @return the character read, or 0 when nothing could be read (end of input), instead of
///         returning an uninitialised value.
int getch()
{
    char d = 0;
    if (scanf("%c", &d) != 1) {
        return 0;
    }
    return d;
}
/// Reads an element count followed by that many integers into `a`.
///
/// @param a destination array; the caller must provide room for all elements
/// @return the element count that was read, or 0 when the count itself could not be read
int getarray(int a[])
{
    int n = 0, i;

    // Read the number of elements
    if (scanf("%d", &n) != 1) {
        return 0;
    }

    // Read the elements; stop at the first one that cannot be read, since every later read
    // would fail the same way
    for (i = 0; i < n; ++i) {
        if (scanf("%d", &a[i]) != 1) {
            break;
        }
    }

    return n;
}
/// Writes `k` to standard output in decimal, without a trailing newline.
void putint(int k)
{
    fprintf(stdout, "%d", k);
}
/// Writes the character whose code is `c` to standard output.
void putch(int c)
{
    char ch = (char)c;
    fprintf(stdout, "%c", ch);
}
/// Writes an array as `n: e0 e1 …` followed by a newline.
///
/// @param n number of elements to print
/// @param d the elements
void putarray(int n, int * d)
{
    int idx = 0;

    // Element count
    printf("%d:", n);

    // Elements, each preceded by a space
    while (idx < n) {
        printf(" %d", d[idx]);
        idx++;
    }

    // Terminating newline
    printf("\n");
}
/// Writes the NUL-terminated string `str` to standard output, without a trailing newline.
void putstr(char * str)
{
    fprintf(stdout, "%s", str);
}
/// Reads one floating-point number from standard input.
///
/// @return the value read, or 0 when no number could be read, instead of returning an
///         uninitialised value.
float getfloat()
{
    float n = 0.0f;
    if (scanf("%a", &n) != 1) {
        return 0.0f;
    }
    return n;
}
/// Reads an element count followed by that many floating-point numbers into `a`.
///
/// @param a destination array; the caller must provide room for all elements
/// @return the element count that was read, or 0 when the count itself could not be read
int getfarray(float a[])
{
    int n = 0;
    if (scanf("%d", &n) != 1) {
        return 0;
    }
    // Stop at the first element that cannot be read, since every later read would fail the
    // same way
    for (int i = 0; i < n; i++) {
        if (scanf("%a", &a[i]) != 1) {
            break;
        }
    }
    return n;
}
/// Writes `a` to standard output in hexadecimal floating-point notation (`%a`).
void putfloat(float a)
{
    fprintf(stdout, "%a", a);
}
/// Writes a float array as `n: e0 e1 …` followed by a newline, each element in `%a` notation.
///
/// @param n number of elements to print
/// @param a the elements
void putfarray(int n, float a[])
{
    int idx = 0;

    printf("%d:", n);
    while (idx < n) {
        printf(" %a", a[idx]);
        idx++;
    }
    printf("\n");
}
/// printf-style formatted output to standard output.
///
/// @param a   format string
/// @param ... values referenced by the format string
void putf(char a[], ...)
{
    va_list ap;

    va_start(ap, a);
    vprintf(a, ap);
    va_end(ap);
}
+177
View File
@@ -0,0 +1,177 @@
#!/usr/bin/env python3
"""Compile an ARM assembly file and run it with qemu-arm-static."""
from __future__ import annotations
import argparse
import shutil
import subprocess
import sys
import tempfile
from pathlib import Path
def parse_args() -> argparse.Namespace:
    """Parse the command line of this tool.

    Everything after the first ``--`` in ``sys.argv`` is kept aside for the
    program being run and is not interpreted by this tool.

    Returns:
        The parsed namespace, with the extra attribute ``program_args`` holding
        the arguments that followed ``--`` (an empty list when there is none).
    """
    argv = sys.argv
    try:
        split_at = argv.index("--")
    except ValueError:
        tool_args, program_args = argv[1:], []
    else:
        tool_args, program_args = argv[1:split_at], argv[split_at + 1 :]

    parser = argparse.ArgumentParser(
        description=(
            "Compile an ARM assembly file with arm-linux-gnueabihf-gcc, "
            "run it with qemu-arm-static, then print stdout and $?."
        )
    )
    add = parser.add_argument

    add("asm_file", type=Path, help="path to the assembly file")
    add(
        "--cc",
        default="arm-linux-gnueabihf-gcc",
        help="cross compiler to use (default: arm-linux-gnueabihf-gcc)",
    )
    add(
        "--qemu",
        default="qemu-arm-static",
        help="ARM qemu runner to use (default: qemu-arm-static)",
    )
    add(
        "-o",
        "--output",
        type=Path,
        help="output executable path; defaults to an auto-cleaned temporary file",
    )
    add(
        "-g",
        "--gdb",
        action="store_true",
        help="compile with -g and run qemu in gdb stub mode",
    )
    add(
        "--gdb-port",
        default="1234",
        metavar="PORT",
        help="qemu gdb stub port used with -g/--gdb (default: 1234)",
    )
    add(
        "--gdb-command",
        help=(
            "gdb command shown in debug instructions "
            "(default: first available of arm-linux-gnueabihf-gdb, gdb-multiarch, gdb)"
        ),
    )
    add(
        "--no-static",
        action="store_true",
        help="do not pass -static to the compiler",
    )
    add(
        "--gcc-arg",
        action="append",
        default=[],
        help="extra argument passed to gcc; repeat for multiple args",
    )
    add(
        "--std-c",
        type=Path,
        default=Path("tests/std.c"),
        help="C runtime source compiled with the assembly file (default: tests/std.c)",
    )
    add(
        "--no-std-c",
        action="store_true",
        help="do not compile tests/std.c with the assembly file",
    )

    namespace = parser.parse_args(tool_args)
    namespace.program_args = program_args
    return namespace
def require_command(command: str) -> None:
    """Exit with status 127 unless ``command`` can be found by ``shutil.which``."""
    if shutil.which(command) is not None:
        return
    print(f"error: command not found: {command}", file=sys.stderr)
    sys.exit(127)
def run_command(command: list[str]) -> subprocess.CompletedProcess[bytes]:
    """Run ``command`` to completion, capturing its stdout and stderr as bytes.

    A non-zero exit status is not treated as an error. If the process cannot
    be started at all, an error is printed and the tool exits with status 127.
    """
    try:
        completed = subprocess.run(command, capture_output=True, check=False)
    except OSError as err:
        print(f"error: failed to run {command[0]}: {err}", file=sys.stderr)
        sys.exit(127)
    return completed
def choose_gdb_command(requested_command: str | None) -> str:
    """Pick the gdb executable name to show in the debug instructions.

    An explicitly requested command wins. Otherwise the first candidate found
    on ``PATH`` is used, falling back to ``gdb-multiarch`` when none is
    installed.
    """
    if requested_command:
        return requested_command
    candidates = ("arm-linux-gnueabihf-gdb", "gdb-multiarch", "gdb")
    installed = (name for name in candidates if shutil.which(name) is not None)
    return next(installed, "gdb-multiarch")
def main() -> int:
    """Compile the assembly file, run it under qemu and report its exit status.

    Returns:
        2 when an input file is missing, the compiler's exit status when
        compilation fails, and 0 otherwise. The exit status of the program
        that was run is printed as ``$? = N`` rather than returned.
    """
    args = parse_args()

    asm_file = args.asm_file
    if not asm_file.is_file():
        print(f"error: assembly file not found: {asm_file}", file=sys.stderr)
        return 2

    std_c = args.std_c
    link_std_c = not args.no_std_c
    if link_std_c and not std_c.is_file():
        print(f"error: std C file not found: {std_c}", file=sys.stderr)
        return 2

    for tool in (args.cc, args.qemu):
        require_command(tool)

    # Without -o the executable lives in a directory that is removed on exit.
    with tempfile.TemporaryDirectory(prefix="run-arm-asm-") as temp_dir:
        output = args.output or Path(temp_dir) / asm_file.with_suffix("").name

        compile_cmd = [
            args.cc,
            *([] if args.no_static else ["-static"]),
            *(["-g"] if args.gdb else []),
            *args.gcc_arg,
            str(asm_file),
            *([str(std_c)] if link_std_c else []),
            "-o",
            str(output),
        ]
        compile_result = run_command(compile_cmd)
        sys.stdout.buffer.write(compile_result.stdout)
        sys.stderr.buffer.write(compile_result.stderr)
        if compile_result.returncode != 0:
            return compile_result.returncode

        qemu_cmd = [args.qemu]
        if args.gdb:
            qemu_cmd += ["-g", args.gdb_port]
            resolved_output = output.resolve()
            gdb_command = choose_gdb_command(args.gdb_command)
            # Printed before qemu starts, because qemu then waits for a debugger.
            print(f"executable: {resolved_output}", file=sys.stderr, flush=True)
            print(
                f"gdb: {gdb_command} {resolved_output} "
                f"-ex 'target remote :{args.gdb_port}'",
                file=sys.stderr,
                flush=True,
            )
            print("gdb: use 'si' or 'c'; do not use 'run'", file=sys.stderr, flush=True)
        qemu_cmd += [str(output), *args.program_args]

        run_result = run_command(qemu_cmd)
        sys.stdout.buffer.write(run_result.stdout)
        sys.stderr.buffer.write(run_result.stderr)

        # Keep the status line on a line of its own.
        if run_result.stdout and not run_result.stdout.endswith(b"\n"):
            print()
        print(f"$? = {run_result.returncode}")
        return 0
if __name__ == "__main__":
raise SystemExit(main())