diff --git a/.gitignore b/.gitignore index 8774c22..5265a51 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ /target /testcases -/output \ No newline at end of file +/output +*.pyc +*.zip \ No newline at end of file diff --git a/src/backend/arm_instr.rs b/src/backend/arm_instr.rs new file mode 100644 index 0000000..ccf7fb3 --- /dev/null +++ b/src/backend/arm_instr.rs @@ -0,0 +1,287 @@ +use std::{fmt::Display, ops::Add}; + +use crate::backend::register_allocator::{REG_FP, REG_LR, REG_PC, REG_R0, REG_R1, REG_R2, REG_R3, REG_R12, REG_SP, Register}; +pub enum ARMInstr{ + Move(MoveInstr), + Load(LoadInstr), + LoadPseudo(LoadPseudoInstr), + Store(StoreInstr), + Mul(MulInstr), + SDiv(SDivInstr), + Add(AddInstr), + Sub(SubInstr), + Cmp(CmpInstr), + Push(PushInstr), + Pop(PopInstr), + FunctionHead(String, usize), + Bl(BlInstr), +} + +impl Display for ARMInstr { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + ARMInstr::Move(instr) => write!(f, "{}", instr), + ARMInstr::Load(instr) => write!(f, "{}", instr), + ARMInstr::LoadPseudo(instr) => write!(f, "{}", instr), + ARMInstr::Store(instr) => write!(f, "{}", instr), + ARMInstr::Mul(instr) => write!(f, "{}", instr), + ARMInstr::SDiv(instr) => write!(f, "{}", instr), + ARMInstr::Add(instr) => write!(f, "{}", instr), + ARMInstr::Sub(instr) => write!(f, "{}", instr), + ARMInstr::Cmp(instr) => write!(f, "{}", instr), + ARMInstr::Push(instr) => write!(f, "{}", instr), + ARMInstr::Pop(instr) => write!(f, "{}", instr), + ARMInstr::Bl(instr) => write!(f, "{}", instr), + ARMInstr::FunctionHead(name, align_size) => write!(f, ".align {}\n.global {}\n.type {}, %function\n{}:", align_size, name, name, name), + } + } +} +pub enum RegisterOrImm { + Reg(Register), + Imm(i32), +} +impl Display for RegisterOrImm { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + RegisterOrImm::Reg(reg) => write!(f, "{}", reg), + RegisterOrImm::Imm(imm) => write!(f, "#{}", 
/// Condition suffix appended to a conditional ARM mnemonic (e.g. `mov` + `eq`).
pub enum ConditionCode {
    Eq,
    Ne,
    Lt,
    Le,
    Gt,
    Ge,
}

impl Display for ConditionCode {
    /// Writes the two-letter lowercase suffix for this condition.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let suffix = match self {
            ConditionCode::Eq => "eq",
            ConditionCode::Ne => "ne",
            ConditionCode::Lt => "lt",
            ConditionCode::Le => "le",
            ConditionCode::Gt => "gt",
            ConditionCode::Ge => "ge",
        };
        write!(f, "{}", suffix)
    }
}
ARMInstr::Load(LoadInstr(dest, REG_FP, Some(RegisterOrImm::Imm(-(offset as i32))))) + } +} +impl Display for LoadInstr { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let LoadInstr(dest, base, offset) = self; + if let Some(offset) = offset { + write!(f, "ldr {}, [{}, {}]", dest, base, offset) + } else { + write!(f, "ldr {}, [{}]", dest, base) + } + } +} +pub struct LoadPseudoInstr(Register, String); +impl LoadPseudoInstr { + pub fn new(dest: Register, name: String) -> ARMInstr { + ARMInstr::LoadPseudo(LoadPseudoInstr(dest, name)) + } +} +impl Display for LoadPseudoInstr { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let LoadPseudoInstr(dest, name) = self; + write!(f, "ldr {}, ={}", dest, name) + } +} +pub struct StoreInstr(Register, Register, Option); +impl StoreInstr { + pub fn new(src: Register, dest: Register, offset: Option) -> ARMInstr { + ARMInstr::Store(StoreInstr(src, dest, offset)) + } + pub fn new_stack(dest: Register, offset: i32) -> ARMInstr { + ARMInstr::Store(StoreInstr(dest, REG_FP, Some(RegisterOrImm::Imm(-(offset as i32))))) + } +} +impl Display for StoreInstr { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let StoreInstr(src, dest, offset) = self; + if let Some(offset) = offset { + write!(f, "str {}, [{}, {}]", src, dest, offset) + } else { + write!(f, "str {}, [{}]", src, dest) + } + } +} + +pub struct MulInstr(Register, Register, RegisterOrImm); +impl MulInstr { + pub fn new(dest: Register, left: Register, right: RegisterOrImm) -> ARMInstr { + ARMInstr::Mul(MulInstr(dest, left, right)) + } +} +impl Display for MulInstr { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let MulInstr(dest, left, right) = self; + write!(f, "mul {}, {}, {}", dest, left, right) + } +} +pub struct SDivInstr(Register, Register, RegisterOrImm); +impl SDivInstr { + pub fn new(dest: Register, left: Register, right: RegisterOrImm) -> ARMInstr { + 
ARMInstr::SDiv(SDivInstr(dest, left, right)) + } +} +impl Display for SDivInstr { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let SDivInstr(dest, left, right) = self; + write!(f, "sdiv {}, {}, {}", dest, left, right) + } +} +pub struct AddInstr(Register, Register, RegisterOrImm); +impl AddInstr { + pub fn new(dest: Register, left: Register, right: RegisterOrImm) -> ARMInstr { + ARMInstr::Add(AddInstr(dest, left, right)) + } + pub fn new_sp(offset: i32) -> ARMInstr { + ARMInstr::Add(AddInstr(REG_SP, REG_SP, RegisterOrImm::Imm(offset))) + } +} +impl Display for AddInstr { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let AddInstr(dest, left, right) = self; + write!(f, "add {}, {}, {}", dest, left, right) + } +} +pub struct SubInstr(Register, Register, RegisterOrImm); +impl SubInstr { + pub fn new(dest: Register, left: Register, right: RegisterOrImm) -> ARMInstr { + ARMInstr::Sub(SubInstr(dest, left, right)) + } + pub fn new_sp(offset: i32) -> ARMInstr { + ARMInstr::Sub(SubInstr(REG_SP, REG_SP, RegisterOrImm::Imm(offset))) + } +} +impl Display for SubInstr { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let SubInstr(dest, left, right) = self; + write!(f, "sub {}, {}, {}", dest, left, right) + } +} +pub struct CmpInstr(Register, Register); +impl Display for CmpInstr { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let CmpInstr(left, right) = self; + write!(f, "cmp {}, {}", left, right) + } +} + +pub struct PushInstr(Vec); +impl PushInstr { + pub fn new_push_fp_lr() -> ARMInstr { + ARMInstr::Push(PushInstr(vec![ + REG_FP, + REG_LR, + ])) + } + pub fn new_push_caller_save() -> ARMInstr { + ARMInstr::Push(PushInstr(vec![ + REG_R0, + REG_R1, + REG_R2, + REG_R3, + REG_R12, + ])) + } +} +impl Display for PushInstr { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> + std::fmt::Result { + let PushInstr(registers) = self; + let regs_str = registers.iter().map(|reg| 
format!("{}", reg)).collect::>().join(", "); + write!(f, "push {{{}}}", regs_str) + } +} + +pub struct PopInstr(Vec); +impl PopInstr { + pub fn new_pop_fp_pc() -> ARMInstr { + ARMInstr::Pop(PopInstr(vec![ + REG_FP, + REG_PC, + ])) + } + pub fn new_pop_caller_save() -> ARMInstr { + ARMInstr::Pop(PopInstr(vec![ + REG_R0, + REG_R1, + REG_R2, + REG_R3, + REG_R12, + ])) + } +} +impl Display for PopInstr { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let PopInstr(registers) = self; + let regs_str = registers.iter().map(|reg| format!("{}", reg)).collect::>().join(", "); + write!(f, "pop {{{}}}", regs_str) + } +} + +pub struct BlInstr(String); +impl Display for BlInstr { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let BlInstr(func_name) = self; + write!(f, "bl {}", func_name) + } +} + +impl BlInstr { + pub fn new(func_name: String) -> ARMInstr { + ARMInstr::Bl(BlInstr(func_name)) + } +} \ No newline at end of file diff --git a/src/backend/generator.rs b/src/backend/generator.rs new file mode 100644 index 0000000..3cb4900 --- /dev/null +++ b/src/backend/generator.rs @@ -0,0 +1,203 @@ +use std::collections::BTreeMap; + +use crate::{backend::{arm_instr::{ARMInstr, AddInstr, BlInstr, LoadInstr, LoadPseudoInstr, MoveInstr, MulInstr, PopInstr, PushInstr, RegisterOrImm, SDivInstr, StoreInstr, SubInstr}, register_allocator::{REG_R0, REG_R1, REG_R2, REG_R3, Register, RegisterAlloc, RegisterAllocator}, types::ARMAsmVar}, ir::types::{Function, IRInstr, MoveRValue, Variable, VariableType}}; +use crate::ir::types::BinaryOp as IRBinaryOp; +pub const ARM_STACK_ALIGNMENT: usize = 8; +pub struct Generator { + instrs: Vec, + var_inited: Vec, + var_uninited: Vec, + register_allocator: RegisterAllocator, +} + +const DEFAULT_VAR_ALIGN: usize = 4; +impl Generator { + pub fn new() -> Self { + Self { + instrs: Vec::new(), + var_inited: Vec::new(), + var_uninited: Vec::new(), + register_allocator: RegisterAllocator::new(), + } + } + pub fn 
emit(&mut self, ir_instrs: Vec) { + for ir_instr in ir_instrs { + match ir_instr { + IRInstr::DefineFunc(func, args, body) => self.emit_func_def(func, args, body), + IRInstr::Declare(var) => self.emit_global_decl(var), + _ => unreachable!(), + } + } + } + pub fn to_text(&self) -> String { + let mut text = String::new(); + text.push_str(".arch armv7ve\n.arm\n.fpu vfpv4\n"); + for var in &self.var_uninited { + text.push_str(&format!(".comm {}, {}, {}\n", var.name, var.size, var.align)); + } + for var in &self.var_inited { + text.push_str(&format!(".data\n.align {}\n.global {}\n.type {}, @object\n:{}\n", var.align, var.name, var.name, var.name)); + text.push_str(&format!(".word 0\n")); + } + text.push_str(".text\n"); + for instr in &self.instrs { + text.push_str(&format!("{}\n", instr)); + } + text + } + + + fn emit_global_decl(&mut self, var: Variable) { + self.var_uninited.push(ARMAsmVar { + name: format!("global_var_{}", var.index), + size: var.data_type.size_in_bytes(), + align: DEFAULT_VAR_ALIGN, + }); + } + + fn emit_func_def(&mut self, func: Function, _args: Vec, body: Vec) { + self.instrs.push(ARMInstr::FunctionHead(func.name.clone(), 4)); // Assuming 4-byte alignment for simplicity + self.instrs.push(PushInstr::new_push_fp_lr()); + self.instrs.push(MoveInstr::new_sp_to_fp()); + self.emit_func(body); + } + + fn emit_func(&mut self, instrs: Vec) { + let mut encounter_entry = false; + let mut stack_size_needed = 0; + let mut var_index_to_stack_offset = BTreeMap::new(); + for ir_instr in instrs { + match ir_instr { + IRInstr::Binary(dest, left, op, right) => self.emit_binary(dest, left, op, right, &var_index_to_stack_offset), + IRInstr::Exit(v) => { + if let Some(v) = v { + let ret_alloc = self.register_allocator.alloc_reg(REG_R0).expect("Ran out of registers"); + let ret_reg = ret_alloc.reg; + let v_alloc = self.register_allocator.alloc(v).expect("Ran out of registers"); + let v_reg = v_alloc.reg; + if !v_alloc.is_reused { + let v_stack_offset = 
var_index_to_stack_offset.get(&v.index).expect("Variable not declared"); + self.instrs.push(LoadInstr::new_stack(v_reg, *v_stack_offset as i32)); + } + self.instrs.push(MoveInstr::new_uncond(ret_reg, RegisterOrImm::Reg(v_reg))); + } + self.instrs.push(MoveInstr::new_fp_to_sp()); + self.instrs.push(PopInstr::new_pop_fp_pc()); + }, + IRInstr::FuncCall(func, args, ret) => self.emit_func_call(func, args, ret, &var_index_to_stack_offset), + IRInstr::Move(dest, src) => self.emit_move(dest, src, &var_index_to_stack_offset), + IRInstr::Declare(variable) => { + assert!(!encounter_entry, "Variable declarations must come before entry instruction"); + let size = variable.data_type.size_in_bytes(); + stack_size_needed = (stack_size_needed + size).next_multiple_of(ARM_STACK_ALIGNMENT); + var_index_to_stack_offset.insert(variable.index, stack_size_needed); + }, + IRInstr::Entry => { + assert!(!encounter_entry, "Multiple entry instructions are not allowed"); + encounter_entry = true; + self.instrs.push(SubInstr::new_sp(stack_size_needed as i32)); + }, + IRInstr::DefineFunc(_, _, _) => unreachable!(), + } + } + } + fn emit_func_call(&mut self, func: Function, args: Vec, ret: Option, var_index_to_stack_offset: &BTreeMap) { + + self.instrs.push(PushInstr::new_push_caller_save()); + if args.len() > 4 { + todo!("More than 4 arguments not supported yet"); + } + const ARG_REGS: [Register; 4] = [REG_R0, REG_R1, REG_R2, REG_R3]; + for (i, arg) in args.into_iter().enumerate() { + let arg_alloc = self.register_allocator.alloc(arg).expect("Ran out of registers"); + let arg_reg = arg_alloc.reg; + if !arg_alloc.is_reused { + let arg_stack_offset = var_index_to_stack_offset.get(&arg.index).expect("Variable not declared"); + self.instrs.push(LoadInstr::new_stack(arg_reg, *arg_stack_offset as i32)); + } + self.instrs.push(MoveInstr::new_uncond(ARG_REGS[i], RegisterOrImm::Reg(arg_reg))); + } + self.instrs.push(BlInstr::new(func.name.clone())); + if let Some(ret) = ret { + let ret_alloc = 
self.register_allocator.alloc(ret).expect("Ran out of registers"); + let ret_reg = ret_alloc.reg; + self.instrs.push(MoveInstr::new_uncond(ret_reg, RegisterOrImm::Reg(REG_R0))); + } + + + self.instrs.push(PopInstr::new_pop_caller_save()); + } + fn emit_move(&mut self, dest: Variable, src: MoveRValue, var_index_to_stack_offset: &BTreeMap) { + let dest_alloc = self.register_allocator.alloc(dest).expect("Ran out of registers"); + let dest_register = dest_alloc.reg; + match src { + MoveRValue::Var(variable) => { + if !dest_alloc.is_reused { + let var_stack_offset = var_index_to_stack_offset.get(&variable.index).expect("Variable not found"); + self.instrs.push(LoadInstr::new_stack(dest_register, *var_stack_offset as i32)); + } + }, + MoveRValue::ConstInt(literal_int) => self.instrs.push(MoveInstr::new_uncond(dest_register, RegisterOrImm::Imm(literal_int))), + }; + match dest.var_type { + VariableType::Global => { + let address_reg = self.register_allocator.alloc_any().expect("Ran out of registers"); + self.instrs.push(LoadPseudoInstr::new(address_reg, format!("global_var_{}", dest.index))); + self.instrs.push(StoreInstr::new(dest_register, address_reg, None)); + }, + VariableType::ParamTemp => { + todo!() + }, + _ => { + let offset = *var_index_to_stack_offset.get(&dest.index).expect("Variable not declared"); + self.instrs.push(StoreInstr::new_stack(dest_register, offset as i32)); + } + } + } + fn emit_binary(&mut self, dest: Variable, left: Variable, op: IRBinaryOp, right: Variable, var_index_to_stack_offset: &BTreeMap) { + let left_alloc = self.register_allocator.alloc(left).expect("Ran out of registers"); + let right_alloc = self.register_allocator.alloc(right).expect("Ran out of registers"); + let dest_alloc = self.register_allocator.alloc(dest).expect("Ran out of registers"); + let left_reg = left_alloc.reg; + let right_reg = right_alloc.reg; + let dest_reg = dest_alloc.reg; + if !left_alloc.is_reused { + let left_offset = 
var_index_to_stack_offset.get(&left.index).expect("Variable not declared"); + self.instrs.push(LoadInstr::new_stack(left_reg, *left_offset as i32)); + + } + if !right_alloc.is_reused { + let right_offset = var_index_to_stack_offset.get(&right.index).expect("Variable not declared"); + self.instrs.push(LoadInstr::new_stack(right_reg, *right_offset as i32)); + } + match op { + IRBinaryOp::Add => { + self.instrs.push(AddInstr::new(dest_reg, left_reg, RegisterOrImm::Reg(right_reg))); + }, + IRBinaryOp::Sub => { + self.instrs.push(SubInstr::new(dest_reg, left_reg, RegisterOrImm::Reg(right_reg))); + }, + IRBinaryOp::Mul => { + self.instrs.push(MulInstr::new(dest_reg, left_reg, RegisterOrImm::Reg(right_reg))); + }, + IRBinaryOp::Div => { + self.instrs.push(SDivInstr::new(dest_reg, left_reg, RegisterOrImm::Reg(right_reg))); + }, + IRBinaryOp::Mod => { + let temp_reg = self.register_allocator.alloc_any().expect("Ran out of registers"); + self.instrs.push(SDivInstr::new(temp_reg, left_reg, RegisterOrImm::Reg(right_reg))); + self.instrs.push(MulInstr::new(temp_reg, temp_reg, RegisterOrImm::Reg(right_reg))); + self.instrs.push(SubInstr::new(dest_reg, left_reg, RegisterOrImm::Reg(temp_reg))); + }, + IRBinaryOp::Le => todo!(), + IRBinaryOp::Lt => todo!(), + IRBinaryOp::Gt => todo!(), + IRBinaryOp::Ge => todo!(), + IRBinaryOp::Ne => todo!(), + IRBinaryOp::Eq => todo!(), + } + let dest_stack_offset = var_index_to_stack_offset.get(&dest.index).expect("Variable not declared"); + self.instrs.push(StoreInstr::new_stack(dest_reg, *dest_stack_offset as i32)); + } + +} \ No newline at end of file diff --git a/src/backend/mod.rs b/src/backend/mod.rs new file mode 100644 index 0000000..9ce6647 --- /dev/null +++ b/src/backend/mod.rs @@ -0,0 +1,77 @@ +mod register_allocator; +pub mod generator; +mod arm_instr; +pub mod types; + + +#[cfg(test)] +mod tests { + use std::io::BufRead; + use std::path::Path; + use std::fs::File; + use std::io::Write; + use crate::frontend::lexer::Lexer; + use 
crate::frontend::parser::Parser; + use crate::utils::case_list::CaseList; + use crate::utils::num_sequence::NumberSequence; + use crate::ir::generator::Generator as IRGenerator; + pub use super::generator::Generator as ASMGenerator; + fn test_case(case_str: &str) { + let case_sequence = NumberSequence::from_str(case_str).unwrap(); + let case_list = CaseList::from_dir(&Path::new("./testcases")).unwrap(); + let mut error_case_cnt = 0; + for case_no in case_sequence { + let case_path = case_list.get_case_path(case_no).unwrap(); + println!("{}", case_path.display()); + let file = File::open(&case_path).unwrap(); + let mut buf_reader = std::io::BufReader::new(file); + let mut lexer = Lexer::new(); + let mut full_text = String::new(); + loop { + let mut line = String::new(); + let bytes_read = buf_reader.read_line(&mut line).unwrap(); + if bytes_read == 0 { + break; + } + full_text.push_str(&line); + lexer.parse_next_str(&line); + } + let (tokens, diagnostics) = lexer.finish(); + let mut is_error = false; + if !diagnostics.is_empty() { + diagnostics.print(&format!("{}", case_path.display()), &full_text); + is_error = true; + } + let mut parser = Parser::new(tokens, diagnostics); + let compile_unit = parser.parse(); + let case_name = case_list.get_case_name(case_no).unwrap().strip_suffix(".c").unwrap(); + if !parser.diagnostics.is_empty() { + parser.diagnostics.print(&format!("{}", case_path.display()), &full_text); + is_error = true; + } + let mut generator = IRGenerator::new(); + let ir = generator.emit(compile_unit); + // if !generator.diagnostic.is_empty() { + // generator.diagnostic.print(&format!("{}", case_path.display()), &full_text); + // is_error = true; + // } + let mut asm_generator = ASMGenerator::new(); + asm_generator.emit(ir); + let asm_text = asm_generator.to_text(); + let mut output_file = File::create(format!("output/{}.s", case_name)).unwrap(); + output_file.write_all(asm_text.as_bytes()).unwrap(); + if is_error { + error_case_cnt += 1; + } + } + if 
error_case_cnt > 0 { + panic!("Found {} cases with errors", error_case_cnt); + } + + } + #[test] + fn test_expr() { + test_case("0-3,14-25"); + // test_case("0-3,14-25"); + } +} \ No newline at end of file diff --git a/src/backend/register_allocator.rs b/src/backend/register_allocator.rs new file mode 100644 index 0000000..347e87e --- /dev/null +++ b/src/backend/register_allocator.rs @@ -0,0 +1,202 @@ +use std::{cell::RefCell, collections::BTreeMap, fmt::Display, rc::{Rc, Weak}}; + +use crate::ir::types::Variable; + +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +pub struct Register { + name: &'static str, +} +impl Display for Register { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.name) + } +} +macro_rules! register_declare { + ($($reg:ident => $name:expr),*) => { + $( + pub const $reg: Register = Register { name: $name }; + )* + }; +} +register_declare! { + REG_R0 => "r0", + REG_R1 => "r1", + REG_R2 => "r2", + REG_R3 => "r3", + REG_R4 => "r4", + REG_R5 => "r5", + REG_R6 => "r6", + REG_R7 => "r7", + REG_R8 => "r8", + REG_R9 => "r9", + REG_R10 => "r10", + REG_R11 => "r11", + REG_R12 => "r12", + REG_SP => "sp", + REG_LR => "lr", + REG_PC => "pc", + REG_FP => "fp" +} + +pub const REGISTERS: &[Register] = &[ + REG_R0, REG_R1, REG_R2, REG_R3, REG_R4, REG_R5, REG_R6, REG_R7, REG_R8, REG_R9, REG_R10, REG_R11, REG_R12, REG_SP, REG_LR, REG_PC, REG_FP +]; + +pub const REGISTERS_CAN_ALLOC: &[Register] = &[ + REG_R0, REG_R1, REG_R2, REG_R3, REG_R4, REG_R5, REG_R6, REG_R7, REG_R8, REG_R9, REG_R10, REG_R12 +]; +pub struct RegisterAlloc { + allocator: Weak>, + pub reg: Register, + pub is_reused: bool, +} +impl Drop for RegisterAlloc { + fn drop(&mut self) { + if let Some(allocator) = self.allocator.upgrade() { + let mut allocator = allocator.borrow_mut(); + allocator.mark_unused(self.reg); + } + } +} +pub enum RegisterUseKind { + Designated, + UsedByVariable(Variable), + AllocatedToVariable(Variable), + Free, +} +struct 
RegisterAllocatorInner { + register_map: BTreeMap, + variable_to_register: BTreeMap, +} +pub struct RegisterAllocator { + // register_map: BTreeMap, + // variable_to_register: BTreeMap, + inner: Rc>, +} +impl RegisterAllocatorInner { + fn mark_unused(&mut self, reg: Register) { + if let Some(use_kind) = self.register_map.get_mut(®) { + match use_kind { + RegisterUseKind::Designated => { + *use_kind = RegisterUseKind::Free; + }, + RegisterUseKind::UsedByVariable(var) => { + *use_kind = RegisterUseKind::AllocatedToVariable(*var); + }, + _ => panic!("Trying to mark a register as unused that is not in use"), + } + } + } + +} +impl RegisterAllocator { + pub fn new() -> Self { + let mut register_map = BTreeMap::new(); + for ® in REGISTERS_CAN_ALLOC { + register_map.insert(reg, RegisterUseKind::Free); + } + Self { + inner: Rc::new(RefCell::new(RegisterAllocatorInner { + register_map, + variable_to_register: BTreeMap::new(), + })), + } + } + + pub fn alloc(&mut self, var: Variable) -> Option { + let mut inner = self.inner.borrow_mut(); + if let Some(®) = inner.variable_to_register.get(&var) { + // Variable already has a register allocated + let use_kind = inner.register_map.get_mut(®).expect("Inconsistent state: variable has a register but it's not in the register map"); + assert!(matches!(use_kind, RegisterUseKind::UsedByVariable(v) | RegisterUseKind::AllocatedToVariable(v) if *v == var)); + *use_kind = RegisterUseKind::UsedByVariable(var); + return Some(RegisterAlloc { + allocator: Rc::downgrade(&self.inner), + reg, + is_reused: true, + }); + } + // Find a free register + for (®, use_kind) in inner.register_map.iter_mut() { + // Find free register first + if let RegisterUseKind::Free = use_kind { + *use_kind = RegisterUseKind::UsedByVariable(var); + inner.variable_to_register.insert(var, reg); + return Some(RegisterAlloc { + allocator: Rc::downgrade(&self.inner), + reg, + is_reused: false, + }); + } + } + let RegisterAllocatorInner{register_map, variable_to_register} = 
&mut *inner; + for (®, use_kind) in register_map.iter_mut() { + // Find allocated register then + if let RegisterUseKind::AllocatedToVariable(ori_var) = use_kind { + assert!(variable_to_register.remove(&ori_var).is_some()); + *use_kind = RegisterUseKind::UsedByVariable(var); + variable_to_register.insert(var, reg); + return Some(RegisterAlloc { + allocator: Rc::downgrade(&self.inner), + reg, + is_reused: false, + }); + } + } + // No free register available + None + } + + pub fn alloc_reg(&mut self, reg: Register) -> Option { + let mut inner = self.inner.borrow_mut(); + let RegisterAllocatorInner{register_map, variable_to_register} = &mut *inner; + let use_kind = register_map.get_mut(®).expect("Trying to allocate a register that is not in the register map"); + match use_kind { + RegisterUseKind::Free => { + *use_kind = RegisterUseKind::Designated; + return Some(RegisterAlloc { + allocator: Rc::downgrade(&self.inner), + reg, + is_reused: false, + }); + }, + RegisterUseKind::UsedByVariable(_var) => { + return None; + }, + RegisterUseKind::AllocatedToVariable(var) => { + variable_to_register.remove(var); + *use_kind = RegisterUseKind::Designated; + return Some(RegisterAlloc { + allocator: Rc::downgrade(&self.inner), + reg, + is_reused: false, + }); + }, + RegisterUseKind::Designated => { + return None; + }, + } + + } + + pub fn alloc_any(&mut self) -> Option { + let mut inner = self.inner.borrow_mut(); + + for (®, use_kind) in inner.register_map.iter_mut() { + if let RegisterUseKind::Free = use_kind { + *use_kind = RegisterUseKind::Designated; + return Some(reg); + } + } + let RegisterAllocatorInner{register_map, variable_to_register} = &mut *inner; + for (®, use_kind) in register_map.iter_mut() { + if let RegisterUseKind::AllocatedToVariable(ori_var) = use_kind { + variable_to_register.remove(&ori_var); + *use_kind = RegisterUseKind::Designated; + return Some(reg); + } + } + + None + } +} \ No newline at end of file diff --git a/src/backend/types.rs 
b/src/backend/types.rs new file mode 100644 index 0000000..b688dd4 --- /dev/null +++ b/src/backend/types.rs @@ -0,0 +1,5 @@ +pub struct ARMAsmVar { + pub name: String, + pub size: usize, + pub align: usize, +} \ No newline at end of file diff --git a/src/ir/types.rs b/src/ir/types.rs index cd7dc86..648dc00 100644 --- a/src/ir/types.rs +++ b/src/ir/types.rs @@ -43,6 +43,15 @@ pub enum IRType { I1, Void, } +impl IRType { + pub fn size_in_bytes(&self) -> usize { + match self { + IRType::I32 => 4, + IRType::I1 => 1, + IRType::Void => 0, + } + } +} impl Display for IRType { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { @@ -72,7 +81,7 @@ impl Display for MoveRValue { } } } -#[derive(Clone, Copy)] +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug, Hash)] pub enum VariableType { Global, ParamTemp, @@ -86,6 +95,28 @@ pub struct Variable { pub var_type: VariableType, pub data_type: IRType, } +impl PartialEq for Variable { + fn eq(&self, other: &Self) -> bool { + self.index == other.index && self.var_type == other.var_type + } +} +impl Eq for Variable {} +impl PartialOrd for Variable { + fn partial_cmp(&self, other: &Self) -> Option { + Some(match self.index.cmp(&other.index) { + std::cmp::Ordering::Equal => self.var_type.cmp(&other.var_type), + ord => ord, + }) + } +} +impl Ord for Variable { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + match self.index.cmp(&other.index) { + std::cmp::Ordering::Equal => self.var_type.cmp(&other.var_type), + ord => ord, + } + } +} impl Display for Variable { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let prefix = match self.var_type { diff --git a/src/main.rs b/src/main.rs index 071fac6..1046d9f 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,6 +1,7 @@ mod frontend; mod ast; mod ir; +mod backend; mod utils; mod diagnostic; mod err; @@ -10,7 +11,7 @@ use std::{fs::File, io::BufRead}; use clap::Parser as ArgParser; use crate::{frontend::{lexer::Lexer, 
parser::Parser}, ir::generator::Generator}; - +use crate::backend::generator::Generator as ASMGerenerator; /// Simple minic compiler built by Rust #[derive(ArgParser, Debug)] #[command(version, about, long_about = None)] @@ -18,6 +19,10 @@ struct Args { /// Output the generated IR code #[arg(short = 'I', long = "ir")] output_ir: bool, + #[arg(skip)] + output_asm: bool, + #[arg(short = 't', long = "target", default_value = "ARM32")] + target: String, /// Use recursive descent parsing #[arg(short = 'D', long = "recursive-descent")] recursive_descent: bool, @@ -32,15 +37,18 @@ struct Args { } fn main() { - let args = Args::parse(); + let mut args = Args::parse(); if !args.output_ir { - eprintln!("Currently only IR generation is supported. Use -I to enable it."); - return; + args.output_asm = true; } if !args.recursive_descent { eprintln!("Currently only recursive descent parsing is supported. Use -D to enable it."); return; } + if args.target != "ARM32" { + eprintln!("Currently only ARM32 assembly output is supported. 
Use -t ARM32 to specify the target architecture."); + return; + } let source_path = std::path::Path::new(&args.source); let file = match File::open(&args.source) { Ok(f) => f, @@ -75,14 +83,28 @@ fn main() { if !generator.get_diagnostics().is_empty() { generator.get_diagnostics().print(&format!("{}", source_path.display()), &full_text); } - if let Some(output_path) = args.output { - match std::fs::write(&output_path, ir.iter().map(|instr| instr.to_string()).collect::>().join("\n")) { - Ok(_) => println!("IR code written to {}", output_path), - Err(e) => eprintln!("Failed to write IR code to {}: {}", output_path, e), + if args.output_ir { + if let Some(output_path) = args.output { + match std::fs::write(&output_path, ir.iter().map(|instr| instr.to_string()).collect::>().join("\n")) { + Ok(_) => println!("IR code written to {}", output_path), + Err(e) => eprintln!("Failed to write IR code to {}: {}", output_path, e), + } + } else { + for instr in ir { + println!("{}", instr); + } } - } else { - for instr in ir { - println!("{}", instr); + } else if args.output_asm { + let mut asm_generator = ASMGerenerator::new(); + asm_generator.emit(ir); + let asm_text = asm_generator.to_text(); + if let Some(output_path) = args.output { + match std::fs::write(&output_path, asm_text) { + Ok(_) => println!("Assembly code written to {}", output_path), + Err(e) => eprintln!("Failed to write assembly code to {}: {}", output_path, e), + } + } else { + println!("{}", asm_text); } } } diff --git a/tests/std.c b/tests/std.c new file mode 100644 index 0000000..b2da05c --- /dev/null +++ b/tests/std.c @@ -0,0 +1,121 @@ +/// +/// @file std.c +/// @brief 外部或内置函数实现 +/// @author zenglj (zenglj@live.com) +/// @version 1.0 +/// @date 2024-09-29 +/// +/// @copyright Copyright (c) 2024 +/// +/// @par 修改日志: +/// +///
<table><tr><th>Date <th>Version <th>Author <th>Description +///
<tr><td>2024-09-29 <td>1.0 <td>zenglj <td>新做 +/// </table>
/// Reads one decimal integer from standard input.
///
/// @return the integer read, or 0 when nothing could be converted
///         (end of input or malformed text)
int getint()
{
    int d = 0;

    // scanf returns the number of items converted. On failure d would be
    // left unset, so returning it uninitialised would be undefined behaviour.
    if (scanf("%d", &d) != 1) {
        return 0;
    }

    return d;
}
+ ) + ) + parser.add_argument("asm_file", type=Path, help="path to the assembly file") + parser.add_argument( + "--cc", + default="arm-linux-gnueabihf-gcc", + help="cross compiler to use (default: arm-linux-gnueabihf-gcc)", + ) + parser.add_argument( + "--qemu", + default="qemu-arm-static", + help="ARM qemu runner to use (default: qemu-arm-static)", + ) + parser.add_argument( + "-o", + "--output", + type=Path, + help="output executable path; defaults to an auto-cleaned temporary file", + ) + parser.add_argument( + "-g", + "--gdb", + action="store_true", + help="compile with -g and run qemu in gdb stub mode", + ) + parser.add_argument( + "--gdb-port", + default="1234", + metavar="PORT", + help="qemu gdb stub port used with -g/--gdb (default: 1234)", + ) + parser.add_argument( + "--gdb-command", + help=( + "gdb command shown in debug instructions " + "(default: first available of arm-linux-gnueabihf-gdb, gdb-multiarch, gdb)" + ), + ) + parser.add_argument( + "--no-static", + action="store_true", + help="do not pass -static to the compiler", + ) + parser.add_argument( + "--gcc-arg", + action="append", + default=[], + help="extra argument passed to gcc; repeat for multiple args", + ) + parser.add_argument( + "--std-c", + type=Path, + default=Path("tests/std.c"), + help="C runtime source compiled with the assembly file (default: tests/std.c)", + ) + parser.add_argument( + "--no-std-c", + action="store_true", + help="do not compile tests/std.c with the assembly file", + ) + args = parser.parse_args(tool_args) + args.program_args = program_args + return args + + +def require_command(command: str) -> None: + if shutil.which(command) is None: + print(f"error: command not found: {command}", file=sys.stderr) + sys.exit(127) + + +def run_command(command: list[str]) -> subprocess.CompletedProcess[bytes]: + try: + return subprocess.run(command, capture_output=True, check=False) + except OSError as err: + print(f"error: failed to run {command[0]}: {err}", file=sys.stderr) + 
sys.exit(127) + + +def choose_gdb_command(requested_command: str | None) -> str: + if requested_command: + return requested_command + + for command in ("arm-linux-gnueabihf-gdb", "gdb-multiarch", "gdb"): + if shutil.which(command) is not None: + return command + + return "gdb-multiarch" + + +def main() -> int: + args = parse_args() + asm_file = args.asm_file + if not asm_file.is_file(): + print(f"error: assembly file not found: {asm_file}", file=sys.stderr) + return 2 + + std_c = args.std_c + if not args.no_std_c and not std_c.is_file(): + print(f"error: std C file not found: {std_c}", file=sys.stderr) + return 2 + + require_command(args.cc) + require_command(args.qemu) + + with tempfile.TemporaryDirectory(prefix="run-arm-asm-") as temp_dir: + output = args.output or Path(temp_dir) / asm_file.with_suffix("").name + + compile_cmd = [args.cc] + if not args.no_static: + compile_cmd.append("-static") + if args.gdb: + compile_cmd.append("-g") + compile_cmd.extend(args.gcc_arg) + compile_cmd.append(str(asm_file)) + if not args.no_std_c: + compile_cmd.append(str(std_c)) + compile_cmd.extend(["-o", str(output)]) + + compile_result = run_command(compile_cmd) + sys.stdout.buffer.write(compile_result.stdout) + sys.stderr.buffer.write(compile_result.stderr) + if compile_result.returncode != 0: + return compile_result.returncode + + qemu_cmd = [args.qemu] + if args.gdb: + qemu_cmd.extend(["-g", args.gdb_port]) + resolved_output = output.resolve() + gdb_command = choose_gdb_command(args.gdb_command) + print(f"executable: {resolved_output}", file=sys.stderr, flush=True) + print( + f"gdb: {gdb_command} {resolved_output} " + f"-ex 'target remote :{args.gdb_port}'", + file=sys.stderr, + flush=True, + ) + print("gdb: use 'si' or 'c'; do not use 'run'", file=sys.stderr, flush=True) + qemu_cmd.extend([str(output), *args.program_args]) + + run_result = run_command(qemu_cmd) + sys.stdout.buffer.write(run_result.stdout) + sys.stderr.buffer.write(run_result.stderr) + if run_result.stdout 
and not run_result.stdout.endswith(b"\n"): + print() + print(f"$? = {run_result.returncode}") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main())