From 009d2966059e966ccdeb6d12209a2cb41f0bee79 Mon Sep 17 00:00:00 2001 From: Gary Gan Date: Thu, 19 Jun 2025 09:42:46 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E4=BC=98=E5=8C=96=E4=BB=A3?= =?UTF-8?q?=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Optimizer/Optimizer.py | 285 ++++++++++++++++++++++++++++------------- Optimizer/block.py | 52 -------- test0.hy | 3 + test4.hy | 2 +- test8.hy | 4 + tk_ui.py | 28 +++- 6 files changed, 228 insertions(+), 146 deletions(-) delete mode 100644 Optimizer/block.py create mode 100644 test0.hy create mode 100644 test8.hy diff --git a/Optimizer/Optimizer.py b/Optimizer/Optimizer.py index 030e4fe..a28df64 100644 --- a/Optimizer/Optimizer.py +++ b/Optimizer/Optimizer.py @@ -1,95 +1,198 @@ -from block import * +# 优化模块 +# 第一步: 常量传播 +def constant_propagation(quads): + #print("Running Constant Propagation") + value_map = {} + optimized = [] -def tp(sth): - if sth ==None: - return -1 - if sth[0] >='0' and sth[0] <='9': - return 2 - if sth[0] != '_': - return 0 - return 1 -class DAGNode: - cnt = 1 - def __init__(self,tag=None,l=None,r=None,op=None): - self.cnt = DAGNode.cnt - DAGNode.cnt += 1 - self.parents = [] - self.lc = l - self.rc = r - self.op = op - self.main_tag = tag - self.other_tag = set() - if tag is not None: - self.other_tag.add(tag) - def insert(self,tag): - self.other_tag.add(tag) - if tp(tag) > tp(self.main_tag): - self.main_tag = tag - def has(self,tag): - return tag in self.other_tag -class Optimizer: - def __init__(self): - self.nodes = [""] - self.cnt = 0 - self.ans= [] - def cfind(self,tag): - for i in range(1,self.cnt+1)[::-1]: - if self.nodes[i].has(tag): - return self.nodes[i] - self.cnt += 1 - self.nodes.append(DAGNode(tag=tag)) - return self.nodes[self.cnt] - def cfind_op1(self,op,lc,rc): - for i in range(1,self.cnt+1)[::-1]: - if self.nodes[i].op==op and ((self.nodes[i].lc == lc and self.nodes[i].rc == rc) or (self.nodes[i].lc == rc and self.nodes[i].rc == lc)): - return self.nodes[i] - self.cnt +=1 - self.nodes.append(DAGNode(op=op,l=lc,r=rc)) - return self.nodes[self.cnt] - def cfind_op2(self,op,lc,rc): - for i in range(1, self.cnt + 1)[::-1]: - if self.nodes[i].op == op and self.nodes[i].lc == lc and self.nodes[i].rc == rc: - return self.nodes[i] - self.cnt += 1 - self.nodes.append(DAGNode(op=op, l=lc, r=rc)) - return self.nodes[self.cnt] - def build(self,lst): - self.ans = [] - for _ in lst: - op,a,b,res = _.a,_.b,_.c,_.d - if op =='=': - # 将ans插入到a的节点中 - self.cfind(a).insert(res) - elif op =='+' or op=='*' or op=='&' or op=='^' or op =='|' or op =='&&' or op =='||' or op =='==' or op == '!=': - # 首先找一下有没有a b节点 , 没有就自己造一个 - aNode = self.cfind(a) - bNode = self.cfind(b) - # 然后找一下有没有a op b 的节点 , 有的话插进去 - # 这里并不能根据a ,b 来查找 , 因为昔人已乘黄鹤去 , 你找到的或许只是曾经的ab - self.cfind_op1(op,aNode,bNode).insert(res) - elif op =='-' or op=='%' or op=='/': - # 这种不换的元素符 - self.cfind_op2(op,self.cfind(a),self.cfind(b)).insert(ans) - pass - # 剩下就是while end , if end , el等 , 不用管 - for i in range(1,self.cnt+1)[::-1]: - _ = self.nodes[i] - op = _.op - if op is not None: - self.ans.append(FourTuple(op,_.lc.main_tag,_.rc.main_tag,_.main_tag)) - for __ in _.other_tag: - if __ != _.main_tag: - self.ans.append(FourTuple('=',_.main_tag,c='_',d=__)) - self.ans = self.ans[::-1] + for quad in quads: + op, arg1, arg2, dest = quad + + if op == '=' and arg1.isdigit(): + # 常量赋值:如 (=, 32, _, _v0) + value_map[dest] = arg1 + elif op == '=': + # 变量赋值:如 (=, _v0, _, age) + if arg1 in value_map: + new_quad = ('=', value_map[arg1], '_', dest) + optimized.append(new_quad) + value_map[dest] = value_map[arg1] + else: + optimized.append(quad) + # 如果 dest 不再被使用,可以考虑删除该赋值 + else: + # 替换操作数中的变量为已知常量 + new_arg1 = value_map.get(arg1, arg1) + new_arg2 = value_map.get(arg2, arg2) if arg2 != '_' else '_' + optimized.append((op, new_arg1, new_arg2, dest)) + + return optimized + +# 第二步: 公共子表达式消除 +def common_subexpression_elimination(quads): + #print("Running Common Subexpression Elimination") + expr_map = {} + optimized = [] + temp_counter = 0 + + for quad in quads: + op, arg1, arg2, dest = quad + + if op in ['+', '-', '*', '/']: + expr_key = f"{arg1} {op} {arg2}" + if expr_key in expr_map: + # 复用已有结果 + reused_var = expr_map[expr_key] + optimized.append(('=', reused_var, '_', dest)) + else: + # 新表达式 + temp_var = f"_t{temp_counter}" + temp_counter += 1 + expr_map[expr_key] = temp_var + optimized.append((op, arg1, arg2, temp_var)) + optimized.append(('=', temp_var, '_', dest)) + else: + optimized.append(quad) + + return optimized + +# 第三步: 死代码消除 +def dead_code_elimination(quads): + #print("Running Dead Code Elimination") + used_vars = set() + optimized = [] + + # 第一遍找出所有被使用的变量 + for quad in quads: + op, arg1, arg2, dest = quad + if arg1 != '_' and not arg1.isdigit(): + used_vars.add(arg1) + if arg2 != '_' and not arg2.isdigit(): + used_vars.add(arg2) + + # 第二遍删除未使用变量的赋值语句 + for quad in quads: + op, arg1, arg2, dest = quad + if op == '=' and dest.startswith('_v') and dest not in used_vars: + continue # 跳过无用的赋值语句 + optimized.append(quad) + + return optimized -if __name__ == '__main__': - divider = BlockDivider(l) - optimizer = Optimizer() - ls = divider.run() - for l in ls: - optimizer.build(l) - for i in optimizer.ans: - print(i) - # print() + +# 第四步: 控制流优化 +def control_flow_simplification(quads): + # print("Running Control Flow Simplification") + optimized = [] + + for quad in quads: + op, arg1, arg2, dest = quad + if op == 'if' and arg2 == 'goto': + if arg1.isdigit(): + if arg1 == '1': + optimized.append(('goto', '_', '_', dest)) + elif arg1 == '0': + continue # 恒假,跳过该条件跳转 + else: + optimized.append(quad) + else: + optimized.append(quad) + + return optimized + +# 第五步: 寄存器重用 +def register_reuse(quads): + #print("Running Register Reuse (Improved)") + + # 计算活跃变量信息 + live_info = compute_live_vars(quads) + + optimized = [] + available_temps = [] # 可用于复用的临时变量池 + + for i, quad in enumerate(quads): + op, arg1, arg2, dest = quad + current_live = live_info[i] + + if op == '=': + # 查看当前 dest 是否会被后续使用 + is_dest_used_later = dest in current_live + + # 如果 dest 不再使用,可以加入可用池 + if not is_dest_used_later and dest.startswith('_'): + available_temps.append(dest) + + # 尝试复用已死的临时变量 + reused = False + for var in list(available_temps): + if var not in [arg1, arg2] and var not in current_live: + # 安全复用 + optimized.append(('=', arg1, arg2, var)) + available_temps.remove(var) + if var != dest: + available_temps.append(dest) # 原 dest 现在可回收 + reused = True + break + + if not reused: + optimized.append(quad) + if dest.startswith('_') and dest not in current_live: + available_temps.append(dest) + else: + optimized.append(quad) + + return optimized + +def compute_live_vars(quads): + """ + 活跃变量分析:从后往前推导每个 quad 之后仍会使用的变量 + """ + live_vars = set() + live_info = [] + + for quad in reversed(quads): + op, arg1, arg2, dest = quad + + # 先移除 dest 的影响(因为 dest 被重新定义) + if dest in live_vars: + live_vars.remove(dest) + + # 如果操作数是变量,则加入活跃集合 + if arg1 != '_' and not arg1.isdigit(): + live_vars.add(arg1) + if arg2 != '_' and not arg2.isdigit(): + live_vars.add(arg2) + + # 把当前活跃变量集合复制一份保存下来 + live_info.append(live_vars.copy()) + + # 从后往前遍历,所以需要反转结果 + live_info.reverse() + return live_info + + +# 优化器整合 +def optimize(quads): + passes = [ + ("Constant Propagation", constant_propagation), + ("Common Subexpression Elimination", common_subexpression_elimination), + ("Dead Code Elimination", dead_code_elimination), + ("Control Flow Simplification", control_flow_simplification), + ("Register Reuse", register_reuse), + ] + + for name, opt_func in passes: + # print(f"Running optimization pass: {name}") + quads = opt_func(quads) + + return quads + +def output_ir_str(quads): + lines = [] + for i, quad in enumerate(quads): + op, arg1, arg2, dest = quad + quad = f"({op}, {arg1}, {arg2}, {dest})" + lines.append(f"{i} {quad}") + return "\n".join(lines) diff --git a/Optimizer/block.py b/Optimizer/block.py deleted file mode 100644 index d237ae3..0000000 --- a/Optimizer/block.py +++ /dev/null @@ -1,52 +0,0 @@ -l = [ - "=,10,_,t1", - "=,20,_,t1", - "+,t1,t1,t2" -] -class FourTuple: - cnt =0 - def __init__(self, a='_',b='_',c='_',d='_'): - self.a = a - self.b = b - self.c = c - self.d = d - self.cnt = FourTuple.cnt - FourTuple.cnt += 1 - self.block_sign = set(['if','jmp','jmpf','el','ifend','while','fun','return','while','we','goto']) - - - def is_block_sign(self): - return self.a in self.block_sign - def __str__(self): - return f"({self.a} , {self.b} , {self.c} , {self.d})" -class BlockDivider: - def __init__(self,lst): - self.ans = [] - self.l =[] - for i in lst: - k = i.split(',') - obj = FourTuple(k[0].strip(),k[1].strip(),k[2].strip(),k[3].strip()) - self.l.append(obj) - # print(obj) - def run(self): - lst = [] - for i in self.l: - lst.append(i) - if i.is_block_sign(): - self.ans.append(lst) - lst = [] - if lst: - self.ans.append(lst) - - return self.ans - - - -if __name__ == '__main__': - - divider = BlockDivider(l) - ans = divider.run() - for i in ans: - for j in i: - print(j,end='\t') - print() \ No newline at end of file diff --git a/test0.hy b/test0.hy new file mode 100644 index 0000000..8a55e80 --- /dev/null +++ b/test0.hy @@ -0,0 +1,3 @@ +fn main() { + +} \ No newline at end of file diff --git a/test4.hy b/test4.hy index adb7108..ee30ef8 100644 --- a/test4.hy +++ b/test4.hy @@ -1,7 +1,7 @@ // test4:嵌套 if 判断(复杂控制流) fn max(x: i32, y: i32) -> i32 { - if x > y { + if x >= y { return x; } else { return y; diff --git a/test8.hy b/test8.hy new file mode 100644 index 0000000..a616753 --- /dev/null +++ b/test8.hy @@ -0,0 +1,4 @@ +fn main( :i32) +{ + a = 34 * 3; +} \ No newline at end of file diff --git a/tk_ui.py b/tk_ui.py index 2d7acad..b8d86d1 100644 --- a/tk_ui.py +++ b/tk_ui.py @@ -4,6 +4,7 @@ from lexical.lexical import Lexical from syntax.syntax import Syntax from semantic.rule import symbol_table_pool from syntax.syntax import LL1Generator +from Optimizer.optimizer import optimize, output_ir_str def process_code(input_code): lexical = Lexical() @@ -30,6 +31,14 @@ def process_code(input_code): for code in syntax.get_result().root.code: i += 1 output.append(f"{i} \t{code}") + + quads = [tuple(map(str.strip, str(item).strip("()").split(","))) for item in syntax.get_result().root.code] + + optimized_quads = optimize(quads) + + output.append('\n') + output.append(f"优化后的四元式:\t") + output.append(output_ir_str(optimized_quads)) return '\n'.join(output), '' else: return '\n'.join(output), '错误原因:\t' + syntax.get_error().info @@ -56,12 +65,27 @@ root.title("Hydrogen语言编译器前端演示") input_text = scrolledtext.ScrolledText(root, width=80, height=20) input_text.pack(padx=10, pady=5) -# 提交按钮 +# 提交按钮和清空按钮在同一行 +button_frame = tk.Frame(root) +def on_generate_ll1_table(): + try: + obj = LL1Generator() + obj.compile() + obj.show_me_what_you_got("LL1_table.csv") + messagebox.showinfo("提示", "成功生成预测表") + except Exception as e: + messagebox.showerror("生成预测表异常", str(e)) + +submit_btn = tk.Button(button_frame, text="生成产生式的预测表", command=on_generate_ll1_table) + +submit_btn.pack(side=tk.LEFT, padx=5) + +button_frame.pack(pady=5) submit_btn = tk.Button(root, text="提交", command=on_submit) submit_btn.pack(pady=5) # 输出框 -output_text = scrolledtext.ScrolledText(root, width=80, height=20, bg="#ffffff") +output_text = scrolledtext.ScrolledText(root, width=80, height=20, bg="#000000") output_text.pack(padx=10, pady=5) # 错误信息框