""" 语法分析 """ import csv from syntax.rule import Sign, Production, terminal_sign_type, non_terminal_sign_type, productions, grammar_start from error import SyntaxRuleError, SyntaxError, SemanticRuleError from semantic.rule import SemanticRule, SemanticError, SemanticRuleFactory class LL1Generator: def __init__(self): self.NTS = [_ for _ in non_terminal_sign_type] self.NTS_sz = len(self.NTS) self.NTSmp = {_:idx for idx,_ in enumerate(self.NTS)} self.TS = [_ for _ in terminal_sign_type] self.TS_sz = len(self.TS) self.TSmp = {_:idx for idx,_ in enumerate(self.TS)} self.tb = [[None for __ in range(self.TS_sz)] for _ in range(self.NTS_sz)] self.firsts = [set() for _ in range(self.NTS_sz)] self.follows = [set() for _ in range(self.NTS_sz)] self.first_debug = False self.first_debug_cnt = 0 self.follow_debug = False self.follow_debug_cnt = 0 def compile(self): self.calculate_firsts() self.calculate_follows() success = self.gen_tb() return success def print_firsts(self): for idx,_ in enumerate(self.firsts): print(f"{self.NTS[idx]}:") print(self.firsts[idx]) def print_follows(self): for idx,_ in enumerate(self.follows): print(f"{self.NTS[idx]}:") print(self.follows[idx]) def is_TS(self,item): return self.TSmp.get(item) is not None def is_NTS(self,item): return self.NTSmp.get(item) is not None def first_set(self,tp): return self.firsts[self.NTSmp.get(tp)] def first_insert(self,l,item): or_sz = len(self.first_set(l)) self.first_set(l).add(item) return or_sz < len(self.first_set(l)) def follow_set(self,tp): return self.follows[self.NTSmp.get(tp)] def follow_insert(self,l,item): or_sz = len(self.follow_set(l)) self.follow_set(l).add(item) return or_sz < len(self.follow_set(l)) def l_r_scan(self,lst): l = set() if len(lst) == 0: l.add('empty') for idx,_ in enumerate(lst): f = False if self.is_TS(_): l.add(_) else: for __ in self.first_set(_): if __ =='empty': f =True if __ != 'empty' or (__ =='empty' and idx==len(lst)-1): l.add(__) if not f: break return l def calculate_firsts(self): f = True while f: f = False for _ in productions: l = _.left.type r = [__.type for __ in _.right] if len(r)==0: if self.first_insert(l,'empty'): if self.first_debug: self.first_debug_cnt+=1 print(f"第{self.first_debug_cnt}次推导: {l}->{' '.join(r)} : 添加empty") f = True elif self.is_TS(r[0]): if self.first_insert(l,r[0]): if self.first_debug: self.first_debug_cnt+=1 print(f"第{self.first_debug_cnt}次推导: {l}->{' '.join(r)} : 添加{r[0]}") f = True else: # 此时就是非终结符了 # 将所有该非终结符的first集元素加入到左部的first集 for _ in self.first_set(r[0]): if self.first_insert(l,_): if self.first_debug: self.first_debug_cnt += 1 print(f"第{self.first_debug_cnt}次推导 : {l}->{' '.join(r)} : 添加{_}") f = True for i in range(0,len(r)): # 如果是非终结符 if self.is_NTS(r[i]): # 如果该非终结符的first里有空 if 'empty' in self.first_set(r[i]): # 如果是最后一个 if i == len(r)-1: if self.first_insert(l,'empty'): if self.first_debug: self.first_debug_cnt += 1 print(f"第{self.first_debug_cnt}次推导 : {l}->{' '.join(r)} : 添加empty") f = True else: # 下一个是终结符 if self.is_TS(r[i+1]): if self.first_insert(l,r[i+1]): if self.first_debug: self.first_debug_cnt += 1 print(f"第{self.first_debug_cnt}次推导 : {l}->{' '.join(r)} : 添加{r[i+1]}") f = True # 下一个为非终结符 elif self.is_NTS(r[i+1]): for _ in self.first_set(r[i+1]): if self.first_insert(l,_): if self.first_debug: self.first_debug_cnt += 1 print( f"第{self.first_debug_cnt}次推导 : {l}->{' '.join(r)} : 添加{_}") f = True else: print("error: TS/NTS type.") else: break else: break def calculate_follows(self): f = True while f: f = False for _ in productions: l = _.left.type r= [__.type for __ in _.right] if l == 'program': if self.follow_insert(l,'pound'): if self.follow_debug: self.follow_debug_cnt +=1 print(f"第{self.follow_debug_cnt}次推导:{l} -> {' '.join(r)} , {l} add pound") f = True for i in range(0,len(r)): if self.is_NTS(r[i]): if i == len(r)-1: for _ in self.follow_set(l): if self.follow_insert(r[i],_): if self.follow_debug: self.follow_debug_cnt += 1 print(f"第{self.follow_debug_cnt}次推导:{l} -> {' '.join(r)} , {r[i]} add {_}") f = True else: empty_find = False for __ in self.l_r_scan(r[i+1:]): if __=='empty': empty_find = True elif self.follow_insert(r[i],__): if self.follow_debug: self.follow_debug_cnt += 1 print(f"第{self.follow_debug_cnt}次推导:{l} -> {' '.join(r)} , {r[i]} add {__}") f = True if empty_find: for _ in self.follow_set(l): if self.follow_insert(r[i],_): if self.follow_debug: self.follow_debug_cnt += 1 print( f"第{self.follow_debug_cnt}次推导:{l} -> {' '.join(r)} , {r[i]} add {l}") f = True def select(self,l): f = False for _ in self.l_r_scan(l): if _ =='empty': f= True break if f: return else: return def insert_tb(self,_,r): if self.tb[self.NTSmp[_.left.type]][self.TSmp[r]] != None and self.tb[self.NTSmp[_.left.type]][self.TSmp[r]] != _: print(f"wa :{_.l.type}->{[__.type for __ in _.r]} : {r}") self.tb[self.NTSmp[_.left.type]][self.TSmp[r]] = _ def gen_tb(self): for _ in productions: l = _.left.type r = [_.type for _ in _.right] empty_find = False for __ in self.l_r_scan(r): if __ =='empty': empty_find = True else: self.insert_tb(_,__) if empty_find: for __ in self.follow_set(l): self.insert_tb(_,__) def get_production(self,row,col): return self.tb[self.NTSmp[row]][self.TSmp[col]] def show_me_what_you_got(self,file,show_by_id=False): l = ['NTS\\TS'] for _ in self.TS: l.append(_) ll = [l] for idx,_ in enumerate(self.tb): l = [self.NTS[idx]] for __ in _: if __ != None: l.append(__.str[int(show_by_id)]) else: l.append(' ') ll.append(l) with open(file,'w',newline='')as f: writer = csv.writer(f) writer.writerows(ll) class Node: """ 树节点 """ def __init__(self, data): """ 树节点 :param data: 节点数据 """ self.data = data self.str = data.type self.children = list() self.parent = None # 属性 self.lexical = None self.array = None self.code = list() self.name = None self.type = None self.id = None self.length = None self.fun = None self.num = None self.names = list() self.bool = None self.op = None self.add = None self.mul = None def get_pre_brother(self, index): """ 获取它前 index 位的兄弟 :param index: .. :return: 兄弟 """ self_index = 0 for i in range(0, len(self.parent.children)): if self.parent.children[i] is self: self_index = i return self.parent.children[self_index - index] class Tree: """ 树 """ def __init__(self, root): """ 构造 :param root: 树的根节点 """ self.root = root class Stack: """ 栈 """ def __init__(self): """ 构造 """ self.__container = list() def push(self, elem): """ 入栈 :param elem: 入栈元素 """ self.__container.append(elem) def pop(self): """ 将栈顶元素出栈 :return: 栈顶元素 """ top = self.top() self.__container.pop() return top def top(self): """ 获取栈顶元素 :return: 栈顶元素 """ return self.__container[-1] def empty(self): """ 栈是否为空 :return: 栈是否为空 """ return len(self.__container) == 0 class Syntax: """ 语法分析器 """ def __init__(self): """ 构造 """ # 语法树的构建 self.__grammar_tree = None # 准备存放错误 self.__error = list() # 预测分析表的构建 self.__pa_table = LL1Generator() # 编译预测分析表 if self.__pa_table.compile(): self.__error.append(SyntaxRuleError('预测分析表编译失败')) # 准备存放词法分析的结果 self.__source = list() # 将词法分析产生的 token 转换成的终结符 self.__terminals = list() def put_source(self, source): """ 装填词法分析结果 :param source: 词法分析结果 """ self.__source.clear() self.__terminals.clear() # 装填词法分析结果 for s in source: self.__source.append(s) # 将 tokens 转换成终结符 for s in self.__source: self.__terminals.append(Sign(s.type, s.str, s.line)) # 在所有 tokens 的最后填入一个 # self.__terminals.append(Sign('pound')) def get_result(self): """ 获取语法树 :return: 语法树 """ return self.__grammar_tree def get_error(self): """ 获取错误 :return: 错误 """ return self.__error def execute(self): """ 执行操作 :return: 语法分析是否成功 """ # 新建栈 stack = Stack() # 清空错误 self.__error = None # 新建临时语法树 grammar_tree = Tree(Node(Sign(grammar_start.type))) # 将 # 入栈 stack.push(Node(Sign('pound'))) # 将语法树根节点入栈 stack.push(grammar_tree.root) # 拷贝转换之后的终结符到输入串 inputs = list() for sign in self.__terminals: inputs.append(sign) # 设置当前输入符号索引 input_index = 0 # 立下 flag flag = True while flag: # 如果栈顶是语义动作 if isinstance(stack.top(), SemanticRule): stack.top().execute() if len(stack.top().errors) > 0: self.__error = stack.top().errors[-1] break else: stack.pop() # 如果栈顶是符号 else: # 如果 top 是非终结符 if stack.top().data.is_non_terminal_sign(): # 查看分析表 # print(stack.top().data.type,inputs[input_index].type) production = self.__pa_table.get_production(stack.top().data.type, inputs[input_index].type) # 如果分析表对应位置存有产生式 if production: # 判断语义规则是否合法 if len(production.right) != len(production.semantic_children): self.__error = SemanticRuleError('语义规则数量与产生式右边数量不一致 ' + str(production.str[0])) break # 执行 start 语义 semantic_start = SemanticRuleFactory.get_instance(production.semantic_start, stack.top()) if semantic_start: semantic_start.execute() if len(semantic_start.errors) > 0: self.__error = semantic_start.errors[-1] break # 将语法树按照产生式进行生长 for i in range(0, len(production.right)): stack.top().children.append(Node(Sign(production.right[i].type))) stack.top().children[i].parent = stack.top() # 将 top 出栈 top = stack.pop() # 将 end 语义规则入栈 semantic_end = SemanticRuleFactory.get_instance(production.semantic_end, top) if semantic_end: stack.push(semantic_end) # 将 top 的孩子节点反序入栈 for i in range(len(production.right) - 1, -1, -1): # for child in top.children[::-1]: stack.push(top.children[i]) semantic_child = SemanticRuleFactory.get_instance(production.semantic_children[i], top.children[i]) if semantic_child: stack.push(semantic_child) # 如果分析表中存放着错误信息 else: self.__error = SyntaxError('语法错误 ' + inputs[input_index].str, inputs[input_index].line) break # 如果 top 是终结符 else: # 如果 top = input if stack.top().data.type == inputs[input_index].type: # 如果 top = #,宣布分析成功 if stack.top().data.type == 'pound': flag = False # 如果 top != # else: # 计算 top 的 lexical 属性 stack.top().lexical = inputs[input_index].str # 将 top 出栈,让 input_index 自增 stack.pop() input_index += 1 # 如果 top != input else: self.__error = SyntaxError('语法错误 ' + inputs[input_index].str, inputs[input_index].line) break if self.__error: return False else: self.__grammar_tree = grammar_tree return True if __name__ == '__main__': obj = LL1Generator() obj.compile() obj.show_me_what_you_got("LL1Table_id.csv",show_by_id=True)