# -*- coding: utf-8 -*-
"""
|
|
语法分析
|
|
"""
|
|
import csv

from error import SyntaxRuleError, SyntaxError, SemanticRuleError
from semantic.rule import SemanticRule, SemanticError, SemanticRuleFactory
from syntax.rule import Sign, Production, terminal_sign_type, non_terminal_sign_type, productions, grammar_start
class LL1Generator:
    """Builds an LL(1) predictive parsing table from the grammar in syntax.rule.

    Rows are indexed by non-terminal, columns by terminal; each cell holds the
    Production to expand by, or None when the (non-terminal, terminal) pair is
    a syntax error.
    """

    def __init__(self):
        # Non-terminal signs (table rows) and a name -> row-index map.
        self.NTS = [_ for _ in non_terminal_sign_type]
        self.NTS_sz = len(self.NTS)
        self.NTSmp = {_: idx for idx, _ in enumerate(self.NTS)}
        # Terminal signs (table columns) and a name -> column-index map.
        self.TS = [_ for _ in terminal_sign_type]
        self.TS_sz = len(self.TS)
        self.TSmp = {_: idx for idx, _ in enumerate(self.TS)}
        # The predictive parsing table itself.
        self.tb = [[None for __ in range(self.TS_sz)] for _ in range(self.NTS_sz)]
        # FIRST / FOLLOW sets, parallel to self.NTS.
        self.firsts = [set() for _ in range(self.NTS_sz)]
        self.follows = [set() for _ in range(self.NTS_sz)]
        # Debug-trace switches and derivation counters.
        self.first_debug = False
        self.first_debug_cnt = 0
        self.follow_debug = False
        self.follow_debug_cnt = 0

    def compile(self):
        """Compute FIRST/FOLLOW sets and fill the table.

        Returns a truthy value when an LL(1) conflict was found (failure) and
        a falsy value on success.  BUGFIX: gen_tb() used to return None
        unconditionally, so this method could never signal failure to its
        caller (Syntax.__init__ treats a truthy return as failure).
        """
        self.calculate_firsts()
        self.calculate_follows()
        return self.gen_tb()

    def print_firsts(self):
        """Print every non-terminal's FIRST set (debug helper)."""
        for idx, _ in enumerate(self.firsts):
            print(f"{self.NTS[idx]}:")
            print(self.firsts[idx])

    def print_follows(self):
        """Print every non-terminal's FOLLOW set (debug helper)."""
        for idx, _ in enumerate(self.follows):
            print(f"{self.NTS[idx]}:")
            print(self.follows[idx])

    def is_TS(self, item):
        """True when `item` names a terminal sign."""
        return self.TSmp.get(item) is not None

    def is_NTS(self, item):
        """True when `item` names a non-terminal sign."""
        return self.NTSmp.get(item) is not None

    def first_set(self, tp):
        """The FIRST set of non-terminal `tp` (live reference, not a copy)."""
        return self.firsts[self.NTSmp.get(tp)]

    def first_insert(self, l, item):
        """Add `item` to FIRST(l); return True when the set actually grew."""
        or_sz = len(self.first_set(l))
        self.first_set(l).add(item)
        return or_sz < len(self.first_set(l))

    def follow_set(self, tp):
        """The FOLLOW set of non-terminal `tp` (live reference, not a copy)."""
        return self.follows[self.NTSmp.get(tp)]

    def follow_insert(self, l, item):
        """Add `item` to FOLLOW(l); return True when the set actually grew."""
        or_sz = len(self.follow_set(l))
        self.follow_set(l).add(item)
        return or_sz < len(self.follow_set(l))

    def l_r_scan(self, lst):
        """FIRST set of a sequence of sign-type names `lst`.

        'empty' is included only when the whole sequence can derive empty
        (every element nullable, or `lst` itself is empty).
        """
        result = set()
        if len(lst) == 0:
            result.add('empty')
        for idx, sym in enumerate(lst):
            nullable = False
            if self.is_TS(sym):
                result.add(sym)
            else:
                for t in self.first_set(sym):
                    if t == 'empty':
                        nullable = True
                    # 'empty' itself only propagates out from the last element.
                    if t != 'empty' or (t == 'empty' and idx == len(lst) - 1):
                        result.add(t)
            # Stop at the first non-nullable element.
            if not nullable:
                break
        return result

    def calculate_firsts(self):
        """Fixed-point computation of FIRST sets for all non-terminals."""
        changed = True
        while changed:
            changed = False
            for p in productions:
                l = p.left.type
                r = [s.type for s in p.right]
                if len(r) == 0:
                    # Empty production: the left side derives empty directly.
                    if self.first_insert(l, 'empty'):
                        if self.first_debug:
                            self.first_debug_cnt += 1
                            print(f"第{self.first_debug_cnt}次推导: {l}->{' '.join(r)} : 添加empty")
                        changed = True
                elif self.is_TS(r[0]):
                    # Terminal head goes straight into FIRST(left).
                    if self.first_insert(l, r[0]):
                        if self.first_debug:
                            self.first_debug_cnt += 1
                            print(f"第{self.first_debug_cnt}次推导: {l}->{' '.join(r)} : 添加{r[0]}")
                        changed = True
                else:
                    # Non-terminal head: copy its FIRST set into FIRST(left).
                    for sym in self.first_set(r[0]):
                        if self.first_insert(l, sym):
                            if self.first_debug:
                                self.first_debug_cnt += 1
                                print(f"第{self.first_debug_cnt}次推导 : {l}->{' '.join(r)} : 添加{sym}")
                            changed = True
                    # Walk the right side while the prefix stays nullable.
                    for i in range(0, len(r)):
                        if self.is_NTS(r[i]):
                            if 'empty' in self.first_set(r[i]):
                                if i == len(r) - 1:
                                    # The whole right side is nullable, so the left is too.
                                    if self.first_insert(l, 'empty'):
                                        if self.first_debug:
                                            self.first_debug_cnt += 1
                                            print(f"第{self.first_debug_cnt}次推导 : {l}->{' '.join(r)} : 添加empty")
                                        changed = True
                                else:
                                    # Next symbol is a terminal.
                                    if self.is_TS(r[i + 1]):
                                        if self.first_insert(l, r[i + 1]):
                                            if self.first_debug:
                                                self.first_debug_cnt += 1
                                                print(f"第{self.first_debug_cnt}次推导 : {l}->{' '.join(r)} : 添加{r[i + 1]}")
                                            changed = True
                                    # Next symbol is a non-terminal.
                                    elif self.is_NTS(r[i + 1]):
                                        for sym in self.first_set(r[i + 1]):
                                            if self.first_insert(l, sym):
                                                if self.first_debug:
                                                    self.first_debug_cnt += 1
                                                    print(
                                                        f"第{self.first_debug_cnt}次推导 : {l}->{' '.join(r)} : 添加{sym}")
                                                changed = True
                                    else:
                                        print("error: TS/NTS type.")
                            else:
                                break
                        else:
                            break

    def calculate_follows(self):
        """Fixed-point computation of FOLLOW sets for all non-terminals."""
        changed = True
        while changed:
            changed = False
            for p in productions:
                l = p.left.type
                r = [s.type for s in p.right]
                if l == 'program':
                    # The start symbol is always followed by the end marker '#'.
                    if self.follow_insert(l, 'pound'):
                        if self.follow_debug:
                            self.follow_debug_cnt += 1
                            print(f"第{self.follow_debug_cnt}次推导:{l} -> {' '.join(r)} , {l} add pound")
                        changed = True
                for i in range(0, len(r)):
                    if self.is_NTS(r[i]):
                        if i == len(r) - 1:
                            # Last symbol: FOLLOW(left) flows into FOLLOW(r[i]).
                            for sym in self.follow_set(l):
                                if self.follow_insert(r[i], sym):
                                    if self.follow_debug:
                                        self.follow_debug_cnt += 1
                                        print(f"第{self.follow_debug_cnt}次推导:{l} -> {' '.join(r)} , {r[i]} add {sym}")
                                    changed = True
                        else:
                            # FIRST of the remaining suffix (minus 'empty')
                            # flows into FOLLOW(r[i]).
                            empty_find = False
                            for sym in self.l_r_scan(r[i + 1:]):
                                if sym == 'empty':
                                    empty_find = True
                                elif self.follow_insert(r[i], sym):
                                    if self.follow_debug:
                                        self.follow_debug_cnt += 1
                                        print(f"第{self.follow_debug_cnt}次推导:{l} -> {' '.join(r)} , {r[i]} add {sym}")
                                    changed = True
                            if empty_find:
                                # Nullable suffix: FOLLOW(left) also applies.
                                for sym in self.follow_set(l):
                                    if self.follow_insert(r[i], sym):
                                        if self.follow_debug:
                                            self.follow_debug_cnt += 1
                                            # BUGFIX: this trace used to print the left
                                            # symbol instead of the symbol being added.
                                            print(
                                                f"第{self.follow_debug_cnt}次推导:{l} -> {' '.join(r)} , {r[i]} add {sym}")
                                        changed = True

    def select(self, l):
        # NOTE(review): dead/unfinished code — both branches return None, and
        # nothing in this file calls it.  Presumably it was meant to build
        # SELECT sets; gen_tb() implements that logic inline instead.  Kept
        # (behavior unchanged) for interface compatibility.
        f = False
        for _ in self.l_r_scan(l):
            if _ == 'empty':
                f = True
                break
        if f:
            return
        else:
            return

    def insert_tb(self, _, r):
        """Place production `_` into the cell [left-of-`_`, terminal `r`].

        Returns True when a *different* production already occupied the cell
        (an LL(1) conflict); the new production overwrites it either way,
        matching the original behavior.
        """
        row = self.NTSmp[_.left.type]
        col = self.TSmp[r]
        conflict = self.tb[row][col] is not None and self.tb[row][col] != _
        if conflict:
            # BUGFIX: original accessed non-existent attributes `_.l` / `_.r`
            # (Production exposes `.left` / `.right`), so reporting a conflict
            # raised AttributeError instead of printing the diagnostic.
            print(f"wa :{_.left.type}->{[__.type for __ in _.right]} : {r}")
        self.tb[row][col] = _
        return conflict

    def gen_tb(self):
        """Fill the predictive table from FIRST/FOLLOW sets.

        Returns True when any cell conflict was detected (the grammar is not
        LL(1)), False otherwise.  BUGFIX: originally returned None, which made
        compile() unable to report failure.
        """
        conflict = False
        for p in productions:
            l = p.left.type
            r = [s.type for s in p.right]

            empty_find = False
            # Every terminal in FIRST(right side) selects this production.
            for t in self.l_r_scan(r):
                if t == 'empty':
                    empty_find = True
                else:
                    if self.insert_tb(p, t):
                        conflict = True
            # Nullable right side: FOLLOW(left) also selects this production.
            if empty_find:
                for t in self.follow_set(l):
                    if self.insert_tb(p, t):
                        conflict = True
        return conflict

    def get_production(self, row, col):
        """Table lookup: production for (non-terminal `row`, terminal `col`), or None."""
        return self.tb[self.NTSmp[row]][self.TSmp[col]]

    def show_me_what_you_got(self, file, show_by_id=False):
        """Dump the predictive table to `file` as CSV.

        :param file: output path
        :param show_by_id: render each cell via Production.str[1] instead of
            Production.str[0] — presumably an id-form vs. text-form pair;
            TODO(review) confirm against syntax.rule.Production.
        """
        header = ['NTS\\TS']
        for t in self.TS:
            header.append(t)
        rows = [header]
        for idx, table_row in enumerate(self.tb):
            line = [self.NTS[idx]]
            for cell in table_row:
                if cell is not None:
                    line.append(cell.str[int(show_by_id)])
                else:
                    line.append(' ')
            rows.append(line)
        with open(file, 'w', newline='') as f:
            writer = csv.writer(f)
            writer.writerows(rows)
class Node:
    """A syntax-tree node: a grammar sign plus the semantic attributes
    computed for it during analysis."""

    def __init__(self, data):
        """
        :param data: the sign this node wraps (must expose `.type`)
        """
        self.data = data
        self.str = data.type
        self.children = list()
        self.parent = None

        # Semantic attributes, filled in by the semantic rules later on.
        self.lexical = self.array = None
        self.code = list()
        self.type = self.id = self.length = self.fun = self.num = None
        self.names = list()
        self.bool = self.op = self.add = self.mul = None

    def get_pre_brother(self, index):
        """Return the sibling `index` positions before this node.

        :param index: how far back among the parent's children to look
        :return: that sibling node
        """
        my_pos = 0
        for pos, sibling in enumerate(self.parent.children):
            if sibling is self:
                my_pos = pos
        return self.parent.children[my_pos - index]
class Tree:
    """A syntax tree — just a handle on its root node."""

    def __init__(self, root):
        """
        :param root: the root node of the tree
        """
        self.root = root
class Stack:
    """A minimal LIFO stack backed by a Python list."""

    def __init__(self):
        """Create an empty stack."""
        self.__container = list()

    def push(self, elem):
        """Push `elem` onto the top of the stack.

        :param elem: the element to push
        """
        self.__container.append(elem)

    def pop(self):
        """Remove and return the top element.

        :return: the (former) top element
        """
        top = self.top()
        del self.__container[-1]
        return top

    def top(self):
        """Return the top element without removing it.

        :return: the top element
        """
        return self.__container[-1]

    def empty(self):
        """Whether the stack currently holds no elements.

        :return: True when empty
        """
        return not self.__container
class Syntax:
    """LL(1) predictive parser driver.

    Consumes the token stream produced by the lexer, drives the predictive
    table, executes attached semantic rules, and builds the syntax tree.
    """

    def __init__(self):
        # The syntax tree, produced by execute() on success.
        self.__grammar_tree = None
        # Accumulated errors.  CONSISTENCY FIX: this is kept as a list for the
        # object's whole lifetime (the original rebound it to None and then to
        # a bare error object inside execute(), so get_error()'s return shape
        # varied).  Errors are now always appended.
        self.__error = list()
        # Build and compile the predictive parsing table.
        self.__pa_table = LL1Generator()
        # compile() returns truthy on failure (table conflict).
        if self.__pa_table.compile():
            self.__error.append(SyntaxRuleError('预测分析表编译失败'))
        # Lexer output (tokens) and their terminal-sign conversions.
        self.__source = list()
        self.__terminals = list()

    def put_source(self, source):
        """Load the lexer's output.

        :param source: iterable of tokens (each exposing .type, .str, .line)
        """
        self.__source.clear()
        self.__terminals.clear()
        # Keep a copy of the raw tokens.
        for s in source:
            self.__source.append(s)
        # Convert every token into a terminal sign.
        for s in self.__source:
            self.__terminals.append(Sign(s.type, s.str, s.line))
        # Terminate the input with the '#' end marker.
        self.__terminals.append(Sign('pound'))

    def get_result(self):
        """Return the syntax tree built by execute() (None before success)."""
        return self.__grammar_tree

    def get_error(self):
        """Return the list of accumulated errors (empty when none)."""
        return self.__error

    def execute(self):
        """Run predictive parsing over the loaded tokens.

        :return: True when parsing (and all semantic rules) succeeded
        """
        # Parse stack.
        stack = Stack()
        # Reset errors from any previous run (the list object is kept).
        self.__error.clear()
        # Fresh tree rooted at the grammar's start symbol.
        grammar_tree = Tree(Node(Sign(grammar_start.type)))

        # Bottom marker '#', then the root.
        stack.push(Node(Sign('pound')))
        stack.push(grammar_tree.root)

        # Copy the converted terminals into the input sequence.
        inputs = list()
        for sign in self.__terminals:
            inputs.append(sign)
        # Index of the current input symbol.
        input_index = 0

        flag = True
        while flag:

            # Top of stack is a semantic action: run it.
            if isinstance(stack.top(), SemanticRule):
                stack.top().execute()
                if len(stack.top().errors) > 0:
                    self.__error.append(stack.top().errors[-1])
                    break
                else:
                    stack.pop()
            # Top of stack is a grammar sign.
            else:
                # Non-terminal on top: consult the predictive table.
                if stack.top().data.is_non_terminal_sign():
                    # (Leftover debug print of the table lookup removed.)
                    production = self.__pa_table.get_production(stack.top().data.type, inputs[input_index].type)
                    # The table holds a production for this pair.
                    if production:
                        # Sanity-check the attached semantic rules.
                        if len(production.right) != len(production.semantic_children):
                            self.__error.append(
                                SemanticRuleError('语义规则数量与产生式右边数量不一致 ' + production.str))
                            break

                        # Run the production's 'start' semantic rule, if any.
                        semantic_start = SemanticRuleFactory.get_instance(production.semantic_start, stack.top())
                        if semantic_start:
                            semantic_start.execute()
                            if len(semantic_start.errors) > 0:
                                self.__error.append(semantic_start.errors[-1])
                                break

                        # Grow the tree according to the production.
                        for i in range(0, len(production.right)):
                            stack.top().children.append(Node(Sign(production.right[i].type)))
                            stack.top().children[i].parent = stack.top()

                        # Replace the non-terminal with its expansion.
                        top = stack.pop()

                        # 'end' semantic rule fires after the whole expansion.
                        semantic_end = SemanticRuleFactory.get_instance(production.semantic_end, top)
                        if semantic_end:
                            stack.push(semantic_end)

                        # Push children right-to-left, each preceded (on pop
                        # order) by its own semantic rule when one exists.
                        for i in range(len(production.right) - 1, -1, -1):
                            stack.push(top.children[i])
                            semantic_child = SemanticRuleFactory.get_instance(production.semantic_children[i],
                                                                              top.children[i])
                            if semantic_child:
                                stack.push(semantic_child)
                    # Empty table cell: syntax error at the current token.
                    else:
                        self.__error.append(
                            SyntaxError('语法错误 ' + inputs[input_index].str, inputs[input_index].line))
                        break
                # Terminal on top.
                else:
                    # Terminal matches the input symbol.
                    if stack.top().data.type == inputs[input_index].type:
                        # Matching '#' means the parse is complete.
                        if stack.top().data.type == 'pound':
                            flag = False
                        else:
                            # Record the token text as the node's lexical value.
                            stack.top().lexical = inputs[input_index].str
                            # Consume: pop the terminal, advance the input.
                            stack.pop()
                            input_index += 1
                    # Mismatch: syntax error.
                    else:
                        self.__error.append(
                            SyntaxError('语法错误 ' + inputs[input_index].str, inputs[input_index].line))
                        break

        if self.__error:
            return False
        else:
            self.__grammar_tree = grammar_tree
            return True
if __name__ == '__main__':
    # Standalone entry point: build and compile the LL(1) predictive parsing
    # table, then dump it to CSV with cells rendered in id form for manual
    # inspection.  (compile()'s return value is deliberately ignored here.)
    obj = LL1Generator()
    obj.compile()
    obj.show_me_what_you_got("LL1Table_id.csv",show_by_id=True)