CompilerDesign/GrammaticalAnalysis.py

1158 lines
51 KiB
Python
Raw Permalink Normal View History

2024-06-21 05:15:53 +00:00
import inspect
import re
class Yacc:
translation_dict = {
"String": "字符串",
"Program": "程序",
"ConstantDeclaration": "常量说明",
"ConstantDefinition": "常量定义",
"FunctionDefinitionWithReturnValue": "有返回值函数定义",
"FunctionDefinitionWithoutReturnValue": "无返回值函数定义",
"UnsignedInteger": "无符号整数",
"Integer": "整数",
"DeclarationHeader": "声明头部",
"VariableDeclaration": "变量说明",
"VariableDefinition": "变量定义",
"CompoundStatement": "复合语句",
"ParameterList": "参数表",
"MainFunction": "主函数",
"Expression": "表达式",
"Term": "",
"Factor": "因子",
"Statement": "语句",
"AssignmentStatement": "赋值语句",
"ConditionalStatement": "条件语句",
"Condition": "条件",
"LoopStatement": "循环语句",
"Step": "步长",
"FunctionCallWithReturnValue": "有返回值函数调用语句",
"FunctionCallWithoutReturnValue": "无返回值函数调用语句",
"ValueParameterList": "值参数表",
"StatementList": "语句列",
"ReadStatement": "读语句",
"WriteStatement": "写语句",
"ReturnStatement": "返回语句"
}
def is_INTTK_CHARTK(self, t: (str, str)) -> bool:
if t[0] in ['INTTK', 'CHARTK']:
return True
if t[0] == "Factor" and t[1] in ['INTCON', 'CHARCON']:
return True
if t[0] == "Term" and t[1] in ['INTCON', 'CHARCON']:
return True
return False
def is_IDENFR(self, t: (str, str)) -> bool:
if t[0] == 'IDENFR':
return True
if t[0] == "Factor" and t[1] == "IDENFR":
return True
if t[0] == "Term" and t[1] == "IDENFR":
return True
if t[0] == "Expression" and t[1] == "IDENFR":
return True
return False
def is_Integer_CHARCON(self, t: (str, str)) -> bool:
if t[0] == 'Integer' or t[0] == 'CHARCON':
return True
if t[0] == 'Factor' and (t[1] == 'Integer' or t[1] == 'CHARCON'):
return True
if t[0] == 'Term' and (t[1] == 'Integer' or t[1] == 'CHARCON'):
return True
if t[0] == 'Expression' and (t[1] == 'Integer' or t[1] == 'CHARCON'):
return True
return False
def is_Term(self, t: (str, str)) -> bool:
if t[0] == 'Term':
return True
if t[0] == 'Expression' and t[1] == 'Term':
return True
return False
def is_RelationOperator(self, t: (str, str)) -> bool:
if t[0] in ['LSS', 'LEQ', 'GRE', 'GEQ', 'NEQ', 'EQL']:
return True
return False
def is_UnsignedInteger(self, t: (str, str)) -> bool:
if t[0] == 'UnsignedInteger':
return True
if t[0] == 'Integer' and t[1] >= 0:
return True
if t[0] == 'Factor' and t[1] == 'UnsignedInteger':
return True
if t[0] == 'Term' and t[1] == 'UnsignedInteger':
return True
if t[0] == 'Expression' and t[1] == 'UnsignedInteger':
return True
return False
# <字符串> ::= "十进制编码为32,33,35126的ASCII字符"
def String(self, lt: list[(str, str)]) -> (str, (str, str), int):
regex = r'[ -!#-~]*'
if len(lt) == 1 and lt[0][0] == "STRCON" and re.match(regex, lt[0][1]):
return "doit", ("String", lt[0][1])
return "can't"
# <程序> ::= [<常量说明>][<变量说明>]{<有返回值函数定义>|<无返回值函数定义>} <主函数>
def Program(self, lt: list[(str, str)]) -> (str, (str, str), int):
if len(lt) < 2:
return "can't"
if len(lt) == 2 and (
lt[0][0] == 'FunctionDefinitionWithReturnValue' or lt[0][
0] == 'FunctionDefinitionWithoutReturnValue') and lt[1][
0] == 'MainFunction':
return "doit", ("Program", "")
if lt[0][0] == 'ConstantDeclaration' and lt[1][0] == 'VariableDeclaration':
return self.Program(lt[2:])
if lt[0][0] == 'ConstantDefinition' or lt[1][0] == 'VariableDefinition':
return self.Program(lt[1:])
return "can't"
# 部分 <常量说明>: := const常量定义;{const常量定义;}
def PartOfConstantDeclaration(self, t: (str, str)) -> bool:
pt = ['CONSTTK', 'ConstantDefinition', 'SEMICN']
return t[0] in pt or self.PartOfConstantDefinition(t)
# <常量说明>: := const常量定义;{const常量定义;}
def ConstantDeclaration(self, lt: list[(str, str)], newadd: (str, str)) -> (str, (str, str), int):
if lt[0][0] != 'CONSTTK' or len(lt) < 3:
return "can't"
newadd = newadd[0]
if lt[0][0] == 'CONSTTK' and lt[1][0] == 'ConstantDefinition' and lt[2][0] == 'SEMICN':
i = 3
while i < len(lt):
if lt[i][0] != 'CONSTTK':
return "part", ("ConstantDeclaration", ""), i
if i + 1 == len(lt):
if newadd == 'ConstantDefinition' or self.PartOfConstantDefinition((newadd, "")):
return "can't"
else:
return "part", ("ConstantDeclaration", ""), i
elif lt[i + 1][0] != 'ConstantDefinition':
if self.PartOfConstantDefinition(lt[i + 1]):
return "can't"
else:
return "part", ("ConstantDeclaration", ""), i
if i + 2 == len(lt):
if newadd == 'SEMICN':
return "can't"
else:
return "part", ("ConstantDeclaration", ""), i
elif lt[i + 2][0] != 'SEMICN':
return "part", ("ConstantDeclaration", ""), i
i += 3
if newadd == 'CONSTTK':
return "can't"
else:
return "doit", ("ConstantDeclaration", "")
return "can't"
# 部分 <常量定义>: := int标识符整数{,<标识符>=<整数>} | char标识符字符{,<标识符>=<字符>}
def PartOfConstantDefinition(self, t: (str, str)) -> bool:
pt = ['INTTK', 'CHARTK', 'IDENFR', 'ASSIGN', 'INTCON', 'COMMA', 'CHARCON']
return t[0] in pt
# <常量定义>: := int标识符整数{,<标识符>=<整数>} | char标识符字符{,<标识符>=<字符>}
def ConstantDefinition(self, lt: list[(str, str)], newadd: (str, str)) -> (str, (str, str), int):
if (not (self.is_INTTK_CHARTK(lt[0]))) or len(lt) < 4:
return "can't"
newadd = newadd[0]
if self.is_INTTK_CHARTK(lt[0]) and self.is_IDENFR(lt[1]) and lt[2][0] == 'ASSIGN' and self.is_Integer_CHARCON(
lt[3]):
i = 4
while i < len(lt):
if lt[i][0] != 'COMMA':
return "part", ("ConstantDefinition", ""), i
if i + 1 == len(lt):
if newadd == 'IDENFR':
return "can't"
else:
return "part", ("ConstantDefinition", ""), i
elif not self.is_IDENFR(lt[i + 1]):
return "part", ("ConstantDefinition", ""), i
if i + 2 == len(lt):
if newadd == 'ASSIGN':
return "can't"
else:
return "part", ("ConstantDefinition", ""), i
elif lt[i + 2][0] != 'ASSIGN':
return "part", ("ConstantDefinition", ""), i
if i + 3 == len(lt):
if newadd == 'Integer' or newadd == 'CHARCON' or self.PartOfInteger((newadd, "")):
return "can't"
else:
return "part", ("ConstantDefinition", ""), i
elif not self.is_Integer_CHARCON(lt[i + 3]):
if self.PartOfInteger(lt[i + 3]):
return "can't"
return "part", ("ConstantDefinition", ""), i
i += 4
if newadd == 'COMMA':
return "can't"
return "doit", ("ConstantDefinition", "")
return "can't"
# 部分 <无符号整数>: := <非零数字>{<数字>} | 0
def PartOfUnsignedInteger(self, t: (str, str)) -> bool:
return t[0] == 'INTCON'
# <无符号整数>: := <非零数字>{<数字>} | 0
def UnsignedInteger(self, lt: list[(str, str)]) -> (str, (str, str), int):
if len(lt) < 1:
return "can't"
if lt[0][0] == 'INTCON':
if lt[0][1][0] == '0' and len(lt[0][1]) != 1:
return "can't"
return "doit", ("UnsignedInteger", lt[0][1])
return "can't"
# 部分 <整数> ::= [+|-]<无符号整数>
def PartOfInteger(self, t: (str, str)) -> bool:
pt = ['UnsignedInteger', 'PLUS', 'MINU']
return t[0] in pt or self.PartOfUnsignedInteger(t)
# <整数> ::= [+|-]<无符号整数>
def Integer(self, lt: list[(str, str)]) -> (str, (str, str), int):
if len(lt) < 1:
return "can't"
if lt[0][0] == 'UnsignedInteger':
return "doit", ("Integer", lt[0][1])
if len(lt) >= 2 and lt[0][0] in ["MINU", "PLUS"] and lt[1][0] == 'UnsignedInteger':
if len(lt) == 2:
return "doit", ('Integer', (-1 if lt[0][0] == "MINU" else 1) * lt[1][1])
else:
return "part", ("Integer", (-1 if lt[0][0] == "MINU" else 1) * lt[1][1]), 2
return "can't"
# 部分 <声明头部> ::= int标识符 |char标识符
def PartOfDeclarationHeader(self, t: (str, str)) -> bool:
pt = ['INTTK', 'CHARTK']
return t[0] in pt or self.is_IDENFR(t)
# <声明头部> ::= int标识符 |char标识符
def DeclarationHeader(self, lt: list[(str, str)], newadd: (str, str)) -> (str, (str, str), int):
if len(lt) < 2:
return "can't"
newadd = newadd[0]
if (lt[0][0] == 'INTTK' or lt[0][0] == 'CHARTK') and self.is_IDENFR(lt[1]):
if len(lt) == 2:
if newadd == 'LPARENT':
return "doit", ("DeclarationHeader", "")
elif lt[2][0] == 'LPARENT':
return "doit", ("DeclarationHeader", "")
return "can't"
# <变量说明>: := <变量定义>;{<变量定义>;}
def VariableDeclaration(self, lt: list[(str, str)], newadd: (str, str)) -> (str, (str, str), int):
def is_seq(lt: list[(str, str)]) -> str:
if len(lt) == 1 and lt[0][0] == 'VariableDefinition':
return "can't"
if lt[0][0] == 'VariableDefinition' and lt[1][0] == 'SEMICN':
if len(lt) == 2:
return "doit"
else:
return "part"
if lt[0][0] != 'VariableDefinition' or lt[1][0] != 'SEMICN':
if self.PartOfTYPEIDENFR(lt[0][0]):
return "can't"
return "not"
if len(lt) < 2:
return "can't"
if lt[0][0] != 'VariableDefinition' or lt[1][0] != 'SEMICN':
return "can't"
newadd = newadd[0]
i = 2
while i < len(lt):
match is_seq(lt[i:]):
case "doit":
if newadd == 'VariableDefinition' or self.PartOfVariableDefinition((newadd, "")):
return "can't"
else:
return "doit", ("VariableDeclaration", "")
case "part":
i += 2
case "not":
if self.PartOfVariableDefinition((newadd, "")) or newadd == 'SEMICN':
return "can't"
return "part", ("VariableDeclaration", ""), i
case "can't":
return "can't"
if newadd == 'VariableDefinition' or self.PartOfVariableDefinition((newadd, "")):
return "can't"
else:
return "doit", ("VariableDeclaration", "")
# 部分 <类型标识符> ::= int | char
def PartOfTYPEIDENFR(self, t: (str, str)) -> bool:
lt = ['INTTK', 'CHARTK']
return t[0] in lt
# 部分 <变量定义> ::= <类型标识符>(<标识符>|<标识符>'['<无符号整数>']'){,(<标识符>|<标识符>'['<无符号整数>']' )} //无符号整数表示数组元素的个数其值需大于0
def PartOfVariableDefinition(self, t: (str, str)) -> bool:
lt = ['UnsignedInteger', 'LBRACK', 'RBRACK']
return t in lt or self.is_IDENFR(t) or self.PartOfTYPEIDENFR(t)
# <变量定义> ::= <类型标识符>(<标识符>|<标识符>'['<无符号整数>']'){,(<标识符>|<标识符>'['<无符号整数>']' )} //无符号整数表示数组元素的个数其值需大于0
def VariableDefinition(self, lt: list[(str, str)], newadd: (str, str), last: (str, str)) -> (str, (str, str), int):
def is_seq(lt: list[(str, str)]) -> str: # 3 for long_assign 2 for short_assign, 1 for part, 0 for not
ls = ['IDENFR', 'LBRACK', 'UnsignedInteger', 'RBRACK']
if self.is_IDENFR(lt[0]):
return 'not'
if len(lt) == 1 and self.is_IDENFR(lt[0]):
return "doit"
if len(lt) == 4 and self.is_IDENFR(lt[0]) and lt[1][0] == 'LBRACK' and self.is_UnsignedInteger(lt[2]) and \
lt[3][0] == 'RBRACK':
return "doit"
if len(lt) >= 4 and self.is_IDENFR(lt[0]) and lt[1][0] == 'LBRACK' and self.is_UnsignedInteger(lt[2]) and \
lt[3][0] == 'RBRACK':
return "part2"
if len(lt) >= 1 and self.is_IDENFR(lt[0]) and lt[1][0] != 'LBRACK':
return "part1"
bl = True
for i in range(len(lt)):
if lt[i][0] != ls[i]:
bl = False
break
if bl:
return "can't"
else:
return "not"
if len(lt) < 2 or (lt[0][0] != 'INTTK' and lt[0][0] != 'CHARTK'):
return "can't"
if last[0] == 'CONSTTK' or last[0] == 'COMMA' or last[0] == 'LPARENT':
return "can't"
newadd = newadd[0]
i = 0
if len(lt) >= 5:
if (lt[0][0] == 'INTTK' or lt[0][0] == 'CHARTK') and self.is_IDENFR(lt[1]) and lt[2][0] == 'LBRACK' and \
self.is_UnsignedInteger(lt[3]) and lt[4][0] == 'RBRACK':
i = 5
elif (lt[0][0] == 'INTTK' or lt[0][0] == 'CHARTK') and self.is_IDENFR(lt[1]):
i = 2
while i < len(lt):
if lt[i][0] != 'COMMA':
return "part", ("VariableDefinition", ""), i
i += 1
while i < len(lt):
match is_seq(lt[i:]):
case "doit":
if newadd == 'COMMA':
return "can't"
else:
return "doit", ("VariableDefinition", "")
case "part1":
if lt[i + 1] == 'COMMA':
i = i + 2
else:
return "part", ("VariableDefinition", ""), i + 1
case "part2":
if lt[i + 4] == 'COMMA':
i = i + 5
else:
return "part", ("VariableDefinition", ""), i + 4
case "can't":
return "can't"
case "not":
return "part", ("VariableDefinition", ""), i - 1
if newadd == 'IDENFR':
return "can't"
else:
return "part", ("VariableDefinition", ""), i - 1
if newadd in ['COMMA', 'LBRACK']:
return "can't"
else:
return "doit", ("VariableDefinition", "")
# <有返回值函数定义> ::= <声明头部>'('<参数表>')' '{'<复合语句>'}'
def FunctionDefinitionWithReturnValue(self, lt: list[(str, str)]) -> (str, (str, str), int):
if len(lt) < 7:
return "can't"
if lt[0][0] == 'DeclarationHeader' and lt[1][0] == 'LPARENT' and lt[2][0] == 'ParameterList' and lt[3][
0] == 'RPARENT' and lt[4][0] == 'LBRACE' and lt[5][0] == 'CompoundStatement' and lt[6][0] == 'RBRACE':
if len(lt) == 7:
return "doit", ("FunctionDefinitionWithReturnValue", "")
else:
return "part", ("FunctionDefinitionWithReturnValue", ""), 7
return "can't"
# <无返回值函数定义> ::= void标识符'('<参数表>')''{'<复合语句>'}'
def FunctionDefinitionWithoutReturnValue(self, lt: list[(str, str)]) -> (str, (str, str), int):
if len(lt) < 8:
return "can't"
if lt[0][0] == 'VOIDTK' and self.is_IDENFR(lt[1]) and lt[2][0] == 'LPARENT' and lt[3][0] == 'ParameterList' and \
lt[4][0] == 'RPARENT' and lt[5][0] == 'LBRACE' and lt[6][0] == 'CompoundStatement' and lt[7][
0] == 'RBRACE':
if len(lt) == 8:
return "doit", ("FunctionDefinitionWithoutReturnValue", "")
else:
return "part", ("FunctionDefinitionWithoutReturnValue", ""), 8
return "can't"
# <复合语句> ::= [<常量说明>][<变量说明>]<语句列>
def CompoundStatement(self, lt: list[(str, str)]) -> (str, (str, str), int):
if len(lt) < 1:
return "can't"
if lt[0][0] == 'StatementList':
if len(lt) == 1:
return "doit", ("CompoundStatement", "")
else:
return "part", ("CompoundStatement", ""), 1
if len(lt) < 2:
return "can't"
if (lt[0][0] == 'ConstantDeclaration' or lt[0][0] == 'VariableDeclaration') and lt[1][0] == 'StatementList':
if len(lt) == 2:
return "doit", ("CompoundStatement", "")
else:
return "part", ("CompoundStatement", ""), 2
if len(lt) < 3:
return "can't"
if lt[0][0] == 'ConstantDeclaration' and lt[1][0] == 'VariableDeclaration' and lt[2][0] == 'StatementList':
if len(lt) == 3:
return "doit", ("CompoundStatement", "")
else:
return "part", ("CompoundStatement", ""), 3
return "can't"
# <参数表> ::= <类型标识符><标识符>{,<类型标识符><标识符>}| <空>
def ParameterList(self, lt: list[(str, str)], newadd: (str, str), last: (str, str)) -> (str, (str, str), int):
def is_seq(lt: list[(str, str)]) -> str:
ls = ['COMMA', ['INTTK', 'CHARTK'], 'IDENFR']
for i in range(len(lt)):
if i >= len(ls):
return "full"
if i == 1:
if lt[i][0] not in ls[i]:
return "not"
elif i == 2:
if not self.is_IDENFR(lt[i]):
return "not"
else:
if lt[i][0] != ls[i]:
return "not"
if len(lt) < 3:
return "can't"
else:
return "doit"
if len(lt) < 2 or last[0] != 'LPARENT' or newadd[0] != 'RPARENT':
return "can't"
if lt[0][0] not in ["INTTK", "CHARTK"] or not self.is_IDENFR(lt[1]):
return "can't"
i = 2
while i < len(lt):
match is_seq(lt[i:]):
case "doit":
return "doit", ("ParameterList", "")
case "can't":
return "can't"
case "not":
return "can't"
case "full":
i += 3
return "doit", ("ParameterList", "")
# <主函数> ::= void main() {’<复合语句>‘}
def MainFunction(self, lt: list[(str, str)]) -> (str, (str, str), int):
if len(lt) < 7:
return "can't"
if lt[0][0] == 'VOIDTK' and lt[1][0] == 'MAINTK' and lt[2][0] == 'LPARENT' and lt[3][0] == 'RPARENT' and \
lt[4][0] == 'LBRACE' and lt[5][0] == 'CompoundStatement' and lt[6][0] == 'RBRACE':
if len(lt) == 7:
return "doit", ("MainFunction", "")
else:
return "part", ("MainFunction", ""), 7
return "can't"
# <表达式>: := [+|-]<项>{<加法运算符><项>} // [+ |]只作用于第一个 < 项 >
def Expression(self, lt: list[(str, str)], newadd: (str, str)) -> (str, (str, str), int, bool, list[int], bool):
def is_seq(lt: list[(str, str)]) -> str:
ls = [['PLUS', 'MINU'], 'Term']
if len(lt) == 1 and lt[0][0] in ls[0]:
return "part"
if len(lt) >= 2 and lt[0][0] in ls[0] and lt[1][0] == ls[1]:
if len(lt) == 2:
return "doit"
if len(lt) > 2:
return "full"
if lt[0][0] not in ls[1]:
return "not"
if len(lt) >= 2 and lt[0][0] in ls[0] and lt[1][0] != ls[1]:
return "not"
if len(lt) < 1:
return "can't"
i = 0
if len(lt) >= 2 and lt[0][0] in ['PLUS', 'MINU'] and self.is_Term(lt[1]):
i = 2
elif self.is_Term(lt[0]):
i = 1
else:
return "can't"
newadd = newadd[0]
while i < len(lt):
match is_seq(lt[i:]):
case "doit":
if newadd in ["PLUS", "MINU"]:
return "can't"
else:
return "doit", ("Expression", "Term"), False, [t for t in range(i + 2) if
lt[t][0] == "Expression"], False
case "part":
if newadd == "Term" or (i == 1 and lt[0][0] == "Expression"):
return "can't"
else:
return "part", ("Expression", "Term"), i, False
case "full":
i += 2
case "not":
if i == 1 and lt[0][0] == "Expression":
return "can't"
return "part", ("Expression", "Term"), i, False, [t for t in range(i) if
lt[t][0] == "Expression"], False
if newadd in ["PLUS", "MINU"]:
return "can't"
else:
if lt[i - 1][0] == "Expression":
return "can't"
return "doit", ("Expression", lt[i - 1][1]), False
# <项>: := <因子>{<乘法运算符><因子>}
def Term(self, lt: list[(str, str)], newadd: (str, str)) -> (str, (str, str), int, bool):
def is_seq(lt: list[(str, str)]) -> str:
ls = ['Factor', ['MULT', 'DIV', 'MOD']]
if len(lt) == 1 and lt[0][0] == ls[0]:
return "part"
if len(lt) >= 2 and lt[0][0] == ls[0] and lt[1][0] in ls[1]:
if len(lt) == 2:
return "doit"
if len(lt) > 2:
return "full"
if lt[0][0] != ls[0]:
return "notA"
if len(lt) >= 2 and lt[0][0] == ls[0] and lt[1][0] not in ls[1]:
return "notB"
if len(lt) < 1:
return "can't"
i = 0
if lt[0][0] == 'Factor':
i = 1
else:
return "can't"
while i < len(lt):
match is_seq(lt[i:]):
case "doit":
if newadd in ['MULT', 'DIV', 'MOD']:
return "can't"
else:
return "doit", ("Term", ""), False
case "part":
if self.PartOfFactor((newadd, "")):
return "can't"
else:
return "part", ("Term", lt[0][1] if i == 1 else ""), i, False
case "full":
i += 2
case "notA":
if self.PartOfFactor((lt[i], "")):
return "can't"
return "part", ("Term", lt[0][1] if i == 1 else ""), i, False
case "notB":
return "part", ("Term", lt[0][1] if i == 1 else ""), i, False
if newadd in ['MULT', 'DIV', 'MOD']:
return "can't"
else:
return "doit", ("Term", lt[0][1]), False
# 部分 <因子>: := <标识符>|<标识符>'['<表达式>']' | '('<表达式>')'|<整数> | <字符>|<有返回值函数调用语句>
def PartOfFactor(self, t: (str, str)) -> bool:
pt = ['IDENFR', 'LPARENT', 'RPARENT', 'LBRACK', 'RBRACK', 'Integer', 'CHARCON',
'FunctionDefinitionWithReturnValue']
return t[0] in pt
# <因子>: := <标识符>|<标识符>'['<表达式>']' | '('<表达式>')'|<整数> | <字符>|<有返回值函数调用语句>
def Factor(self, lt: list[(str, str)]) -> (str, (str, str), int):
if len(lt) < 1:
return "can't"
if len(lt) > 4:
if self.is_IDENFR(lt[0]) and lt[1][0] == 'LBRACK' and lt[2][0] == 'Expression' and lt[3][0] == 'RBRACK':
if len(lt) == 4:
return "doit", ("Factor", "")
else:
return "part", ("Factor", ""), 4
elif len(lt) > 3:
if lt[0][0] == 'LPARENT' and lt[1][0] == 'Expression' and lt[2][0] == 'RPARENT':
if len(lt) == 3:
return "doit", ("Factor", "")
else:
return "part", ("Factor", ""), 3
elif lt[0][0] in ["IDENFR", "Integer", "CHARCON", "FunctionCallWithReturnValue"]:
if len(lt) == 1:
return "doit", ("Factor", lt[0][0])
return "part", ("Factor", lt[0][0]), 1
return "can't"
# 部分 <语句> ::= <条件语句>|<循环语句>| '{'<语句列>'}'| <有返回值函数调用语句>; |<无返回值函数调用语句>;|<赋值语句>;|<读语句>;|<写语句>;|<空>;|<返回语句>;
def PartOfStatement(self, t: (str, str)) -> bool:
pt = ['ConditionStatement', 'LoopStatement', 'LBRACE', 'StatementList', 'RBRACE', 'FunctionCallWithReturnValue',
'FunctionCallWithoutReturnValue', 'AssignmentStatement', 'ReadStatement', 'WriteStatement', 'SEMICN',
'ReturnStatement', 'SEMICN']
return t in pt
# <语句> ::= <条件语句>|<循环语句>| '{'<语句列>'}'| <有返回值函数调用语句>; |<无返回值函数调用语句>;|<赋值语句>;|<读语句>;|<写语句>;|<空>;|<返回语句>;
def Statement(self, lt: list[(str, str)], _: (str, str), last: (str, str), last_of_last: (str, str)) -> (
str, (str, str), int):
if len(lt) < 1:
return "can't"
if last_of_last[0] == "FORTK":
return "can't"
if lt[0][0] == 'ConditionStatement':
if len(lt) == 1:
return "doit", ("Statement", "")
else:
return "part", ("Statement", ""), 1
if lt[0][0] == 'LoopStatement':
if len(lt) == 1:
return "doit", ("Statement", "")
else:
return "part", ("Statement", ""), 1
if len(lt) > 2 and lt[0][0] == 'LBRACE' and lt[1][0] == 'StatementList' and lt[2][0] == 'RBRACE':
if len(lt) == 3:
return "doit", ("Statement", "")
else:
return "part", ("Statement", ""), 3
if len(lt) >= 2 and (
lt[0][0] == 'FunctionCallWithReturnValue' or lt[0][0] == 'FunctionCallWithoutReturnValue' or lt[0][
0] == 'AssignmentStatement' or lt[0][0] == 'ReadStatement' or lt[0][0] == 'WriteStatement' or lt[0][
0] == 'ReturnStatement') and lt[1][0] == 'SEMICN':
if len(lt) == 2:
return "doit", ("Statement", "")
else:
return "part", ("Statement", ""), 1
if lt[0][0] == 'SEMICN' and last[0] not in ["VariableDefinition", "ConstantDefinition", "Expression",
"ConditionStatement", "Condition"]:
if len(lt) == 1:
return "doit", ("Statement", "")
else:
return "part", ("Statement", ""), 1
return "can't"
# <赋值语句> ::= <标识符>=<表达式>|<标识符>'['<表达式>']'=<表达式>
def AssignmentStatement(self, lt: list[(str, str)], newadd: (str, str), last: (str, str),
last_of_last: (str, str)) -> (str, (str, str), int, bool, list[int]):
if len(lt) < 3 or self.PartOfTYPEIDENFR(last) or last_of_last[0] == "FORTK":
return "can't"
if self.is_IDENFR(lt[0]) and lt[1][0] == 'ASSIGN' and lt[2][0] == 'Expression':
if len(lt) == 3:
if newadd in ['PLUS', 'MINU']:
return "can't"
return "doit", ("AssignmentStatement", ""), False, ([0] if lt[0][0] != "IDENFR" else [])
else:
if lt[3][0] in ['PLUS', 'MINU']:
return "can't"
return "part", ("AssignmentStatement", ""), 3, False, ([0] if lt[0][0] != "IDENFR" else [])
if len(lt) >= 6 and self.is_IDENFR(lt[0]) and lt[1][0] == 'LBRACK' and lt[2][0] == 'Expression' and lt[3][
0] == 'RBRACK' and lt[4][0] == 'ASSIGN' and lt[5][0] == 'Expression':
if len(lt) == 6:
if newadd in ['PLUS', 'MINU']:
return "can't"
return "doit", ("AssignmentStatement", ""), False, ([0] if lt[0][0] != "IDENFR" else [])
else:
if lt[6][0] in ['PLUS', 'MINU']:
return "can't"
return "part", ("AssignmentStatement", ""), 6, False, ([0] if lt[0][0] != "IDENFR" else [])
return "can't"
# <条件语句> ::= if '('<条件>')'语句else语句
def ConditionalStatement(self, lt: list[(str, str)], newadd: (str, str)) -> (str, (str, str), int):
if len(lt) < 5:
return "can't"
if lt[0][0] != "IFTK" or lt[1][0] != "LPARENT" or lt[2][0] != "Condition" or lt[3][0] != "RPARENT" or lt[4][
0] != "Statement":
return "can't"
if len(lt) == 5:
if newadd == "ELSETK":
return "can't"
return "doit", ("ConditionalStatement", "")
if len(lt) >= 7 and lt[5][0] == "ELSETK" and lt[6][0] == "Statement":
if len(lt) == 7:
return "doit", ("ConditionalStatement", "")
else:
return "part", ("ConditionalStatement", ""), 7
if len(lt) == 6:
if lt[5][0] != "ELSETK":
return "part", ("ConditionalStatement", ""), 5
if newadd[0] == "Statement" or self.PartOfStatement(newadd):
return "can't"
return "part", ("ConditionalStatement", ""), 5
# <条件> ::= <表达式><关系运算符><表达式> |<表达式> //整型表达式之间才能进行关系运算 //表达式为整型其值为0条件为假值不为0时条件为真
def Condition(self, lt: list[(str, str)], newadd: (str, str), last: (str, str), last_of_last: (str, str)) -> (
str, (str, str), int):
if len(lt) < 1:
return "can't"
if not ((last[0] == 'LPARENT' and newadd[0] == 'RPARENT' and last_of_last[0] == "IFTK") or (
last[0] == 'LPARENT' and newadd[0] == 'RPARENT' and last_of_last[0] == "WHILETK") or (
last[0] == 'SEMICN' and newadd[0] == 'SEMICN' and last_of_last[0] == "Expression")):
return "can't"
if lt[0][0] == "Expression":
if len(lt) == 1:
return "doit", ("Condition", "")
if len(lt) == 3 and self.is_RelationOperator(lt[1]) and lt[2][0] == "Expression":
return "doit", ("Condition", "")
return "can't"
return "can't"
# <循环语句> ::= while '('<条件>')'<语句>| do语句while '('<条件>')' |for'('<标识符>=<表达式>;<条件>;<标识符>=<标识符>(+|)<步长>')'<语句>
def LoopStatement(self, lt: list[(str, str)]) -> (str, (str, str), int):
if len(lt) < 5:
return "can't"
if lt[0][0] == "WHILETK" and lt[1][0] == "LPARENT" and lt[2][0] == "Condition" and lt[3][0] == "RPARENT" and \
lt[4][0] == "Statement":
if len(lt) == 5:
return "doit", ("LoopStatement", "")
else:
return "part", ("LoopStatement", ""), 5
if len(lt) < 6:
return "can't"
if lt[0][0] == "DOTK" and lt[1][0] == "Statement" and lt[2][0] == "WHILETK" and lt[3][0] == "LPARENT" and lt[4][
0] == "Condition" and lt[5][0] == "RPARENT":
if len(lt) == 6:
return "doit", ("LoopStatement", "")
else:
return "part", ("LoopStatement", ""), 6
if len(lt) < 15:
return "can't"
if lt[0][0] == "FORTK" and lt[1][0] == "LPARENT" and self.is_IDENFR(lt[2]) and lt[3][0] == "ASSIGN" and lt[4][
0] == "Expression" and lt[5][0] == "SEMICN" and lt[6][0] == "Condition" and lt[7][
0] == "SEMICN" and self.is_IDENFR([8]) and lt[9][0] == "ASSIGN" and self.is_IDENFR(lt[10]) and lt[11][
0] in ['PLUS', 'MINU'] and lt[12] == "Step" and lt[13][0] == "RPARENT" and lt[14][0] == "Statement":
if len(lt) == 15:
return "doit", ("LoopStatement", "")
else:
return "part", ("LoopStatement", ""), 15
return "can't"
# <步长>::= <无符号整数>
def Step(self, lt: list[(str, str)], newadd: (str, str), last: (str, str), last_of_last: (str, str)) -> (
str, (str, str), int):
if len(lt) < 1:
return "can't"
if last[0] not in ["PLUS", "MINU"] or not self.is_IDENFR(last_of_last):
return "can't"
if lt[0][0] == "UnsignedInteger":
if len(lt) == 1:
if newadd == "RPARENT":
return "doit", ("Step", "")
else:
if lt[2] == "RPARENT":
return "part", ("Step", ""), 1
return "can't"
# <有返回值函数调用语句>: := <标识符>'('<值参数表>')'
def FunctionCallWithReturnValue(self, lt: list[(str, str)], newadd: (str, str), last: (str, str)) -> (
str, (str, str), int):
if len(lt) < 4:
return "can't"
if len(lt) >= 4 and self.is_IDENFR(lt[0]) and lt[1][0] == "LPARENT" and lt[2][0] == "ValueParameterList" and \
lt[3][0] == "RPARENT":
if len(lt) == 4:
return "doit", ("FunctionCallWithReturnValue", "")
else:
return "part", ("FunctionCallWithReturnValue", ""), 4
return "can't"
# <无返回值函数调用语句> ::= <标识符>'('<值参数表>')' 无
# <值参数表> ::= <表达式>{,<表达式>}|<空>
def ValueParameterList(self, lt: list[(str, str)], newadd: (str, str), last: (str, str),
last_of_last: (str, str)) -> (str, (str, str), int, bool):
def is_seq(lt: list[(str, str)]) -> str:
ls = ['COMMA', 'Expression']
if lt[0][0] != ls[0]:
return "not"
if len(lt) == 1 and lt[0][0] == ls[0]:
return "can't"
if len(lt) >= 2 and lt[0][0] == ls[0] and lt[1][0] == ls[1]:
if len(lt) == 2:
return "doit"
if len(lt) > 2:
return "full"
else:
return "not"
if len(lt) < 1:
return "can't"
if lt[0][0] != "Expression" or newadd[0] != "RPARENT" or last[0] != "LPARENT" or last_of_last[0] == "RETURNTK":
return "can't"
i = 1
while i < len(lt):
match is_seq(lt[i:]):
case "doit":
if newadd == "COMMA":
return "can't"
return "doit", ("ValueParameterList", ""), False
case "full":
i += 2
case "not":
if lt[i][0] == "RPARENT":
return "part", ("ValueParameterList", ""), i, False
return "can't"
case "can't":
return "can't"
return "can't"
# <语句列> ::= {<语句>}
def StatementList(self, lt: list[(str, str)], newadd: (str, str), last: (str, str)) -> (str, (str, str), int):
if len(lt) < 1:
return "can't"
if last[0] != "LBRACE":
return "can't"
for i in range(len(lt)):
if lt[i][0] != "Statement":
if lt[i][0] == "RBRACE":
return "part", ("StatementList", ""), i
return "can't"
if newadd[0] == "RBRACE":
return "doit", ("StatementList", ""), i
else:
return "can't"
# <读语句> ::= scanf '('<标识符>{,<标识符>}')'
def ReadStatement(self, lt: list[(str, str)]) -> (str, (str, str), int):
if len(lt) < 4:
return "can't"
if lt[0][0] != "SCANFTK":
return "can't"
if lt[0][0] == "SCANFTK" and lt[1][0] == "LPARENT" and self.is_IDENFR(lt[2]):
for i in range(3, len(lt)):
if not self.is_IDENFR(lt[i]):
if lt[i] == "RPARENT":
if i == len(lt) - 1:
return "doit", ("ReadStatement", "")
return "part", ("ReadStatement", ""), i
return "can't"
return "can't"
# <写语句> ::= printf '(' <字符串>,<表达式> ')'| printf '('<字符串> ')'| printf '('<表达式>')'
def WriteStatement(self, lt: list[(str, str)]) -> (str, (str, str), int, bool):
if len(lt) < 4:
return "can't"
if lt[0][0] != "PRINTFTK":
return "can't"
if lt[0][0] == "PRINTFTK" and lt[1][0] == "LPARENT" and (lt[2][0] == "String" or lt[2][0] == "Expression") and \
lt[3][0] == "RPARENT":
if len(lt) == 4:
return "doit", ("WriteStatement", ""), False
else:
return "part", ("WriteStatement", ""), 4, False
if len(lt) >= 6 and lt[0][0] == "PRINTFTK" and lt[1][0] == "LPARENT" and lt[2][0] == "String" and lt[3][
0] == "COMMA" and lt[4][0] == "Expression" and lt[5][0] == "RPARENT":
if len(lt) == 6:
return "doit", ("WriteStatement", ""), False
else:
return "part", ("WriteStatement", ""), 6, False
return "can't"
# <返回语句> ::= return['('<表达式>')']
def ReturnStatement(self, lt: list[(str, str)], addnew: (str, str)) -> (str, (str, str), int, bool):
if len(lt) < 1:
return "can't"
if lt[0][0] != "RETURNTK":
return "can't"
if len(lt) == 1:
if addnew[0] == "LPARENT":
return "can't"
return "doit", ("ReturnStatement", ""), False
if len(lt) == 4 and lt[1][0] == "LPARENT" and lt[2][0] == "Expression" and lt[3][0] == "RPARENT":
return "doit", ("ReturnStatement", ""), False
return "can't"
class GrammaticalAnalysis:
keyword = {
"const": "CONSTTK",
"int": "INTTK",
"char": "CHARTK",
"void": "VOIDTK",
"main": "MAINTK",
"if": "IFTK",
"else": "ELSETK",
"do": "DOTK",
"while": "WHILETK",
"for": "FORTK",
"continue": "CONTINUETK",
"break": "BREAKTK",
"scanf": "SCANFTK",
"printf": "PRINTFTK",
"return": "RETURNTK"}
symbol = {
"+": "PLUS",
"-": "MINU",
"*": "MULT",
"/": "DIV",
"%": "MOD",
"&&": "AND",
"||": "OR",
",": "COMMA",
";": "SEMICN",
"(": "LPARENT",
")": "RPARENT",
"[": "LBRACK",
"]": "RBRACK",
"{": "LBRACE",
"}": "RBRACE",
"=": "ASSIGN",
"<": "LSS",
"<=": "LEQ",
">": "GRE",
">=": "GEQ",
"!=": "NEQ",
"==": "EQL",
"!": "NOT"
}
regex = {
r'^[a-zA-Z][a-zA-Z0-9]*$': "IDENFR",
r'^\d+$': "INTCON",
r'^".*"$': "STRCON",
r"^'.'$": "CHARCON"
}
yacc = Yacc()
patterns = []
seq = 0
final_ans = []
def __init__(self):
self.patterns = [method for method in dir(Yacc) if
callable(getattr(Yacc, method)) and not method.startswith("__") and not method.startswith(
"PartOf") and not method.startswith("is_")]
def readfile(self, filename: str) -> str:
s = ''
with open(filename, 'r', encoding='utf-8') as f:
s = ''.join(f.readlines())
return s
def strsplit(self, s: str) -> list[str]:
lt = []
l = 0
r = 1
while r < len(s):
if not (s[r].isalpha() or s[r].isdigit()):
k = s[l:r].strip()
if len(k) != 0:
lt.append(k)
k = s[r].strip()
if len(k) != 0:
lt.append(k)
l = r + 1
r = l
else:
r += 1
if l < len(s):
lt.append(s[l:r])
i = 0
# 修整 符号配对 + 字符串查找
while i < len(lt) - 1:
if lt[i] in self.symbol and lt[i] + lt[i + 1] in self.symbol:
lt[i] = lt[i] + lt[i + 1]
lt.pop(i + 1)
elif lt[i] == '"':
i += 1
while i < len(lt):
lt[i - 1] += " " + lt[i]
lt.pop(i)
if lt[i - 1][-1] == '"':
i -= 1
break
elif lt[i] == '\'':
if i + 2 < len(lt) and lt[i + 2] == '\'':
lt[i] += lt[i + 1] + lt[i + 2]
lt.pop(i + 1)
lt.pop(i + 1)
i += 1
return lt
def final_ans_insert(self, lt: list[int], newadd: (int, (str, str)), delete: bool = True,
force_del: list[int] = [], cascade_delete: bool = True) -> None:
i = 0
def cas_del(t: int):
dele = 0
while t != -1:
for k in self.final_ans:
if k[0] == t:
if k[1][0] not in ['Factor', 'Term', 'Expression']:
t = -1
break
t = k[2] if len(k) >= 3 and cascade_delete else -1
dele += 1
self.final_ans.remove(k)
break
return dele
while i < len(self.final_ans) and len(lt) > 0:
if self.final_ans[i][0] in lt:
seq = self.final_ans[i][0]
if len(lt) == 1:
if delete == False:
self.final_ans.insert(i + 1, (newadd[0], newadd[1], lt[0]))
else:
self.final_ans.insert(i + 1, newadd)
if (delete and self.final_ans[i][1][0] in ['Factor', 'Term', 'Expression']) or \
self.final_ans[i][0] in force_del:
i -= cas_del(self.final_ans[i][0])
lt.remove(seq)
i += 1
# 规约!!
def reduce(self, ans: list[(int, (str, str), any)], newadd: (str, str)) -> list[(int, str)]:
changed = True
while changed:
changed = False
for i in range(len(ans)):
no_index_ans = [x[1] for x in ans]
tokens = no_index_ans[i:]
redo = False
for method_name in self.patterns:
method = getattr(self.yacc, method_name)
if callable(method):
result = []
r = len(inspect.getfullargspec(method).args)
match r:
case 5:
result = method(tokens, newadd, (-1, -1) if i == 0 else no_index_ans[i - 1],
(-1, -1) if i <= 1 else no_index_ans[i - 2])
case 4:
result = method(tokens, newadd, (-1, -1) if i == 0 else no_index_ans[i - 1])
case 3:
result = method(tokens, newadd)
case 2:
result = method(tokens)
match result[0]:
case "doit":
# print(f"<{self.yacc.translation_dict[result[1][0]]}>")
match len(result):
case 5:
self.final_ans_insert([x[0] for x in ans[i:]], (self.seq, result[1]), result[2],
[ans[i + x][0] for x in result[3]], result[4])
case 4:
self.final_ans_insert([x[0] for x in ans[i:]], (self.seq, result[1]), result[2],
[ans[i + x][0] for x in result[3]])
case 3:
self.final_ans_insert([x[0] for x in ans[i:]], (self.seq, result[1]), result[2])
case 2:
self.final_ans_insert([x[0] for x in ans[i:]], (self.seq, result[1]))
case _:
assert False
ans = ans[:i]
ans.append((self.seq, result[1]))
self.seq += 1
changed = True
redo = True
break
case "part":
# print(f"<{self.yacc.translation_dict[result[1][0]]}>")
match len(result):
case 6:
self.final_ans_insert([x[0] for x in ans[i:i + result[2]]],
(self.seq, result[1]), result[3], result[4], result[5])
case 5:
self.final_ans_insert([x[0] for x in ans[i:i + result[2]]],
(self.seq, result[1]), result[3], result[4])
case 4:
self.final_ans_insert([x[0] for x in ans[i:i + result[2]]],
(self.seq, result[1]), result[3])
case 3:
self.final_ans_insert([x[0] for x in ans[i:i + result[2]]],
(self.seq, result[1]))
case _:
assert False
# 到i截止
temp = ans[i + result[2]:]
ans = ans[:i]
ans.append((self.seq, result[1]))
self.seq += 1
ans += temp
changed = True
redo = True
break
case "can't":
pass
if redo:
break
return ans
def WordAnalyze(self, lt: list[str]) -> list[(int, str)]:
ans = []
i = 0
while i < len(lt):
s = lt[i]
if s in self.keyword:
temp = (self.keyword[s], s)
ans = self.reduce(ans, temp)
ans.append((self.seq, temp))
self.final_ans.append((self.seq, temp))
self.seq += 1
# print(self.keyword[s], s)
elif s in self.symbol:
temp = (self.symbol[s], s)
ans = self.reduce(ans, temp)
ans.append((self.seq, temp))
self.final_ans.append((self.seq, temp))
self.seq += 1
# print(self.symbol[s], s)
else:
no_answer = True
for key, value in self.regex.items():
if re.match(key, s):
if value == 'CHARCON' or value == 'STRCON':
s = s[1:-1]
temp = (value, s)
ans = self.reduce(ans, temp)
ans.append((self.seq, temp))
self.final_ans.append((self.seq, temp))
self.seq += 1
# print(value, s)
no_answer = False
break
if no_answer:
ans.append(('error', 'error'))
self.final_ans.append((self.seq, temp))
i += 1
ans = self.reduce(ans, ('', ''))
return ans
def result_print(self) -> None:
with open('output.txt', 'w', encoding='utf-8') as f:
for vals in self.final_ans:
vals = vals[1]
if vals[0] in self.symbol.values() or vals[0] in self.keyword.values() or vals[
0] in self.regex.values():
print(vals[0], vals[1])
f.write(f'{vals[0]} {vals[1]}\n')
else:
print(f"<{self.yacc.translation_dict[vals[0]]}>")
f.write(f"<{self.yacc.translation_dict[vals[0]]}>\n")
return
def f(self) -> None:
s = self.readfile('testfile.txt')
s = self.strsplit(s)
s = self.WordAnalyze(s)
self.result_print()
print(s)
GrammaticalAnalysis().f()