CompilerDesign/GrammaticalAnalysis.py

1158 lines
51 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import inspect
import re
class Yacc:
translation_dict = {
"String": "字符串",
"Program": "程序",
"ConstantDeclaration": "常量说明",
"ConstantDefinition": "常量定义",
"FunctionDefinitionWithReturnValue": "有返回值函数定义",
"FunctionDefinitionWithoutReturnValue": "无返回值函数定义",
"UnsignedInteger": "无符号整数",
"Integer": "整数",
"DeclarationHeader": "声明头部",
"VariableDeclaration": "变量说明",
"VariableDefinition": "变量定义",
"CompoundStatement": "复合语句",
"ParameterList": "参数表",
"MainFunction": "主函数",
"Expression": "表达式",
"Term": "",
"Factor": "因子",
"Statement": "语句",
"AssignmentStatement": "赋值语句",
"ConditionalStatement": "条件语句",
"Condition": "条件",
"LoopStatement": "循环语句",
"Step": "步长",
"FunctionCallWithReturnValue": "有返回值函数调用语句",
"FunctionCallWithoutReturnValue": "无返回值函数调用语句",
"ValueParameterList": "值参数表",
"StatementList": "语句列",
"ReadStatement": "读语句",
"WriteStatement": "写语句",
"ReturnStatement": "返回语句"
}
def is_INTTK_CHARTK(self, t: (str, str)) -> bool:
if t[0] in ['INTTK', 'CHARTK']:
return True
if t[0] == "Factor" and t[1] in ['INTCON', 'CHARCON']:
return True
if t[0] == "Term" and t[1] in ['INTCON', 'CHARCON']:
return True
return False
def is_IDENFR(self, t: (str, str)) -> bool:
if t[0] == 'IDENFR':
return True
if t[0] == "Factor" and t[1] == "IDENFR":
return True
if t[0] == "Term" and t[1] == "IDENFR":
return True
if t[0] == "Expression" and t[1] == "IDENFR":
return True
return False
def is_Integer_CHARCON(self, t: (str, str)) -> bool:
if t[0] == 'Integer' or t[0] == 'CHARCON':
return True
if t[0] == 'Factor' and (t[1] == 'Integer' or t[1] == 'CHARCON'):
return True
if t[0] == 'Term' and (t[1] == 'Integer' or t[1] == 'CHARCON'):
return True
if t[0] == 'Expression' and (t[1] == 'Integer' or t[1] == 'CHARCON'):
return True
return False
def is_Term(self, t: (str, str)) -> bool:
if t[0] == 'Term':
return True
if t[0] == 'Expression' and t[1] == 'Term':
return True
return False
def is_RelationOperator(self, t: (str, str)) -> bool:
if t[0] in ['LSS', 'LEQ', 'GRE', 'GEQ', 'NEQ', 'EQL']:
return True
return False
def is_UnsignedInteger(self, t: (str, str)) -> bool:
if t[0] == 'UnsignedInteger':
return True
if t[0] == 'Integer' and t[1] >= 0:
return True
if t[0] == 'Factor' and t[1] == 'UnsignedInteger':
return True
if t[0] == 'Term' and t[1] == 'UnsignedInteger':
return True
if t[0] == 'Expression' and t[1] == 'UnsignedInteger':
return True
return False
# <字符串> ::= "十进制编码为32,33,35126的ASCII字符"
def String(self, lt: list[(str, str)]) -> (str, (str, str), int):
regex = r'[ -!#-~]*'
if len(lt) == 1 and lt[0][0] == "STRCON" and re.match(regex, lt[0][1]):
return "doit", ("String", lt[0][1])
return "can't"
# <程序> ::= [<常量说明>][<变量说明>]{<有返回值函数定义>|<无返回值函数定义>} <主函数>
def Program(self, lt: list[(str, str)]) -> (str, (str, str), int):
if len(lt) < 2:
return "can't"
if len(lt) == 2 and (
lt[0][0] == 'FunctionDefinitionWithReturnValue' or lt[0][
0] == 'FunctionDefinitionWithoutReturnValue') and lt[1][
0] == 'MainFunction':
return "doit", ("Program", "")
if lt[0][0] == 'ConstantDeclaration' and lt[1][0] == 'VariableDeclaration':
return self.Program(lt[2:])
if lt[0][0] == 'ConstantDefinition' or lt[1][0] == 'VariableDefinition':
return self.Program(lt[1:])
return "can't"
# 部分 <常量说明>: := const常量定义;{const常量定义;}
def PartOfConstantDeclaration(self, t: (str, str)) -> bool:
pt = ['CONSTTK', 'ConstantDefinition', 'SEMICN']
return t[0] in pt or self.PartOfConstantDefinition(t)
# <常量说明>: := const常量定义;{const常量定义;}
def ConstantDeclaration(self, lt: list[(str, str)], newadd: (str, str)) -> (str, (str, str), int):
if lt[0][0] != 'CONSTTK' or len(lt) < 3:
return "can't"
newadd = newadd[0]
if lt[0][0] == 'CONSTTK' and lt[1][0] == 'ConstantDefinition' and lt[2][0] == 'SEMICN':
i = 3
while i < len(lt):
if lt[i][0] != 'CONSTTK':
return "part", ("ConstantDeclaration", ""), i
if i + 1 == len(lt):
if newadd == 'ConstantDefinition' or self.PartOfConstantDefinition((newadd, "")):
return "can't"
else:
return "part", ("ConstantDeclaration", ""), i
elif lt[i + 1][0] != 'ConstantDefinition':
if self.PartOfConstantDefinition(lt[i + 1]):
return "can't"
else:
return "part", ("ConstantDeclaration", ""), i
if i + 2 == len(lt):
if newadd == 'SEMICN':
return "can't"
else:
return "part", ("ConstantDeclaration", ""), i
elif lt[i + 2][0] != 'SEMICN':
return "part", ("ConstantDeclaration", ""), i
i += 3
if newadd == 'CONSTTK':
return "can't"
else:
return "doit", ("ConstantDeclaration", "")
return "can't"
# 部分 <常量定义>: := int标识符整数{,<标识符>=<整数>} | char标识符字符{,<标识符>=<字符>}
def PartOfConstantDefinition(self, t: (str, str)) -> bool:
pt = ['INTTK', 'CHARTK', 'IDENFR', 'ASSIGN', 'INTCON', 'COMMA', 'CHARCON']
return t[0] in pt
# <常量定义>: := int标识符整数{,<标识符>=<整数>} | char标识符字符{,<标识符>=<字符>}
def ConstantDefinition(self, lt: list[(str, str)], newadd: (str, str)) -> (str, (str, str), int):
if (not (self.is_INTTK_CHARTK(lt[0]))) or len(lt) < 4:
return "can't"
newadd = newadd[0]
if self.is_INTTK_CHARTK(lt[0]) and self.is_IDENFR(lt[1]) and lt[2][0] == 'ASSIGN' and self.is_Integer_CHARCON(
lt[3]):
i = 4
while i < len(lt):
if lt[i][0] != 'COMMA':
return "part", ("ConstantDefinition", ""), i
if i + 1 == len(lt):
if newadd == 'IDENFR':
return "can't"
else:
return "part", ("ConstantDefinition", ""), i
elif not self.is_IDENFR(lt[i + 1]):
return "part", ("ConstantDefinition", ""), i
if i + 2 == len(lt):
if newadd == 'ASSIGN':
return "can't"
else:
return "part", ("ConstantDefinition", ""), i
elif lt[i + 2][0] != 'ASSIGN':
return "part", ("ConstantDefinition", ""), i
if i + 3 == len(lt):
if newadd == 'Integer' or newadd == 'CHARCON' or self.PartOfInteger((newadd, "")):
return "can't"
else:
return "part", ("ConstantDefinition", ""), i
elif not self.is_Integer_CHARCON(lt[i + 3]):
if self.PartOfInteger(lt[i + 3]):
return "can't"
return "part", ("ConstantDefinition", ""), i
i += 4
if newadd == 'COMMA':
return "can't"
return "doit", ("ConstantDefinition", "")
return "can't"
# 部分 <无符号整数>: := <非零数字>{<数字>} | 0
def PartOfUnsignedInteger(self, t: (str, str)) -> bool:
return t[0] == 'INTCON'
# <无符号整数>: := <非零数字>{<数字>} | 0
def UnsignedInteger(self, lt: list[(str, str)]) -> (str, (str, str), int):
if len(lt) < 1:
return "can't"
if lt[0][0] == 'INTCON':
if lt[0][1][0] == '0' and len(lt[0][1]) != 1:
return "can't"
return "doit", ("UnsignedInteger", lt[0][1])
return "can't"
# 部分 <整数> ::= [+|-]<无符号整数>
def PartOfInteger(self, t: (str, str)) -> bool:
pt = ['UnsignedInteger', 'PLUS', 'MINU']
return t[0] in pt or self.PartOfUnsignedInteger(t)
# <整数> ::= [+|-]<无符号整数>
def Integer(self, lt: list[(str, str)]) -> (str, (str, str), int):
if len(lt) < 1:
return "can't"
if lt[0][0] == 'UnsignedInteger':
return "doit", ("Integer", lt[0][1])
if len(lt) >= 2 and lt[0][0] in ["MINU", "PLUS"] and lt[1][0] == 'UnsignedInteger':
if len(lt) == 2:
return "doit", ('Integer', (-1 if lt[0][0] == "MINU" else 1) * lt[1][1])
else:
return "part", ("Integer", (-1 if lt[0][0] == "MINU" else 1) * lt[1][1]), 2
return "can't"
# 部分 <声明头部> ::= int标识符 |char标识符
def PartOfDeclarationHeader(self, t: (str, str)) -> bool:
pt = ['INTTK', 'CHARTK']
return t[0] in pt or self.is_IDENFR(t)
# <声明头部> ::= int标识符 |char标识符
def DeclarationHeader(self, lt: list[(str, str)], newadd: (str, str)) -> (str, (str, str), int):
if len(lt) < 2:
return "can't"
newadd = newadd[0]
if (lt[0][0] == 'INTTK' or lt[0][0] == 'CHARTK') and self.is_IDENFR(lt[1]):
if len(lt) == 2:
if newadd == 'LPARENT':
return "doit", ("DeclarationHeader", "")
elif lt[2][0] == 'LPARENT':
return "doit", ("DeclarationHeader", "")
return "can't"
# <变量说明>: := <变量定义>;{<变量定义>;}
def VariableDeclaration(self, lt: list[(str, str)], newadd: (str, str)) -> (str, (str, str), int):
def is_seq(lt: list[(str, str)]) -> str:
if len(lt) == 1 and lt[0][0] == 'VariableDefinition':
return "can't"
if lt[0][0] == 'VariableDefinition' and lt[1][0] == 'SEMICN':
if len(lt) == 2:
return "doit"
else:
return "part"
if lt[0][0] != 'VariableDefinition' or lt[1][0] != 'SEMICN':
if self.PartOfTYPEIDENFR(lt[0][0]):
return "can't"
return "not"
if len(lt) < 2:
return "can't"
if lt[0][0] != 'VariableDefinition' or lt[1][0] != 'SEMICN':
return "can't"
newadd = newadd[0]
i = 2
while i < len(lt):
match is_seq(lt[i:]):
case "doit":
if newadd == 'VariableDefinition' or self.PartOfVariableDefinition((newadd, "")):
return "can't"
else:
return "doit", ("VariableDeclaration", "")
case "part":
i += 2
case "not":
if self.PartOfVariableDefinition((newadd, "")) or newadd == 'SEMICN':
return "can't"
return "part", ("VariableDeclaration", ""), i
case "can't":
return "can't"
if newadd == 'VariableDefinition' or self.PartOfVariableDefinition((newadd, "")):
return "can't"
else:
return "doit", ("VariableDeclaration", "")
# 部分 <类型标识符> ::= int | char
def PartOfTYPEIDENFR(self, t: (str, str)) -> bool:
lt = ['INTTK', 'CHARTK']
return t[0] in lt
# 部分 <变量定义> ::= <类型标识符>(<标识符>|<标识符>'['<无符号整数>']'){,(<标识符>|<标识符>'['<无符号整数>']' )} //无符号整数表示数组元素的个数其值需大于0
def PartOfVariableDefinition(self, t: (str, str)) -> bool:
lt = ['UnsignedInteger', 'LBRACK', 'RBRACK']
return t in lt or self.is_IDENFR(t) or self.PartOfTYPEIDENFR(t)
# <变量定义> ::= <类型标识符>(<标识符>|<标识符>'['<无符号整数>']'){,(<标识符>|<标识符>'['<无符号整数>']' )} //无符号整数表示数组元素的个数其值需大于0
def VariableDefinition(self, lt: list[(str, str)], newadd: (str, str), last: (str, str)) -> (str, (str, str), int):
def is_seq(lt: list[(str, str)]) -> str: # 3 for long_assign 2 for short_assign, 1 for part, 0 for not
ls = ['IDENFR', 'LBRACK', 'UnsignedInteger', 'RBRACK']
if self.is_IDENFR(lt[0]):
return 'not'
if len(lt) == 1 and self.is_IDENFR(lt[0]):
return "doit"
if len(lt) == 4 and self.is_IDENFR(lt[0]) and lt[1][0] == 'LBRACK' and self.is_UnsignedInteger(lt[2]) and \
lt[3][0] == 'RBRACK':
return "doit"
if len(lt) >= 4 and self.is_IDENFR(lt[0]) and lt[1][0] == 'LBRACK' and self.is_UnsignedInteger(lt[2]) and \
lt[3][0] == 'RBRACK':
return "part2"
if len(lt) >= 1 and self.is_IDENFR(lt[0]) and lt[1][0] != 'LBRACK':
return "part1"
bl = True
for i in range(len(lt)):
if lt[i][0] != ls[i]:
bl = False
break
if bl:
return "can't"
else:
return "not"
if len(lt) < 2 or (lt[0][0] != 'INTTK' and lt[0][0] != 'CHARTK'):
return "can't"
if last[0] == 'CONSTTK' or last[0] == 'COMMA' or last[0] == 'LPARENT':
return "can't"
newadd = newadd[0]
i = 0
if len(lt) >= 5:
if (lt[0][0] == 'INTTK' or lt[0][0] == 'CHARTK') and self.is_IDENFR(lt[1]) and lt[2][0] == 'LBRACK' and \
self.is_UnsignedInteger(lt[3]) and lt[4][0] == 'RBRACK':
i = 5
elif (lt[0][0] == 'INTTK' or lt[0][0] == 'CHARTK') and self.is_IDENFR(lt[1]):
i = 2
while i < len(lt):
if lt[i][0] != 'COMMA':
return "part", ("VariableDefinition", ""), i
i += 1
while i < len(lt):
match is_seq(lt[i:]):
case "doit":
if newadd == 'COMMA':
return "can't"
else:
return "doit", ("VariableDefinition", "")
case "part1":
if lt[i + 1] == 'COMMA':
i = i + 2
else:
return "part", ("VariableDefinition", ""), i + 1
case "part2":
if lt[i + 4] == 'COMMA':
i = i + 5
else:
return "part", ("VariableDefinition", ""), i + 4
case "can't":
return "can't"
case "not":
return "part", ("VariableDefinition", ""), i - 1
if newadd == 'IDENFR':
return "can't"
else:
return "part", ("VariableDefinition", ""), i - 1
if newadd in ['COMMA', 'LBRACK']:
return "can't"
else:
return "doit", ("VariableDefinition", "")
# <有返回值函数定义> ::= <声明头部>'('<参数表>')' '{'<复合语句>'}'
def FunctionDefinitionWithReturnValue(self, lt: list[(str, str)]) -> (str, (str, str), int):
if len(lt) < 7:
return "can't"
if lt[0][0] == 'DeclarationHeader' and lt[1][0] == 'LPARENT' and lt[2][0] == 'ParameterList' and lt[3][
0] == 'RPARENT' and lt[4][0] == 'LBRACE' and lt[5][0] == 'CompoundStatement' and lt[6][0] == 'RBRACE':
if len(lt) == 7:
return "doit", ("FunctionDefinitionWithReturnValue", "")
else:
return "part", ("FunctionDefinitionWithReturnValue", ""), 7
return "can't"
# <无返回值函数定义> ::= void标识符'('<参数表>')''{'<复合语句>'}'
def FunctionDefinitionWithoutReturnValue(self, lt: list[(str, str)]) -> (str, (str, str), int):
if len(lt) < 8:
return "can't"
if lt[0][0] == 'VOIDTK' and self.is_IDENFR(lt[1]) and lt[2][0] == 'LPARENT' and lt[3][0] == 'ParameterList' and \
lt[4][0] == 'RPARENT' and lt[5][0] == 'LBRACE' and lt[6][0] == 'CompoundStatement' and lt[7][
0] == 'RBRACE':
if len(lt) == 8:
return "doit", ("FunctionDefinitionWithoutReturnValue", "")
else:
return "part", ("FunctionDefinitionWithoutReturnValue", ""), 8
return "can't"
# <复合语句> ::= [<常量说明>][<变量说明>]<语句列>
def CompoundStatement(self, lt: list[(str, str)]) -> (str, (str, str), int):
if len(lt) < 1:
return "can't"
if lt[0][0] == 'StatementList':
if len(lt) == 1:
return "doit", ("CompoundStatement", "")
else:
return "part", ("CompoundStatement", ""), 1
if len(lt) < 2:
return "can't"
if (lt[0][0] == 'ConstantDeclaration' or lt[0][0] == 'VariableDeclaration') and lt[1][0] == 'StatementList':
if len(lt) == 2:
return "doit", ("CompoundStatement", "")
else:
return "part", ("CompoundStatement", ""), 2
if len(lt) < 3:
return "can't"
if lt[0][0] == 'ConstantDeclaration' and lt[1][0] == 'VariableDeclaration' and lt[2][0] == 'StatementList':
if len(lt) == 3:
return "doit", ("CompoundStatement", "")
else:
return "part", ("CompoundStatement", ""), 3
return "can't"
# <参数表> ::= <类型标识符><标识符>{,<类型标识符><标识符>}| <空>
def ParameterList(self, lt: list[(str, str)], newadd: (str, str), last: (str, str)) -> (str, (str, str), int):
def is_seq(lt: list[(str, str)]) -> str:
ls = ['COMMA', ['INTTK', 'CHARTK'], 'IDENFR']
for i in range(len(lt)):
if i >= len(ls):
return "full"
if i == 1:
if lt[i][0] not in ls[i]:
return "not"
elif i == 2:
if not self.is_IDENFR(lt[i]):
return "not"
else:
if lt[i][0] != ls[i]:
return "not"
if len(lt) < 3:
return "can't"
else:
return "doit"
if len(lt) < 2 or last[0] != 'LPARENT' or newadd[0] != 'RPARENT':
return "can't"
if lt[0][0] not in ["INTTK", "CHARTK"] or not self.is_IDENFR(lt[1]):
return "can't"
i = 2
while i < len(lt):
match is_seq(lt[i:]):
case "doit":
return "doit", ("ParameterList", "")
case "can't":
return "can't"
case "not":
return "can't"
case "full":
i += 3
return "doit", ("ParameterList", "")
# <主函数> ::= void main() {’<复合语句>‘}
def MainFunction(self, lt: list[(str, str)]) -> (str, (str, str), int):
if len(lt) < 7:
return "can't"
if lt[0][0] == 'VOIDTK' and lt[1][0] == 'MAINTK' and lt[2][0] == 'LPARENT' and lt[3][0] == 'RPARENT' and \
lt[4][0] == 'LBRACE' and lt[5][0] == 'CompoundStatement' and lt[6][0] == 'RBRACE':
if len(lt) == 7:
return "doit", ("MainFunction", "")
else:
return "part", ("MainFunction", ""), 7
return "can't"
# <表达式>: := [+|-]<项>{<加法运算符><项>} // [+ |]只作用于第一个 < 项 >
def Expression(self, lt: list[(str, str)], newadd: (str, str)) -> (str, (str, str), int, bool, list[int], bool):
def is_seq(lt: list[(str, str)]) -> str:
ls = [['PLUS', 'MINU'], 'Term']
if len(lt) == 1 and lt[0][0] in ls[0]:
return "part"
if len(lt) >= 2 and lt[0][0] in ls[0] and lt[1][0] == ls[1]:
if len(lt) == 2:
return "doit"
if len(lt) > 2:
return "full"
if lt[0][0] not in ls[1]:
return "not"
if len(lt) >= 2 and lt[0][0] in ls[0] and lt[1][0] != ls[1]:
return "not"
if len(lt) < 1:
return "can't"
i = 0
if len(lt) >= 2 and lt[0][0] in ['PLUS', 'MINU'] and self.is_Term(lt[1]):
i = 2
elif self.is_Term(lt[0]):
i = 1
else:
return "can't"
newadd = newadd[0]
while i < len(lt):
match is_seq(lt[i:]):
case "doit":
if newadd in ["PLUS", "MINU"]:
return "can't"
else:
return "doit", ("Expression", "Term"), False, [t for t in range(i + 2) if
lt[t][0] == "Expression"], False
case "part":
if newadd == "Term" or (i == 1 and lt[0][0] == "Expression"):
return "can't"
else:
return "part", ("Expression", "Term"), i, False
case "full":
i += 2
case "not":
if i == 1 and lt[0][0] == "Expression":
return "can't"
return "part", ("Expression", "Term"), i, False, [t for t in range(i) if
lt[t][0] == "Expression"], False
if newadd in ["PLUS", "MINU"]:
return "can't"
else:
if lt[i - 1][0] == "Expression":
return "can't"
return "doit", ("Expression", lt[i - 1][1]), False
# <项>: := <因子>{<乘法运算符><因子>}
def Term(self, lt: list[(str, str)], newadd: (str, str)) -> (str, (str, str), int, bool):
def is_seq(lt: list[(str, str)]) -> str:
ls = ['Factor', ['MULT', 'DIV', 'MOD']]
if len(lt) == 1 and lt[0][0] == ls[0]:
return "part"
if len(lt) >= 2 and lt[0][0] == ls[0] and lt[1][0] in ls[1]:
if len(lt) == 2:
return "doit"
if len(lt) > 2:
return "full"
if lt[0][0] != ls[0]:
return "notA"
if len(lt) >= 2 and lt[0][0] == ls[0] and lt[1][0] not in ls[1]:
return "notB"
if len(lt) < 1:
return "can't"
i = 0
if lt[0][0] == 'Factor':
i = 1
else:
return "can't"
while i < len(lt):
match is_seq(lt[i:]):
case "doit":
if newadd in ['MULT', 'DIV', 'MOD']:
return "can't"
else:
return "doit", ("Term", ""), False
case "part":
if self.PartOfFactor((newadd, "")):
return "can't"
else:
return "part", ("Term", lt[0][1] if i == 1 else ""), i, False
case "full":
i += 2
case "notA":
if self.PartOfFactor((lt[i], "")):
return "can't"
return "part", ("Term", lt[0][1] if i == 1 else ""), i, False
case "notB":
return "part", ("Term", lt[0][1] if i == 1 else ""), i, False
if newadd in ['MULT', 'DIV', 'MOD']:
return "can't"
else:
return "doit", ("Term", lt[0][1]), False
# 部分 <因子>: := <标识符>|<标识符>'['<表达式>']' | '('<表达式>')'|<整数> | <字符>|<有返回值函数调用语句>
def PartOfFactor(self, t: (str, str)) -> bool:
pt = ['IDENFR', 'LPARENT', 'RPARENT', 'LBRACK', 'RBRACK', 'Integer', 'CHARCON',
'FunctionDefinitionWithReturnValue']
return t[0] in pt
# <因子>: := <标识符>|<标识符>'['<表达式>']' | '('<表达式>')'|<整数> | <字符>|<有返回值函数调用语句>
def Factor(self, lt: list[(str, str)]) -> (str, (str, str), int):
if len(lt) < 1:
return "can't"
if len(lt) > 4:
if self.is_IDENFR(lt[0]) and lt[1][0] == 'LBRACK' and lt[2][0] == 'Expression' and lt[3][0] == 'RBRACK':
if len(lt) == 4:
return "doit", ("Factor", "")
else:
return "part", ("Factor", ""), 4
elif len(lt) > 3:
if lt[0][0] == 'LPARENT' and lt[1][0] == 'Expression' and lt[2][0] == 'RPARENT':
if len(lt) == 3:
return "doit", ("Factor", "")
else:
return "part", ("Factor", ""), 3
elif lt[0][0] in ["IDENFR", "Integer", "CHARCON", "FunctionCallWithReturnValue"]:
if len(lt) == 1:
return "doit", ("Factor", lt[0][0])
return "part", ("Factor", lt[0][0]), 1
return "can't"
# 部分 <语句> ::= <条件语句>|<循环语句>| '{'<语句列>'}'| <有返回值函数调用语句>; |<无返回值函数调用语句>;|<赋值语句>;|<读语句>;|<写语句>;|<空>;|<返回语句>;
def PartOfStatement(self, t: (str, str)) -> bool:
pt = ['ConditionStatement', 'LoopStatement', 'LBRACE', 'StatementList', 'RBRACE', 'FunctionCallWithReturnValue',
'FunctionCallWithoutReturnValue', 'AssignmentStatement', 'ReadStatement', 'WriteStatement', 'SEMICN',
'ReturnStatement', 'SEMICN']
return t in pt
# <语句> ::= <条件语句>|<循环语句>| '{'<语句列>'}'| <有返回值函数调用语句>; |<无返回值函数调用语句>;|<赋值语句>;|<读语句>;|<写语句>;|<空>;|<返回语句>;
def Statement(self, lt: list[(str, str)], _: (str, str), last: (str, str), last_of_last: (str, str)) -> (
str, (str, str), int):
if len(lt) < 1:
return "can't"
if last_of_last[0] == "FORTK":
return "can't"
if lt[0][0] == 'ConditionStatement':
if len(lt) == 1:
return "doit", ("Statement", "")
else:
return "part", ("Statement", ""), 1
if lt[0][0] == 'LoopStatement':
if len(lt) == 1:
return "doit", ("Statement", "")
else:
return "part", ("Statement", ""), 1
if len(lt) > 2 and lt[0][0] == 'LBRACE' and lt[1][0] == 'StatementList' and lt[2][0] == 'RBRACE':
if len(lt) == 3:
return "doit", ("Statement", "")
else:
return "part", ("Statement", ""), 3
if len(lt) >= 2 and (
lt[0][0] == 'FunctionCallWithReturnValue' or lt[0][0] == 'FunctionCallWithoutReturnValue' or lt[0][
0] == 'AssignmentStatement' or lt[0][0] == 'ReadStatement' or lt[0][0] == 'WriteStatement' or lt[0][
0] == 'ReturnStatement') and lt[1][0] == 'SEMICN':
if len(lt) == 2:
return "doit", ("Statement", "")
else:
return "part", ("Statement", ""), 1
if lt[0][0] == 'SEMICN' and last[0] not in ["VariableDefinition", "ConstantDefinition", "Expression",
"ConditionStatement", "Condition"]:
if len(lt) == 1:
return "doit", ("Statement", "")
else:
return "part", ("Statement", ""), 1
return "can't"
# <赋值语句> ::= <标识符>=<表达式>|<标识符>'['<表达式>']'=<表达式>
def AssignmentStatement(self, lt: list[(str, str)], newadd: (str, str), last: (str, str),
last_of_last: (str, str)) -> (str, (str, str), int, bool, list[int]):
if len(lt) < 3 or self.PartOfTYPEIDENFR(last) or last_of_last[0] == "FORTK":
return "can't"
if self.is_IDENFR(lt[0]) and lt[1][0] == 'ASSIGN' and lt[2][0] == 'Expression':
if len(lt) == 3:
if newadd in ['PLUS', 'MINU']:
return "can't"
return "doit", ("AssignmentStatement", ""), False, ([0] if lt[0][0] != "IDENFR" else [])
else:
if lt[3][0] in ['PLUS', 'MINU']:
return "can't"
return "part", ("AssignmentStatement", ""), 3, False, ([0] if lt[0][0] != "IDENFR" else [])
if len(lt) >= 6 and self.is_IDENFR(lt[0]) and lt[1][0] == 'LBRACK' and lt[2][0] == 'Expression' and lt[3][
0] == 'RBRACK' and lt[4][0] == 'ASSIGN' and lt[5][0] == 'Expression':
if len(lt) == 6:
if newadd in ['PLUS', 'MINU']:
return "can't"
return "doit", ("AssignmentStatement", ""), False, ([0] if lt[0][0] != "IDENFR" else [])
else:
if lt[6][0] in ['PLUS', 'MINU']:
return "can't"
return "part", ("AssignmentStatement", ""), 6, False, ([0] if lt[0][0] != "IDENFR" else [])
return "can't"
# <条件语句> ::= if '('<条件>')'语句else语句
def ConditionalStatement(self, lt: list[(str, str)], newadd: (str, str)) -> (str, (str, str), int):
if len(lt) < 5:
return "can't"
if lt[0][0] != "IFTK" or lt[1][0] != "LPARENT" or lt[2][0] != "Condition" or lt[3][0] != "RPARENT" or lt[4][
0] != "Statement":
return "can't"
if len(lt) == 5:
if newadd == "ELSETK":
return "can't"
return "doit", ("ConditionalStatement", "")
if len(lt) >= 7 and lt[5][0] == "ELSETK" and lt[6][0] == "Statement":
if len(lt) == 7:
return "doit", ("ConditionalStatement", "")
else:
return "part", ("ConditionalStatement", ""), 7
if len(lt) == 6:
if lt[5][0] != "ELSETK":
return "part", ("ConditionalStatement", ""), 5
if newadd[0] == "Statement" or self.PartOfStatement(newadd):
return "can't"
return "part", ("ConditionalStatement", ""), 5
# <条件> ::= <表达式><关系运算符><表达式> |<表达式> //整型表达式之间才能进行关系运算 //表达式为整型其值为0条件为假值不为0时条件为真
def Condition(self, lt: list[(str, str)], newadd: (str, str), last: (str, str), last_of_last: (str, str)) -> (
str, (str, str), int):
if len(lt) < 1:
return "can't"
if not ((last[0] == 'LPARENT' and newadd[0] == 'RPARENT' and last_of_last[0] == "IFTK") or (
last[0] == 'LPARENT' and newadd[0] == 'RPARENT' and last_of_last[0] == "WHILETK") or (
last[0] == 'SEMICN' and newadd[0] == 'SEMICN' and last_of_last[0] == "Expression")):
return "can't"
if lt[0][0] == "Expression":
if len(lt) == 1:
return "doit", ("Condition", "")
if len(lt) == 3 and self.is_RelationOperator(lt[1]) and lt[2][0] == "Expression":
return "doit", ("Condition", "")
return "can't"
return "can't"
# <循环语句> ::= while '('<条件>')'<语句>| do语句while '('<条件>')' |for'('<标识符>=<表达式>;<条件>;<标识符>=<标识符>(+|)<步长>')'<语句>
def LoopStatement(self, lt: list[(str, str)]) -> (str, (str, str), int):
if len(lt) < 5:
return "can't"
if lt[0][0] == "WHILETK" and lt[1][0] == "LPARENT" and lt[2][0] == "Condition" and lt[3][0] == "RPARENT" and \
lt[4][0] == "Statement":
if len(lt) == 5:
return "doit", ("LoopStatement", "")
else:
return "part", ("LoopStatement", ""), 5
if len(lt) < 6:
return "can't"
if lt[0][0] == "DOTK" and lt[1][0] == "Statement" and lt[2][0] == "WHILETK" and lt[3][0] == "LPARENT" and lt[4][
0] == "Condition" and lt[5][0] == "RPARENT":
if len(lt) == 6:
return "doit", ("LoopStatement", "")
else:
return "part", ("LoopStatement", ""), 6
if len(lt) < 15:
return "can't"
if lt[0][0] == "FORTK" and lt[1][0] == "LPARENT" and self.is_IDENFR(lt[2]) and lt[3][0] == "ASSIGN" and lt[4][
0] == "Expression" and lt[5][0] == "SEMICN" and lt[6][0] == "Condition" and lt[7][
0] == "SEMICN" and self.is_IDENFR([8]) and lt[9][0] == "ASSIGN" and self.is_IDENFR(lt[10]) and lt[11][
0] in ['PLUS', 'MINU'] and lt[12] == "Step" and lt[13][0] == "RPARENT" and lt[14][0] == "Statement":
if len(lt) == 15:
return "doit", ("LoopStatement", "")
else:
return "part", ("LoopStatement", ""), 15
return "can't"
# <步长>::= <无符号整数>
def Step(self, lt: list[(str, str)], newadd: (str, str), last: (str, str), last_of_last: (str, str)) -> (
str, (str, str), int):
if len(lt) < 1:
return "can't"
if last[0] not in ["PLUS", "MINU"] or not self.is_IDENFR(last_of_last):
return "can't"
if lt[0][0] == "UnsignedInteger":
if len(lt) == 1:
if newadd == "RPARENT":
return "doit", ("Step", "")
else:
if lt[2] == "RPARENT":
return "part", ("Step", ""), 1
return "can't"
# <有返回值函数调用语句>: := <标识符>'('<值参数表>')'
def FunctionCallWithReturnValue(self, lt: list[(str, str)], newadd: (str, str), last: (str, str)) -> (
str, (str, str), int):
if len(lt) < 4:
return "can't"
if len(lt) >= 4 and self.is_IDENFR(lt[0]) and lt[1][0] == "LPARENT" and lt[2][0] == "ValueParameterList" and \
lt[3][0] == "RPARENT":
if len(lt) == 4:
return "doit", ("FunctionCallWithReturnValue", "")
else:
return "part", ("FunctionCallWithReturnValue", ""), 4
return "can't"
# <无返回值函数调用语句> ::= <标识符>'('<值参数表>')' 无
# <值参数表> ::= <表达式>{,<表达式>}|<空>
def ValueParameterList(self, lt: list[(str, str)], newadd: (str, str), last: (str, str),
last_of_last: (str, str)) -> (str, (str, str), int, bool):
def is_seq(lt: list[(str, str)]) -> str:
ls = ['COMMA', 'Expression']
if lt[0][0] != ls[0]:
return "not"
if len(lt) == 1 and lt[0][0] == ls[0]:
return "can't"
if len(lt) >= 2 and lt[0][0] == ls[0] and lt[1][0] == ls[1]:
if len(lt) == 2:
return "doit"
if len(lt) > 2:
return "full"
else:
return "not"
if len(lt) < 1:
return "can't"
if lt[0][0] != "Expression" or newadd[0] != "RPARENT" or last[0] != "LPARENT" or last_of_last[0] == "RETURNTK":
return "can't"
i = 1
while i < len(lt):
match is_seq(lt[i:]):
case "doit":
if newadd == "COMMA":
return "can't"
return "doit", ("ValueParameterList", ""), False
case "full":
i += 2
case "not":
if lt[i][0] == "RPARENT":
return "part", ("ValueParameterList", ""), i, False
return "can't"
case "can't":
return "can't"
return "can't"
# <语句列> ::= {<语句>}
def StatementList(self, lt: list[(str, str)], newadd: (str, str), last: (str, str)) -> (str, (str, str), int):
if len(lt) < 1:
return "can't"
if last[0] != "LBRACE":
return "can't"
for i in range(len(lt)):
if lt[i][0] != "Statement":
if lt[i][0] == "RBRACE":
return "part", ("StatementList", ""), i
return "can't"
if newadd[0] == "RBRACE":
return "doit", ("StatementList", ""), i
else:
return "can't"
# <读语句> ::= scanf '('<标识符>{,<标识符>}')'
def ReadStatement(self, lt: list[(str, str)]) -> (str, (str, str), int):
if len(lt) < 4:
return "can't"
if lt[0][0] != "SCANFTK":
return "can't"
if lt[0][0] == "SCANFTK" and lt[1][0] == "LPARENT" and self.is_IDENFR(lt[2]):
for i in range(3, len(lt)):
if not self.is_IDENFR(lt[i]):
if lt[i] == "RPARENT":
if i == len(lt) - 1:
return "doit", ("ReadStatement", "")
return "part", ("ReadStatement", ""), i
return "can't"
return "can't"
# <写语句> ::= printf '(' <字符串>,<表达式> ')'| printf '('<字符串> ')'| printf '('<表达式>')'
def WriteStatement(self, lt: list[(str, str)]) -> (str, (str, str), int, bool):
if len(lt) < 4:
return "can't"
if lt[0][0] != "PRINTFTK":
return "can't"
if lt[0][0] == "PRINTFTK" and lt[1][0] == "LPARENT" and (lt[2][0] == "String" or lt[2][0] == "Expression") and \
lt[3][0] == "RPARENT":
if len(lt) == 4:
return "doit", ("WriteStatement", ""), False
else:
return "part", ("WriteStatement", ""), 4, False
if len(lt) >= 6 and lt[0][0] == "PRINTFTK" and lt[1][0] == "LPARENT" and lt[2][0] == "String" and lt[3][
0] == "COMMA" and lt[4][0] == "Expression" and lt[5][0] == "RPARENT":
if len(lt) == 6:
return "doit", ("WriteStatement", ""), False
else:
return "part", ("WriteStatement", ""), 6, False
return "can't"
# <返回语句> ::= return['('<表达式>')']
def ReturnStatement(self, lt: list[(str, str)], addnew: (str, str)) -> (str, (str, str), int, bool):
if len(lt) < 1:
return "can't"
if lt[0][0] != "RETURNTK":
return "can't"
if len(lt) == 1:
if addnew[0] == "LPARENT":
return "can't"
return "doit", ("ReturnStatement", ""), False
if len(lt) == 4 and lt[1][0] == "LPARENT" and lt[2][0] == "Expression" and lt[3][0] == "RPARENT":
return "doit", ("ReturnStatement", ""), False
return "can't"
class GrammaticalAnalysis:
keyword = {
"const": "CONSTTK",
"int": "INTTK",
"char": "CHARTK",
"void": "VOIDTK",
"main": "MAINTK",
"if": "IFTK",
"else": "ELSETK",
"do": "DOTK",
"while": "WHILETK",
"for": "FORTK",
"continue": "CONTINUETK",
"break": "BREAKTK",
"scanf": "SCANFTK",
"printf": "PRINTFTK",
"return": "RETURNTK"}
symbol = {
"+": "PLUS",
"-": "MINU",
"*": "MULT",
"/": "DIV",
"%": "MOD",
"&&": "AND",
"||": "OR",
",": "COMMA",
";": "SEMICN",
"(": "LPARENT",
")": "RPARENT",
"[": "LBRACK",
"]": "RBRACK",
"{": "LBRACE",
"}": "RBRACE",
"=": "ASSIGN",
"<": "LSS",
"<=": "LEQ",
">": "GRE",
">=": "GEQ",
"!=": "NEQ",
"==": "EQL",
"!": "NOT"
}
regex = {
r'^[a-zA-Z][a-zA-Z0-9]*$': "IDENFR",
r'^\d+$': "INTCON",
r'^".*"$': "STRCON",
r"^'.'$": "CHARCON"
}
yacc = Yacc()
patterns = []
seq = 0
final_ans = []
def __init__(self):
self.patterns = [method for method in dir(Yacc) if
callable(getattr(Yacc, method)) and not method.startswith("__") and not method.startswith(
"PartOf") and not method.startswith("is_")]
def readfile(self, filename: str) -> str:
s = ''
with open(filename, 'r', encoding='utf-8') as f:
s = ''.join(f.readlines())
return s
def strsplit(self, s: str) -> list[str]:
lt = []
l = 0
r = 1
while r < len(s):
if not (s[r].isalpha() or s[r].isdigit()):
k = s[l:r].strip()
if len(k) != 0:
lt.append(k)
k = s[r].strip()
if len(k) != 0:
lt.append(k)
l = r + 1
r = l
else:
r += 1
if l < len(s):
lt.append(s[l:r])
i = 0
# 修整 符号配对 + 字符串查找
while i < len(lt) - 1:
if lt[i] in self.symbol and lt[i] + lt[i + 1] in self.symbol:
lt[i] = lt[i] + lt[i + 1]
lt.pop(i + 1)
elif lt[i] == '"':
i += 1
while i < len(lt):
lt[i - 1] += " " + lt[i]
lt.pop(i)
if lt[i - 1][-1] == '"':
i -= 1
break
elif lt[i] == '\'':
if i + 2 < len(lt) and lt[i + 2] == '\'':
lt[i] += lt[i + 1] + lt[i + 2]
lt.pop(i + 1)
lt.pop(i + 1)
i += 1
return lt
def final_ans_insert(self, lt: list[int], newadd: (int, (str, str)), delete: bool = True,
force_del: list[int] = [], cascade_delete: bool = True) -> None:
i = 0
def cas_del(t: int):
dele = 0
while t != -1:
for k in self.final_ans:
if k[0] == t:
if k[1][0] not in ['Factor', 'Term', 'Expression']:
t = -1
break
t = k[2] if len(k) >= 3 and cascade_delete else -1
dele += 1
self.final_ans.remove(k)
break
return dele
while i < len(self.final_ans) and len(lt) > 0:
if self.final_ans[i][0] in lt:
seq = self.final_ans[i][0]
if len(lt) == 1:
if delete == False:
self.final_ans.insert(i + 1, (newadd[0], newadd[1], lt[0]))
else:
self.final_ans.insert(i + 1, newadd)
if (delete and self.final_ans[i][1][0] in ['Factor', 'Term', 'Expression']) or \
self.final_ans[i][0] in force_del:
i -= cas_del(self.final_ans[i][0])
lt.remove(seq)
i += 1
# 规约!!
def reduce(self, ans: list[(int, (str, str), any)], newadd: (str, str)) -> list[(int, str)]:
changed = True
while changed:
changed = False
for i in range(len(ans)):
no_index_ans = [x[1] for x in ans]
tokens = no_index_ans[i:]
redo = False
for method_name in self.patterns:
method = getattr(self.yacc, method_name)
if callable(method):
result = []
r = len(inspect.getfullargspec(method).args)
match r:
case 5:
result = method(tokens, newadd, (-1, -1) if i == 0 else no_index_ans[i - 1],
(-1, -1) if i <= 1 else no_index_ans[i - 2])
case 4:
result = method(tokens, newadd, (-1, -1) if i == 0 else no_index_ans[i - 1])
case 3:
result = method(tokens, newadd)
case 2:
result = method(tokens)
match result[0]:
case "doit":
# print(f"<{self.yacc.translation_dict[result[1][0]]}>")
match len(result):
case 5:
self.final_ans_insert([x[0] for x in ans[i:]], (self.seq, result[1]), result[2],
[ans[i + x][0] for x in result[3]], result[4])
case 4:
self.final_ans_insert([x[0] for x in ans[i:]], (self.seq, result[1]), result[2],
[ans[i + x][0] for x in result[3]])
case 3:
self.final_ans_insert([x[0] for x in ans[i:]], (self.seq, result[1]), result[2])
case 2:
self.final_ans_insert([x[0] for x in ans[i:]], (self.seq, result[1]))
case _:
assert False
ans = ans[:i]
ans.append((self.seq, result[1]))
self.seq += 1
changed = True
redo = True
break
case "part":
# print(f"<{self.yacc.translation_dict[result[1][0]]}>")
match len(result):
case 6:
self.final_ans_insert([x[0] for x in ans[i:i + result[2]]],
(self.seq, result[1]), result[3], result[4], result[5])
case 5:
self.final_ans_insert([x[0] for x in ans[i:i + result[2]]],
(self.seq, result[1]), result[3], result[4])
case 4:
self.final_ans_insert([x[0] for x in ans[i:i + result[2]]],
(self.seq, result[1]), result[3])
case 3:
self.final_ans_insert([x[0] for x in ans[i:i + result[2]]],
(self.seq, result[1]))
case _:
assert False
# 到i截止
temp = ans[i + result[2]:]
ans = ans[:i]
ans.append((self.seq, result[1]))
self.seq += 1
ans += temp
changed = True
redo = True
break
case "can't":
pass
if redo:
break
return ans
def WordAnalyze(self, lt: list[str]) -> list[(int, str)]:
ans = []
i = 0
while i < len(lt):
s = lt[i]
if s in self.keyword:
temp = (self.keyword[s], s)
ans = self.reduce(ans, temp)
ans.append((self.seq, temp))
self.final_ans.append((self.seq, temp))
self.seq += 1
# print(self.keyword[s], s)
elif s in self.symbol:
temp = (self.symbol[s], s)
ans = self.reduce(ans, temp)
ans.append((self.seq, temp))
self.final_ans.append((self.seq, temp))
self.seq += 1
# print(self.symbol[s], s)
else:
no_answer = True
for key, value in self.regex.items():
if re.match(key, s):
if value == 'CHARCON' or value == 'STRCON':
s = s[1:-1]
temp = (value, s)
ans = self.reduce(ans, temp)
ans.append((self.seq, temp))
self.final_ans.append((self.seq, temp))
self.seq += 1
# print(value, s)
no_answer = False
break
if no_answer:
ans.append(('error', 'error'))
self.final_ans.append((self.seq, temp))
i += 1
ans = self.reduce(ans, ('', ''))
return ans
def result_print(self) -> None:
with open('output.txt', 'w', encoding='utf-8') as f:
for vals in self.final_ans:
vals = vals[1]
if vals[0] in self.symbol.values() or vals[0] in self.keyword.values() or vals[
0] in self.regex.values():
print(vals[0], vals[1])
f.write(f'{vals[0]} {vals[1]}\n')
else:
print(f"<{self.yacc.translation_dict[vals[0]]}>")
f.write(f"<{self.yacc.translation_dict[vals[0]]}>\n")
return
def f(self) -> None:
s = self.readfile('testfile.txt')
s = self.strsplit(s)
s = self.WordAnalyze(s)
self.result_print()
print(s)
GrammaticalAnalysis().f()