1158 lines
51 KiB
Python
1158 lines
51 KiB
Python
import inspect
|
||
import re
|
||
|
||
|
||
class Yacc:
|
||
translation_dict = {
|
||
"String": "字符串",
|
||
"Program": "程序",
|
||
"ConstantDeclaration": "常量说明",
|
||
"ConstantDefinition": "常量定义",
|
||
"FunctionDefinitionWithReturnValue": "有返回值函数定义",
|
||
"FunctionDefinitionWithoutReturnValue": "无返回值函数定义",
|
||
"UnsignedInteger": "无符号整数",
|
||
"Integer": "整数",
|
||
"DeclarationHeader": "声明头部",
|
||
"VariableDeclaration": "变量说明",
|
||
"VariableDefinition": "变量定义",
|
||
"CompoundStatement": "复合语句",
|
||
"ParameterList": "参数表",
|
||
"MainFunction": "主函数",
|
||
"Expression": "表达式",
|
||
"Term": "项",
|
||
"Factor": "因子",
|
||
"Statement": "语句",
|
||
"AssignmentStatement": "赋值语句",
|
||
"ConditionalStatement": "条件语句",
|
||
"Condition": "条件",
|
||
"LoopStatement": "循环语句",
|
||
"Step": "步长",
|
||
"FunctionCallWithReturnValue": "有返回值函数调用语句",
|
||
"FunctionCallWithoutReturnValue": "无返回值函数调用语句",
|
||
"ValueParameterList": "值参数表",
|
||
"StatementList": "语句列",
|
||
"ReadStatement": "读语句",
|
||
"WriteStatement": "写语句",
|
||
"ReturnStatement": "返回语句"
|
||
}
|
||
|
||
def is_INTTK_CHARTK(self, t: (str, str)) -> bool:
|
||
if t[0] in ['INTTK', 'CHARTK']:
|
||
return True
|
||
if t[0] == "Factor" and t[1] in ['INTCON', 'CHARCON']:
|
||
return True
|
||
if t[0] == "Term" and t[1] in ['INTCON', 'CHARCON']:
|
||
return True
|
||
return False
|
||
|
||
def is_IDENFR(self, t: (str, str)) -> bool:
|
||
if t[0] == 'IDENFR':
|
||
return True
|
||
if t[0] == "Factor" and t[1] == "IDENFR":
|
||
return True
|
||
if t[0] == "Term" and t[1] == "IDENFR":
|
||
return True
|
||
if t[0] == "Expression" and t[1] == "IDENFR":
|
||
return True
|
||
return False
|
||
|
||
def is_Integer_CHARCON(self, t: (str, str)) -> bool:
|
||
if t[0] == 'Integer' or t[0] == 'CHARCON':
|
||
return True
|
||
if t[0] == 'Factor' and (t[1] == 'Integer' or t[1] == 'CHARCON'):
|
||
return True
|
||
if t[0] == 'Term' and (t[1] == 'Integer' or t[1] == 'CHARCON'):
|
||
return True
|
||
if t[0] == 'Expression' and (t[1] == 'Integer' or t[1] == 'CHARCON'):
|
||
return True
|
||
return False
|
||
|
||
def is_Term(self, t: (str, str)) -> bool:
|
||
if t[0] == 'Term':
|
||
return True
|
||
if t[0] == 'Expression' and t[1] == 'Term':
|
||
return True
|
||
return False
|
||
|
||
def is_RelationOperator(self, t: (str, str)) -> bool:
|
||
if t[0] in ['LSS', 'LEQ', 'GRE', 'GEQ', 'NEQ', 'EQL']:
|
||
return True
|
||
return False
|
||
|
||
def is_UnsignedInteger(self, t: (str, str)) -> bool:
|
||
if t[0] == 'UnsignedInteger':
|
||
return True
|
||
if t[0] == 'Integer' and t[1] >= 0:
|
||
return True
|
||
if t[0] == 'Factor' and t[1] == 'UnsignedInteger':
|
||
return True
|
||
if t[0] == 'Term' and t[1] == 'UnsignedInteger':
|
||
return True
|
||
if t[0] == 'Expression' and t[1] == 'UnsignedInteger':
|
||
return True
|
||
return False
|
||
|
||
# <字符串> ::= "{十进制编码为32,33,35‐126的ASCII字符}"
|
||
def String(self, lt: list[(str, str)]) -> (str, (str, str), int):
|
||
regex = r'[ -!#-~]*'
|
||
if len(lt) == 1 and lt[0][0] == "STRCON" and re.match(regex, lt[0][1]):
|
||
return "doit", ("String", lt[0][1])
|
||
return "can't"
|
||
|
||
# <程序> ::= [<常量说明>][<变量说明>]{<有返回值函数定义>|<无返回值函数定义>} <主函数>
|
||
def Program(self, lt: list[(str, str)]) -> (str, (str, str), int):
|
||
if len(lt) < 2:
|
||
return "can't"
|
||
if len(lt) == 2 and (
|
||
lt[0][0] == 'FunctionDefinitionWithReturnValue' or lt[0][
|
||
0] == 'FunctionDefinitionWithoutReturnValue') and lt[1][
|
||
0] == 'MainFunction':
|
||
return "doit", ("Program", "")
|
||
if lt[0][0] == 'ConstantDeclaration' and lt[1][0] == 'VariableDeclaration':
|
||
return self.Program(lt[2:])
|
||
if lt[0][0] == 'ConstantDefinition' or lt[1][0] == 'VariableDefinition':
|
||
return self.Program(lt[1:])
|
||
return "can't"
|
||
|
||
# 部分 <常量说明>: := const<常量定义>;{const<常量定义>;}
|
||
def PartOfConstantDeclaration(self, t: (str, str)) -> bool:
|
||
pt = ['CONSTTK', 'ConstantDefinition', 'SEMICN']
|
||
return t[0] in pt or self.PartOfConstantDefinition(t)
|
||
|
||
# <常量说明>: := const<常量定义>;{const<常量定义>;}
|
||
def ConstantDeclaration(self, lt: list[(str, str)], newadd: (str, str)) -> (str, (str, str), int):
|
||
if lt[0][0] != 'CONSTTK' or len(lt) < 3:
|
||
return "can't"
|
||
newadd = newadd[0]
|
||
if lt[0][0] == 'CONSTTK' and lt[1][0] == 'ConstantDefinition' and lt[2][0] == 'SEMICN':
|
||
i = 3
|
||
while i < len(lt):
|
||
if lt[i][0] != 'CONSTTK':
|
||
return "part", ("ConstantDeclaration", ""), i
|
||
if i + 1 == len(lt):
|
||
if newadd == 'ConstantDefinition' or self.PartOfConstantDefinition((newadd, "")):
|
||
return "can't"
|
||
else:
|
||
return "part", ("ConstantDeclaration", ""), i
|
||
elif lt[i + 1][0] != 'ConstantDefinition':
|
||
if self.PartOfConstantDefinition(lt[i + 1]):
|
||
return "can't"
|
||
else:
|
||
return "part", ("ConstantDeclaration", ""), i
|
||
if i + 2 == len(lt):
|
||
if newadd == 'SEMICN':
|
||
return "can't"
|
||
else:
|
||
return "part", ("ConstantDeclaration", ""), i
|
||
elif lt[i + 2][0] != 'SEMICN':
|
||
return "part", ("ConstantDeclaration", ""), i
|
||
i += 3
|
||
if newadd == 'CONSTTK':
|
||
return "can't"
|
||
else:
|
||
return "doit", ("ConstantDeclaration", "")
|
||
return "can't"
|
||
|
||
# 部分 <常量定义>: := int<标识符>=<整数>{,<标识符>=<整数>} | char<标识符>=<字符>{,<标识符>=<字符>}
|
||
def PartOfConstantDefinition(self, t: (str, str)) -> bool:
|
||
pt = ['INTTK', 'CHARTK', 'IDENFR', 'ASSIGN', 'INTCON', 'COMMA', 'CHARCON']
|
||
return t[0] in pt
|
||
|
||
# <常量定义>: := int<标识符>=<整数>{,<标识符>=<整数>} | char<标识符>=<字符>{,<标识符>=<字符>}
|
||
def ConstantDefinition(self, lt: list[(str, str)], newadd: (str, str)) -> (str, (str, str), int):
|
||
if (not (self.is_INTTK_CHARTK(lt[0]))) or len(lt) < 4:
|
||
return "can't"
|
||
newadd = newadd[0]
|
||
if self.is_INTTK_CHARTK(lt[0]) and self.is_IDENFR(lt[1]) and lt[2][0] == 'ASSIGN' and self.is_Integer_CHARCON(
|
||
lt[3]):
|
||
i = 4
|
||
while i < len(lt):
|
||
if lt[i][0] != 'COMMA':
|
||
return "part", ("ConstantDefinition", ""), i
|
||
if i + 1 == len(lt):
|
||
if newadd == 'IDENFR':
|
||
return "can't"
|
||
else:
|
||
return "part", ("ConstantDefinition", ""), i
|
||
elif not self.is_IDENFR(lt[i + 1]):
|
||
return "part", ("ConstantDefinition", ""), i
|
||
if i + 2 == len(lt):
|
||
if newadd == 'ASSIGN':
|
||
return "can't"
|
||
else:
|
||
return "part", ("ConstantDefinition", ""), i
|
||
elif lt[i + 2][0] != 'ASSIGN':
|
||
return "part", ("ConstantDefinition", ""), i
|
||
if i + 3 == len(lt):
|
||
if newadd == 'Integer' or newadd == 'CHARCON' or self.PartOfInteger((newadd, "")):
|
||
return "can't"
|
||
else:
|
||
return "part", ("ConstantDefinition", ""), i
|
||
elif not self.is_Integer_CHARCON(lt[i + 3]):
|
||
if self.PartOfInteger(lt[i + 3]):
|
||
return "can't"
|
||
return "part", ("ConstantDefinition", ""), i
|
||
i += 4
|
||
if newadd == 'COMMA':
|
||
return "can't"
|
||
return "doit", ("ConstantDefinition", "")
|
||
return "can't"
|
||
|
||
# 部分 <无符号整数>: := <非零数字>{<数字>} | 0
|
||
def PartOfUnsignedInteger(self, t: (str, str)) -> bool:
|
||
return t[0] == 'INTCON'
|
||
|
||
# <无符号整数>: := <非零数字>{<数字>} | 0
|
||
def UnsignedInteger(self, lt: list[(str, str)]) -> (str, (str, str), int):
|
||
if len(lt) < 1:
|
||
return "can't"
|
||
if lt[0][0] == 'INTCON':
|
||
if lt[0][1][0] == '0' and len(lt[0][1]) != 1:
|
||
return "can't"
|
||
return "doit", ("UnsignedInteger", lt[0][1])
|
||
return "can't"
|
||
|
||
# 部分 <整数> ::= [+|-]<无符号整数>
|
||
def PartOfInteger(self, t: (str, str)) -> bool:
|
||
pt = ['UnsignedInteger', 'PLUS', 'MINU']
|
||
return t[0] in pt or self.PartOfUnsignedInteger(t)
|
||
|
||
# <整数> ::= [+|-]<无符号整数>
|
||
def Integer(self, lt: list[(str, str)]) -> (str, (str, str), int):
|
||
if len(lt) < 1:
|
||
return "can't"
|
||
if lt[0][0] == 'UnsignedInteger':
|
||
return "doit", ("Integer", lt[0][1])
|
||
if len(lt) >= 2 and lt[0][0] in ["MINU", "PLUS"] and lt[1][0] == 'UnsignedInteger':
|
||
if len(lt) == 2:
|
||
return "doit", ('Integer', (-1 if lt[0][0] == "MINU" else 1) * lt[1][1])
|
||
else:
|
||
return "part", ("Integer", (-1 if lt[0][0] == "MINU" else 1) * lt[1][1]), 2
|
||
return "can't"
|
||
|
||
# 部分 <声明头部> ::= int<标识符> |char<标识符>
|
||
def PartOfDeclarationHeader(self, t: (str, str)) -> bool:
|
||
pt = ['INTTK', 'CHARTK']
|
||
return t[0] in pt or self.is_IDENFR(t)
|
||
|
||
# <声明头部> ::= int<标识符> |char<标识符>
|
||
def DeclarationHeader(self, lt: list[(str, str)], newadd: (str, str)) -> (str, (str, str), int):
|
||
if len(lt) < 2:
|
||
return "can't"
|
||
newadd = newadd[0]
|
||
if (lt[0][0] == 'INTTK' or lt[0][0] == 'CHARTK') and self.is_IDENFR(lt[1]):
|
||
if len(lt) == 2:
|
||
if newadd == 'LPARENT':
|
||
return "doit", ("DeclarationHeader", "")
|
||
elif lt[2][0] == 'LPARENT':
|
||
return "doit", ("DeclarationHeader", "")
|
||
return "can't"
|
||
|
||
# <变量说明>: := <变量定义>;{<变量定义>;}
|
||
def VariableDeclaration(self, lt: list[(str, str)], newadd: (str, str)) -> (str, (str, str), int):
|
||
def is_seq(lt: list[(str, str)]) -> str:
|
||
if len(lt) == 1 and lt[0][0] == 'VariableDefinition':
|
||
return "can't"
|
||
if lt[0][0] == 'VariableDefinition' and lt[1][0] == 'SEMICN':
|
||
if len(lt) == 2:
|
||
return "doit"
|
||
else:
|
||
return "part"
|
||
if lt[0][0] != 'VariableDefinition' or lt[1][0] != 'SEMICN':
|
||
if self.PartOfTYPEIDENFR(lt[0][0]):
|
||
return "can't"
|
||
return "not"
|
||
|
||
if len(lt) < 2:
|
||
return "can't"
|
||
if lt[0][0] != 'VariableDefinition' or lt[1][0] != 'SEMICN':
|
||
return "can't"
|
||
newadd = newadd[0]
|
||
i = 2
|
||
while i < len(lt):
|
||
match is_seq(lt[i:]):
|
||
case "doit":
|
||
if newadd == 'VariableDefinition' or self.PartOfVariableDefinition((newadd, "")):
|
||
return "can't"
|
||
else:
|
||
return "doit", ("VariableDeclaration", "")
|
||
case "part":
|
||
i += 2
|
||
case "not":
|
||
if self.PartOfVariableDefinition((newadd, "")) or newadd == 'SEMICN':
|
||
return "can't"
|
||
return "part", ("VariableDeclaration", ""), i
|
||
case "can't":
|
||
return "can't"
|
||
if newadd == 'VariableDefinition' or self.PartOfVariableDefinition((newadd, "")):
|
||
return "can't"
|
||
else:
|
||
return "doit", ("VariableDeclaration", "")
|
||
|
||
# 部分 <类型标识符> ::= int | char
|
||
def PartOfTYPEIDENFR(self, t: (str, str)) -> bool:
|
||
lt = ['INTTK', 'CHARTK']
|
||
return t[0] in lt
|
||
|
||
# 部分 <变量定义> ::= <类型标识符>(<标识符>|<标识符>'['<无符号整数>']'){,(<标识符>|<标识符>'['<无符号整数>']' )} //<无符号整数>表示数组元素的个数,其值需大于0
|
||
def PartOfVariableDefinition(self, t: (str, str)) -> bool:
|
||
lt = ['UnsignedInteger', 'LBRACK', 'RBRACK']
|
||
return t in lt or self.is_IDENFR(t) or self.PartOfTYPEIDENFR(t)
|
||
|
||
# <变量定义> ::= <类型标识符>(<标识符>|<标识符>'['<无符号整数>']'){,(<标识符>|<标识符>'['<无符号整数>']' )} //<无符号整数>表示数组元素的个数,其值需大于0
|
||
def VariableDefinition(self, lt: list[(str, str)], newadd: (str, str), last: (str, str)) -> (str, (str, str), int):
|
||
def is_seq(lt: list[(str, str)]) -> str: # 3 for long_assign 2 for short_assign, 1 for part, 0 for not
|
||
ls = ['IDENFR', 'LBRACK', 'UnsignedInteger', 'RBRACK']
|
||
if self.is_IDENFR(lt[0]):
|
||
return 'not'
|
||
if len(lt) == 1 and self.is_IDENFR(lt[0]):
|
||
return "doit"
|
||
if len(lt) == 4 and self.is_IDENFR(lt[0]) and lt[1][0] == 'LBRACK' and self.is_UnsignedInteger(lt[2]) and \
|
||
lt[3][0] == 'RBRACK':
|
||
return "doit"
|
||
if len(lt) >= 4 and self.is_IDENFR(lt[0]) and lt[1][0] == 'LBRACK' and self.is_UnsignedInteger(lt[2]) and \
|
||
lt[3][0] == 'RBRACK':
|
||
return "part2"
|
||
if len(lt) >= 1 and self.is_IDENFR(lt[0]) and lt[1][0] != 'LBRACK':
|
||
return "part1"
|
||
bl = True
|
||
for i in range(len(lt)):
|
||
if lt[i][0] != ls[i]:
|
||
bl = False
|
||
break
|
||
if bl:
|
||
return "can't"
|
||
else:
|
||
return "not"
|
||
|
||
if len(lt) < 2 or (lt[0][0] != 'INTTK' and lt[0][0] != 'CHARTK'):
|
||
return "can't"
|
||
if last[0] == 'CONSTTK' or last[0] == 'COMMA' or last[0] == 'LPARENT':
|
||
return "can't"
|
||
|
||
newadd = newadd[0]
|
||
i = 0
|
||
if len(lt) >= 5:
|
||
if (lt[0][0] == 'INTTK' or lt[0][0] == 'CHARTK') and self.is_IDENFR(lt[1]) and lt[2][0] == 'LBRACK' and \
|
||
self.is_UnsignedInteger(lt[3]) and lt[4][0] == 'RBRACK':
|
||
i = 5
|
||
elif (lt[0][0] == 'INTTK' or lt[0][0] == 'CHARTK') and self.is_IDENFR(lt[1]):
|
||
i = 2
|
||
while i < len(lt):
|
||
if lt[i][0] != 'COMMA':
|
||
return "part", ("VariableDefinition", ""), i
|
||
i += 1
|
||
while i < len(lt):
|
||
match is_seq(lt[i:]):
|
||
case "doit":
|
||
if newadd == 'COMMA':
|
||
return "can't"
|
||
else:
|
||
return "doit", ("VariableDefinition", "")
|
||
case "part1":
|
||
if lt[i + 1] == 'COMMA':
|
||
i = i + 2
|
||
else:
|
||
return "part", ("VariableDefinition", ""), i + 1
|
||
case "part2":
|
||
if lt[i + 4] == 'COMMA':
|
||
i = i + 5
|
||
else:
|
||
return "part", ("VariableDefinition", ""), i + 4
|
||
case "can't":
|
||
return "can't"
|
||
case "not":
|
||
return "part", ("VariableDefinition", ""), i - 1
|
||
if newadd == 'IDENFR':
|
||
return "can't"
|
||
else:
|
||
return "part", ("VariableDefinition", ""), i - 1
|
||
if newadd in ['COMMA', 'LBRACK']:
|
||
return "can't"
|
||
else:
|
||
return "doit", ("VariableDefinition", "")
|
||
|
||
# <有返回值函数定义> ::= <声明头部>'('<参数表>')' '{'<复合语句>'}'
|
||
def FunctionDefinitionWithReturnValue(self, lt: list[(str, str)]) -> (str, (str, str), int):
|
||
if len(lt) < 7:
|
||
return "can't"
|
||
if lt[0][0] == 'DeclarationHeader' and lt[1][0] == 'LPARENT' and lt[2][0] == 'ParameterList' and lt[3][
|
||
0] == 'RPARENT' and lt[4][0] == 'LBRACE' and lt[5][0] == 'CompoundStatement' and lt[6][0] == 'RBRACE':
|
||
if len(lt) == 7:
|
||
return "doit", ("FunctionDefinitionWithReturnValue", "")
|
||
else:
|
||
return "part", ("FunctionDefinitionWithReturnValue", ""), 7
|
||
return "can't"
|
||
|
||
# <无返回值函数定义> ::= void<标识符>'('<参数表>')''{'<复合语句>'}'
|
||
def FunctionDefinitionWithoutReturnValue(self, lt: list[(str, str)]) -> (str, (str, str), int):
|
||
if len(lt) < 8:
|
||
return "can't"
|
||
if lt[0][0] == 'VOIDTK' and self.is_IDENFR(lt[1]) and lt[2][0] == 'LPARENT' and lt[3][0] == 'ParameterList' and \
|
||
lt[4][0] == 'RPARENT' and lt[5][0] == 'LBRACE' and lt[6][0] == 'CompoundStatement' and lt[7][
|
||
0] == 'RBRACE':
|
||
if len(lt) == 8:
|
||
return "doit", ("FunctionDefinitionWithoutReturnValue", "")
|
||
else:
|
||
return "part", ("FunctionDefinitionWithoutReturnValue", ""), 8
|
||
return "can't"
|
||
|
||
# <复合语句> ::= [<常量说明>][<变量说明>]<语句列>
|
||
def CompoundStatement(self, lt: list[(str, str)]) -> (str, (str, str), int):
|
||
if len(lt) < 1:
|
||
return "can't"
|
||
if lt[0][0] == 'StatementList':
|
||
if len(lt) == 1:
|
||
return "doit", ("CompoundStatement", "")
|
||
else:
|
||
return "part", ("CompoundStatement", ""), 1
|
||
if len(lt) < 2:
|
||
return "can't"
|
||
if (lt[0][0] == 'ConstantDeclaration' or lt[0][0] == 'VariableDeclaration') and lt[1][0] == 'StatementList':
|
||
if len(lt) == 2:
|
||
return "doit", ("CompoundStatement", "")
|
||
else:
|
||
return "part", ("CompoundStatement", ""), 2
|
||
if len(lt) < 3:
|
||
return "can't"
|
||
if lt[0][0] == 'ConstantDeclaration' and lt[1][0] == 'VariableDeclaration' and lt[2][0] == 'StatementList':
|
||
if len(lt) == 3:
|
||
return "doit", ("CompoundStatement", "")
|
||
else:
|
||
return "part", ("CompoundStatement", ""), 3
|
||
return "can't"
|
||
|
||
# <参数表> ::= <类型标识符><标识符>{,<类型标识符><标识符>}| <空>
|
||
def ParameterList(self, lt: list[(str, str)], newadd: (str, str), last: (str, str)) -> (str, (str, str), int):
|
||
def is_seq(lt: list[(str, str)]) -> str:
|
||
ls = ['COMMA', ['INTTK', 'CHARTK'], 'IDENFR']
|
||
for i in range(len(lt)):
|
||
if i >= len(ls):
|
||
return "full"
|
||
if i == 1:
|
||
if lt[i][0] not in ls[i]:
|
||
return "not"
|
||
elif i == 2:
|
||
if not self.is_IDENFR(lt[i]):
|
||
return "not"
|
||
else:
|
||
if lt[i][0] != ls[i]:
|
||
return "not"
|
||
if len(lt) < 3:
|
||
return "can't"
|
||
else:
|
||
return "doit"
|
||
|
||
if len(lt) < 2 or last[0] != 'LPARENT' or newadd[0] != 'RPARENT':
|
||
return "can't"
|
||
|
||
if lt[0][0] not in ["INTTK", "CHARTK"] or not self.is_IDENFR(lt[1]):
|
||
return "can't"
|
||
i = 2
|
||
while i < len(lt):
|
||
match is_seq(lt[i:]):
|
||
case "doit":
|
||
return "doit", ("ParameterList", "")
|
||
case "can't":
|
||
return "can't"
|
||
case "not":
|
||
return "can't"
|
||
case "full":
|
||
i += 3
|
||
return "doit", ("ParameterList", "")
|
||
|
||
# <主函数> ::= void main‘(’‘)’ ‘{’<复合语句>‘}’
|
||
def MainFunction(self, lt: list[(str, str)]) -> (str, (str, str), int):
|
||
if len(lt) < 7:
|
||
return "can't"
|
||
if lt[0][0] == 'VOIDTK' and lt[1][0] == 'MAINTK' and lt[2][0] == 'LPARENT' and lt[3][0] == 'RPARENT' and \
|
||
lt[4][0] == 'LBRACE' and lt[5][0] == 'CompoundStatement' and lt[6][0] == 'RBRACE':
|
||
if len(lt) == 7:
|
||
return "doit", ("MainFunction", "")
|
||
else:
|
||
return "part", ("MainFunction", ""), 7
|
||
return "can't"
|
||
|
||
# <表达式>: := [+|-]<项>{<加法运算符><项>} // [+ |‐]只作用于第一个 < 项 >
|
||
def Expression(self, lt: list[(str, str)], newadd: (str, str)) -> (str, (str, str), int, bool, list[int], bool):
|
||
def is_seq(lt: list[(str, str)]) -> str:
|
||
ls = [['PLUS', 'MINU'], 'Term']
|
||
if len(lt) == 1 and lt[0][0] in ls[0]:
|
||
return "part"
|
||
if len(lt) >= 2 and lt[0][0] in ls[0] and lt[1][0] == ls[1]:
|
||
if len(lt) == 2:
|
||
return "doit"
|
||
if len(lt) > 2:
|
||
return "full"
|
||
if lt[0][0] not in ls[1]:
|
||
return "not"
|
||
if len(lt) >= 2 and lt[0][0] in ls[0] and lt[1][0] != ls[1]:
|
||
return "not"
|
||
|
||
if len(lt) < 1:
|
||
return "can't"
|
||
i = 0
|
||
if len(lt) >= 2 and lt[0][0] in ['PLUS', 'MINU'] and self.is_Term(lt[1]):
|
||
i = 2
|
||
elif self.is_Term(lt[0]):
|
||
i = 1
|
||
else:
|
||
return "can't"
|
||
newadd = newadd[0]
|
||
while i < len(lt):
|
||
match is_seq(lt[i:]):
|
||
case "doit":
|
||
if newadd in ["PLUS", "MINU"]:
|
||
return "can't"
|
||
else:
|
||
return "doit", ("Expression", "Term"), False, [t for t in range(i + 2) if
|
||
lt[t][0] == "Expression"], False
|
||
case "part":
|
||
if newadd == "Term" or (i == 1 and lt[0][0] == "Expression"):
|
||
return "can't"
|
||
else:
|
||
return "part", ("Expression", "Term"), i, False
|
||
case "full":
|
||
i += 2
|
||
case "not":
|
||
if i == 1 and lt[0][0] == "Expression":
|
||
return "can't"
|
||
return "part", ("Expression", "Term"), i, False, [t for t in range(i) if
|
||
lt[t][0] == "Expression"], False
|
||
if newadd in ["PLUS", "MINU"]:
|
||
return "can't"
|
||
else:
|
||
if lt[i - 1][0] == "Expression":
|
||
return "can't"
|
||
return "doit", ("Expression", lt[i - 1][1]), False
|
||
|
||
# <项>: := <因子>{<乘法运算符><因子>}
|
||
def Term(self, lt: list[(str, str)], newadd: (str, str)) -> (str, (str, str), int, bool):
|
||
def is_seq(lt: list[(str, str)]) -> str:
|
||
ls = ['Factor', ['MULT', 'DIV', 'MOD']]
|
||
if len(lt) == 1 and lt[0][0] == ls[0]:
|
||
return "part"
|
||
if len(lt) >= 2 and lt[0][0] == ls[0] and lt[1][0] in ls[1]:
|
||
if len(lt) == 2:
|
||
return "doit"
|
||
if len(lt) > 2:
|
||
return "full"
|
||
if lt[0][0] != ls[0]:
|
||
return "notA"
|
||
if len(lt) >= 2 and lt[0][0] == ls[0] and lt[1][0] not in ls[1]:
|
||
return "notB"
|
||
|
||
if len(lt) < 1:
|
||
return "can't"
|
||
i = 0
|
||
if lt[0][0] == 'Factor':
|
||
i = 1
|
||
else:
|
||
return "can't"
|
||
while i < len(lt):
|
||
match is_seq(lt[i:]):
|
||
case "doit":
|
||
if newadd in ['MULT', 'DIV', 'MOD']:
|
||
return "can't"
|
||
else:
|
||
return "doit", ("Term", ""), False
|
||
case "part":
|
||
if self.PartOfFactor((newadd, "")):
|
||
return "can't"
|
||
else:
|
||
return "part", ("Term", lt[0][1] if i == 1 else ""), i, False
|
||
case "full":
|
||
i += 2
|
||
case "notA":
|
||
if self.PartOfFactor((lt[i], "")):
|
||
return "can't"
|
||
return "part", ("Term", lt[0][1] if i == 1 else ""), i, False
|
||
case "notB":
|
||
return "part", ("Term", lt[0][1] if i == 1 else ""), i, False
|
||
if newadd in ['MULT', 'DIV', 'MOD']:
|
||
return "can't"
|
||
else:
|
||
return "doit", ("Term", lt[0][1]), False
|
||
|
||
# 部分 <因子>: := <标识符>|<标识符>'['<表达式>']' | '('<表达式>')'|<整数> | <字符>|<有返回值函数调用语句>
|
||
def PartOfFactor(self, t: (str, str)) -> bool:
|
||
pt = ['IDENFR', 'LPARENT', 'RPARENT', 'LBRACK', 'RBRACK', 'Integer', 'CHARCON',
|
||
'FunctionDefinitionWithReturnValue']
|
||
return t[0] in pt
|
||
|
||
# <因子>: := <标识符>|<标识符>'['<表达式>']' | '('<表达式>')'|<整数> | <字符>|<有返回值函数调用语句>
|
||
def Factor(self, lt: list[(str, str)]) -> (str, (str, str), int):
|
||
if len(lt) < 1:
|
||
return "can't"
|
||
if len(lt) > 4:
|
||
if self.is_IDENFR(lt[0]) and lt[1][0] == 'LBRACK' and lt[2][0] == 'Expression' and lt[3][0] == 'RBRACK':
|
||
if len(lt) == 4:
|
||
return "doit", ("Factor", "")
|
||
else:
|
||
return "part", ("Factor", ""), 4
|
||
elif len(lt) > 3:
|
||
if lt[0][0] == 'LPARENT' and lt[1][0] == 'Expression' and lt[2][0] == 'RPARENT':
|
||
if len(lt) == 3:
|
||
return "doit", ("Factor", "")
|
||
else:
|
||
return "part", ("Factor", ""), 3
|
||
elif lt[0][0] in ["IDENFR", "Integer", "CHARCON", "FunctionCallWithReturnValue"]:
|
||
if len(lt) == 1:
|
||
return "doit", ("Factor", lt[0][0])
|
||
return "part", ("Factor", lt[0][0]), 1
|
||
return "can't"
|
||
|
||
# 部分 <语句> ::= <条件语句>|<循环语句>| '{'<语句列>'}'| <有返回值函数调用语句>; |<无返回值函数调用语句>;|<赋值语句>;|<读语句>;|<写语句>;|<空>;|<返回语句>;
|
||
def PartOfStatement(self, t: (str, str)) -> bool:
|
||
pt = ['ConditionStatement', 'LoopStatement', 'LBRACE', 'StatementList', 'RBRACE', 'FunctionCallWithReturnValue',
|
||
'FunctionCallWithoutReturnValue', 'AssignmentStatement', 'ReadStatement', 'WriteStatement', 'SEMICN',
|
||
'ReturnStatement', 'SEMICN']
|
||
return t in pt
|
||
|
||
# <语句> ::= <条件语句>|<循环语句>| '{'<语句列>'}'| <有返回值函数调用语句>; |<无返回值函数调用语句>;|<赋值语句>;|<读语句>;|<写语句>;|<空>;|<返回语句>;
|
||
def Statement(self, lt: list[(str, str)], _: (str, str), last: (str, str), last_of_last: (str, str)) -> (
|
||
str, (str, str), int):
|
||
if len(lt) < 1:
|
||
return "can't"
|
||
if last_of_last[0] == "FORTK":
|
||
return "can't"
|
||
if lt[0][0] == 'ConditionStatement':
|
||
if len(lt) == 1:
|
||
return "doit", ("Statement", "")
|
||
else:
|
||
return "part", ("Statement", ""), 1
|
||
if lt[0][0] == 'LoopStatement':
|
||
if len(lt) == 1:
|
||
return "doit", ("Statement", "")
|
||
else:
|
||
return "part", ("Statement", ""), 1
|
||
if len(lt) > 2 and lt[0][0] == 'LBRACE' and lt[1][0] == 'StatementList' and lt[2][0] == 'RBRACE':
|
||
if len(lt) == 3:
|
||
return "doit", ("Statement", "")
|
||
else:
|
||
return "part", ("Statement", ""), 3
|
||
if len(lt) >= 2 and (
|
||
lt[0][0] == 'FunctionCallWithReturnValue' or lt[0][0] == 'FunctionCallWithoutReturnValue' or lt[0][
|
||
0] == 'AssignmentStatement' or lt[0][0] == 'ReadStatement' or lt[0][0] == 'WriteStatement' or lt[0][
|
||
0] == 'ReturnStatement') and lt[1][0] == 'SEMICN':
|
||
if len(lt) == 2:
|
||
return "doit", ("Statement", "")
|
||
else:
|
||
return "part", ("Statement", ""), 1
|
||
if lt[0][0] == 'SEMICN' and last[0] not in ["VariableDefinition", "ConstantDefinition", "Expression",
|
||
"ConditionStatement", "Condition"]:
|
||
if len(lt) == 1:
|
||
return "doit", ("Statement", "")
|
||
else:
|
||
return "part", ("Statement", ""), 1
|
||
return "can't"
|
||
|
||
# <赋值语句> ::= <标识符>=<表达式>|<标识符>'['<表达式>']'=<表达式>
|
||
def AssignmentStatement(self, lt: list[(str, str)], newadd: (str, str), last: (str, str),
|
||
last_of_last: (str, str)) -> (str, (str, str), int, bool, list[int]):
|
||
if len(lt) < 3 or self.PartOfTYPEIDENFR(last) or last_of_last[0] == "FORTK":
|
||
return "can't"
|
||
if self.is_IDENFR(lt[0]) and lt[1][0] == 'ASSIGN' and lt[2][0] == 'Expression':
|
||
if len(lt) == 3:
|
||
if newadd in ['PLUS', 'MINU']:
|
||
return "can't"
|
||
return "doit", ("AssignmentStatement", ""), False, ([0] if lt[0][0] != "IDENFR" else [])
|
||
else:
|
||
if lt[3][0] in ['PLUS', 'MINU']:
|
||
return "can't"
|
||
return "part", ("AssignmentStatement", ""), 3, False, ([0] if lt[0][0] != "IDENFR" else [])
|
||
if len(lt) >= 6 and self.is_IDENFR(lt[0]) and lt[1][0] == 'LBRACK' and lt[2][0] == 'Expression' and lt[3][
|
||
0] == 'RBRACK' and lt[4][0] == 'ASSIGN' and lt[5][0] == 'Expression':
|
||
if len(lt) == 6:
|
||
if newadd in ['PLUS', 'MINU']:
|
||
return "can't"
|
||
return "doit", ("AssignmentStatement", ""), False, ([0] if lt[0][0] != "IDENFR" else [])
|
||
else:
|
||
if lt[6][0] in ['PLUS', 'MINU']:
|
||
return "can't"
|
||
return "part", ("AssignmentStatement", ""), 6, False, ([0] if lt[0][0] != "IDENFR" else [])
|
||
return "can't"
|
||
|
||
# <条件语句> ::= if '('<条件>')'<语句>[else<语句>]
|
||
def ConditionalStatement(self, lt: list[(str, str)], newadd: (str, str)) -> (str, (str, str), int):
|
||
if len(lt) < 5:
|
||
return "can't"
|
||
if lt[0][0] != "IFTK" or lt[1][0] != "LPARENT" or lt[2][0] != "Condition" or lt[3][0] != "RPARENT" or lt[4][
|
||
0] != "Statement":
|
||
return "can't"
|
||
if len(lt) == 5:
|
||
if newadd == "ELSETK":
|
||
return "can't"
|
||
return "doit", ("ConditionalStatement", "")
|
||
if len(lt) >= 7 and lt[5][0] == "ELSETK" and lt[6][0] == "Statement":
|
||
if len(lt) == 7:
|
||
return "doit", ("ConditionalStatement", "")
|
||
else:
|
||
return "part", ("ConditionalStatement", ""), 7
|
||
if len(lt) == 6:
|
||
if lt[5][0] != "ELSETK":
|
||
return "part", ("ConditionalStatement", ""), 5
|
||
if newadd[0] == "Statement" or self.PartOfStatement(newadd):
|
||
return "can't"
|
||
return "part", ("ConditionalStatement", ""), 5
|
||
|
||
# <条件> ::= <表达式><关系运算符><表达式> |<表达式> //整型表达式之间才能进行关系运算 //表达式为整型,其值为0条件为假,值不为0时条件为真
|
||
def Condition(self, lt: list[(str, str)], newadd: (str, str), last: (str, str), last_of_last: (str, str)) -> (
|
||
str, (str, str), int):
|
||
if len(lt) < 1:
|
||
return "can't"
|
||
|
||
if not ((last[0] == 'LPARENT' and newadd[0] == 'RPARENT' and last_of_last[0] == "IFTK") or (
|
||
last[0] == 'LPARENT' and newadd[0] == 'RPARENT' and last_of_last[0] == "WHILETK") or (
|
||
last[0] == 'SEMICN' and newadd[0] == 'SEMICN' and last_of_last[0] == "Expression")):
|
||
return "can't"
|
||
if lt[0][0] == "Expression":
|
||
if len(lt) == 1:
|
||
return "doit", ("Condition", "")
|
||
if len(lt) == 3 and self.is_RelationOperator(lt[1]) and lt[2][0] == "Expression":
|
||
return "doit", ("Condition", "")
|
||
return "can't"
|
||
return "can't"
|
||
|
||
# <循环语句> ::= while '('<条件>')'<语句>| do<语句>while '('<条件>')' |for'('<标识符>=<表达式>;<条件>;<标识符>=<标识符>(+|‐)<步长>')'<语句>
|
||
def LoopStatement(self, lt: list[(str, str)]) -> (str, (str, str), int):
|
||
if len(lt) < 5:
|
||
return "can't"
|
||
if lt[0][0] == "WHILETK" and lt[1][0] == "LPARENT" and lt[2][0] == "Condition" and lt[3][0] == "RPARENT" and \
|
||
lt[4][0] == "Statement":
|
||
if len(lt) == 5:
|
||
return "doit", ("LoopStatement", "")
|
||
else:
|
||
return "part", ("LoopStatement", ""), 5
|
||
if len(lt) < 6:
|
||
return "can't"
|
||
if lt[0][0] == "DOTK" and lt[1][0] == "Statement" and lt[2][0] == "WHILETK" and lt[3][0] == "LPARENT" and lt[4][
|
||
0] == "Condition" and lt[5][0] == "RPARENT":
|
||
if len(lt) == 6:
|
||
return "doit", ("LoopStatement", "")
|
||
else:
|
||
return "part", ("LoopStatement", ""), 6
|
||
if len(lt) < 15:
|
||
return "can't"
|
||
if lt[0][0] == "FORTK" and lt[1][0] == "LPARENT" and self.is_IDENFR(lt[2]) and lt[3][0] == "ASSIGN" and lt[4][
|
||
0] == "Expression" and lt[5][0] == "SEMICN" and lt[6][0] == "Condition" and lt[7][
|
||
0] == "SEMICN" and self.is_IDENFR([8]) and lt[9][0] == "ASSIGN" and self.is_IDENFR(lt[10]) and lt[11][
|
||
0] in ['PLUS', 'MINU'] and lt[12] == "Step" and lt[13][0] == "RPARENT" and lt[14][0] == "Statement":
|
||
if len(lt) == 15:
|
||
return "doit", ("LoopStatement", "")
|
||
else:
|
||
return "part", ("LoopStatement", ""), 15
|
||
return "can't"
|
||
|
||
# <步长>::= <无符号整数>
|
||
def Step(self, lt: list[(str, str)], newadd: (str, str), last: (str, str), last_of_last: (str, str)) -> (
|
||
str, (str, str), int):
|
||
if len(lt) < 1:
|
||
return "can't"
|
||
if last[0] not in ["PLUS", "MINU"] or not self.is_IDENFR(last_of_last):
|
||
return "can't"
|
||
if lt[0][0] == "UnsignedInteger":
|
||
if len(lt) == 1:
|
||
if newadd == "RPARENT":
|
||
return "doit", ("Step", "")
|
||
else:
|
||
if lt[2] == "RPARENT":
|
||
return "part", ("Step", ""), 1
|
||
return "can't"
|
||
|
||
# <有返回值函数调用语句>: := <标识符>'('<值参数表>')'
|
||
def FunctionCallWithReturnValue(self, lt: list[(str, str)], newadd: (str, str), last: (str, str)) -> (
|
||
str, (str, str), int):
|
||
if len(lt) < 4:
|
||
return "can't"
|
||
if len(lt) >= 4 and self.is_IDENFR(lt[0]) and lt[1][0] == "LPARENT" and lt[2][0] == "ValueParameterList" and \
|
||
lt[3][0] == "RPARENT":
|
||
if len(lt) == 4:
|
||
return "doit", ("FunctionCallWithReturnValue", "")
|
||
else:
|
||
return "part", ("FunctionCallWithReturnValue", ""), 4
|
||
return "can't"
|
||
|
||
# <无返回值函数调用语句> ::= <标识符>'('<值参数表>')' 无
|
||
# <值参数表> ::= <表达式>{,<表达式>}|<空>
|
||
def ValueParameterList(self, lt: list[(str, str)], newadd: (str, str), last: (str, str),
|
||
last_of_last: (str, str)) -> (str, (str, str), int, bool):
|
||
def is_seq(lt: list[(str, str)]) -> str:
|
||
ls = ['COMMA', 'Expression']
|
||
if lt[0][0] != ls[0]:
|
||
return "not"
|
||
if len(lt) == 1 and lt[0][0] == ls[0]:
|
||
return "can't"
|
||
if len(lt) >= 2 and lt[0][0] == ls[0] and lt[1][0] == ls[1]:
|
||
if len(lt) == 2:
|
||
return "doit"
|
||
if len(lt) > 2:
|
||
return "full"
|
||
else:
|
||
return "not"
|
||
|
||
if len(lt) < 1:
|
||
return "can't"
|
||
if lt[0][0] != "Expression" or newadd[0] != "RPARENT" or last[0] != "LPARENT" or last_of_last[0] == "RETURNTK":
|
||
return "can't"
|
||
i = 1
|
||
while i < len(lt):
|
||
match is_seq(lt[i:]):
|
||
case "doit":
|
||
if newadd == "COMMA":
|
||
return "can't"
|
||
return "doit", ("ValueParameterList", ""), False
|
||
case "full":
|
||
i += 2
|
||
case "not":
|
||
if lt[i][0] == "RPARENT":
|
||
return "part", ("ValueParameterList", ""), i, False
|
||
return "can't"
|
||
case "can't":
|
||
return "can't"
|
||
return "can't"
|
||
|
||
# <语句列> ::= {<语句>}
|
||
def StatementList(self, lt: list[(str, str)], newadd: (str, str), last: (str, str)) -> (str, (str, str), int):
|
||
if len(lt) < 1:
|
||
return "can't"
|
||
if last[0] != "LBRACE":
|
||
return "can't"
|
||
for i in range(len(lt)):
|
||
if lt[i][0] != "Statement":
|
||
if lt[i][0] == "RBRACE":
|
||
return "part", ("StatementList", ""), i
|
||
return "can't"
|
||
if newadd[0] == "RBRACE":
|
||
return "doit", ("StatementList", ""), i
|
||
else:
|
||
return "can't"
|
||
|
||
# <读语句> ::= scanf '('<标识符>{,<标识符>}')'
|
||
def ReadStatement(self, lt: list[(str, str)]) -> (str, (str, str), int):
|
||
if len(lt) < 4:
|
||
return "can't"
|
||
if lt[0][0] != "SCANFTK":
|
||
return "can't"
|
||
if lt[0][0] == "SCANFTK" and lt[1][0] == "LPARENT" and self.is_IDENFR(lt[2]):
|
||
for i in range(3, len(lt)):
|
||
if not self.is_IDENFR(lt[i]):
|
||
if lt[i] == "RPARENT":
|
||
if i == len(lt) - 1:
|
||
return "doit", ("ReadStatement", "")
|
||
return "part", ("ReadStatement", ""), i
|
||
return "can't"
|
||
return "can't"
|
||
|
||
# <写语句> ::= printf '(' <字符串>,<表达式> ')'| printf '('<字符串> ')'| printf '('<表达式>')'
|
||
def WriteStatement(self, lt: list[(str, str)]) -> (str, (str, str), int, bool):
|
||
if len(lt) < 4:
|
||
return "can't"
|
||
if lt[0][0] != "PRINTFTK":
|
||
return "can't"
|
||
if lt[0][0] == "PRINTFTK" and lt[1][0] == "LPARENT" and (lt[2][0] == "String" or lt[2][0] == "Expression") and \
|
||
lt[3][0] == "RPARENT":
|
||
if len(lt) == 4:
|
||
return "doit", ("WriteStatement", ""), False
|
||
else:
|
||
return "part", ("WriteStatement", ""), 4, False
|
||
if len(lt) >= 6 and lt[0][0] == "PRINTFTK" and lt[1][0] == "LPARENT" and lt[2][0] == "String" and lt[3][
|
||
0] == "COMMA" and lt[4][0] == "Expression" and lt[5][0] == "RPARENT":
|
||
if len(lt) == 6:
|
||
return "doit", ("WriteStatement", ""), False
|
||
else:
|
||
return "part", ("WriteStatement", ""), 6, False
|
||
return "can't"
|
||
|
||
# <返回语句> ::= return['('<表达式>')']
|
||
def ReturnStatement(self, lt: list[(str, str)], addnew: (str, str)) -> (str, (str, str), int, bool):
|
||
if len(lt) < 1:
|
||
return "can't"
|
||
if lt[0][0] != "RETURNTK":
|
||
return "can't"
|
||
if len(lt) == 1:
|
||
if addnew[0] == "LPARENT":
|
||
return "can't"
|
||
return "doit", ("ReturnStatement", ""), False
|
||
if len(lt) == 4 and lt[1][0] == "LPARENT" and lt[2][0] == "Expression" and lt[3][0] == "RPARENT":
|
||
return "doit", ("ReturnStatement", ""), False
|
||
return "can't"
|
||
|
||
|
||
class GrammaticalAnalysis:
|
||
keyword = {
|
||
"const": "CONSTTK",
|
||
"int": "INTTK",
|
||
"char": "CHARTK",
|
||
"void": "VOIDTK",
|
||
"main": "MAINTK",
|
||
"if": "IFTK",
|
||
"else": "ELSETK",
|
||
"do": "DOTK",
|
||
"while": "WHILETK",
|
||
"for": "FORTK",
|
||
"continue": "CONTINUETK",
|
||
"break": "BREAKTK",
|
||
"scanf": "SCANFTK",
|
||
"printf": "PRINTFTK",
|
||
"return": "RETURNTK"}
|
||
symbol = {
|
||
"+": "PLUS",
|
||
"-": "MINU",
|
||
"*": "MULT",
|
||
"/": "DIV",
|
||
"%": "MOD",
|
||
"&&": "AND",
|
||
"||": "OR",
|
||
",": "COMMA",
|
||
";": "SEMICN",
|
||
"(": "LPARENT",
|
||
")": "RPARENT",
|
||
"[": "LBRACK",
|
||
"]": "RBRACK",
|
||
"{": "LBRACE",
|
||
"}": "RBRACE",
|
||
"=": "ASSIGN",
|
||
"<": "LSS",
|
||
"<=": "LEQ",
|
||
">": "GRE",
|
||
">=": "GEQ",
|
||
"!=": "NEQ",
|
||
"==": "EQL",
|
||
"!": "NOT"
|
||
}
|
||
regex = {
|
||
r'^[a-zA-Z][a-zA-Z0-9]*$': "IDENFR",
|
||
r'^\d+$': "INTCON",
|
||
r'^".*"$': "STRCON",
|
||
r"^'.'$": "CHARCON"
|
||
}
|
||
yacc = Yacc()
|
||
patterns = []
|
||
seq = 0
|
||
final_ans = []
|
||
|
||
def __init__(self):
|
||
self.patterns = [method for method in dir(Yacc) if
|
||
callable(getattr(Yacc, method)) and not method.startswith("__") and not method.startswith(
|
||
"PartOf") and not method.startswith("is_")]
|
||
|
||
def readfile(self, filename: str) -> str:
|
||
s = ''
|
||
with open(filename, 'r', encoding='utf-8') as f:
|
||
s = ''.join(f.readlines())
|
||
return s
|
||
|
||
def strsplit(self, s: str) -> list[str]:
|
||
lt = []
|
||
l = 0
|
||
r = 1
|
||
while r < len(s):
|
||
if not (s[r].isalpha() or s[r].isdigit()):
|
||
k = s[l:r].strip()
|
||
if len(k) != 0:
|
||
lt.append(k)
|
||
k = s[r].strip()
|
||
if len(k) != 0:
|
||
lt.append(k)
|
||
l = r + 1
|
||
r = l
|
||
else:
|
||
r += 1
|
||
if l < len(s):
|
||
lt.append(s[l:r])
|
||
i = 0
|
||
# 修整 符号配对 + 字符串查找
|
||
while i < len(lt) - 1:
|
||
if lt[i] in self.symbol and lt[i] + lt[i + 1] in self.symbol:
|
||
lt[i] = lt[i] + lt[i + 1]
|
||
lt.pop(i + 1)
|
||
elif lt[i] == '"':
|
||
i += 1
|
||
while i < len(lt):
|
||
lt[i - 1] += " " + lt[i]
|
||
lt.pop(i)
|
||
if lt[i - 1][-1] == '"':
|
||
i -= 1
|
||
break
|
||
elif lt[i] == '\'':
|
||
if i + 2 < len(lt) and lt[i + 2] == '\'':
|
||
lt[i] += lt[i + 1] + lt[i + 2]
|
||
lt.pop(i + 1)
|
||
lt.pop(i + 1)
|
||
i += 1
|
||
return lt
|
||
|
||
def final_ans_insert(self, lt: list[int], newadd: (int, (str, str)), delete: bool = True,
|
||
force_del: list[int] = [], cascade_delete: bool = True) -> None:
|
||
i = 0
|
||
|
||
def cas_del(t: int):
|
||
dele = 0
|
||
while t != -1:
|
||
for k in self.final_ans:
|
||
if k[0] == t:
|
||
if k[1][0] not in ['Factor', 'Term', 'Expression']:
|
||
t = -1
|
||
break
|
||
t = k[2] if len(k) >= 3 and cascade_delete else -1
|
||
dele += 1
|
||
self.final_ans.remove(k)
|
||
break
|
||
return dele
|
||
|
||
while i < len(self.final_ans) and len(lt) > 0:
|
||
if self.final_ans[i][0] in lt:
|
||
seq = self.final_ans[i][0]
|
||
if len(lt) == 1:
|
||
if delete == False:
|
||
self.final_ans.insert(i + 1, (newadd[0], newadd[1], lt[0]))
|
||
else:
|
||
self.final_ans.insert(i + 1, newadd)
|
||
if (delete and self.final_ans[i][1][0] in ['Factor', 'Term', 'Expression']) or \
|
||
self.final_ans[i][0] in force_del:
|
||
i -= cas_del(self.final_ans[i][0])
|
||
lt.remove(seq)
|
||
i += 1
|
||
|
||
# 规约!!
|
||
def reduce(self, ans: list[(int, (str, str), any)], newadd: (str, str)) -> list[(int, str)]:
|
||
changed = True
|
||
while changed:
|
||
changed = False
|
||
for i in range(len(ans)):
|
||
no_index_ans = [x[1] for x in ans]
|
||
tokens = no_index_ans[i:]
|
||
redo = False
|
||
for method_name in self.patterns:
|
||
method = getattr(self.yacc, method_name)
|
||
if callable(method):
|
||
result = []
|
||
r = len(inspect.getfullargspec(method).args)
|
||
match r:
|
||
case 5:
|
||
result = method(tokens, newadd, (-1, -1) if i == 0 else no_index_ans[i - 1],
|
||
(-1, -1) if i <= 1 else no_index_ans[i - 2])
|
||
case 4:
|
||
result = method(tokens, newadd, (-1, -1) if i == 0 else no_index_ans[i - 1])
|
||
case 3:
|
||
result = method(tokens, newadd)
|
||
case 2:
|
||
result = method(tokens)
|
||
match result[0]:
|
||
case "doit":
|
||
# print(f"<{self.yacc.translation_dict[result[1][0]]}>")
|
||
match len(result):
|
||
case 5:
|
||
self.final_ans_insert([x[0] for x in ans[i:]], (self.seq, result[1]), result[2],
|
||
[ans[i + x][0] for x in result[3]], result[4])
|
||
case 4:
|
||
self.final_ans_insert([x[0] for x in ans[i:]], (self.seq, result[1]), result[2],
|
||
[ans[i + x][0] for x in result[3]])
|
||
case 3:
|
||
self.final_ans_insert([x[0] for x in ans[i:]], (self.seq, result[1]), result[2])
|
||
case 2:
|
||
self.final_ans_insert([x[0] for x in ans[i:]], (self.seq, result[1]))
|
||
case _:
|
||
assert False
|
||
ans = ans[:i]
|
||
ans.append((self.seq, result[1]))
|
||
self.seq += 1
|
||
changed = True
|
||
redo = True
|
||
break
|
||
case "part":
|
||
# print(f"<{self.yacc.translation_dict[result[1][0]]}>")
|
||
match len(result):
|
||
case 6:
|
||
self.final_ans_insert([x[0] for x in ans[i:i + result[2]]],
|
||
(self.seq, result[1]), result[3], result[4], result[5])
|
||
case 5:
|
||
self.final_ans_insert([x[0] for x in ans[i:i + result[2]]],
|
||
(self.seq, result[1]), result[3], result[4])
|
||
case 4:
|
||
self.final_ans_insert([x[0] for x in ans[i:i + result[2]]],
|
||
(self.seq, result[1]), result[3])
|
||
case 3:
|
||
self.final_ans_insert([x[0] for x in ans[i:i + result[2]]],
|
||
(self.seq, result[1]))
|
||
case _:
|
||
assert False
|
||
# 到i截止
|
||
temp = ans[i + result[2]:]
|
||
ans = ans[:i]
|
||
ans.append((self.seq, result[1]))
|
||
self.seq += 1
|
||
ans += temp
|
||
changed = True
|
||
redo = True
|
||
break
|
||
case "can't":
|
||
pass
|
||
if redo:
|
||
break
|
||
return ans
|
||
|
||
def WordAnalyze(self, lt: list[str]) -> list[(int, str)]:
|
||
ans = []
|
||
i = 0
|
||
while i < len(lt):
|
||
s = lt[i]
|
||
if s in self.keyword:
|
||
temp = (self.keyword[s], s)
|
||
ans = self.reduce(ans, temp)
|
||
ans.append((self.seq, temp))
|
||
self.final_ans.append((self.seq, temp))
|
||
self.seq += 1
|
||
# print(self.keyword[s], s)
|
||
elif s in self.symbol:
|
||
temp = (self.symbol[s], s)
|
||
ans = self.reduce(ans, temp)
|
||
ans.append((self.seq, temp))
|
||
self.final_ans.append((self.seq, temp))
|
||
self.seq += 1
|
||
# print(self.symbol[s], s)
|
||
else:
|
||
no_answer = True
|
||
for key, value in self.regex.items():
|
||
if re.match(key, s):
|
||
if value == 'CHARCON' or value == 'STRCON':
|
||
s = s[1:-1]
|
||
temp = (value, s)
|
||
ans = self.reduce(ans, temp)
|
||
ans.append((self.seq, temp))
|
||
self.final_ans.append((self.seq, temp))
|
||
self.seq += 1
|
||
# print(value, s)
|
||
no_answer = False
|
||
break
|
||
if no_answer:
|
||
ans.append(('error', 'error'))
|
||
self.final_ans.append((self.seq, temp))
|
||
i += 1
|
||
ans = self.reduce(ans, ('', ''))
|
||
return ans
|
||
|
||
def result_print(self) -> None:
|
||
with open('output.txt', 'w', encoding='utf-8') as f:
|
||
for vals in self.final_ans:
|
||
vals = vals[1]
|
||
if vals[0] in self.symbol.values() or vals[0] in self.keyword.values() or vals[
|
||
0] in self.regex.values():
|
||
print(vals[0], vals[1])
|
||
f.write(f'{vals[0]} {vals[1]}\n')
|
||
else:
|
||
print(f"<{self.yacc.translation_dict[vals[0]]}>")
|
||
f.write(f"<{self.yacc.translation_dict[vals[0]]}>\n")
|
||
return
|
||
|
||
def f(self) -> None:
|
||
s = self.readfile('testfile.txt')
|
||
s = self.strsplit(s)
|
||
s = self.WordAnalyze(s)
|
||
self.result_print()
|
||
print(s)
|
||
|
||
|
||
GrammaticalAnalysis().f()
|