import inspect import re class Yacc: translation_dict = { "String": "字符串", "Program": "程序", "ConstantDeclaration": "常量说明", "ConstantDefinition": "常量定义", "FunctionDefinitionWithReturnValue": "有返回值函数定义", "FunctionDefinitionWithoutReturnValue": "无返回值函数定义", "UnsignedInteger": "无符号整数", "Integer": "整数", "DeclarationHeader": "声明头部", "VariableDeclaration": "变量说明", "VariableDefinition": "变量定义", "CompoundStatement": "复合语句", "ParameterList": "参数表", "MainFunction": "主函数", "Expression": "表达式", "Term": "项", "Factor": "因子", "Statement": "语句", "AssignmentStatement": "赋值语句", "ConditionalStatement": "条件语句", "Condition": "条件", "LoopStatement": "循环语句", "Step": "步长", "FunctionCallWithReturnValue": "有返回值函数调用语句", "FunctionCallWithoutReturnValue": "无返回值函数调用语句", "ValueParameterList": "值参数表", "StatementList": "语句列", "ReadStatement": "读语句", "WriteStatement": "写语句", "ReturnStatement": "返回语句" } def is_INTTK_CHARTK(self, t: (str, str)) -> bool: if t[0] in ['INTTK', 'CHARTK']: return True if t[0] == "Factor" and t[1] in ['INTCON', 'CHARCON']: return True if t[0] == "Term" and t[1] in ['INTCON', 'CHARCON']: return True return False def is_IDENFR(self, t: (str, str)) -> bool: if t[0] == 'IDENFR': return True if t[0] == "Factor" and t[1] == "IDENFR": return True if t[0] == "Term" and t[1] == "IDENFR": return True if t[0] == "Expression" and t[1] == "IDENFR": return True return False def is_Integer_CHARCON(self, t: (str, str)) -> bool: if t[0] == 'Integer' or t[0] == 'CHARCON': return True if t[0] == 'Factor' and (t[1] == 'Integer' or t[1] == 'CHARCON'): return True if t[0] == 'Term' and (t[1] == 'Integer' or t[1] == 'CHARCON'): return True if t[0] == 'Expression' and (t[1] == 'Integer' or t[1] == 'CHARCON'): return True return False def is_Term(self, t: (str, str)) -> bool: if t[0] == 'Term': return True if t[0] == 'Expression' and t[1] == 'Term': return True return False def is_RelationOperator(self, t: (str, str)) -> bool: if t[0] in ['LSS', 'LEQ', 'GRE', 'GEQ', 'NEQ', 'EQL']: return True return False def is_UnsignedInteger(self, t: (str, str)) -> bool: if t[0] == 'UnsignedInteger': return True if t[0] == 'Integer' and t[1] >= 0: return True if t[0] == 'Factor' and t[1] == 'UnsignedInteger': return True if t[0] == 'Term' and t[1] == 'UnsignedInteger': return True if t[0] == 'Expression' and t[1] == 'UnsignedInteger': return True return False # <字符串> ::= "{十进制编码为32,33,35‐126的ASCII字符}" def String(self, lt: list[(str, str)]) -> (str, (str, str), int): regex = r'[ -!#-~]*' if len(lt) == 1 and lt[0][0] == "STRCON" and re.match(regex, lt[0][1]): return "doit", ("String", lt[0][1]) return "can't" # <程序> ::= [<常量说明>][<变量说明>]{<有返回值函数定义>|<无返回值函数定义>} <主函数> def Program(self, lt: list[(str, str)]) -> (str, (str, str), int): if len(lt) < 2: return "can't" if len(lt) == 2 and ( lt[0][0] == 'FunctionDefinitionWithReturnValue' or lt[0][ 0] == 'FunctionDefinitionWithoutReturnValue') and lt[1][ 0] == 'MainFunction': return "doit", ("Program", "") if lt[0][0] == 'ConstantDeclaration' and lt[1][0] == 'VariableDeclaration': return self.Program(lt[2:]) if lt[0][0] == 'ConstantDefinition' or lt[1][0] == 'VariableDefinition': return self.Program(lt[1:]) return "can't" # 部分 <常量说明>: := const<常量定义>;{const<常量定义>;} def PartOfConstantDeclaration(self, t: (str, str)) -> bool: pt = ['CONSTTK', 'ConstantDefinition', 'SEMICN'] return t[0] in pt or self.PartOfConstantDefinition(t) # <常量说明>: := const<常量定义>;{const<常量定义>;} def ConstantDeclaration(self, lt: list[(str, str)], newadd: (str, str)) -> (str, (str, str), int): if lt[0][0] != 'CONSTTK' or len(lt) < 3: return "can't" newadd = newadd[0] if lt[0][0] == 'CONSTTK' and lt[1][0] == 'ConstantDefinition' and lt[2][0] == 'SEMICN': i = 3 while i < len(lt): if lt[i][0] != 'CONSTTK': return "part", ("ConstantDeclaration", ""), i if i + 1 == len(lt): if newadd == 'ConstantDefinition' or self.PartOfConstantDefinition((newadd, "")): return "can't" else: return "part", ("ConstantDeclaration", ""), i elif lt[i + 1][0] != 'ConstantDefinition': if self.PartOfConstantDefinition(lt[i + 1]): return "can't" else: return "part", ("ConstantDeclaration", ""), i if i + 2 == len(lt): if newadd == 'SEMICN': return "can't" else: return "part", ("ConstantDeclaration", ""), i elif lt[i + 2][0] != 'SEMICN': return "part", ("ConstantDeclaration", ""), i i += 3 if newadd == 'CONSTTK': return "can't" else: return "doit", ("ConstantDeclaration", "") return "can't" # 部分 <常量定义>: := int<标识符>=<整数>{,<标识符>=<整数>} | char<标识符>=<字符>{,<标识符>=<字符>} def PartOfConstantDefinition(self, t: (str, str)) -> bool: pt = ['INTTK', 'CHARTK', 'IDENFR', 'ASSIGN', 'INTCON', 'COMMA', 'CHARCON'] return t[0] in pt # <常量定义>: := int<标识符>=<整数>{,<标识符>=<整数>} | char<标识符>=<字符>{,<标识符>=<字符>} def ConstantDefinition(self, lt: list[(str, str)], newadd: (str, str)) -> (str, (str, str), int): if (not (self.is_INTTK_CHARTK(lt[0]))) or len(lt) < 4: return "can't" newadd = newadd[0] if self.is_INTTK_CHARTK(lt[0]) and self.is_IDENFR(lt[1]) and lt[2][0] == 'ASSIGN' and self.is_Integer_CHARCON( lt[3]): i = 4 while i < len(lt): if lt[i][0] != 'COMMA': return "part", ("ConstantDefinition", ""), i if i + 1 == len(lt): if newadd == 'IDENFR': return "can't" else: return "part", ("ConstantDefinition", ""), i elif not self.is_IDENFR(lt[i + 1]): return "part", ("ConstantDefinition", ""), i if i + 2 == len(lt): if newadd == 'ASSIGN': return "can't" else: return "part", ("ConstantDefinition", ""), i elif lt[i + 2][0] != 'ASSIGN': return "part", ("ConstantDefinition", ""), i if i + 3 == len(lt): if newadd == 'Integer' or newadd == 'CHARCON' or self.PartOfInteger((newadd, "")): return "can't" else: return "part", ("ConstantDefinition", ""), i elif not self.is_Integer_CHARCON(lt[i + 3]): if self.PartOfInteger(lt[i + 3]): return "can't" return "part", ("ConstantDefinition", ""), i i += 4 if newadd == 'COMMA': return "can't" return "doit", ("ConstantDefinition", "") return "can't" # 部分 <无符号整数>: := <非零数字>{<数字>} | 0 def PartOfUnsignedInteger(self, t: (str, str)) -> bool: return t[0] == 'INTCON' # <无符号整数>: := <非零数字>{<数字>} | 0 def UnsignedInteger(self, lt: list[(str, str)]) -> (str, (str, str), int): if len(lt) < 1: return "can't" if lt[0][0] == 'INTCON': if lt[0][1][0] == '0' and len(lt[0][1]) != 1: return "can't" return "doit", ("UnsignedInteger", lt[0][1]) return "can't" # 部分 <整数> ::= [+|-]<无符号整数> def PartOfInteger(self, t: (str, str)) -> bool: pt = ['UnsignedInteger', 'PLUS', 'MINU'] return t[0] in pt or self.PartOfUnsignedInteger(t) # <整数> ::= [+|-]<无符号整数> def Integer(self, lt: list[(str, str)]) -> (str, (str, str), int): if len(lt) < 1: return "can't" if lt[0][0] == 'UnsignedInteger': return "doit", ("Integer", lt[0][1]) if len(lt) >= 2 and lt[0][0] in ["MINU", "PLUS"] and lt[1][0] == 'UnsignedInteger': if len(lt) == 2: return "doit", ('Integer', (-1 if lt[0][0] == "MINU" else 1) * lt[1][1]) else: return "part", ("Integer", (-1 if lt[0][0] == "MINU" else 1) * lt[1][1]), 2 return "can't" # 部分 <声明头部> ::= int<标识符> |char<标识符> def PartOfDeclarationHeader(self, t: (str, str)) -> bool: pt = ['INTTK', 'CHARTK'] return t[0] in pt or self.is_IDENFR(t) # <声明头部> ::= int<标识符> |char<标识符> def DeclarationHeader(self, lt: list[(str, str)], newadd: (str, str)) -> (str, (str, str), int): if len(lt) < 2: return "can't" newadd = newadd[0] if (lt[0][0] == 'INTTK' or lt[0][0] == 'CHARTK') and self.is_IDENFR(lt[1]): if len(lt) == 2: if newadd == 'LPARENT': return "doit", ("DeclarationHeader", "") elif lt[2][0] == 'LPARENT': return "doit", ("DeclarationHeader", "") return "can't" # <变量说明>: := <变量定义>;{<变量定义>;} def VariableDeclaration(self, lt: list[(str, str)], newadd: (str, str)) -> (str, (str, str), int): def is_seq(lt: list[(str, str)]) -> str: if len(lt) == 1 and lt[0][0] == 'VariableDefinition': return "can't" if lt[0][0] == 'VariableDefinition' and lt[1][0] == 'SEMICN': if len(lt) == 2: return "doit" else: return "part" if lt[0][0] != 'VariableDefinition' or lt[1][0] != 'SEMICN': if self.PartOfTYPEIDENFR(lt[0][0]): return "can't" return "not" if len(lt) < 2: return "can't" if lt[0][0] != 'VariableDefinition' or lt[1][0] != 'SEMICN': return "can't" newadd = newadd[0] i = 2 while i < len(lt): match is_seq(lt[i:]): case "doit": if newadd == 'VariableDefinition' or self.PartOfVariableDefinition((newadd, "")): return "can't" else: return "doit", ("VariableDeclaration", "") case "part": i += 2 case "not": if self.PartOfVariableDefinition((newadd, "")) or newadd == 'SEMICN': return "can't" return "part", ("VariableDeclaration", ""), i case "can't": return "can't" if newadd == 'VariableDefinition' or self.PartOfVariableDefinition((newadd, "")): return "can't" else: return "doit", ("VariableDeclaration", "") # 部分 <类型标识符> ::= int | char def PartOfTYPEIDENFR(self, t: (str, str)) -> bool: lt = ['INTTK', 'CHARTK'] return t[0] in lt # 部分 <变量定义> ::= <类型标识符>(<标识符>|<标识符>'['<无符号整数>']'){,(<标识符>|<标识符>'['<无符号整数>']' )} //<无符号整数>表示数组元素的个数,其值需大于0 def PartOfVariableDefinition(self, t: (str, str)) -> bool: lt = ['UnsignedInteger', 'LBRACK', 'RBRACK'] return t in lt or self.is_IDENFR(t) or self.PartOfTYPEIDENFR(t) # <变量定义> ::= <类型标识符>(<标识符>|<标识符>'['<无符号整数>']'){,(<标识符>|<标识符>'['<无符号整数>']' )} //<无符号整数>表示数组元素的个数,其值需大于0 def VariableDefinition(self, lt: list[(str, str)], newadd: (str, str), last: (str, str)) -> (str, (str, str), int): def is_seq(lt: list[(str, str)]) -> str: # 3 for long_assign 2 for short_assign, 1 for part, 0 for not ls = ['IDENFR', 'LBRACK', 'UnsignedInteger', 'RBRACK'] if self.is_IDENFR(lt[0]): return 'not' if len(lt) == 1 and self.is_IDENFR(lt[0]): return "doit" if len(lt) == 4 and self.is_IDENFR(lt[0]) and lt[1][0] == 'LBRACK' and self.is_UnsignedInteger(lt[2]) and \ lt[3][0] == 'RBRACK': return "doit" if len(lt) >= 4 and self.is_IDENFR(lt[0]) and lt[1][0] == 'LBRACK' and self.is_UnsignedInteger(lt[2]) and \ lt[3][0] == 'RBRACK': return "part2" if len(lt) >= 1 and self.is_IDENFR(lt[0]) and lt[1][0] != 'LBRACK': return "part1" bl = True for i in range(len(lt)): if lt[i][0] != ls[i]: bl = False break if bl: return "can't" else: return "not" if len(lt) < 2 or (lt[0][0] != 'INTTK' and lt[0][0] != 'CHARTK'): return "can't" if last[0] == 'CONSTTK' or last[0] == 'COMMA' or last[0] == 'LPARENT': return "can't" newadd = newadd[0] i = 0 if len(lt) >= 5: if (lt[0][0] == 'INTTK' or lt[0][0] == 'CHARTK') and self.is_IDENFR(lt[1]) and lt[2][0] == 'LBRACK' and \ self.is_UnsignedInteger(lt[3]) and lt[4][0] == 'RBRACK': i = 5 elif (lt[0][0] == 'INTTK' or lt[0][0] == 'CHARTK') and self.is_IDENFR(lt[1]): i = 2 while i < len(lt): if lt[i][0] != 'COMMA': return "part", ("VariableDefinition", ""), i i += 1 while i < len(lt): match is_seq(lt[i:]): case "doit": if newadd == 'COMMA': return "can't" else: return "doit", ("VariableDefinition", "") case "part1": if lt[i + 1] == 'COMMA': i = i + 2 else: return "part", ("VariableDefinition", ""), i + 1 case "part2": if lt[i + 4] == 'COMMA': i = i + 5 else: return "part", ("VariableDefinition", ""), i + 4 case "can't": return "can't" case "not": return "part", ("VariableDefinition", ""), i - 1 if newadd == 'IDENFR': return "can't" else: return "part", ("VariableDefinition", ""), i - 1 if newadd in ['COMMA', 'LBRACK']: return "can't" else: return "doit", ("VariableDefinition", "") # <有返回值函数定义> ::= <声明头部>'('<参数表>')' '{'<复合语句>'}' def FunctionDefinitionWithReturnValue(self, lt: list[(str, str)]) -> (str, (str, str), int): if len(lt) < 7: return "can't" if lt[0][0] == 'DeclarationHeader' and lt[1][0] == 'LPARENT' and lt[2][0] == 'ParameterList' and lt[3][ 0] == 'RPARENT' and lt[4][0] == 'LBRACE' and lt[5][0] == 'CompoundStatement' and lt[6][0] == 'RBRACE': if len(lt) == 7: return "doit", ("FunctionDefinitionWithReturnValue", "") else: return "part", ("FunctionDefinitionWithReturnValue", ""), 7 return "can't" # <无返回值函数定义> ::= void<标识符>'('<参数表>')''{'<复合语句>'}' def FunctionDefinitionWithoutReturnValue(self, lt: list[(str, str)]) -> (str, (str, str), int): if len(lt) < 8: return "can't" if lt[0][0] == 'VOIDTK' and self.is_IDENFR(lt[1]) and lt[2][0] == 'LPARENT' and lt[3][0] == 'ParameterList' and \ lt[4][0] == 'RPARENT' and lt[5][0] == 'LBRACE' and lt[6][0] == 'CompoundStatement' and lt[7][ 0] == 'RBRACE': if len(lt) == 8: return "doit", ("FunctionDefinitionWithoutReturnValue", "") else: return "part", ("FunctionDefinitionWithoutReturnValue", ""), 8 return "can't" # <复合语句> ::= [<常量说明>][<变量说明>]<语句列> def CompoundStatement(self, lt: list[(str, str)]) -> (str, (str, str), int): if len(lt) < 1: return "can't" if lt[0][0] == 'StatementList': if len(lt) == 1: return "doit", ("CompoundStatement", "") else: return "part", ("CompoundStatement", ""), 1 if len(lt) < 2: return "can't" if (lt[0][0] == 'ConstantDeclaration' or lt[0][0] == 'VariableDeclaration') and lt[1][0] == 'StatementList': if len(lt) == 2: return "doit", ("CompoundStatement", "") else: return "part", ("CompoundStatement", ""), 2 if len(lt) < 3: return "can't" if lt[0][0] == 'ConstantDeclaration' and lt[1][0] == 'VariableDeclaration' and lt[2][0] == 'StatementList': if len(lt) == 3: return "doit", ("CompoundStatement", "") else: return "part", ("CompoundStatement", ""), 3 return "can't" # <参数表> ::= <类型标识符><标识符>{,<类型标识符><标识符>}| <空> def ParameterList(self, lt: list[(str, str)], newadd: (str, str), last: (str, str)) -> (str, (str, str), int): def is_seq(lt: list[(str, str)]) -> str: ls = ['COMMA', ['INTTK', 'CHARTK'], 'IDENFR'] for i in range(len(lt)): if i >= len(ls): return "full" if i == 1: if lt[i][0] not in ls[i]: return "not" elif i == 2: if not self.is_IDENFR(lt[i]): return "not" else: if lt[i][0] != ls[i]: return "not" if len(lt) < 3: return "can't" else: return "doit" if len(lt) < 2 or last[0] != 'LPARENT' or newadd[0] != 'RPARENT': return "can't" if lt[0][0] not in ["INTTK", "CHARTK"] or not self.is_IDENFR(lt[1]): return "can't" i = 2 while i < len(lt): match is_seq(lt[i:]): case "doit": return "doit", ("ParameterList", "") case "can't": return "can't" case "not": return "can't" case "full": i += 3 return "doit", ("ParameterList", "") # <主函数> ::= void main‘(’‘)’ ‘{’<复合语句>‘}’ def MainFunction(self, lt: list[(str, str)]) -> (str, (str, str), int): if len(lt) < 7: return "can't" if lt[0][0] == 'VOIDTK' and lt[1][0] == 'MAINTK' and lt[2][0] == 'LPARENT' and lt[3][0] == 'RPARENT' and \ lt[4][0] == 'LBRACE' and lt[5][0] == 'CompoundStatement' and lt[6][0] == 'RBRACE': if len(lt) == 7: return "doit", ("MainFunction", "") else: return "part", ("MainFunction", ""), 7 return "can't" # <表达式>: := [+|-]<项>{<加法运算符><项>} // [+ |‐]只作用于第一个 < 项 > def Expression(self, lt: list[(str, str)], newadd: (str, str)) -> (str, (str, str), int, bool, list[int], bool): def is_seq(lt: list[(str, str)]) -> str: ls = [['PLUS', 'MINU'], 'Term'] if len(lt) == 1 and lt[0][0] in ls[0]: return "part" if len(lt) >= 2 and lt[0][0] in ls[0] and lt[1][0] == ls[1]: if len(lt) == 2: return "doit" if len(lt) > 2: return "full" if lt[0][0] not in ls[1]: return "not" if len(lt) >= 2 and lt[0][0] in ls[0] and lt[1][0] != ls[1]: return "not" if len(lt) < 1: return "can't" i = 0 if len(lt) >= 2 and lt[0][0] in ['PLUS', 'MINU'] and self.is_Term(lt[1]): i = 2 elif self.is_Term(lt[0]): i = 1 else: return "can't" newadd = newadd[0] while i < len(lt): match is_seq(lt[i:]): case "doit": if newadd in ["PLUS", "MINU"]: return "can't" else: return "doit", ("Expression", "Term"), False, [t for t in range(i + 2) if lt[t][0] == "Expression"], False case "part": if newadd == "Term" or (i == 1 and lt[0][0] == "Expression"): return "can't" else: return "part", ("Expression", "Term"), i, False case "full": i += 2 case "not": if i == 1 and lt[0][0] == "Expression": return "can't" return "part", ("Expression", "Term"), i, False, [t for t in range(i) if lt[t][0] == "Expression"], False if newadd in ["PLUS", "MINU"]: return "can't" else: if lt[i - 1][0] == "Expression": return "can't" return "doit", ("Expression", lt[i - 1][1]), False # <项>: := <因子>{<乘法运算符><因子>} def Term(self, lt: list[(str, str)], newadd: (str, str)) -> (str, (str, str), int, bool): def is_seq(lt: list[(str, str)]) -> str: ls = ['Factor', ['MULT', 'DIV', 'MOD']] if len(lt) == 1 and lt[0][0] == ls[0]: return "part" if len(lt) >= 2 and lt[0][0] == ls[0] and lt[1][0] in ls[1]: if len(lt) == 2: return "doit" if len(lt) > 2: return "full" if lt[0][0] != ls[0]: return "notA" if len(lt) >= 2 and lt[0][0] == ls[0] and lt[1][0] not in ls[1]: return "notB" if len(lt) < 1: return "can't" i = 0 if lt[0][0] == 'Factor': i = 1 else: return "can't" while i < len(lt): match is_seq(lt[i:]): case "doit": if newadd in ['MULT', 'DIV', 'MOD']: return "can't" else: return "doit", ("Term", ""), False case "part": if self.PartOfFactor((newadd, "")): return "can't" else: return "part", ("Term", lt[0][1] if i == 1 else ""), i, False case "full": i += 2 case "notA": if self.PartOfFactor((lt[i], "")): return "can't" return "part", ("Term", lt[0][1] if i == 1 else ""), i, False case "notB": return "part", ("Term", lt[0][1] if i == 1 else ""), i, False if newadd in ['MULT', 'DIV', 'MOD']: return "can't" else: return "doit", ("Term", lt[0][1]), False # 部分 <因子>: := <标识符>|<标识符>'['<表达式>']' | '('<表达式>')'|<整数> | <字符>|<有返回值函数调用语句> def PartOfFactor(self, t: (str, str)) -> bool: pt = ['IDENFR', 'LPARENT', 'RPARENT', 'LBRACK', 'RBRACK', 'Integer', 'CHARCON', 'FunctionDefinitionWithReturnValue'] return t[0] in pt # <因子>: := <标识符>|<标识符>'['<表达式>']' | '('<表达式>')'|<整数> | <字符>|<有返回值函数调用语句> def Factor(self, lt: list[(str, str)]) -> (str, (str, str), int): if len(lt) < 1: return "can't" if len(lt) > 4: if self.is_IDENFR(lt[0]) and lt[1][0] == 'LBRACK' and lt[2][0] == 'Expression' and lt[3][0] == 'RBRACK': if len(lt) == 4: return "doit", ("Factor", "") else: return "part", ("Factor", ""), 4 elif len(lt) > 3: if lt[0][0] == 'LPARENT' and lt[1][0] == 'Expression' and lt[2][0] == 'RPARENT': if len(lt) == 3: return "doit", ("Factor", "") else: return "part", ("Factor", ""), 3 elif lt[0][0] in ["IDENFR", "Integer", "CHARCON", "FunctionCallWithReturnValue"]: if len(lt) == 1: return "doit", ("Factor", lt[0][0]) return "part", ("Factor", lt[0][0]), 1 return "can't" # 部分 <语句> ::= <条件语句>|<循环语句>| '{'<语句列>'}'| <有返回值函数调用语句>; |<无返回值函数调用语句>;|<赋值语句>;|<读语句>;|<写语句>;|<空>;|<返回语句>; def PartOfStatement(self, t: (str, str)) -> bool: pt = ['ConditionStatement', 'LoopStatement', 'LBRACE', 'StatementList', 'RBRACE', 'FunctionCallWithReturnValue', 'FunctionCallWithoutReturnValue', 'AssignmentStatement', 'ReadStatement', 'WriteStatement', 'SEMICN', 'ReturnStatement', 'SEMICN'] return t in pt # <语句> ::= <条件语句>|<循环语句>| '{'<语句列>'}'| <有返回值函数调用语句>; |<无返回值函数调用语句>;|<赋值语句>;|<读语句>;|<写语句>;|<空>;|<返回语句>; def Statement(self, lt: list[(str, str)], _: (str, str), last: (str, str), last_of_last: (str, str)) -> ( str, (str, str), int): if len(lt) < 1: return "can't" if last_of_last[0] == "FORTK": return "can't" if lt[0][0] == 'ConditionStatement': if len(lt) == 1: return "doit", ("Statement", "") else: return "part", ("Statement", ""), 1 if lt[0][0] == 'LoopStatement': if len(lt) == 1: return "doit", ("Statement", "") else: return "part", ("Statement", ""), 1 if len(lt) > 2 and lt[0][0] == 'LBRACE' and lt[1][0] == 'StatementList' and lt[2][0] == 'RBRACE': if len(lt) == 3: return "doit", ("Statement", "") else: return "part", ("Statement", ""), 3 if len(lt) >= 2 and ( lt[0][0] == 'FunctionCallWithReturnValue' or lt[0][0] == 'FunctionCallWithoutReturnValue' or lt[0][ 0] == 'AssignmentStatement' or lt[0][0] == 'ReadStatement' or lt[0][0] == 'WriteStatement' or lt[0][ 0] == 'ReturnStatement') and lt[1][0] == 'SEMICN': if len(lt) == 2: return "doit", ("Statement", "") else: return "part", ("Statement", ""), 1 if lt[0][0] == 'SEMICN' and last[0] not in ["VariableDefinition", "ConstantDefinition", "Expression", "ConditionStatement", "Condition"]: if len(lt) == 1: return "doit", ("Statement", "") else: return "part", ("Statement", ""), 1 return "can't" # <赋值语句> ::= <标识符>=<表达式>|<标识符>'['<表达式>']'=<表达式> def AssignmentStatement(self, lt: list[(str, str)], newadd: (str, str), last: (str, str), last_of_last: (str, str)) -> (str, (str, str), int, bool, list[int]): if len(lt) < 3 or self.PartOfTYPEIDENFR(last) or last_of_last[0] == "FORTK": return "can't" if self.is_IDENFR(lt[0]) and lt[1][0] == 'ASSIGN' and lt[2][0] == 'Expression': if len(lt) == 3: if newadd in ['PLUS', 'MINU']: return "can't" return "doit", ("AssignmentStatement", ""), False, ([0] if lt[0][0] != "IDENFR" else []) else: if lt[3][0] in ['PLUS', 'MINU']: return "can't" return "part", ("AssignmentStatement", ""), 3, False, ([0] if lt[0][0] != "IDENFR" else []) if len(lt) >= 6 and self.is_IDENFR(lt[0]) and lt[1][0] == 'LBRACK' and lt[2][0] == 'Expression' and lt[3][ 0] == 'RBRACK' and lt[4][0] == 'ASSIGN' and lt[5][0] == 'Expression': if len(lt) == 6: if newadd in ['PLUS', 'MINU']: return "can't" return "doit", ("AssignmentStatement", ""), False, ([0] if lt[0][0] != "IDENFR" else []) else: if lt[6][0] in ['PLUS', 'MINU']: return "can't" return "part", ("AssignmentStatement", ""), 6, False, ([0] if lt[0][0] != "IDENFR" else []) return "can't" # <条件语句> ::= if '('<条件>')'<语句>[else<语句>] def ConditionalStatement(self, lt: list[(str, str)], newadd: (str, str)) -> (str, (str, str), int): if len(lt) < 5: return "can't" if lt[0][0] != "IFTK" or lt[1][0] != "LPARENT" or lt[2][0] != "Condition" or lt[3][0] != "RPARENT" or lt[4][ 0] != "Statement": return "can't" if len(lt) == 5: if newadd == "ELSETK": return "can't" return "doit", ("ConditionalStatement", "") if len(lt) >= 7 and lt[5][0] == "ELSETK" and lt[6][0] == "Statement": if len(lt) == 7: return "doit", ("ConditionalStatement", "") else: return "part", ("ConditionalStatement", ""), 7 if len(lt) == 6: if lt[5][0] != "ELSETK": return "part", ("ConditionalStatement", ""), 5 if newadd[0] == "Statement" or self.PartOfStatement(newadd): return "can't" return "part", ("ConditionalStatement", ""), 5 # <条件> ::= <表达式><关系运算符><表达式> |<表达式> //整型表达式之间才能进行关系运算 //表达式为整型,其值为0条件为假,值不为0时条件为真 def Condition(self, lt: list[(str, str)], newadd: (str, str), last: (str, str), last_of_last: (str, str)) -> ( str, (str, str), int): if len(lt) < 1: return "can't" if not ((last[0] == 'LPARENT' and newadd[0] == 'RPARENT' and last_of_last[0] == "IFTK") or ( last[0] == 'LPARENT' and newadd[0] == 'RPARENT' and last_of_last[0] == "WHILETK") or ( last[0] == 'SEMICN' and newadd[0] == 'SEMICN' and last_of_last[0] == "Expression")): return "can't" if lt[0][0] == "Expression": if len(lt) == 1: return "doit", ("Condition", "") if len(lt) == 3 and self.is_RelationOperator(lt[1]) and lt[2][0] == "Expression": return "doit", ("Condition", "") return "can't" return "can't" # <循环语句> ::= while '('<条件>')'<语句>| do<语句>while '('<条件>')' |for'('<标识符>=<表达式>;<条件>;<标识符>=<标识符>(+|‐)<步长>')'<语句> def LoopStatement(self, lt: list[(str, str)]) -> (str, (str, str), int): if len(lt) < 5: return "can't" if lt[0][0] == "WHILETK" and lt[1][0] == "LPARENT" and lt[2][0] == "Condition" and lt[3][0] == "RPARENT" and \ lt[4][0] == "Statement": if len(lt) == 5: return "doit", ("LoopStatement", "") else: return "part", ("LoopStatement", ""), 5 if len(lt) < 6: return "can't" if lt[0][0] == "DOTK" and lt[1][0] == "Statement" and lt[2][0] == "WHILETK" and lt[3][0] == "LPARENT" and lt[4][ 0] == "Condition" and lt[5][0] == "RPARENT": if len(lt) == 6: return "doit", ("LoopStatement", "") else: return "part", ("LoopStatement", ""), 6 if len(lt) < 15: return "can't" if lt[0][0] == "FORTK" and lt[1][0] == "LPARENT" and self.is_IDENFR(lt[2]) and lt[3][0] == "ASSIGN" and lt[4][ 0] == "Expression" and lt[5][0] == "SEMICN" and lt[6][0] == "Condition" and lt[7][ 0] == "SEMICN" and self.is_IDENFR([8]) and lt[9][0] == "ASSIGN" and self.is_IDENFR(lt[10]) and lt[11][ 0] in ['PLUS', 'MINU'] and lt[12] == "Step" and lt[13][0] == "RPARENT" and lt[14][0] == "Statement": if len(lt) == 15: return "doit", ("LoopStatement", "") else: return "part", ("LoopStatement", ""), 15 return "can't" # <步长>::= <无符号整数> def Step(self, lt: list[(str, str)], newadd: (str, str), last: (str, str), last_of_last: (str, str)) -> ( str, (str, str), int): if len(lt) < 1: return "can't" if last[0] not in ["PLUS", "MINU"] or not self.is_IDENFR(last_of_last): return "can't" if lt[0][0] == "UnsignedInteger": if len(lt) == 1: if newadd == "RPARENT": return "doit", ("Step", "") else: if lt[2] == "RPARENT": return "part", ("Step", ""), 1 return "can't" # <有返回值函数调用语句>: := <标识符>'('<值参数表>')' def FunctionCallWithReturnValue(self, lt: list[(str, str)], newadd: (str, str), last: (str, str)) -> ( str, (str, str), int): if len(lt) < 4: return "can't" if len(lt) >= 4 and self.is_IDENFR(lt[0]) and lt[1][0] == "LPARENT" and lt[2][0] == "ValueParameterList" and \ lt[3][0] == "RPARENT": if len(lt) == 4: return "doit", ("FunctionCallWithReturnValue", "") else: return "part", ("FunctionCallWithReturnValue", ""), 4 return "can't" # <无返回值函数调用语句> ::= <标识符>'('<值参数表>')' 无 # <值参数表> ::= <表达式>{,<表达式>}|<空> def ValueParameterList(self, lt: list[(str, str)], newadd: (str, str), last: (str, str), last_of_last: (str, str)) -> (str, (str, str), int, bool): def is_seq(lt: list[(str, str)]) -> str: ls = ['COMMA', 'Expression'] if lt[0][0] != ls[0]: return "not" if len(lt) == 1 and lt[0][0] == ls[0]: return "can't" if len(lt) >= 2 and lt[0][0] == ls[0] and lt[1][0] == ls[1]: if len(lt) == 2: return "doit" if len(lt) > 2: return "full" else: return "not" if len(lt) < 1: return "can't" if lt[0][0] != "Expression" or newadd[0] != "RPARENT" or last[0] != "LPARENT" or last_of_last[0] == "RETURNTK": return "can't" i = 1 while i < len(lt): match is_seq(lt[i:]): case "doit": if newadd == "COMMA": return "can't" return "doit", ("ValueParameterList", ""), False case "full": i += 2 case "not": if lt[i][0] == "RPARENT": return "part", ("ValueParameterList", ""), i, False return "can't" case "can't": return "can't" return "can't" # <语句列> ::= {<语句>} def StatementList(self, lt: list[(str, str)], newadd: (str, str), last: (str, str)) -> (str, (str, str), int): if len(lt) < 1: return "can't" if last[0] != "LBRACE": return "can't" for i in range(len(lt)): if lt[i][0] != "Statement": if lt[i][0] == "RBRACE": return "part", ("StatementList", ""), i return "can't" if newadd[0] == "RBRACE": return "doit", ("StatementList", ""), i else: return "can't" # <读语句> ::= scanf '('<标识符>{,<标识符>}')' def ReadStatement(self, lt: list[(str, str)]) -> (str, (str, str), int): if len(lt) < 4: return "can't" if lt[0][0] != "SCANFTK": return "can't" if lt[0][0] == "SCANFTK" and lt[1][0] == "LPARENT" and self.is_IDENFR(lt[2]): for i in range(3, len(lt)): if not self.is_IDENFR(lt[i]): if lt[i] == "RPARENT": if i == len(lt) - 1: return "doit", ("ReadStatement", "") return "part", ("ReadStatement", ""), i return "can't" return "can't" # <写语句> ::= printf '(' <字符串>,<表达式> ')'| printf '('<字符串> ')'| printf '('<表达式>')' def WriteStatement(self, lt: list[(str, str)]) -> (str, (str, str), int, bool): if len(lt) < 4: return "can't" if lt[0][0] != "PRINTFTK": return "can't" if lt[0][0] == "PRINTFTK" and lt[1][0] == "LPARENT" and (lt[2][0] == "String" or lt[2][0] == "Expression") and \ lt[3][0] == "RPARENT": if len(lt) == 4: return "doit", ("WriteStatement", ""), False else: return "part", ("WriteStatement", ""), 4, False if len(lt) >= 6 and lt[0][0] == "PRINTFTK" and lt[1][0] == "LPARENT" and lt[2][0] == "String" and lt[3][ 0] == "COMMA" and lt[4][0] == "Expression" and lt[5][0] == "RPARENT": if len(lt) == 6: return "doit", ("WriteStatement", ""), False else: return "part", ("WriteStatement", ""), 6, False return "can't" # <返回语句> ::= return['('<表达式>')'] def ReturnStatement(self, lt: list[(str, str)], addnew: (str, str)) -> (str, (str, str), int, bool): if len(lt) < 1: return "can't" if lt[0][0] != "RETURNTK": return "can't" if len(lt) == 1: if addnew[0] == "LPARENT": return "can't" return "doit", ("ReturnStatement", ""), False if len(lt) == 4 and lt[1][0] == "LPARENT" and lt[2][0] == "Expression" and lt[3][0] == "RPARENT": return "doit", ("ReturnStatement", ""), False return "can't" class GrammaticalAnalysis: keyword = { "const": "CONSTTK", "int": "INTTK", "char": "CHARTK", "void": "VOIDTK", "main": "MAINTK", "if": "IFTK", "else": "ELSETK", "do": "DOTK", "while": "WHILETK", "for": "FORTK", "continue": "CONTINUETK", "break": "BREAKTK", "scanf": "SCANFTK", "printf": "PRINTFTK", "return": "RETURNTK"} symbol = { "+": "PLUS", "-": "MINU", "*": "MULT", "/": "DIV", "%": "MOD", "&&": "AND", "||": "OR", ",": "COMMA", ";": "SEMICN", "(": "LPARENT", ")": "RPARENT", "[": "LBRACK", "]": "RBRACK", "{": "LBRACE", "}": "RBRACE", "=": "ASSIGN", "<": "LSS", "<=": "LEQ", ">": "GRE", ">=": "GEQ", "!=": "NEQ", "==": "EQL", "!": "NOT" } regex = { r'^[a-zA-Z][a-zA-Z0-9]*$': "IDENFR", r'^\d+$': "INTCON", r'^".*"$': "STRCON", r"^'.'$": "CHARCON" } yacc = Yacc() patterns = [] seq = 0 final_ans = [] def __init__(self): self.patterns = [method for method in dir(Yacc) if callable(getattr(Yacc, method)) and not method.startswith("__") and not method.startswith( "PartOf") and not method.startswith("is_")] def readfile(self, filename: str) -> str: s = '' with open(filename, 'r', encoding='utf-8') as f: s = ''.join(f.readlines()) return s def strsplit(self, s: str) -> list[str]: lt = [] l = 0 r = 1 while r < len(s): if not (s[r].isalpha() or s[r].isdigit()): k = s[l:r].strip() if len(k) != 0: lt.append(k) k = s[r].strip() if len(k) != 0: lt.append(k) l = r + 1 r = l else: r += 1 if l < len(s): lt.append(s[l:r]) i = 0 # 修整 符号配对 + 字符串查找 while i < len(lt) - 1: if lt[i] in self.symbol and lt[i] + lt[i + 1] in self.symbol: lt[i] = lt[i] + lt[i + 1] lt.pop(i + 1) elif lt[i] == '"': i += 1 while i < len(lt): lt[i - 1] += " " + lt[i] lt.pop(i) if lt[i - 1][-1] == '"': i -= 1 break elif lt[i] == '\'': if i + 2 < len(lt) and lt[i + 2] == '\'': lt[i] += lt[i + 1] + lt[i + 2] lt.pop(i + 1) lt.pop(i + 1) i += 1 return lt def final_ans_insert(self, lt: list[int], newadd: (int, (str, str)), delete: bool = True, force_del: list[int] = [], cascade_delete: bool = True) -> None: i = 0 def cas_del(t: int): dele = 0 while t != -1: for k in self.final_ans: if k[0] == t: if k[1][0] not in ['Factor', 'Term', 'Expression']: t = -1 break t = k[2] if len(k) >= 3 and cascade_delete else -1 dele += 1 self.final_ans.remove(k) break return dele while i < len(self.final_ans) and len(lt) > 0: if self.final_ans[i][0] in lt: seq = self.final_ans[i][0] if len(lt) == 1: if delete == False: self.final_ans.insert(i + 1, (newadd[0], newadd[1], lt[0])) else: self.final_ans.insert(i + 1, newadd) if (delete and self.final_ans[i][1][0] in ['Factor', 'Term', 'Expression']) or \ self.final_ans[i][0] in force_del: i -= cas_del(self.final_ans[i][0]) lt.remove(seq) i += 1 # 规约!! def reduce(self, ans: list[(int, (str, str), any)], newadd: (str, str)) -> list[(int, str)]: changed = True while changed: changed = False for i in range(len(ans)): no_index_ans = [x[1] for x in ans] tokens = no_index_ans[i:] redo = False for method_name in self.patterns: method = getattr(self.yacc, method_name) if callable(method): result = [] r = len(inspect.getfullargspec(method).args) match r: case 5: result = method(tokens, newadd, (-1, -1) if i == 0 else no_index_ans[i - 1], (-1, -1) if i <= 1 else no_index_ans[i - 2]) case 4: result = method(tokens, newadd, (-1, -1) if i == 0 else no_index_ans[i - 1]) case 3: result = method(tokens, newadd) case 2: result = method(tokens) match result[0]: case "doit": # print(f"<{self.yacc.translation_dict[result[1][0]]}>") match len(result): case 5: self.final_ans_insert([x[0] for x in ans[i:]], (self.seq, result[1]), result[2], [ans[i + x][0] for x in result[3]], result[4]) case 4: self.final_ans_insert([x[0] for x in ans[i:]], (self.seq, result[1]), result[2], [ans[i + x][0] for x in result[3]]) case 3: self.final_ans_insert([x[0] for x in ans[i:]], (self.seq, result[1]), result[2]) case 2: self.final_ans_insert([x[0] for x in ans[i:]], (self.seq, result[1])) case _: assert False ans = ans[:i] ans.append((self.seq, result[1])) self.seq += 1 changed = True redo = True break case "part": # print(f"<{self.yacc.translation_dict[result[1][0]]}>") match len(result): case 6: self.final_ans_insert([x[0] for x in ans[i:i + result[2]]], (self.seq, result[1]), result[3], result[4], result[5]) case 5: self.final_ans_insert([x[0] for x in ans[i:i + result[2]]], (self.seq, result[1]), result[3], result[4]) case 4: self.final_ans_insert([x[0] for x in ans[i:i + result[2]]], (self.seq, result[1]), result[3]) case 3: self.final_ans_insert([x[0] for x in ans[i:i + result[2]]], (self.seq, result[1])) case _: assert False # 到i截止 temp = ans[i + result[2]:] ans = ans[:i] ans.append((self.seq, result[1])) self.seq += 1 ans += temp changed = True redo = True break case "can't": pass if redo: break return ans def WordAnalyze(self, lt: list[str]) -> list[(int, str)]: ans = [] i = 0 while i < len(lt): s = lt[i] if s in self.keyword: temp = (self.keyword[s], s) ans = self.reduce(ans, temp) ans.append((self.seq, temp)) self.final_ans.append((self.seq, temp)) self.seq += 1 # print(self.keyword[s], s) elif s in self.symbol: temp = (self.symbol[s], s) ans = self.reduce(ans, temp) ans.append((self.seq, temp)) self.final_ans.append((self.seq, temp)) self.seq += 1 # print(self.symbol[s], s) else: no_answer = True for key, value in self.regex.items(): if re.match(key, s): if value == 'CHARCON' or value == 'STRCON': s = s[1:-1] temp = (value, s) ans = self.reduce(ans, temp) ans.append((self.seq, temp)) self.final_ans.append((self.seq, temp)) self.seq += 1 # print(value, s) no_answer = False break if no_answer: ans.append(('error', 'error')) self.final_ans.append((self.seq, temp)) i += 1 ans = self.reduce(ans, ('', '')) return ans def result_print(self) -> None: with open('output.txt', 'w', encoding='utf-8') as f: for vals in self.final_ans: vals = vals[1] if vals[0] in self.symbol.values() or vals[0] in self.keyword.values() or vals[ 0] in self.regex.values(): print(vals[0], vals[1]) f.write(f'{vals[0]} {vals[1]}\n') else: print(f"<{self.yacc.translation_dict[vals[0]]}>") f.write(f"<{self.yacc.translation_dict[vals[0]]}>\n") return def f(self) -> None: s = self.readfile('testfile.txt') s = self.strsplit(s) s = self.WordAnalyze(s) self.result_print() print(s) GrammaticalAnalysis().f()