class GrammarAnalysis:
    """Tokenizer for a small ``begin ... end`` language.

    The pipeline in :meth:`f` reads one line from ``testfile.txt``, splits it
    into lexemes, maps every lexeme to a numeric token code and writes the
    ``code lexeme`` pairs to ``output.txt`` and stdout.
    """

    # Reserved words -> token code.
    keyword = {
        r'begin': 1,
        r'end': 2
    }
    # Operators and delimiters -> token code.
    symbol = {
        r'+': 13,
        r'-': 14,
        r'*': 15,
        r'/': 16,
        r';': 17,
        r'=': 18,
        r'<': 19,
        r'<>': 20,
        r'<=': 21,
        r'>': 22,
        r'>=': 23,
        r'(': 24,
        r')': 25,
        r'#': 0
    }
    # Lexeme pattern -> token code (10: identifier, 11: unsigned number).
    regex = {
        r'^[a-zA-Z][a-zA-Z0-9]*$': 10,
        r'^\d+$': 11
    }

    def readfile(self, filename: str) -> str:
        """Return the first non-blank line of *filename*, stripped.

        Returns an empty string when the file has no non-blank line.
        """
        with open(filename, 'r', encoding='utf-8') as f:
            for line in f:
                # Strip the line just read (not the accumulator) so that
                # leading blank lines are really skipped.
                line = line.strip()
                if line:
                    return line
        return ''

    def strsplit(self, s: str) -> list[str]:
        """Split *s* into lexemes.

        A lexeme is either a maximal run of letters/digits or a single
        non-whitespace character. Whitespace only separates lexemes.
        """
        tokens = []
        word = []
        for ch in s:
            if ch.isalpha() or ch.isdigit():
                word.append(ch)
                continue
            if word:
                tokens.append(''.join(word))
                word = []
            if ch.strip():  # empty after strip() means whitespace
                tokens.append(ch)
        if word:
            tokens.append(''.join(word))
        return tokens

    def analyze(self, lt: list[str]) -> list[tuple[int, str]]:
        """Classify every lexeme in *lt*.

        Returns ``(code, lexeme)`` pairs. Two neighbouring symbols that form
        a known two-character symbol (``<=``, ``>=``, ``<>``) are merged.
        A lexeme matching nothing yields ``(-1, 'error')``.
        """
        ans = []
        i = 0
        while i < len(lt):
            s = lt[i]
            if s in self.keyword:
                ans.append((self.keyword[s], s))
            elif s in self.symbol:
                pair = s + lt[i + 1] if i + 1 < len(lt) else None
                if pair in self.symbol:
                    ans.append((self.symbol[pair], pair))
                    i += 1  # the second character was consumed too
                else:
                    ans.append((self.symbol[s], s))
            else:
                for pattern, code in self.regex.items():
                    if re.match(pattern, s):
                        ans.append((code, s))
                        break
                else:
                    ans.append((-1, 'error'))
            i += 1
        return ans

    def result_print(self, lt: list[tuple[int, str]]) -> None:
        """Print the classified tokens and mirror them into ``output.txt``."""
        with open('output.txt', 'w', encoding='utf-8') as f:
            for key, value in lt:
                if key == -1:
                    print(value)
                    f.write(value + '\n')
                else:
                    print(key, value, sep=' ')
                    f.write(f'{key} {value}\n')

    def f(self) -> None:
        """Run the whole pipeline on ``testfile.txt``."""
        s = self.readfile('testfile.txt')
        s = self.strsplit(s)
        s = self.analyze(s)
        self.result_print(s)
def is_UnsignedInteger(self, t: tuple) -> bool:
    """Tell whether token *t* denotes an unsigned integer.

    *t* is a ``(kind, value)`` pair. It qualifies when it is an
    ``UnsignedInteger``, an ``Integer`` whose value is non-negative, or a
    ``Factor``/``Term``/``Expression`` whose value is the tag
    ``'UnsignedInteger'``.
    """
    kind, value = t[0], t[1]
    if kind == 'UnsignedInteger':
        return True
    if kind == 'Integer':
        # The value is usually the lexeme text, so it must be converted
        # before comparing; comparing a str with 0 raises TypeError.
        try:
            return int(value) >= 0
        except (TypeError, ValueError):
            return False
    return kind in ('Factor', 'Term', 'Expression') and value == 'UnsignedInteger'
# <Program> ::= [<ConstantDeclaration>][<VariableDeclaration>]
#               {<FunctionDefinitionWithReturnValue>|<FunctionDefinitionWithoutReturnValue>}
#               <MainFunction>
def Program(self, lt: list) -> (str, (str, str), int):
    """Try to reduce the whole of *lt* to a ``Program`` node.

    *lt* is a list of ``(kind, value)`` tokens. Returns
    ``("doit", ("Program", ""))`` when *lt* is an optional constant
    declaration, an optional variable declaration, any number of function
    definitions and finally exactly one ``MainFunction``; otherwise the
    string ``"can't"``.
    """
    functions = ('FunctionDefinitionWithReturnValue',
                 'FunctionDefinitionWithoutReturnValue')
    kinds = [tok[0] for tok in lt]
    pos = 0
    # Both declaration sections are optional but must appear in this order.
    if pos < len(kinds) and kinds[pos] == 'ConstantDeclaration':
        pos += 1
    if pos < len(kinds) and kinds[pos] == 'VariableDeclaration':
        pos += 1
    while pos < len(kinds) and kinds[pos] in functions:
        pos += 1
    # MainFunction has to be the last remaining token.
    if pos == len(kinds) - 1 and kinds[pos] == 'MainFunction':
        return "doit", ("Program", "")
    return "can't"
# <UnsignedInteger> ::= <NonZeroDigit>{<Digit>} | 0
def UnsignedInteger(self, lt: list) -> (str, (str, str), int):
    """Reduce a leading ``INTCON`` token to an ``UnsignedInteger`` node.

    Returns ``("doit", ("UnsignedInteger", text))`` when the first token is
    an ``INTCON`` without a superfluous leading zero, else ``"can't"``.
    """
    if not lt or lt[0][0] != 'INTCON':
        return "can't"
    digits = lt[0][1]
    # "0" itself is fine; "007" is not.
    padded = digits[0] == '0' and len(digits) != 1
    return "can't" if padded else ("doit", ("UnsignedInteger", digits))
# <Integer> ::= [+|-]<UnsignedInteger>
def Integer(self, lt: list) -> (str, (str, str), int):
    """Reduce an optionally signed ``UnsignedInteger`` to an ``Integer`` node.

    Returns ``("doit", ("Integer", value))`` when the match covers all of
    *lt*, ``("part", ("Integer", value), 2)`` when a signed number is
    followed by further tokens, and ``"can't"`` otherwise. For textual
    values a leading ``-`` is kept for negative numbers; numeric values are
    negated arithmetically.
    """
    if len(lt) < 1:
        return "can't"
    if lt[0][0] == 'UnsignedInteger':
        return "doit", ("Integer", lt[0][1])
    if len(lt) >= 2 and lt[0][0] in ("MINU", "PLUS") and lt[1][0] == 'UnsignedInteger':
        magnitude = lt[1][1]
        negative = lt[0][0] == "MINU"
        if isinstance(magnitude, str):
            # Multiplying a str by -1 would give "", so build the text.
            value = "-" + magnitude if negative else magnitude
        else:
            value = -magnitude if negative else magnitude
        if len(lt) == 2:
            return "doit", ('Integer', value)
        return "part", ("Integer", value), 2
    return "can't"
# Partial <VariableDefinition> ::= <TypeIdentifier>(<Identifier>|<Identifier>'['<UnsignedInteger>']')
#                                  {,(<Identifier>|<Identifier>'['<UnsignedInteger>']')}
def PartOfVariableDefinition(self, t: tuple) -> bool:
    """Tell whether token *t* can occur inside a variable definition.

    *t* is a ``(kind, value)`` pair; its kind is compared, not the pair.
    """
    bracket_parts = ('UnsignedInteger', 'LBRACK', 'RBRACK')
    return t[0] in bracket_parts or self.is_IDENFR(t) or self.PartOfTYPEIDENFR(t)
# <FunctionDefinitionWithReturnValue> ::= <DeclarationHeader>'('<ParameterList>')' '{'<CompoundStatement>'}'
def FunctionDefinitionWithReturnValue(self, lt: list) -> (str, (str, str), int):
    """Reduce the seven leading tokens to a function definition node.

    Returns ``"doit"`` with the node when *lt* is exactly the seven expected
    tokens, ``"part"`` with the node and width 7 when more tokens follow,
    and ``"can't"`` when the prefix does not match.
    """
    shape = ['DeclarationHeader', 'LPARENT', 'ParameterList', 'RPARENT',
             'LBRACE', 'CompoundStatement', 'RBRACE']
    width = len(shape)
    if len(lt) < width:
        return "can't"
    if [tok[0] for tok in lt[:width]] != shape:
        return "can't"
    node = ("FunctionDefinitionWithReturnValue", "")
    return ("doit", node) if len(lt) == width else ("part", node, width)
# <CompoundStatement> ::= [<ConstantDeclaration>][<VariableDeclaration>]<StatementList>
def CompoundStatement(self, lt: list) -> (str, (str, str), int):
    """Reduce optional declarations plus a statement list to one node.

    Returns ``"doit"`` with the node when the matched prefix is all of
    *lt*, ``"part"`` with the node and the prefix width when tokens remain,
    and ``"can't"`` when no accepted prefix is present.
    """
    shapes = (
        ('StatementList',),
        ('ConstantDeclaration', 'StatementList'),
        ('VariableDeclaration', 'StatementList'),
        ('ConstantDeclaration', 'VariableDeclaration', 'StatementList'),
    )
    kinds = tuple(tok[0] for tok in lt[:3])
    node = ("CompoundStatement", "")
    for shape in shapes:
        width = len(shape)
        if kinds[:width] == shape:
            return ("doit", node) if len(lt) == width else ("part", node, width)
    return "can't"
# <MainFunction> ::= void main '(' ')' '{' <CompoundStatement> '}'
def MainFunction(self, lt: list) -> (str, (str, str), int):
    """Reduce the seven tokens of ``void main() { ... }`` to one node.

    Returns ``"doit"`` with the node when *lt* is exactly those seven
    tokens, ``"part"`` with the node and width 7 when more tokens follow,
    and ``"can't"`` otherwise.
    """
    shape = ['VOIDTK', 'MAINTK', 'LPARENT', 'RPARENT',
             'LBRACE', 'CompoundStatement', 'RBRACE']
    width = len(shape)
    if len(lt) < width or [tok[0] for tok in lt[:width]] != shape:
        return "can't"
    node = ("MainFunction", "")
    if len(lt) == width:
        return "doit", node
    return "part", node, width
# Partial <Factor> ::= <Identifier> | <Identifier>'['<Expression>']' | '('<Expression>')'
#                      | <Integer> | <Character> | <FunctionCallWithReturnValue>
def PartOfFactor(self, t: tuple) -> bool:
    """Tell whether the kind of token *t* can occur inside a factor.

    *t* is a ``(kind, value)`` pair. The last alternative of the grammar is
    a function *call*, matching the kind that ``Factor`` itself accepts.
    """
    pt = ('IDENFR', 'LPARENT', 'RPARENT', 'LBRACK', 'RBRACK', 'Integer', 'CHARCON',
          'FunctionCallWithReturnValue')
    return t[0] in pt
# <Statement> ::= <ConditionalStatement> | <LoopStatement> | '{'<StatementList>'}'
#               | <FunctionCallWithReturnValue>; | <FunctionCallWithoutReturnValue>;
#               | <AssignmentStatement>; | <ReadStatement>; | <WriteStatement>;
#               | <Empty>; | <ReturnStatement>;
def Statement(self, lt: list, _: tuple, last: tuple, last_of_last: tuple) -> (
        str, (str, str), int):
    """Try to reduce the start of *lt* to a ``Statement`` node.

    *lt* holds ``(kind, value)`` tokens; *last* and *last_of_last* are the
    one and two tokens preceding ``lt[0]``. The second positional argument
    is accepted but unused. Returns ``("doit", node)`` when the match covers
    all of *lt*, ``("part", node, width)`` when only the first *width*
    tokens are matched, and ``"can't"`` otherwise.
    """
    if len(lt) < 1:
        return "can't"
    if last_of_last[0] == "FORTK":
        return "can't"

    node = ("Statement", "")

    def reduced(width: int):
        # "part" must report exactly as many tokens as "doit" would consume.
        return ("doit", node) if len(lt) == width else ("part", node, width)

    head = lt[0][0]
    # The conditional rule emits 'ConditionalStatement'; the shorter
    # spelling is still accepted so older token streams keep working.
    if head in ('ConditionalStatement', 'ConditionStatement', 'LoopStatement'):
        return reduced(1)
    if len(lt) > 2 and head == 'LBRACE' and lt[1][0] == 'StatementList' and lt[2][0] == 'RBRACE':
        return reduced(3)
    needs_semicolon = ('FunctionCallWithReturnValue', 'FunctionCallWithoutReturnValue',
                       'AssignmentStatement', 'ReadStatement', 'WriteStatement',
                       'ReturnStatement')
    if len(lt) >= 2 and head in needs_semicolon and lt[1][0] == 'SEMICN':
        # The terminating ';' belongs to the statement, hence width 2.
        return reduced(2)
    # A lone ';' is an empty statement unless it terminates one of these.
    if head == 'SEMICN' and last[0] not in ["VariableDefinition", "ConstantDefinition", "Expression",
                                            "ConditionStatement", "Condition"]:
        return reduced(1)
    return "can't"
# <LoopStatement> ::= while '('<Condition>')'<Statement>
#                   | do<Statement>while '('<Condition>')'
#                   | for'('<Identifier>=<Expression>;<Condition>;
#                         <Identifier>=<Identifier>(+|-)<Step>')'<Statement>
def LoopStatement(self, lt: list) -> (str, (str, str), int):
    """Try to reduce the start of *lt* to a ``LoopStatement`` node.

    *lt* holds ``(kind, value)`` tokens. The ``while`` form is 5 tokens
    wide, ``do ... while`` 6 and ``for`` 15. Returns ``("doit", node)`` when
    the matched form covers all of *lt*, ``("part", node, width)`` when
    tokens follow it, and ``"can't"`` otherwise.
    """
    node = ("LoopStatement", "")

    def reduced(width: int):
        return ("doit", node) if len(lt) == width else ("part", node, width)

    if len(lt) < 5:
        return "can't"
    if lt[0][0] == "WHILETK" and lt[1][0] == "LPARENT" and lt[2][0] == "Condition" and \
            lt[3][0] == "RPARENT" and lt[4][0] == "Statement":
        return reduced(5)
    if len(lt) < 6:
        return "can't"
    if lt[0][0] == "DOTK" and lt[1][0] == "Statement" and lt[2][0] == "WHILETK" and \
            lt[3][0] == "LPARENT" and lt[4][0] == "Condition" and lt[5][0] == "RPARENT":
        return reduced(6)
    if len(lt) < 15:
        return "can't"
    # Token 8 is the identifier being updated and token 12 the step; both
    # must be read from lt and compared by kind.
    if lt[0][0] == "FORTK" and lt[1][0] == "LPARENT" and self.is_IDENFR(lt[2]) and \
            lt[3][0] == "ASSIGN" and lt[4][0] == "Expression" and lt[5][0] == "SEMICN" and \
            lt[6][0] == "Condition" and lt[7][0] == "SEMICN" and self.is_IDENFR(lt[8]) and \
            lt[9][0] == "ASSIGN" and self.is_IDENFR(lt[10]) and lt[11][0] in ['PLUS', 'MINU'] and \
            lt[12][0] == "Step" and lt[13][0] == "RPARENT" and lt[14][0] == "Statement":
        return reduced(15)
    return "can't"
# <ReadStatement> ::= scanf '('<Identifier>{,<Identifier>}')'
def ReadStatement(self, lt: list) -> (str, (str, str), int):
    """Try to reduce the start of *lt* to a ``ReadStatement`` node.

    *lt* holds ``(kind, value)`` tokens. Returns ``("doit", node)`` when the
    closing parenthesis is the last token, ``("part", node, width)`` when
    tokens follow it (*width* includes the parenthesis), and ``"can't"``
    when the tokens do not form a complete ``scanf`` call.
    """
    if len(lt) < 4:
        return "can't"
    if lt[0][0] != "SCANFTK" or lt[1][0] != "LPARENT" or not self.is_IDENFR(lt[2]):
        return "can't"
    node = ("ReadStatement", "")
    i = 3
    while i < len(lt):
        kind = lt[i][0]
        if kind == "RPARENT":
            if i == len(lt) - 1:
                return "doit", node
            return "part", node, i + 1
        # Anything else must be ", <Identifier>".
        if kind != "COMMA" or i + 1 >= len(lt) or not self.is_IDENFR(lt[i + 1]):
            return "can't"
        i += 2
    return "can't"
# <ReturnStatement> ::= return['('<Expression>')']
def ReturnStatement(self, lt: list, addnew: tuple) -> (str, (str, str), int, bool):
    """Try to reduce all of *lt* to a ``ReturnStatement`` node.

    *lt* holds ``(kind, value)`` tokens and *addnew* is the upcoming token.
    A bare ``return`` is accepted unless *addnew* is a left parenthesis;
    ``return ( <Expression> )`` is accepted when it is exactly four tokens.
    Returns ``("doit", node, False)`` on success, else ``"can't"``.
    """
    if not lt or lt[0][0] != "RETURNTK":
        return "can't"
    accepted = ("doit", ("ReturnStatement", ""), False)
    if len(lt) == 1:
        # A following '(' means the return value has not been read yet.
        return "can't" if addnew[0] == "LPARENT" else accepted
    tail = [tok[0] for tok in lt[1:]]
    if tail == ["LPARENT", "Expression", "RPARENT"]:
        return accepted
    return "can't"
-> str: + s = '' + with open(filename, 'r', encoding='utf-8') as f: + s = ''.join(f.readlines()) + return s + + def strsplit(self, s: str) -> list[str]: + lt = [] + l = 0 + r = 1 + while r < len(s): + if not (s[r].isalpha() or s[r].isdigit()): + k = s[l:r].strip() + if len(k) != 0: + lt.append(k) + k = s[r].strip() + if len(k) != 0: + lt.append(k) + l = r + 1 + r = l + else: + r += 1 + if l < len(s): + lt.append(s[l:r]) + i = 0 + # 修整 符号配对 + 字符串查找 + while i < len(lt) - 1: + if lt[i] in self.symbol and lt[i] + lt[i + 1] in self.symbol: + lt[i] = lt[i] + lt[i + 1] + lt.pop(i + 1) + elif lt[i] == '"': + i += 1 + while i < len(lt): + lt[i - 1] += " " + lt[i] + lt.pop(i) + if lt[i - 1][-1] == '"': + i -= 1 + break + elif lt[i] == '\'': + if i + 2 < len(lt) and lt[i + 2] == '\'': + lt[i] += lt[i + 1] + lt[i + 2] + lt.pop(i + 1) + lt.pop(i + 1) + i += 1 + return lt + + def final_ans_insert(self, lt: list[int], newadd: (int, (str, str)), delete: bool = True, + force_del: list[int] = [], cascade_delete: bool = True) -> None: + i = 0 + + def cas_del(t: int): + dele = 0 + while t != -1: + for k in self.final_ans: + if k[0] == t: + if k[1][0] not in ['Factor', 'Term', 'Expression']: + t = -1 + break + t = k[2] if len(k) >= 3 and cascade_delete else -1 + dele += 1 + self.final_ans.remove(k) + break + return dele + + while i < len(self.final_ans) and len(lt) > 0: + if self.final_ans[i][0] in lt: + seq = self.final_ans[i][0] + if len(lt) == 1: + if delete == False: + self.final_ans.insert(i + 1, (newadd[0], newadd[1], lt[0])) + else: + self.final_ans.insert(i + 1, newadd) + if (delete and self.final_ans[i][1][0] in ['Factor', 'Term', 'Expression']) or \ + self.final_ans[i][0] in force_del: + i -= cas_del(self.final_ans[i][0]) + lt.remove(seq) + i += 1 + + # 规约!! 
+ def reduce(self, ans: list[(int, (str, str), any)], newadd: (str, str)) -> list[(int, str)]: + changed = True + while changed: + changed = False + for i in range(len(ans)): + no_index_ans = [x[1] for x in ans] + tokens = no_index_ans[i:] + redo = False + for method_name in self.patterns: + method = getattr(self.yacc, method_name) + if callable(method): + result = [] + r = len(inspect.getfullargspec(method).args) + match r: + case 5: + result = method(tokens, newadd, (-1, -1) if i == 0 else no_index_ans[i - 1], + (-1, -1) if i <= 1 else no_index_ans[i - 2]) + case 4: + result = method(tokens, newadd, (-1, -1) if i == 0 else no_index_ans[i - 1]) + case 3: + result = method(tokens, newadd) + case 2: + result = method(tokens) + match result[0]: + case "doit": + # print(f"<{self.yacc.translation_dict[result[1][0]]}>") + match len(result): + case 5: + self.final_ans_insert([x[0] for x in ans[i:]], (self.seq, result[1]), result[2], + [ans[i + x][0] for x in result[3]], result[4]) + case 4: + self.final_ans_insert([x[0] for x in ans[i:]], (self.seq, result[1]), result[2], + [ans[i + x][0] for x in result[3]]) + case 3: + self.final_ans_insert([x[0] for x in ans[i:]], (self.seq, result[1]), result[2]) + case 2: + self.final_ans_insert([x[0] for x in ans[i:]], (self.seq, result[1])) + case _: + assert False + ans = ans[:i] + ans.append((self.seq, result[1])) + self.seq += 1 + changed = True + redo = True + break + case "part": + # print(f"<{self.yacc.translation_dict[result[1][0]]}>") + match len(result): + case 6: + self.final_ans_insert([x[0] for x in ans[i:i + result[2]]], + (self.seq, result[1]), result[3], result[4], result[5]) + case 5: + self.final_ans_insert([x[0] for x in ans[i:i + result[2]]], + (self.seq, result[1]), result[3], result[4]) + case 4: + self.final_ans_insert([x[0] for x in ans[i:i + result[2]]], + (self.seq, result[1]), result[3]) + case 3: + self.final_ans_insert([x[0] for x in ans[i:i + result[2]]], + (self.seq, result[1])) + case _: + assert 
False + # 到i截止 + temp = ans[i + result[2]:] + ans = ans[:i] + ans.append((self.seq, result[1])) + self.seq += 1 + ans += temp + changed = True + redo = True + break + case "can't": + pass + if redo: + break + return ans + + def WordAnalyze(self, lt: list[str]) -> list[(int, str)]: + ans = [] + i = 0 + while i < len(lt): + s = lt[i] + if s in self.keyword: + temp = (self.keyword[s], s) + ans = self.reduce(ans, temp) + ans.append((self.seq, temp)) + self.final_ans.append((self.seq, temp)) + self.seq += 1 + # print(self.keyword[s], s) + elif s in self.symbol: + temp = (self.symbol[s], s) + ans = self.reduce(ans, temp) + ans.append((self.seq, temp)) + self.final_ans.append((self.seq, temp)) + self.seq += 1 + # print(self.symbol[s], s) + else: + no_answer = True + for key, value in self.regex.items(): + if re.match(key, s): + if value == 'CHARCON' or value == 'STRCON': + s = s[1:-1] + temp = (value, s) + ans = self.reduce(ans, temp) + ans.append((self.seq, temp)) + self.final_ans.append((self.seq, temp)) + self.seq += 1 + # print(value, s) + no_answer = False + break + if no_answer: + ans.append(('error', 'error')) + self.final_ans.append((self.seq, temp)) + i += 1 + ans = self.reduce(ans, ('', '')) + return ans + + def result_print(self) -> None: + with open('output.txt', 'w', encoding='utf-8') as f: + for vals in self.final_ans: + vals = vals[1] + if vals[0] in self.symbol.values() or vals[0] in self.keyword.values() or vals[ + 0] in self.regex.values(): + print(vals[0], vals[1]) + f.write(f'{vals[0]} {vals[1]}\n') + else: + print(f"<{self.yacc.translation_dict[vals[0]]}>") + f.write(f"<{self.yacc.translation_dict[vals[0]]}>\n") + return + + def f(self) -> None: + s = self.readfile('testfile.txt') + s = self.strsplit(s) + s = self.WordAnalyze(s) + self.result_print() + print(s) + + +GrammaticalAnalysis().f() diff --git a/RE2NFA.py b/RE2NFA.py new file mode 100644 index 0000000..e216078 --- /dev/null +++ b/RE2NFA.py @@ -0,0 +1,168 @@ +import functools + + +class Edge: 
+ # 符号 开始状态 结束状态 + def __init__(self, label: chr, start: int, end: int): + self.label = label + self.start = start + self.end = end + + +class EdgeGroup: + def __init__(self, start: int, end: int, numOfState: int, edges: list[Edge]): + self.edges = edges + self.start = start + self.end = end + self.numOfState = numOfState + + def append(self, edges: list[Edge]): + self.edges += edges + + +class NFA: + # ( ) | * . others + # priority * . | + + def Priority(self, a: chr, b: chr) -> int: # a > b 1 ,a == b 0 ,a < b -1 + if a == b: + return 0 + pri = ['|', '.', '*', '('] + return 1 if pri.index(a) > pri.index(b) else -1 + + def EdgePriority(self, a: Edge, b: Edge) -> int: + a = a.label + b = b.label + if a == '~': + return -1 + if b == '~': + return 1 + return 1 if ord(b) < ord(a) else -1 + + def AddConcat(self, s: str) -> str: + lt = list(s) + i = 0 + while i < len(lt): + if lt[i] in '|.(': + pass + elif i + 1 < len(lt) and (lt[i + 1].isalpha() or lt[i + 1] == '('): + lt.insert(i + 1, '.') + i += 1 + return ''.join(lt) + + def ToPostfix(self, s: str) -> str: + tempSt = [] + operSt = [] + for c in s: + match c: + case '(': + operSt.append(c) + case ')': + while operSt[-1] != '(': + tempSt.append(operSt.pop()) + operSt.pop() + case '*' | '|' | '.': + if len(operSt) == 0 or operSt[-1] == '(': + operSt.append(c) + else: + while len(operSt) != 0 and self.Priority(c, operSt[-1]) != 1: + tempSt.append(operSt.pop()) + operSt.append(c) + case _: + tempSt.append(c) + while len(operSt) != 0: + tempSt.append(operSt.pop()) + return ''.join(tempSt) + + def ToNFA(self, s: str) -> EdgeGroup: + stack = [] + for c in s: + match c: + case '|': + edgeGroups = stack[-2:] + stack = stack[:-2] + eg = EdgeGroup(0, edgeGroups[0].numOfState + edgeGroups[1].numOfState - 3, + edgeGroups[0].numOfState + edgeGroups[1].numOfState - 2, []) + for edge in edgeGroups[0].edges: + if edgeGroups[0].end == edge.end: + edge.end = eg.end + eg.append([edge]) + for edge in edgeGroups[1].edges: + if 
edgeGroups[1].end == edge.end: + edge.end = eg.end + else: + edge.end += edgeGroups[0].numOfState - 2 + edge.start += edgeGroups[0].numOfState - 2 if edge.start != 0 else 0 + eg.append([edge]) + stack.append(eg) + case '.': + edgeGroups = stack[-2:] + stack = stack[:-2] + eg = EdgeGroup(0, edgeGroups[0].numOfState + edgeGroups[1].numOfState - 2, + edgeGroups[0].numOfState + edgeGroups[1].numOfState - 1, edgeGroups[0].edges) + for edge in edgeGroups[1].edges: + edge.start += edgeGroups[0].numOfState - 1 + edge.end += edgeGroups[0].numOfState - 1 + eg.append([edge]) + stack.append(eg) + case '*': + edgeGroup = stack[-1] + stack.pop() + eg = EdgeGroup(0, edgeGroup.numOfState, edgeGroup.numOfState + 1, []) + for edge in edgeGroup.edges: + edge.start += 1 + edge.end = edgeGroup.start+1 if edge.end == edgeGroup.end else edge.end + 1 + eg.append([edge]) + eg.append([Edge('~', 0, 1), Edge('~', 1, eg.end)]) + stack.append(eg) + case _: + stack.append(EdgeGroup(0, 1, 2, [Edge(c, 0, 1)])) + return stack[0] + + def printEdge(self, eg: Edge, st: str = '', ed: str = '') -> None: + # 0‐~‐>1 + print(eg.start - 1 if st == '' else st, '-', eg.label, '->', eg.end - 1 if ed == '' else ed, sep='', end=' ') + + def printNFA(self, eg: EdgeGroup) -> None: + dt = {} + for edge in eg.edges: + if edge.start in dt: + dt[edge.start].append(edge) + else: + dt[edge.start] = [edge] + i = 0 + while i in dt: + edges = dt[i] + edges.sort(key=functools.cmp_to_key(self.EdgePriority)) + if i == 0: + print('X', end=' ') + for edge in edges: + self.printEdge(edge, st='X') + print('\nY') + i += 1 + continue + print(i - 1, end=' ') + if i + 1 not in dt: + for edge in edges: + self.printEdge(edge, ed='Y') + else: + for edge in edges: + self.printEdge(edge) + print() + i += 1 + pass + + def f(self, s: str) -> None: + s = self.AddConcat(s) + print(s) + s = self.ToPostfix(s) + print(s) + s = self.ToNFA(s) + self.printNFA(s) + + +nfa = NFA() +nfa.f("(a|b)*baa") +nfa.f(input().lower()) + +# 
print(nfa.AddConcat("(a*b)k")) diff --git a/output.txt b/output.txt new file mode 100644 index 0000000..c037e98 --- /dev/null +++ b/output.txt @@ -0,0 +1,117 @@ +CONSTTK const +INTTK int +IDENFR const1 +ASSIGN = +INTCON 1 +<无符号整数> +<整数> +COMMA , +IDENFR const2 +ASSIGN = +MINU - +INTCON 100 +<无符号整数> +<整数> +<常量定义> +SEMICN ; +CONSTTK const +CHARTK char +IDENFR const3 +ASSIGN = +CHARCON _ +<常量定义> +SEMICN ; +<常量说明> +INTTK int +IDENFR change1 +<变量定义> +SEMICN ; +CHARTK char +IDENFR change3 +<变量定义> +SEMICN ; +<变量说明> +INTTK int +IDENFR gets1 +<声明头部> +LPARENT ( +INTTK int +IDENFR var1 +COMMA , +INTTK int +IDENFR var2 +<参数表> +RPARENT ) +LBRACE { +IDENFR change1 +ASSIGN = +IDENFR var1 +<因子> +<项> +PLUS + +IDENFR var2 +<因子> +<项> +<表达式> +<赋值语句> +SEMICN ; +<语句> +RETURNTK return +LPARENT ( +IDENFR change1 +<因子> +<项> +<表达式> +RPARENT ) +<返回语句> +SEMICN ; +<语句> +<语句列> +<复合语句> +RBRACE } +<有返回值函数定义> +VOIDTK void +MAINTK main +LPARENT ( +RPARENT ) +LBRACE { +PRINTFTK printf +LPARENT ( +STRCON Hello World +<字符串> +RPARENT ) +<写语句> +SEMICN ; +<语句> +PRINTFTK printf +LPARENT ( +IDENFR gets1 +LPARENT ( +INTCON 10 +<无符号整数> +<整数> +<因子> +<项> +<表达式> +COMMA , +INTCON 20 +<无符号整数> +<整数> +<因子> +<项> +<表达式> +<值参数表> +RPARENT ) +<有返回值函数调用语句> +<因子> +<项> +<表达式> +RPARENT ) +<写语句> +SEMICN ; +<语句> +<语句列> +<复合语句> +RBRACE } +<主函数> +<程序> diff --git a/testfile.txt b/testfile.txt new file mode 100644 index 0000000..0a1cdc9 --- /dev/null +++ b/testfile.txt @@ -0,0 +1,12 @@ +const int const1 = 1, const2 = -100; +const char const3 = '_'; +int change1; +char change3; +int gets1(int var1,int var2){ +change1 = var1 + var2; +return (change1); +} +void main(){ +printf("Hello World"); +printf(gets1(10, 20)); +} \ No newline at end of file