CompilerDesign/RE2NFA.py

169 lines
5.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import functools
class Edge:
    """One labelled transition between two numbered states.

    Attributes are plain and mutable on purpose: the NFA builder renumbers
    ``start`` and ``end`` in place while combining sub-automata.
    """

    # symbol, start state, end state
    def __init__(self, label: str, start: int, end: int):
        self.label = label  # symbol carried by the transition
        self.start = start  # index of the state the transition leaves
        self.end = end      # index of the state the transition enters

    def __repr__(self) -> str:
        """Return a constructor-like representation, useful when debugging."""
        return f"Edge({self.label!r}, {self.start!r}, {self.end!r})"
class EdgeGroup:
    """A set of edges together with its start state, end state and state count.

    Args:
        start: index of the group's start state.
        end: index of the group's end state.
        numOfState: number of states used by the group.
        edges: initial edges; the sequence is copied, the edge objects are
            shared.
    """

    def __init__(self, start: int, end: int, numOfState: int, edges: "list[Edge]"):
        # Copy the container so that append() never grows a list that the
        # caller (or another group) still holds a reference to.
        self.edges = list(edges)
        self.start = start
        self.end = end
        self.numOfState = numOfState

    def append(self, edges: "list[Edge]"):
        """Add every edge in *edges* to the end of this group's edge list."""
        self.edges.extend(edges)
class NFA:
# ( ) | * . others
# priority * . |
def Priority(self, a: chr, b: chr) -> int: # a > b 1 ,a == b 0 ,a < b -1
if a == b:
return 0
pri = ['|', '.', '*', '(']
return 1 if pri.index(a) > pri.index(b) else -1
def EdgePriority(self, a: Edge, b: Edge) -> int:
a = a.label
b = b.label
if a == '~':
return -1
if b == '~':
return 1
return 1 if ord(b) < ord(a) else -1
def AddConcat(self, s: str) -> str:
lt = list(s)
i = 0
while i < len(lt):
if lt[i] in '|.(':
pass
elif i + 1 < len(lt) and (lt[i + 1].isalpha() or lt[i + 1] == '('):
lt.insert(i + 1, '.')
i += 1
return ''.join(lt)
def ToPostfix(self, s: str) -> str:
tempSt = []
operSt = []
for c in s:
match c:
case '(':
operSt.append(c)
case ')':
while operSt[-1] != '(':
tempSt.append(operSt.pop())
operSt.pop()
case '*' | '|' | '.':
if len(operSt) == 0 or operSt[-1] == '(':
operSt.append(c)
else:
while len(operSt) != 0 and self.Priority(c, operSt[-1]) != 1:
tempSt.append(operSt.pop())
operSt.append(c)
case _:
tempSt.append(c)
while len(operSt) != 0:
tempSt.append(operSt.pop())
return ''.join(tempSt)
def ToNFA(self, s: str) -> EdgeGroup:
stack = []
for c in s:
match c:
case '|':
edgeGroups = stack[-2:]
stack = stack[:-2]
eg = EdgeGroup(0, edgeGroups[0].numOfState + edgeGroups[1].numOfState - 3,
edgeGroups[0].numOfState + edgeGroups[1].numOfState - 2, [])
for edge in edgeGroups[0].edges:
if edgeGroups[0].end == edge.end:
edge.end = eg.end
eg.append([edge])
for edge in edgeGroups[1].edges:
if edgeGroups[1].end == edge.end:
edge.end = eg.end
else:
edge.end += edgeGroups[0].numOfState - 2
edge.start += edgeGroups[0].numOfState - 2 if edge.start != 0 else 0
eg.append([edge])
stack.append(eg)
case '.':
edgeGroups = stack[-2:]
stack = stack[:-2]
eg = EdgeGroup(0, edgeGroups[0].numOfState + edgeGroups[1].numOfState - 2,
edgeGroups[0].numOfState + edgeGroups[1].numOfState - 1, edgeGroups[0].edges)
for edge in edgeGroups[1].edges:
edge.start += edgeGroups[0].numOfState - 1
edge.end += edgeGroups[0].numOfState - 1
eg.append([edge])
stack.append(eg)
case '*':
edgeGroup = stack[-1]
stack.pop()
eg = EdgeGroup(0, edgeGroup.numOfState, edgeGroup.numOfState + 1, [])
for edge in edgeGroup.edges:
edge.start += 1
edge.end = edgeGroup.start+1 if edge.end == edgeGroup.end else edge.end + 1
eg.append([edge])
eg.append([Edge('~', 0, 1), Edge('~', 1, eg.end)])
stack.append(eg)
case _:
stack.append(EdgeGroup(0, 1, 2, [Edge(c, 0, 1)]))
return stack[0]
def printEdge(self, eg: Edge, st: str = '', ed: str = '') -> None:
# 0~>1
print(eg.start - 1 if st == '' else st, '-', eg.label, '->', eg.end - 1 if ed == '' else ed, sep='', end=' ')
def printNFA(self, eg: EdgeGroup) -> None:
dt = {}
for edge in eg.edges:
if edge.start in dt:
dt[edge.start].append(edge)
else:
dt[edge.start] = [edge]
i = 0
while i in dt:
edges = dt[i]
edges.sort(key=functools.cmp_to_key(self.EdgePriority))
if i == 0:
print('X', end=' ')
for edge in edges:
self.printEdge(edge, st='X')
print('\nY')
i += 1
continue
print(i - 1, end=' ')
if i + 1 not in dt:
for edge in edges:
self.printEdge(edge, ed='Y')
else:
for edge in edges:
self.printEdge(edge)
print()
i += 1
pass
def f(self, s: str) -> None:
s = self.AddConcat(s)
print(s)
s = self.ToPostfix(s)
print(s)
s = self.ToNFA(s)
self.printNFA(s)
# Script driver: runs at import time as well as when executed directly.
nfa = NFA()
# Fixed sample expression, printed before any input is read.
nfa.f("(a|b)*baa")
# Convert one expression read from standard input, lower-cased first.
nfa.f(input().lower())
# Disabled debugging call:
# print(nfa.AddConcat("(a*b)k"))