Created
April 14, 2023 22:23
-
-
Save HParker/7b541a360c2e475f2f83c2208bad854b to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8 | |
# Steps to convert a regular expression into a NFA | |
# | |
# 1. create NFAs for each trivial part of the regular expression. | |
# 2. join them together replacing the transition part with one of the trivial parts created above. | |
# 3. mark the final state a success | |
require 'set' | |
class State | |
attr_reader :id | |
@@id = 0 | |
def initialize | |
@id = @@id | |
@@id += 1 | |
end | |
def self.reset! | |
@@id = 0 | |
end | |
end | |
class Transition | |
attr_reader :from | |
attr_reader :to | |
attr_reader :char | |
def initialize(from, to, char) | |
if from.nil? || to.nil? || char.nil? | |
raise "WOW NO NILS! #{[from, to, char]}" | |
end | |
@from = from | |
@to = to | |
@char = char | |
end | |
end | |
class FA | |
def dot | |
output = "digraph G {\n" | |
output.concat " rankdir=LR;\n" | |
output.concat " size=\"8,5\";\n" | |
@states.each do |s| | |
if @finish == s | |
output.concat " \"#{s.id}\" [shape=\"doublecircle\"]\n" | |
else | |
output.concat " \"#{s.id}\" [shape=\"circle\"]\n" | |
end | |
end | |
@transitions.each do |t| | |
if t.char == "epsilon" | |
output.concat " \"#{t.from.id}\" -> \"#{t.to.id}\" [label=\"ε\"];\n" | |
else | |
output.concat " \"#{t.from.id}\" -> \"#{t.to.id}\" [label=\"#{t.char}\"];\n" | |
end | |
end | |
output.concat "}\n" | |
puts output | |
end | |
def ascii | |
@transitions.each do |t| | |
puts "(#{t.from.id}) -#{t.char}-> (#{t.to.id})"; | |
end | |
end | |
def merge(nfa) | |
@transitions.concat(nfa.transitions) | |
@states.concat(nfa.states) | |
end | |
def new_transition(start, finish, char) | |
transition = Transition.new(start, finish, char) | |
@transitions << transition | |
transition | |
end | |
def new_state | |
state = State.new | |
@states << state | |
state | |
end | |
end | |
class DFA < FA | |
attr_reader :transitions, :states, :start, :finish | |
attr_writer :start, :finish | |
def initialize | |
@start = nil | |
@finish = nil | |
@transitions = [] | |
@states = [] | |
@end_states = [] | |
end | |
# TODO: hopcroft DFA minimization | |
def self.from_nfa(nfa) | |
dfa = self.new | |
dfa.start = dfa.new_state | |
cur_dfa_state = dfa.start | |
states_to_resolve = [[nfa.start.id]] | |
nfa_state_ids_to_dfa_states = {} | |
nfa_state_ids_to_dfa_states[[nfa.start.id]] = dfa.start | |
while !states_to_resolve.empty? | |
cur_nfa_states = states_to_resolve.pop | |
cur_dfa_state = nfa_state_ids_to_dfa_states[cur_nfa_states] | |
nfa.uniq_transition_keys.each do |transition_key| | |
state_ids = [] | |
ids = nfa.epsilon_closure(cur_nfa_states, char: transition_key) | |
state_ids.concat(ids) | |
state_ids.uniq! | |
state_ids.sort! | |
if !state_ids.empty? | |
if nfa_state_ids_to_dfa_states[state_ids].nil? | |
new_state = dfa.new_state | |
nfa_state_ids_to_dfa_states[state_ids] = new_state | |
dfa.new_transition(cur_dfa_state, new_state, transition_key) | |
if !state_ids.empty? | |
states_to_resolve << state_ids | |
end | |
else | |
dfa.new_transition(cur_dfa_state, nfa_state_ids_to_dfa_states[state_ids], transition_key) | |
end | |
end | |
state_ids = [] | |
end | |
end | |
dfa | |
end | |
end | |
class NFA < FA | |
attr_reader :transitions, :states, :start, :finish | |
attr_writer :start, :finish | |
def initialize | |
@start = nil | |
@finish = nil | |
@transitions = [] | |
@states = [] | |
@end_states = [] | |
end | |
def uniq_transition_keys | |
s = Set.new | |
@transitions.each do |t| | |
if t.char != "epsilon" | |
s.add(t.char) | |
end | |
end | |
s.to_a | |
end | |
def to_dfa | |
DFA.from_nfa(self) | |
end | |
def epsilon_closure(state_ids, char: nil) | |
states = [] | |
@transitions.each do |t| | |
if state_ids.include?(t.from.id) && t.char == char | |
states << t.to.id | |
states.concat(epsilon_closure([t.to.id], char: "epsilon")) | |
end | |
end | |
states | |
end | |
def self.from_string(string) | |
nfa = nil | |
last_nfa = nil | |
i = 0 | |
chars = string.chars | |
while i < chars.size | |
char = chars[i] | |
puts "PROCESSING #{char}" | |
if char == "*" | |
if !last_nfa.nil? | |
nfa = NFA.concatination(nfa, NFA.closure(last_nfa)) | |
last_nfa = nil | |
else | |
nfa = NFA.closure(nfa) | |
end | |
elsif char == "|" | |
if !last_nfa.nil? | |
nfa = NFA.concatination(nfa, last_nfa) | |
last_nfa = nil | |
end | |
alternative_path, new_index = from_string(chars.last(chars.size - (i + 1)).join) | |
nfa = NFA.alternation(nfa, alternative_path) | |
return [nfa, new_index + i + 1] | |
elsif char == "(" | |
if !last_nfa.nil? | |
nfa = NFA.concatination(nfa, last_nfa) | |
last_nfa = nil | |
end | |
parenthetical, new_index = from_string(chars.last(chars.size - (i + 1)).join) | |
i = new_index + i | |
last_nfa = parenthetical | |
elsif char == ")" | |
if !last_nfa.nil? | |
nfa = NFA.concatination(nfa, last_nfa) | |
last_nfa = nil | |
end | |
return [nfa, i + 1] | |
else | |
if !last_nfa.nil? | |
nfa = NFA.concatination(nfa, last_nfa) | |
end | |
if nfa.nil? | |
nfa = NFA.simple(char) | |
else | |
last_nfa = NFA.simple(char) | |
end | |
# | |
end | |
i += 1 | |
end | |
[nfa, i] | |
end | |
def self.simple(char) | |
nfa = self.new | |
nfa.start = nfa.new_state | |
nfa.finish = nfa.new_state | |
nfa.new_transition(nfa.start, nfa.finish, char) | |
nfa | |
end | |
def self.concatination(left_nfa, right_nfa) | |
nfa = self.new | |
nfa.start = left_nfa.start | |
nfa.finish = right_nfa.finish | |
nfa.merge(left_nfa) | |
nfa.merge(right_nfa) | |
nfa.new_transition(left_nfa.finish, right_nfa.start, "epsilon") | |
nfa | |
end | |
def self.alternation(left_nfa, right_nfa) | |
nfa = self.new | |
nfa.start = nfa.new_state | |
nfa.finish = nfa.new_state | |
nfa.merge(left_nfa) | |
nfa.merge(right_nfa) | |
nfa.new_transition(nfa.start, left_nfa.start, "epsilon") | |
nfa.new_transition(nfa.start, right_nfa.start, "epsilon") | |
nfa.new_transition(left_nfa.finish, nfa.finish, "epsilon") | |
nfa.new_transition(right_nfa.finish, nfa.finish,"epsilon") | |
nfa | |
end | |
def self.closure(inner_nfa) | |
nfa = self.new | |
nfa.merge(inner_nfa) | |
nfa.start = nfa.new_state | |
nfa.finish = nfa.new_state | |
nfa.new_transition(nfa.start, inner_nfa.start, "epsilon") | |
nfa.new_transition(inner_nfa.finish, nfa.finish, "epsilon") | |
nfa.new_transition(nfa.start, nfa.finish, "epsilon") | |
nfa.new_transition(inner_nfa.finish, inner_nfa.start, "epsilon") | |
nfa | |
end | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment