-
-
Save wylswz/3b60d282110aa1194e72b24da67e49f2 to your computer and use it in GitHub Desktop.
佛祖保佑,永无 BUG
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.xmbsmdsj.regex; | |
import javax.xml.soap.Node; | |
import java.nio.file.NotDirectoryException; | |
import java.util.*; | |
public class DFANode implements Matcher{ | |
private static final DFANode EMPTY = new DFANode(Collections.emptySet()); | |
private final Set<NFANode> nfaNodes; | |
private final Map<String, DFANode> transitionMap; | |
public DFANode(Set<NFANode> nfaNodes) { | |
this.nfaNodes = nfaNodes; | |
this.transitionMap = new HashMap<>(); | |
} | |
public boolean accepts() { | |
for (NFANode n : nfaNodes) { | |
if (n.isAcceptState()) { | |
return true; | |
} | |
} | |
return false; | |
} | |
private boolean doMatch(String s, int idx) { | |
if (idx == s.length()) { | |
return accepts(); | |
} | |
String token = s.substring(idx, idx + 1); | |
if ((!transitionMap.containsKey(token)) && (!transitionMap.containsKey(NFANode.SIGMA))) { | |
return false; | |
} | |
DFANode next = transitionMap.get(token); | |
if (next == null) { | |
next = transitionMap.get(NFANode.SIGMA); | |
} | |
return next.doMatch(s, idx + 1); | |
} | |
public boolean match(String s) { | |
return doMatch(s, 0); | |
} | |
public static DFANode subsetConstruction(NFANode root) { | |
DFANode start = new DFANode(root.epsilonClosure()); | |
Queue<DFANode> queue = new LinkedList<>(); | |
Set<DFANode> visited = new HashSet<>(); | |
Map<DFANode, DFANode> cache = new HashMap<>(); | |
queue.add(start); | |
while (!queue.isEmpty()) { | |
DFANode next = queue.poll(); | |
Set<String> possibleTransitions = next.possibleTransitions(); | |
for (String transition : possibleTransitions) { | |
DFANode transited = next.performTransition(transition); | |
if (!cache.containsKey(transited)) { | |
queue.add(transited); | |
cache.put(transited, transited); | |
next.transitionMap.put(transition, transited); | |
} else { | |
next.transitionMap.put(transition, cache.get(transited)); | |
} | |
} | |
} | |
return start; | |
} | |
private DFANode performTransition(String transition) { | |
if (nfaNodes.isEmpty()) { | |
return EMPTY; | |
} | |
Set<NFANode> resNodes = new HashSet<>(); | |
nfaNodes.forEach(n -> { | |
resNodes.addAll(n.doTransition(transition)); | |
}); | |
return new DFANode(resNodes); | |
} | |
private Set<String> possibleTransitions() { | |
Set<String> res = new HashSet<>(); | |
nfaNodes.forEach(n -> { | |
res.addAll(n.transitions()); | |
}); | |
return res; | |
} | |
@Override | |
public boolean equals(Object o) { | |
if (this == o) return true; | |
if (o == null || getClass() != o.getClass()) return false; | |
DFANode dfaNode = (DFANode) o; | |
return Objects.equals(nfaNodes, dfaNode.nfaNodes); | |
} | |
@Override | |
public int hashCode() { | |
return Objects.hash(nfaNodes); | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// | |
// _oo0oo_ | |
// o8888888o | |
// 88" . "88 | |
// (| -_- |) | |
// 0\ = /0 | |
// ___/`---'\___ | |
// .' \\| |// '. | |
// / \\||| : |||// \ | |
// / _||||| -:- |||||- \ | |
// | | \\\ - /// | | | |
// | \_| ''\---/'' |_/ | | |
// \ .-\__ '-' ___/-. / | |
// ___'. .' /--.--\ `. .'___ | |
// ."" '< `.___\_<|>_/___.' >' "". | |
// | | : `- \`.;`\ _ /`;.`/ - ` : | | | |
// \ \ `_. \_ __\ /__ _/ .-` / / | |
// =====`-.____`.___ \_____/___.-`___.-'===== | |
// `=---=' | |
// | |
// | |
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
// | |
// 佛祖保佑 永无BUG | |
// | |
// | |
// |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# | |
# _oo0oo_ | |
# o8888888o | |
# 88" . "88 | |
# (| -_- |) | |
# 0\ = /0 | |
# ___/`---'\___ | |
# .' \\| |# '. | |
# / \\||| : |||# \ | |
# / _||||| -:- |||||- \ | |
# | | \\\ - #/ | | | |
# | \_| ''\---/'' |_/ | | |
# \ .-\__ '-' ___/-. / | |
# ___'. .' /--.--\ `. .'___ | |
# ."" '< `.___\_<|>_/___.' >' "". | |
# | | : `- \`.;`\ _ /`;.`/ - ` : | | | |
# \ \ `_. \_ __\ /__ _/ .-` / / | |
# =====`-.____`.___ \_____/___.-`___.-'===== | |
# `=---=' | |
# | |
# | |
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
# | |
# 佛祖保佑 永无BUG | |
# | |
# | |
# |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.xmbsmdsj.regex; | |
import java.util.*; | |
public class NFANode implements Matcher { | |
@Override | |
public boolean equals(Object o) { | |
if (this == o) return true; | |
if (o == null || getClass() != o.getClass()) return false; | |
NFANode NFANode = (NFANode) o; | |
return id == NFANode.id; | |
} | |
@Override | |
public int hashCode() { | |
return Objects.hash(id); | |
} | |
static class SearchContext { | |
private boolean found = false; | |
private final Set<Long> searchStates = new HashSet<>(); | |
} | |
private static final String EPSILON = "@epsilon"; | |
public static final String SIGMA = "."; | |
private static int globalId = 0; | |
private final Map<String, List<NFANode>> children; | |
private final int id; | |
private boolean isFinal; | |
public NFANode(int id, boolean isFinal) { | |
this.id = id; | |
this.isFinal = isFinal; | |
this.children = new HashMap<>(2); | |
} | |
public static int nextId() { | |
return globalId++; | |
} | |
private static boolean isStarChar(String pattern, int idx) { | |
return idx < pattern.length() - 1 && pattern.charAt(idx + 1) == '*'; | |
} | |
/** | |
* construct an epsilon block | |
* @param c character | |
* @return tail of that block | |
*/ | |
private static NFANode constructEpsilonBlock(NFANode root, char c) { | |
NFANode x = new NFANode(NFANode.nextId(), false); | |
NFANode y = new NFANode(NFANode.nextId(), false); | |
NFANode tail = new NFANode(NFANode.nextId(), false); | |
root.children.put(EPSILON, Arrays.asList(x, tail)); | |
x.children.put(String.valueOf(c), Collections.singletonList(y)); | |
y.children.put(EPSILON, Arrays.asList(x, tail)); | |
return tail; | |
} | |
public Set<NFANode> epsilonClosure() { | |
Queue<NFANode> q = new LinkedList<>(); | |
Set<NFANode> res = new HashSet<>(); | |
Set<NFANode> closeSet = new HashSet<>(); | |
q.add(this); | |
res.add(this); | |
while (!q.isEmpty()) { | |
NFANode next = q.poll(); | |
closeSet.add(next); | |
List<NFANode> epsilonReachable = next.children.get(EPSILON); | |
if (epsilonReachable == null || epsilonReachable.isEmpty()) { | |
continue; | |
} | |
epsilonReachable.forEach(n -> { | |
res.add(n); | |
if (!closeSet.contains(n)) { | |
q.add(n); | |
closeSet.add(n); | |
} | |
}); | |
} | |
return res; | |
} | |
/** | |
* Compile to NFA | |
* @param pattern | |
* @return | |
*/ | |
public static NFANode compile(String pattern) { | |
NFANode.globalId = 0; | |
NFANode root = new NFANode(NFANode.nextId(), false); | |
NFANode current = root; | |
int i = 0; | |
while (i<pattern.length()) { | |
boolean isStarChar = isStarChar(pattern, i); | |
if (!isStarChar) { | |
NFANode next = new NFANode(NFANode.nextId(), false); | |
current.children.put(String.valueOf(pattern.charAt(i)), Collections.singletonList(next)); | |
current = next; | |
i+=1; | |
} else { | |
current = constructEpsilonBlock(current, pattern.charAt(i)); | |
i+=2; | |
} | |
} | |
current.isFinal = true; | |
return root; | |
} | |
public boolean doMatch(String s, int idx, SearchContext searchContext) { | |
Long state = (((long) id) << 32) + idx; | |
if (searchContext.searchStates.contains(state)) { | |
return false; | |
} else { | |
searchContext.searchStates.add(state); | |
} | |
if (searchContext.found) { | |
return true; | |
} | |
if (idx == s.length()) { | |
if (isFinal) { | |
// final node is found | |
searchContext.found = true; | |
return true; | |
} | |
} | |
List<NFANode> children = null; | |
// if idx == s.length and is not final, we still want to explore epsilon-closures | |
if (idx < s.length()) { | |
char c = s.charAt(idx); | |
children = new LinkedList<>(); | |
// try to match | |
children.addAll(Optional.ofNullable(this.children.get(String.valueOf(c))).orElse(Collections.emptyList())); | |
children.addAll(Optional.ofNullable(this.children.get(SIGMA)).orElse(Collections.emptyList())); | |
} | |
if (children == null || children.isEmpty()) { | |
// No matched node, go to closure | |
if (!this.children.containsKey(EPSILON)) { | |
return false; | |
} | |
for (NFANode n : this.children.get(EPSILON)) { | |
if (n.id == this.id) continue; | |
if( n.doMatch(s, idx, searchContext)) { | |
searchContext.found = true; | |
return true; | |
} | |
} | |
return false; | |
} else { | |
for (NFANode next : children) { | |
boolean res = next.doMatch(s, idx + 1, searchContext); | |
if (res) { | |
searchContext.found = true; | |
return true; | |
} | |
} | |
return false; | |
} | |
} | |
public boolean match(String s) { | |
// reset id | |
return doMatch(s, 0, new SearchContext()); | |
} | |
public boolean isAcceptState() { | |
return isFinal; | |
} | |
public Map<String, List<NFANode>> getChildren() { | |
return children; | |
} | |
/** | |
* Get transitions except epsilon and sigma | |
* @return | |
*/ | |
public Set<String> transitions() { | |
Set<String> keySet = new HashSet<>(children.keySet()); | |
keySet.remove(EPSILON); | |
return keySet; | |
} | |
public Set<NFANode> doTransition(String transition) { | |
Set<NFANode> transited = new HashSet<>(); | |
if (this.children.containsKey(SIGMA)) { | |
transited.addAll(this.children.get(SIGMA)); | |
} | |
if (this.children.containsKey(transition)) { | |
transited.addAll(this.children.get(transition)); | |
} | |
Set<NFANode> res = new HashSet<>(); | |
for (NFANode nfaNode : transited) { | |
res.addAll(nfaNode.epsilonClosure()); | |
} | |
return res; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment