package dk.brics.xact.analysis.xmlgraph;
import java.util.Arrays;
import java.util.BitSet;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Stack;
import dk.brics.xmlgraph.ChoiceNode;
import dk.brics.xmlgraph.MultiContentNode;
import dk.brics.xmlgraph.Node;
import dk.brics.xmlgraph.ReachableNodesProcessor;
import dk.brics.xmlgraph.SequenceNode;
import dk.brics.xmlgraph.XMLGraph;
/**
* Detects and eliminates some cycles in an XML graph while maintaining the same
* XML language. Removing such cycles enables the validator to validate the graph
* more precisely.
* <p/>
* Define a <em>reducible component</em> as a set of nodes S satisfying that
* <ul>
* <li>Every node in S is either a choice node or a sequence node
* <li>Sequence nodes in S have exactly one outgoing edge
* <li>For any pair of nodes A,B both in S, there exists a path from A to B
* where every node on that path is in S.
* <li>There are at least two nodes in S.
* </ul>
* If there is an edge from A to B we say that A reaches B.
* For a reducible component S, define <em>Out(S)</em> to be the set of nodes
* reached by at least one node in S, minus the set S itself. That is, Out(S) and
* S are always disjoint.
* <p/>
* We reduce such a component by replacing the set of outgoing edges in all
* choice nodes in S by Out(S). The result may still be a reducible component
* if a cycle of sequence nodes exists.
*/
public class CycleUnwinder {
    // Adapted Tarjan's algorithm: strongly connected components are computed on the
    // subgraph induced by the reducible nodes only (see reducible(Node)).

    /** Marks the node indices that are currently on {@link #stack}. */
    private final BitSet onstack;

    /** The graph processed by the current call to {@link #unwind(XMLGraph)}. */
    private XMLGraph graph;

    /** Node indices that have been visited but not yet assigned to a component. */
    private final Stack<Integer> stack = new Stack<Integer>();

    /** Per node index: the smallest discovery number found reachable so far. */
    private final int[] lowlink;

    /** Per node index: the discovery number, or -1 if the node has not been visited. */
    private final int[] vindex;

    /** The next discovery number to hand out. */
    private int index;

    /**
     * Creates an unwinder for an XML graph with the specified number of nodes. The instance
     * can be reused to analyze several XML graphs (but not concurrently).
     * @param numnodes number of nodes in the XML graph to analyze
     */
    public CycleUnwinder(int numnodes) {
        onstack = new BitSet(numnodes);
        lowlink = new int[numnodes];
        vindex = new int[numnodes];
    }

    /**
     * Finds the reducible components among the nodes reachable in the given graph and
     * reduces each of them by giving all of its choice nodes the same content set, namely
     * the union of their previous contents minus the nodes of the component itself.
     * @param graph the XML graph to modify in place
     * @throws IllegalArgumentException if the graph has more nodes than the number
     *         this unwinder was created for
     */
    public void unwind(XMLGraph graph) {
        if (graph.getNodes().size() > vindex.length)
            throw new IllegalArgumentException("XMLGraph has too many nodes");
        this.graph = graph;
        Arrays.fill(vindex, -1);
        Arrays.fill(lowlink, 0);
        // A previous run that was aborted by an exception may have left entries behind;
        // start from a clean slate so the instance stays reusable.
        stack.clear();
        onstack.clear();
        this.index = 0;
        // find reducible components
        List<Node> reachable = ReachableNodesProcessor.getReachableNodes(graph);
        for (Node node : reachable) {
            if (reducible(node) && vindex[node.getIndex()] == -1) {
                tarjan((MultiContentNode)node);
            }
        }
    }

    /**
     * Recursive step of Tarjan's algorithm. Visits the given reducible node, follows its
     * edges to other reducible nodes, and reduces the component rooted at this node once
     * all of its members have been discovered.
     * @param node an unvisited reducible node
     */
    private void tarjan(MultiContentNode node) {
        int v = node.getIndex();
        vindex[v] = index;
        lowlink[v] = index;
        index += 1;
        stack.push(v);
        onstack.set(v);
        for (int child : node.getContents()) {
            Node childnode = graph.getNode(child);
            if (!reducible(childnode)) {
                // edges to non-reducible nodes can never be part of a reducible component
                continue;
            }
            if (vindex[child] == -1) {
                tarjan((MultiContentNode)childnode);
                lowlink[v] = Math.min(lowlink[v], lowlink[child]);
            } else if (onstack.get(child)) {
                lowlink[v] = Math.min(lowlink[v], vindex[child]);
            }
        }
        if (lowlink[v] == vindex[v]) {
            // v is the root of a strongly connected component
            reduceComponent(v);
        }
    }

    /**
     * Pops the component rooted at the given node index off the stack and, if it has at
     * least two nodes, replaces the contents of all its choice nodes by one shared set
     * holding the union of their old contents minus the component's own nodes.
     * @param root index of the node at which the component was detected
     */
    private void reduceComponent(int root) {
        int v2 = stack.pop();
        onstack.clear(v2);
        if (v2 == root) {
            // only one node in component, ignore it
            return;
        }
        // NOTE: We make all the choice nodes share the same instance of LinkedHashSet
        // Apart from saving memory, it allows us to update all the nodes' content sets at once
        LinkedHashSet<Integer> cs = new LinkedHashSet<Integer>();
        HashSet<Integer> component = new HashSet<Integer>();
        while (true) {
            MultiContentNode node2 = (MultiContentNode)graph.getNode(v2);
            if (node2 instanceof ChoiceNode) {
                // Collect the old edges BEFORE installing the shared set. Once the node
                // holds cs (which is what the sharing described above relies on),
                // getContents() would hand back cs itself and the old edges would be lost.
                cs.addAll(node2.getContents());
                ((ChoiceNode)node2).setContent(cs, graph);
            }
            component.add(v2);
            if (v2 == root)
                break;
            v2 = stack.pop();
            onstack.clear(v2);
        }
        // what remains are the edges that leave the component
        cs.removeAll(component);
    }

    /**
     * Tells whether a node may be part of a reducible component: any choice node, or a
     * sequence node with exactly one content node.
     * @param node the node to test
     * @return true if the node is a choice node or a sequence node with a single content
     */
    private boolean reducible(Node node) {
        if (node instanceof ChoiceNode)
            return true;
        if (node instanceof SequenceNode) {
            return ((SequenceNode)node).getContents().size() == 1;
        }
        return false;
    }
}