package dk.brics.xact.analysis.xmlgraph;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.Map;
import java.util.Set;
import dk.brics.misc.Origin;
import dk.brics.xact.analysis.Debug;
import dk.brics.xact.analysis.ErrorHandler;
import dk.brics.xact.analysis.ErrorType;
import dk.brics.xact.analysis.XMLAnalysisException;
import dk.brics.xact.analysis.flowgraph.Entity;
import dk.brics.xact.analysis.flowgraph.FlowGraph;
import dk.brics.xact.analysis.flowgraph.SchemaType;
import dk.brics.xact.analysis.flowgraph.Statement;
import dk.brics.xact.analysis.flowgraph.statements.AnalyzeStm;
import dk.brics.xact.analysis.flowgraph.statements.BasicStatementVisitor;
import dk.brics.xact.analysis.flowgraph.statements.CastStm;
import dk.brics.xact.analysis.flowgraph.statements.CheckStm;
import dk.brics.xact.analysis.flowgraph.statements.ConstStm;
import dk.brics.xact.analysis.flowgraph.statements.CopyStm;
import dk.brics.xact.analysis.flowgraph.statements.EmptyStm;
import dk.brics.xact.analysis.flowgraph.statements.EscapeStm;
import dk.brics.xact.analysis.flowgraph.statements.GapifyStm;
import dk.brics.xact.analysis.flowgraph.statements.GetStm;
import dk.brics.xact.analysis.flowgraph.statements.InsertStm;
import dk.brics.xact.analysis.flowgraph.statements.NodeStm;
import dk.brics.xact.analysis.flowgraph.statements.NopStm;
import dk.brics.xact.analysis.flowgraph.statements.PlugStm;
import dk.brics.xact.analysis.flowgraph.statements.RemoveStm;
import dk.brics.xact.analysis.flowgraph.statements.SetStm;
import dk.brics.xact.analysis.flowgraph.statements.UnknownStm;
import dk.brics.xact.analysis.flowgraph.statements.VarStm;
import dk.brics.xmlgraph.AttributeNode;
import dk.brics.xmlgraph.ChoiceNode;
import dk.brics.xmlgraph.ElementNode;
import dk.brics.xmlgraph.MultiContentNode;
import dk.brics.xmlgraph.Node;
import dk.brics.xmlgraph.NodeProcessor;
import dk.brics.xmlgraph.OneOrMoreNode;
import dk.brics.xmlgraph.SequenceNode;
import dk.brics.xmlgraph.TextNode;
import dk.brics.xmlgraph.XMLGraph;
import dk.brics.xmlgraph.XMLGraphFragment;
import dk.brics.xmlgraph.validator.ValidationErrorHandler;
import dk.brics.xmlgraph.validator.Validator;
/**
 * Checks XML graphs of various program expressions.
 * <p>
 * {@link #run(FlowGraph, XMLGraphBuilder)} visits every statement of a flow graph and
 * reports the problems it finds to the {@link ErrorHandler} given at construction time.
 */
public class XMLGraphChecker {

    /** Receiver of every problem reported by this checker. */
    private final ErrorHandler errors;

    /** Created at the start of each {@link #run(FlowGraph, XMLGraphBuilder)}. */
    private CycleUnwinder cycleUnwinder;

    /**
     * Constructs a new checker.
     * @param errors handler that receives the detected problems
     */
    public XMLGraphChecker(ErrorHandler errors) {
        this.errors = errors;
    }

    /**
     * Tells whether the given statement belongs to a kind that is checked.
     * @param s the statement to classify
     * @return <code>false</code> for const, empty, unknown, var, escape and nop
     *         statements, <code>true</code> for every other statement
     */
    public static boolean isStatementChecked(Statement s) {
        return !(s instanceof ConstStm || s instanceof EmptyStm || s instanceof UnknownStm
                || s instanceof VarStm || s instanceof EscapeStm || s instanceof NopStm);
    }

    /**
     * Makes an example string readable inside an error message.
     * @param s the example, possibly <code>null</code>
     * @return the empty string if no example is given, a bracketed placeholder if the
     *         example is empty or consists of whitespace only, otherwise the example itself
     */
    private String formatExample(String s) {
        if (s == null) {
            return "";
        }
        if (s.isEmpty()) {
            return "[empty]";
        }
        if (s.trim().isEmpty()) {
            return "[whitespace text]";
        }
        return s;
    }

    /**
     * Describes an element node inside an error message.
     * @param node the element node, or <code>null</code> for the document root
     */
    private String formatNode(ElementNode node) {
        if (node == null) {
            return "document root";
        }
        return node.getName().getShortestExample(true);
    }

    /**
     * Prints a debug message telling which statement is being checked.
     */
    private void printProgress(Statement s) {
        Debug.println(2, true, "Checking '" + s.getOpName() + "' at " + s.getOrigin());
    }

    /**
     * Reports an error at the origin of the given entity.
     */
    private void error(Entity s, ErrorType errorType, String msg2, Object ... args) {
        error(s.getOrigin(), errorType, msg2, args);
    }

    /**
     * Reports an error at the given origin by forwarding it to the error handler.
     */
    private void error(Origin or, ErrorType errorType, String msg2, Object ... args) {
        errors.error(or, errorType, msg2, args);
    }

    /**
     * Returns the error handler that receives the detected errors.
     * Should be called after {@link #run(FlowGraph, XMLGraphBuilder)}.
     * @return the error handler given to the constructor.
     */
    public ErrorHandler getErrors() {
        return errors;
    }

    /**
     * Creates a copy of an XML graph in which the open gaps have been closed according
     * to the gap annotations of a schema type. The given graph is not modified.
     * <p>
     * An open gap without annotation is closed with the empty sequence as extra content
     * if it is an open template gap. An open gap with annotation gets the annotated type
     * node as extra content and is moved from the open to the closed gap sets.
     * Finally {@link #closeAttributeGaps(XMLGraph, XMLGraphBuilder)} is applied.
     * @param graph the graph to copy
     * @param schema schema type providing the gap type nodes
     * @param b builder providing the global XML graph and the empty sequence node
     * @param origin currently unused
     * @return the annotated copy
     */
    private XMLGraph getGapAnnotatedGraph(final XMLGraph graph, final SchemaType schema, final XMLGraphBuilder b, final Origin origin) {
        final XMLGraph g = graph.clone();
        g.processReachableNodes(new NodeProcessor<Object>() {
            @Override
            public Object process(ChoiceNode n) {
                if (!n.isGap() || !n.isOpen()) {
                    return n;
                }
                Node typeNode = schema.getGapTypeNodes().get(n.getName());
                Set<Integer> cs = new LinkedHashSet<Integer>(n.getContents());
                if (typeNode == null) {
                    // close the gap if no annotation is present
                    if (g.getOpenTemplateGaps().remove(n.getName())) {
                        cs.add(b.getEmptySequence().getIndex());
                        g.getClosedTemplateGaps().add(n.getName());
                        n.setContentAndStatus(false, n.isRemoved(), cs, g);
                    }
                } else {
                    // the gap may additionally contain anything permitted by its annotated type
                    cs.add(typeNode.getIndex());
                    if (g.getOpenAttributeGaps().remove(n.getName())) {
                        g.getClosedAttributeGaps().add(n.getName());
                    }
                    if (g.getOpenTemplateGaps().remove(n.getName())) {
                        g.getClosedTemplateGaps().add(n.getName());
                    }
                    // bring the part of the global graph describing the type into g
                    XMLGraph sub = b.getGlobalXMLGraph().clone();
                    sub.useFragment(new XMLGraphFragment(typeNode, null, null, null));
                    g.merge(sub);
                    n.setContentAndStatus(false, n.isRemoved(), cs, g);
                    // get the original roots back
                    g.getRoots().clear();
                    g.getRoots().addAll(graph.getRoots());
                }
                return n;
            }
        });
        closeAttributeGaps(g, b);
        return g;
    }

    /**
     * Adds the empty sequence as an alternative to every reachable choice node that has
     * an attribute child whose content is a gap still listed among the open attribute
     * gaps of the graph.
     * @param g the graph to modify
     * @param b builder providing the empty sequence node
     */
    private void closeAttributeGaps(final XMLGraph g, final XMLGraphBuilder b) {
        g.processReachableNodes(new NodeProcessor<Object>() {
            @Override
            public Object process(ChoiceNode n) {
                boolean addempty = false;
                for (int child : n.getContents()) {
                    Node node = g.getNode(child);
                    if (!(node instanceof AttributeNode)) {
                        continue;
                    }
                    AttributeNode attr = (AttributeNode) node;
                    Node contentNode = g.getNode(attr.getContent());
                    // only a choice node can be a gap; text content (and any other kind of
                    // content) is skipped instead of being cast blindly
                    if (!(contentNode instanceof ChoiceNode)) {
                        continue;
                    }
                    ChoiceNode content = (ChoiceNode) contentNode;
                    if (content.isGap() && content.isOpen() && g.getOpenAttributeGaps().contains(content.getName())) {
                        addempty = true;
                        break;
                    }
                }
                if (addempty) {
                    LinkedHashSet<Integer> cs = new LinkedHashSet<Integer>(n.getContents());
                    cs.add(b.getEmptySequence().getIndex());
                    n.setContent(cs, g);
                }
                return n;
            }
        });
    }

    /**
     * Runs the checker.
     * <p>
     * All statements are first checked for operation-specific problems; afterwards all
     * analyze statements (except hotspots) are validated against their schema types.
     * @param fg the flow graph whose statements are checked
     * @param b builder holding the XML graphs computed for the flow graph
     */
    public void run(final FlowGraph fg, final XMLGraphBuilder b) {
        cycleUnwinder = new CycleUnwinder(b.getGlobalXMLGraph().getNodes().size());
        checkOperations(fg, b);
        checkAnalyzeStatements(fg, b);
    }

    /**
     * First pass of {@link #run(FlowGraph, XMLGraphBuilder)}: checks check, gapify, get,
     * insert, plug, remove and set statements.
     */
    private void checkOperations(final FlowGraph fg, final XMLGraphBuilder b) {
        // the visitor keeps no state of its own, so one instance serves all statements
        final BasicStatementVisitor visitor = new BasicStatementVisitor() {
            @Override
            public void visitCastStm(CastStm s) {
                printProgress(s);
                // TODO: check CastStm
            }

            @Override
            public void visitCheckStm(CheckStm s) {
                printProgress(s);
                checkNonEmpty(s);
                if (b.getCheckFailsStatements().contains(s)) {
                    switch (s.getKind()) {
                    case GETNUMBER:
                        error(s, ErrorType.EMPTY_RESULT, "No digits at '%s'", s.getOpName());
                        break;
                    case GETSTRING:
                        error(s, ErrorType.EMPTY_RESULT, "No non-whitespace text at '%s'", s.getOpName());
                        break;
                    case HAS:
                    case ISATTRIBUTE:
                    case ISELEMENT:
                    case ISTEXT:
                        error(s, ErrorType.TEST_FAILS, "Test at '%s' always fails", s.getOpName());
                        break;
                    case TODOCUMENT:
                        error(s, ErrorType.NOT_UNIQUE_ROOT, "Not one root element at '%s'", s.getOpName());
                        break;
                    }
                }
            }

            @Override
            public void visitCopyStm(CopyStm s) {
                printProgress(s);
                // TODO: check CopyStm
            }

            @Override
            public void visitGapifyStm(GapifyStm s) {
                printProgress(s);
                checkNonEmpty(s);
            }

            /**
             * Reports an error if the builder has registered the statement as having
             * an XPath expression with empty result.
             */
            private void checkNonEmpty(Statement s) {
                if (b.getEmptyXPathStatements().contains(s)) {
                    error(s, ErrorType.EMPTY_RESULT,
                            "XPath expression at '%s' has empty result",
                            s.getOpName());
                }
            }

            @Override
            public void visitGetStm(GetStm s) {
                printProgress(s);
                if (isDefinitelyEmpty(b.getOut(s, s.getDest()))) {
                    error(s, ErrorType.EMPTY_RESULT, "'%s' has empty result", s.getOpName());
                }
            }

            @Override
            public void visitInsertStm(InsertStm s) {
                printProgress(s);
                checkNonEmpty(s);
            }

            @Override
            public void visitNodeStm(NodeStm s) {
                printProgress(s);
                // TODO: check NodeStm
            }

            @Override
            public void visitPlugStm(final PlugStm s) {
                printProgress(s);
                XMLGraph g = b.getIn(s, s.getBase());
                if (g.isUnknown()) {
                    return;
                }
                switch (s.getKind()) {
                case PLUG:
                case PLUGMULTI:
                case PLUGWRAP: {
                    if (!g.getOpenAttributeGaps().contains(s.getGapName())
                            && !g.getOpenTemplateGaps().contains(s.getGapName())) {
                        error(s, ErrorType.MISSING_GAP, "the gap '%s' is absent", s.getGapName());
                    }
                    if (g.getOpenAttributeGaps().contains(s.getGapName())
                            && !b.getIn(s, s.getXMLSource()).getRoots().isEmpty()) {
                        error(s, ErrorType.XML_IN_ATTRIBUTE, "maybe plugging XML data into attribute gap '%s'", s.getGapName());
                    }
                    final String gapType = g.getGapTypeMap().get(s.getGapName());
                    if (gapType != null) {
                        // the gap is typed: the plugged value must be valid for that type
                        XMLGraph value_xg = b.getIn(s, s.getXMLSource());
                        XMLGraph type_xg = b.getGlobalXMLGraph().clone();
                        // NOTE(review): unlike the analyze check, a missing type map entry
                        // is not reported here - confirm that it cannot occur
                        Node typeNode = fg.getTypemap().get(gapType);
                        type_xg.useFragment(new XMLGraphFragment(typeNode, null, null, null));
                        Validator validator = new Validator(new ValidationErrorHandler() {
                            public boolean error(ElementNode node, Origin origin, String msg,
                                    String example, Origin schema) {
                                // formatExample maps a missing (null) example to the empty
                                // string; it must not be turned into "" first, as that
                                // would be printed as "[empty]"
                                XMLGraphChecker.this.error(
                                        s,
                                        ErrorType.INVALID_PLUG_TYPE,
                                        "Plug statement violates the gap type %s\n" +
                                        "because of %s created at %s\n" +
                                        "%s %s",
                                        gapType,
                                        formatNode(node),
                                        origin,
                                        msg,
                                        formatExample(example));
                                return true;
                            }
                        });
                        validator.validate(value_xg, type_xg, -1);
                    }
                    break;
                }
                case CLOSE: {
                    if (g.getGapTypeMap().isEmpty()) {
                        return;
                    }
                    // closing a typed gap is only valid if its type permits the empty sequence
                    XMLGraph empty_xg = b.getGlobalXMLGraph().clone();
                    empty_xg.useFragment(new XMLGraphFragment(b.getEmptySequence(), null, null, null));
                    XMLGraph type_xg = b.getGlobalXMLGraph().clone();
                    for (final Map.Entry<String,String> gapentry : g.getGapTypeMap().entrySet()) {
                        final String gapName = gapentry.getKey();
                        final String gapType = gapentry.getValue();
                        Node typeNode = fg.getTypemap().get(gapType);
                        type_xg.useFragment(new XMLGraphFragment(typeNode, null, null, null));
                        Validator validator = new Validator(new ValidationErrorHandler() {
                            public boolean error(ElementNode node, Origin origin, String msg,
                                    String example, Origin schema) {
                                // the details of the violation are not part of the message
                                XMLGraphChecker.this.error(
                                        s,
                                        ErrorType.INVALID_PLUG_TYPE,
                                        "Close statement violates the type of gap %s\n" +
                                        "%s does not permit an empty sequence\n" +
                                        "Perhaps add '?' or '*' quantifier?",
                                        gapName,
                                        gapType);
                                return true;
                            }
                        });
                        validator.validate(empty_xg, type_xg, -1);
                    }
                    break;
                }
                }
            }

            @Override
            public void visitRemoveStm(RemoveStm s) {
                printProgress(s);
                checkNonEmpty(s);
            }

            @Override
            public void visitSetStm(SetStm s) {
                printProgress(s);
                checkNonEmpty(s);
            }
        };
        for (Statement s : fg.getNodes()) {
            s.visitBy(visitor);
        }
    }

    /**
     * Tells whether an XML graph definitely describes empty content only.
     * @param g the graph to inspect
     * @return <code>true</code> if the graph is definitely empty, <code>false</code> if
     *         it is unknown or maybe non-empty
     */
    private boolean isDefinitelyEmpty(final XMLGraph g) {
        if (g.isUnknown()) {
            return false;
        }
        boolean empty = true;
        for (int root : g.getRoots()) {
            // TODO use Emptiness instead, or upgrade sharpen() so we can just test
            // if roots are empty
            // true=definitely empty, false=maybe empty
            empty &= g.getNode(root).process(new CachedNodeProcessor<Boolean>() {
                @Override
                public Boolean cycle() {
                    return false;
                }

                @Override
                public Boolean process(AttributeNode n) {
                    return false;
                }

                @Override
                public Boolean process(ElementNode n) {
                    return false;
                }

                @Override
                public Boolean process(TextNode n) {
                    return n.getText().isEmpty();
                }

                @Override
                public Boolean process(OneOrMoreNode n) {
                    return g.getNode(n.getContent()).process(this);
                }

                @Override
                public Boolean process(ChoiceNode n) {
                    if (n.isGap() && n.isOpen()) {
                        return false; // gaps are not to be considered "empty content"
                    }
                    // NOTE(review): presumably a null result makes the node fall through
                    // to process(MultiContentNode) - confirm against NodeProcessor
                    return null;
                }

                @Override
                public Boolean process(MultiContentNode n) {
                    boolean allEmpty = true;
                    for (int child : n.getContents()) {
                        allEmpty &= g.getNode(child).process(this);
                    }
                    return allEmpty;
                }
            });
        }
        return empty;
    }

    /**
     * Second pass of {@link #run(FlowGraph, XMLGraphBuilder)}: validates the XML graph
     * at every analyze statement, except hotspots, against the schema type of the statement.
     */
    private void checkAnalyzeStatements(final FlowGraph fg, final XMLGraphBuilder b) {
        // the visitor keeps no state of its own, so one instance serves all statements
        final BasicStatementVisitor visitor = new BasicStatementVisitor() {
            @Override
            public void visitAnalyzeStm(final AnalyzeStm s) {
                if (s.getKind() == AnalyzeStm.Kind.HOTSPOT) {
                    return; // hotspots should not be analyzed here
                }
                printProgress(s);
                XMLGraph value_xg = b.getIn(s, s.getBase());
                if (value_xg.isUnknown()) {
                    String message;
                    if (s.getKind() == AnalyzeStm.Kind.TYPEANNOTATION) {
                        message = "Non-validated XML data assigned to type-annotated variable";
                    } else {
                        message = "Non-validated XML data at 'analyze' statement";
                    }
                    error(s, ErrorType.INVALID, message);
                } else {
                    Validator validator = new Validator(new ValidationErrorHandler() {
                        public boolean error(ElementNode node, Origin origin, String msg,
                                String example, Origin schema) {
                            XMLGraphChecker.this.error(
                                    s.getOrigin(),
                                    ErrorType.INVALID,
                                    "Problem in %s created at %s\n" +
                                    "%s %s",
                                    formatNode(node),
                                    origin,
                                    msg,
                                    formatExample(example));
                            return true;
                        }
                    });
                    XMLGraph type_xg = b.getGlobalXMLGraph().clone();
                    SchemaType schema = s.getSchema();
                    String type = schema.getType();
                    Node n = fg.getTypemap().get(type);
                    if (n == null) {
                        throw new XMLAnalysisException("No schema definition found for " + type, s.getOrigin());
                    }
                    type_xg.useFragment(new XMLGraphFragment(n, null, null, null));
                    Map<String,String> gaptypes = schema.getGapTypes();
                    if (gaptypes == null) {
                        gaptypes = Collections.emptyMap();
                    }
                    // every typed gap of the value must be declared with the same type
                    // by the schema type of the statement
                    for (Map.Entry<String,String> valueGapType : value_xg.getGapTypeMap().entrySet()) {
                        String typeGapType = gaptypes.get(valueGapType.getKey());
                        if (!valueGapType.getValue().equals(typeGapType)) {
                            error(s.getOrigin(),
                                    ErrorType.INVALID,
                                    "The type-strong gap %s might not have been plugged",
                                    valueGapType.getKey());
                        }
                    }
                    value_xg = getGapAnnotatedGraph(value_xg, schema, b, s.getOrigin());
                    prepareGraphForValidation(value_xg);
                    validator.validate(value_xg, type_xg, -1);
                }
            }
        };
        for (Statement s : fg.getNodes()) {
            s.visitBy(visitor);
        }
    }

    /**
     * Simplifies the XML graph so the validation algorithm gives more precise
     * results.
     * @param xg XML graph to modify
     */
    private void prepareGraphForValidation(final XMLGraph xg) {
        // unwind simple cycles
        cycleUnwinder.unwind(xg);
        // remove references to empty-language text nodes
        // and replace empty-string text nodes with empty sequence nodes
        xg.processReachableNodes(new NodeProcessor<Object>() {
            @Override
            public Object process(TextNode n) {
                if (n.getText().isEmptyString()) {
                    xg.setNode(n.getIndex(), new SequenceNode(new ArrayList<Integer>(), n.getOrigin()));
                }
                return this;
            }

            @Override
            public Object process(ChoiceNode n) {
                // collect first, so the contents are not modified while being iterated
                LinkedList<Integer> remove = new LinkedList<Integer>();
                for (int child : n.getContents()) {
                    Node node = xg.getNode(child);
                    if (node instanceof TextNode) {
                        TextNode tn = (TextNode) node;
                        if (tn.getText().isEmpty()) {
                            remove.add(child);
                        }
                    }
                }
                if (remove.size() > 0) {
                    LinkedHashSet<Integer> cs = new LinkedHashSet<Integer>(n.getContents());
                    cs.removeAll(remove);
                    n.setContent(cs, xg);
                }
                return this;
            }
        });
    }
}