/*
* $Id: ValidatingParser.java,v 1.4 1999/04/11 16:07:50 db Exp $
*
* Copyright (c) 1998-1999 Sun Microsystems, Inc. All Rights Reserved.
*
* This software is the confidential and proprietary information of Sun
* Microsystems, Inc. ("Confidential Information"). You shall not
* disclose such Confidential Information and shall use it only in
* accordance with the terms of the license agreement you entered into
* with Sun.
*
* SUN MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF THE
* SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
* IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
* PURPOSE, OR NON-INFRINGEMENT. SUN SHALL NOT BE LIABLE FOR ANY DAMAGES
* SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING
* THIS SOFTWARE OR ITS DERIVATIVES.
*/
package com.sun.xml.parser;
import java.util.Enumeration;
import java.util.StringTokenizer;
import java.util.Vector;
import org.xml.sax.HandlerBase;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import com.sun.xml.util.XmlNames;
/**
* This parser tests XML documents against the validity constraints
* specified in the XML 1.0 specification as it parses them. It
* reports violations of those constraints using the standard SAX API.
*
* <P><em>This parser should be configured to use an <code>ErrorHandler</code>
* that rejects documents with validity errors, otherwise they will be accepted
* despite errors.</em> The default error handling, as specified by SAX,
* ignores all validity errors. The simplest way to have validity errors
* have a useful effect is to pass a boolean <em>true</em> value to
* the parser's constructor.
*
* <P> Note that most validity checks are performed during parsing by
* the base class, for efficiency. They're disabled by default in
* that class, and enabled by the constructor in this class.
*
* @author David Brownell
* @version $Revision: 1.4 $
*/
public class ValidatingParser extends Parser
{
private SimpleHashtable ids = new SimpleHashtable ();
/** Constructs a SAX parser object. */
public ValidatingParser ()
{
setIsValidating (true);
}
/**
* Constructs a SAX parser object, optionally assigning the error
* handler to report exceptions on recoverable errors (which include
* all validity errors) as well as fatal errors.
*
* @param rejectValidityErrors When true, the parser will use an
* error handler which throws exceptions on recoverable errors.
* Otherwise it uses the default SAX error handler, which ignores
* such errors.
*/
public ValidatingParser (boolean rejectValidityErrors)
{
this ();
if (rejectValidityErrors)
setErrorHandler (new HandlerBase () {
public void error (SAXParseException x)
throws SAXException
{ throw x; }
});
}
// REMINDER: validation errors are not fatal, so code flow
// must continue correctly if error() returns.
// package private ... overrides base class method
void afterRoot () throws SAXException
{
// Make sure all IDREFs match declared ID attributes. We scan
// after the document element is parsed, since XML allows forward
// references, and only now can we know if they're all resolved.
for (Enumeration e = ids.keys ();
e.hasMoreElements ();
) {
String id = (String) e.nextElement ();
Boolean value = (Boolean) ids.get (id);
if (Boolean.FALSE == value)
error ("V-024", new Object [] { id });
}
}
// package private ... overrides base class method
void afterDocument ()
{
ids.clear ();
}
// package private ... overrides base class method
void validateAttributeSyntax (AttributeDecl attr, String value)
throws SAXException
{
// ID, IDREF(S) ... values are Names
if (AttributeDecl.ID == attr.type) {
if (!XmlNames.isName (value))
error ("V-025", new Object [] { value });
Boolean b = (Boolean) ids.getNonInterned (value);
if (b == null || b.equals (Boolean.FALSE))
ids.put (value.intern (), Boolean.TRUE);
else
error ("V-026", new Object [] { value });
} else if (AttributeDecl.IDREF == attr.type) {
if (!XmlNames.isName (value))
error ("V-027", new Object [] { value });
Boolean b = (Boolean) ids.getNonInterned (value);
if (b == null)
ids.put (value.intern (), Boolean.FALSE);
} else if (AttributeDecl.IDREFS == attr.type) {
StringTokenizer tokenizer = new StringTokenizer (value);
Boolean b;
boolean sawValue = false;
while (tokenizer.hasMoreTokens ()) {
value = tokenizer.nextToken ();
if (!XmlNames.isName (value))
error ("V-027", new Object [] { value });
b = (Boolean) ids.getNonInterned (value);
if (b == null)
ids.put (value.intern (), Boolean.FALSE);
sawValue = true;
}
if (!sawValue)
error ("V-039", null);
// NMTOKEN(S) ... values are Nmtoken(s)
} else if (AttributeDecl.NMTOKEN == attr.type) {
if (!XmlNames.isNmtoken (value))
error ("V-028", new Object [] { value });
} else if (AttributeDecl.NMTOKENS == attr.type) {
StringTokenizer tokenizer = new StringTokenizer (value);
boolean sawValue = false;
while (tokenizer.hasMoreTokens ()) {
value = tokenizer.nextToken ();
if (!XmlNames.isNmtoken (value))
error ("V-028", new Object [] { value });
sawValue = true;
}
if (!sawValue)
error ("V-032", null);
// ENUMERATION ... values match one of the tokens
} else if (AttributeDecl.ENUMERATION == attr.type) {
for (int i = 0; i < attr.values.length; i++)
if (value.equals (attr.values [i]))
return;
error ("V-029", new Object [] { value });
// NOTATION values match a notation name
} else if (AttributeDecl.NOTATION == attr.type) {
//
// XXX XML 1.0 spec should probably list references to
// externally defined notations in standalone docs as
// validity errors. Ditto externally defined unparsed
// entities; neither should show up in attributes, else
// one needs to read the external declarations in order
// to make sense of the document (exactly what tagging
// a doc as "standalone" intends you won't need to do).
//
for (int i = 0; i < attr.values.length; i++)
if (value.equals (attr.values [i]))
return;
error ("V-030", new Object [] { value });
// ENTITY(IES) values match an unparsed entity(ies)
} else if (AttributeDecl.ENTITY == attr.type) {
// see note above re standalone
if (!isUnparsedEntity (value))
error ("V-031", new Object [] { value });
} else if (AttributeDecl.ENTITIES == attr.type) {
StringTokenizer tokenizer = new StringTokenizer (value);
boolean sawValue = false;
while (tokenizer.hasMoreTokens ()) {
value = tokenizer.nextToken ();
// see note above re standalone
if (!isUnparsedEntity (value))
error ("V-031", new Object [] { value });
sawValue = true;
}
if (!sawValue)
error ("V-040", null);
} else if (AttributeDecl.CDATA != attr.type)
throw new InternalError (attr.type);
}
// package private ... overrides base class method
ContentModel newContentModel (String tag)
{
return new ContentModel (tag);
}
// package private ... overrides base class method
ContentModel newContentModel (char type, ContentModel next)
{
return new ContentModel (type, next);
}
// package private ... overrides base class method
ElementValidator newValidator (ElementDecl element)
{
if (element.validator != null)
return element.validator;
if (element.model != null)
return new ChildrenValidator (element);
//
// most types of content model have very simple validation
// algorithms; only "children" needs mutable state.
//
if (element.contentType == null || strANY == element.contentType)
element.validator = ElementValidator.ANY;
else if (strEMPTY == element.contentType)
element.validator = EMPTY;
else // (element.contentType.charAt (1) == '#')
element.validator = new MixedValidator (element);
return element.validator;
}
private final EmptyValidator EMPTY = new EmptyValidator ();
// "EMPTY" model allows nothing
class EmptyValidator extends ElementValidator
{
public void consume (String token) throws SAXException
{ error ("V-033", null); }
public void text () throws SAXException
{ error ("V-033", null); }
}
// Mixed content models allow text with selected elements
class MixedValidator extends ElementValidator
{
private ElementDecl element;
MixedValidator (ElementDecl element)
{ this.element = element; }
public void consume (String type) throws SAXException
{
String model = element.contentType;
for (int index = 8; // skip "(#PCDATA|"
(index = model.indexOf (type, index + 1)) >= 9;
) {
char c;
// allow this type name to suffix -- "|xxTYPE"
if (model.charAt (index -1) != '|')
continue;
c = model.charAt (index + type.length ());
if (c == '|' || c == ')')
return;
// allow this type name to prefix -- "|TYPExx"
}
error ("V-034", new Object [] { element.name, type, model });
}
}
class ChildrenValidator extends ElementValidator
{
private ContentModelState state;
private String name;
ChildrenValidator (ElementDecl element)
{
state = new ContentModelState (element.model);
name = element.name;
}
public void consume (String token) throws SAXException
{
if (state == null)
error ("V-035", new Object [] { name, token });
else try {
state = state.advance (token);
} catch (EndOfInputException e) {
error ("V-036", new Object [] { name, token });
}
}
public void text () throws SAXException
{
error ("V-037", new Object [] { name });
}
public void done () throws SAXException
{
if (state != null && !state.terminate ())
error ("V-038", new Object [] { name });
}
}
private boolean isUnparsedEntity (String name)
{
Object e = entities.getNonInterned (name);
if (e == null || !(e instanceof ExternalEntity))
return false;
return ((ExternalEntity)e).notation != null;
}
}