/*
* $Id: ValidatingParser.java,v 1.1.1.1 2000/11/23 01:53:33 edwingo Exp $
*
* The Apache Software License, Version 1.1
*
*
* Copyright (c) 2000 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Crimson" and "Apache Software Foundation" must
* not be used to endorse or promote products derived from this
* software without prior written permission. For written
* permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* nor may "Apache" appear in their name, without prior written
* permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation and was
* originally based on software copyright (c) 1999, Sun Microsystems, Inc.,
* http://www.sun.com. For more information on the Apache Software
* Foundation, please see <http://www.apache.org/>.
*/
package org.apache.crimson.parser;
import java.util.Enumeration;
import java.util.StringTokenizer;
import java.util.Vector;
import org.xml.sax.HandlerBase;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.apache.crimson.util.XmlNames;
/**
* This parser tests XML documents against the validity constraints
* specified in the XML 1.0 specification as it parses them. It
* reports violations of those constraints using the standard SAX API.
*
* <P><em>This parser should be configured to use an <code>ErrorHandler</code>
* that rejects documents with validity errors; otherwise they will be accepted
* despite errors.</em> The default error handling, as specified by SAX,
* ignores all validity errors. The simplest way to have validity errors
* have a useful effect is to pass a boolean <em>true</em> value to
* the parser's constructor.
*
* <P> Note that most validity checks are performed during parsing by
* the base class, for efficiency. They're disabled by default in
* that class, and enabled by the constructor in this class.
*
* @author David Brownell
* @version $Revision: 1.1.1.1 $
*/
public class ValidatingParser extends Parser2
{
    /**
     * Every ID name encountered so far.  A name maps to Boolean.TRUE once
     * an ID attribute has declared it, and to Boolean.FALSE while it has
     * only been mentioned by an IDREF/IDREFS attribute.
     */
    private SimpleHashtable ids = new SimpleHashtable ();

    /** Validator instance handed out when an element's content type is strEMPTY. */
    private final EmptyValidator EMPTY = new EmptyValidator ();

    /** Constructs a SAX parser object with validation switched on. */
    public ValidatingParser ()
    {
        setIsValidating (true);
    }

    /**
     * Constructs a SAX parser object, optionally assigning the error
     * handler to report exceptions on recoverable errors (which include
     * all validity errors) as well as fatal errors.
     *
     * @param rejectValidityErrors When true, the parser will use an
     *  error handler which throws exceptions on recoverable errors.
     *  Otherwise it uses the default SAX error handler, which ignores
     *  such errors.
     */
    public ValidatingParser (boolean rejectValidityErrors)
    {
        this ();
        if (rejectValidityErrors) {
            // Rethrow whatever is reported through error() so that a
            // validity error aborts the parse.
            setErrorHandler (new HandlerBase () {
                public void error (SAXParseException x)
                throws SAXException
                {
                    throw x;
                }
            });
        }
    }

    // REMINDER:  validation errors are not fatal, so code flow
    // must continue correctly if error() returns.

    /**
     * Reports (V-024) every name that was used as an IDREF but never
     * declared by an ID attribute.  The scan happens here, after the
     * document element has been parsed, because XML allows forward
     * references and only now is it known whether all are resolved.
     *
     * Package private; overrides the base class method.
     *
     * @throws SAXException if the error handler rejects a reported error
     */
    void afterRoot () throws SAXException
    {
        for (Enumeration e = ids.keys (); e.hasMoreElements (); ) {
            String id = (String) e.nextElement ();

            // Compare by value, matching the equals() test used when the
            // table is updated in validateAttributeSyntax().
            if (Boolean.FALSE.equals (ids.get (id))) {
                error ("V-024", new Object [] { id });
            }
        }
    }

    /**
     * Forgets all ID bookkeeping so it does not carry over past the
     * end of the document.
     *
     * Package private; overrides the base class method.
     */
    void afterDocument ()
    {
        ids.clear ();
    }

    /**
     * Checks one attribute value against the constraints of its declared
     * type, reporting a validity error for each violation.  Types are
     * compared by reference against the AttributeDecl constants, exactly
     * as the original code did.
     *
     * Package private; overrides the base class method.
     *
     * @param attr declaration of the attribute being checked
     * @param value the attribute's value
     * @throws SAXException if the error handler rejects a reported error
     * @throws InternalError if the declared type is none of the known ones
     */
    void validateAttributeSyntax (AttributeDecl attr, String value)
    throws SAXException
    {
        // ID, IDREF(S) ... values are Names
        if (AttributeDecl.ID == attr.type) {
            if (!XmlNames.isName (value)) {
                error ("V-025", new Object [] { value });
            }

            Boolean declared = (Boolean) ids.getNonInterned (value);
            if (declared == null || declared.equals (Boolean.FALSE)) {
                // first declaration; also resolves any earlier IDREF
                ids.put (value.intern (), Boolean.TRUE);
            } else {
                // the same ID was already declared
                error ("V-026", new Object [] { value });
            }

        } else if (AttributeDecl.IDREF == attr.type) {
            checkIdReference (value);

        } else if (AttributeDecl.IDREFS == attr.type) {
            StringTokenizer tokenizer = new StringTokenizer (value);
            boolean sawValue = false;

            while (tokenizer.hasMoreTokens ()) {
                checkIdReference (tokenizer.nextToken ());
                sawValue = true;
            }
            if (!sawValue) {
                error ("V-039", null);
            }

        // NMTOKEN(S) ... values are Nmtoken(s)
        } else if (AttributeDecl.NMTOKEN == attr.type) {
            if (!XmlNames.isNmtoken (value)) {
                error ("V-028", new Object [] { value });
            }

        } else if (AttributeDecl.NMTOKENS == attr.type) {
            StringTokenizer tokenizer = new StringTokenizer (value);
            boolean sawValue = false;

            while (tokenizer.hasMoreTokens ()) {
                String token = tokenizer.nextToken ();
                if (!XmlNames.isNmtoken (token)) {
                    error ("V-028", new Object [] { token });
                }
                sawValue = true;
            }
            if (!sawValue) {
                error ("V-032", null);
            }

        // ENUMERATION ... values match one of the tokens
        } else if (AttributeDecl.ENUMERATION == attr.type) {
            if (!isListedValue (attr, value)) {
                error ("V-029", new Object [] { value });
            }

        // NOTATION values match a notation name
        } else if (AttributeDecl.NOTATION == attr.type) {
            //
            // XXX XML 1.0 spec should probably list references to
            // externally defined notations in standalone docs as
            // validity errors.  Ditto externally defined unparsed
            // entities; neither should show up in attributes, else
            // one needs to read the external declarations in order
            // to make sense of the document (exactly what tagging
            // a doc as "standalone" intends you won't need to do).
            //
            if (!isListedValue (attr, value)) {
                error ("V-030", new Object [] { value });
            }

        // ENTITY(IES) values match an unparsed entity(ies)
        } else if (AttributeDecl.ENTITY == attr.type) {
            // see note above re standalone
            if (!isUnparsedEntity (value)) {
                error ("V-031", new Object [] { value });
            }

        } else if (AttributeDecl.ENTITIES == attr.type) {
            StringTokenizer tokenizer = new StringTokenizer (value);
            boolean sawValue = false;

            while (tokenizer.hasMoreTokens ()) {
                String token = tokenizer.nextToken ();
                // see note above re standalone
                if (!isUnparsedEntity (token)) {
                    error ("V-031", new Object [] { token });
                }
                sawValue = true;
            }
            if (!sawValue) {
                error ("V-040", null);
            }

        } else if (AttributeDecl.CDATA != attr.type) {
            throw new InternalError (attr.type);
        }
    }

    /**
     * Handles a single IDREF name: reports V-027 when it is not a Name,
     * and records it as not-yet-declared unless the table already knows
     * the name (declared or merely referenced).
     */
    private void checkIdReference (String name) throws SAXException
    {
        if (!XmlNames.isName (name)) {
            error ("V-027", new Object [] { name });
        }
        if (ids.getNonInterned (name) == null) {
            ids.put (name.intern (), Boolean.FALSE);
        }
    }

    /** Tells whether value equals one of the entries of attr.values. */
    private boolean isListedValue (AttributeDecl attr, String value)
    {
        for (int i = 0; i < attr.values.length; i++) {
            if (value.equals (attr.values [i])) {
                return true;
            }
        }
        return false;
    }

    /**
     * Creates a content model node for the named element.
     *
     * Package private; overrides the base class method.
     */
    ContentModel newContentModel (String tag)
    {
        return new ContentModel (tag);
    }

    /**
     * Creates a content model node of the given type wrapping another.
     *
     * Package private; overrides the base class method.
     */
    ContentModel newContentModel (char type, ContentModel next)
    {
        return new ContentModel (type, next);
    }

    /**
     * Supplies the validator for an element declaration.  A validator
     * already cached on the declaration is returned as is.  When the
     * declaration has a content model, a new ChildrenValidator is built
     * on every call (and not cached) because that validator keeps
     * mutable state.  Otherwise the validator is chosen from the content
     * type, stored in element.validator, and returned.
     *
     * Package private; overrides the base class method.
     */
    ElementValidator newValidator (ElementDecl element)
    {
        if (element.validator != null) {
            return element.validator;
        }
        if (element.model != null) {
            return new ChildrenValidator (element);
        }

        //
        // most types of content model have very simple validation
        // algorithms; only "children" needs mutable state.
        //
        if (element.contentType == null || strANY == element.contentType) {
            element.validator = ElementValidator.ANY;
        } else if (strEMPTY == element.contentType) {
            element.validator = EMPTY;
        } else {
            // (element.contentType.charAt (1) == '#')
            element.validator = new MixedValidator (element);
        }
        return element.validator;
    }

    /** "EMPTY" model allows nothing: any child or text is error V-033. */
    class EmptyValidator extends ElementValidator
    {
        /** Reports V-033 for any child element. */
        public void consume (String token) throws SAXException
        {
            error ("V-033", null);
        }

        /** Reports V-033 for any text. */
        public void text () throws SAXException
        {
            error ("V-033", null);
        }
    }

    /** Mixed content models allow text with selected elements. */
    class MixedValidator extends ElementValidator
    {
        private ElementDecl element;

        MixedValidator (ElementDecl element)
        {
            this.element = element;
        }

        /**
         * Accepts the child element only when its name appears as a
         * complete alternative in the content type string; otherwise
         * reports V-034.
         *
         * NOTE(review): relies on the content type looking like
         * "(#PCDATA|a|b)*", i.e. names start at offset 9 and each one
         * is delimited by '|' or ')' -- confirm against the code that
         * builds contentType.
         *
         * @param type name of the child element just seen
         */
        public void consume (String type) throws SAXException
        {
            final int firstName = 9;    // length of "(#PCDATA|"
            String model = element.contentType;
            int index = model.indexOf (type, firstName);

            while (index >= firstName) {
                // Require '|' just before the match, so a hit that is
                // only the tail of a longer name ("|xxTYPE") is skipped.
                if (model.charAt (index - 1) == '|') {
                    // Require '|' or ')' just after the match, so a hit
                    // that is only the head of a longer name
                    // ("|TYPExx") is skipped as well.
                    char after = model.charAt (index + type.length ());
                    if (after == '|' || after == ')') {
                        return;
                    }
                }
                index = model.indexOf (type, index + 1);
            }
            error ("V-034", new Object [] { element.name, type, model });
        }
    }

    /**
     * Validates element-only ("children") content by stepping a
     * ContentModelState through the declared content model.
     */
    class ChildrenValidator extends ElementValidator
    {
        // current position in the content model; consume() treats a
        // null value as "no further children allowed"
        private ContentModelState state;
        // element name, used only in error messages
        private String name;

        ChildrenValidator (ElementDecl element)
        {
            state = new ContentModelState (element.model);
            name = element.name;
        }

        /**
         * Advances the content model past one child element.  Reports
         * V-035 when there is no state left to advance, and V-036 when
         * advancing throws EndOfInputException (state is left unchanged
         * in that case).
         */
        public void consume (String token) throws SAXException
        {
            if (state == null) {
                error ("V-035", new Object [] { name, token });
                return;
            }
            try {
                state = state.advance (token);
            } catch (EndOfInputException e) {
                error ("V-036", new Object [] { name, token });
            }
        }

        /** Text is never valid in element-only content: reports V-037. */
        public void text () throws SAXException
        {
            error ("V-037", new Object [] { name });
        }

        /**
         * Called when the element ends; reports V-038 if the content
         * model cannot terminate at the current state.
         */
        public void done () throws SAXException
        {
            if (state != null && !state.terminate ()) {
                error ("V-038", new Object [] { name });
            }
        }
    }

    /**
     * Tells whether the named entity is known, is an ExternalEntity,
     * and has a notation -- the test used here for "unparsed entity".
     */
    private boolean isUnparsedEntity (String name)
    {
        Object e = entities.getNonInterned (name);

        // instanceof is false for null, so no separate null test is needed
        return e instanceof ExternalEntity
            && ((ExternalEntity) e).notation != null;
    }
}