/*
* Copyright 2002-2009 Andy Clark, Marc Guillemot
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
* in compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
* the License.
*/
package com.googlecode.html;
import java.util.ArrayList;
import java.util.List;
import org.apache.xerces.util.XMLAttributesImpl;
import org.apache.xerces.xni.Augmentations;
import org.apache.xerces.xni.NamespaceContext;
import org.apache.xerces.xni.QName;
import org.apache.xerces.xni.XMLAttributes;
import org.apache.xerces.xni.XMLDocumentHandler;
import org.apache.xerces.xni.XMLLocator;
import org.apache.xerces.xni.XMLResourceIdentifier;
import org.apache.xerces.xni.XMLString;
import org.apache.xerces.xni.XNIException;
import org.apache.xerces.xni.parser.XMLComponentManager;
import org.apache.xerces.xni.parser.XMLConfigurationException;
import org.apache.xerces.xni.parser.XMLDocumentFilter;
import org.apache.xerces.xni.parser.XMLDocumentSource;
import com.googlecode.html.HTMLElements.Element;
import com.googlecode.html.filters.NamespaceBinder;
import com.googlecode.html.xercesbridge.XercesBridge;
/**
* Balances tags in an HTML document. This component receives document events and tries to correct
* many common mistakes that human (and computer) HTML document authors make. This tag balancer can:
* <ul>
* <li>add missing parent elements;
* <li>automatically close elements with optional end tags; and
* <li>handle mis-matched inline element tags.
* </ul>
* <p>
* This component recognizes the following features:
* <ul>
* <li>http://cyberneko.org/html/features/augmentations
* <li>http://cyberneko.org/html/features/report-errors
* <li>http://cyberneko.org/html/features/balance-tags/document-fragment
* <li>http://cyberneko.org/html/features/balance-tags/ignore-outside-content
* </ul>
* <p>
* This component recognizes the following properties:
* <ul>
* <li>http://cyberneko.org/html/properties/names/elems
* <li>http://cyberneko.org/html/properties/names/attrs
* <li>http://cyberneko.org/html/properties/error-reporter
* <li>http://cyberneko.org/html/properties/balance-tags/current-stack
* </ul>
*
* @see HTMLElements
*
* @author Andy Clark
* @author Marc Guillemot
*
* @version $Id: HTMLTagBalancer.java,v 1.20 2005/02/14 04:06:22 andyc Exp $
*/
public class HTMLTagBalancer implements XMLDocumentFilter, HTMLComponent {
//
// Constants
//
// features
/**
* Element info for each start element. This information is used when closing unbalanced inline
* elements. For example:
*
* <pre>
* <i>unbalanced <b>HTML</i> content</b>
* </pre>
* <p>
* It seems that it is a waste of processing and memory to copy the attributes for every start
* element even if there are no unbalanced inline elements in the document. However, if the
* attributes are <em>not</em> saved, then important attributes such as style information would
* be lost.
*
* @author Andy Clark
*/
public static class Info {
//
// Data
//
/** The element attributes. */
public XMLAttributes attributes;
/** The element. */
public HTMLElements.Element element;
/** The element qualified name. */
public QName qname;
//
// Constructors
//
/**
* Creates an element information object.
* <p>
* <strong>Note:</strong> This constructor makes a copy of the element information.
*
* @param element The element qualified name.
*/
public Info(HTMLElements.Element element, QName qname) {
this(element, qname, null);
} // <init>(HTMLElements.Element,QName)
/**
* Creates an element information object.
* <p>
* <strong>Note:</strong> This constructor makes a copy of the element information.
*
* @param element The element qualified name.
* @param attributes The element attributes.
*/
public Info(HTMLElements.Element element, QName qname, XMLAttributes attributes) {
this.element = element;
this.qname = new QName(qname);
if (attributes != null) {
int length = attributes.getLength();
if (length > 0) {
QName aqname = new QName();
XMLAttributes newattrs = new XMLAttributesImpl();
for (int i = 0; i < length; i++) {
attributes.getName(i, aqname);
String type = attributes.getType(i);
String value = attributes.getValue(i);
String nonNormalizedValue = attributes.getNonNormalizedValue(i);
boolean specified = attributes.isSpecified(i);
newattrs.addAttribute(aqname, type, value);
newattrs.setNonNormalizedValue(i, nonNormalizedValue);
newattrs.setSpecified(i, specified);
}
this.attributes = newattrs;
}
}
} // <init>(HTMLElements.Element,QName,XMLAttributes)
/**
* Simple representation to make debugging easier
*/
@Override
public String toString() {
return super.toString() + qname;
}
} // class Info
/** Unsynchronized stack of element information. */
public static class InfoStack {
//
// Data
//
/** The stack data. */
public Info[] data = new Info[10];
/** The top of the stack. */
public int top;
//
// Public methods
//
/** Peeks at the top of the stack. */
public Info peek() {
return data[top - 1];
} // peek():Info
/** Pops the top item off of the stack. */
public Info pop() {
return data[--top];
} // pop():Info
/** Pushes element information onto the stack. */
public void push(Info info) {
if (top == data.length) {
Info[] newarray = new Info[top + 10];
System.arraycopy(data, 0, newarray, 0, top);
data = newarray;
}
data[top++] = info;
} // push(Info)
/**
* Simple representation to make debugging easier
*/
@Override
public String toString() {
final StringBuffer sb = new StringBuffer("InfoStack(");
for (int i = top - 1; i >= 0; --i) {
sb.append(data[i]);
if (i != 0) {
sb.append(", ");
}
}
sb.append(")");
return sb.toString();
}
} // class InfoStack
/**
* Structure to hold information about an element placed in buffer to be comsumed later
*/
static class ElementEntry {
private final Augmentations augs_;
private final QName name_;
ElementEntry(final QName element, final Augmentations augs) {
name_ = new QName(element);
augs_ = augs == null ? null : new HTMLAugmentations(augs);
}
}
/**
* <font color="red">EXPERIMENTAL: may change in next release</font><br/>
* Name of the property holding the stack of elements in which context a document fragment should
* be parsed.
**/
public static final String FRAGMENT_CONTEXT_STACK = "http://cyberneko.org/html/properties/balance-tags/fragment-context-stack";
/** Include infoset augmentations. */
protected static final String AUGMENTATIONS = "http://cyberneko.org/html/features/augmentations";
/** Document fragment balancing only. */
protected static final String DOCUMENT_FRAGMENT = "http://cyberneko.org/html/features/balance-tags/document-fragment";
/** Document fragment balancing only (deprecated). */
protected static final String DOCUMENT_FRAGMENT_DEPRECATED = "http://cyberneko.org/html/features/document-fragment";
/** Error reporter. */
protected static final String ERROR_REPORTER = "http://cyberneko.org/html/properties/error-reporter";
// properties
/** Ignore outside content. */
protected static final String IGNORE_OUTSIDE_CONTENT = "http://cyberneko.org/html/features/balance-tags/ignore-outside-content";
/** Modify HTML attribute names: { "upper", "lower", "default" }. */
protected static final String NAMES_ATTRS = "http://cyberneko.org/html/properties/names/attrs";
/** Modify HTML element names: { "upper", "lower", "default" }. */
protected static final String NAMES_ELEMS = "http://cyberneko.org/html/properties/names/elems";
/** Lowercase HTML names. */
protected static final short NAMES_LOWERCASE = 2;
/** Match HTML element names. */
protected static final short NAMES_MATCH = 0;
/** Don't modify HTML names. */
protected static final short NAMES_NO_CHANGE = 0;
// modify HTML names
/** Uppercase HTML names. */
protected static final short NAMES_UPPERCASE = 1;
/** Namespaces. */
protected static final String NAMESPACES = "http://xml.org/sax/features/namespaces";
/** Report errors. */
protected static final String REPORT_ERRORS = "http://cyberneko.org/html/features/report-errors";
/** Synthesized event info item. */
protected static final HTMLEventInfo SYNTHESIZED_ITEM = new HTMLEventInfo.SynthesizedItem();
// static vars
/** Recognized features. */
private static final String[] RECOGNIZED_FEATURES = {
NAMESPACES, AUGMENTATIONS, REPORT_ERRORS, DOCUMENT_FRAGMENT_DEPRECATED,
DOCUMENT_FRAGMENT, IGNORE_OUTSIDE_CONTENT,};
//
// Data
//
// features
/** Recognized features defaults. */
private static final Boolean[] RECOGNIZED_FEATURES_DEFAULTS = {
null, null, null, null, Boolean.FALSE, Boolean.FALSE,};
/** Recognized properties. */
private static final String[] RECOGNIZED_PROPERTIES = {
NAMES_ELEMS, NAMES_ATTRS, ERROR_REPORTER, FRAGMENT_CONTEXT_STACK,};
/** Recognized properties defaults. */
private static final Object[] RECOGNIZED_PROPERTIES_DEFAULTS = {null, null, null, null,};
/**
* Converts HTML names string value to constant value.
*
* @see #NAMES_NO_CHANGE
* @see #NAMES_LOWERCASE
* @see #NAMES_UPPERCASE
*/
protected static final short getNamesValue(String value) {
if (value.equals("lower")) {
return NAMES_LOWERCASE;
}
if (value.equals("upper")) {
return NAMES_UPPERCASE;
}
return NAMES_NO_CHANGE;
} // getNamesValue(String):short
/** Modifies the given name based on the specified mode. */
protected static final String modifyName(String name, short mode) {
switch (mode) {
case NAMES_UPPERCASE:
return name.toUpperCase();
case NAMES_LOWERCASE:
return name.toLowerCase();
}
return name;
} // modifyName(String,short):String
/** Allows self closing iframe tags. */
protected boolean fAllowSelfclosingIframe;
// properties
/** Include infoset augmentations. */
protected boolean fAugmentations;
/** Document fragment balancing only. */
protected boolean fDocumentFragment;
/** The document handler. */
protected XMLDocumentHandler fDocumentHandler;
// connections
/** The document source. */
protected XMLDocumentSource fDocumentSource;
/** The element stack. */
protected final InfoStack fElementStack = new InfoStack();
// state
/** Error reporter. */
protected HTMLErrorReporter fErrorReporter;
/** Ignore outside content. */
protected boolean fIgnoreOutsideContent;
/** The inline stack. */
protected final InfoStack fInlineStack = new InfoStack();
/** Modify HTML attribute names. */
protected short fNamesAttrs;
/** Modify HTML element names. */
protected short fNamesElems;
/** Namespaces. */
protected boolean fNamespaces;
/** True if a form is in the stack (allow to discard opening of nested forms) */
protected boolean fOpenedForm;
/** Report errors. */
protected boolean fReportErrors;
/** True if seen anything. Important for xml declaration. */
protected boolean fSeenAnything;
// temp vars
/** True if seen <body< element. */
protected boolean fSeenBodyElement;
/** True if root element has been seen. */
protected boolean fSeenDoctype;
/** True if seen <head< element. */
protected boolean fSeenHeadElement;
/** True if root element has been seen. */
protected boolean fSeenRootElement;
/**
* True if seen the end of the document element. In other words, this variable is set to false
* <em>until</em> the end </HTML> tag is seen (or synthesized). This is used to ensure that
* extraneous events after the end of the document element do not make the document stream
* ill-formed.
*/
protected boolean fSeenRootElementEnd;
protected HTMLTagBalancingListener tagBalancingListener;
private List/* ElementEntry */endElementsBuffer_ = new ArrayList();
/** Empty attributes. */
private final XMLAttributes fEmptyAttrs = new XMLAttributesImpl();
/** Augmentations. */
private final HTMLAugmentations fInfosetAugs = new HTMLAugmentations();
private boolean forcedEndElement_ = false;
//
// HTMLComponent methods
//
private boolean forcedStartElement_ = false;
/** A qualified name. */
private final QName fQName = new QName();
//
// XMLComponent methods
//
/**
* Stack of elements determining the context in which a document fragment should be parsed
*/
private QName[] fragmentContextStack_ = null;
private int fragmentContextStackSize_ = 0; // not 0 only when a fragment is
// parsed and
// fragmentContextStack_ is set
private LostText lostText_ = new LostText();
/** Characters. */
public void characters(final XMLString text, final Augmentations augs) throws XNIException {
// check for end of document
if (fSeenRootElementEnd) {
return;
}
if (fElementStack.top == 0 && !fDocumentFragment) {
// character before first opening tag
lostText_.add(text, augs);
return;
}
// is this text whitespace?
boolean whitespace = true;
for (int i = 0; i < text.length; i++) {
if (!Character.isWhitespace(text.ch[text.offset + i])) {
whitespace = false;
break;
}
}
if (!fDocumentFragment) {
// handle bare characters
if (!fSeenRootElement) {
if (whitespace) {
return;
}
forceStartBody();
}
if (whitespace && (fElementStack.top < 2 || endElementsBuffer_.size() == 1)) {
// ignore spaces directly within <html>
return;
}
// handle character content in head
// NOTE: This frequently happens when the document looks like:
// <title>Title</title>
// And here's some text.
else if (!whitespace) {
Info info = fElementStack.peek();
if (info.element.code == HTMLElements.HEAD || info.element.code == HTMLElements.HTML) {
String hname = modifyName("head", fNamesElems);
String bname = modifyName("body", fNamesElems);
if (fReportErrors) {
fErrorReporter.reportWarning("HTML2009", new Object[]{hname, bname});
}
forceStartBody();
}
}
}
// call handler
if (fDocumentHandler != null) {
fDocumentHandler.characters(text, augs);
}
} // characters(XMLString,Augmentations)
/** Comment. */
public void comment(XMLString text, Augmentations augs) throws XNIException {
fSeenAnything = true;
consumeEarlyTextIfNeeded();
if (fDocumentHandler != null) {
fDocumentHandler.comment(text, augs);
}
} // comment(XMLString,Augmentations)
//
// XMLDocumentSource methods
//
/** Doctype declaration. */
public void doctypeDecl(String rootElementName, String publicId, String systemId,
Augmentations augs) throws XNIException {
fSeenAnything = true;
if (fReportErrors) {
if (fSeenRootElement) {
fErrorReporter.reportError("HTML2010", null);
} else if (fSeenDoctype) {
fErrorReporter.reportError("HTML2011", null);
}
}
if (!fSeenRootElement && !fSeenDoctype) {
fSeenDoctype = true;
if (fDocumentHandler != null) {
fDocumentHandler.doctypeDecl(rootElementName, publicId, systemId, augs);
}
}
} // doctypeDecl(String,String,String,Augmentations)
// @since Xerces 2.1.0
/** Empty element. */
public void emptyElement(final QName element, XMLAttributes attrs, Augmentations augs)
throws XNIException {
startElement(element, attrs, augs);
// browser ignore the closing indication for non empty tags like <form
// .../> but not for unknown element
final HTMLElements.Element elem = getElement(element);
if (elem.isEmpty() || elem.code == HTMLElements.UNKNOWN || elem.code == HTMLElements.IFRAME
&& fAllowSelfclosingIframe) {
endElement(element, augs);
}
} // emptyElement(QName,XMLAttributes,Augmentations)
//
// XMLDocumentHandler methods
//
// since Xerces-J 2.2.0
/** End CDATA section. */
public void endCDATA(Augmentations augs) throws XNIException {
// check for end of document
if (fSeenRootElementEnd) {
return;
}
// call handler
if (fDocumentHandler != null) {
fDocumentHandler.endCDATA(augs);
}
} // endCDATA(Augmentations)
// old methods
/** End document. */
public void endDocument(Augmentations augs) throws XNIException {
// </body> and </html> have been buffered to consider outside content
fIgnoreOutsideContent = true; // endElement should not ignore the elements
// passed from buffer
consumeBufferedEndElements();
// handle empty document
if (!fSeenRootElement && !fDocumentFragment) {
if (fReportErrors) {
fErrorReporter.reportError("HTML2000", null);
}
if (fDocumentHandler != null) {
fSeenRootElementEnd = false;
forceStartBody(); // will force <html> and <head></head>
final String body = modifyName("body", fNamesElems);
fQName.setValues(null, body, body, null);
callEndElement(fQName, synthesizedAugs());
final String ename = modifyName("html", fNamesElems);
fQName.setValues(null, ename, ename, null);
callEndElement(fQName, synthesizedAugs());
}
}
// pop all remaining elements
else {
int length = fElementStack.top - fragmentContextStackSize_;
for (int i = 0; i < length; i++) {
Info info = fElementStack.pop();
if (fReportErrors) {
String ename = info.qname.rawname;
fErrorReporter.reportWarning("HTML2001", new Object[]{ename});
}
if (fDocumentHandler != null) {
callEndElement(info.qname, synthesizedAugs());
}
}
}
// call handler
if (fDocumentHandler != null) {
fDocumentHandler.endDocument(augs);
}
} // endDocument(Augmentations)
/** End element. */
public void endElement(final QName element, final Augmentations augs) throws XNIException {
final boolean forcedEndElement = forcedEndElement_;
// is there anything to do?
if (fSeenRootElementEnd) {
notifyDiscardedEndElement(element, augs);
return;
}
// get element information
HTMLElements.Element elem = getElement(element);
// if we consider outside content, just buffer </body> and </html> to
// consider them at the very end
if (!fIgnoreOutsideContent
&& (elem.code == HTMLElements.BODY || elem.code == HTMLElements.HTML)) {
endElementsBuffer_.add(new ElementEntry(element, augs));
return;
}
// check for end of document
if (elem.code == HTMLElements.HTML) {
fSeenRootElementEnd = true;
} else if (elem.code == HTMLElements.FORM) {
fOpenedForm = false;
} else if (elem.code == HTMLElements.HEAD && !forcedEndElement) {
// consume </head> first when <body> is reached to retrieve content
// lost between </head> and <body>
endElementsBuffer_.add(new ElementEntry(element, augs));
return;
}
// empty element
int depth = getElementDepth(elem);
if (depth == -1) {
if (elem.code == HTMLElements.P) {
forceStartElement(element, emptyAttributes(), synthesizedAugs());
endElement(element, augs);
} else if (!elem.isEmpty()) {
notifyDiscardedEndElement(element, augs);
}
return;
}
// find unbalanced inline elements
if (depth > 1 && elem.isInline()) {
final int size = fElementStack.top;
fInlineStack.top = 0;
for (int i = 0; i < depth - 1; i++) {
final Info info = fElementStack.data[size - i - 1];
final HTMLElements.Element pelem = info.element;
if (pelem.isInline() || pelem.code == HTMLElements.FONT) { // TODO:
// investigate
// if
// only
// FONT
// NOTE: I don't have to make a copy of the info because
// it will just be popped off of the element stack
// as soon as we close it, anyway.
fInlineStack.push(info);
}
}
}
// close children up to appropriate element
for (int i = 0; i < depth; i++) {
Info info = fElementStack.pop();
if (fReportErrors && i < depth - 1) {
String ename = modifyName(element.rawname, fNamesElems);
String iname = info.qname.rawname;
fErrorReporter.reportWarning("HTML2007", new Object[]{ename, iname});
}
if (fDocumentHandler != null) {
// PATCH: Marc-André Morissette
callEndElement(info.qname, i < depth - 1 ? synthesizedAugs() : augs);
}
}
// re-open inline elements
if (depth > 1) {
int size = fInlineStack.top;
for (int i = 0; i < size; i++) {
Info info = fInlineStack.pop();
XMLAttributes attributes = info.attributes;
if (fReportErrors) {
String iname = info.qname.rawname;
fErrorReporter.reportWarning("HTML2008", new Object[]{iname});
}
forceStartElement(info.qname, attributes, synthesizedAugs());
}
}
} // endElement(QName,Augmentations)
/** End entity. */
public void endGeneralEntity(String name, Augmentations augs) throws XNIException {
// check for end of document
if (fSeenRootElementEnd) {
return;
}
// call handler
if (fDocumentHandler != null) {
fDocumentHandler.endGeneralEntity(name, augs);
}
} // endGeneralEntity(String,Augmentations)
/** End prefix mapping. */
public void endPrefixMapping(String prefix, Augmentations augs) throws XNIException {
// check for end of document
if (fSeenRootElementEnd) {
return;
}
// call handler
if (fDocumentHandler != null) {
XercesBridge.getInstance().XMLDocumentHandler_endPrefixMapping(fDocumentHandler, prefix,
augs);
}
} // endPrefixMapping(String,Augmentations)
/** Returns the document handler. */
public XMLDocumentHandler getDocumentHandler() {
return fDocumentHandler;
} // getDocumentHandler():XMLDocumentHandler
/** Returns the document source. */
public XMLDocumentSource getDocumentSource() {
return fDocumentSource;
} // getDocumentSource():XMLDocumentSource
/** Returns the default state for a feature. */
public Boolean getFeatureDefault(String featureId) {
int length = RECOGNIZED_FEATURES != null ? RECOGNIZED_FEATURES.length : 0;
for (int i = 0; i < length; i++) {
if (RECOGNIZED_FEATURES[i].equals(featureId)) {
return RECOGNIZED_FEATURES_DEFAULTS[i];
}
}
return null;
} // getFeatureDefault(String):Boolean
/** Returns the default state for a property. */
public Object getPropertyDefault(String propertyId) {
int length = RECOGNIZED_PROPERTIES != null ? RECOGNIZED_PROPERTIES.length : 0;
for (int i = 0; i < length; i++) {
if (RECOGNIZED_PROPERTIES[i].equals(propertyId)) {
return RECOGNIZED_PROPERTIES_DEFAULTS[i];
}
}
return null;
} // getPropertyDefault(String):Object
/** Returns recognized features. */
public String[] getRecognizedFeatures() {
return RECOGNIZED_FEATURES;
} // getRecognizedFeatures():String[]
/** Returns recognized properties. */
public String[] getRecognizedProperties() {
return RECOGNIZED_PROPERTIES;
} // getRecognizedProperties():String[]
/** Ignorable whitespace. */
public void ignorableWhitespace(XMLString text, Augmentations augs) throws XNIException {
characters(text, augs);
} // ignorableWhitespace(XMLString,Augmentations)
/** Processing instruction. */
public void processingInstruction(String target, XMLString data, Augmentations augs)
throws XNIException {
fSeenAnything = true;
consumeEarlyTextIfNeeded();
if (fDocumentHandler != null) {
fDocumentHandler.processingInstruction(target, data, augs);
}
} // processingInstruction(String,XMLString,Augmentations)
/** Resets the component. */
public void reset(final XMLComponentManager manager) throws XMLConfigurationException {
// get features
fNamespaces = manager.getFeature(NAMESPACES);
fAugmentations = manager.getFeature(AUGMENTATIONS);
fReportErrors = manager.getFeature(REPORT_ERRORS);
fDocumentFragment = manager.getFeature(DOCUMENT_FRAGMENT)
|| manager.getFeature(DOCUMENT_FRAGMENT_DEPRECATED);
fIgnoreOutsideContent = manager.getFeature(IGNORE_OUTSIDE_CONTENT);
fAllowSelfclosingIframe = manager.getFeature(HTMLScanner.ALLOW_SELFCLOSING_IFRAME);
// get properties
fNamesElems = getNamesValue(String.valueOf(manager.getProperty(NAMES_ELEMS)));
fNamesAttrs = getNamesValue(String.valueOf(manager.getProperty(NAMES_ATTRS)));
fErrorReporter = (HTMLErrorReporter) manager.getProperty(ERROR_REPORTER);
fragmentContextStack_ = (QName[]) manager.getProperty(FRAGMENT_CONTEXT_STACK);
} // reset(XMLComponentManager)
/** Sets the document handler. */
public void setDocumentHandler(XMLDocumentHandler handler) {
fDocumentHandler = handler;
} // setDocumentHandler(XMLDocumentHandler)
/** Sets the document source. */
public void setDocumentSource(XMLDocumentSource source) {
fDocumentSource = source;
} // setDocumentSource(XMLDocumentSource)
/** Sets a feature. */
public void setFeature(String featureId, boolean state) throws XMLConfigurationException {
if (featureId.equals(AUGMENTATIONS)) {
fAugmentations = state;
return;
}
if (featureId.equals(REPORT_ERRORS)) {
fReportErrors = state;
return;
}
if (featureId.equals(IGNORE_OUTSIDE_CONTENT)) {
fIgnoreOutsideContent = state;
return;
}
} // setFeature(String,boolean)
/** Sets a property. */
public void setProperty(String propertyId, Object value) throws XMLConfigurationException {
if (propertyId.equals(NAMES_ELEMS)) {
fNamesElems = getNamesValue(String.valueOf(value));
return;
}
if (propertyId.equals(NAMES_ATTRS)) {
fNamesAttrs = getNamesValue(String.valueOf(value));
return;
}
} // setProperty(String,Object)
/** Start CDATA section. */
public void startCDATA(Augmentations augs) throws XNIException {
fSeenAnything = true;
consumeEarlyTextIfNeeded();
// check for end of document
if (fSeenRootElementEnd) {
return;
}
// call handler
if (fDocumentHandler != null) {
fDocumentHandler.startCDATA(augs);
}
} // startCDATA(Augmentations)
/** Start document. */
public void startDocument(XMLLocator locator, String encoding, Augmentations augs)
throws XNIException {
startDocument(locator, encoding, null, augs);
} // startDocument(XMLLocator,String,Augmentations)
/** Start document. */
public void startDocument(XMLLocator locator, String encoding, NamespaceContext nscontext,
Augmentations augs) throws XNIException {
// reset state
fElementStack.top = 0;
if (fragmentContextStack_ != null) {
fragmentContextStackSize_ = fragmentContextStack_.length;
for (int i = 0; i < fragmentContextStack_.length; ++i) {
final QName name = fragmentContextStack_[i];
final Element elt = HTMLElements.getElement(name.localpart);
fElementStack.push(new Info(elt, name));
}
} else {
fragmentContextStackSize_ = 0;
}
fSeenAnything = false;
fSeenDoctype = false;
fSeenRootElement = false;
fSeenRootElementEnd = false;
fSeenHeadElement = false;
fSeenBodyElement = false;
// pass on event
if (fDocumentHandler != null) {
XercesBridge.getInstance().XMLDocumentHandler_startDocument(fDocumentHandler, locator,
encoding, nscontext, augs);
}
} // startDocument(XMLLocator,String,Augmentations)
// @since Xerces 2.1.0
/** Start element. */
public void startElement(final QName elem, XMLAttributes attrs, final Augmentations augs)
throws XNIException {
fSeenAnything = true;
final boolean isForcedCreation = forcedStartElement_;
forcedStartElement_ = false;
// check for end of document
if (fSeenRootElementEnd) {
notifyDiscardedStartElement(elem, attrs, augs);
return;
}
// get element information
final HTMLElements.Element element = getElement(elem);
final short elementCode = element.code;
// the creation of some elements like TABLE or SELECT can't be forced. Any
// others?
if (isForcedCreation
&& (elementCode == HTMLElements.TABLE || elementCode == HTMLElements.SELECT)) {
return; // don't accept creation
}
// ignore multiple html, head, body elements
if (fSeenRootElement && elementCode == HTMLElements.HTML) {
notifyDiscardedStartElement(elem, attrs, augs);
return;
}
if (elementCode == HTMLElements.HEAD) {
if (fSeenHeadElement) {
notifyDiscardedStartElement(elem, attrs, augs);
return;
}
fSeenHeadElement = true;
} else if (elementCode == HTMLElements.FRAMESET) {
consumeBufferedEndElements(); // </head> (if any) has been buffered
} else if (elementCode == HTMLElements.BODY) {
// create <head></head> if none was present
if (!fSeenHeadElement) {
final QName head = createQName("head");
forceStartElement(head, null, synthesizedAugs());
endElement(head, synthesizedAugs());
}
consumeBufferedEndElements(); // </head> (if any) has been buffered
if (fSeenBodyElement) {
notifyDiscardedStartElement(elem, attrs, augs);
return;
}
fSeenBodyElement = true;
} else if (elementCode == HTMLElements.FORM) {
if (fOpenedForm) {
notifyDiscardedStartElement(elem, attrs, augs);
return;
}
fOpenedForm = true;
} else if (elementCode == HTMLElements.UNKNOWN) {
consumeBufferedEndElements();
}
// if block element, save immediate parent inline elements
int depth = 0;
if (element.flags == 0) {
int length = fElementStack.top;
fInlineStack.top = 0;
for (int i = length - 1; i >= 0; i--) {
Info info = fElementStack.data[i];
if (!info.element.isInline()) {
break;
}
fInlineStack.push(info);
endElement(info.qname, synthesizedAugs());
}
depth = fInlineStack.top;
}
// close previous elements
// all elements close a <script>
// in head, no element has children
if (fElementStack.top > 1 && fElementStack.peek().element.code == HTMLElements.SCRIPT
|| fElementStack.top > 2
&& fElementStack.data[fElementStack.top - 2].element.code == HTMLElements.HEAD) {
final Info info = fElementStack.pop();
if (fDocumentHandler != null) {
callEndElement(info.qname, synthesizedAugs());
}
}
if (element.closes != null) {
int length = fElementStack.top;
for (int i = length - 1; i >= 0; i--) {
Info info = fElementStack.data[i];
// does it close the element we're looking at?
if (element.closes(info.element.code)) {
if (fReportErrors) {
String ename = elem.rawname;
String iname = info.qname.rawname;
fErrorReporter.reportWarning("HTML2005", new Object[]{ename, iname});
}
for (int j = length - 1; j >= i; j--) {
info = fElementStack.pop();
if (fDocumentHandler != null) {
// PATCH: Marc-André Morissette
callEndElement(info.qname, synthesizedAugs());
}
}
length = i;
continue;
}
// should we stop searching?
if (info.element.isBlock() || element.isParent(info.element)) {
break;
}
}
}
// TODO: investigate if only table is special here
// table closes all opened inline elements
else if (elementCode == HTMLElements.TABLE) {
for (int i = fElementStack.top - 1; i >= 0; i--) {
final Info info = fElementStack.data[i];
if (!info.element.isInline()) {
break;
}
endElement(info.qname, synthesizedAugs());
}
}
// call handler
fSeenRootElement = true;
if (element != null && element.isEmpty()) {
if (attrs == null) {
attrs = emptyAttributes();
}
if (fDocumentHandler != null) {
fDocumentHandler.emptyElement(elem, attrs, augs);
}
} else {
boolean inline = element != null && element.isInline();
fElementStack.push(new Info(element, elem, inline ? attrs : null));
if (attrs == null) {
attrs = emptyAttributes();
}
if (fDocumentHandler != null) {
callStartElement(elem, attrs, augs);
}
}
// re-open inline elements
for (int i = 0; i < depth; i++) {
Info info = fInlineStack.pop();
forceStartElement(info.qname, info.attributes, synthesizedAugs());
}
if (elementCode == HTMLElements.BODY) {
lostText_.refeed(this);
}
} // startElement(QName,XMLAttributes,Augmentations)
/** Start entity. */
public void startGeneralEntity(String name, XMLResourceIdentifier id, String encoding,
Augmentations augs) throws XNIException {
fSeenAnything = true;
// check for end of document
if (fSeenRootElementEnd) {
return;
}
// insert body, if needed
if (!fDocumentFragment) {
boolean insertBody = !fSeenRootElement;
if (!insertBody) {
Info info = fElementStack.peek();
if (info.element.code == HTMLElements.HEAD || info.element.code == HTMLElements.HTML) {
String hname = modifyName("head", fNamesElems);
String bname = modifyName("body", fNamesElems);
if (fReportErrors) {
fErrorReporter.reportWarning("HTML2009", new Object[]{hname, bname});
}
fQName.setValues(null, hname, hname, null);
endElement(fQName, synthesizedAugs());
insertBody = true;
}
}
if (insertBody) {
forceStartBody();
}
}
// call handler
if (fDocumentHandler != null) {
fDocumentHandler.startGeneralEntity(name, id, encoding, augs);
}
} // startGeneralEntity(String,XMLResourceIdentifier,String,Augmentations)
// removed since Xerces-J 2.3.0
/** Start prefix mapping. */
public void startPrefixMapping(String prefix, String uri, Augmentations augs)
throws XNIException {
// check for end of document
if (fSeenRootElementEnd) {
return;
}
// call handler
if (fDocumentHandler != null) {
XercesBridge.getInstance().XMLDocumentHandler_startPrefixMapping(fDocumentHandler, prefix,
uri, augs);
}
} // startPrefixMapping(String,String,Augmentations)
/** Text declaration. */
public void textDecl(String version, String encoding, Augmentations augs) throws XNIException {
fSeenAnything = true;
// check for end of document
if (fSeenRootElementEnd) {
return;
}
// call handler
if (fDocumentHandler != null) {
fDocumentHandler.textDecl(version, encoding, augs);
}
} // textDecl(String,String,Augmentations)
/** XML declaration. */
public void xmlDecl(String version, String encoding, String standalone, Augmentations augs)
throws XNIException {
if (!fSeenAnything && fDocumentHandler != null) {
fDocumentHandler.xmlDecl(version, encoding, standalone, augs);
}
} // xmlDecl(String,String,String,Augmentations)
//
// Protected methods
//
/** Call document handler end element. */
protected final void callEndElement(QName element, Augmentations augs) throws XNIException {
fDocumentHandler.endElement(element, augs);
} // callEndElement(QName,Augmentations)
/** Call document handler start element. */
protected final void callStartElement(QName element, XMLAttributes attrs, Augmentations augs)
throws XNIException {
fDocumentHandler.startElement(element, attrs, augs);
} // callStartElement(QName,XMLAttributes,Augmentations)
/** Returns a set of empty attributes. */
protected final XMLAttributes emptyAttributes() {
fEmptyAttrs.removeAllAttributes();
return fEmptyAttrs;
} // emptyAttributes():XMLAttributes
/** Returns an HTML element. */
protected HTMLElements.Element getElement(final QName elementName) {
String name = elementName.rawname;
if (fNamespaces && NamespaceBinder.XHTML_1_0_URI.equals(elementName.uri)) {
int index = name.indexOf(':');
if (index != -1) {
name = name.substring(index + 1);
}
}
return HTMLElements.getElement(name);
} // getElement(String):HTMLElements.Element
/**
* Returns the depth of the open tag associated with the specified element name or -1 if no
* matching element is found.
*
* @param element The element.
*/
protected final int getElementDepth(HTMLElements.Element element) {
final boolean container = element.isContainer();
final short elementCode = element.code;
final boolean tableBodyOrHtml = elementCode == HTMLElements.TABLE
|| elementCode == HTMLElements.BODY || elementCode == HTMLElements.HTML;
int depth = -1;
for (int i = fElementStack.top - 1; i >= fragmentContextStackSize_; i--) {
Info info = fElementStack.data[i];
if (info.element.code == element.code) {
depth = fElementStack.top - i;
break;
}
if (!container && info.element.isBlock()) {
break;
}
if (info.element.code == HTMLElements.TABLE && !tableBodyOrHtml) {
return -1; // current element not allowed to close a table
}
}
return depth;
} // getElementDepth(HTMLElements.Element)
/**
* Returns the depth of the open tag associated with the specified element parent names or -1 if
* no matching element is found.
*
* @param parents The parent elements.
*/
protected int getParentDepth(HTMLElements.Element[] parents, short bounds) {
if (parents != null) {
for (int i = fElementStack.top - 1; i >= 0; i--) {
Info info = fElementStack.data[i];
if (info.element.code == bounds) {
break;
}
for (int j = 0; j < parents.length; j++) {
if (info.element.code == parents[j].code) {
return fElementStack.top - i;
}
}
}
}
return -1;
} // getParentDepth(HTMLElements.Element[],short):int
/** Returns an augmentations object with a synthesized item added. */
protected final Augmentations synthesizedAugs() {
HTMLAugmentations augs = null;
if (fAugmentations) {
augs = fInfosetAugs;
augs.removeAllItems();
augs.putItem(AUGMENTATIONS, SYNTHESIZED_ITEM);
}
return augs;
} // synthesizedAugs():Augmentations
//
// Protected static methods
//
void setTagBalancingListener(final HTMLTagBalancingListener tagBalancingListener) {
this.tagBalancingListener = tagBalancingListener;
}
/**
* Consume elements that have been buffered, like </body></html> that are first consumed at the
* end of document
*/
private void consumeBufferedEndElements() {
final List toConsume = new ArrayList(endElementsBuffer_);
endElementsBuffer_.clear();
for (int i = 0; i < toConsume.size(); ++i) {
final ElementEntry entry = (ElementEntry) toConsume.get(i);
forcedEndElement_ = true;
endElement(entry.name_, entry.augs_);
}
endElementsBuffer_.clear();
}
//
// Classes
//
private void consumeEarlyTextIfNeeded() {
if (!lostText_.isEmpty()) {
if (!fSeenBodyElement) {
forceStartBody();
}
lostText_.refeed(this);
}
}
private QName createQName(String tagName) {
tagName = modifyName(tagName, fNamesElems);
return new QName(null, tagName, tagName, NamespaceBinder.XHTML_1_0_URI);
}
/**
* Generates a missing <body> (which creates missing <head> when needed)
*/
private void forceStartBody() {
final QName body = createQName("body");
if (fReportErrors) {
fErrorReporter.reportWarning("HTML2006", new Object[]{body.localpart});
}
forceStartElement(body, null, synthesizedAugs());
}
/**
* Forces an element start, taking care to set the information to allow startElement to "see"
* that's the element has been forced.
*
* @return <code>true</code> if creation could be done (TABLE's creation for instance can't be
* forced)
*/
private boolean forceStartElement(final QName elem, XMLAttributes attrs, final Augmentations augs)
throws XNIException {
forcedStartElement_ = true;
startElement(elem, attrs, augs);
return fElementStack.top > 0 && elem.equals(fElementStack.peek().qname);
}
/**
* Notifies the tagBalancingListener (if any) of an ignored end element
*/
private void notifyDiscardedEndElement(final QName element, final Augmentations augs) {
if (tagBalancingListener != null) {
tagBalancingListener.ignoredEndElement(element, augs);
}
}
/**
* Notifies the tagBalancingListener (if any) of an ignored start element
*/
private void notifyDiscardedStartElement(final QName elem, final XMLAttributes attrs,
final Augmentations augs) {
if (tagBalancingListener != null) {
tagBalancingListener.ignoredStartElement(elem, attrs, augs);
}
}
} // class HTMLTagBalancer