Package jp.aonir.fuzzyxml

Source Code of jp.aonir.fuzzyxml.FuzzyXMLParser$Range

package jp.aonir.fuzzyxml;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.Stack;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import jp.aonir.fuzzyxml.event.FuzzyXMLErrorEvent;
import jp.aonir.fuzzyxml.event.FuzzyXMLErrorListener;
import jp.aonir.fuzzyxml.internal.FuzzyXMLAttributeImpl;
import jp.aonir.fuzzyxml.internal.FuzzyXMLCDATAImpl;
import jp.aonir.fuzzyxml.internal.FuzzyXMLCommentImpl;
import jp.aonir.fuzzyxml.internal.FuzzyXMLDocTypeImpl;
import jp.aonir.fuzzyxml.internal.FuzzyXMLDocumentImpl;
import jp.aonir.fuzzyxml.internal.FuzzyXMLElementImpl;
import jp.aonir.fuzzyxml.internal.FuzzyXMLPreImpl;
import jp.aonir.fuzzyxml.internal.FuzzyXMLProcessingInstructionImpl;
import jp.aonir.fuzzyxml.internal.FuzzyXMLScriptImpl;
import jp.aonir.fuzzyxml.internal.FuzzyXMLStyleImpl;
import jp.aonir.fuzzyxml.internal.FuzzyXMLTextImpl;
import jp.aonir.fuzzyxml.internal.FuzzyXMLUtil;
import jp.aonir.fuzzyxml.internal.RenderContext;
import jp.aonir.fuzzyxml.resources.Messages;

import org.objectstyle.wolips.wodclipse.core.util.WodHtmlUtils;

public class FuzzyXMLParser {

  // Nodes that are currently open, innermost last; getParent() returns the top entry.
  private Stack<FuzzyXMLNode> _stack = new Stack<FuzzyXMLNode>();
  // The unmodified text passed to parse(String); handler offsets index into it.
  private String _originalSource;
  // Top-level nodes collected while parsing; wrapped in a "document" element by parse(String).
  private List<FuzzyXMLNode> _roots;
  // The first DOCTYPE declaration encountered, or null when none has been seen.
  private FuzzyXMLDocType _docType;

  // Listeners notified by fireErrorEvent.
  private List<FuzzyXMLErrorListener> _listeners = new ArrayList<FuzzyXMLErrorListener>();
  // Elements whose start tag has been handled but which have not been closed yet.
  private List<FuzzyXMLElement> _nonCloseElements = new ArrayList<FuzzyXMLElement>();
  // Namespaces (lowercase) whose bare name may close any element in that namespace.
  private List<String> _looseNamespaces = new ArrayList<String>();
  // Tag names (lowercase) that are closed implicitly as soon as the next node is handled.
  private List<String> _autocloseTags = new ArrayList<String>();
  // Tag names (lowercase) that may be left unclosed without an error being reported.
  private List<String> _looseTags = new ArrayList<String>();

  // When true, attribute values are validated instead of being scanned for nested WO tags.
  private boolean _wellFormedRequired = false;
  // Passed to FuzzyXMLUtil.decode and to the resulting document via setHTML.
  private boolean _isHTML = false;

  // Regular expressions used for parsing.
  private Pattern _tag = Pattern.compile("<((|/)([^<>]*))([^<]?|>)");
  // private Pattern attr =
  // Pattern.compile("([\\w:]+?)\\s*=(\"|')([^\"]*?)\\2");
  private Pattern _docTypeName = Pattern.compile("^<!DOCTYPE[ \r\n\t]+([\\w\\-_]*)");
  private Pattern _docTypePublic = Pattern.compile("PUBLIC[ \r\n\t]+\"([^\"]*)\"[ \r\n\t]*\"*([^\">]*)\"*");
  private Pattern _docTypeSystem = Pattern.compile("SYSTEM[ \r\n\t]+\"([^\"]*)\"");
  private Pattern _docTypeSubset = Pattern.compile("\\[([^\\]]*)\\]>");
  // Characters that checkAttributeValue reports as needing to be escaped.
  private Pattern _invalidStringPattern = Pattern.compile("([<>&])");
  // Matches a closing PRE tag in any letter case, with optional inner whitespace.
  private Pattern _preCloseTagPattern = Pattern.compile("<\\s*/\\s*PRE\\s*>", Pattern.CASE_INSENSITIVE);

  /**
   * Creates a parser with the HTML flag switched off.
   *
   * @param wellFormedRequired
   *          forwarded unchanged to the two-argument constructor
   */
  public FuzzyXMLParser(boolean wellFormedRequired) {
    this(wellFormedRequired, false);
  }

  public FuzzyXMLParser(boolean wellFormedRequired, boolean isHTML) {
    super();
    _wellFormedRequired = wellFormedRequired;
    _roots = new LinkedList<FuzzyXMLNode>();
    _isHTML = isHTML;
    // MS: Hardcoded that "wo" is a loose namespace
    addLooseNamespace("wo");
    addLooseNamespace("webobject");
    addLooseNamespace("webobjects");
    if (!_wellFormedRequired) {
      addAutocloseTag("img");
      addAutocloseTag("br");
      addAutocloseTag("hr");
      addAutocloseTag("meta");
      addAutocloseTag("link");
      addAutocloseTag("input");
      addAutocloseTag("spacer");
      addAutocloseTag("frame");
      addAutocloseTag("basefont");
      addAutocloseTag("base");
      addAutocloseTag("area");
      addAutocloseTag("col");
      addAutocloseTag("isindex");
      addAutocloseTag("param");
      addLooseTag("p");
      addLooseTag("li");
    }
  }

  /**
   * Registers an autoclose tag. An autoclose tag is like br or link: it
   * commonly does not have a closing tag, and it never has contents. The tag
   * is also registered as a loose tag.
   *
   * @param autocloseTag
   *          the name of the tag to make autoclosing; lookups elsewhere in
   *          this class use the lowercased element name
   */
  public void addAutocloseTag(String autocloseTag) {
    _autocloseTags.add(autocloseTag);
    addLooseTag(autocloseTag);
  }

  /**
   * Registers a loose tag. A "loose" tag is like li or p: people often do not
   * close them properly, but they may have content.
   *
   * @param looseTag
   *          the name of the tag to make loose; lookups elsewhere in this
   *          class use the lowercased element name
   */
  public void addLooseTag(String looseTag) {
    _looseTags.add(looseTag);
  }

  /**
   * Registers a loose namespace. A "loose" namespace is like the wo:
   * namespace: wo:if does not need a corresponding wo:if close tag, a plain
   * wo close tag is enough.
   *
   * @param namespace
   *          the name of the namespace to make loose; it is compared against
   *          the lowercased prefix of the open element's name
   */
  public void addLooseNamespace(String namespace) {
    _looseNamespaces.add(namespace);
  }

  /**
   * Adds a listener for error handling. Every registered listener receives
   * each error event fired by this parser.
   *
   * @param listener
   *          the listener to add
   */
  public void addErrorListener(FuzzyXMLErrorListener listener) {
    _listeners.add(listener);
  }

  /**
   * Builds one error event from the arguments and delivers it to every
   * registered listener, in registration order.
   *
   * @param offset
   *          offset passed to the event
   * @param length
   *          length passed to the event
   * @param message
   *          message passed to the event
   * @param node
   *          node passed to the event; several callers pass null
   */
  private void fireErrorEvent(int offset, int length, String message, FuzzyXMLNode node) {
    FuzzyXMLErrorEvent evt = new FuzzyXMLErrorEvent(offset, length, message, node);
    for (FuzzyXMLErrorListener listener : _listeners) {
      listener.error(evt);
    }
  }

  /**
   * ��̓X�g���[������XML�h�L�������g���p�[�X���܂��B
   * �����R�[�h��XML�錾�ɂ��������Ĕ��ʂ���܂��B
   *
   * @param in
   *          ��̓X�g���[��
   * @return �p�[�X����
   * @throws IOException
   */
  public FuzzyXMLDocument parse(InputStream in) throws IOException {
    byte[] bytes = FuzzyXMLUtil.readStream(in);
    String encode = FuzzyXMLUtil.getEncoding(bytes);
    if (encode == null) {
      return parse(new String(bytes));
    }
    return parse(new String(bytes, encode));
  }

  /**
   * �t�@�C������XML�h�L�������g���p�[�X���܂��B
   * �����R�[�h��XML�錾�ɂ��������Ĕ��ʂ���܂��B
   *
   * @param file
   *          �t�@�C��
   * @return �p�[�X����
   * @throws IOException
   */
  public FuzzyXMLDocument parse(File file) throws IOException {
    byte[] bytes = FuzzyXMLUtil.readStream(new FileInputStream(file));
    String encode = FuzzyXMLUtil.getEncoding(bytes);
    if (encode == null) {
      return parse(new String(bytes));
    }
    return parse(new String(bytes, encode));
  }

  protected int _parse(String source, int initialOffset, boolean woOnly, boolean parseAsSynthetic) {
    // �p�[�X���J�n
    Matcher matcher = _tag.matcher(source);
    int lastIndex = initialOffset - 1;
    while (matcher.find()) {
      int start = matcher.start() + initialOffset;
      int end = matcher.end() + initialOffset;
      if (lastIndex == -1 && start > 0) {
        handleText(0, start, true);
      }
      else if (lastIndex != (initialOffset - 1) && lastIndex < start) {
        handleText(lastIndex, start, true);
      }
      String originalText = matcher.group(1);
      String text = originalText.trim();
      // �‚��^�O
      if (!woOnly && text.startsWith("%")) {
        // ignore
        handleText(start, end, false);
      }
      else if (!woOnly && text.startsWith("?")) {
        handleDeclaration(start, end);
      }
      else if (!woOnly && (text.startsWith("!DOCTYPE") || text.startsWith("!doctype"))) {
        handleDoctype(start, end, text);
      }
      else if (!woOnly && text.startsWith("![CDATA[")) {
        handleCDATA(start, end, _originalSource.substring(start, end));
      }
      else if (!woOnly && (text.equalsIgnoreCase("pre") || text.toLowerCase().startsWith("pre "))) {
        end = handlePreTag(start, end);
        matcher.region(end, source.length());
      }
      else if (text.startsWith("/") && (!woOnly || WodHtmlUtils.isWOTag(text.substring(1)))) {
        handleCloseTag(start, end, text);
      }
      else if (text.endsWith("/") && (!woOnly || WodHtmlUtils.isWOTag(text))) {
        if (originalText.endsWith(" ")) {
          fireErrorEvent(start, end - start, "You can not have a space between the / and the > in your webobject tags.", null);
        }
        handleEmptyTag(start, end, parseAsSynthetic);
      }
      else if (!woOnly && text.startsWith("!--")) {
        end = _originalSource.indexOf("-->", start);
        if (end > 0) {
          end += 3;
        }
        handleComment(start, end, _originalSource.substring(start, end));
        matcher.region(end, source.length());
      }
      else if (!woOnly || WodHtmlUtils.isWOTag(text)) {
        handleStartTag(start, end, parseAsSynthetic);
      }
      lastIndex = end;
    }
    return lastIndex;
  }

  /**
   * ��Ƃ��ēn���ꂽXML�\�[�X���p�[�X����FuzzyXMLDocument�I�u�W�F�N�g��ԋp���܂��B
   *
   * @param source
   *          XML�\�[�X
   * @return �p�[�X���ʂ�FuzzyXMLDocument�I�u�W�F�N�g
   */
  public FuzzyXMLDocument parse(String source) {
    // �I���W�i���̃\�[�X��ۑ����Ă���
    _originalSource = source;
    // �R�����g�ACDATA�ADOCTYPE����������
    source = FuzzyXMLUtil.comment2space(source, true);
    source = FuzzyXMLUtil.escapeScript(source);
    source = FuzzyXMLUtil.scriptlet2space(source, true);
    source = FuzzyXMLUtil.cdata2space(source, true);
    source = FuzzyXMLUtil.doctype2space(source, true);
    source = FuzzyXMLUtil.processing2space(source, true);
    source = FuzzyXMLUtil.escapeString(source);

    int lastIndex = _parse(source, 0, false, false);

    if (_stack.size() > 0 && _nonCloseElements.size() > 0) {
      FuzzyXMLElementImpl lastElement = (FuzzyXMLElementImpl) _nonCloseElements.get(_nonCloseElements.size() - 1);
      String lowercaseLastElementName = lastElement.getName().toLowerCase();
      if (!_looseTags.contains(lowercaseLastElementName)) {
        fireErrorEvent(lastElement.getOffset(), lastElement.getLength(), Messages.getMessage("error.noCloseTag", lastElement.getName()), null);
      }

      for (FuzzyXMLNode openNode : _stack) {
        if (openNode instanceof FuzzyXMLElementImpl) {
          FuzzyXMLElementImpl openElement = (FuzzyXMLElementImpl) openNode;
          openElement.setLength(lastIndex - openElement.getOffset());
          if (openElement.getParentNode() == null) {
            _roots.add(openElement);
          }
          else {
            ((FuzzyXMLElementImpl) openElement.getParentNode()).appendChildWithNoCheck(openElement);
          }
        }
      }
    }

    // MS: Capture trailing text that isn't inside of a tag at all
    if (lastIndex != source.length()) {
      handleText(Math.max(0, lastIndex), source.length(), true);
    }

    FuzzyXMLElement docElement = null;
    if (_roots.size() == 0) {
      docElement = new FuzzyXMLElementImpl(null, "document", 0, _originalSource.length(), 0);
      // docElement.appendChild(root);
    }
    else {
      FuzzyXMLNode firstRoot = _roots.get(0);
      FuzzyXMLNode lastRoot = _roots.get(_roots.size() - 1);
      docElement = new FuzzyXMLElementImpl(null, "document", firstRoot.getOffset(), lastRoot.getOffset() + lastRoot.getLength() - firstRoot.getOffset(), 0);
      for (FuzzyXMLNode root : _roots) {
        ((FuzzyXMLElementImpl) docElement).appendChildWithNoCheck(root);
      }
    }
    FuzzyXMLDocumentImpl doc = new FuzzyXMLDocumentImpl(docElement, _docType);
    doc.setHTML(_isHTML);
    return doc;
  }

  /** CDATA�m�[�h���������܂��B */
  private void handleCDATA(int offset, int end, String text) {
    closeAutocloseTags();
    text = text.replaceFirst("<!\\[CDATA\\[", "");
    text = text.replaceFirst("\\]\\]>", "");
    FuzzyXMLCDATAImpl cdata = new FuzzyXMLCDATAImpl(getParent(), text, offset, end - offset);
    if (getParent() != null) {
      ((FuzzyXMLElement) getParent()).appendChild(cdata);
    }
    else {
      _roots.add(cdata);
    }

    _stack.push(cdata);
    _parse(text, offset + "<![CDATA[".length(), true, true);
    FuzzyXMLNode poppedNode = _stack.pop();
    if (poppedNode != cdata) {
      _stack.push(poppedNode);
    }
  }

  private int handlePreTag(int offset, int end) {
    closeAutocloseTags();
    String[] content = _preCloseTagPattern.split(_originalSource.substring(end, _originalSource.length()), 2);
    String text = content[0];
    TagInfo info = parseTagContents(_originalSource.substring(offset + 1, end - 1));
    FuzzyXMLPreImpl preNode = new FuzzyXMLPreImpl(getParent(), text, offset, text.length());
    handleStartTag(preNode, info, offset, end);
    String preBlock = _originalSource.substring(offset, end + text.length() + 1);
    return _parse(preBlock, offset, true, false) - 1;
  }

  /** �e�L�X�g�m�[�h���������܂��B */
  private void handleText(int offset, int end, boolean escape) {
    String text = _originalSource.substring(offset, end);
    // System.out.println("FuzzyXMLParser.handleText: '" + text + "'");
    closeAutocloseTags();
    FuzzyXMLTextImpl textNode = new FuzzyXMLTextImpl(getParent(), FuzzyXMLUtil.decode(text, _isHTML), offset, end - offset);
    textNode.setEscape(escape);
    if (getParent() != null) {
      ((FuzzyXMLElement) getParent()).appendChild(textNode);
    }
    else {
      _roots.add(textNode);
    }
  }

  /** XML�錾�i�������߁j���������܂��B */
  private void handleDeclaration(int offset, int end) {
    closeAutocloseTags();
    String text = _originalSource.substring(offset, end);
    text = text.replaceFirst("^<\\?", "");
    text = text.replaceFirst("\\?>$", "");
    text = text.trim();

    String[] dim = text.split("[ \r\n\t]+");
    String name = dim[0];
    String data = text.substring(name.length()).trim();

    FuzzyXMLProcessingInstructionImpl pi = new FuzzyXMLProcessingInstructionImpl(null, name, data, offset, end - offset);
    if (getParent() != null) {
      // �]�v�ȕ��������
      ((FuzzyXMLElement) getParent()).appendChild(pi);
    }
    else {
      _roots.add(pi);
    }

    // XML should not have autoclosing tags
    if (name.startsWith("xml")) {
      _autocloseTags.clear();
    }
  }

  /** DOCTYPE�錾���������܂��B */
  private void handleDoctype(int offset, int end, String text) {
    closeAutocloseTags();
    if (_docType == null) {
      String name = "";
      String publicId = "";
      String systemId = "";
      String internalSubset = "";

      text = _originalSource.substring(offset, end);
      Matcher matcher = _docTypeName.matcher(text);
      if (matcher.find()) {
        name = matcher.group(1);
      }
      matcher = _docTypePublic.matcher(text);
      if (matcher.find()) {
        publicId = matcher.group(1);
        systemId = matcher.group(2);
      }
      else {
        matcher = _docTypeSystem.matcher(text);
        if (matcher.find()) {
          systemId = matcher.group(1);
        }
      }
      matcher = _docTypeSubset.matcher(text);
      if (matcher.find()) {
        internalSubset = matcher.group(1);
      }
      _docType = new FuzzyXMLDocTypeImpl(null, name, publicId, systemId, internalSubset, offset, end - offset);
    }
  }

  private void closeAutocloseTags() {
    if (_stack.size() > 0) {
      FuzzyXMLElementImpl lastOpenElement = (FuzzyXMLElementImpl) _stack.peek();
      String name = lastOpenElement.getName().toLowerCase();
      if (_autocloseTags.contains(name) || lastOpenElement.isForbiddenFromHavingChildren()) {
        int openTagEndOffset = lastOpenElement.getOffset() + lastOpenElement.getOpenTagLength();
        handleCloseTag(openTagEndOffset, openTagEndOffset, "/" + name, false);
      }
    }
  }

  /**
   * Processes a close tag, reporting mismatch errors to the listeners.
   *
   * @param offset
   *          start offset of the close tag
   * @param end
   *          end offset of the close tag
   * @param text
   *          the tag text starting with '/'
   */
  private void handleCloseTag(int offset, int end, String text) {
    handleCloseTag(offset, end, text, true);
  }

  /**
   * Processes a close tag against the stack of open elements.
   * <p>
   * The innermost open element is popped. If its name equals the close tag
   * name (ignoring case) it is closed directly. Otherwise, in order:
   * <ul>
   * <li>a close tag naming only a loose namespace closes an open element in
   * that namespace;</li>
   * <li>if the open element is a loose tag, loose elements are closed one
   * after another until the names match, a non-loose element is reached, or
   * the stack is empty;</li>
   * <li>otherwise, if no unclosed element carries the close tag's name, the
   * open element is pushed back and the close tag is ignored; if one does, the
   * open element is closed implicitly and the next stack entry is closed by
   * this tag.</li>
   * </ul>
   * Does nothing when no element is open.
   *
   * @param offset
   *          start offset of the close tag
   * @param end
   *          end offset of the close tag
   * @param text
   *          the tag text starting with '/'
   * @param showMismatchError
   *          whether mismatch and missing-close errors are reported
   */
  private void handleCloseTag(int offset, int end, String text, boolean showMismatchError) {
    if (_stack.size() == 0) {
      return;
    }
    // Drop the leading '/'.
    String tagName = text.substring(1).trim();

    // MS: Chuck does close tags like </webobject closing something else>
    int chuckIndex = tagName.indexOf(' ');
    if (chuckIndex != -1) {
      String chuckWord = tagName.substring(0, chuckIndex);
      if (WodHtmlUtils.isWOTag(chuckWord)) {
        // Only the first word counts as the tag name.
        tagName = chuckWord;
      }
    }

    FuzzyXMLElementImpl lastOpenElement = (FuzzyXMLElementImpl) _stack.pop();
    String lowercaseLastOpenElementName = lastOpenElement.getName().toLowerCase();
    String lowercaseCloseTagName = tagName.toLowerCase();

    boolean closeTagMatches = lowercaseLastOpenElementName.equals(lowercaseCloseTagName);
    if (!closeTagMatches) {
      // lastOpenElement is already popped, so this acts on the entry below it.
      closeAutocloseTags();

      // Allow </wo> to close </wo:if>
      boolean looseNamespace = false;
      int colonIndex = lowercaseLastOpenElementName.indexOf(':');
      if (colonIndex != -1) {
        String elementNamespace = lowercaseLastOpenElementName.substring(0, colonIndex);
        if (lowercaseCloseTagName.equals(elementNamespace) && _looseNamespaces.contains(elementNamespace)) {
          tagName = lastOpenElement.getName();
          lowercaseCloseTagName = lowercaseLastOpenElementName;
          looseNamespace = true;
        }
      }

      if (!looseNamespace) {
        boolean looseTag = false;
        if (_looseTags.contains(lowercaseLastOpenElementName)) {
          looseTag = true;
        }

        if (looseTag) {
          // Close loose elements one by one until the names match, a
          // non-loose element is reached, or the stack is empty.
          while (lowercaseLastOpenElementName != null && !lowercaseLastOpenElementName.equals(lowercaseCloseTagName) && _looseTags.contains(lowercaseLastOpenElementName)) {
            int lastOpenElementEndOffset = end;
            // Push the element back so the recursive call pops and closes it
            // with a zero-length close tag at the end of the real close tag.
            _stack.push(lastOpenElement);
            handleCloseTag(lastOpenElementEndOffset, lastOpenElementEndOffset, "/" + lastOpenElement.getName(), false);

            // (An earlier, disabled variant re-parented the loose element's
            // children onto its parent here.)

            if (_stack.size() == 0) {
              lastOpenElement = null;
              lowercaseLastOpenElementName = null;
            }
            else {
              lastOpenElement = (FuzzyXMLElementImpl) _stack.pop();
              lowercaseLastOpenElementName = lastOpenElement.getName().toLowerCase();
            }
          }
        }
        else {
          // Look for any still-unclosed element carrying the close tag's
          // name; the last match in the list wins.
          FuzzyXMLElement matchingOpenElement = null;
          for (FuzzyXMLElement nonCloseElement : _nonCloseElements) {
            if (nonCloseElement.getName().equalsIgnoreCase(lowercaseCloseTagName)) {
              matchingOpenElement = nonCloseElement;
            }
          }
          if (matchingOpenElement == null) {
            // Stray close tag: report it, restore the stack and ignore it.
            if (showMismatchError) {
              fireErrorEvent(offset, end - offset, Messages.getMessage("error.noStartTag", tagName), null);
            }
            _stack.push(lastOpenElement);
            return;
          }

          if (showMismatchError) {
            fireErrorEvent(lastOpenElement.getOffset(), lastOpenElement.getLength(), "Missing </" + lastOpenElement.getName() + "> tag", null);
          }
          // Close the mismatched element implicitly at the start of this close
          // tag, then continue with the next stack entry.
          // NOTE(review): the pop below assumes the stack is not empty after
          // the implicit close - confirm.
          _stack.push(lastOpenElement);
          handleCloseTag(offset, offset, "/" + lastOpenElement.getName(), false);
          lastOpenElement = (FuzzyXMLElementImpl) _stack.pop();
          lowercaseLastOpenElementName = lastOpenElement.getName().toLowerCase();
        }
        // (A large disabled block used to follow here; it re-parented children
        // of mismatched elements and retried the close tag.)
      }
    }

    if (lastOpenElement != null) {
      // For an empty tag, an empty text node used to be added here.
      if (lastOpenElement.getChildren().length == 0) {
        // MS: Hopefully this doesn't break things ... Sure wish I could read
        // Japanese to know what the original author said about this :)
        // lastOpenElement.appendChild(new FuzzyXMLTextImpl(getParent(), "",
        // offset, 0));
      }
      lastOpenElement.setLength(end - lastOpenElement.getOffset());
      if (closeTagMatches) {
        // Close-tag positions are only recorded for a direct name match.
        lastOpenElement.setCloseTagOffset(offset);
        lastOpenElement.setCloseTagLength(end - offset - 2);
        lastOpenElement.setCloseNameOffset(text.indexOf(tagName));
      }
      _nonCloseElements.remove(lastOpenElement);
      if (lastOpenElement.getParentNode() == null) {
        _roots.add(lastOpenElement);
        // A root element was closed: everything still unclosed is reported.
        for (FuzzyXMLElement error : _nonCloseElements) {
          if (showMismatchError) {
            fireErrorEvent(error.getOffset(), error.getLength(), Messages.getMessage("error.noCloseTag", error.getName()), error);
          }
        }
      }
      else {
        ((FuzzyXMLElementImpl) lastOpenElement.getParentNode()).appendChildWithNoCheck(lastOpenElement);
      }
    }
  }

  private void checkAttributeValue(FuzzyXMLAttribute attr) {
    String str = attr.getRawValue();
    if (str != null) {
      // MS: Don't consider nested tags for escaping ...
      if (attr.hasNestedTag()) {
        str = str.replaceAll("<[^>]*>", "");
      }

      str = str.replaceAll("&[^&; \"]+;", " ");
      Matcher invalidStringMatcher = _invalidStringPattern.matcher(str);
      while (invalidStringMatcher.find()) {
        String invalidPart = invalidStringMatcher.group();
        fireErrorEvent(attr.getParentNode().getOffset() + attr.getValueDataOffset() + 1, attr.getValueDataLength(), "The character '" + invalidPart + "' must be escaped.", attr);
      }

    }
  }

  /** ��^�O���������܂��B */
  private void handleEmptyTag(int offset, int end, boolean synthetic) {
    closeAutocloseTags();
    TagInfo info = parseTagContents(_originalSource.substring(offset + 1, end - 1));
    FuzzyXMLNode parent = getParent();
    FuzzyXMLElementImpl element = new FuzzyXMLElementImpl(parent, info.name, offset, end - offset, info.nameOffset);
    if (parent == null) {
      _roots.add(element);
    }
    else {
      ((FuzzyXMLElement) parent).appendChild(element);
    }
    // ������lj�
    AttrInfo[] attrs = info.getAttrs();
    for (int i = 0; i < attrs.length; i++) {
      FuzzyXMLAttributeImpl attr = createFuzzyXMLAttribute(element, offset, attrs[i]);
      element.appendChild(attr);
    }

    element.setSynthetic(synthetic);

    checkElement(element);
  }

  protected void checkElement(FuzzyXMLElement element) {
    for (FuzzyXMLAttribute attr : element.getAttributes()) {
      if (!_wellFormedRequired) {
        if (!WodHtmlUtils.isWOTag((FuzzyXMLElement) attr.getParentNode())) {
          _stack.push(attr.getParentNode());
          _parse(attr.getValue(), element.getOffset() + attr.getValueDataOffset() + 1, true, true);
          FuzzyXMLNode poppedNode = _stack.pop();
          if (poppedNode != attr.getParentNode()) {
            _stack.push(poppedNode);
          }
        }
      }
      else {
        checkAttributeValue(attr);
      }
    }
  }

  /** �R�����g���������܂��B */
  private void handleComment(int offset, int end, String text) {
    closeAutocloseTags();
    FuzzyXMLNode parent = getParent();
    FuzzyXMLCommentImpl comment = new FuzzyXMLCommentImpl(parent, text, offset, end - offset);

    if (parent == null) {
      _roots.add(comment);
    }
    else {
      ((FuzzyXMLElement) parent).appendChild(comment);
    }

    _stack.push(comment);
    _parse(text.replaceFirst("<[^>]+-->$", ""), offset, true, true);
    FuzzyXMLNode poppedNode = _stack.pop();
    if (poppedNode != comment) {
      _stack.push(poppedNode);
    }
  }

  /** �J�n�^�O���������܂��B */
  private void handleStartTag(int offset, int end, boolean synthetic) {
    closeAutocloseTags();
    String tagContents = _originalSource.substring(offset, end);
    // MS: If you're in the middle of typing, offset + 1 to end - 1 can put
    // you in an invalid state (for instance, if you just type "<" that will
    // overlap.
    if (tagContents.startsWith("<")) {
      tagContents = tagContents.substring(1);
    }
    if (tagContents.endsWith(">")) {
      tagContents = tagContents.substring(0, tagContents.length() - 1);
    }
    TagInfo info = parseTagContents(tagContents);
    // System.out.println("FuzzyXMLParser.handleStartTag: open " + info.name);
    FuzzyXMLElement element;
    if (info.name.equalsIgnoreCase("script")) {
      element = new FuzzyXMLScriptImpl(getParent(), info.name, offset, end - offset, info.nameOffset);
    }
    else if (info.name.equalsIgnoreCase("style")) {
      element = new FuzzyXMLStyleImpl(getParent(), info.name, offset, end - offset, info.nameOffset);
    }
    else {
      element = new FuzzyXMLElementImpl(getParent(), info.name, offset, end - offset, info.nameOffset);
    }
    handleStartTag(element, info, offset, end);
    element.setSynthetic(synthetic);
  }

  protected FuzzyXMLAttributeImpl createFuzzyXMLAttribute(FuzzyXMLElement element, int offset, AttrInfo attrInfo) {
    String namespace = null;
    String name = attrInfo.name;
    if (name != null) {
      int colonIndex = name.indexOf(':');
      if (colonIndex != -1) {
        namespace = name.substring(0, colonIndex);
        name = name.substring(colonIndex + 1);
      }
    }
    if (_wellFormedRequired) {
      FuzzyXMLAttributeImpl attr = new FuzzyXMLAttributeImpl(element, namespace, name, FuzzyXMLUtil.decode(attrInfo.value, false), attrInfo.rawValue, attrInfo.offset + offset, attrInfo.end - attrInfo.offset + 1, attrInfo.valueOffset);
      attr.setHasNestedTag(attrInfo.hasNestedTag);
      attr.setQuoteCharacter(attrInfo.quote);
      return attr;
    }
    FuzzyXMLAttributeImpl attr = new FuzzyXMLAttributeImpl(element, namespace, name, attrInfo.value, attrInfo.rawValue, attrInfo.offset + offset, attrInfo.end - attrInfo.offset + 1, attrInfo.valueOffset);
    attr.setHasNestedTag(attrInfo.hasNestedTag);
    attr.setQuoteCharacter(attrInfo.quote);
    if (attrInfo.value.indexOf('"') >= 0 || attrInfo.value.indexOf('\'') >= 0 || attrInfo.value.indexOf('<') >= 0 || attrInfo.value.indexOf('>') >= 0 || attrInfo.value.indexOf('&') >= 0) {
      attr.setEscape(false);
    }
    return attr;
  }

  /** �J�n�^�O���������܂��B */
  private void handleStartTag(FuzzyXMLElement element, TagInfo info, int offset, int end) {
    // ������lj�
    AttrInfo[] attrs = info.getAttrs();
    for (int i = 0; i < attrs.length; i++) {
      // // ���O��Ԃ̃T�|�[�g
      // if(attrs[i].name.startsWith("xmlns")){
      // String uri = attrs[i].value;
      // String prefix = null;
      // String[] dim = attrs[i].name.split(":");
      // if(dim.length > 1){
      // prefix = dim[1];
      // }
      // element.addNamespaceURI(prefix,uri);
      // }
      element.appendChild(createFuzzyXMLAttribute(element, offset, attrs[i]));
    }
    _stack.push(element);
    _nonCloseElements.add(element);

    checkElement(element);
  }

  /** �X�^�b�N�̍Ō�̗v�f���擾���܂�(�X�^�b�N����͍폜���܂���)�B */
  private FuzzyXMLNode getParent() {
    if (_stack.size() == 0) {
      return null;
    }
    return _stack.get(_stack.size() - 1);
  }

  /** �^�O�������p�[�X���܂��B */
  private TagInfo parseTagContents(String text) {
    // �g����
    Range trimmedRange = Range.trimmedRange(text);
    text = trimmedRange.trim(text);
    // �‚��^�O��������Ō�̃X���b�V�����폜
    if (text.endsWith("/")) {
      text = text.substring(0, text.length() - 1);
    }
    // �ŏ��̃X�y�[�X�܂ł��^�O��
    TagInfo info = new TagInfo();
    if (FuzzyXMLUtil.getSpaceIndex(text) != -1) {
      info.name = text.substring(0, FuzzyXMLUtil.getSpaceIndex(text)).trim();
      info.nameOffset = trimmedRange.getOffset();
      parseAttributeContents(info, text);
    }
    else {
      info.name = text;
    }
    return info;
  }

  /**
   * States of the character-level scanner in parseAttributeContents.
   * Start: inside the tag name, before the first whitespace.
   * BeforeAttributeName: skipping whitespace ahead of an attribute name.
   * InAttributeName: collecting the name up to '='.
   * AfterAttributeName: after '=', skipping whitespace ahead of the value.
   * InAttributeValue: collecting the value.
   * InNestedTag: inside a '<' ... '>' run within a value.
   */
  private static enum AttributeParseState {
    Start, BeforeAttributeName, InAttributeName, AfterAttributeName, InAttributeValue, InNestedTag,
  }

  /**
   * Parses the attribute part of a tag with a character-level state machine
   * and adds each completed attribute to {@code info}.
   * <p>
   * Values may be quoted with single or double quotes, or unquoted, in which
   * case they end at the next whitespace or at the end of the text. A
   * backslash before the quote character keeps the quote as part of the
   * value. A quoted value that is never terminated is dropped.
   *
   * @param info
   *          receives the parsed attributes
   * @param text
   *          the tag text, starting with the tag name
   */
  private void parseAttributeContents(TagInfo info, String text) {

    AttributeParseState state = AttributeParseState.Start;
    // Collects the characters of the current name or value.
    StringBuffer tokenBuffer = new StringBuffer();
    String name = null;
    // The quote that opened the current value, or 0 for an unquoted value.
    char quoteCharacter = 0;
    // Index of the first character of the current attribute name; -1 if none.
    int start = -1;
    // Index of the first non-whitespace character after '='; -1 if none yet.
    int valueOffset = -1;
    // True directly after an unconsumed backslash inside a value.
    boolean escape = false;
    boolean hasNestedTag = false;

    for (int i = 0; i < text.length(); i++) {
      char c = text.charAt(i);
      if (state == AttributeParseState.Start && FuzzyXMLUtil.isWhitespace(c)) {
        // First whitespace: the tag name is over.
        state = AttributeParseState.BeforeAttributeName;
      }
      else if (state == AttributeParseState.BeforeAttributeName && !FuzzyXMLUtil.isWhitespace(c)) {
        if (start == -1) {
          start = i;
        }
        state = AttributeParseState.InAttributeName;
        tokenBuffer.append(c);
      }
      else if (state == AttributeParseState.InAttributeName) {
        if (c == '=') {
          state = AttributeParseState.AfterAttributeName;
          name = tokenBuffer.toString().trim();
          tokenBuffer.setLength(0);
          valueOffset = -1;
        }
        else {
          tokenBuffer.append(c);
        }
      }
      else if (state == AttributeParseState.AfterAttributeName && !FuzzyXMLUtil.isWhitespace(c)) {
        if (valueOffset == -1) {
          valueOffset = i;
        }
        if (c == '\'' || c == '\"') {
          // Quoted value: the quote itself is not part of the value.
          quoteCharacter = c;
        }
        else {
          // Unquoted value: this character already belongs to it.
          quoteCharacter = 0;
          tokenBuffer.append(c);
        }
        state = AttributeParseState.InAttributeValue;
      }
      else if (state == AttributeParseState.InAttributeValue) {
        if (c == quoteCharacter && escape == true) {
          // Backslash-escaped quote: keep the quote, drop the backslash.
          tokenBuffer.append(c);
          escape = false;
        }
        else if (c == quoteCharacter || (quoteCharacter == 0 && FuzzyXMLUtil.isWhitespace(c))) {
          // add an attribute
          AttrInfo attr = new AttrInfo();
          attr.name = FuzzyXMLUtil.decode(name, _isHTML);
          attr.rawValue = tokenBuffer.toString();
          attr.value = FuzzyXMLUtil.decode(attr.rawValue, _isHTML);
          attr.valueOffset = valueOffset;
          attr.offset = start;
          attr.end = i + 1;
          attr.quote = quoteCharacter;
          attr.hasNestedTag = hasNestedTag;
          info.addAttr(attr);

          // reset
          tokenBuffer.setLength(0);
          state = AttributeParseState.BeforeAttributeName;
          start = -1;
          hasNestedTag = false;
        }
        else if (c == '\\') {
          if (escape == true) {
            // Second backslash in a row: one backslash is kept.
            tokenBuffer.append(c);
            escape = false;
          }
          else {
            // MS: I took out escaping .. This is potentially a really sketchy
            // thing to do, but it
            // was breaking attributes like numberformat = "\$#,##0.00"
            // Q: moved append to following 'else' block
            escape = true;
          }
        }
        else if (c == '<') {
          hasNestedTag = true;
          state = AttributeParseState.InNestedTag;
          tokenBuffer.append(c);
        }
        else {
          if (escape) {
            // The backslash did not precede a quote or another backslash, so
            // it is kept literally.
            tokenBuffer.append('\\');
            escape = false;
          }
          tokenBuffer.append(c);
        }
      }
      else if (state == AttributeParseState.InNestedTag) {
        // Everything up to and including '>' is copied verbatim.
        tokenBuffer.append(c);
        if (c == '>') {
          state = AttributeParseState.InAttributeValue;
        }
      }
    }
    // An unquoted value that runs to the end of the text is still an attribute.
    if ((state == AttributeParseState.InAttributeValue || state == AttributeParseState.InNestedTag) && quoteCharacter == 0) {
      AttrInfo attr = new AttrInfo();
      attr.name = FuzzyXMLUtil.decode(name, _isHTML);
      attr.rawValue = tokenBuffer.toString();
      attr.value = FuzzyXMLUtil.decode(attr.rawValue, _isHTML);
      attr.valueOffset = valueOffset;
      attr.offset = start;
      attr.end = text.length();
      attr.quote = quoteCharacter;
      attr.hasNestedTag = hasNestedTag;
      info.addAttr(attr);
    }
    // An unterminated quoted value ends up here; it is silently dropped.
    if (state == AttributeParseState.InAttributeValue && quoteCharacter != 0) {
      // System.out.println("FuzzyXMLParser.parseAttributeContents: " +
      // info.name);
    }
    // (The original regex-based attribute matching that this state machine
    // replaced was left here as disabled code.)
  }

  private class TagInfo {
    private String name;
    private int nameOffset;
    private ArrayList<AttrInfo> attrs = new ArrayList<AttrInfo>();

    public void addAttr(AttrInfo attr) {
      // �������̂������Ă��lj����Ȃ�
      AttrInfo[] info = getAttrs();
      for (int i = 0; i < info.length; i++) {
        if (info[i].name.equals(attr.name)) {
          return;
        }
      }
      attrs.add(attr);
    }

    public AttrInfo[] getAttrs() {
      return attrs.toArray(new AttrInfo[attrs.size()]);
    }
  }

  /** One attribute as produced by parseAttributeContents. */
  private class AttrInfo {
    // Decoded attribute name (may still contain a namespace prefix).
    private String name;
    // Decoded form of rawValue.
    private String value;
    // The value exactly as collected from the tag text, without its quotes.
    private String rawValue;
    // Index within the tag text where the attribute name starts.
    private int offset;
    // Index within the tag text of the first non-whitespace character after '='.
    private int valueOffset;
    // One past the index of the character that ended the value, or the text
    // length when the value ran to the end of the tag text.
    private int end;
    // The quote character around the value, or 0 for an unquoted value.
    private char quote;
    // True when a '<' was seen inside the value.
    private boolean hasNestedTag;
  }

  public static class Range {
    private int _offset;
    private int _length;

    public Range() {
    }

    public int getOffset() {
      return _offset;
    }

    public int getLength() {
      return _length;
    }

    public String trim(String str) {
      return str.substring(_offset, _offset + _length);
    }

    public static Range trimmedRange(String str) {
      int i = 0;
      int length = str.length();
      Range r = new Range();
      for (i = 0; i < length && str.charAt(i) <= ' '; i++) {
        // DO NOTHING
      }
      r._offset = i;

      for (i = length - 1; i > r._offset && str.charAt(i) <= ' '; i--) {
        // DO NOTHING
      }
      r._length = (i - r._offset + 1);
      return r;
    }
  }

}
TOP

Related Classes of jp.aonir.fuzzyxml.FuzzyXMLParser$Range

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.