Package ru.org.linux.util.bbcode

Source Code of ru.org.linux.util.bbcode.Parser

/*
* Copyright 1998-2014 Linux.org.ru
*    Licensed under the Apache License, Version 2.0 (the "License");
*    you may not use this file except in compliance with the License.
*    You may obtain a copy of the License at
*
*        http://www.apache.org/licenses/LICENSE-2.0
*
*    Unless required by applicable law or agreed to in writing, software
*    distributed under the License is distributed on an "AS IS" BASIS,
*    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*    See the License for the specific language governing permissions and
*    limitations under the License.
*/

/*
* Copyright (c) 2005-2006, Luke Plant
* All rights reserved.
* E-mail: <L.Plant.98@cantab.net>
* Web: http://lukeplant.me.uk/
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
*      * Redistributions of source code must retain the above copyright
*        notice, this list of conditions and the following disclaimer.
*
*      * Redistributions in binary form must reproduce the above
*        copyright notice, this list of conditions and the following
*        disclaimer in the documentation and/or other materials provided
*        with the distribution.
*
*      * The name of Luke Plant may not be used to endorse or promote
*        products derived from this software without specific prior
*        written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Rewrite with Java language and modified for lorsource by Ildar Hizbulin 2011
* E-mail: <hizel@vyborg.ru>
*/

package ru.org.linux.util.bbcode;

import org.apache.commons.lang.StringUtils;
import ru.org.linux.util.StringUtil;
import ru.org.linux.util.bbcode.nodes.*;
import ru.org.linux.util.bbcode.tags.Tag;
import ru.org.linux.util.formatter.RuTypoChanger;

import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
* Основной класс преобразования LORCODE в html
*/
public class Parser {
  /**
   * Регулярное выражение поиска тэга
   */
  private static final Pattern BBTAG_REGEXP = Pattern.compile("\\[\\[?/?([A-Za-z\\*]+)(:[a-f0-9]+)?(=[^\\]]+)?\\]?\\]");

  /**
   * Регулярное выражения поиска двойного перевода строки
   */
  private static final Pattern P_REGEXP = Pattern.compile("(\r?\n){2,}");

  private final ParserParameters parserParameters;

  /**
   * Конструктор по умолчанию.
   *
   * @param parserParameters параметры парсера
   */
  public Parser(ParserParameters parserParameters) {
    this.parserParameters = parserParameters;
  }

  public static String escape(String html) {
    return StringUtil.escapeHtml(html);
  }

  public RootNode getRootNode() {
    return new RootNode(parserParameters);
  }

  /**
   * Точка входа для разбора LORCODE
   *
   * @param rootNode корневой узел нового дерева
   * @param bbcode   обрабатываемы LORCODE
   * @return возвращает инвалидный html
   */
  public RootNode parseRoot(RootNode rootNode, String bbcode) {
    Node currentNode = rootNode;
    ParserAutomatonState automatonState = new ParserAutomatonState(rootNode, parserParameters);

    while (automatonState.getPos() < bbcode.length()) {
      Matcher match = BBTAG_REGEXP.matcher(bbcode).region(automatonState.getPos(), bbcode.length());
      if (match.find()) {
        if (!automatonState.isFirstCode()) {
          currentNode = pushTextNode(automatonState, currentNode, bbcode.substring(automatonState.getPos(), match.start()));
        } else {
          currentNode = trimNewLine(automatonState, currentNode, bbcode, match);
        }
        automatonState.processTagMatcher(match);

        if (automatonState.isTagEscaped()) {
          currentNode = processEscapedTag(currentNode, automatonState);
        } else {
          if (automatonState.getAllTagsNames().contains(automatonState.getTagname())) {
            currentNode = processKnownTag(currentNode, automatonState);
          } else {
            currentNode = pushTextNode(automatonState, currentNode, automatonState.getWholematch());
          }
        }
        automatonState.setPos(match.end());
      } else {
        currentNode = pushTextNode(automatonState, currentNode, bbcode.substring(automatonState.getPos()));
        automatonState.setPos(bbcode.length());
      }
    }
    return automatonState.getRootNode();
  }

  /**
   * Добавление текстового узда
   *
   * @param automatonState текущее состояние автомата
   * @param currentNode    текущий узел
   * @param text           текст
   * @return возвращает новый текущий узел
   */
  private Node pushTextNode(ParserAutomatonState automatonState, Node currentNode, String text) {
    if (text.trim().isEmpty() && !currentNode.allows("text")) {
      return currentNode;
    }

    while (!currentNode.allows("text")) {
      if (currentNode.allows("p")) {
        TagNode node = new TagNode(currentNode, parserParameters, "p", "", automatonState.getRootNode());
        currentNode.addChildren(node);
        currentNode = node;
      } else if (currentNode.allows("div")) {
        TagNode node = new TagNode(currentNode, parserParameters, "div", "", automatonState.getRootNode());
        currentNode.addChildren(node);
        currentNode = node;
      } else {
        currentNode = currentNode.getParent();
      }
    }

    boolean isParagraph = false;
    boolean isAllow = true;
    boolean isParagraphed = false;

    if (TagNode.class.isInstance(currentNode)) {
      TagNode tempNode = (TagNode) currentNode;
      Set<String> disallowedParagraphTags = parserParameters.getDisallowedParagraphTags();
      Set<String> paragraphedTags = parserParameters.getParagraphedTags();
      if (disallowedParagraphTags.contains(tempNode.getBbtag().getName())) {
        isAllow = false;
      }
      if (paragraphedTags.contains(tempNode.getBbtag().getName())) {
        isParagraphed = true;
      }
      if ("p".equals(tempNode.getBbtag().getName())) {
        isParagraph = true;
      }
    }

    /**
     * Если мы находим двойной перенос строки и в тексте
     * и в текущем тэге разрешена вставка нового тэга p -
     * вставляем p
     * за исключеним, если текущий тэг p, тогда поднимаемся на уровень
     * выше в дереве и вставляем p с текстом
     */
    Matcher matcher = P_REGEXP.matcher(text);

    if (isAllow && matcher.find()) {
      String head = text.substring(0, matcher.start());
      String tail = text.substring(matcher.end());

      if (!head.isEmpty()) {
        currentNode.addChildren(rawPushTextNode(automatonState, currentNode, head));
      }
      if (isParagraph) {
        currentNode = currentNode.getParent();
      }
      if (!tail.isEmpty()) {
        TagNode node = new TagNode(currentNode, parserParameters, "p", " ", automatonState.getRootNode());
        currentNode.addChildren(node);
        currentNode = node;
        currentNode = pushTextNode(automatonState, currentNode, tail);
      }
    } else {
      if (isParagraphed) {
        currentNode.addChildren(rawPushTextNode(automatonState, currentNode, text));
      } else {
        currentNode.addChildren(rawPushTextNode(automatonState, currentNode, matcher.replaceAll("")));
      }
    }

    return currentNode;
  }

  private TextNode rawPushTextNode(ParserAutomatonState automatonState, Node currentNode, String text) {
    if (!automatonState.isCode()) {
      return new TextNode(currentNode, parserParameters, text, automatonState);
    } else {
      return new TextCodeNode(currentNode, parserParameters, text, automatonState);
    }
  }

  /**
   * Добавление в дерево нового узла с тэгом
   *
   * @param automatonState текущее состояние автомата
   * @param currentNode    текущий узел
   * @param name           название тэга
   * @param parameter      параметры тэга
   * @return возвращает новый текущий узел дерева
   */
  private Node pushTagNode(ParserAutomatonState automatonState, Node currentNode, String name, String parameter) {
    if (!currentNode.allows(name)) {
      Map<String, Tag> allTagsDict = parserParameters.getAllTagsDict();
      Set<String> blockLevelTags = parserParameters.getBlockLevelTags();
      Tag newTag = allTagsDict.get(name);

      if (newTag.isDiscardable()) {
        return currentNode;
      } else if (currentNode == automatonState.getRootNode()
              || blockLevelTags.contains(((TagNode) currentNode).getBbtag().getName()) && newTag.getImplicitTag() != null) {
        if (currentNode != automatonState.getRootNode() && TagNode.class.isInstance(currentNode)) {
          TagNode currentTagNode = (TagNode) currentNode;
          if ("p".equals(currentTagNode.getBbtag().getName())) {
            currentNode = currentNode.getParent();
            return pushTagNode(automatonState, currentNode, name, parameter);
          }
        }
        currentNode = pushTagNode(automatonState, currentNode, newTag.getImplicitTag(), "");
        currentNode = pushTagNode(automatonState, currentNode, name, parameter);
      } else {
        currentNode = currentNode.getParent();
        currentNode = pushTagNode(automatonState, currentNode, name, parameter);
      }
    } else {
      TagNode node = new TagNode(currentNode, parserParameters, name, parameter, automatonState.getRootNode());
      currentNode.addChildren(node);
      if (!node.getBbtag().isSelfClosing()) {
        currentNode = node;
      }
    }
    return currentNode;
  }

  /**
   * Обрабатывает закрытие тэга
   *
   * @param rootNode    корневой узел
   * @param currentNode текущий узел
   * @param name        имя закрываемого тэга
   * @return новый текущий узел после закрытия тэга
   */
  private Node closeTagNode(RootNode rootNode, Node currentNode, String name) {
    Node tempNode = currentNode;
    while (true) {
      if (tempNode == rootNode) {
        break;
      }
      if (TagNode.class.isInstance(tempNode)) {
        TagNode node = (TagNode) tempNode;
        String tagName = node.getBbtag().getName();
        if (tagName.equals(name) || ("url".equals(name) && "url2".equals(tagName))) {
          currentNode = tempNode;
          currentNode = currentNode.getParent();
          break;
        }
      }
      tempNode = tempNode.getParent();
    }
    return currentNode;
  }

  private Node processKnownTag(Node currentNode, ParserAutomatonState automatonState) {
    if (automatonState.getWholematch().startsWith("[[")) {
      currentNode = pushTextNode(automatonState, currentNode, "[");
    }

    boolean tagNameIsCode = "code".equals(automatonState.getTagname()) || "inline".equals(automatonState.getTagname());

    if (automatonState.isCloseTag()) {
      currentNode = processCloseTag(automatonState, currentNode, tagNameIsCode);
    } else {
      currentNode = processTag(automatonState, currentNode, tagNameIsCode);
    }

    if (automatonState.getWholematch().endsWith("]]")) {
      currentNode = pushTextNode(automatonState, currentNode, "]");
    }

    return currentNode;
  }

  private Node processTag(ParserAutomatonState automatonState, Node currentNode, boolean tagNameIsCode) {
    if (automatonState.isCode() && !tagNameIsCode) {
      currentNode = pushTextNode(automatonState, currentNode, automatonState.getWholematch());
    } else if (tagNameIsCode) {
      automatonState.setCode(true);
      automatonState.setFirstCode(true);
      currentNode = pushTagNode(automatonState, currentNode, automatonState.getTagname(), automatonState.getParameter());
    } else {
      if ("url".equals(automatonState.getTagname()) && ! StringUtils.isEmpty(automatonState.getParameter())) {
        // специальная проверка для [url] с параметром
        currentNode = pushTagNode(automatonState, currentNode, "url2", automatonState.getParameter());
      } else {
        currentNode = pushTagNode(automatonState, currentNode, automatonState.getTagname(), automatonState.getParameter());
      }
    }
    return currentNode;
  }

  private Node processEscapedTag(Node currentNode, ParserAutomatonState automatonState) {
    String textNode;
    if (automatonState.getAllTagsNames().contains(automatonState.getTagname()) && !automatonState.isCode()) {
      textNode = automatonState.getWholematch().substring(1, automatonState.getWholematch().length() - 1);
    } else {
      textNode = automatonState.getWholematch();
    }
    currentNode = pushTextNode(automatonState, currentNode, textNode);
    return currentNode;
  }

  private Node processCloseTag(ParserAutomatonState automatonState, Node currentNode, boolean tagNameIsCode) {
    if (!automatonState.isCode() || tagNameIsCode) {
      currentNode = closeTagNode(automatonState.getRootNode(), currentNode, automatonState.getTagname());
    } else {
      currentNode = pushTextNode(automatonState, currentNode, automatonState.getWholematch());
    }
    if (tagNameIsCode) {
      automatonState.setCode(false);
    }
    return currentNode;
  }

  private Node trimNewLine(ParserAutomatonState automatonState, Node currentNode, String bbcode, Matcher match) {
    String fixWhole = bbcode.substring(automatonState.getPos(), match.start());
    if (fixWhole.startsWith("\n")) {
      fixWhole = fixWhole.substring(1); // откусить ведущий перевод строки
    } else if (fixWhole.startsWith("\r\n")) {
      fixWhole = fixWhole.substring(2); // откусить ведущий перевод строки
    }
    automatonState.setFirstCode(false);
    return pushTextNode(automatonState, currentNode, fixWhole);
  }

  public class ParserAutomatonState {
    private final RootNode rootNode;
    private final Set<String> allTagsNames;

    private int pos = 0;
    private boolean isCode = false;
    private boolean firstCode = false;

    private final RuTypoChanger changer = new RuTypoChanger();

    private String tagname;
    private String parameter;
    private String wholematch;

    private ParserAutomatonState(RootNode rootNode, ParserParameters parserParameters) {
      this.rootNode = rootNode;
      allTagsNames = parserParameters.getAllTagsNames();
    }

    private void processTagMatcher(Matcher match) {
      tagname = match.group(1).toLowerCase();
      parameter = match.group(3);
      wholematch = match.group(0);

      if (!StringUtils.isEmpty(parameter)){
        parameter = parameter.substring(1);
      }
    }

    private boolean isTagEscaped() {
      return wholematch.startsWith("[[") && wholematch.endsWith("]]");
    }

    private boolean isCloseTag() {
      return wholematch.startsWith("[/") || wholematch.startsWith("[[/");
    }

    private int getPos() {
      return pos;
    }

    private void setPos(int pos) {
      this.pos = pos;
    }

    private boolean isCode() {
      return isCode;
    }

    private void setCode(boolean code) {
      isCode = code;
    }

    private boolean isFirstCode() {
      return firstCode;
    }

    private void setFirstCode(boolean firstCode) {
      this.firstCode = firstCode;
    }

    private String getTagname() {
      return tagname;
    }

    private String getParameter() {
      return parameter;
    }

    private String getWholematch() {
      return wholematch;
    }

    public RootNode getRootNode() {
      return rootNode;
    }

    private Set<String> getAllTagsNames() {
      return allTagsNames;
    }

    public RuTypoChanger getTypoChanger() {
      return changer;
    }
  }
}
TOP

Related Classes of ru.org.linux.util.bbcode.Parser

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.