Package com.overzealous.remark.convert

Source Code of com.overzealous.remark.convert.InlineStyle$Rules

/*
* Copyright 2011 OverZealous Creations, LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.overzealous.remark.convert;

import com.overzealous.remark.Options;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.nodes.TextNode;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
* Handles various inline styling (italics and bold), such as em, i, strong, b, span, and font tags.
* @author Phil DeJarnett
*/
public class InlineStyle extends AbstractNodeHandler {

  private static final char ITALICS_WRAPPER = '*';
  private static final String BOLD_WRAPPER = "**";

  private static final Pattern ITALICS_PATTERN = Pattern.compile("font-style:\\s*italic", Pattern.CASE_INSENSITIVE);
  private static final Pattern BOLD_PATTERN = Pattern.compile("font-weight:\\s*bold", Pattern.CASE_INSENSITIVE);
 
  private static final Pattern INWORD_CHARACTER = Pattern.compile("\\w");
 
  private static final Pattern SPACE_CONTENT_SPACE = Pattern.compile("^(\\s*+)(.*?)(\\s*)$", Pattern.DOTALL);

  private int italicDepth = 0;
  private int boldDepth = 0;

  /**
   * Renders inline styling (bold, italics) for the given tag.  It handles implicit styling ({@code em}, {@code strong}) as
   * well as explicit styling via the {@code style} attribute.
   * <p>This object keeps track of the depth of the styling, to prevent recursive situations like this:</p>
   *
   * <blockquote>{@code <em>hello <em>world</em></em>}</blockquote>
   *
   * <p>A naive method would be render the example incorrectly (the output would be {@code *hello **world*})</p>
   *
   * @param parent The previous node walker, in case we just want to remove an element.
   * @param node    Node to handle
   * @param converter Parent converter for this object.
   */
  public void handleNode(NodeHandler parent, Element node, DocumentConverter converter) {
    if(checkInnerBlock(node)) {
      // not valid to have an inline node around block nodes, so we have to
      // simply ignore them.
      // just recurse like it's not here.
      converter.walkNodes(parent, node);
    } else {
      Rules rules = checkInword(node, converter);
      if(rules.emphasisPreserved) {
        checkTag(node, rules);
 
        if(rules.bold || rules.italics) {
          handleStyled(parent, node, converter, rules);
        } else {
          converter.walkNodes(this, node, converter.inlineNodes);
        }
      } else { // emphasis has been disabled for this section
        // mark as if emphasis was already processed
        italicDepth++;
        boldDepth++;
        converter.walkNodes(this, node, converter.inlineNodes);
        italicDepth--;
        boldDepth--;
      }
    }
  }

  @Override
  public void handleTextNode(TextNode node, DocumentConverter converter) {
    // Override to provide special handling for ignoring
    // leading or trailing all-space nodes.
    if((node.previousSibling() != null && node.nextSibling() != null) ||
           node.text().trim().length() != 0) {
      super.handleTextNode(node, converter);
    }
  }

  /**
   * Minor class to hold onto the styling rules for this class.
   */
  private class Rules {
    boolean emphasisPreserved = true;
    boolean addSpacing = false;
    boolean italics = false;
    boolean bold = false;
  }

  /**
   * Handles dealing with a styled node (one that has markers on either side).
   *
   * <p>It's unique because we have to deal with leading and trailing spaces, among other issues.</p>
   *
   * @param parent The previous node walker, in case we just want to remove an element.
   * @param node    Node to handle
   * @param converter Parent converter for this object.
   * @param rules The styling rules that are active
   */
  private void handleStyled(NodeHandler parent, Element node, DocumentConverter converter, Rules rules) {
    // prevent double styling
    if(rules.bold) { boldDepth++; }
    if(rules.italics) { italicDepth++; }
    String content = converter.getInlineContent(this, node, true);
    if(rules.bold) { boldDepth--; }
    if(rules.italics) { italicDepth--; }
   
    // only proceed if we have content
    if(content.length() > 0) {
   
     
      Matcher parts = SPACE_CONTENT_SPACE.matcher(content);
      if(parts.find()) {
        // write any leading space
        converter.output.write(parts.group(1));
       
        // don't write the markers if the content ends up empty
        if(parts.group(2).length() > 0) {
          // write leading style marker
          start(rules, parts.group(1), converter);
         
          // write content
          converter.output.write(parts.group(2));
         
          // write trailing marker
          end(rules, parts.group(3), converter);
        }
       
        // write any trailing space
        converter.output.write(parts.group(3));
         
      } // else, something weird happened, like (1 == 0)
    }
  }

  /**
   * Check to see if there is a block-level node somewhere inside this node.
   *
   * @param node Current node
   * @return True is there is a block inside this node (which would be invalid HTML)
   */
  private boolean checkInnerBlock(Element node) {
    boolean blockExists = false;
    for(final Element child : node.children()) {
      blockExists = child.isBlock() || checkInnerBlock(child);
      if(blockExists) {
        break;
      }
    }
    return blockExists;
  }

  /**
   * Handles the situation where InWordEmphasis needs to be manipulated.
   *
   * <p>This isn't a terribly intelligent check - it merely looks for the
   * situation where a styled node is immediately <em>followed</em> by a
   * text node, and that text node starts with a word character.</p>
   *
   * @param node The current node (should be an inline-styled node)
   * @param converter The current converter
   * @return flags for checking.
   */
  private Rules checkInword(Element node, DocumentConverter converter) {
    Rules result = new Rules();
    Options.InWordEmphasis iwe = converter.options.getInWordEmphasis();
    if(!iwe.isEmphasisPreserved() || iwe.isAdditionalSpacingNeeded()) {
      // peek behind for inline styling
      Node n = node.previousSibling();
      if(n != null && n instanceof TextNode) {
        TextNode tn = (TextNode)n;
        String text = tn.text();
        if(INWORD_CHARACTER.matcher(text.substring(text.length()-1)).matches()) {
          result.emphasisPreserved = iwe.isEmphasisPreserved();
          result.addSpacing = iwe.isAdditionalSpacingNeeded();
        }
      }
      // peek ahead for inline styling
      n = node.nextSibling();
      if(n != null && n instanceof TextNode) {
        TextNode tn = (TextNode)n;
        if(INWORD_CHARACTER.matcher(tn.text().substring(0,1)).matches()) {
          result.emphasisPreserved = iwe.isEmphasisPreserved();
          result.addSpacing = iwe.isAdditionalSpacingNeeded();
        }
      }
    }
    return result;
  }

  /**
   * Check the styling rules that may or may not apply to this tag.
   * @param node The node to look at
   * @param rules The rules object to hold the result
   */
  private void checkTag(Element node, Rules rules) {
    String tn = node.tagName();
    if(tn.equals("i") || tn.equals("em")) {
      rules.italics = (italicDepth == 0);
    } else if(tn.equals("b") || tn.equals("strong")) {
      rules.bold = (boldDepth == 0);
    } else {
      // check inline-style
      if(node.hasAttr("style")) {
        String style = node.attr("style");
        if(ITALICS_PATTERN.matcher(style).find()) {
          rules.italics = (italicDepth == 0);
        }
        if(BOLD_PATTERN.matcher(style).find()) {
          rules.bold = (boldDepth == 0);
        }
      }
    }
  }

  /**
   * Render the starting styling tag as necessary.
   *
   * @param style Rules to render
   * @param leadingSpaces Leading spaces string (if any)
   * @param converter parent converter
   */
  private void start(Rules style, String leadingSpaces, DocumentConverter converter) {
    if(style.addSpacing &&
           (italicDepth == 0 || boldDepth == 0) &&
           (leadingSpaces == null || leadingSpaces.length() == 0)) {
      converter.output.write(' ');
    }
    if(style.italics) {
      if(italicDepth == 0) {
        converter.output.write(ITALICS_WRAPPER);
      }
    }
    if(style.bold) {
      if(boldDepth == 0) {
        converter.output.write(BOLD_WRAPPER);
      }
    }
  }

  /**
   * Render the ending tag as necessary.
   *
   * @param style Rules to render
   * @param trailingSpaces Trailing spaces (if any)
   * @param converter parent converter
   */
  private void end(Rules style, String trailingSpaces, DocumentConverter converter) {
    if(style.bold) {
      if(boldDepth == 0) {
        converter.output.write(BOLD_WRAPPER);
      }
    }
    if(style.italics) {
      if(italicDepth == 0) {
        converter.output.write(ITALICS_WRAPPER);
      }
    }
    if(style.addSpacing &&
          (italicDepth == 0 || boldDepth == 0) &&
          (trailingSpaces == null || trailingSpaces.length() == 0)) {
      converter.output.write(' ');
    }
  }
}
TOP

Related Classes of com.overzealous.remark.convert.InlineStyle$Rules

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.