Package winterwell.utils.io

Source Code of winterwell.utils.io.RTFHelper

package winterwell.utils.io;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

import winterwell.utils.IReplace;
import winterwell.utils.StrUtils;

/**
 * Partially convert RTF into plain-text/markdown. See
 * http://latex2rtf.sourceforge.net/RTF-Spec-1.0.txt
 * <p>
 * Only a small set of control words is translated (tab, quotes, hex escapes,
 * paragraph and line breaks, bold, italics, and a few escaped symbols). All
 * other control words, and brace groups that contain no space or line break,
 * are dropped.
 *
 * @author daniel
 * @testedby {@link RTFHelperTest}
 */
public class RTFHelper {

  /**
   * Matches a control code that should be stripped.
   * <ul>
   * <li>Group 1 is the context in front of the code (start of input, a word
   * boundary, or one non-backslash character) and is kept.</li>
   * <li>Group 2 is either a control word with one optional trailing
   * whitespace character, or a <code>{...}</code> group containing no space
   * or line break, and is discarded.</li>
   * </ul>
   */
  static final Pattern CONTROL_CODE = Pattern
      .compile("(^|\\b|[^\\\\])(\\\\[a-zA-Z0-9\\-]+\\s?|\\{[^\\} \r\n]+\\})");

  /** <code>\'hh</code> hex escape; the two hex digits may be in either case. */
  private static final Pattern HEX_ESCAPE = Pattern
      .compile("\\\\'([0-9a-fA-F]{2})");

  /**
   * <code>\par</code> followed by something other than a line break. A single
   * delimiter space is consumed with it; any other following character is
   * only looked at, so the start of the next control word stays intact.
   */
  private static final Pattern PAR_BEFORE_TEXT = Pattern
      .compile("\\\\par\\b(?: |(?=[^\r\n]))");

  /**
   * <code>\b</code> / <code>\b0</code> as complete control words. The
   * lookahead stops longer words such as <code>\bullet</code> from matching.
   */
  private static final Pattern BOLD = Pattern
      .compile("\\\\b0?(?![a-zA-Z0-9\\-])");

  /**
   * <code>\i</code> / <code>\i0</code> as complete control words. The
   * lookahead stops longer words such as <code>\info</code> from matching.
   */
  private static final Pattern ITALIC = Pattern
      .compile("\\\\i0?(?![a-zA-Z0-9\\-])");

  /** An unescaped closing brace; group 1 is the preceding context to keep. */
  private static final Pattern STRAY_CLOSE_BRACE = Pattern
      .compile("(^|\\b|[^\\\\])\\}");

  /**
   * Convert a piece of RTF into plain text with markdown-style emphasis.
   *
   * @param text
   *            the RTF source. Must not be null.
   * @return the text with the supported control words translated and the
   *         remaining control codes removed
   */
  public String decodeRTF(String text) {
    // Ignored codes include:
    // \pard = default paragraph settings
    // \s<N> = user defined style, referenced by number

    text = text.replaceAll("\\\\tab\\b", "\t");
    text = text.replaceAll("\\\\lquote\\s?", "'");
    text = text.replaceAll("\\\\rquote\\s?", "'");
    text = text.replaceAll("\\\\ldblquote\\s?", "\"");
    text = text.replaceAll("\\\\rdblquote\\s?", "\"");

    // hex-encoded non-ascii chars
    text = decodeHexEscapes(text);

    // paragraph markers: \par in front of text becomes a blank line; \par in
    // front of a line break (or at the end) is dropped and the existing line
    // break is relied upon.
    text = PAR_BEFORE_TEXT.matcher(text).replaceAll("\n\n");
    text = text.replaceAll("\\\\par\\b", "");
    text = text.replaceAll("\\\\line\\b", "\n");

    // italics and bold into markdown
    // NB: \i0 is "italics off", \b0 is "bold off"
    text = BOLD.matcher(text).replaceAll("**");
    text = ITALIC.matcher(text).replaceAll("*");

    // strip the stuff we don't convert, keeping the character in front of it
    text = CONTROL_CODE.matcher(text).replaceAll("$1");

    // nested {{}}s can fool the clean-up above -- get rid of any stray
    // unescaped }s
    text = STRAY_CLOSE_BRACE.matcher(text).replaceAll("$1");

    // escaped symbols: non-breaking space and literal braces
    text = text.replace("\\~", " ");
    text = text.replace("\\{", "{");
    text = text.replace("\\}", "}");
    return text;
  }

  /**
   * Replace every <code>\'hh</code> escape with the character whose code is
   * the hex value <code>hh</code>.
   * <p>
   * NOTE(review): the byte value is used directly as the character code; RTF
   * documents may declare a different code page -- confirm whether that
   * matters for callers.
   */
  private static String decodeHexEscapes(String text) {
    Matcher hex = HEX_ESCAPE.matcher(text);
    StringBuilder out = new StringBuilder(text.length());
    int copiedUpTo = 0;
    while (hex.find()) {
      out.append(text, copiedUpTo, hex.start());
      out.append((char) Integer.parseInt(hex.group(1), 16));
      copiedUpTo = hex.end();
    }
    out.append(text, copiedUpTo, text.length());
    return out.toString();
  }

}
TOP

Related Classes of winterwell.utils.io.RTFHelper

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.