Package com.cubusmail.mail.text

Source Code of com.cubusmail.mail.text.MessageTextUtil

/* MessageTextUtil.java

   Copyright (c) 2009 Juergen Schlierf, All Rights Reserved
  
   This file is part of Cubusmail (http://code.google.com/p/cubusmail/).
 
   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 3 of the License, or (at your option) any later version.
 
   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.
 
   You should have received a copy of the GNU Lesser General Public
   License along with Cubusmail. If not, see <http://www.gnu.org/licenses/>.
  
*/
package com.cubusmail.mail.text;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.io.UnsupportedEncodingException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.mail.MessagingException;
import javax.mail.Multipart;
import javax.mail.Part;
import javax.mail.internet.ContentType;
import javax.mail.internet.MimeBodyPart;
import javax.mail.internet.MimeMessage;

import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
import org.htmlcleaner.CleanerProperties;
import org.htmlcleaner.HtmlCleaner;
import org.htmlcleaner.TagNode;
import org.springframework.web.util.HtmlUtils;

import com.cubusmail.core.CubusConstants;
import com.cubusmail.gwtui.domain.Preferences;
import com.cubusmail.mail.MessageHandler;
import com.cubusmail.mail.util.MessageUtils;

/**
* Util class for message text preparation.
*
* @author Juergen Schlierf
*/
public class MessageTextUtil {

  /** Class-wide logger. */
  private static Logger log = Logger.getLogger( MessageTextUtil.class.getName() );

  /**
   * HtmlCleaner configuration shared by every cleaner instance this class
   * creates. Going by the setter names, it prunes <code>style</code> and
   * <code>script</code> elements and omits unknown tags.
   */
  private static final CleanerProperties CLEANER_PROPERTIES = new CleanerProperties();
  static {
    CLEANER_PROPERTIES.setPruneTags( "style, script" );
    CLEANER_PROPERTIES.setOmitUnknownTags( true );
  }

  /**
   * Matches either a complete anchor element (first alternative, no capture
   * group) or a bare link starting with http(s)://, ftp://, mailto:, news. or
   * www. (second alternative, captured as group 1). Inside a bare link an
   * ampersand is accepted only as the escaped form or when it does not start
   * an entity reference, and the link may not end in punctuation such as
   * '.', ',', ';', ':', '!' or '?'. Matching is case-insensitive.
   */
  public static final Pattern PATTERN_HREF = Pattern
      .compile(
          "<a\\s+href[^>]+>.*?</a>|((?:https?://|ftp://|mailto:|news\\.|www\\.)(?:[-A-Z0-9+@#/%?=~_|!:,.;]|&amp;|&(?!\\w+;))*(?:[-A-Z0-9+@#/%=~_|]|&amp;|&(?!\\w+;)))",
          Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE );

  /**
   * Splits an anchor element that carries a target attribute into three
   * groups: (1) everything up to and including <code>target=</code> and an
   * optional opening quote, (2) the target value, (3) the remainder through
   * the closing anchor tag.
   */
  private static final Pattern PATTERN_TARGET = Pattern.compile( "(<a[^>]*?target=\"?)([^\\s\">]+)(\"?.*</a>)",
      Pattern.CASE_INSENSITIVE );

  /** Target value that anchors are rewritten to. */
  private static final String STR_BLANK = "_blank";
  /** Markup that replaces each line break when plain text is converted to HTML. */
  private static final String HTML_BR = "<br />";
  /** Regular expression matching one line break (LF, optionally preceded by CR). */
  private static final String REPL_LINEBREAK = "\r?\n";

  /** Initial capacity of the string builder used when reading a stream. */
  private static final int STRBLD_SIZE = 32768; // 32K
  /** Size of the char buffer used when reading a stream. */
  private static final int BUFSIZE = 8192; // 8K
  /** Attribute prefix looked for directly in front of a link to detect image sources. */
  private static final String STR_IMG_SRC = "src=";

  /**
   * @param part
   * @param messageHandler
   * @param loadImages
   * @param reply
   * @throws MessagingException
   * @throws IOException
   */
  public static void messageTextFromPart( Part part, MessageHandler messageHandler, boolean loadImages,
      MessageTextMode mode, Preferences preferences, int level ) throws MessagingException, IOException {

    log.debug( "Content type of part: " + part.getContentType() );

    if ( mode == MessageTextMode.DISPLAY || mode == MessageTextMode.DRAFT ) {
      if ( MessageUtils.isImagepart( part ) ) {
        messageHandler.setMessageImageHtml( createImageMessageText( messageHandler.getId() ) );
      }
      else if ( !preferences.isShowHtml() && !StringUtils.isEmpty( messageHandler.getMessageTextPlain() ) ) {
        return;
      }
      else if ( preferences.isShowHtml() && !StringUtils.isEmpty( messageHandler.getMessageTextHtml() ) ) {
        return;
      }
      else if ( part.isMimeType( "text/plain" ) ) {
        String text = readPart( part );
        if ( !StringUtils.isBlank( text ) ) {
          messageHandler.setMessageTextPlain( formatPlainText( text, mode ) );
        }
      }
      else if ( part.isMimeType( "text/html" ) ) {
        if ( preferences.isShowHtml() ) {
          String text = readPart( part );
          boolean[] hasImages = new boolean[] { false };
          if ( !StringUtils.isBlank( convertHtml2PlainText( text ) ) ) {
            text = formatHTMLText( text, loadImages, hasImages );
            messageHandler.setMessageTextHtml( text );
            messageHandler.setHtmlMessage( true );
            messageHandler.setHasImages( hasImages[0] );
          }
        }
        else {
          // only if there is no plain text part found
          if ( StringUtils.isEmpty( messageHandler.getMessageTextPlain() ) ) {
            String text = readPart( part );
            text = convertHtml2PlainText( text );
            if ( !StringUtils.isBlank( text ) ) {
              text = formatPlainText( text, mode );
              messageHandler.setMessageTextPlain( text );
            }
          }
        }
      }
      else if ( part.isMimeType( "multipart/*" ) ) {
        Multipart mp = (Multipart) part.getContent();
        int count = mp.getCount();
        for (int i = 0; i < count; i++) {
          Part subPart = mp.getBodyPart( i );
          messageTextFromPart( subPart, messageHandler, loadImages, mode, preferences, level++ );
        }
      }
    }
    else if ( mode == MessageTextMode.REPLY ) {
      if ( !preferences.isCreateHtmlMsgs() && !StringUtils.isEmpty( messageHandler.getMessageTextPlain() ) ) {
        return;
      }
      else if ( preferences.isCreateHtmlMsgs() && !StringUtils.isEmpty( messageHandler.getMessageTextHtml() ) ) {
        return;
      }
      else if ( part.isMimeType( "text/plain" ) ) {
        String text = readPart( part );
        text = quotePlainText( text );
        if ( preferences.isCreateHtmlMsgs() ) {
          text = convertPlainText2Html( text, mode );
          messageHandler.setMessageTextHtml( text );
          messageHandler.setHtmlMessage( true );
        }
        else {
          messageHandler.setMessageTextPlain( text );
        }
      }
      else if ( part.isMimeType( "text/html" ) && StringUtils.isEmpty( messageHandler.getMessageTextPlain() ) ) {
        String text = readPart( part );
        text = convertHtml2PlainText( text );
        text = quotePlainText( text );
        if ( preferences.isCreateHtmlMsgs() ) {
          text = convertPlainText2Html( text, mode );
          messageHandler.setMessageTextHtml( text );
          messageHandler.setHtmlMessage( true );
        }
        else {
          messageHandler.setMessageTextPlain( text );
        }
      }
      else if ( part.isMimeType( "multipart/*" ) ) {
        Multipart mp = (Multipart) part.getContent();
        int count = mp.getCount();
        for (int i = 0; i < count; i++) {
          Part subPart = mp.getBodyPart( i );
          messageTextFromPart( subPart, messageHandler, loadImages, mode, preferences, level++ );
        }
      }
    }
  }

  /**
   * Process the HTML message text either for display or reply/draft.
   *
   * @param messageText
   * @param charset
   * @param imageLoad
   * @param hasImages
   * @return
   */
  public static String formatHTMLText( String messageText, boolean loadImages, boolean[] hasImages ) {

    HtmlCleaner cleaner = new HtmlCleaner( CLEANER_PROPERTIES );
    String result = "";

    try {
      TagNode rootNode = cleaner.clean( new StringReader( messageText ) );

      TagNode[] nodes = rootNode.getElementsByName( "a", true );
      if ( nodes != null && nodes.length > 0 ) {
        for (TagNode tagnode : nodes) {
          tagnode.removeAttribute( "target" );
          tagnode.addAttribute( "target", "_blank" );
        }
      }

      nodes = rootNode.getElementsByName( "img", true );
      if ( nodes != null && nodes.length > 0 ) {
        hasImages[0] = true;
        if ( !loadImages ) {
          for (TagNode tagnode : nodes) {
            tagnode.removeAttribute( "src" );
            tagnode.addAttribute( "src", "NO_IMAGE" );
          }
        }
      }

      result = cleaner.getInnerHtml( rootNode );
    }
    catch (IOException e) {
      log.error( e.getMessage(), e );
    }

    return result;
  }

  /**
   * Process plain text messages for display.
   *
   * @param plainText
   * @return
   */
  public static String formatPlainText( String plainText, MessageTextMode mode ) {

    if ( !StringUtils.isEmpty( plainText ) ) {
      if ( mode == MessageTextMode.REPLY ) {
        return quotePlainText( plainText );
      }
      else if ( mode == MessageTextMode.DISPLAY ) {
        return convertPlainText2Html( plainText, mode );
      }
    }

    return plainText;
  }

  /**
   * Convert html text to plain text.
   *
   * @param htmlText
   * @return
   */
  public static String convertHtml2PlainText( String htmlText ) {

    HtmlCleaner cleaner = new HtmlCleaner( CLEANER_PROPERTIES );

    try {
      TagNode rootNode = cleaner.clean( new StringReader( htmlText ) );
      return rootNode.getText().toString();
    }
    catch (IOException e) {
      log.error( e.getMessage(), e );
    }

    return "";
  }

  /**
   * Convert a plaint text to html.
   *
   * @param plainText
   * @return
   */
  public static String convertPlainText2Html( String plainText, MessageTextMode mode ) {

    try {
      plainText = HtmlUtils.htmlEscape( plainText ).replaceAll( REPL_LINEBREAK, HTML_BR );

      final Matcher m = PATTERN_HREF.matcher( plainText );
      final StringBuffer sb = new StringBuffer( plainText.length() );
      final StringBuilder tmp = new StringBuilder( 256 );
      while (m.find()) {
        final String nonHtmlLink = m.group( 1 );
        if ( (nonHtmlLink == null) || (hasSrcAttribute( plainText, m.start( 1 ) )) ) {
          m.appendReplacement( sb, Matcher.quoteReplacement( checkTarget( m.group() ) ) );
        }
        else {
          tmp.setLength( 0 );
          m.appendReplacement( sb, tmp.append( "<a href=\"" ).append(
              (nonHtmlLink.startsWith( "www" ) || nonHtmlLink.startsWith( "news" ) ? "http://" : "") )
              .append( "$1\" target=\"_blank\">$1</a>" ).toString() );
        }
      }
      m.appendTail( sb );

      if ( mode == MessageTextMode.DISPLAY ) {
        sb.insert( 0, "<p style=\"font-family: monospace; font-size: 10pt;\">" );
        sb.append( "</p>" );
      }

      return sb.toString();
    }
    catch (final Exception e) {
      log.error( e.getMessage(), e );
    }
    catch (final StackOverflowError error) {
      log.error( StackOverflowError.class.getName(), error );
    }

    return plainText;
  }

  /**
   * @param line
   * @param urlStart
   * @return
   */
  private static boolean hasSrcAttribute( final String line, final int urlStart ) {

    return (urlStart >= 5)
        && ((STR_IMG_SRC.equalsIgnoreCase( line.substring( urlStart - 5, urlStart - 1 ) )) || (STR_IMG_SRC
            .equalsIgnoreCase( line.substring( urlStart - 4, urlStart ) )));
  }

  /**
   * @param anchorTag
   * @return
   */
  private static String checkTarget( final String anchorTag ) {

    final Matcher m = PATTERN_TARGET.matcher( anchorTag );
    if ( m.matches() ) {
      if ( !STR_BLANK.equalsIgnoreCase( m.group( 2 ) ) ) {
        final StringBuilder sb = new StringBuilder( 128 );
        return sb.append( m.group( 1 ) ).append( STR_BLANK ).append( m.group( 3 ) ).toString();
      }
      return anchorTag;
    }

    final int pos = anchorTag.indexOf( '>' );
    if ( pos == -1 ) {
      return anchorTag;
    }
    final StringBuilder sb = new StringBuilder( anchorTag.length() + 16 );
    return sb.append( anchorTag.substring( 0, pos ) ).append( " target=\"" ).append( STR_BLANK ).append( '"' )
        .append( anchorTag.substring( pos ) ).toString();
  }

  /**
   * Reads the string out of part's input stream. On first try the input
   * stream retrieved by <code>javax.mail.Part.getInputStream()</code> is
   * used. If an I/O error occurs (<code>java.io.IOException</code>) then the
   * next try is with part's raw input stream. If everything fails an empty
   * string is returned.
   *
   * @param p
   *            - the <code>javax.mail.Part</code> object
   * @param ct
   *            - the part's content type
   * @return the string read from part's input stream or the empty string ""
   *         if everything failed
   * @throws MessagingException
   *             - if an error occurs in part's getter methods
   */
  public static String readPart( final Part p ) throws MessagingException {

    String contentType = p.getContentType();
    ContentType type = new ContentType( contentType );

    /*
     * Use specified charset if available else use default one
     */
    String charset = type.getParameter( "charset" );
    if ( null == charset || charset.equalsIgnoreCase( CubusConstants.US_ASCII ) ) {
      charset = CubusConstants.DEFAULT_CHARSET;
    }
    try {
      return readStream( p.getInputStream(), charset );
    }
    catch (final IOException e) {
      /*
       * Try to get data from raw input stream
       */
      final InputStream inStream;
      if ( p instanceof MimeBodyPart ) {
        final MimeBodyPart mpb = (MimeBodyPart) p;
        inStream = mpb.getRawInputStream();
      }
      else if ( p instanceof MimeMessage ) {
        final MimeMessage mm = (MimeMessage) p;
        inStream = mm.getRawInputStream();
      }
      else {
        inStream = null;
      }
      if ( inStream == null ) {
        /*
         * Neither a MimeBodyPart nor a MimeMessage
         */
        return "";
      }
      try {
        return readStream( inStream, charset );
      }
      catch (final IOException e1) {
        log.error( e1.getLocalizedMessage(), e1 );
        return e1.getLocalizedMessage();
        // return STR_EMPTY;
      }
      finally {
        try {
          inStream.close();
        }
        catch (final IOException e1) {
          log.error( e1.getLocalizedMessage(), e1 );
        }
      }
    }
  }

  /**
   * Reads a string from given input stream using direct buffering
   *
   * @param inStream
   *            - the input stream
   * @param charset
   *            - the charset
   * @return the <code>String</code> read from input stream
   * @throws IOException
   *             - if an I/O error occurs
   */
  public static String readStream( final InputStream inStream, final String charset ) throws IOException {

    InputStreamReader isr = null;
    try {
      int count = 0;
      final char[] c = new char[BUFSIZE];
      isr = new InputStreamReader( inStream, charset );
      if ( (count = isr.read( c )) > 0 ) {
        final StringBuilder sb = new StringBuilder( STRBLD_SIZE );
        do {
          sb.append( c, 0, count );
        }
        while ((count = isr.read( c )) > 0);
        return sb.toString();
      }
      return "";
    }
    catch (final UnsupportedEncodingException e) {
      log.error( "Unsupported encoding in a message detected and monitored.", e );
      return "";
    }
    finally {
      if ( null != isr ) {
        try {
          isr.close();
        }
        catch (final IOException e) {
          log.error( e.getLocalizedMessage(), e );
        }
      }
    }
  }

  private static String createImageMessageText( long id ) {

    String imageText = "<img src=\"" + "cubusmail/retrieveImage.rpc?messageId=" + id
        + "&attachmentIndex=0&thumbnail=false" + "\" />";
    return imageText;
  }

  /**
   * @param textContent
   * @return
   */
  private static String quotePlainText( final String textContent ) {

    return textContent.replaceAll( "(?m)^", "> " );
  }
}
TOP

Related Classes of com.cubusmail.mail.text.MessageTextUtil

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.