/* MessageTextUtil.java
Copyright (c) 2009 Juergen Schlierf, All Rights Reserved
This file is part of Cubusmail (http://code.google.com/p/cubusmail/).
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 3 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with Cubusmail. If not, see <http://www.gnu.org/licenses/>.
*/
package com.cubusmail.mail.text;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.io.UnsupportedEncodingException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.mail.MessagingException;
import javax.mail.Multipart;
import javax.mail.Part;
import javax.mail.internet.ContentType;
import javax.mail.internet.MimeBodyPart;
import javax.mail.internet.MimeMessage;
import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
import org.htmlcleaner.CleanerProperties;
import org.htmlcleaner.HtmlCleaner;
import org.htmlcleaner.TagNode;
import org.springframework.web.util.HtmlUtils;
import com.cubusmail.core.CubusConstants;
import com.cubusmail.gwtui.domain.Preferences;
import com.cubusmail.mail.MessageHandler;
import com.cubusmail.mail.util.MessageUtils;
/**
* Util class for message text preparation.
*
* @author Juergen Schlierf
*/
public class MessageTextUtil {
/** Class logger. */
private static final Logger log = Logger.getLogger( MessageTextUtil.class.getName() );

/** HtmlCleaner configuration used for every cleaner instance created in this class. */
private static final CleanerProperties CLEANER_PROPERTIES = new CleanerProperties();
static {
    // Prune style and script elements and omit tags unknown to the cleaner.
    CLEANER_PROPERTIES.setPruneTags( "style, script" );
    CLEANER_PROPERTIES.setOmitUnknownTags( true );
}

/**
 * Matches either a complete anchor element (<code>&lt;a href...&gt;...&lt;/a&gt;</code>) or, captured as group
 * 1, a bare link starting with <code>http://</code>, <code>https://</code>, <code>ftp://</code>,
 * <code>mailto:</code>, <code>news.</code> or <code>www.</code>. Matching is case-insensitive.
 */
// NOTE(review): in both character alternatives the plain '&' branch always matches first, so the
// '&(?!\w+;)' branch looks unreachable; possibly the first branch was meant to be an escaped
// ampersand entity - confirm against upstream before touching the expression.
public static final Pattern PATTERN_HREF = Pattern
        .compile(
                "<a\\s+href[^>]+>.*?</a>|((?:https?://|ftp://|mailto:|news\\.|www\\.)(?:[-A-Z0-9+@#/%?=~_|!:,.;]|&|&(?!\\w+;))*(?:[-A-Z0-9+@#/%=~_|]|&|&(?!\\w+;)))",
                Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE );

/**
 * Splits an anchor element that carries a <code>target</code> attribute into: group 1 = everything up to and
 * including <code>target=</code> (and an optional opening quote), group 2 = the target value, group 3 = the
 * remainder of the element.
 */
private static final Pattern PATTERN_TARGET = Pattern.compile( "(<a[^>]*?target=\"?)([^\\s\">]+)(\"?.*</a>)",
        Pattern.CASE_INSENSITIVE );

/** Target value forced onto anchors. */
private static final String STR_BLANK = "_blank";

/** HTML line break that replaces plain-text line breaks. */
private static final String HTML_BR = "<br />";

/** Regular expression matching a plain-text line break (LF or CRLF). */
private static final String REPL_LINEBREAK = "\r?\n";

/** Initial capacity of the string builder used by {@link #readStream(InputStream, String)}. */
private static final int STRBLD_SIZE = 32768; // 32K

/** Size of the char buffer used by {@link #readStream(InputStream, String)}. */
private static final int BUFSIZE = 8192; // 8K

/** Attribute prefix looked for directly in front of a link by <code>hasSrcAttribute</code>. */
private static final String STR_IMG_SRC = "src=";
/**
 * Walks a message part, recursing into multipart containers, and stores the text it finds in the given
 * message handler.
 * <p>
 * In <code>DISPLAY</code> and <code>DRAFT</code> mode:
 * <ul>
 * <li>an image part (as decided by <code>MessageUtils.isImagepart</code>) sets the handler's image HTML;</li>
 * <li>the part is skipped once the handler already holds text of the preferred kind (plain text when
 * <code>preferences.isShowHtml()</code> is false, HTML text when it is true);</li>
 * <li>a <code>text/plain</code> part is read and passed through {@link #formatPlainText};</li>
 * <li>a <code>text/html</code> part is cleaned with {@link #formatHTMLText} when HTML display is enabled;
 * otherwise it is converted to plain text, but only if no plain text has been stored yet;</li>
 * <li>a multipart container is processed part by part.</li>
 * </ul>
 * In <code>REPLY</code> mode the text (plain, or HTML converted to plain) is quoted and, when
 * <code>preferences.isCreateHtmlMsgs()</code> is true, converted to HTML. Any other mode leaves the handler
 * untouched.
 *
 * @param part
 *            the part to inspect
 * @param messageHandler
 *            receives the extracted text and flags
 * @param loadImages
 *            whether image sources in HTML text are kept (see {@link #formatHTMLText})
 * @param mode
 *            the purpose the text is prepared for
 * @param preferences
 *            user preferences controlling HTML display and HTML composition
 * @param level
 *            nesting depth of <code>part</code>; it is only handed on (increased by one) to the recursive calls
 * @throws MessagingException
 *             if the part cannot be accessed
 * @throws IOException
 *             if the part's content cannot be read
 */
public static void messageTextFromPart( Part part, MessageHandler messageHandler, boolean loadImages,
        MessageTextMode mode, Preferences preferences, int level ) throws MessagingException, IOException {

    log.debug( "Content type of part: " + part.getContentType() );

    if ( mode == MessageTextMode.DISPLAY || mode == MessageTextMode.DRAFT ) {
        if ( MessageUtils.isImagepart( part ) ) {
            messageHandler.setMessageImageHtml( createImageMessageText( messageHandler.getId() ) );
        }
        else if ( !preferences.isShowHtml() && !StringUtils.isEmpty( messageHandler.getMessageTextPlain() ) ) {
            return;
        }
        else if ( preferences.isShowHtml() && !StringUtils.isEmpty( messageHandler.getMessageTextHtml() ) ) {
            return;
        }
        else if ( part.isMimeType( "text/plain" ) ) {
            String text = readPart( part );
            if ( !StringUtils.isBlank( text ) ) {
                messageHandler.setMessageTextPlain( formatPlainText( text, mode ) );
            }
        }
        else if ( part.isMimeType( "text/html" ) ) {
            if ( preferences.isShowHtml() ) {
                String text = readPart( part );
                boolean[] hasImages = new boolean[] { false };
                // Ignore HTML parts that contain markup only and no visible text.
                if ( !StringUtils.isBlank( convertHtml2PlainText( text ) ) ) {
                    text = formatHTMLText( text, loadImages, hasImages );
                    messageHandler.setMessageTextHtml( text );
                    messageHandler.setHtmlMessage( true );
                    messageHandler.setHasImages( hasImages[0] );
                }
            }
            else {
                // only if there is no plain text part found
                if ( StringUtils.isEmpty( messageHandler.getMessageTextPlain() ) ) {
                    String text = readPart( part );
                    text = convertHtml2PlainText( text );
                    if ( !StringUtils.isBlank( text ) ) {
                        text = formatPlainText( text, mode );
                        messageHandler.setMessageTextPlain( text );
                    }
                }
            }
        }
        else if ( part.isMimeType( "multipart/*" ) ) {
            messageTextFromMultipart( part, messageHandler, loadImages, mode, preferences, level );
        }
    }
    else if ( mode == MessageTextMode.REPLY ) {
        if ( !preferences.isCreateHtmlMsgs() && !StringUtils.isEmpty( messageHandler.getMessageTextPlain() ) ) {
            return;
        }
        else if ( preferences.isCreateHtmlMsgs() && !StringUtils.isEmpty( messageHandler.getMessageTextHtml() ) ) {
            return;
        }
        else if ( part.isMimeType( "text/plain" ) ) {
            String text = readPart( part );
            text = quotePlainText( text );
            if ( preferences.isCreateHtmlMsgs() ) {
                text = convertPlainText2Html( text, mode );
                messageHandler.setMessageTextHtml( text );
                messageHandler.setHtmlMessage( true );
            }
            else {
                messageHandler.setMessageTextPlain( text );
            }
        }
        else if ( part.isMimeType( "text/html" ) && StringUtils.isEmpty( messageHandler.getMessageTextPlain() ) ) {
            String text = readPart( part );
            text = convertHtml2PlainText( text );
            text = quotePlainText( text );
            if ( preferences.isCreateHtmlMsgs() ) {
                text = convertPlainText2Html( text, mode );
                messageHandler.setMessageTextHtml( text );
                messageHandler.setHtmlMessage( true );
            }
            else {
                messageHandler.setMessageTextPlain( text );
            }
        }
        else if ( part.isMimeType( "multipart/*" ) ) {
            messageTextFromMultipart( part, messageHandler, loadImages, mode, preferences, level );
        }
    }
}

/**
 * Runs {@link #messageTextFromPart} on every body part of a multipart container, one nesting level deeper.
 */
private static void messageTextFromMultipart( Part part, MessageHandler messageHandler, boolean loadImages,
        MessageTextMode mode, Preferences preferences, int level ) throws MessagingException, IOException {

    Object content = part.getContent();
    if ( !(content instanceof Multipart) ) {
        // The content could not be decoded into a Multipart object; skip it instead of failing with a
        // ClassCastException.
        log.warn( "Multipart content expected but got: " + (content == null ? "null" : content.getClass().getName()) );
        return;
    }

    Multipart multipart = (Multipart) content;
    int count = multipart.getCount();
    for (int i = 0; i < count; i++) {
        // All sibling parts share the same depth: level + 1 (not level++, which drifted per sibling).
        messageTextFromPart( multipart.getBodyPart( i ), messageHandler, loadImages, mode, preferences, level + 1 );
    }
}
/**
 * Cleans an HTML message text for display: every anchor gets <code>target="_blank"</code>, and image
 * sources are neutralised unless images may be loaded.
 *
 * @param messageText
 *            the HTML text; <code>null</code> yields an empty string
 * @param loadImages
 *            if <code>false</code>, the <code>src</code> attribute of every <code>img</code> element is
 *            replaced by <code>NO_IMAGE</code>
 * @param hasImages
 *            out-parameter: element 0 is set to <code>true</code> when at least one <code>img</code> element
 *            is found; may be <code>null</code> or empty if the caller is not interested
 * @return the inner HTML of the cleaned document, or an empty string if the input is <code>null</code> or
 *         cleaning fails with an I/O error
 */
public static String formatHTMLText( String messageText, boolean loadImages, boolean[] hasImages ) {
    if ( messageText == null ) {
        return "";
    }

    HtmlCleaner cleaner = new HtmlCleaner( CLEANER_PROPERTIES );
    String result = "";
    try {
        TagNode rootNode = cleaner.clean( new StringReader( messageText ) );

        TagNode[] anchors = rootNode.getElementsByName( "a", true );
        if ( anchors != null ) {
            for (TagNode anchor : anchors) {
                anchor.removeAttribute( "target" );
                anchor.addAttribute( "target", STR_BLANK );
            }
        }

        TagNode[] images = rootNode.getElementsByName( "img", true );
        if ( images != null && images.length > 0 ) {
            if ( hasImages != null && hasImages.length > 0 ) {
                hasImages[0] = true;
            }
            if ( !loadImages ) {
                for (TagNode image : images) {
                    image.removeAttribute( "src" );
                    image.addAttribute( "src", "NO_IMAGE" );
                }
            }
        }

        result = cleaner.getInnerHtml( rootNode );
    }
    catch (IOException e) {
        // Best effort: a text that cannot be cleaned is shown as empty.
        log.error( e.getMessage(), e );
    }
    return result;
}
/**
 * Prepares a plain text for the given mode: quoted for <code>REPLY</code>, converted to HTML for
 * <code>DISPLAY</code>, and returned as is for every other mode.
 *
 * @param plainText
 *            the plain text; <code>null</code> or empty text is returned unchanged
 * @param mode
 *            the purpose the text is prepared for
 * @return the prepared text
 */
public static String formatPlainText( String plainText, MessageTextMode mode ) {
    if ( StringUtils.isEmpty( plainText ) ) {
        return plainText;
    }
    if ( mode == MessageTextMode.REPLY ) {
        return quotePlainText( plainText );
    }
    if ( mode == MessageTextMode.DISPLAY ) {
        return convertPlainText2Html( plainText, mode );
    }
    return plainText;
}
/**
 * Converts an HTML text to plain text by cleaning it and returning the text content of the resulting
 * document.
 *
 * @param htmlText
 *            the HTML text; <code>null</code> yields an empty string
 * @return the text content, or an empty string if the input is <code>null</code> or cleaning fails with an
 *         I/O error
 */
public static String convertHtml2PlainText( String htmlText ) {
    if ( htmlText == null ) {
        // StringReader rejects null with a NullPointerException.
        return "";
    }

    HtmlCleaner cleaner = new HtmlCleaner( CLEANER_PROPERTIES );
    try {
        TagNode rootNode = cleaner.clean( new StringReader( htmlText ) );
        return rootNode.getText().toString();
    }
    catch (IOException e) {
        log.error( e.getMessage(), e );
    }
    return "";
}
/**
 * Converts a plain text to HTML: the text is HTML-escaped, line breaks become <code>&lt;br /&gt;</code> and
 * every link found by {@link #PATTERN_HREF} is turned into an anchor that opens in a new window. In
 * <code>DISPLAY</code> mode the result is wrapped in a monospace paragraph.
 *
 * @param plainText
 *            the plain text; <code>null</code> is returned unchanged
 * @param mode
 *            the purpose the text is prepared for
 * @return the HTML text; if the conversion fails, the text in the state reached before the failure
 */
public static String convertPlainText2Html( String plainText, MessageTextMode mode ) {
    if ( plainText == null ) {
        return null;
    }

    String escaped = plainText;
    try {
        escaped = HtmlUtils.htmlEscape( plainText ).replaceAll( REPL_LINEBREAK, HTML_BR );

        final Matcher m = PATTERN_HREF.matcher( escaped );
        // Matcher.appendReplacement requires a StringBuffer.
        final StringBuffer sb = new StringBuffer( escaped.length() );
        while (m.find()) {
            final String nonHtmlLink = m.group( 1 );
            if ( (nonHtmlLink == null) || hasSrcAttribute( escaped, m.start( 1 ) ) ) {
                // Already an anchor element, or a link used as a src value: only make sure of the target.
                m.appendReplacement( sb, Matcher.quoteReplacement( checkTarget( m.group() ) ) );
            }
            else {
                // The pattern is case-insensitive, so the scheme-less prefixes must be tested the same way.
                final boolean needsScheme = nonHtmlLink.regionMatches( true, 0, "www", 0, 3 )
                        || nonHtmlLink.regionMatches( true, 0, "news", 0, 4 );
                // "$1" is expanded by appendReplacement to the matched link.
                m.appendReplacement( sb, "<a href=\"" + (needsScheme ? "http://" : "")
                        + "$1\" target=\"_blank\">$1</a>" );
            }
        }
        m.appendTail( sb );

        if ( mode == MessageTextMode.DISPLAY ) {
            sb.insert( 0, "<p style=\"font-family: monospace; font-size: 10pt;\">" );
            sb.append( "</p>" );
        }
        return sb.toString();
    }
    catch (final Exception e) {
        log.error( e.getMessage(), e );
    }
    catch (final StackOverflowError error) {
        // Guards against a stack overflow while matching; fall through to the unlinked text.
        log.error( StackOverflowError.class.getName(), error );
    }
    return escaped;
}
/**
 * Tells whether the link starting at <code>urlStart</code> is directly preceded by <code>src=</code>,
 * either immediately or with exactly one character (e.g. a quote) in between. The comparison ignores case.
 *
 * @param line
 *            the text containing the link
 * @param urlStart
 *            index of the link's first character in <code>line</code>
 * @return <code>true</code> if the link is the value of a <code>src</code> attribute
 */
private static boolean hasSrcAttribute( final String line, final int urlStart ) {
    final int prefixLength = STR_IMG_SRC.length();

    // src=http://... ; needs only prefixLength characters in front of the link.
    if ( urlStart >= prefixLength && line.regionMatches( true, urlStart - prefixLength, STR_IMG_SRC, 0, prefixLength ) ) {
        return true;
    }

    // src="http://... ; one extra character between the prefix and the link.
    return urlStart >= prefixLength + 1
            && line.regionMatches( true, urlStart - prefixLength - 1, STR_IMG_SRC, 0, prefixLength );
}
/**
 * Makes sure an anchor element opens in a new window: an existing <code>target</code> value other than
 * <code>_blank</code> is replaced, and a missing <code>target</code> attribute is inserted in front of the
 * first <code>&gt;</code>.
 *
 * @param anchorTag
 *            the anchor element text
 * @return the adjusted text, or <code>anchorTag</code> itself if nothing had to be changed or no
 *         <code>&gt;</code> is present
 */
private static String checkTarget( final String anchorTag ) {
    final Matcher targetMatcher = PATTERN_TARGET.matcher( anchorTag );

    if ( !targetMatcher.matches() ) {
        // No target attribute yet: add one at the end of the opening tag.
        final int closeIndex = anchorTag.indexOf( '>' );
        if ( closeIndex == -1 ) {
            return anchorTag;
        }
        final String head = anchorTag.substring( 0, closeIndex );
        final String tail = anchorTag.substring( closeIndex );
        return head + " target=\"" + STR_BLANK + '"' + tail;
    }

    if ( STR_BLANK.equalsIgnoreCase( targetMatcher.group( 2 ) ) ) {
        return anchorTag;
    }
    return targetMatcher.group( 1 ) + STR_BLANK + targetMatcher.group( 3 );
}
/**
 * Reads the text of a part. The charset is taken from the part's content type; if it is missing, is
 * US-ASCII, or the content type cannot be parsed, the default charset is used.
 * <p>
 * The stream from <code>javax.mail.Part.getInputStream()</code> is tried first. If that fails with an
 * <code>java.io.IOException</code>, the raw input stream of a <code>MimeBodyPart</code> or
 * <code>MimeMessage</code> is tried next.
 *
 * @param p
 *            - the <code>javax.mail.Part</code> object
 * @return the string read from the part; the empty string "" if the first attempt failed and the part
 *         offers no raw input stream; the localized error message (or "" if there is none) if reading the raw
 *         stream failed as well. Never <code>null</code>.
 * @throws MessagingException
 *             - if an error occurs in part's getter methods
 */
public static String readPart( final Part p ) throws MessagingException {
    /*
     * Use specified charset if available else use default one
     */
    String charset = null;
    final String contentType = p.getContentType();
    if ( contentType != null ) {
        try {
            charset = new ContentType( contentType ).getParameter( "charset" );
        }
        catch (final MessagingException e) {
            // Unparsable Content-Type header: still try to read the text with the default charset.
            log.warn( "Unparsable content type, using default charset: " + contentType, e );
        }
    }
    if ( null == charset || charset.equalsIgnoreCase( CubusConstants.US_ASCII ) ) {
        charset = CubusConstants.DEFAULT_CHARSET;
    }

    try {
        return readStream( p.getInputStream(), charset );
    }
    catch (final IOException e) {
        log.debug( "Reading the part's input stream failed, trying the raw input stream", e );

        /*
         * Try to get data from raw input stream
         */
        final InputStream inStream;
        if ( p instanceof MimeBodyPart ) {
            inStream = ((MimeBodyPart) p).getRawInputStream();
        }
        else if ( p instanceof MimeMessage ) {
            inStream = ((MimeMessage) p).getRawInputStream();
        }
        else {
            inStream = null;
        }
        if ( inStream == null ) {
            /*
             * Neither a MimeBodyPart nor a MimeMessage
             */
            return "";
        }

        try {
            return readStream( inStream, charset );
        }
        catch (final IOException e1) {
            log.error( e1.getLocalizedMessage(), e1 );
            // The error message is handed back as the text; it may be null, which callers do not expect.
            final String message = e1.getLocalizedMessage();
            return message == null ? "" : message;
        }
        finally {
            try {
                inStream.close();
            }
            catch (final IOException e1) {
                log.error( e1.getLocalizedMessage(), e1 );
            }
        }
    }
}
/**
 * Reads the given input stream completely into a string, decoding it with the given charset. The stream
 * is closed before this method returns, whether reading succeeds or not.
 *
 * @param inStream
 *            - the input stream
 * @param charset
 *            - the name of the charset
 * @return the <code>String</code> read from input stream; the empty string if the stream holds no data or
 *         the charset is not supported
 * @throws IOException
 *             - if an I/O error occurs
 */
public static String readStream( final InputStream inStream, final String charset ) throws IOException {
    InputStreamReader isr = null;
    try {
        isr = new InputStreamReader( inStream, charset );

        final char[] buffer = new char[BUFSIZE];
        int count = isr.read( buffer );
        if ( count <= 0 ) {
            return "";
        }

        final StringBuilder sb = new StringBuilder( STRBLD_SIZE );
        do {
            sb.append( buffer, 0, count );
        }
        while ((count = isr.read( buffer )) > 0);
        return sb.toString();
    }
    catch (final UnsupportedEncodingException e) {
        log.error( "Unsupported encoding in a message detected and monitored.", e );
        return "";
    }
    finally {
        try {
            if ( null != isr ) {
                // Closing the reader closes the wrapped stream as well.
                isr.close();
            }
            else if ( null != inStream ) {
                // The reader was never created (e.g. unsupported charset): close the stream directly so
                // that it does not leak.
                inStream.close();
            }
        }
        catch (final IOException e) {
            log.error( e.getLocalizedMessage(), e );
        }
    }
}
/**
 * Builds the <code>img</code> element that shows the first attachment of a message in full size.
 *
 * @param id
 *            the message id put into the image URL
 * @return the HTML of the image element
 */
private static String createImageMessageText( long id ) {
    StringBuilder html = new StringBuilder();
    html.append( "<img src=\"" );
    html.append( "cubusmail/retrieveImage.rpc?messageId=" ).append( id );
    html.append( "&attachmentIndex=0&thumbnail=false" );
    html.append( "\" />" );
    return html.toString();
}
/**
 * Quotes a text for a reply by putting <code>"&gt; "</code> in front of every line.
 *
 * @param textContent
 *            the text to quote; must not be <code>null</code>
 * @return the quoted text
 */
private static String quotePlainText( final String textContent ) {
    final Matcher lineStarts = Pattern.compile( "(?m)^" ).matcher( textContent );
    return lineStarts.replaceAll( "> " );
}
}