package org.virbo.email2xml;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.PrintStream;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Locale;
import java.util.TimeZone;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.mail.Address;
import javax.mail.Message;
import javax.mail.MessagingException;
import javax.mail.Multipart;
import javax.mail.Part;
import javax.mail.internet.MimeBodyPart;
import javax.mail.internet.MimeMultipart;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Result;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.Text;
/**
*
* @author Ed Jackson
*/
public class MailConverter {
private static Logger logger = Logger.getLogger("org.virbo.email2xml");
private String outputRoot;
private static DateFormat xsdDateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'"); //compatible with XSD dateTime, UTC
private static DateFormat cleanDateFormat = new SimpleDateFormat("yyyyMMdd'T'HHmmss'Z'"); //without punctuation for filenames
private Message message;
private String dateString;
private String cleanDateString;
private DocumentBuilderFactory dbf;
private DocumentBuilder db;
private Document dom;
private MessageInfo info;
static {
xsdDateFormat.setTimeZone(java.util.TimeZone.getTimeZone("UTC"));
cleanDateFormat.setTimeZone(java.util.TimeZone.getTimeZone("UTC"));
}
public MailConverter(Message source, String outputRoot) {
this.message = source;
this.outputRoot = outputRoot; // Errors will be detected during message
dbf = DocumentBuilderFactory.newInstance();
try {
db = dbf.newDocumentBuilder();
dom = db.newDocument();
} catch (ParserConfigurationException ex) {
logger.log(Level.SEVERE, "Error instantiating XML document builder.", ex);
return;
}
try {
Date when = message.getReceivedDate();
dateString = xsdDateFormat.format(when);
cleanDateString = cleanDateFormat.format(when);
} catch (MessagingException ex) {
logger.log(Level.WARNING, "Error retrieving message date!", ex);
}
}
// This is mainly for debugging output.
public void print(PrintStream stream) {
try {
Address addrs[] = message.getFrom();
if (addrs != null) for (Address addr : addrs) {
stream.println("From: " + addr.toString());
}
addrs = message.getRecipients(Message.RecipientType.TO);
if (addrs != null) for (Address addr: addrs) {
stream.println("To: " + addr.toString());
}
addrs = message.getRecipients(Message.RecipientType.CC);
if (addrs != null) for (Address addr: addrs) {
stream.println("Cc: " + addr.toString());
}
stream.println("Received: " + message.getReceivedDate().toString());
stream.println("Subject: " + message.getSubject());
Object content = message.getContent();
if (content instanceof Multipart) {
Multipart mp = (Multipart)content;
int nparts = mp.getCount();
stream.printf("This is a multi-part message containing %d parts.%n", nparts);
for (int i=0; i<nparts; i++) {
MimeBodyPart part = (MimeBodyPart)mp.getBodyPart(i);
stream.printf(" Part %d is type %s.%n",i,part.getContentType());
if (part.isMimeType("text/*")) {
stream.print((String)part.getContent());
}
}
} else {
String type = message.getContentType();
stream.printf("This is a message of type %s, and has no attachments.%n", type);
if (message.isMimeType("text/*")) {
stream.print((String)message.getContent());
}
}
} catch (RuntimeException ex) {
//Make sure runtime exceptions still get thrown!
throw(ex);
} catch (Exception ex) {
// MessagingException, IOException
logger.log(Level.WARNING,"Error processing message.", ex);
}
stream.println();
}
//Write the XML plus binaries for any attachments.
public void writeFiles() {
writeFiles(false);
}
public void writeFiles(boolean longXML) {
// Variables to populate MessageInfo:
String xmlFileName;
String longxmlFileName="";
List<String> attachmentFileNames = new ArrayList<String>();
boolean validAttachments = false; // flag set to true when any attachment is processed
Element root = dom.createElement("message");
dom.appendChild(root);
try {
Address addrs[] = message.getFrom();
if (addrs != null) {
for (Address addr : addrs) {
root.appendChild(textElement("fromaddress", addr.toString()));
}
}
addrs = message.getRecipients(Message.RecipientType.TO);
if (addrs != null) {
for (Address addr : addrs) {
root.appendChild(textElement("toaddress", addr.toString()));
}
}
addrs = message.getRecipients(Message.RecipientType.CC);
if (addrs != null) {
for (Address addr : addrs) {
root.appendChild(textElement("ccaddress", addr.toString()));
}
}
root.appendChild(textElement("receivedDate", dateString));
root.appendChild(textElement("subject", message.getSubject()));
if (message.isMimeType("text/*")) {
root.appendChild(textElement("body", (String) message.getContent()));
} else if (message.getContent() instanceof MimeMultipart) {
MimeMultipart mp = (MimeMultipart) message.getContent();
root.appendChild(textElement("body", (String) mp.getBodyPart(0).getContent()));
Element attachments = dom.createElement("attachments");
File attachmentFolder = new File(outputRoot + cleanDateString + "-attachments/");
for (int i = 1; i < mp.getCount(); i++) {
MimeBodyPart p = (MimeBodyPart) mp.getBodyPart(i);
if (p.getDisposition().equals(Part.INLINE)) {
logger.fine("Ignoring inline attachment.");
continue;
}
Element attachment = dom.createElement("attachment");
attachment.appendChild(textElement("filename", p.getFileName()));
attachment.appendChild(textElement("mimetype", p.getContentType()));
if (longXML && p.getEncoding().equalsIgnoreCase("base64")) {
InputStream is = p.getRawInputStream();
StringBuilder builder = new StringBuilder();
try {
BufferedReader reader = new BufferedReader(new InputStreamReader(is, "UTF-8"));
String line;
while ((line = reader.readLine()) != null) {
builder.append(line);
builder.append("\n");
}
} finally {
is.close();
}
attachment.appendChild(textElement("filedata64", builder.toString()));
}
attachments.appendChild(attachment);
logger.finer(String.format("Writing attachment file %s...",p.getFileName()));
attachmentFolder.mkdir(); // Does nothing if it already exists
try {
p.saveFile(new File(attachmentFolder, p.getFileName()));
} catch (IOException ex) {
logger.log(Level.WARNING, "Error decoding attachment; file may be corrupt!", ex);
}
attachmentFileNames.add(attachmentFolder.getCanonicalPath() + "/" + p.getFileName());
validAttachments = true;
logger.finer("Done writing attachment.");
}
root.appendChild(attachments);
} else {
logger.log(Level.WARNING, "Encountered single-part non-text message.");
}
} catch (MessagingException ex) {
logger.log(Level.WARNING, "Error processing message.", ex);
} catch (IOException ex) {
logger.log(Level.WARNING, "I/O error processing message.", ex);
}
if (longXML && validAttachments) {
longxmlFileName = outputRoot + cleanDateString + "-full.xml";
domToXML(longxmlFileName);
}
// Remove the filedata64 sections for the shorter version of xml output
NodeList mimeChunks = dom.getElementsByTagName("filedata64");
for (int i=0; i < mimeChunks.getLength(); i++) {
Node chunk = mimeChunks.item(i);
chunk.getParentNode().removeChild(chunk);
}
xmlFileName = outputRoot + cleanDateString + ".xml";
domToXML(xmlFileName);
info = new MessageInfo(xmlFileName, (longXML & validAttachments), longxmlFileName, attachmentFileNames.toArray(new String[0]));
}
//Convenience method because we do this a lot.
private Element textElement(String name, String text) {
Element e = dom.createElement(name);
Text t = dom.createTextNode(text);
e.appendChild(t);
return e;
}
private void domToXML(String outFileName) {
logger.finer(String.format("Writing xml file %s...",outFileName));
FileWriter fw=null;
try {
// Create the XML transformer
TransformerFactory transfac = TransformerFactory.newInstance();
Transformer trans = transfac.newTransformer();
trans.setOutputProperty(OutputKeys.INDENT, "yes");
// Create output file.
File outfile = new File(outFileName);
fw = new FileWriter(outfile);
// Create source and result objects for XML transformation
Result result = new StreamResult(fw);
Source source = new DOMSource(dom);
// Transform DOM to XML text
trans.transform(source, result);
// Streams closed in finally block
} catch (RuntimeException ex) {
throw(ex);
} catch (Exception ex) {
logger.log(Level.SEVERE, "Error writing XML file.", ex);
} finally {
// ensure output gets closed.
try {
if (fw != null) {
fw.flush();
fw.close();
}
} catch (IOException ex) {
}
}
logger.finer("Done writing xml.");
}
// public static void setOutputRoot(String outputRoot) {
// MailConverter.outputRoot = outputRoot;
// }
/** Return an ISO8601-compliant string representing the received date of this message. */
public String getDateString() {
return cleanDateString;
}
// This is a little sloppy because there's no guarantee info has been initialized.
// Just don't call this before writeFiles, okay?
public MessageInfo getMessageInfo() {
return info;
}
}