package com.icentris.util;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.StringTokenizer;
import java.util.Vector;
import javax.mail.internet.AddressException;
import javax.mail.internet.InternetAddress;
/**
 * Streams a delimited recipient list from a {@link Reader} and turns it into batches of {@link InternetAddress}es.
 *
 * <p>
 * Typical use: call {@link #setReader(Reader, long)}, then call {@link #parseNextBatch()} repeatedly until it returns <code>null</code>, collecting
 * rejected addresses with {@link #getAddressExceptions()} after each batch.
 *
 * <p>
 * Instances keep per-stream state (position, carried-over text) in plain fields; nothing here makes {@link #parseNextBatch()} safe to call from several
 * threads at once.
 */
public class BulkMail {
    /**
     * Logger for this class
     */
    private static final Log logger = LogFactory.getLog(BulkMail.class);

    /** How many times one batch read waits for more data before giving up (100 x 100 ms = 10 seconds). */
    private static final int MAX_READ_RETRIES = 100;

    /** Milliseconds to wait between two read attempts. */
    private static final long RETRY_SLEEP_MILLIS = 100;

    private Reader reader;
    private int parseSize = 50000;
    private long contentLength = -1;
    char[] cbuf = new char[parseSize];
    /** Number of chars consumed from the reader by all previous batches. */
    private long lastPos = 0;
    /** Text after the last delimiter of the previous batch; it may be a truncated address, so it is prepended to the next batch. */
    private String leftovers = "";
    private Vector addressExceptions = new Vector();
    private boolean allDone = false;
    private String[] extraDelimiters;
    private boolean alwaysTokenize;

    /**
     * Read, parse, scrub, and return the next batch of InternetAddress'es. The differences between this and normal InternetAddress.parse are:
     * <ul>
     * <li>This takes a little bit of the stream at a time, so you can start sending emails as you stream rather than having to get the whole recipient
     * list into memory, then send.
     * <li>InternetAddress.parse throws error (therefore returns nothing) when there is any parse error, this figures out how to pull out the bad address so
     * you can get the good ones.
     * </ul>
     *
     * <p>
     * Will retry every 100 milliseconds up to a total of 10 seconds if I haven't yet received {@link #getParseSize parseSize} chars from the Reader.
     *
     * <p>
     * Addresses that could not be parsed or that fail the basic sanity check are left out of the result and reported through
     * {@link #getAddressExceptions()}.
     *
     * @return the valid addresses of this batch (possibly an empty array), or <code>null</code> once there is nothing left to read and parse
     * @throws IOException
     *             if the reader fails
     * @throws AddressException
     *             if InternetAddress.parse fails in a way that gives too little information to recover from
     * @throws IllegalStateException
     *             if {@link #setReader(Reader, long)} has not been called yet
     */
    public InternetAddress[] parseNextBatch() throws IOException, AddressException {
        if (allDone) {
            return null;
        }
        if (reader == null) {
            throw new IllegalStateException("You haven't called setReader for me to pull stuff from yet!");
        }

        String chunk = readChunk();
        if (chunk.length() == 0) {
            allDone = true;
            // The stream dried up; whatever was carried over from the previous batch is
            // the final address and must still be parsed instead of being thrown away.
            if (leftovers.length() == 0) {
                return null;
            }
        }
        lastPos += chunk.length();

        // Convert the extra delimiters BEFORE looking for the last comma, so that lists
        // delimited only by e.g. newlines still get their truncated tail carried over.
        String stuffToParse = replaceExtraDelimiters(leftovers + chunk);
        leftovers = "";
        if (!allDone) {
            int lastCommaPos = stuffToParse.lastIndexOf(",");
            if (lastCommaPos > -1) {
                // keep track of possibly truncated addresses for next pass
                leftovers = stuffToParse.substring(lastCommaPos + 1);
                stuffToParse = stuffToParse.substring(0, lastCommaPos);
            }
        }

        ArrayList inetAddresses = new ArrayList();
        parseAddresses(stuffToParse, inetAddresses);
        removeInvalidAddresses(inetAddresses);
        return (InternetAddress[]) inetAddresses.toArray(new InternetAddress[0]);
    }

    /**
     * Reads up to parseSize chars, never more than what is still missing to reach contentLength. Sets allDone once contentLength chars have been
     * consumed in total.
     */
    private String readChunk() throws IOException {
        StringBuffer sb = new StringBuffer();
        int retries = MAX_READ_RETRIES;
        while (retries > 0) {
            while (parseSize > sb.length() && contentLength > (lastPos + sb.length())) {
                long remaining = contentLength - (lastPos + sb.length());
                int wanted = Math.min(cbuf.length, parseSize - sb.length());
                if (remaining < wanted) {
                    wanted = (int) remaining;
                }
                // Always fill the buffer from index 0 and append exactly what was read,
                // so a short read can neither duplicate nor lose data.
                int charsRead = reader.read(cbuf, 0, wanted);
                if (charsRead <= 0) {
                    break;
                }
                // if we didn't get as big a chunk as we expected
                if (charsRead < wanted && logger.isDebugEnabled()) {
                    logger.debug("parseNextBatch() - nn******************nnnnnDEBUG: [BulkMail] wow! it happend!"); //$NON-NLS-1$
                    logger.debug("parseNextBatch() - DEBUG: [BulkMail] length=[" + wanted + "]"); //$NON-NLS-1$ //$NON-NLS-2$
                    logger.debug("parseNextBatch() - DEBUG: [BulkMail] charsRead=[" + charsRead + "]"); //$NON-NLS-1$ //$NON-NLS-2$
                    logger.debug("parseNextBatch() - DEBUG: [BulkMail] parseSize=[" + parseSize + "]"); //$NON-NLS-1$ //$NON-NLS-2$
                    logger.debug("parseNextBatch() - DEBUG: [BulkMail] sb.length()=[" + sb.length() + "]"); //$NON-NLS-1$ //$NON-NLS-2$
                    logger.debug("parseNextBatch() - DEBUG: [BulkMail] contentLength=[" + contentLength + "]"); //$NON-NLS-1$ //$NON-NLS-2$
                    logger.debug("parseNextBatch() - nnnnn******************nn"); //$NON-NLS-1$
                }
                sb.append(cbuf, 0, charsRead);
            }

            if (contentLength <= (lastPos + sb.length())) {
                // everything we were told to expect has been consumed
                allDone = true;
                break;
            }
            if (sb.length() >= parseSize) {
                break;
            }

            // we still expected to find more content: sleep for 100 milliseconds, then try again
            try {
                Thread.sleep(RETRY_SLEEP_MILLIS);
            }
            catch (InterruptedException e) {
                logger.error("parseNextBatch()", e); //$NON-NLS-1$
                // keep the interrupt visible to the caller and stop waiting
                Thread.currentThread().interrupt();
                break;
            }
            retries--;
        }
        return sb.toString();
    }

    /** Replaces every configured extra delimiter with a comma so the standard InternetAddress.parse() can be used. */
    private String replaceExtraDelimiters(String text) {
        String result = text;
        if (extraDelimiters != null) {
            for (int i = 0; i < extraDelimiters.length; i++) {
                result = StringUtil.replace(result, extraDelimiters[i], ",");
            }
        }
        return result;
    }

    /**
     * Parses stuffToParse into inetAddresses, cutting out bad addresses (recorded in addressExceptions) or falling back to per-token parsing when
     * InternetAddress.parse() rejects the text.
     */
    private void parseAddresses(String stuffToParse, ArrayList inetAddresses) throws AddressException {
        if (alwaysTokenize()) {
            doTokenizeParse(stuffToParse, inetAddresses, addressExceptions);
            return;
        }
        String remaining = stuffToParse;
        boolean tryAgain = true;
        while (tryAgain) {
            tryAgain = false;
            try {
                inetAddresses.addAll(Arrays.asList(InternetAddress.parse(remaining)));
            }
            catch (AddressException e) {
                String problemString = e.getRef();
                if (problemString == null) {
                    throw e;
                }
                int refPos = remaining.indexOf(problemString);
                if (problemString.length() > (remaining.length() / 2)) {
                    // let's tokenize on commas if InternetAddress.parse() is totally hosing us
                    // to the point that this error is bigger than half the current string
                    doTokenizeParse(remaining, inetAddresses, addressExceptions);
                }
                else if (refPos > -1) {
                    // this is a tiny error, let's try to recover from it before resorting to our own parsing
                    String cleaned = tryToRemoveBadAddress(remaining, problemString, refPos, e);
                    if (cleaned.equals(remaining)) {
                        // Nothing could be cut out (the error position sits on a comma); retrying
                        // would loop forever, so parse token by token instead. The tokenizer
                        // records the failure itself, so e is not added here.
                        doTokenizeParse(remaining, inetAddresses, addressExceptions);
                    }
                    else {
                        addressExceptions.add(e);
                        remaining = cleaned;
                        tryAgain = true;
                    }
                }
                else {
                    // there's not enough information to recover from this, let's give up and throw an error
                    throw e;
                }
            }
        }
    }

    /**
     * Drops addresses that cannot be real mailboxes: no '@', no '.' in the part after the last '@', or a trailing '.'. Each dropped address is recorded in
     * addressExceptions. Entries whose address is null are left alone.
     */
    private void removeInvalidAddresses(ArrayList inetAddresses) {
        for (Iterator it = inetAddresses.iterator(); it.hasNext();) {
            InternetAddress ele = (InternetAddress) it.next();
            String address = ele.getAddress();
            if (address == null) {
                continue;
            }
            int atPos = address.lastIndexOf('@');
            int lastDotPos = address.lastIndexOf('.');
            boolean invalid = atPos < 0 || lastDotPos < atPos || lastDotPos == (address.length() - 1);
            if (!invalid) {
                continue;
            }
            int errorPos = (atPos < 0) ? -1 : lastDotPos;
            if ("".equals(address)) {
                addressExceptions.add(new AddressException("Email address is empty.Please verify it", address, errorPos));
            }
            else {
                addressExceptions.add(new AddressException("Missing symbol @ or incorrect domain name or invalid email address", address, errorPos));
            }
            it.remove();
        }
    }

    /** Parses each comma-separated token on its own so that one bad address only costs that token; failures go to addressExceptions. */
    private static void doTokenizeParse(String stuffToParse, ArrayList inetAddresses, Vector addressExceptions) {
        StringTokenizer tokenizer = new StringTokenizer(stuffToParse, ",");
        while (tokenizer.hasMoreTokens()) {
            String addr = tokenizer.nextToken();
            try {
                inetAddresses.addAll(Arrays.asList(InternetAddress.parse(addr)));
            }
            catch (AddressException e) {
                addressExceptions.add(e);
            }
        }
    }

    /**
     * Cuts the comma-delimited entry around the reported error position out of stuffToParse. The cut starts at the comma at or before the position and
     * ends just before the next comma; without a following comma everything from the start of the cut is dropped. Returns the input unchanged when the
     * position itself is a comma.
     */
    private static String tryToRemoveBadAddress(String stuffToParse, String problemString, int refPos, AddressException e) {
        int problemPos = refPos;
        if (e.getPos() > -1) {
            problemPos += e.getPos();
        }
        int prevComma = stuffToParse.lastIndexOf(",", problemPos);
        int nextComma = stuffToParse.indexOf(",", problemPos);
        int problemStart = (prevComma > -1) ? prevComma : 0;
        if (nextComma > -1) {
            return stuffToParse.substring(0, problemStart) + stuffToParse.substring(nextComma);
        }
        return stuffToParse.substring(0, problemStart);
    }

    /**
     * Returns all the AddressExceptions collected since the last call to getAddressExceptions(), and clears the collection.
     *
     * @return the collected exceptions, possibly an empty array
     */
    public AddressException[] getAddressExceptions() {
        synchronized (addressExceptions) {
            AddressException[] exceptions = (AddressException[]) addressExceptions.toArray(new AddressException[0]);
            addressExceptions.clear();
            return exceptions;
        }
    }

    public Reader getReader() {
        return reader;
    }

    /**
     * Set the reader I'm supposed to pull your recipient list from. Any position and carried-over text from a previously set reader is discarded.
     *
     * <p>
     * Usually, the call to this will look something like:
     *
     * <pre>
     * setReader(new FileReader(file), file.length());
     * </pre>
     *
     * <p>
     * Note that contentLength is compared against the number of chars read. If it is larger than what the reader delivers (e.g. a byte length of a
     * multi-byte encoded file), the final batches wait out the retry period before the remaining addresses are returned.
     *
     * @param reader
     *            the reader I'm supposed to pull your recipient list from
     * @param contentLength
     *            the length of content I should expect
     *
     * @throws IllegalArgumentException
     *             if reader is null or {@code contentLength <= 0}
     */
    public void setReader(Reader reader, long contentLength) {
        if (reader == null) {
            throw new IllegalArgumentException("Don't give me a null reader!");
        }
        if (contentLength <= 0) {
            throw new IllegalArgumentException("Give me contentLength > 0, not [" + contentLength + "]!");
        }
        allDone = false;
        // start counting from scratch; state of a previous stream must not leak into this one
        lastPos = 0;
        leftovers = "";
        this.contentLength = contentLength;
        this.reader = reader;
    }

    public int getParseSize() {
        return parseSize;
    }

    /**
     * Defaults to 50000 to attempt to get about 1000 addresses per parse.
     *
     * @param parseSize
     *            maximum number of chars to read per batch
     * @throws IllegalArgumentException
     *             if {@code parseSize <= 0}
     */
    public void setParseSize(int parseSize) {
        if (parseSize <= 0) {
            throw new IllegalArgumentException("Give me parseSize > 0, not [" + parseSize + "]!");
        }
        this.parseSize = parseSize;
        cbuf = new char[parseSize];
    }

    public String[] getExtraDelimiters() {
        return extraDelimiters;
    }

    /**
     * Designed to help if you want to support non-standard delimiters (e.g. semicolon (;), newline (\n), or cr-lf (\r\n)). This will first replace all the
     * delimiters you want converted to commas, then let the standard InternetAddress.parse() do the parsing.
     */
    public void setExtraDelimiters(String[] extraDelimiters) {
        this.extraDelimiters = extraDelimiters;
    }

    /** Whether every batch is parsed token by token instead of trying InternetAddress.parse() on the whole batch first. */
    public boolean alwaysTokenize() {
        return alwaysTokenize;
    }

    public void setAlwaysTokenize(boolean alwaysTokenize) {
        this.alwaysTokenize = alwaysTokenize;
    }
}