/*
* @(#)GenericReceivedHeaderParser.java 17/11/2004
*
* Copyright (c) 2004, 2005 jASEN.org
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the distribution.
*
* 3. The names of the authors may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* 4. Any modification or additions to the software must be contributed back
* to the project.
*
* 5. Any investigation or reverse engineering of source code or binary to
* enable emails to bypass the filters, and hence inflict spam and or viruses
* onto users who use or do not use jASEN could subject the perpetrator to
* criminal and or civil liability.
*
* THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESSED OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JASEN.ORG,
* OR ANY CONTRIBUTORS TO THIS SOFTWARE BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
* OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
package org.jasen.core.parsers;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.text.ParseException;
import java.util.Date;
import java.util.Locale;

import org.jasen.error.JasenParseException;
import org.jasen.error.ParseErrorType;
import org.jasen.interfaces.InetAddressResolver;
import org.jasen.interfaces.ReceivedHeaderParserData;
import org.jasen.util.DNSUtils;
/**
 * <P>
 * This is a generic header parser which <em>should</em> parse most "Received" MIME headers.
 * </P>
 * <P>
 * The header value is lower-cased, unfolded, stripped of its trailing
 * <code>; date</code> part and then tokenized with {@link #SPLIT_REGEX}.
 * The tokens between the leading <code>from</code> keyword and the
 * <code>by</code> keyword are scanned for the sender host name and IP address;
 * the token following <code>by</code> is taken as the receiver host name.
 * </P>
 * @author Jason Polites
 */
public class GenericReceivedHeaderParser extends AbstractReceivedHeaderParser {

    /**
     * Delimiter pattern used to tokenize a header: one or more whitespace
     * characters, parentheses, square brackets or braces.
     */
    public static final String SPLIT_REGEX = "[\\{\\[\\]\\}\\)\\(\\s]+";

    /** Keyword which must be the first token of a parsable header. */
    private static final String FROM_TOKEN = "from";

    /** Keyword which introduces the receiving host. */
    private static final String BY_TOKEN = "by";

    /**
     * Creates a new parser.
     */
    public GenericReceivedHeaderParser() {
        super();
    }

    /*
     * (non-Javadoc)
     * @see org.jasen.interfaces.ReceivedHeaderParser#parse(java.lang.String, org.jasen.interfaces.InetAddressResolver)
     */
    /**
     * Parses the value of a "Received" header.
     *
     * @param header the header value to parse
     * @param resolver used to look up the sender IP address when the header
     *        itself did not yield one
     * @return the data extracted from the header
     * @throws JasenParseException if the header is null, contains no tokens,
     *         or does not start with a <code>from</code> token followed by at
     *         least one further token
     */
    public ReceivedHeaderParserData parse(String header,
            InetAddressResolver resolver) throws JasenParseException {

        if (header == null) {
            // Report a parse failure rather than letting a NullPointerException escape
            throw new JasenParseException(
                    "Couldn't parse header. No tokens found",
                    ParseErrorType.PARSE_ERROR);
        }

        ReceivedHeaderParserDataImpl data = new ReceivedHeaderParserDataImpl();

        // Enforce lower case. A fixed locale is used so that the result does
        // not depend on the default locale of the JVM (e.g. Turkish 'I').
        String normalized = header.toLowerCase(Locale.ENGLISH);

        // Remove folds
        normalized = normalized.replaceAll("[\n\r]+", "");

        // Record the date and remove it from the text to be tokenized
        normalized = extractDate(normalized, data);

        String[] tokens = split(normalized);

        // String.split yields an empty first element when the input starts
        // with a delimiter (leading whitespace or bracket); step over it so
        // that such headers are not rejected.
        int index = 0;
        if (tokens.length > 0 && tokens[0].length() == 0) {
            index = 1;
        }

        if (index >= tokens.length) {
            throw new JasenParseException(
                    "Couldn't parse header. No tokens found",
                    ParseErrorType.PARSE_ERROR);
        }

        if (!FROM_TOKEN.equals(tokens[index])) {
            // Distinguish a misplaced 'from' from a missing one
            for (int i = index + 1; i < tokens.length; i++) {
                if (FROM_TOKEN.equals(tokens[i])) {
                    throw new JasenParseException("'from' token out of place",
                            ParseErrorType.PARSE_ERROR);
                }
            }
            throw new JasenParseException("Couldn't locate 'from' token",
                    ParseErrorType.PARSE_ERROR);
        }

        // Step past 'from'; we should now be at the sender host
        index++;

        if (index >= tokens.length) {
            throw new JasenParseException(
                    "No tokens found after 'from' token",
                    ParseErrorType.PARSE_ERROR);
        }

        readSenderAndReceiver(tokens, index, data);
        resolveSenderIPAddress(data, resolver);

        return data;
    }

    /**
     * Tokenizes the header using {@link #SPLIT_REGEX}.
     * <p>
     * Note that, as with any use of {@link String#split(String)}, the first
     * element of the result is an empty string when the header starts with a
     * delimiter.
     * </p>
     * @param header the (normalized) header text to tokenize
     * @return the tokens of the header
     */
    protected String[] split(String header) {
        return header.split(SPLIT_REGEX);
    }

    /**
     * Records the received date held after the last ';' of the header and
     * returns the header without that trailing part.
     *
     * @param header the lower-cased, unfolded header
     * @param data receives the parsed date, or the current date if the text
     *        after the ';' cannot be parsed
     * @return the header text preceding the last ';', or the header unchanged
     *         if it contains no ';'
     */
    private String extractDate(String header, ReceivedHeaderParserDataImpl data) {
        int semicolon = header.lastIndexOf(';');

        if (semicolon < 0) {
            return header;
        }

        // Only attempt to read a date if something follows the ';'
        if (semicolon < header.length() - 1) {
            try {
                data.setDateReceived(data.parseDate(
                        header.substring(semicolon + 1).trim()));
            } catch (ParseException e) {
                // Default to today
                data.setDateReceived(new Date());
            }
        }

        // Always drop the date part (and the ';' itself) so that neither
        // ends up in the tokens, even when the date could not be parsed.
        return header.substring(0, semicolon);
    }

    /**
     * Scans the tokens following 'from' for the sender host name and IP
     * address, and takes the token following 'by' as the receiver host name.
     *
     * @param tokens the header tokens
     * @param start index of the first token after 'from'; must be a valid index
     * @param data receives the values found
     */
    private void readSenderAndReceiver(String[] tokens, int start,
            ReceivedHeaderParserDataImpl data) {

        boolean hostSet = false;
        boolean ipSet = false;
        int index = start;
        String token;

        do {
            token = tokens[index];

            if (DNSUtils.isDomain(token) && !hostSet) {
                // We have the sender host
                data.setSenderHostName(token);
                hostSet = true;
            } else if (DNSUtils.isIPAddress(token)) {
                if (!hostSet) {
                    // No host name seen yet; use the IP as the host name.
                    // NOTE(review): hostSet is deliberately left untouched to
                    // preserve existing behavior, so a later domain or IP
                    // token replaces this value - confirm this is intended.
                    data.setSenderHostName(token);
                } else if (!ipSet) {
                    // Assume its the real ip
                    data.setSenderIPAddress(token);
                    ipSet = true;
                }
            }

            index++;
        } while (!BY_TOKEN.equals(token) && index < tokens.length);

        // Assume that the token after 'by' is the receiver
        if (index < tokens.length && BY_TOKEN.equals(token)) {
            data.setReceiverHostName(tokens[index]);
        }
    }

    /**
     * Fills in the sender IP address from the sender host name when the
     * header did not provide an IP address. If the name cannot be resolved,
     * the host name itself is stored as the IP address.
     *
     * @param data the data parsed so far
     * @param resolver used to resolve the sender host name
     */
    private void resolveSenderIPAddress(ReceivedHeaderParserDataImpl data,
            InetAddressResolver resolver) {

        if (data.getSenderIPAddress() != null || data.getSenderHostName() == null) {
            return;
        }

        String hostName = data.getSenderHostName();

        // Fall back to the name unless resolution succeeds
        String address = hostName;

        try {
            InetAddress host = resolver.getByName(hostName);

            if (host != null) {
                address = host.getHostAddress();
            }
        } catch (UnknownHostException e) {
            // We couldn't resolve the host, just use the name
        }

        data.setSenderIPAddress(address);
    }
}