// Source Code of org.jasen.core.parsers.GenericReceivedHeaderParser

/*
 * @(#)GenericReceivedHeaderParser.java  17/11/2004
 *
 * Copyright (c) 2004, 2005  jASEN.org
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright notice,
 *      this list of conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in
 *      the documentation and/or other materials provided with the distribution.
 *
 *   3. The names of the authors may not be used to endorse or promote products
 *      derived from this software without specific prior written permission.
 *
 *   4. Any modification or additions to the software must be contributed back
 *      to the project.
 *
 *   5. Any investigation or reverse engineering of source code or binary to
 *      enable emails to bypass the filters, and hence inflict spam and or viruses
 *      onto users who use or do not use jASEN could subject the perpetrator to
 *      criminal and or civil liability.
 *
 * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESSED OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JASEN.ORG,
 * OR ANY CONTRIBUTORS TO THIS SOFTWARE BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
 * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */
package org.jasen.core.parsers;


import java.net.InetAddress;
import java.net.UnknownHostException;
import java.text.ParseException;
import java.util.Date;
import java.util.Locale;
import java.util.regex.Pattern;


import org.jasen.error.JasenParseException;
import org.jasen.error.ParseErrorType;
import org.jasen.interfaces.InetAddressResolver;
import org.jasen.interfaces.ReceivedHeaderParserData;
import org.jasen.util.DNSUtils;


/**
 * <P>
 *   This is a generic header parser which <em>should</em> parse most &quot;Received&quot; MIME headers.
 * </P>
 * @author Jason Polites
 */
public class GenericReceivedHeaderParser extends AbstractReceivedHeaderParser {


    public static final String SPLIT_REGEX = "[\\{\\[\\]\\}\\)\\(\\s]+";


    /**
     *
     */
    public GenericReceivedHeaderParser() {
        super();
    }


    /*
     * (non-Javadoc)
     * @see org.jasen.interfaces.ReceivedHeaderParser#parse(java.lang.String, org.jasen.interfaces.InetAddressResolver)
     */
    public ReceivedHeaderParserData parse(String header,
            InetAddressResolver resolver) throws JasenParseException {


        ReceivedHeaderParserDataImpl data = new ReceivedHeaderParserDataImpl();


        InetAddress senderHost = null;


        // Enforce lower case
        header = header.toLowerCase();


        // Remove folds
        header = header.replaceAll("[\n\r]+", "");


        // remove the end date part if it exists
        int colonIndex = header.indexOf(';');
        if (colonIndex > -1 && colonIndex < header.length() - 1) {
            try {
                // Use the last token
                data.setDateReceived(data.parseDate(header.substring(
                        colonIndex + 1, header.length()).trim()));
                header = header.substring(0, colonIndex);
            } catch (ParseException e) {
                // Default to today
                data.setDateReceived(new Date());
            }
        }


        // Split the header
        String[] split = split(header);
        int index = 0;


        /****************NEW CODE*************************/
        String token = null;


        // Capture tokens until from...
        if (split.length > 0) {


            do {
                token = split[index];
                index++;
            } while (!token.equals("from") && index < split.length);
            
            // If we weren't the first token, throw an exception
            if(index > 1) {
                throw new JasenParseException("'from' token out of place",ParseErrorType.PARSE_ERROR);
            }


            // We should now be at the sender host
            if (index < split.length && token.equals("from")) {
                boolean hostSet = false;
                boolean ipSet = false;


                do {
                    token = split[index];


                    // See if we are a hostname
                    if (DNSUtils.isDomain(token) && !hostSet) {
                        // We have the sender host
                        data.setSenderHostName(token);
                        hostSet = true;
                    } else if (DNSUtils.isIPAddress(token)) {
                        // If this is the first ip.. assume its the host
                        if (!hostSet) {
                            data.setSenderHostName(token);
                        } else if(!ipSet){
                            // Assume its the real ip
                            data.setSenderIPAddress(token);
                            ipSet = true;
                        }
                    }


                    index++;
                } while (!token.equals("by") && index < split.length);


                // Now, look for the recipient data
                if (index < split.length && token.equals("by")) {


                    // Assmume that the next token is the receiver
                    token = split[index];
                    data.setReceiverHostName(token);
                }


            } else {
                throw new JasenParseException("Couldn't locate 'from' token",
                        ParseErrorType.PARSE_ERROR);
            }
        } else {
            throw new JasenParseException(
                    "Couldn't parse header.  No tokens found",
                    ParseErrorType.PARSE_ERROR);
        }
        
        if(data.getSenderIPAddress() == null && data.getSenderHostName() != null) {
            try {
                // Use the host name as the ip
                InetAddress host = resolver.getByName(data.getSenderHostName());
                
                if(host != null) {
                    data.setSenderIPAddress(host.getHostAddress());
                }
                else
                {
                    // We couldn't resolve the host, just use the name
                    data.setSenderIPAddress(data.getSenderHostName());
                }
                
            } catch (UnknownHostException e) {
                // We couldn't resolve the host, just use the name
                data.setSenderIPAddress(data.getSenderHostName());
            }
        }


        /********************* END NEW CODE ****************/
        /*
         


         // if the first token is not from, abort
         if (split.length <= 4 || !split[index].equalsIgnoreCase("from")) {
         throw new JasenParseException("Couldn't locate 'from' token",
         ParseErrorType.PARSE_ERROR);
         }


         // The next token should be the sender host
         data.setSenderHostName(split[++index]);


         // The next will either be an IP address, or the "by" token
         if (split[++index].equalsIgnoreCase("by")) {
         // Get the receiver host
         data.setReceiverHostName(split[++index]);


         try {
         // Resolve the IP
         senderHost = resolver.getByName(data.getSenderHostName());
         if (senderHost != null) {
         data.setSenderIPAddress(senderHost.getHostAddress());
         }


         } catch (UnknownHostException e) {
         // There is no host for this sender..
         data.setSenderIPAddress(data.getSenderHostName());
         }
         } else {
         // Try to get the IP address...
         if (DNSUtils.isIPAddress(split[index])) {
         data.setSenderIPAddress(split[index]);
         } else {
         try {
         // Resolve the IP
         senderHost = resolver.getByName(data.getSenderHostName());
         if (senderHost != null) {
         data.setSenderIPAddress(senderHost.getHostAddress());
         }
         } catch (UnknownHostException e) {
         // There is no host for this sender..
         data.setSenderIPAddress(data.getSenderHostName());
         }
         }


         if (split[++index].equalsIgnoreCase("by")) {
         // Get the receiver host
         data.setReceiverHostName(split[++index]);
         }
         }*/
        return data;
    }


    /**
     * A manual char-by-char tokenizer for Received headers
     * <p>
     * In an ideal world, this would be done by a nice, maintainable
     *  regular expression...
     * </p>
     * <p>
     * Unfortunately the idiosyncrasies and deviations contained in
     *  many received headers makes this very difficult...
     * </p>
     * <p>
     * Performance wise, this shouldn't be all that much worse than a regex...
     * </p>
     * @param header
     * @return
     */
    protected String[] split(String header) {


        String[] tokens = header.split(SPLIT_REGEX);


        return tokens;
        /*
         char[] chars = header.toCharArray();


         Vector tokens = new Vector(10);


         StringBuffer buffer = new StringBuffer();


         boolean inToken = false;
         int parenthesisCount = 0;


         char c;


         for (int i = 0; i < chars.length; i++) {
         c = chars[i];


         if (whitespace(c) || newline(c)) {
         if (inToken && parenthesisCount <= 0) {
         tokens.add(buffer.toString());
         buffer.delete(0, buffer.length());
         inToken = false;
         } else if (parenthesisCount > 0) {
         buffer.append(c);
         }
         } else if (openParenthesis(c)) {
         if (inToken && parenthesisCount <= 0) {
         tokens.add(buffer.toString());
         buffer.delete(0, buffer.length());
         inToken = false;
         }


         parenthesisCount++;
         } else if (closeParenthesis(c)) {
         parenthesisCount--;
         } else {
         inToken = true;
         buffer.append(c);
         }
         }


         return (String[]) tokens.toArray(new String[tokens.size()]);*/
    }


    private boolean whitespace(char c) {
        return (c == ' ' || c == '\t');
    }


    private boolean newline(char c) {
        return (c == '\n' || c == '\r');
    }


    private boolean openParenthesis(char c) {
        return (c == '(' || c == '[' || c == '{');
    }


    private boolean closeParenthesis(char c) {
        return (c == ')' || c == ']' || c == '}');
    }
}
// Source Code of org.jasen.core.parsers.GenericReceivedHeaderParser
//
// Related Classes of org.jasen.core.parsers.GenericReceivedHeaderParser