Package org.jasen.core.parsers

Source Code of org.jasen.core.parsers.GenericReceivedHeaderParser

/*
* @(#)GenericReceivedHeaderParser.java  17/11/2004
*
* Copyright (c) 2004, 2005  jASEN.org
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
*   1. Redistributions of source code must retain the above copyright notice,
*      this list of conditions and the following disclaimer.
*
*   2. Redistributions in binary form must reproduce the above copyright
*      notice, this list of conditions and the following disclaimer in
*      the documentation and/or other materials provided with the distribution.
*
*   3. The names of the authors may not be used to endorse or promote products
*      derived from this software without specific prior written permission.
*
*   4. Any modification or additions to the software must be contributed back
*      to the project.
*
*   5. Any investigation or reverse engineering of source code or binary to
*      enable emails to bypass the filters, and hence inflict spam and or viruses
*      onto users who use or do not use jASEN could subject the perpetrator to
*      criminal and or civil liability.
*
* THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESSED OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JASEN.ORG,
* OR ANY CONTRIBUTORS TO THIS SOFTWARE BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
* OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
package org.jasen.core.parsers;

import java.net.InetAddress;
import java.net.UnknownHostException;
import java.text.ParseException;
import java.util.Date;
import java.util.Locale;
import java.util.regex.Pattern;

import org.jasen.error.JasenParseException;
import org.jasen.error.ParseErrorType;
import org.jasen.interfaces.InetAddressResolver;
import org.jasen.interfaces.ReceivedHeaderParserData;
import org.jasen.util.DNSUtils;

/**
* <P>
*   This is a generic header parser which <em>should</em> parse most &quot;Received&quot; MIME headers.
* </P>
* @author Jason Polites
*/
public class GenericReceivedHeaderParser extends AbstractReceivedHeaderParser {

    public static final String SPLIT_REGEX = "[\\{\\[\\]\\}\\)\\(\\s]+";

    /**
     *
     */
    public GenericReceivedHeaderParser() {
        super();
    }

    /*
     * (non-Javadoc)
     * @see org.jasen.interfaces.ReceivedHeaderParser#parse(java.lang.String, org.jasen.interfaces.InetAddressResolver)
     */
    public ReceivedHeaderParserData parse(String header,
            InetAddressResolver resolver) throws JasenParseException {

        ReceivedHeaderParserDataImpl data = new ReceivedHeaderParserDataImpl();

        InetAddress senderHost = null;

        // Enforce lower case
        header = header.toLowerCase();

        // Remove folds
        header = header.replaceAll("[\n\r]+", "");

        // remove the end date part if it exists
        int colonIndex = header.indexOf(';');
        if (colonIndex > -1 && colonIndex < header.length() - 1) {
            try {
                // Use the last token
                data.setDateReceived(data.parseDate(header.substring(
                        colonIndex + 1, header.length()).trim()));
                header = header.substring(0, colonIndex);
            } catch (ParseException e) {
                // Default to today
                data.setDateReceived(new Date());
            }
        }

        // Split the header
        String[] split = split(header);
        int index = 0;

        /****************NEW CODE*************************/
        String token = null;

        // Capture tokens until from...
        if (split.length > 0) {

            do {
                token = split[index];
                index++;
            } while (!token.equals("from") && index < split.length);
           
            // If we weren't the first token, throw an exception
            if(index > 1) {
                throw new JasenParseException("'from' token out of place",ParseErrorType.PARSE_ERROR);
            }

            // We should now be at the sender host
            if (index < split.length && token.equals("from")) {
                boolean hostSet = false;
                boolean ipSet = false;

                do {
                    token = split[index];

                    // See if we are a hostname
                    if (DNSUtils.isDomain(token) && !hostSet) {
                        // We have the sender host
                        data.setSenderHostName(token);
                        hostSet = true;
                    } else if (DNSUtils.isIPAddress(token)) {
                        // If this is the first ip.. assume its the host
                        if (!hostSet) {
                            data.setSenderHostName(token);
                        } else if(!ipSet){
                            // Assume its the real ip
                            data.setSenderIPAddress(token);
                            ipSet = true;
                        }
                    }

                    index++;
                } while (!token.equals("by") && index < split.length);

                // Now, look for the recipient data
                if (index < split.length && token.equals("by")) {

                    // Assmume that the next token is the receiver
                    token = split[index];
                    data.setReceiverHostName(token);
                }

            } else {
                throw new JasenParseException("Couldn't locate 'from' token",
                        ParseErrorType.PARSE_ERROR);
            }
        } else {
            throw new JasenParseException(
                    "Couldn't parse header.  No tokens found",
                    ParseErrorType.PARSE_ERROR);
        }
       
        if(data.getSenderIPAddress() == null && data.getSenderHostName() != null) {
            try {
                // Use the host name as the ip
                InetAddress host = resolver.getByName(data.getSenderHostName());
               
                if(host != null) {
                    data.setSenderIPAddress(host.getHostAddress());
                }
                else
                {
                    // We couldn't resolve the host, just use the name
                    data.setSenderIPAddress(data.getSenderHostName());
                }
               
            } catch (UnknownHostException e) {
                // We couldn't resolve the host, just use the name
                data.setSenderIPAddress(data.getSenderHostName());
            }
        }

        /********************* END NEW CODE ****************/
        /*
        

         // if the first token is not from, abort
         if (split.length <= 4 || !split[index].equalsIgnoreCase("from")) {
         throw new JasenParseException("Couldn't locate 'from' token",
         ParseErrorType.PARSE_ERROR);
         }

         // The next token should be the sender host
         data.setSenderHostName(split[++index]);

         // The next will either be an IP address, or the "by" token
         if (split[++index].equalsIgnoreCase("by")) {
         // Get the receiver host
         data.setReceiverHostName(split[++index]);

         try {
         // Resolve the IP
         senderHost = resolver.getByName(data.getSenderHostName());
         if (senderHost != null) {
         data.setSenderIPAddress(senderHost.getHostAddress());
         }

         } catch (UnknownHostException e) {
         // There is no host for this sender..
         data.setSenderIPAddress(data.getSenderHostName());
         }
         } else {
         // Try to get the IP address...
         if (DNSUtils.isIPAddress(split[index])) {
         data.setSenderIPAddress(split[index]);
         } else {
         try {
         // Resolve the IP
         senderHost = resolver.getByName(data.getSenderHostName());
         if (senderHost != null) {
         data.setSenderIPAddress(senderHost.getHostAddress());
         }
         } catch (UnknownHostException e) {
         // There is no host for this sender..
         data.setSenderIPAddress(data.getSenderHostName());
         }
         }

         if (split[++index].equalsIgnoreCase("by")) {
         // Get the receiver host
         data.setReceiverHostName(split[++index]);
         }
         }*/
        return data;
    }

    /**
     * A manual char-by-char tokenizer for Received headers
     * <p>
     * In an ideal world, this would be done by a nice, maintainable
     *  regular expression...
     * </p>
     * <p>
     * Unfortunately the idiosyncrasies and deviations contained in
     *  many received headers makes this very difficult...
     * </p>
     * <p>
     * Performance wise, this shouldn't be all that much worse than a regex...
     * </p>
     * @param header
     * @return
     */
    protected String[] split(String header) {

        String[] tokens = header.split(SPLIT_REGEX);

        return tokens;
        /*
         char[] chars = header.toCharArray();

         Vector tokens = new Vector(10);

         StringBuffer buffer = new StringBuffer();

         boolean inToken = false;
         int parenthesisCount = 0;

         char c;

         for (int i = 0; i < chars.length; i++) {
         c = chars[i];

         if (whitespace(c) || newline(c)) {
         if (inToken && parenthesisCount <= 0) {
         tokens.add(buffer.toString());
         buffer.delete(0, buffer.length());
         inToken = false;
         } else if (parenthesisCount > 0) {
         buffer.append(c);
         }
         } else if (openParenthesis(c)) {
         if (inToken && parenthesisCount <= 0) {
         tokens.add(buffer.toString());
         buffer.delete(0, buffer.length());
         inToken = false;
         }

         parenthesisCount++;
         } else if (closeParenthesis(c)) {
         parenthesisCount--;
         } else {
         inToken = true;
         buffer.append(c);
         }
         }

         return (String[]) tokens.toArray(new String[tokens.size()]);*/
    }

    private boolean whitespace(char c) {
        return (c == ' ' || c == '\t');
    }

    private boolean newline(char c) {
        return (c == '\n' || c == '\r');
    }

    private boolean openParenthesis(char c) {
        return (c == '(' || c == '[' || c == '{');
    }

    private boolean closeParenthesis(char c) {
        return (c == ')' || c == ']' || c == '}');
    }
}
TOP

Related Classes of org.jasen.core.parsers.GenericReceivedHeaderParser

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.