Package org.jasen.core

Source Code of org.jasen.core.StandardParserData

/*
* @(#)StandardParserData.java  31/10/2004
*
* Copyright (c) 2004, 2005  jASEN.org
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
*   1. Redistributions of source code must retain the above copyright notice,
*      this list of conditions and the following disclaimer.
*
*   2. Redistributions in binary form must reproduce the above copyright
*      notice, this list of conditions and the following disclaimer in
*      the documentation and/or other materials provided with the distribution.
*
*   3. The names of the authors may not be used to endorse or promote products
*      derived from this software without specific prior written permission.
*
*   4. Any modification or additions to the software must be contributed back
*      to the project.
*
*   5. Any investigation or reverse engineering of source code or binary to
*      enable emails to bypass the filters, and hence inflict spam and or viruses
*      onto users who use or do not use jASEN could subject the perpetrator to
*      criminal and or civil liability.
*
* THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESSED OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JASEN.ORG,
* OR ANY CONTRIBUTORS TO THIS SOFTWARE BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
* OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
package org.jasen.core;

import java.util.List;

import org.jasen.core.token.CountTokenErrorReport;
import org.jasen.core.token.ObfuscatedCharacterTokenErrorRecorder;
import org.jasen.interfaces.ParserData;
import org.jasen.interfaces.TokenErrorRecorder;
import org.jasen.interfaces.TokenErrorReport;

/**
* <P>
*   Holds the information obtained from parsing and tokenizing the message.
* </P>
* @author Jason Polites
*/
public class StandardParserData implements ParserData
{
    private String htmlAsText;
    private String textParsed;
    private String[] messageTokens;

  private int concealedHtmlCount = 0;
  private int srcCgiCount = 0;
  private int imageCount = 0;
  private int srcPortCount = 0;
  private int falseAnchorCount = 0;

  private TokenErrorRecorder recorder;

  private List ports;


    /**
     *
     */
    public StandardParserData() {
        super ();
    }

    /*
     * (non-Javadoc)
     * @see org.jasen.interfaces.ParserData#getMessageTokens()
     */
    public String[] getMessageTokens() {
        return messageTokens;
    }

    /**
     * Sets the message tokens obtained from tokenization.
     * @param htmlTokens
     * @see org.jasen.core.token.EmailTokenizer
     */
    public void setMessageTokens(String[] htmlTokens) {
        this.messageTokens = htmlTokens;
    }

    /*
     * (non-Javadoc)
     * @see org.jasen.interfaces.ParserData#getHtmlAsText()
     */
    public String getHtmlAsText() {
        return htmlAsText;
    }

    /**
     * Sets the parsed html.  That is, the plain text components of the html in the message.
     * @param parsedHtml
     */
    public void setHtmlAsText(String parsedHtml) {
        this.htmlAsText = parsedHtml;
    }

    /**
     * Gets the number of occurrances of concealed HTML.
     * @return Returns the concealedHtmlCount.
     */
    public int getConcealedHtmlCount() {
        return concealedHtmlCount;
    }

    /**
     * Sets the number of occurrances of concealed HTML.
     * @param concealedHtmlCount The concealedHtmlCount to set.
     */
    public void setConcealedHtmlCount(int concealedHtmlCount) {
        this.concealedHtmlCount = concealedHtmlCount;
    }

    /**
     * Gets the number of images in the email body.
     * @return Returns the imageCount.
     */
    public int getImageCount() {
        return imageCount;
    }

    /**
     * Sets the number of images in the email body.
     * @param imageCount The imageCount to set.
     */
    public void setImageCount(int imageCount) {
        this.imageCount = imageCount;
    }

    /**
     * Gets the number of occurrances of SRC cgi references.
     * <P>
     * That is, occurrances of HTML tags where a SRC (or other remote reference) which
     * would normally be expected to be a flat file (eg the IMG tag) was found to reference
     * a cgi script or similar.  This often indicates the presence of mail bugs
     * </P>
     * @return Returns the srcCgiCount.
     */
    public int getSrcCgiCount() {
        return srcCgiCount;
    }

    /**
     * Sets the number of occurrances of SRC cgi references.
     * @param srcCgiCount The srcCgiCount to set.
     * @see StandardParserData#getSrcCgiCount()
     */
    public void setSrcCgiCount(int srcCgiCount) {
        this.srcCgiCount = srcCgiCount;
    }

    /*
     * (non-Javadoc)
     * @see org.jasen.interfaces.ParserData#getTextParsed()
     */
    public String getTextParsed() {
        return textParsed;
    }

    /**
     * Sets the parsed (cleaned) text resulting from the message parse.
     * @param textParsed
     */
    public void setTextParsed(String textParsed) {
        this.textParsed = textParsed;
    }

    /**
     * Gets the list of TCP ports found appended to URLs in the HTML body of the message.
     * @return A list of String objects
     */
    public List getPorts() {
        return ports;
    }
   
    /**
     * Sets the list of TCP ports found appended to URLs in the HTML body of the message.
     * @param ports A list of String objects
     */
    public void setPorts(List ports) {
        this.ports = ports;
    }

    /**
     * Gets the list of anchor or image src (or href) attributes which had alternate TCP ports appended.
     * @return The number of occurrances
     */
    public int getSrcPortCount() {
        return srcPortCount;
    }

    /**
     * Sets the list of anchor or image src (or href) attributes which had alternate TCP ports appended.
     * @param srcPortCount The number of occurrances.
     */
    public void setSrcPortCount(int srcPortCount) {
        this.srcPortCount = srcPortCount;
    }

    /**
     * Gets the count of anchor tags whose text was URL text (eg http://...) .
     * but did not match the href attribute.
     * @return The number of occurrances.
     */
    public int getFalseAnchorCount() {
        return falseAnchorCount;
    }

    /**
     * Sets the count of anchor tags whose text was URL text (eg http://...) but did not match the href attribute .
     * @param falseAnchorCount The number of occurrances.
     */
    public void setFalseAnchorCount(int falseAnchorCount) {
        this.falseAnchorCount = falseAnchorCount;
    }

    /**
     * Gets the number of character obfuscation observations.
     * <br/>
     * These are instances where non ascii characters are used to obscure normal words.
     * @return The number of occurrances.
     */
    public int getObfuscatedCharacterCount() {

        if(recorder != null) {
            TokenErrorReport report = recorder.getReport();

            if(report instanceof CountTokenErrorReport) {
                return ((CountTokenErrorReport)report).getCount();
            }
        }

        return 0;
    }

    /*
     * (non-Javadoc)
     * @see org.jasen.interfaces.ParserData#getTokenErrorRecorder()
     */
    public TokenErrorRecorder getTokenErrorRecorder() {
        if(recorder == null) {
            synchronized(this) {
                if(recorder == null) {
                    recorder = new ObfuscatedCharacterTokenErrorRecorder();
                }
                notifyAll();
            }
        }
        return recorder;
    }
}
TOP

Related Classes of org.jasen.core.StandardParserData

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.