/*
* @(#)StandardParserData.java 31/10/2004
*
* Copyright (c) 2004, 2005 jASEN.org
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the distribution.
*
* 3. The names of the authors may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* 4. Any modification or additions to the software must be contributed back
* to the project.
*
* 5. Any investigation or reverse engineering of source code or binary to
* enable emails to bypass the filters, and hence inflict spam and or viruses
* onto users who use or do not use jASEN could subject the perpetrator to
* criminal and or civil liability.
*
* THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESSED OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JASEN.ORG,
* OR ANY CONTRIBUTORS TO THIS SOFTWARE BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
* OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
package org.jasen.core;
import java.util.List;
import org.jasen.core.token.CountTokenErrorReport;
import org.jasen.core.token.ObfuscatedCharacterTokenErrorRecorder;
import org.jasen.interfaces.ParserData;
import org.jasen.interfaces.TokenErrorRecorder;
import org.jasen.interfaces.TokenErrorReport;
/**
 * <P>
 * Holds the information obtained from parsing and tokenizing the message.
 * </P>
 * <P>
 * This is a plain mutable holder: every value is written through its setter and
 * read back unchanged through the matching getter. The only lazily created
 * member is the {@link TokenErrorRecorder}, whose creation is guarded so that
 * concurrent callers of {@link #getTokenErrorRecorder()} all receive the same
 * instance. The remaining fields are not synchronized.
 * </P>
 * @author Jason Polites
 */
public class StandardParserData implements ParserData
{
    private String htmlAsText;
    private String textParsed;
    private String[] messageTokens;
    private int concealedHtmlCount = 0;
    private int srcCgiCount = 0;
    private int imageCount = 0;
    private int srcPortCount = 0;
    private int falseAnchorCount = 0;

    /*
     * Declared volatile because it is lazily initialised with a double-checked
     * lock in getTokenErrorRecorder(). Without volatile another thread could
     * observe a non-null reference to a recorder whose construction is not yet
     * visible to it.
     */
    private volatile TokenErrorRecorder recorder;

    private List ports;

    /**
     * Creates an empty data holder. All counts start at zero and all
     * references start as <code>null</code>.
     */
    public StandardParserData() {
        super ();
    }

    /*
     * (non-Javadoc)
     * @see org.jasen.interfaces.ParserData#getMessageTokens()
     */
    public String[] getMessageTokens() {
        return messageTokens;
    }

    /**
     * Sets the message tokens obtained from tokenization.
     * <P>
     * The array is stored by reference; no copy is made.
     * </P>
     * @param htmlTokens The tokens to store
     * @see org.jasen.core.token.EmailTokenizer
     */
    public void setMessageTokens(String[] htmlTokens) {
        this.messageTokens = htmlTokens;
    }

    /*
     * (non-Javadoc)
     * @see org.jasen.interfaces.ParserData#getHtmlAsText()
     */
    public String getHtmlAsText() {
        return htmlAsText;
    }

    /**
     * Sets the parsed html. That is, the plain text components of the html in the message.
     * @param parsedHtml The plain text extracted from the html
     */
    public void setHtmlAsText(String parsedHtml) {
        this.htmlAsText = parsedHtml;
    }

    /**
     * Gets the number of occurrences of concealed HTML.
     * @return Returns the concealedHtmlCount.
     */
    public int getConcealedHtmlCount() {
        return concealedHtmlCount;
    }

    /**
     * Sets the number of occurrences of concealed HTML.
     * @param concealedHtmlCount The concealedHtmlCount to set.
     */
    public void setConcealedHtmlCount(int concealedHtmlCount) {
        this.concealedHtmlCount = concealedHtmlCount;
    }

    /**
     * Gets the number of images in the email body.
     * @return Returns the imageCount.
     */
    public int getImageCount() {
        return imageCount;
    }

    /**
     * Sets the number of images in the email body.
     * @param imageCount The imageCount to set.
     */
    public void setImageCount(int imageCount) {
        this.imageCount = imageCount;
    }

    /**
     * Gets the number of occurrences of SRC cgi references.
     * <P>
     * That is, occurrences of HTML tags where a SRC (or other remote reference) which
     * would normally be expected to be a flat file (eg the IMG tag) was found to reference
     * a cgi script or similar. This often indicates the presence of mail bugs.
     * </P>
     * @return Returns the srcCgiCount.
     */
    public int getSrcCgiCount() {
        return srcCgiCount;
    }

    /**
     * Sets the number of occurrences of SRC cgi references.
     * @param srcCgiCount The srcCgiCount to set.
     * @see StandardParserData#getSrcCgiCount()
     */
    public void setSrcCgiCount(int srcCgiCount) {
        this.srcCgiCount = srcCgiCount;
    }

    /*
     * (non-Javadoc)
     * @see org.jasen.interfaces.ParserData#getTextParsed()
     */
    public String getTextParsed() {
        return textParsed;
    }

    /**
     * Sets the parsed (cleaned) text resulting from the message parse.
     * @param textParsed The cleaned text to store
     */
    public void setTextParsed(String textParsed) {
        this.textParsed = textParsed;
    }

    /**
     * Gets the list of TCP ports found appended to URLs in the HTML body of the message.
     * <P>
     * The stored list is returned directly (no copy), and is <code>null</code>
     * until {@link #setPorts(List)} has been called.
     * </P>
     * @return A list of String objects
     */
    public List getPorts() {
        return ports;
    }

    /**
     * Sets the list of TCP ports found appended to URLs in the HTML body of the message.
     * <P>
     * The list is stored by reference; no copy is made.
     * </P>
     * @param ports A list of String objects
     */
    public void setPorts(List ports) {
        this.ports = ports;
    }

    /**
     * Gets the number of anchor or image src (or href) attributes which had alternate TCP ports appended.
     * @return The number of occurrences
     */
    public int getSrcPortCount() {
        return srcPortCount;
    }

    /**
     * Sets the number of anchor or image src (or href) attributes which had alternate TCP ports appended.
     * @param srcPortCount The number of occurrences.
     */
    public void setSrcPortCount(int srcPortCount) {
        this.srcPortCount = srcPortCount;
    }

    /**
     * Gets the count of anchor tags whose text was URL text (eg http://...)
     * but did not match the href attribute.
     * @return The number of occurrences.
     */
    public int getFalseAnchorCount() {
        return falseAnchorCount;
    }

    /**
     * Sets the count of anchor tags whose text was URL text (eg http://...) but did not match the href attribute.
     * @param falseAnchorCount The number of occurrences.
     */
    public void setFalseAnchorCount(int falseAnchorCount) {
        this.falseAnchorCount = falseAnchorCount;
    }

    /**
     * Gets the number of character obfuscation observations.
     * <P>
     * These are instances where non ascii characters are used to obscure normal words.
     * </P>
     * <P>
     * The value is taken from the report of the current token error recorder.
     * If no recorder has been created yet, or its report is not a
     * {@link CountTokenErrorReport}, zero is returned. This method never
     * creates the recorder itself.
     * </P>
     * @return The number of occurrences.
     */
    public int getObfuscatedCharacterCount() {
        // Read the volatile field once so the null check and the call see the same value
        TokenErrorRecorder current = recorder;

        if(current != null) {
            TokenErrorReport report = current.getReport();

            if(report instanceof CountTokenErrorReport) {
                return ((CountTokenErrorReport)report).getCount();
            }
        }

        return 0;
    }

    /*
     * (non-Javadoc)
     * Lazily creates the recorder on first use. The unsynchronized first check
     * is only safe because the field is volatile; the second check inside the
     * lock ensures a single instance is created when several threads race.
     * @see org.jasen.interfaces.ParserData#getTokenErrorRecorder()
     */
    public TokenErrorRecorder getTokenErrorRecorder() {
        TokenErrorRecorder current = recorder;

        if(current == null) {
            synchronized(this) {
                current = recorder;

                if(current == null) {
                    current = new ObfuscatedCharacterTokenErrorRecorder();
                    recorder = current;
                }
            }
        }

        return current;
    }
}