/*
* @(#)HeuristicScanner.java 1/12/2004
*
* Copyright (c) 2004, 2005 jASEN.org
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the distribution.
*
* 3. The names of the authors may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* 4. Any modification or additions to the software must be contributed back
* to the project.
*
* 5. Any investigation or reverse engineering of source code or binary to
* enable emails to bypass the filters, and hence inflict spam and or viruses
* onto users who use or do not use jASEN could subject the perpetrator to
* criminal and or civil liability.
*
* THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESSED OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JASEN.ORG,
* OR ANY CONTRIBUTORS TO THIS SOFTWARE BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
* OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.jasen.plugins;
import java.util.Properties;
import javax.mail.internet.MimeMessage;
import org.jasen.core.PointTestResult;
import org.jasen.core.StandardParserData;
import org.jasen.core.engine.Jasen;
import org.jasen.error.JasenException;
import org.jasen.interfaces.HeuristicAnalyzer;
import org.jasen.interfaces.HeuristicDefinitionSet;
import org.jasen.interfaces.JasenMessage;
import org.jasen.interfaces.JasenPlugin;
import org.jasen.interfaces.JasenTestResult;
import org.jasen.interfaces.ParserData;
import org.jasen.interfaces.ReceivedHeaderParser;
/**
* <p>
* Performs a heuristic scan of an email.
* </p>
* <p>
* The scan is done across the entire message text (and html) and looks for known spam identifiers.
* </p>
* <p>
* The term "heuristic" in this context is defined as: "A method based on empirical information that has no explicit rationalization"
* <br/>
* This essentially correlates to a "hard-coded" search for specific string patterns
* </p>
* <p>
* The scanner makes use of a HeuristicAnalyzer to perform the analysis. The default analyzer for jASEN
* uses regular expressions to interrogate the email content.
* </p>
* <p>
* Implementation Note: When adding heuristic definitions one should be very careful to ensure the definition is precise.
* An imprecise heuristic definition (regular expression) may lead to false identification of spam.
* </p>
* @author Jason Polites
*/
public class HeuristicScanner implements JasenPlugin
{
    /** Analyzer used by {@link #test}; created by class name in {@link #init(Properties)}. */
    private HeuristicAnalyzer analyzer;

    /** Definition set handed to the analyzer; created by class name in {@link #init(Properties)}. */
    private HeuristicDefinitionSet definitions;

    /** Value passed to {@code PointTestResult.setMin}; overridable via the "min" property. */
    private float min = 0.5f;

    /** Value passed to {@code PointTestResult.setMax}; overridable via the "max" property. */
    private float max = 0.9f;

    /** Value passed to both the analyzer and the result; overridable via the "threshold" property. */
    private int threshold = 1;

    /**
     * Creates an unconfigured scanner. {@link #init(Properties)} must be called with a
     * non-null property set before {@link #test} can be used.
     */
    public HeuristicScanner() {
        super ();
    }

    /**
     * Configures the scanner from the given properties.
     * <p>
     * Recognised keys: "min", "max" and "threshold" (optional, defaults are kept when absent),
     * plus "def-class" and "analyzer-class" (required), which name the
     * {@link HeuristicDefinitionSet} and {@link HeuristicAnalyzer} implementations to
     * instantiate through their no-argument constructors. The definition set is itself
     * initialised with the same properties.
     * </p>
     * <p>
     * The scanner's fields are only updated once every value has been parsed and both
     * objects have been created, so a failed call leaves the previous configuration intact.
     * </p>
     *
     * @param properties the plugin configuration; when null this method does nothing
     * @throws JasenException if a numeric property is malformed, a required class name is
     * missing, or a named class cannot be loaded, instantiated or cast to the expected interface
     * @see org.jasen.interfaces.JasenPlugin#init(java.util.Properties)
     */
    public void init(Properties properties) throws JasenException {
        if(properties == null) {
            return;
        }

        float newMin = min;
        float newMax = max;
        int newThreshold = threshold;

        try
        {
            String strMin = properties.getProperty("min");
            String strMax = properties.getProperty("max");
            String strThresh = properties.getProperty("threshold");

            if(strMin != null) {
                newMin = Float.parseFloat(strMin);
            }
            if(strMax != null) {
                newMax = Float.parseFloat(strMax);
            }
            if(strThresh != null) {
                newThreshold = Integer.parseInt(strThresh);
            }
        }
        catch (NumberFormatException e)
        {
            // Report bad configuration through the declared exception type
            throw new JasenException(e);
        }

        // Load the definition set and analyzer
        HeuristicDefinitionSet newDefinitions;
        HeuristicAnalyzer newAnalyzer;

        try
        {
            newDefinitions = (HeuristicDefinitionSet)instantiate("def-class", properties.getProperty("def-class"));
            newDefinitions.init(properties);
            newAnalyzer = (HeuristicAnalyzer)instantiate("analyzer-class", properties.getProperty("analyzer-class"));
        }
        catch (ClassCastException e)
        {
            // The configured class exists but does not implement the required interface
            throw new JasenException(e);
        }

        // Everything succeeded; commit the new configuration in one go
        min = newMin;
        max = newMax;
        threshold = newThreshold;
        definitions = newDefinitions;
        analyzer = newAnalyzer;
    }

    /**
     * Instantiates the named class through its no-argument constructor.
     *
     * @param propertyName the configuration key the class name came from (used in the error message)
     * @param className the fully qualified class name, may be null
     * @return a new instance of the named class
     * @throws JasenException if the name is null or the class cannot be loaded or instantiated
     */
    private static Object instantiate(String propertyName, String className) throws JasenException {
        if(className == null) {
            // Class.forName(null) would otherwise fail with an uninformative NullPointerException
            throw new JasenException(new IllegalArgumentException(
                    "Missing required property '" + propertyName + "' for HeuristicScanner"));
        }

        try
        {
            return Class.forName(className).newInstance();
        }
        catch (InstantiationException e)
        {
            throw new JasenException(e);
        }
        catch (IllegalAccessException e)
        {
            throw new JasenException(e);
        }
        catch (ClassNotFoundException e)
        {
            throw new JasenException(e);
        }
    }

    /**
     * Does nothing.
     *
     * @see org.jasen.interfaces.JasenPlugin#destroy()
     */
    public void destroy() throws JasenException {}

    /**
     * Runs the configured analyzer over the message and reports the outcome as a
     * {@link PointTestResult} carrying this scanner's min, max and threshold values.
     *
     * @param engine not used by this plugin
     * @param rawMessage the raw message, passed to the analyzer
     * @param parsedMessage the parsed message, passed to the analyzer
     * @param data the parser data; the analyzer's return value is only recorded as the result's points
     * when this is a {@link StandardParserData}
     * @param parser not used by this plugin
     * @return the populated test result, never null
     * @throws JasenException if the scanner has not been initialised with an analyzer, or if the
     * analyzer fails
     * @see org.jasen.interfaces.JasenPlugin#test(org.jasen.core.engine.Jasen, javax.mail.internet.MimeMessage, org.jasen.interfaces.JasenMessage, org.jasen.interfaces.ParserData, org.jasen.interfaces.ReceivedHeaderParser)
     */
    public JasenTestResult test(Jasen engine, MimeMessage rawMessage, JasenMessage parsedMessage, ParserData data, ReceivedHeaderParser parser) throws JasenException {
        if(analyzer == null) {
            // init was never called, or was called with null properties
            throw new JasenException(new IllegalStateException(
                    "HeuristicScanner has not been initialised with an analyzer"));
        }

        PointTestResult result = new PointTestResult();
        result.setMin(min);
        result.setMax(max);
        result.setThreshold(threshold);

        int matches = analyzer.analyze(rawMessage, parsedMessage, definitions, threshold);

        // NOTE(review): the analysis runs regardless of the parser data type, but its outcome
        // is discarded unless data is a StandardParserData. The reason is not visible here;
        // confirm whether this gate is intentional before changing it.
        if(data instanceof StandardParserData) {
            result.setPoints(matches);
        }

        return result;
    }
}