/*
* Copyright (C) 2012 Chris Neasbitt
* Author: Chris Neasbitt
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package edu.uga.cs.fluxbuster.classification;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import weka.classifiers.trees.J48;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;
import edu.uga.cs.fluxbuster.clustering.StoredDomainCluster;
import edu.uga.cs.fluxbuster.db.DBInterface;
import edu.uga.cs.fluxbuster.db.DBInterfaceFactory;
import edu.uga.cs.fluxbuster.utils.PropertiesUtils;
/**
* This class runs the classifier on the clusters and stores
* the derived classes in the database.
*
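* <p>A minimal usage sketch (the run date and the minimum cardinality of 4
* below are hypothetical example values, not defaults of this class):</p>
* <pre>{@code
* Classifier classifier = new Classifier();
* classifier.updateClusterClasses(new Date(), 4);
* }</pre>
*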
* @author Chris Neasbitt
*/
public class Classifier {
private static final String featuresHeader = "@RELATION FastFlux\n\n"+
"@ATTRIBUTE Network_Cardinality NUMERIC\n"+
"@ATTRIBUTE Network_Prefixes NUMERIC\n" +
"@ATTRIBUTE Domains_Per_Network NUMERIC\n" +
"@ATTRIBUTE Number_of_Domains NUMERIC\n" +
"@ATTRIBUTE TTL_Per_Domain NUMERIC\n" +
"@ATTRIBUTE IP_Growth_Ratio NUMERIC\n" +
"@ATTRIBUTE class {Flux, NOT_Flux}\n\n" +
"@DATA\n";
private static final String MODEL_PATHKEY = "MODEL_PATH";
private String modelfile;
private DBInterface dbi;
private Properties localprops = null;
private static Log log = LogFactory.getLog(Classifier.class);
/**
* Instantiates a new classifier. The object is configured via
* the properties file.
*
* @throws IOException if there is an error reading the
* Classifier.properties file
*/
public Classifier() throws IOException{
this(DBInterfaceFactory.loadDBInterface());
}
/**
* Instantiates a new classifier with a specific database
* interface. The object is configured via the properties file.
*
* @param dbi the database interface
* @throws IOException if there is an error reading the
* Classifier.properties file
*/
public Classifier(DBInterface dbi) throws IOException{
if(localprops == null){
localprops = PropertiesUtils.loadProperties(this.getClass());
}
setModelPath(new File(localprops.getProperty(MODEL_PATHKEY))
.getCanonicalPath());
this.dbi = dbi;
}
/**
* Instantiates a new classifier.
*
* @param modelfile the path to the classification model file
*/
public Classifier(String modelfile){
this(modelfile, DBInterfaceFactory.loadDBInterface());
}
/**
* Instantiates a new classifier with a specific database
* interface.
*
* @param modelfile the absolute path to the classification model file
* @param dbi the database interface
*/
public Classifier(String modelfile, DBInterface dbi){
this.modelfile = modelfile;
this.dbi = dbi;
}
/**
* Sets the path to the trained J48 decision tree.
*
* @param modelfile the path to the serialized model
*/
public void setModelPath(String modelfile){
this.modelfile = modelfile;
}
/**
* Gets the path to the trained J48 decision tree.
*
* @return the path to the serialized model
*/
public String getModelPath(){
return this.modelfile;
}
/**
* Prepares the features from the db, executes the classifier, and
* stores the results in the database.
*
* @param logDate the clustering run date
* @param minCardinality the minimum network cardinality of clusters
* to classify
* @throws IOException if there is an error creating the features file
*/
public void updateClusterClasses(Date logDate, int minCardinality) throws IOException{
String simplename = null;
if(log.isInfoEnabled()){
simplename = this.getClass().getSimpleName();
log.info(simplename + " Started: "
+ Calendar.getInstance().getTime());
}
dbi.initClassificationTables(logDate);
Map<ClusterClass, List<StoredDomainCluster>> clusterClasses =
classifyClusters(logDate, minCardinality);
if(log.isDebugEnabled()){
String retval = "";
for(ClusterClass cls : clusterClasses.keySet()){
retval += "Cluster Class: " + cls + "\n";
for(StoredDomainCluster cluster : clusterClasses.get(cls)){
retval += "\t" + cluster.getClusterId() + "\n";
}
}
log.debug(retval);
}
storeClusterClasses(logDate, clusterClasses);
if(log.isInfoEnabled()){
log.info(simplename + " Finished: "
+ Calendar.getInstance().getTime());
}
}
/**
* Prepares the features from the db and executes the classifier.
*
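* <p>A sketch of reading the result (the {@code classifier}, {@code logDate},
* and cardinality threshold of 4 are assumed example values):</p>
* <pre>{@code
* Map<ClusterClass, List<StoredDomainCluster>> classes =
*     classifier.classifyClusters(logDate, 4);
* List<StoredDomainCluster> fluxClusters = classes.get(ClusterClass.FLUX);
* if (fluxClusters != null) {
*     for (StoredDomainCluster cluster : fluxClusters) {
*         System.out.println(cluster.getClusterId());
*     }
* }
* }</pre>
*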
* @param logDate the run date of the clusters
* @param minCardinality the minimum network cardinality for a cluster
* to be considered for classification
* @return a map of the classified clusters, where the keys are the classes
* and the values are lists of clusters belonging to those classes
* @throws IOException if there is an error creating the features file
*/
public Map<ClusterClass, List<StoredDomainCluster>> classifyClusters(Date logDate,
int minCardinality) throws IOException{
Map<ClusterClass, List<StoredDomainCluster>> retval = null;
if(log.isDebugEnabled()){
log.debug("Retrieving features from db.");
}
List<StoredDomainCluster> clusters = dbi.getClusters(logDate, minCardinality);
if(log.isDebugEnabled()){
log.debug("Features retrieved.");
log.debug("Preparing features file.");
}
String prepfeatures = prepareFeatures(clusters);
if(log.isDebugEnabled()){
log.debug("File prepared.");
log.debug("Executing J48 classifier.");
}
retval = executeClassifier(prepfeatures, modelfile, clusters);
if(log.isDebugEnabled()){
log.debug("J48 execution complete.");
}
return retval;
}
/**
* Generates a String of the features in ARFF format.
*
* @param clusters the list of clusters from which to pull features
* @return the ARFF format version of the features
*/
private String prepareFeatures(List<StoredDomainCluster> clusters){
StringBuffer buf = new StringBuffer();
buf.append(featuresHeader);
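// One data row per cluster; the trailing class value is only a placeholder,
// the actual class is determined later by the trained model.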
for(StoredDomainCluster cluster : clusters){
buf.append(cluster.getNetworkCardinality() + ", " + cluster.getIpDiversity() +
", " + cluster.getDomainsPerNetwork() + ", " + cluster.getNumberOfDomains() +
", " + cluster.getTtlPerDomain() + ", " + cluster.getIpGrowthRatio() +
", " + ClusterClass.NOT_FLUX + "\n");
}
return buf.toString();
}
/**
* Executes the classifier.
*
* @param prepfeatures the prepared features in ARFF format
* @param modelfile the path to the serialized model
* @param clusters the clusters to classify
* @return a map of the classified clusters, where the keys are the classes
* and the values are lists of clusters belonging to those classes
*/
private Map<ClusterClass, List<StoredDomainCluster>> executeClassifier(String prepfeatures, String modelfile,
List<StoredDomainCluster> clusters){
Map<ClusterClass, List<StoredDomainCluster>> retval =
new HashMap<ClusterClass, List<StoredDomainCluster>>();
try{
DataSource source = new DataSource(new ByteArrayInputStream(prepfeatures.getBytes()));
Instances data = source.getDataSet();
if (data.classIndex() == -1){
data.setClassIndex(data.numAttributes() - 1);
}
String[] options = weka.core.Utils.splitOptions("-p 0");
J48 cls = (J48)weka.core.SerializationHelper.read(modelfile);
cls.setOptions(options);
for(int i = 0; i < data.numInstances(); i++){
double pred = cls.classifyInstance(data.instance(i));
ClusterClass clusClass = ClusterClass.valueOf(
data.classAttribute().value((int)pred).toUpperCase());
if(!retval.containsKey(clusClass)){
retval.put(clusClass, new ArrayList<StoredDomainCluster>());
}
retval.get(clusClass).add(clusters.get(i));
}
} catch (Exception e) {
if(log.isErrorEnabled()){
log.error("Error executing classifier.", e);
}
}
return retval;
}
/**
* Store cluster classes in the database.
*
* @param logDate the clustering run date
* @param clusterClasses the map of classified clusters
*/
private void storeClusterClasses(Date logDate, Map<ClusterClass, List<StoredDomainCluster>> clusterClasses){
dbi.storeClusterClasses(logDate, clusterClasses, false);
}
}