/*
* Copyright (C) 2012 Chris Neasbitt
* Author: Chris Neasbitt
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package edu.uga.cs.fluxbuster.analytics;
import java.io.IOException;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.Formatter;
import java.util.List;
import java.util.Properties;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.joda.time.DateTime;
import edu.uga.cs.fluxbuster.db.DBInterface;
import edu.uga.cs.fluxbuster.db.DBInterfaceFactory;
import edu.uga.cs.fluxbuster.utils.PropertiesUtils;
/**
 * Calculates the ip and domain based similarities between the clusters
 * generated by two runs of hierarchical clustering and stores the results
 * through the configured {@link DBInterface}.
 *
 * The similarity queries are read as format templates from the properties
 * loaded for this class; the run dates are substituted into them as
 * {@code yyyyMMdd} strings.
 *
 * @author Chris Neasbitt
 */
public class ClusterSimilarityCalculator {

    /** Property key of the query template for ip-based similarities. */
    private static final String IPKEY = "INTERSECTION_QUERY_IP";

    /** Property key of the query template for domainname-based similarities. */
    private static final String DOMAINSKEY = "INTERSECTION_QUERY_DOMAINNAME";

    /** Pattern used to render run dates before they are placed in a query. */
    private static final String DATE_PATTERN = "yyyyMMdd";

    private static final Log log = LogFactory.getLog(ClusterSimilarityCalculator.class);

    /** Properties holding the query templates. */
    private final Properties properties;

    /** Database interface used for both querying and storing similarities. */
    private final DBInterface db;

    /**
     * The Enum SIM_TYPE represents a type of similarity.
     */
    public static enum SIM_TYPE {
        /** The IP. */
        IP,
        /** The DOMAINNAME. */
        DOMAINNAME
    }

    /**
     * Instantiates a new cluster similarity calculator.
     *
     * @throws IOException if the ClusterSimilarityCalculator.properties file
     * 		can not be loaded
     */
    public ClusterSimilarityCalculator() throws IOException {
        properties = PropertiesUtils.loadProperties(this.getClass());
        db = DBInterfaceFactory.loadDBInterface();
    }

    /**
     * Update all cluster similarities in the database between the run on the
     * supplied date and the run one day previous.
     *
     * @param adate the date of the first clustering run
     */
    public void updateClusterSimilarities(Date adate) {
        DateTime adt = new DateTime(adate.getTime());
        Date bdate = new Date(adt.minusDays(1).getMillis());
        this.updateClusterSimilarities(adate, bdate);
    }

    /**
     * Update all cluster similarities in the database between the runs on the
     * two supplied dates. The similarity tables are initialized for
     * {@code adate}, then the ip-based and the domainname-based similarities
     * are calculated and stored, in that order.
     *
     * Any exception raised along the way is logged and not rethrown, so a
     * failure in the ip-based step also skips the domainname-based step.
     *
     * @param adate the date of the first clustering run
     * @param bdate the date of the second clustering run
     */
    public void updateClusterSimilarities(Date adate, Date bdate) {
        // Resolved unconditionally so the "Finished" line never prints "null"
        // if the log level changes while this method is running.
        final String simplename = this.getClass().getSimpleName();
        if (log.isInfoEnabled()) {
            log.info(simplename + " Started: "
                    + Calendar.getInstance().getTime());
            log.info("a-date: " + adate + " b-date: " + bdate );
            log.info("Updating ip based cluster similarities.");
        }
        try {
            db.initSimilarityTables(adate);
            updateIpClusterSimilarities(adate, bdate);
            if (log.isInfoEnabled()) {
                log.info("Ip based cluster similarities updated.");
                log.info("Updating domainname based cluster similarities.");
            }
            updateDomainnameClusterSimilarities(adate, bdate);
            if (log.isInfoEnabled()) {
                log.info("Domainname based cluster similarities updated.");
            }
        } catch (Exception e) {
            // Top-level boundary: report the failure and fall through to the
            // "Finished" message instead of propagating.
            if (log.isErrorEnabled()) {
                log.error("Error calculating cluster similarities.", e);
            }
        }
        if (log.isInfoEnabled()) {
            log.info(simplename + " Finished: "
                    + Calendar.getInstance().getTime());
        }
    }

    /**
     * Update ip-based cluster similarities in the database between the runs on
     * the two supplied dates.
     *
     * @param adate the date of the first clustering run
     * @param bdate the date of the second clustering run
     * @throws Exception if unable to calculate or store the similarity results
     */
    public void updateIpClusterSimilarities(Date adate, Date bdate) throws Exception {
        storeIpClusterSimiliarities(calculateIpSimilarities(adate, bdate));
    }

    /**
     * Update domainname-based cluster similarities in the database between the runs
     * on the two supplied dates.
     *
     * @param adate the date of the first clustering run
     * @param bdate the date of the second clustering run
     * @throws Exception if unable to calculate or store the similarity results
     */
    public void updateDomainnameClusterSimilarities(Date adate, Date bdate) throws Exception {
        storeDomainnameClusterSimiliarities(calculateDomainnameSimilarities(adate, bdate));
    }

    /**
     * Store ip-based cluster similiarities in the database, using the same
     * database interface this calculator queries with.
     *
     * @param sims the cluster similarities
     */
    public void storeIpClusterSimiliarities(List<ClusterSimilarity> sims) {
        db.storeIpClusterSimilarities(sims);
    }

    /**
     * Store domainname-based cluster similiarities in the database, using the
     * same database interface this calculator queries with.
     *
     * @param sims the cluster similarities
     */
    public void storeDomainnameClusterSimiliarities(List<ClusterSimilarity> sims) {
        db.storeDomainnameClusterSimilarities(sims);
    }

    /**
     * Calculate ip-based cluster similarities between all of the clusters generated
     * during the runs on the two supplied dates.
     *
     * @param adate the date of the first clustering run
     * @param bdate the date of the second clustering run
     * @return the list of ip-based cluster similarities
     * @throws IOException if the ip similarity query template is not configured
     */
    public List<ClusterSimilarity> calculateIpSimilarities(Date adate, Date bdate) throws IOException {
        String adatestr = formatDate(adate);
        String bdatestr = formatDate(bdate);
        // The ip template takes three positional arguments: a-date, a-date, b-date.
        String query = buildQuery(IPKEY, adatestr, adatestr, bdatestr);
        return this.executeSimilarityQuery(query, adate, bdate);
    }

    /**
     * Calculate domainname-based cluster similarities between all of the clusters generated
     * during the runs on the two supplied dates.
     *
     * @param adate the date of the first clustering run
     * @param bdate the date of the second clustering run
     * @return the list of domainname-based cluster similarities
     * @throws IOException if the domainname similarity query template is not
     * 		configured
     */
    public List<ClusterSimilarity> calculateDomainnameSimilarities(Date adate, Date bdate) throws IOException {
        String adatestr = formatDate(adate);
        String bdatestr = formatDate(bdate);
        // The domainname template takes six positional arguments:
        // four times the a-date followed by two times the b-date.
        String query = buildQuery(DOMAINSKEY, adatestr, adatestr, adatestr,
                adatestr, bdatestr, bdatestr);
        return this.executeSimilarityQuery(query, adate, bdate);
    }

    /**
     * Renders a run date in the form expected by the query templates.
     *
     * @param date the date to render
     * @return the date formatted as {@code yyyyMMdd}
     */
    private static String formatDate(Date date) {
        // SimpleDateFormat is not thread-safe, so a new instance is used per call.
        return new SimpleDateFormat(DATE_PATTERN).format(date);
    }

    /**
     * Looks up a query template in the properties and fills in its arguments.
     *
     * @param key the property key of the query template
     * @param args the positional arguments substituted into the template
     * @return the formatted query
     * @throws IOException if no template is configured under {@code key}
     */
    private String buildQuery(String key, Object... args) throws IOException {
        String template = properties.getProperty(key);
        if (template == null) {
            throw new IOException("Missing query property '" + key + "' for "
                    + this.getClass().getSimpleName() + ".");
        }
        return String.format(template, args);
    }

    /**
     * Executes the similarity query and converts each result row into a
     * {@link ClusterSimilarity}. Each row is read as two integer columns
     * followed by a double column (presumably the two cluster ids and their
     * similarity score; confirm against the configured queries).
     *
     * SQL errors are logged rather than thrown; the rows read before the
     * error are still returned. A null result set yields an empty list.
     *
     * @param query the query to execute.
     * @param adate the date of the first clustering run
     * @param bdate the date of the second clustering run
     * @return the list of cluster similarities, never null
     */
    private List<ClusterSimilarity> executeSimilarityQuery(String query, Date adate, Date bdate) {
        List<ClusterSimilarity> retval = new ArrayList<ClusterSimilarity>();
        ResultSet rs = null;
        try {
            rs = db.executeQueryWithResult(query);
            if (rs == null) {
                if (log.isErrorEnabled()) {
                    log.error("Similarity query returned no result set.");
                }
            } else {
                while (rs.next()) {
                    retval.add(new ClusterSimilarity(adate, bdate,
                            rs.getInt(1), rs.getInt(2), rs.getDouble(3)));
                }
            }
        } catch (SQLException e) {
            if (log.isErrorEnabled()) {
                log.error("Error reading cluster similarity results.", e);
            }
        } finally {
            // rs stays null when the query itself failed; closing it blindly
            // would raise a NullPointerException that hides the real error.
            if (rs != null) {
                try {
                    rs.close();
                } catch (SQLException e) {
                    if (log.isErrorEnabled()) {
                        log.error("Error closing cluster similarity result set.", e);
                    }
                }
            }
        }
        return retval;
    }
}