* Copyright (C) 2012 Chris Neasbitt
* Author: Chris Neasbitt
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 2 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
package edu.uga.cs.fluxbuster.analytics;
import java.io.IOException;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.Formatter;
import java.util.List;
import java.util.Properties;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.joda.time.DateTime;
import edu.uga.cs.fluxbuster.db.DBInterface;
import edu.uga.cs.fluxbuster.db.DBInterfaceFactory;
import edu.uga.cs.fluxbuster.utils.PropertiesUtils;
* This class calculates the ip-based and domainname-based similarities between
* the clusters generated by two runs of hierarchical clustering.
* @author Chris Neasbitt
public class ClusterSimilarityCalculator {
/** Properties loaded for this class; the similarity query templates are read from it. */
private Properties properties = null;

/** Property key of the ip-based similarity query template. */
private static final String IPKEY = "INTERSECTION_QUERY_IP";

/** Database interface used to execute the similarity queries. */
private DBInterface db = null;

/** Logger shared by all instances of this class. */
private static final Log log = LogFactory.getLog(ClusterSimilarityCalculator.class);
* The Enum SIM_TYPE represents a type of similarity.
public static enum SIM_TYPE {/** The IP. */ IP,
/**
 * Creates a calculator: loads the properties associated with this class
 * and obtains the database interface from the factory.
 *
 * @throws IOException if the ClusterSimilarityCalculator.properties file
 *         can not be loaded
 */
public ClusterSimilarityCalculator() throws IOException {
    // Properties first, database second: a failed load leaves the factory untouched.
    this.properties = PropertiesUtils.loadProperties(getClass());
    this.db = DBInterfaceFactory.loadDBInterface();
}
* Update all cluster similarities in the database between the run on the
* supplied date and the run one day previous.
* @param adate the date of the first clustering run
public void updateClusterSimilarities(Date adate){
DateTime adt = new DateTime(adate.getTime());
Date bdate = new Date(adt.minusDays(1).getMillis());
this.updateClusterSimilarities(adate, bdate);
* Update all cluster similarities in the database between the runs on the
* two supplied dates.
* @param adate the date of the first clustering run
* @param bdate the date of the second clustering run
public void updateClusterSimilarities(Date adate, Date bdate){
String simplename = null;
simplename = this.getClass().getSimpleName();
log.info(simplename + " Started: "
+ Calendar.getInstance().getTime());
log.info("a-date: " + adate + " b-date: " + bdate );
log.info("Updating ip based cluster similarities.");
try {
updateIpClusterSimilarities(adate, bdate);
log.info("Ip based cluster similarities updated.");
log.info("Updating domainname based cluster similarities.");
updateDomainnameClusterSimilarities(adate, bdate);
log.info("Domainname based cluster similarities updated.");
} catch (Exception e) {
log.error("Error calculating cluster similarities.", e);
log.info(simplename + " Finished: "
+ Calendar.getInstance().getTime());
* Update ip-based cluster similarities in the database between the runs on
* the two supplied dates.
* @param adate the date of the first clustering run
* @param bdate the date of the second clustering run
* @throws Exception if unable to calculate or store the similarity results
public void updateIpClusterSimilarities(Date adate, Date bdate) throws Exception{
storeIpClusterSimiliarities(calculateIpSimilarities(adate, bdate));
* Update domainname-based cluster similarities in the database between the runs
* on the two supplied dates.
* @param adate the date of the first clustering run
* @param bdate the date of the second clustering run
* @throws Exception if unable to calculate or store the similarity results
public void updateDomainnameClusterSimilarities(Date adate, Date bdate) throws Exception{
storeDomainnameClusterSimiliarities(calculateDomainnameSimilarities(adate, bdate));
* Store ip-based cluster similarities in the database.
* @param sims the cluster similarities
public void storeIpClusterSimiliarities(List<ClusterSimilarity> sims){
// NOTE(review): this local variable shadows the instance field `db`, which the
// constructor already obtains from the same DBInterfaceFactory call -- confirm
// whether a second lookup is intended here.
DBInterface db = DBInterfaceFactory.loadDBInterface();
* Store domainname-based cluster similarities in the database.
* @param sims the cluster similarities
public void storeDomainnameClusterSimiliarities(List<ClusterSimilarity> sims){
// NOTE(review): this local variable shadows the instance field `db`, which the
// constructor already obtains from the same DBInterfaceFactory call -- confirm
// whether a second lookup is intended here.
DBInterface db = DBInterfaceFactory.loadDBInterface();
* Calculate ip-based cluster similarities between all of the clusters generated
* during the runs on the two supplied dates.
* @param adate the date of the first clustering run
* @param bdate the date of the second clustering run
* @return the list of ip-based cluster similarities
* @throws IOException if the similarities could not be calculated
public List<ClusterSimilarity> calculateIpSimilarities(Date adate, Date bdate) throws IOException{
SimpleDateFormat df = new SimpleDateFormat("yyyyMMdd");
String adatestr = df.format(adate);
String bdatestr = df.format(bdate);
String query = properties.getProperty(IPKEY);
StringBuffer querybuf = new StringBuffer();
Formatter formatter = new Formatter(querybuf);
formatter.format(query, adatestr, adatestr, bdatestr);
query = querybuf.toString();
return this.executeSimilarityQuery(query, adate, bdate);
* Calculate domainname-based cluster similarities between all of the clusters generated
* during the runs on the two supplied dates.
* @param adate the date of the first clustering run
* @param bdate the date of the second clustering run
* @return the list of domainname-based cluster similarities
* @throws IOException if the similarities could not be calculated
public List<ClusterSimilarity> calculateDomainnameSimilarities(Date adate, Date bdate) throws IOException{
SimpleDateFormat df = new SimpleDateFormat("yyyyMMdd");
String adatestr = df.format(adate);
String bdatestr = df.format(bdate);
String query = properties.getProperty(DOMAINSKEY);
StringBuffer querybuf = new StringBuffer();
Formatter formatter = new Formatter(querybuf);
formatter.format(query, adatestr, adatestr, adatestr,
adatestr, bdatestr, bdatestr);
query = querybuf.toString();
return this.executeSimilarityQuery(query, adate, bdate);
* Executes the similarity query.
* @param query the query to execute.
* @param adate the date of the first clustering run
* @param bdate the date of the second clustering run
* @return the list of cluster similarities
// Runs the supplied query through the DBInterface and collects ClusterSimilarity
// objects built from the result columns, each tagged with the two run dates.
private List<ClusterSimilarity> executeSimilarityQuery(String query, Date adate, Date bdate){
// Accumulator for the similarities; this same list is what gets returned.
List<ClusterSimilarity> retval = new ArrayList<ClusterSimilarity>();
ResultSet rs = null;
rs = db.executeQueryWithResult(query);
// Columns 1 and 2 are read as ints and column 3 as a double; presumably the two
// cluster ids and their similarity score -- TODO confirm against the query text.
retval.add(new ClusterSimilarity(adate, bdate,
rs.getInt(1), rs.getInt(2), rs.getDouble(3)));
} catch (SQLException e) {
// NOTE(review): confirm the SQLException is reported here rather than dropped.
} finally {
try {
} catch (SQLException e) {
// As far as visible here, the list is returned even after a SQLException was
// caught, so callers may receive a partial or empty result.
return retval;