package dmt.clustering;
import java.io.IOException;
import java.util.Set;
import java.util.Vector;
import com.aliasi.cluster.CompleteLinkClusterer;
import com.aliasi.cluster.Dendrogram;
import com.aliasi.cluster.HierarchicalClusterer;
import com.aliasi.cluster.SingleLinkClusterer;
import dmt.tools.CSVFileWriter;
/**
 * Command-line entry point that clusters the loaded {@link TextInstance}s
 * with complete-link and single-link hierarchical clustering and writes the
 * resulting cluster labels to one CSV file per clusterer.
 */
public class ClusteringProcessor
{
    /** Number of clusters each dendrogram is cut into. */
    private static final int NUM_CLUSTERS = 30;

    /**
     * Loads the text instances, then writes
     * <code>completeLinkClustersCSV.csv</code> followed by
     * <code>singleLinkClustersCSV.csv</code>.
     *
     * @param args
     *            command-line arguments; not used
     * @throws IOException
     *             if loading the instances or writing either CSV file fails
     */
    public static void main(String[] args) throws IOException
    {
        TextInstance.loadTextInstances();

        writeClusterLabels("completeLinkClustersCSV.csv",
                new CompleteLinkClusterer<TextInstance>(
                        TextInstance.EUCLIDEAN_DISTANCE), NUM_CLUSTERS);

        writeClusterLabels("singleLinkClustersCSV.csv",
                new SingleLinkClusterer<TextInstance>(
                        TextInstance.EUCLIDEAN_DISTANCE), NUM_CLUSTERS);
    }

    /**
     * Clusters <code>TextInstance.textInstancesSet</code> with the given
     * clusterer, cuts the dendrogram into <code>k</code> clusters and writes
     * a "Cluster label" header followed by one label row per text instance.
     * Rows follow the iteration order of the instance set; a label is the
     * 1-based position of the instance's cluster in the iteration order of
     * the partition.
     *
     * @param fileName
     *            path of the CSV file to create
     * @param clusterer
     *            hierarchical clusterer used to build the dendrogram
     * @param k
     *            number of clusters to cut the dendrogram into
     * @throws IOException
     *             if the CSV file cannot be written
     */
    private static void writeClusterLabels(String fileName,
            HierarchicalClusterer<TextInstance> clusterer, int k)
            throws IOException
    {
        Dendrogram<TextInstance> dendrogram = clusterer
                .hierarchicalCluster(TextInstance.textInstancesSet);
        Set<Set<TextInstance>> partition = dendrogram.partitionK(k);

        // NOTE(review): second argument is presumably the field separator.
        CSVFileWriter out = new CSVFileWriter(fileName, ',');
        try
        {
            Vector<String> header = new Vector<String>();
            header.add("Cluster label");
            out.writeFields(header);

            for (Object instance : TextInstance.textInstancesSet)
            {
                int label = 1;
                for (Set<TextInstance> cluster : partition)
                {
                    if (cluster.contains(instance))
                    {
                        Vector<String> row = new Vector<String>();
                        row.add(Integer.toString(label));
                        out.writeFields(row);
                        // A partition places each instance in exactly one
                        // cluster, so the remaining clusters need no check.
                        break;
                    }
                    label++;
                }
            }
        }
        finally
        {
            // Close the writer even when a write fails part-way through.
            out.close();
        }
    }
}