package dmt.clustering;
import java.io.IOException;
import java.util.Vector;
import dmt.tools.CSVFileWriter;
import dmt.tools.clustering.*;
/**
 * Command-line entry point that loads the text instances, runs a
 * {@link ClusterAnalysis} over (at most) the first {@value #MAX_INSTANCES} of
 * them and writes the resulting cluster assignment to a CSV file.
 *
 * <p>Each output row holds three fields: the document id, the XPath and the
 * index of the cluster the data point was placed in.
 */
public class KMeansClusteringProcessor
{
    /** Upper bound on the number of text instances handed to the analysis. */
    private static final int MAX_INSTANCES = 1000;

    /** Name of the CSV file the cluster assignment is written to. */
    private static final String OUTPUT_FILE = "kMeansClustersCSV.csv";

    /** Field separator used in the CSV output. */
    private static final char SEPARATOR = ',';

    /**
     * Runs the clustering and writes the CSV report.
     *
     * @param args command-line arguments; not used
     * @throws IOException declared by the original entry point; presumably
     *         raised by instance loading or by the CSV writer (their
     *         signatures are not visible from this file)
     */
    public static void main(String[] args) throws IOException
    {
        TextInstance.loadTextInstances();
        Object[] textInstancesArray = TextInstance.textInstancesSet.toArray();

        // Never read past the end of the array when fewer than MAX_INSTANCES
        // text instances were loaded.
        int limit = Math.min(MAX_INSTANCES, textInstancesArray.length);
        Vector<DataPoint> dataPoints = new Vector<DataPoint>();
        for (int i = 0; i < limit; i++)
        {
            TextInstance instance = (TextInstance) textInstancesArray[i];
            dataPoints.add(new DataPoint(instance.getBagOfWordsTfIdf(),
                    instance.getId()));
        }

        // NOTE(review): 5 and 100 are presumably the cluster count and the
        // iteration limit -- confirm against ClusterAnalysis.
        ClusterAnalysis jca = new ClusterAnalysis(5, 100, dataPoints);
        jca.startAnalysis();
        Vector<Vector<DataPoint>> clusters = jca.getClusterOutput();

        CSVFileWriter out = new CSVFileWriter(OUTPUT_FILE, SEPARATOR);
        try
        {
            Vector<String> header = new Vector<String>();
            header.add("Document ID");
            header.add("XPath");
            header.add("Cluster label");
            out.writeFields(header);

            for (int i = 0; i < clusters.size(); i++)
            {
                String clusterLabel = String.valueOf(i);
                for (DataPoint point : clusters.get(i))
                {
                    // The id is split at its first '/': the part before it is
                    // the document id, the rest (slash included) the XPath.
                    String id = point.getId();
                    int slash = id.indexOf('/');
                    // An id without any '/' is reported as a bare document id
                    // with an empty XPath instead of failing in substring().
                    String documentId = slash < 0 ? id : id.substring(0, slash);
                    String xPath = slash < 0 ? "" : id.substring(slash);

                    Vector<String> fields = new Vector<String>();
                    fields.add(documentId);
                    fields.add(xPath);
                    fields.add(clusterLabel);
                    out.writeFields(fields);
                }
            }
        }
        finally
        {
            // Release the writer even when writing a row fails.
            out.close();
        }
    }
}