Package dmt.clustering

Source Code of dmt.clustering.KMeansClusteringProcessor

package dmt.clustering;

import java.io.IOException;
import java.util.Vector;

import dmt.tools.CSVFileWriter;
import dmt.tools.clustering.*;

public class KMeansClusteringProcessor
{
  public static void main(String[] args) throws IOException
  {
    TextInstance.loadTextInstances();
    Object[] textInstancesArray = TextInstance.textInstancesSet.toArray();
    Vector<DataPoint> dataPoints = new Vector<DataPoint>();
    int test = 1000;
    for (int i = 0; i < test; i++)
    {
      dataPoints.add(new DataPoint(((TextInstance) textInstancesArray[i])
          .getBagOfWordsTfIdf(),
          ((TextInstance) textInstancesArray[i]).getId()));
    }
    ClusterAnalysis jca = new ClusterAnalysis(5, 100, dataPoints);
    jca.startAnalysis();

    Vector<Vector<DataPoint>> clusters = jca.getClusterOutput();
    CSVFileWriter out = new CSVFileWriter("kMeansClustersCSV.csv",
        ',');
    Vector<String> header = new Vector<String>();
    header.add("Document ID");
    header.add("XPath");
    header.add("Cluster label");
    out.writeFields(header);
    for (int i = 0; i < clusters.size(); i++)
    {
      for (int j = 0; j < clusters.get(i).size(); j++)
      {
        DataPoint point = clusters.get(i).get(j);
        String documentId = point.getId().substring(0, point.getId().indexOf("/"));
        String XPath = point.getId().substring(point.getId().indexOf("/"), point.getId().length());
        Vector<String> fields = new Vector<String>();
        fields.add(documentId);
        fields.add(XPath);
        fields.add(i+"");
        out.writeFields(fields);
      }
    }
    out.close();
  }
}
TOP

Related Classes of dmt.clustering.KMeansClusteringProcessor

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.