Package dmt.clustering

Source Code of dmt.clustering.ClusterCentersProcessor

package dmt.clustering;

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.StringTokenizer;
import java.util.Vector;

import dmt.tools.CSVFileWriter;
import dmt.tools.SparseVector;

public class ClusterCentersProcessor
{
  public static void main(String[] args) throws IOException
  {
    for (int fileIndex = 2; fileIndex < 11; fileIndex++)
    {
      BufferedReader reader = new BufferedReader(new FileReader(
          "clustering_outputs/em_" + fileIndex
              + "clusters_5iterations.txt"));
      String line = reader.readLine();
     
      Vector<Vector<String>> clusterCenters = new Vector<Vector<String>>();
      Vector<String> headers = new Vector<String>();
      headers.add(" ");
      int clustersCount = 0;
      int countingClusters = 1;
      int currentCount = 0;
      boolean addDoubleValues = false;
      while (line != null)
      {
        //System.out.println(line);
        line = line.trim();
        int index0 = line.indexOf("word_");
        if (index0 == 0)
        {
          line = line.substring(index0);
          StringTokenizer st = new StringTokenizer(line, " \t");
          String token = st.nextToken();
          token = token.substring(token.indexOf("_")+1);
          headers.add(token);
        }
        int index = line.indexOf("mean");
        if (index != -1)
        {
          line = line.substring(index + 4);
          addDoubleValues = true;
          if (countingClusters == 1)
          {
            countingClusters = 2;
          }
        }
        int index1 = line.indexOf("std. dev.");
        if (index1 != -1)
        {
          addDoubleValues = false;
        }
        if (countingClusters != 1)
        {
          line = line.trim();
          StringTokenizer st = new StringTokenizer(line, " \t");
          while (st.hasMoreTokens())
          {
            String token = st.nextToken();
            try
            {
              double db = Double.valueOf(token);
              if (countingClusters == 2)
              {
                clustersCount++;
                clusterCenters.add(new Vector<String>());
              }
              if (addDoubleValues)
              {
                currentCount++;
                clusterCenters.get(currentCount - 1).add(
                    db + "");
              }

            } catch (Exception e)
            {
              countingClusters = 0;
              currentCount = 0;
            }
          }
        }

        line = reader.readLine();
      }

      CSVFileWriter out = new CSVFileWriter("output/" + clustersCount
          + "clusterCentersMarina.csv", ',');
      out.writeFields(headers);
      for (int i = 0; i < clustersCount; i++)
      {
        Vector<String> fields = clusterCenters.get(i);
        fields.add(0, "Cluster" + i);
        out.writeFields(fields);
      }
      out.close();

    }
  }
}
TOP

Related Classes of dmt.clustering.ClusterCentersProcessor

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.