package dmt.clustering;
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.StringTokenizer;
import java.util.Vector;
import dmt.tools.CSVFileWriter;
import dmt.tools.SparseVector;
/**
 * Extracts the per-cluster "mean" values from EM clustering text outputs and
 * writes them as CSV files, one row per cluster and one column per attribute.
 *
 * <p>The parser recognises three kinds of lines (after trimming):
 * <ul>
 *   <li>lines starting with {@code word_}: the text after the first underscore
 *       of the first token becomes a CSV column header;</li>
 *   <li>lines containing {@code mean}: the numeric tokens after the label are
 *       stored, the n-th value going to the n-th cluster row;</li>
 *   <li>lines containing {@code std. dev.}: their numeric tokens are skipped.</li>
 * </ul>
 * The number of clusters is taken from the number of numeric tokens that
 * follow the first {@code mean} label.
 */
public class ClusterCentersProcessor
{
    /** Parser state: no "mean" row seen yet, lines are not tokenised at all. */
    private static final int WAITING_FOR_FIRST_MEAN = 1;

    /** Parser state: every numeric token read creates a new cluster row. */
    private static final int DISCOVERING_CLUSTERS = 2;

    /** Parser state: the cluster rows exist, numeric tokens only fill them. */
    private static final int CLUSTERS_KNOWN = 0;

    private static final String ATTRIBUTE_PREFIX = "word_";
    private static final String MEAN_LABEL = "mean";
    private static final String STD_DEV_LABEL = "std. dev.";
    private static final String TOKEN_DELIMITERS = " \t";

    /**
     * Processes the EM outputs for 2 to 10 clusters found under
     * {@code clustering_outputs/} and writes one CSV per input to
     * {@code output/}.
     *
     * @param args ignored
     * @throws IOException if an input file cannot be opened or read
     */
    public static void main(String[] args) throws IOException
    {
        for (int fileIndex = 2; fileIndex < 11; fileIndex++)
        {
            processFile(fileIndex);
        }
    }

    /** Parses one EM output file and writes its cluster centers as CSV. */
    private static void processFile(int fileIndex) throws IOException
    {
        Vector<Vector<String>> clusterCenters = new Vector<Vector<String>>();
        Vector<String> headers = new Vector<String>();
        // The first column holds the row label ("Cluster<i>"), so its header is blank.
        headers.add(" ");

        BufferedReader reader = new BufferedReader(new FileReader(
                "clustering_outputs/em_" + fileIndex
                        + "clusters_5iterations.txt"));
        try
        {
            parseClusterCenters(reader, headers, clusterCenters);
        }
        finally
        {
            // The reader used to be left open for every processed file.
            reader.close();
        }

        writeClusterCenters(headers, clusterCenters);
    }

    /**
     * Reads the whole input and fills {@code headers} with the attribute names
     * and {@code clusterCenters} with one row of mean values per cluster.
     */
    private static void parseClusterCenters(BufferedReader reader,
            Vector<String> headers, Vector<Vector<String>> clusterCenters)
            throws IOException
    {
        int state = WAITING_FOR_FIRST_MEAN;
        // Index of the cluster row that receives the next mean value.
        int nextCluster = 0;
        boolean collectValues = false;

        for (String line = reader.readLine(); line != null; line = reader
                .readLine())
        {
            line = line.trim();

            // On attribute lines the row labels are searched only after the
            // attribute name, so a name such as "word_meaning" is not mistaken
            // for a "mean" row (which used to break cluster discovery).
            int labelSearchStart = 0;
            if (line.startsWith(ATTRIBUTE_PREFIX))
            {
                String attribute = new StringTokenizer(line, TOKEN_DELIMITERS)
                        .nextToken();
                headers.add(attribute.substring(attribute.indexOf("_") + 1));
                labelSearchStart = attribute.length();
            }

            int meanIndex = line.indexOf(MEAN_LABEL, labelSearchStart);
            if (meanIndex != -1)
            {
                // Keep only the text that follows the label.
                line = line.substring(meanIndex + MEAN_LABEL.length());
                labelSearchStart = 0;
                collectValues = true;
                if (state == WAITING_FOR_FIRST_MEAN)
                {
                    state = DISCOVERING_CLUSTERS;
                }
            }

            if (line.indexOf(STD_DEV_LABEL, labelSearchStart) != -1)
            {
                collectValues = false;
            }

            if (state == WAITING_FOR_FIRST_MEAN)
            {
                continue;
            }

            StringTokenizer tokens = new StringTokenizer(line.trim(),
                    TOKEN_DELIMITERS);
            while (tokens.hasMoreTokens())
            {
                double value;
                try
                {
                    value = Double.parseDouble(tokens.nextToken());
                }
                catch (NumberFormatException notANumber)
                {
                    // Any non-numeric token (labels, attribute names, ...)
                    // ends cluster discovery and rewinds to the first cluster.
                    state = CLUSTERS_KNOWN;
                    nextCluster = 0;
                    continue;
                }

                if (state == DISCOVERING_CLUSTERS)
                {
                    clusterCenters.add(new Vector<String>());
                }

                if (collectValues)
                {
                    if (nextCluster < clusterCenters.size())
                    {
                        clusterCenters.get(nextCluster).add(value + "");
                        nextCluster++;
                    }
                    else
                    {
                        // More values than cluster rows: same reset that the
                        // former blanket catch applied to the index error.
                        state = CLUSTERS_KNOWN;
                        nextCluster = 0;
                    }
                }
            }
        }
    }

    /**
     * Writes the header row followed by one "Cluster&lt;i&gt;" row per cluster.
     * The output file name is prefixed with the number of clusters found.
     */
    private static void writeClusterCenters(Vector<String> headers,
            Vector<Vector<String>> clusterCenters) throws IOException
    {
        int clustersCount = clusterCenters.size();
        CSVFileWriter out = new CSVFileWriter("output/" + clustersCount
                + "clusterCentersMarina.csv", ',');
        try
        {
            out.writeFields(headers);
            for (int i = 0; i < clustersCount; i++)
            {
                Vector<String> fields = clusterCenters.get(i);
                fields.add(0, "Cluster" + i);
                out.writeFields(fields);
            }
        }
        finally
        {
            // Close the writer even when a row fails to be written.
            out.close();
        }
    }
}