* @param numIterations
* the maximum number of iterations to perform; iteration stops earlier once convergence is detected
*/
public static void iterateSeq(Configuration conf, Path inPath, Path priorPath, Path outPath, int numIterations)
throws IOException {
ClusterClassifier classifier = new ClusterClassifier();
classifier.readFromSeqFiles(conf, priorPath);
Path clustersOut = null;
int iteration = 1;
while (iteration <= numIterations) {
for (VectorWritable vw : new SequenceFileDirValueIterable<VectorWritable>(inPath, PathType.LIST,
PathFilters.logsCRCFilter(), conf)) {
Vector vector = vw.get();
// classification yields probabilities
Vector probabilities = classifier.classify(vector);
// policy selects weights for models given those probabilities
Vector weights = classifier.getPolicy().select(probabilities);
// training causes all models to observe data
for (Vector.Element e : weights.nonZeroes()) {
int index = e.index();
classifier.train(index, vector, weights.get(index));
}
}
// compute the posterior models
classifier.close();
// update the policy
classifier.getPolicy().update(classifier);
// output the classifier
clustersOut = new Path(outPath, Cluster.CLUSTERS_DIR + iteration);
classifier.writeToSeqFiles(clustersOut);
FileSystem fs = FileSystem.get(outPath.toUri(), conf);
iteration++;
if (isConverged(clustersOut, conf, fs)) {
break;
}