// initial codebook:
templateChunks = new Vector(cbSize);
// create a placeholder template chunk
FeatChunk template0 = new FeatChunk("templateChunk0", 0, 0, null);
template0.setFeatures(DSP.mean(DSP.transpose(features)));
templateChunks.add(template0);
// distortion between each codeword (row) and each chunk (column)
double[][] distortion = new double[cbSize][ndat];
for (int x = 0; x < distortion.length; x++)
Arrays.fill(distortion[x], Double.MAX_VALUE);
// index of the minimum-distortion codeword for each chunk
int[] idx = new int[ndat];
// how much should the means be nudged when splitting
double delta = 1e-3;
// start from one codeword and go from there
for (int nValidCW = 2; nValidCW <= cbSize; nValidCW = Math.min(
2 * nValidCW, cbSize))
{
if (debug)
System.out
.println("Splitting into " + nValidCW + " codewords.");
// split codewords
for (int c = 0; c < nValidCW; c += 2)
{
FeatChunk ch = (FeatChunk) templateChunks.get(c);
ch.setFeatures(DSP.minus(ch.getFeatures(), delta));
templateChunks.set(c, ch);
FeatChunk newch = new FeatChunk("templateChunk" + c, 0, 0, null);
newch.setFeatures(DSP.plus(ch.getFeatures(), delta));
templateChunks.add(c + 1, newch);
}
double currTotalDist = 0;
double prevTotalDist = Double.MAX_VALUE;
do
{
prevTotalDist = currTotalDist;
currTotalDist = 0;
for (int c = 0; c < nValidCW; c++)
{
FeatChunk cw = (FeatChunk) templateChunks.get(c);
for (int n = 0; n < ndat; n++)
{
FeatChunk ch = (FeatChunk) trainFile.chunks.get(n);
distortion[c][n] = dist.distance(cw, ch);
currTotalDist += distortion[c][n];
}
}
// quantize
for (int n = 0; n < ndat; n++)
idx[n] = DSP.argmin(DSP.getColumn(distortion, n));
// update means
double[] newCW = new double[ndim];
for (int c = 0; c < nValidCW; c++)
{
FeatChunk ch = (FeatChunk) templateChunks.get(c);
Arrays.fill(newCW, 0);
int nmatch = 0;
for (int n = 0; n < ndat; n++)
{
if (idx[n] == c)
{
nmatch++;
for (int i = 0; i < ndim; i++)
newCW[i] += features[n][i];
}
}
if (nmatch != 0)
ch.setFeatures(DSP.rdivide(newCW, nmatch));
}
if (debug)
System.out.println(" distortion = "
+ Math.abs(currTotalDist - prevTotalDist));
}