// Obtain the total number of documents and the document frequencies
int numDocuments = wordList.getNumDocuments();
int[] docFrequencies = wordList.getDocumentFrequencies();
// Create the result structure
WVTWordVector result = new WVTWordVector();
double[] wv = new double[docFrequencies.length];
// Create the vector
// If the document contains at least one term
if (numTermOccurences > 0) {
double length = 0.0;
for (int i = 0; i < wv.length; i++) {
// Note: docFrequencies[i] is always > 0, because otherwise the word
// would not be in the word list; it is also always smaller than
// the total number of documents
double idf = Math.log(((double) numDocuments) / ((double) docFrequencies[i]));
wv[i] = (((double) frequencies[i]) / ((double) numTermOccurences)) * idf;
length = length + wv[i] * wv[i];
}
length = Math.sqrt(length);
// Normalize the vector
if (length > 0.0)
for (int i = 0; i < wv.length; i++)
wv[i] = wv[i] / length;
} else
for (int i = 0; i < wv.length; i++)
wv[i] = 0.0;
result.setDocumentInfo(d);
result.setValues(wv);
return result;
}