BagOfWordsData bagOfWordsData, BagOfWordsDecorator bDecorator) {
FastVector wekaAttributes = new FastVector(bagOfWordsData
.getNumericWords().size()
+ bagOfWordsData.getNominalWordValueMap().size() + 2);
// add instance id attribute
wekaAttributes.addElement(new Attribute(INSTANCE_ID));
// add numeric word attributes
for (String word : bagOfWordsData.getNumericWords()) {
Attribute attribute = new Attribute(word);
wekaAttributes.addElement(attribute);
}
// add nominal word attributes
for (Map.Entry<String, SortedSet<String>> nominalWordEntry : bagOfWordsData
.getNominalWordValueMap().entrySet()) {
FastVector wordValues = new FastVector(nominalWordEntry.getValue()
.size());
for (String wordValue : nominalWordEntry.getValue()) {
wordValues.addElement(wordValue);
}
Attribute attribute = new Attribute(nominalWordEntry.getKey(),
wordValues);
wekaAttributes.addElement(attribute);
}
// add class attribute
FastVector wekaClassLabels = new FastVector(bagOfWordsData.getClasses()
.size());
for (String classLabel : bagOfWordsData.getClasses()) {
wekaClassLabels.addElement(classLabel);
}
wekaAttributes.addElement(new Attribute(CLASS, wekaClassLabels));
Instances instances = new Instances(arffRelation, wekaAttributes, 0);
instances.setClassIndex(instances.numAttributes() - 1);
return instances;
}