// Use a HashSet to calculate the total vocabulary size (distinct words across all tuples)
Set<String> vocabulary = new HashSet<String>();
// Read tuples from generate job: one TupleFile per path matched by the generatedModel glob
for(FileStatus fileStatus : fileSystem.globStatus(generatedModel)) {
  TupleFile.Reader reader = new TupleFile.Reader(fileSystem, conf, fileStatus.getPath());
  try {
    // A single Tuple instance is reused as the read buffer for every record of this file
    Tuple tuple = new Tuple(reader.getSchema());
    while(reader.next(tuple)) {
      // Read Tuple fields: "count", "category" and "word"
      Integer count = (Integer) tuple.get("count");
      Category category = (Category) tuple.get("category");
      // toString() yields a fresh String, so the stored word does not alias the reused tuple
      String word = tuple.get("word").toString();
      vocabulary.add(word);
      // Accumulate the total count per category, starting from 0 for an unseen category
      tokensPerCategory.put(category, MapUtils.getInteger(tokensPerCategory, category, 0) + count);
      // NOTE(review): assumes a word->count map was registered for every category
      // before this loop runs - confirm against the surrounding initialization code
      wordCountPerCategory.get(category).put(word, count);
    }
  } finally {
    // Release the underlying file handle even when reading fails midway;
    // previously the reader was never closed, leaking one handle per model file
    reader.close();
  }
}