* @throws WVToolException
*/
public WVTWordList createWordList(WVTInputList input, WVTConfiguration config, List initialWords, boolean addWords) throws WVToolException {
// Initialize the word list
WVTWordList wordList = new WVTWordList(initialWords, input.getNumClasses());
wordList.setAppendWords(addWords);
wordList.setUpdateOnlyCurrent(false);
// Initialize pointers to components for the individual steps
WVTDocumentLoader loader = null;
WVTInputFilter infilter = null;
WVTCharConverter charConverter = null;
WVTTokenizer tokenizer = null;
WVTWordFilter wordFilter = null;
WVTStemmer stemmer = null;
// Obtain an expanded list of all documents to consider
Iterator inList = input.getEntries();
// Get through the list
while (inList.hasNext()) {
WVTDocumentInfo d = (WVTDocumentInfo) inList.next();
try {
// Initialize all required components for this document
loader = (WVTDocumentLoader) config.getComponentForStep(WVTConfiguration.STEP_LOADER, d);
infilter = (WVTInputFilter) config.getComponentForStep(WVTConfiguration.STEP_INPUT_FILTER, d);
charConverter = (WVTCharConverter) config.getComponentForStep(WVTConfiguration.STEP_CHAR_MAPPER, d);
tokenizer = (WVTTokenizer) config.getComponentForStep(WVTConfiguration.STEP_TOKENIZER, d);
wordFilter = (WVTWordFilter) config.getComponentForStep(WVTConfiguration.STEP_WORDFILTER, d);
stemmer = (WVTStemmer) config.getComponentForStep(WVTConfiguration.STEP_STEMMER, d);
// Process the document
TokenEnumeration tokens = stemmer.stem(wordFilter.filter(tokenizer.tokenize(charConverter.convertChars(infilter.convertToPlainText(loader.loadDocument(d), d), d), d), d), d);
while (tokens.hasMoreTokens()) {
wordList.addWordOccurance(tokens.nextToken());
}
wordList.closeDocument(d);
loader.close(d);
} catch (WVToolException e) {
WVToolLogger.getGlobalLogger().logException("Problems processing document " + d.getSourceName(), e);