266267268269270271272273
* Tokenizes the document using a WordTokenizer. This is not the final version. * */ protected void tokenize() throws IOException { String txt = getText(); WordTokenizer tok = new WordTokenizer(getLocale()); tokenSpans = tok.getTokenSpans(txt); }