/**
 * Matches a digit, a single space character, and another digit.
 * Group 1 captures the digit before the space and group 2 the digit after it.
 * The pattern is compiled with the CASE_INSENSITIVE and UNICODE_CASE flags,
 * although the expression itself contains no letters.
 */
private static final Pattern SPACE_DIGITS =
    Pattern.compile("([\\d]) ([\\d])", Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE);
public CatalanWordTokenizer() {
tagger = new CatalanTagger();
// Apostrophe at the beginning of a word. Ex.: l'home, s'estima, n'omple, d'hivern, etc.
// It creates 2 tokens: <token>l'</token><token>home</token>
patterns[0] = Pattern.compile("^([lnmtsd]')([^'\\-]*)$",Pattern.CASE_INSENSITIVE|Pattern.UNICODE_CASE);