String input = "㌰゙5℃№㈱㌘ザゾ";
CharFilter reader = new ICUNormalizer2CharFilter(new StringReader(input),
Normalizer2.getInstance(null, "nfkc_cf", Normalizer2.Mode.COMPOSE));
Tokenizer tokenStream = new NGramTokenizer(TEST_VERSION_CURRENT, newAttributeFactory(), reader, 1, 1);
assertTokenStreamContents(tokenStream,
new String[] {"ピ", "ゴ", "5", "°", "c", "n", "o", "(", "株", ")", "グ", "ラ", "ム", "ザ", "ゾ"},
new int[]{0, 1, 2, 3, 3, 4, 4, 5, 5, 5, 6, 6, 6, 7, 9},
new int[]{1, 2, 3, 3, 4, 4, 5, 5, 5, 6, 6, 6, 7, 9, 11},