// String[] alphabet = { null, "a", "ā", "b", "g", "h", "j", "l", "m",
// "n", "r", "s", "t", "y", "#" };
// StringReader sr = new StringReader("caggtcagtcacggtatca#");
// String[] alphabet = { null, "a", "c", "g", "t", "#" };
// Build a tokenizer over the reader `sr` (declared outside this excerpt).
// The regex [\W\w] is a character class that matches any single character;
// presumably RegexTokenizer emits each match as a token, so the input is split
// into one-character tokens -- TODO confirm against RegexTokenizer.
// The LowerCaseNormalizer is handed to the tokenizer; judging by its name it
// lower-cases tokens -- confirm against its implementation.
Tokenizer<String> tokenizer = new RegexTokenizer(sr, "[\\W\\w]", new LowerCaseNormalizer());
// Build the corpus from the tokenizer and `alphabet` (also declared outside
// this excerpt; the commented-out samples above suggest a String[]).
Corpus<String> corpus = new TreeMapCorpus(tokenizer, alphabet);
// Diagnostic dump to stdout: the alphabet size, the number of elements in the
// corpus sequence, the sequence itself, and the alphabet as reported by the corpus.
System.out.println("alphabet size " + corpus.alphabetSize());
System.out.println("text size " + corpus.sequence().size());
System.out.println("text " + corpus.sequence());
System.out.println("alphabet " + corpus.alphabet());