// String[] alphabet = { null, "a", "c", "g", "t", "#" };
// Build the tokenizer over the input `sr`. The character class [\W\w] matches
// any single character (word or non-word).
// NOTE(review): presumably RegexTokenizer emits each regex match as a token and
// LowerCaseNormalizer lower-cases it -- confirm against those classes.
LowerCaseNormalizer normalizer = new LowerCaseNormalizer();
String anyCharRegex = "[\\W\\w]";
Tokenizer<String> tokenizer = new RegexTokenizer(sr, anyCharRegex, normalizer);

// `alphabet` is not declared in this snippet; it must already be in scope.
Corpus<String> corpus = new TreeMapCorpus(tokenizer, alphabet);

// Dump the corpus summary: sizes first, then the raw sequence and alphabet.
System.out.println("alphabet size " + corpus.alphabetSize());
System.out.println("text size " + corpus.sequence().size());
System.out.println("text " + corpus.sequence());
System.out.println("alphabet " + corpus.alphabet());

// Walk the sequence through the corpus iterator and print every element
// back-to-back (no separator, no trailing newline).
Iterator<String> iter = corpus.iterator(corpus.sequence());
while (iter.hasNext()) {
    String element = iter.next();
    System.out.print(element);
}