}
// We have to carefully output the possibilities as compact UTF-16
// range expressions, or JFlex will run out of memory (OOM)!
static void outputMacro(String name, String pattern) {
UnicodeSet set = new UnicodeSet(pattern);
set.removeAll(BMP);
System.out.println(name + " = (");
// if the set is empty, we have to do this or jflex will barf
if (set.isEmpty()) {
System.out.println("\t []");
}
HashMap<Character,UnicodeSet> utf16ByLead = new HashMap<Character,UnicodeSet>();
for (UnicodeSetIterator it = new UnicodeSetIterator(set); it.next();) {
char utf16[] = Character.toChars(it.codepoint);
UnicodeSet trails = utf16ByLead.get(utf16[0]);
if (trails == null) {
trails = new UnicodeSet();
utf16ByLead.put(utf16[0], trails);
}
trails.add(utf16[1]);
}
Map<String,UnicodeSet> utf16ByTrail = new HashMap<String,UnicodeSet>();
for (Map.Entry<Character,UnicodeSet> entry : utf16ByLead.entrySet()) {
String trail = entry.getValue().getRegexEquivalent();
UnicodeSet leads = utf16ByTrail.get(trail);
if (leads == null) {
leads = new UnicodeSet();
utf16ByTrail.put(trail, leads);
}
leads.add(entry.getKey());
}
boolean isFirst = true;
for (Map.Entry<String,UnicodeSet> entry : utf16ByTrail.entrySet()) {
System.out.print( isFirst ? "\t " : "\t| ");