import java.io.*;
public class TransliterationChart {
public static void main(String[] args) throws IOException {
System.out.println("Start");
UnicodeSet lengthMarks = new UnicodeSet("[\u09D7\u0B56-\u0B57\u0BD7\u0C56\u0CD5-\u0CD6\u0D57\u0C55\u0CD5]");
int[] indicScripts = {
UScript.LATIN,
UScript.DEVANAGARI,
UScript.BENGALI,
UScript.GURMUKHI,
UScript.GUJARATI,
UScript.ORIYA,
UScript.TAMIL,
UScript.TELUGU,
UScript.KANNADA,
UScript.MALAYALAM,
};
String[] names = new String[indicScripts.length];
UnicodeSet[] sets = new UnicodeSet[indicScripts.length];
Transliterator[] fallbacks = new Transliterator[indicScripts.length];
for (int i = 0; i < indicScripts.length; ++i) {
names[i] = UScript.getName(indicScripts[i]);
sets[i] = new UnicodeSet("[[:" + names[i] + ":]&[[:L:][:M:]]&[:age=3.1:]]");
fallbacks[i] = Transliterator.getInstance("any-" + names[i]);
}
EquivClass eq = new EquivClass(new ReverseComparator());
PrintWriter pw = openPrintWriter("transChart.html");
pw.println("<html><head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>");
pw.println("<title>Indic Transliteration Chart</title><style>");
pw.println("td { text-align: Center; font-size: 200% }");
pw.println("tt { font-size: 50% }");
pw.println("td.miss { background-color: #CCCCFF }");
pw.println("</style></head><body bgcolor='#FFFFFF'>");
Transliterator anyToLatin = Transliterator.getInstance("any-latin");
String testString = "\u0946\u093E";
UnicodeSet failNorm = new UnicodeSet();
Set latinFail = new TreeSet();
for (int i = 0; i < indicScripts.length; ++i) {
if (indicScripts[i] == UScript.LATIN) continue;
String source = names[i];
System.out.println(source);
UnicodeSet sourceChars = sets[i];
for (int j = 0; j < indicScripts.length; ++j) {
if (i == j) continue;
String target = names[j];
Transliterator forward = Transliterator.getInstance(source + '-' + target);
Transliterator backward = forward.getInverse();
UnicodeSetIterator it = new UnicodeSetIterator(sourceChars);
while (it.next()) {
if (lengthMarks.contains(it.codepoint)) continue;
String s = Normalizer.normalize(it.codepoint,Normalizer.NFC,0);
//if (!Normalizer.isNormalized(s,Normalizer.NFC,0)) continue;
if (!s.equals(Normalizer.normalize(s,Normalizer.NFD,0))) {
failNorm.add(it.codepoint);
}
String t = fix(forward.transliterate(s));
if (t.equals(testString)) {
System.out.println("debug");
}
String r = fix(backward.transliterate(t));
if (Normalizer.compare(s,r,0) == 0) {
if (indicScripts[j] != UScript.LATIN) eq.add(s,t);
} else {
if (indicScripts[j] == UScript.LATIN) {
latinFail.add(s + " - " + t + " - " + r);
}
}
}
}
}
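// write the chart: a header row of abbreviated script names, then one table row per set of equivalents gathered in eq by the loop above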
pw.println("<table border='1' cellspacing='0'><tr>");
for (int i = 0; i < indicScripts.length; ++i) {
pw.print("<th width='10%'>" + names[i].substring(0,3) + "</th>");
}
pw.println("</tr>");
Iterator rit = eq.getSetIterator(new MyComparator());
while(rit.hasNext()) {
Set equivs = (Set)rit.next();
pw.print("<tr>");
Iterator sit = equivs.iterator();
String source = (String)sit.next();
String item = anyToLatin.transliterate(source);
if (item.equals("") || source.equals(item)) item = " ";
pw.print("<td>" + item + "</td>");
for (int i = 1; i < indicScripts.length; ++i) {
sit = equivs.iterator();
item = "";
while (sit.hasNext()) {
String trial = (String)sit.next();
if (!sets[i].containsAll(trial)) continue;
item = trial;
break;
}
String classString = "";
if (item.equals("")) {
classString = " class='miss'";
String temp = fallbacks[i].transliterate(source);
if (!temp.equals("") && !temp.equals(source)) item = temp;
}
String backup = item.equals("") ? " " : item;
pw.print("<td" + classString + " title='" + getName(item, "; ") + "'>"
+ backup + "<br><tt>" + Utility.hex(item) + "</tt></td>");
}
/*
Iterator sit = equivs.iterator();
while (sit.hasNext()) {
String item = (String)sit.next();
pw.print("<td>" + item + "</td>");
}
*/
pw.println("</tr>");
}
pw.println("</table>");
if (true) {
pw.println("<h2>Failed Normalization</h2>");
UnicodeSetIterator it = new UnicodeSetIterator(failNorm);
UnicodeSet pieces = new UnicodeSet();
while (it.next()) {
String s = UTF16.valueOf(it.codepoint);
String d = Normalizer.normalize(s,Normalizer.NFD,0);
pw.println("Norm:" + s + ", " + Utility.hex(s) + " " + UCharacter.getName(it.codepoint)
+ "; " + d + ", " + Utility.hex(d) + ", ");
pw.println(UCharacter.getName(d.charAt(1)) + "<br>");
if (UCharacter.getName(d.charAt(1)).indexOf("LENGTH") >= 0) pieces.add(d.charAt(1));
}
pw.println(pieces);
pw.println("<h2>Failed Round-Trip</h2>");
Iterator cit = latinFail.iterator();