/**
* Copyright (C) 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
*
* Author: Oliver Hitz
*
* This file is part of GNU Libidn.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* as published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.Iterator;
import java.util.StringTokenizer;
import java.util.regex.Pattern;
/**
 * Command line generator for the NFKC lookup tables.
 *
 * Reads <code>CompositionExclusions.txt</code> and
 * <code>UnicodeData.txt</code> from the current directory and writes
 * <code>CombiningClass.java</code>, <code>DecompositionKeys.java</code>,
 * <code>DecompositionMappings.java</code> and
 * <code>Composition.java</code> into the current directory.  Only code
 * points that are written with exactly four hex digits are processed.
 *
 * Code points are handled as the hex strings found in the data files;
 * a mapping is a list of such strings separated by single spaces.
 */
public class GenerateNFKC
{
  /**
   * Removes a trailing comment from a data file line.
   *
   * @param in the line to process
   * @return everything before the first '#', or the unchanged line if
   *         it contains no '#'
   */
  static String stripComment(String in)
  {
    int hash = in.indexOf('#');
    return (hash == -1) ? in : in.substring(0, hash);
  }

  /**
   * Splits a string at every occurrence of a separator character.
   * Empty fields are preserved as empty strings, including the field
   * after a trailing separator and the single field of an empty input,
   * so the result never contains null.
   *
   * @param in the string to split
   * @param sep the separator character
   * @return the fields, always (number of separators + 1) elements
   */
  static String[] split(String in, char sep)
  {
    int fields = 1;
    for (int i = 0; i < in.length(); i++) {
      if (in.charAt(i) == sep) {
        fields++;
      }
    }

    String[] out = new String[fields];
    int field = 0;
    int start = 0;
    for (int i = 0; i < in.length(); i++) {
      if (in.charAt(i) == sep) {
        // Yields "" when two separators follow each other
        out[field++] = in.substring(start, i);
        start = i + 1;
      }
    }
    // Always fill the last slot; it is "" when the input is empty or
    // ends with the separator.
    out[field] = in.substring(start);
    return out;
  }

  /**
   * Tells whether a decomposition field starts with a tag in angle
   * brackets.
   *
   * @param in the decomposition field
   * @return true if the field is not empty and starts with '<'
   */
  static boolean isCompatibilityMapping(String in)
  {
    return in.length() > 0 && in.charAt(0) == '<';
  }

  /**
   * Removes the leading tag from a decomposition field.
   *
   * @param in the decomposition field, starting with a tag
   * @return the text starting two characters after the first '>'
   *         (the '>' and the character following it are skipped), or
   *         the empty string if nothing follows the tag
   */
  static String stripCompatibilityTag(String in)
  {
    // Clamp so that a tag without any mapping yields "" instead of
    // throwing StringIndexOutOfBoundsException.
    int start = Math.min(in.indexOf('>') + 2, in.length());
    return in.substring(start);
  }

  /**
   * Converts a space separated list of hex code points into the body
   * of a Java string or character literal.  The code points 005C and
   * 0022 are written as an escaped backslash and an escaped double
   * quote; every other code point is written as a backslash-u escape.
   *
   * @param in space separated hex code points
   * @return the literal body, without surrounding quotes
   */
  static String toJavaString(String in)
  {
    StringBuffer out = new StringBuffer();
    String[] chars = split(in, ' ');
    for (int i = 0; i < chars.length; i++) {
      if (chars[i].equals("005C")) {
        out.append("\\\\");
      } else if (chars[i].equals("0022")) {
        out.append("\\\"");
      } else {
        out.append("\\u");
        out.append(chars[i]);
      }
    }
    return out.toString();
  }

  /**
   * Recursively replaces every code point of a mapping that is itself
   * a key of <code>mappings</code> by its mapping.
   *
   * @param in space separated hex code points
   * @param mappings map from hex code point (String) to its space
   *        separated mapping (String)
   * @return the fully expanded, space separated mapping
   */
  static String decompose(String in, TreeMap mappings)
  {
    StringBuffer out = new StringBuffer();
    String[] c = split(in, ' ');
    for (int i = 0; i < c.length; i++) {
      if (out.length() > 0) {
        out.append(" ");
      }
      if (mappings.containsKey(c[i])) {
        out.append(decompose((String) mappings.get(c[i]), mappings));
      } else {
        out.append(c[i]);
      }
    }
    return out.toString();
  }

  /**
   * Generates all table source files.
   *
   * @param arg ignored
   * @throws Exception if a data file cannot be read or is malformed,
   *         or if an output file cannot be written
   */
  public static void main(String[] arg)
    throws Exception
  {
    // Check if the unicode files exist
    File f1 = new File("CompositionExclusions.txt");
    File f2 = new File("UnicodeData.txt");
    if (!f1.exists() || !f2.exists()) {
      System.err.println("Unable to find UnicodeData.txt or CompositionExclusions.txt.");
      System.err.println("Please download the latest version of these file from:");
      System.err.println("http://www.unicode.org/Public/UNIDATA/");
      System.exit(1);
    }

    TreeSet exclusions = readExclusions();

    TreeMap canonical = new TreeMap();
    TreeMap compatibility = new TreeMap();
    TreeMap combiningClasses = new TreeMap();
    readUnicodeData(exclusions, canonical, compatibility, combiningClasses);

    expandCompatibility(compatibility);

    TreeMap compatibilityKeys = new TreeMap();
    ArrayList compatibilityMappings = new ArrayList();
    indexCompatibility(compatibility, compatibilityKeys, compatibilityMappings);

    TreeMap firstMap = new TreeMap();
    TreeMap secondMap = new TreeMap();
    countCompositionPairs(canonical, combiningClasses, firstMap, secondMap);

    TreeMap singleFirstComposition = new TreeMap();
    TreeMap singleSecondComposition = new TreeMap();
    TreeMap complexComposition = new TreeMap();
    int composeLookupMax =
      classifyCompositions(canonical, firstMap, secondMap,
                           singleFirstComposition, singleSecondComposition,
                           complexComposition);

    System.out.print("Creating CombiningClass.java...");
    writeCombiningClass(combiningClasses);
    System.out.println(" Ok.");

    System.out.print("Creating DecompositionKeys.java...");
    writeDecompositionKeys(compatibilityKeys);
    System.out.println(" Ok.");

    System.out.print("Creating DecompositionMappings.java...");
    writeDecompositionMappings(compatibilityMappings);
    System.out.println(" Ok.");

    System.out.print("Creating Composition.java...");
    writeComposition(complexComposition, singleFirstComposition,
                     singleSecondComposition, composeLookupMax);
    System.out.println(" Ok.");
  }

  /**
   * Reads CompositionExclusions.txt and collects every entry that,
   * after comment removal and trimming, is exactly four characters
   * long.  Longer entries (code points above 0xffff) are skipped.
   */
  private static TreeSet readExclusions()
    throws IOException
  {
    TreeSet exclusions = new TreeSet();
    BufferedReader r = new BufferedReader(new FileReader("CompositionExclusions.txt"));
    try {
      String line;
      while (null != (line = r.readLine())) {
        line = stripComment(line).trim();
        if (line.length() == 4) {
          exclusions.add(line);
        }
      }
    } finally {
      r.close();
    }
    return exclusions;
  }

  /**
   * Reads UnicodeData.txt.  For every record whose code point field
   * has four characters, field 5 (if not empty) is stored in
   * <code>compatibility</code> (with a leading tag removed); untagged
   * mappings that are not excluded are also stored in
   * <code>canonical</code>.  Field 3 is stored in
   * <code>combiningClasses</code>, keyed by the numeric code point,
   * unless it is "0".
   */
  private static void readUnicodeData(TreeSet exclusions, TreeMap canonical,
                                      TreeMap compatibility,
                                      TreeMap combiningClasses)
    throws IOException
  {
    BufferedReader r = new BufferedReader(new FileReader("UnicodeData.txt"));
    try {
      String line;
      while (null != (line = r.readLine())) {
        line = stripComment(line).trim();
        if (line.length() == 0) {
          continue;
        }
        String[] f = split(line, ';');
        if (f[0].length() != 4) {
          // Skip code points > 0xffff
          continue;
        }
        if (f.length < 6) {
          throw new IOException("Malformed UnicodeData.txt line: " + line);
        }
        if (!f[5].equals("")) {
          if (isCompatibilityMapping(f[5])) {
            compatibility.put(f[0], stripCompatibilityTag(f[5]));
          } else {
            compatibility.put(f[0], f[5]);
            if (!exclusions.contains(f[0])) {
              canonical.put(f[0], f[5]);
            }
          }
        }
        if (!f[3].equals("0")) {
          combiningClasses.put(new Integer(Integer.parseInt(f[0], 16)), f[3]);
        }
      }
    } finally {
      r.close();
    }
  }

  /**
   * Recursively applies the compatibility mappings to their own
   * values until a complete pass changes nothing.
   */
  private static void expandCompatibility(TreeMap compatibility)
  {
    boolean replaced;
    do {
      replaced = false;
      Iterator i = compatibility.keySet().iterator();
      while (i.hasNext()) {
        String k = (String) i.next();
        String v = (String) compatibility.get(k);
        String d = decompose(v, compatibility);
        if (!d.equals(v)) {
          replaced = true;
          // Replacing the value of an existing key does not
          // structurally modify the map, so the iterator stays valid.
          compatibility.put(k, d);
        }
      }
    } while (replaced);
  }

  /**
   * Eliminates duplicate mappings: every distinct mapping is appended
   * once to <code>compatibilityMappings</code> and each key is
   * associated in <code>compatibilityKeys</code> with the list index
   * (Integer) of its mapping.
   */
  private static void indexCompatibility(TreeMap compatibility,
                                         TreeMap compatibilityKeys,
                                         ArrayList compatibilityMappings)
  {
    // Mapping -> list index, to avoid a linear search per key
    TreeMap indexByMapping = new TreeMap();
    Iterator i = compatibility.keySet().iterator();
    while (i.hasNext()) {
      String k = (String) i.next();
      String v = (String) compatibility.get(k);
      Integer index = (Integer) indexByMapping.get(v);
      if (index == null) {
        index = new Integer(compatibilityMappings.size());
        compatibilityMappings.add(v);
        indexByMapping.put(v, index);
      }
      compatibilityKeys.put(k, index);
    }
  }

  /**
   * Counts, over all two character canonical mappings, how often each
   * code point occurs in first and in second position.  Mappings whose
   * first character has an entry in <code>combiningClasses</code> are
   * removed from <code>canonical</code> and not counted.  Exits the
   * program if a mapping has more than two characters.
   */
  private static void countCompositionPairs(TreeMap canonical,
                                            TreeMap combiningClasses,
                                            TreeMap firstMap,
                                            TreeMap secondMap)
  {
    Iterator i = canonical.keySet().iterator();
    while (i.hasNext()) {
      String k = (String) i.next();
      String v = (String) canonical.get(k);
      String[] s = split(v, ' ');
      if (s.length == 2) {
        // Only characters with a combining class other than "0" are
        // stored in combiningClasses, so a hit means the first
        // character is not a starter.  Such pairs are ignored.
        String cc1 = (String) combiningClasses.get(new Integer(Integer.parseInt(s[0], 16)));
        if (cc1 != null) {
          // Ignore this composition
          i.remove();
          continue;
        }
        increment(firstMap, s[0]);
        increment(secondMap, s[1]);
      } else if (s.length > 2) {
        System.err.println("? wrong canonical mapping for "+k);
        System.exit(1);
      }
    }
  }

  /** Adds one to the Integer counter stored under key, starting at 1. */
  private static void increment(TreeMap counts, String key)
  {
    Integer c = (Integer) counts.get(key);
    counts.put(key, new Integer(c == null ? 1 : c.intValue() + 1));
  }

  /**
   * Distributes the two character canonical mappings over the three
   * composition tables: pairs whose first character occurs only once
   * as a first character, otherwise pairs whose second character
   * occurs only once as a second character, otherwise the complex
   * table (first -> second -> composed).  Exits the program if the
   * same pair is seen twice in the complex table.
   *
   * @return the largest code point used as a lookup key in any of the
   *         three tables, or 0 if there is none
   */
  private static int classifyCompositions(TreeMap canonical,
                                          TreeMap firstMap,
                                          TreeMap secondMap,
                                          TreeMap singleFirstComposition,
                                          TreeMap singleSecondComposition,
                                          TreeMap complexComposition)
  {
    int composeLookupMax = 0;
    Iterator i = canonical.keySet().iterator();
    while (i.hasNext()) {
      String k = (String) i.next();
      String v = (String) canonical.get(k);
      String[] s = split(v, ' ');
      if (s.length != 2) {
        continue;
      }
      Integer first = (Integer) firstMap.get(s[0]);
      Integer second = (Integer) secondMap.get(s[1]);

      if (first.intValue() == 1) {
        singleFirstComposition.put(s[0], new String[] { s[1], k });
        composeLookupMax = Math.max(composeLookupMax, Integer.parseInt(s[0], 16));
      } else if (second.intValue() == 1) {
        singleSecondComposition.put(s[1], new String[] { s[0], k });
        composeLookupMax = Math.max(composeLookupMax, Integer.parseInt(s[1], 16));
      } else {
        TreeMap m = (TreeMap) complexComposition.get(s[0]);
        if (m == null) {
          m = new TreeMap();
          complexComposition.put(s[0], m);
        } else if (m.containsKey(s[1])) {
          System.err.println("? ambiguous canonical mapping for "+s[0]);
          System.exit(1);
        }
        m.put(s[1], k);
        composeLookupMax = Math.max(composeLookupMax, Integer.parseInt(s[0], 16));
        composeLookupMax = Math.max(composeLookupMax, Integer.parseInt(s[1], 16));
      }
    }
    return composeLookupMax;
  }

  /**
   * Writes CombiningClass.java: one 256 entry page for every block of
   * 256 code points that contains a stored combining class, and an
   * index with one entry per block holding the page number or -1.
   */
  private static void writeCombiningClass(TreeMap combiningClasses)
    throws IOException
  {
    PrintWriter w = new PrintWriter(new FileWriter("CombiningClass.java"));
    try {
      w.println("/* Do not edit - automatically generated */");
      w.println();
      w.println("public class CombiningClass");
      w.println("{");
      w.println(" public final static int[][] c = new int[][] {");
      StringBuffer index = new StringBuffer();

      int count = 0;

      for (int i = 0; i < 256; i++) {
        boolean empty = true;

        StringBuffer page = new StringBuffer();
        page.append(" { /* Page "+i+" */");

        for (int j = 0; j < 256; j++) {
          Integer c = new Integer((i << 8) + j);
          String cc = (String) combiningClasses.get(c);
          // Line break after every 32 entries
          if (0 == (j & 31)) {
            page.append("\n ");
          }
          if (cc == null) {
            page.append("0, ");
          } else {
            page.append(cc+", ");
            empty = false;
          }
        }
        page.append("\n },");

        index.append(" ");

        // Pages without any stored class are not written at all
        if (!empty) {
          w.println(page.toString());
          index.append(count++);
          index.append(",\n");
        } else {
          index.append("-1,\n");
        }
      }
      w.println(" };\n");

      w.println(" public final static int[] i = new int[] {");
      w.print(index.toString());
      w.println(" };");
      w.println("}");
      // PrintWriter swallows I/O errors; surface them here
      if (w.checkError()) {
        throw new IOException("Error writing CombiningClass.java");
      }
    } finally {
      w.close();
    }
  }

  /**
   * Writes DecompositionKeys.java: a flat array of (code point,
   * mapping index) pairs in key order.
   */
  private static void writeDecompositionKeys(TreeMap compatibilityKeys)
    throws IOException
  {
    PrintWriter w = new PrintWriter(new FileWriter("DecompositionKeys.java"));
    try {
      w.println("/* Do not edit - automatically generated */");
      w.println();
      w.println("public class DecompositionKeys");
      w.println("{");
      w.println(" public final static int[] k = new int[] {");
      Iterator i = compatibilityKeys.keySet().iterator();
      while (i.hasNext()) {
        String k = (String) i.next();
        int index = ((Integer) compatibilityKeys.get(k)).intValue();
        w.println(" '\\u"+k+"', "+index+",");
      }
      w.println(" };");
      w.println("}");
      if (w.checkError()) {
        throw new IOException("Error writing DecompositionKeys.java");
      }
    } finally {
      w.close();
    }
  }

  /**
   * Writes DecompositionMappings.java: the distinct mappings as string
   * literals, in list order.
   */
  private static void writeDecompositionMappings(ArrayList compatibilityMappings)
    throws IOException
  {
    PrintWriter w = new PrintWriter(new FileWriter("DecompositionMappings.java"));
    try {
      w.println("/* Do not edit - automatically generated */");
      w.println();
      w.println("public class DecompositionMappings");
      w.println("{");
      w.println(" public final static String[] m = new String[] {");
      Iterator i = compatibilityMappings.iterator();
      while (i.hasNext()) {
        String m = (String) i.next();
        w.println(" \""+toJavaString(m)+"\",");
      }
      w.println(" };");
      w.println("}");
      if (w.checkError()) {
        throw new IOException("Error writing DecompositionMappings.java");
      }
    } finally {
      w.close();
    }
  }

  /**
   * Writes Composition.java.  Every code point used as a lookup key
   * gets a running index: first the first characters of the complex
   * table, then its second characters, then the keys of the single
   * first table, then the keys of the single second table.  The
   * start of each index range is written as a constant, and the code
   * point to index lookup is written as a two level page table.
   */
  private static void writeComposition(TreeMap complexComposition,
                                       TreeMap singleFirstComposition,
                                       TreeMap singleSecondComposition,
                                       int composeLookupMax)
    throws IOException
  {
    PrintWriter w = new PrintWriter(new FileWriter("Composition.java"));
    try {
      w.println("/* Do not edit - automatically generated */");
      w.println();
      w.println("public class Composition");
      w.println("{");

      Iterator i;
      int index = 0;

      // Numeric code point (Integer) -> running index (Integer)
      TreeMap indices = new TreeMap();

      i = complexComposition.keySet().iterator();
      while (i.hasNext()) {
        String s0 = (String) i.next();
        indices.put(new Integer(Integer.parseInt(s0, 16)), new Integer(index));
        index++;
      }

      int multiSecondStart = index;

      w.println(" public final static char[][] multiFirst = new char[][] {");
      i = complexComposition.keySet().iterator();
      while (i.hasNext()) {
        String s0 = (String) i.next();
        TreeMap m = (TreeMap) complexComposition.get(s0);

        // Index of the second character -> composed character
        TreeMap line = new TreeMap();
        int maxIndex = 1;

        Iterator i2 = m.keySet().iterator();
        while (i2.hasNext()) {
          String s1 = (String) i2.next();
          String k = (String) m.get(s1);

          Integer s1i = new Integer(Integer.parseInt(s1, 16));

          if (!indices.containsKey(s1i)) {
            indices.put(s1i, new Integer(index));
            index++;
          }
          line.put(indices.get(s1i), k);
          maxIndex = Math.max(maxIndex, ((Integer) indices.get(s1i)).intValue());
        }

        // One row per first character, columns starting at
        // multiSecondStart; unused columns are written as 0.
        w.print(" { ");
        for (int j = multiSecondStart; j <= maxIndex; j++) {
          if (line.containsKey(new Integer(j))) {
            String s = (String) line.get(new Integer(j));
            w.print("'"+toJavaString(s)+"', ");
          } else {
            w.print(" 0, ");
          }
        }
        w.println("},");
      }
      w.println(" };");

      int singleFirstStart = index;

      w.println(" public final static char[][] singleFirst = new char[][] {");
      i = singleFirstComposition.keySet().iterator();
      while (i.hasNext()) {
        String k = (String) i.next();
        String[] v = ((String[]) singleFirstComposition.get(k));
        w.println(" { '"+toJavaString(v[0])+"', '"+toJavaString(v[1])+"' },");

        if (indices.containsKey(new Integer(Integer.parseInt(k, 16)))) {
          System.out.println(k+" already indexed!");
        }

        indices.put(new Integer(Integer.parseInt(k, 16)), new Integer(index));
        index++;
      }
      w.println(" };");

      int singleSecondStart = index;

      w.println(" public final static char[][] singleSecond = new char[][] {");
      i = singleSecondComposition.keySet().iterator();
      while (i.hasNext()) {
        String k = (String) i.next();
        String[] v = ((String[]) singleSecondComposition.get(k));
        w.println(" { '"+toJavaString(v[0])+"', '"+toJavaString(v[1])+"' },");

        indices.put(new Integer(Integer.parseInt(k, 16)), new Integer(index));
        index++;
      }
      w.println(" };");

      w.println(" public final static int multiSecondStart = "+multiSecondStart+";");
      w.println(" public final static int singleFirstStart = "+singleFirstStart+";");
      w.println(" public final static int singleSecondStart = "+singleSecondStart+";");

      StringBuffer compositionPages = new StringBuffer();

      w.println(" public final static int[] composePage = new int[] {");
      int pageCount = 0;
      for (int j = 0; j*256 < composeLookupMax+255; j++) {
        boolean empty = true;
        StringBuffer page = new StringBuffer();
        for (int k = 0; k < 256; k++) {
          // Line break after every 16 entries
          if (k % 16 == 0) {
            page.append("\n ");
          }
          if (indices.containsKey(new Integer(j*256+k))) {
            page.append(indices.get(new Integer(j*256+k)));
            page.append(", ");
            empty = false;
          } else {
            page.append("-1, ");
          }
        }

        // Pages without any index are only referenced as -1
        if (empty) {
          w.println(" -1,");
        } else {
          w.println(" "+pageCount+",");
          compositionPages.append(" {");
          compositionPages.append(page);
          compositionPages.append("\n },\n");
          pageCount++;
        }
      }
      w.println(" };");
      w.println(" public final static int[][] composeData = new int[][] {");
      w.print(compositionPages);
      w.println(" };");

      w.println("}");
      if (w.checkError()) {
        throw new IOException("Error writing Composition.java");
      }
    } finally {
      w.close();
    }
  }
}