/*
**********************************************************************
* Copyright (c) 2002-2011, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Author: Alan Liu
* Created: November 5 2002
* Since: ICU 2.4
* 2010nov19 Markus Scherer Rewrite for formatVersion 2.
**********************************************************************
*/
package com.ibm.icu.impl;
import java.io.BufferedInputStream;
import java.io.DataInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.MissingResourceException;
import com.ibm.icu.lang.UProperty;
import com.ibm.icu.util.BytesTrie;
/**
* Wrapper for the pnames.icu binary data file. This data file is imported from icu4c. It contains property and property value aliases from
* the UCD files PropertyAliases.txt and PropertyValueAliases.txt. The file is built by the icu4c tool genpname. It must be an ASCII
* big-endian file to be usable in icu4j.
*
* This class performs two functions.
*
* (1) It can import the flat binary data into usable objects.
*
* (2) It provides an API to access the tree of objects.
*
* Needless to say, this class is tightly coupled to the binary format of icu4c's pnames.icu file.
*
* The pnames.icu file is read and parsed, and the data structures assembled, once: when this class is initialized and the singleton
* {@link #INSTANCE} is created. The constructor is private, so clients use that shared instance.
*
* @author Alan Liu
* @since ICU 2.4
*/
@SuppressWarnings("deprecation")
public final class UPropertyAliases {
    // Positions in the index array that starts the data, after the generic header.
    // The values stored at these positions are byte offsets from the start of the data.
    private static final int IX_VALUE_MAPS_OFFSET = 0;
    private static final int IX_BYTE_TRIES_OFFSET = 1;
    private static final int IX_NAME_GROUPS_OFFSET = 2;
    private static final int IX_RESERVED3_OFFSET = 3;
    // private static final int IX_RESERVED4_OFFSET=4;
    // private static final int IX_TOTAL_SIZE=5;

    // Other values.
    // private static final int IX_MAX_NAME_LENGTH=6;
    // private static final int IX_RESERVED7=7;
    // private static final int IX_COUNT=8;

    //----------------------------------------------------------------
    // Runtime data. This is an unflattened representation of the
    // data in pnames.icu.

    /** Property ranges, per-property (name group offset, value map offset) pairs, and the value maps. */
    private int[] valueMaps;
    /** All serialized BytesTries; a trie is addressed by its byte offset into this array. */
    private byte[] bytesTries;
    /** All name groups: a count char followed by that many NUL-terminated names, addressed by char offset. */
    private String nameGroups;

    /** Accepts only pnames.icu data whose first version byte is 2. */
    private static final class IsAcceptable implements ICUBinary.Authenticate {
        // @Override when we switch to Java 6
        public boolean isDataVersionAcceptable(final byte[] version) {
            return version[0] == 2;
        }
    }

    private static final IsAcceptable IS_ACCEPTABLE = new IsAcceptable();
    private static final byte[] DATA_FORMAT = { 0x70, 0x6E, 0x61, 0x6D }; // "pnam"

    /**
     * Reads the pnames.icu data from the stream into {@link #valueMaps}, {@link #bytesTries} and {@link #nameGroups}.
     * The stream is always closed before this method returns, whether it succeeds or fails.
     *
     * @param data the stream positioned at the start of the file (the generic header)
     * @throws IOException if the header is rejected, the data is truncated, or the index array is inconsistent
     */
    private void load(final InputStream data) throws IOException {
        try {
            BufferedInputStream bis = new BufferedInputStream(data);
            //dataVersion=ICUBinary.readHeaderAndDataVersion(bis, DATA_FORMAT, IS_ACCEPTABLE);
            ICUBinary.readHeader(bis, DATA_FORMAT, IS_ACCEPTABLE);
            DataInputStream ds = new DataInputStream(bis);

            // The first index is the byte offset of the valueMaps, which directly follow
            // the index array; dividing it by 4 therefore yields the number of indexes.
            int indexesLength = ds.readInt() / 4; // inIndexes[IX_VALUE_MAPS_OFFSET]/4
            if (indexesLength < 8) { // formatVersion 2 initially has 8 indexes
                throw new IOException("pnames.icu: not enough indexes");
            }
            int[] inIndexes = new int[indexesLength];
            inIndexes[0] = indexesLength * 4;
            for (int i = 1; i < indexesLength; ++i) {
                inIndexes[i] = ds.readInt();
            }

            // Read the valueMaps.
            int numInts = sectionLength(inIndexes[IX_VALUE_MAPS_OFFSET], inIndexes[IX_BYTE_TRIES_OFFSET]) / 4;
            valueMaps = new int[numInts];
            for (int i = 0; i < numInts; ++i) {
                valueMaps[i] = ds.readInt();
            }

            // Read the bytesTries.
            int numBytes = sectionLength(inIndexes[IX_BYTE_TRIES_OFFSET], inIndexes[IX_NAME_GROUPS_OFFSET]);
            bytesTries = new byte[numBytes];
            ds.readFully(bytesTries);

            // Read the nameGroups and turn them from ASCII bytes into a Java String.
            numBytes = sectionLength(inIndexes[IX_NAME_GROUPS_OFFSET], inIndexes[IX_RESERVED3_OFFSET]);
            StringBuilder sb = new StringBuilder(numBytes);
            for (int i = 0; i < numBytes; ++i) {
                sb.append((char) ds.readByte());
            }
            nameGroups = sb.toString();
        } finally {
            // Close on every path so that a malformed or truncated file does not leak the stream.
            data.close();
        }
    }

    /**
     * Returns the byte length of a data section given its start offset and the start offset of the next section.
     *
     * @throws IOException if the offsets are not in ascending order
     */
    private static int sectionLength(final int start, final int limit) throws IOException {
        if (limit < start) {
            throw new IOException("pnames.icu: section offsets are not in ascending order");
        }
        return limit - start;
    }

    private UPropertyAliases() throws IOException {
        load(ICUData.getRequiredStream(ICUResourceBundle.ICU_BUNDLE + "/pnames.icu"));
    }

    /**
     * Finds the valueMaps entry for a property.
     *
     * @param property the property enum
     * @return the valueMaps index of the property's (name group offset, value map offset) pair, or 0 if the property
     *         is not covered by any range
     */
    private int findProperty(final int property) {
        int i = 1; // valueMaps index, initially after numRanges
        for (int numRanges = valueMaps[0]; numRanges > 0; --numRanges) {
            // Read and skip the start and limit of this range.
            int start = valueMaps[i];
            int limit = valueMaps[i + 1];
            i += 2;
            if (property < start) {
                break; // Ranges are searched in ascending order; no later range can match.
            }
            if (property < limit) {
                return i + (property - start) * 2;
            }
            i += (limit - start) * 2; // Skip all entries for this range.
        }
        return 0;
    }

    /**
     * Finds the name group for one value of a property.
     *
     * @param valueMapIndex the valueMaps index of the property's value map, or 0 if the property has none
     * @param value the property value enum
     * @return the name group offset for the value, or 0 if there is no value map or the value is not listed
     */
    private int findPropertyValueNameGroup(int valueMapIndex, final int value) {
        if (valueMapIndex == 0) {
            return 0; // The property does not have named values.
        }
        ++valueMapIndex; // Skip the BytesTrie offset.
        int numRanges = valueMaps[valueMapIndex++];
        if (numRanges < 0x10) {
            // Ranges of values.
            for (; numRanges > 0; --numRanges) {
                // Read and skip the start and limit of this range.
                int start = valueMaps[valueMapIndex];
                int limit = valueMaps[valueMapIndex + 1];
                valueMapIndex += 2;
                if (value < start) {
                    break;
                }
                if (value < limit) {
                    return valueMaps[valueMapIndex + value - start];
                }
                valueMapIndex += limit - start; // Skip all entries for this range.
            }
        } else {
            // List of values: (numRanges - 0x10) sorted values, followed by as many name group offsets.
            int valuesStart = valueMapIndex;
            int nameGroupOffsetsStart = valueMapIndex + numRanges - 0x10;
            // Test the bound before reading so that an empty list never reads past its end.
            for (; valueMapIndex < nameGroupOffsetsStart; ++valueMapIndex) {
                int v = valueMaps[valueMapIndex];
                if (value < v) {
                    break;
                }
                if (value == v) {
                    return valueMaps[nameGroupOffsetsStart + valueMapIndex - valuesStart];
                }
            }
        }
        return 0;
    }

    /**
     * Returns one name out of a name group.
     *
     * @param nameGroupsIndex the offset of the name group in {@link #nameGroups}
     * @param nameIndex the zero-based index of the wanted name within the group
     * @return the name, or null if the name at that index is empty
     * @throws IllegalIcuArgumentException if nameIndex is negative or not less than the number of names in the group
     */
    private String getName(int nameGroupsIndex, int nameIndex) {
        int numNames = nameGroups.charAt(nameGroupsIndex++);
        if (nameIndex < 0 || numNames <= nameIndex) {
            throw new IllegalIcuArgumentException("Invalid property (value) name choice");
        }
        // Skip nameIndex names.
        for (; nameIndex > 0; --nameIndex) {
            while (0 != nameGroups.charAt(nameGroupsIndex++)) {
            }
        }
        // Find the end of this name.
        int nameStart = nameGroupsIndex;
        while (0 != nameGroups.charAt(nameGroupsIndex)) {
            ++nameGroupsIndex;
        }
        if (nameStart == nameGroupsIndex) {
            return null; // no name (Property[Value]Aliases.txt has "n/a")
        }
        return nameGroups.substring(nameStart, nameGroupsIndex);
    }

    /** Maps 'A'..'Z' to 'a'..'z' and returns every other code unchanged. */
    private static int asciiToLowercase(final int c) {
        return 'A' <= c && c <= 'Z' ? c + 0x20 : c;
    }

    /** Returns true for the characters that loose matching ignores: '-', '_', and ASCII White_Space. */
    private static boolean isIgnorable(final int c) {
        return c == '-' || c == '_' || c == ' ' || (0x09 <= c && c <= 0x0d);
    }

    /**
     * Walks the trie with the loosely normalized name (ignorable characters dropped, ASCII letters lowercased).
     *
     * @return true if the whole name was consumed and the trie has a value at that point
     */
    private boolean containsName(final BytesTrie trie, final CharSequence name) {
        BytesTrie.Result result = BytesTrie.Result.NO_VALUE;
        for (int i = 0; i < name.length(); ++i) {
            int c = name.charAt(i);
            if (isIgnorable(c)) {
                continue;
            }
            if (!result.hasNext()) {
                return false;
            }
            c = asciiToLowercase(c);
            result = trie.next(c);
        }
        return result.hasValue();
    }

    //----------------------------------------------------------------
    // Public API

    public static final UPropertyAliases INSTANCE;

    static {
        try {
            INSTANCE = new UPropertyAliases();
        } catch (IOException e) {
            ///CLOVER:OFF
            MissingResourceException mre = new MissingResourceException("Could not construct UPropertyAliases. Missing pnames.icu", "", "");
            mre.initCause(e);
            throw mre;
            ///CLOVER:ON
        }
    }

    /**
     * Returns a property name given a property enum. Multiple names may be available for each property; the nameChoice selects among them.
     *
     * @param property the property enum
     * @param nameChoice the zero-based index of the wanted name among the property's names
     * @return the name, or null if the property has no name for that choice
     * @throws IllegalArgumentException if the property enum is not known
     */
    public String getPropertyName(final int property, final int nameChoice) {
        int valueMapIndex = findProperty(property);
        if (valueMapIndex == 0) {
            throw new IllegalArgumentException("Invalid property enum " + property + " (0x" + Integer.toHexString(property) + ")");
        }
        return getName(valueMaps[valueMapIndex], nameChoice);
    }

    /**
     * Returns a value name given a property enum and a value enum. Multiple names may be available for each value; the nameChoice selects
     * among them.
     *
     * @param property the property enum
     * @param value the property value enum
     * @param nameChoice the zero-based index of the wanted name among the value's names
     * @return the name, or null if the value has no name for that choice
     * @throws IllegalArgumentException if the property enum is not known, or if no name group is found for the value
     */
    public String getPropertyValueName(final int property, final int value, final int nameChoice) {
        int valueMapIndex = findProperty(property);
        if (valueMapIndex == 0) {
            throw new IllegalArgumentException("Invalid property enum " + property + " (0x" + Integer.toHexString(property) + ")");
        }
        int nameGroupOffset = findPropertyValueNameGroup(valueMaps[valueMapIndex + 1], value);
        if (nameGroupOffset == 0) {
            throw new IllegalArgumentException("Property " + property + " (0x" + Integer.toHexString(property)
                    + ") does not have named values");
        }
        return getName(nameGroupOffset, nameChoice);
    }

    /**
     * Looks up an alias in the BytesTrie that starts at the given offset of {@link #bytesTries}.
     *
     * @return the trie value for the alias, or UProperty.UNDEFINED if the alias is not in the trie
     */
    private int getPropertyOrValueEnum(final int bytesTrieOffset, final CharSequence alias) {
        BytesTrie trie = new BytesTrie(bytesTries, bytesTrieOffset);
        if (containsName(trie, alias)) {
            return trie.getValue();
        } else {
            return UProperty.UNDEFINED;
        }
    }

    /**
     * Returns a property enum given one of its property names. If the property name is not known, this method returns UProperty.UNDEFINED.
     *
     * @param alias the property name; it is matched loosely (see {@link #compare})
     * @return the property enum, or UProperty.UNDEFINED
     */
    public int getPropertyEnum(final CharSequence alias) {
        // The trie of property names is the one at offset 0.
        return getPropertyOrValueEnum(0, alias);
    }

    /**
     * Returns a value enum given a property enum and one of its value names.
     *
     * @param property the property enum
     * @param alias the value name; it is matched loosely (see {@link #compare})
     * @return the value enum, or UProperty.UNDEFINED if the value name is not known for this property
     * @throws IllegalArgumentException if the property enum is not known or the property does not have named values
     */
    public int getPropertyValueEnum(final int property, final CharSequence alias) {
        int valueMapIndex = findProperty(property);
        if (valueMapIndex == 0) {
            throw new IllegalArgumentException("Invalid property enum " + property + " (0x" + Integer.toHexString(property) + ")");
        }
        valueMapIndex = valueMaps[valueMapIndex + 1];
        if (valueMapIndex == 0) {
            throw new IllegalArgumentException("Property " + property + " (0x" + Integer.toHexString(property)
                    + ") does not have named values");
        }
        // valueMapIndex is the start of the property's valueMap,
        // where the first word is the BytesTrie offset.
        return getPropertyOrValueEnum(valueMaps[valueMapIndex], alias);
    }

    /**
     * Compare two property names, returning <0, 0, or >0. The comparison is that described as "loose" matching in the Property*Aliases.txt
     * files: '-', '_' and ASCII White_Space are ignored, and ASCII letters are compared case-insensitively.
     *
     * @param stra the first name
     * @param strb the second name
     * @return a negative value, zero, or a positive value if stra loosely sorts before, equal to, or after strb
     */
    public static int compare(final String stra, final String strb) {
        final int lengthA = stra.length();
        final int lengthB = strb.length();
        int istra = 0;
        int istrb = 0;
        for (;;) {
            /* Ignore delimiters '-', '_', and ASCII White_Space */
            while (istra < lengthA && isIgnorable(stra.charAt(istra))) {
                ++istra;
            }
            while (istrb < lengthB && isIgnorable(strb.charAt(istrb))) {
                ++istrb;
            }

            /* If we reach the ends of both strings then they match */
            final boolean endstra = istra == lengthA;
            final boolean endstrb = istrb == lengthB;
            if (endstra && endstrb) {
                return 0;
            }

            // An exhausted string compares as if it continued with U+0000.
            final int cstra = endstra ? 0 : stra.charAt(istra);
            final int cstrb = endstrb ? 0 : strb.charAt(istrb);
            final int rc = asciiToLowercase(cstra) - asciiToLowercase(cstrb);
            if (rc != 0) {
                return rc;
            }

            // rc == 0 with exactly one string exhausted means the other one has a literal U+0000 here.
            // Decide now: stepping past the end of the exhausted string would make its index miss the
            // end test forever. The exhausted (shorter) string sorts first.
            if (endstra) {
                return -1;
            }
            if (endstrb) {
                return 1;
            }
            ++istra;
            ++istrb;
        }
    }
}