* @(#)$Id: CollationUtils.java 3619 2008-03-26 07:23:03Z yui $
* Copyright 2006-2008 Makoto YUI
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* See the License for the specific language governing permissions and
* limitations under the License.
* Contributors:
* Makoto YUI - initial implementation
package xbird.util.resource;
import java.io.InputStream;
import java.net.URI;
import java.net.URL;
import java.text.*;
import java.util.Locale;
import xbird.util.io.IOUtils;
import xbird.xquery.XQueryConstants;
import xbird.xquery.meta.StaticContext;
* Collator construction utility.
* <DIV lang="en">
* <pre>
* The strategy for resolving a <code>Collator</code> is as follows:
* (1) If arg is null or "http://www.w3.org/2005/04/xpath-functions/collation/codepoint",
* assumed to use default collation, and return null.
* (2) If arg starts with "http:://xbird.metabrick.org/collation?",
* a system collation is assumed.
* (3) Otherwise, try to read the arg URI and construct a <code>RuleBasedCollator</code>
* using the response content as the collation rules.
* (4) If constructing a collation fails in steps (1) to (3), return null.
* System collation form: <i>SYSTEM_COLLATION?GETS_ARGS</i>
* [1] <i>SYSTEM_COLLATION</i> = http:://xbird.metabrick.org/collation?
* - More precisely, the value defined in {@link CollationUtils#SYSTEM_COLLATION}.
* [2] <i>GETS_ARGS</i> := <i>LOCALE</i>("#"<i>COLOPTS</i> )*
* [3] <i>LOCALE</i> := <i>LANG</i>"-"<i>COUNTRY</i>"-"<i>VARIANT</i>
* - LOCALE(LANG, COUNTRY, VARIANT) constructs <code>java.util.Locale</code>.
* [4] <i>COLOPTS</i> := <i>STLENGTH</i> | <i>DECOMPOSITION</i>
* [5] <i>STLENGTH</i> := "primary" | "secondary" | "tertiary" | "identical"
* [6] <i>DECOMPOSITION</i> := "canonical" | "NFD" | "full" | "NFKD"
* - "canonical" and "NFD" means <a href="http://www.unicode.org/unicode/reports/tr15/#Canonical_Composition_Examples">CANONICAL_DECOMPOSITION</a>.
* - "full" and "NFKD" means <a href="http://www.unicode.org/unicode/reports/tr15/#Compatibility_Composition_Examples">FULL_DECOMPOSITION</a>.
* Cautions:
* - when a parameter is specified more than once, the last specified value overrides the earlier ones.
* </pre>
* </DIV>
* <DIV lang="ja"></DIV>
* @author Makoto YUI (yuin405+xbird@gmail.com)
* @see Locale
* @see Collator
* @see RuleBasedCollator
public final class CollationUtils {
/**
 * URI prefix that {@code resolve(String, StaticContext)} looks for in a
 * collation URI to detect a system collation request; the text following
 * the prefix is parsed as locale and collation options.
 * NOTE(review): the literal contains a doubled colon ("http:://") — it is
 * runtime text and is left untouched here; confirm whether it is intended.
 */
public static final String SYSTEM_COLLATION = "http:://xbird/collation?";
/** Private no-argument constructor; the methods visible in this class are all static. */
private CollationUtils() {}
public static Collator resolve(URI uri, StaticContext statEnv) {
return resolve(uri.toString(), statEnv);
public static Collator resolve(String uri, StaticContext statEnv) {
if(uri == null) {
uri = statEnv.getDefaultCollation();
if(uri == null || uri.equals(XQueryConstants.UNICODE_CODEPOINT_COLLATION)) {
return null;
int pos = uri.indexOf(SYSTEM_COLLATION);
if(pos > 0) {
String getsArg = uri.substring(SYSTEM_COLLATION.length());
final int sharppos = getsArg.indexOf('#');
String qloc = (sharppos > 0) ? getsArg.substring(0, sharppos - 1) : getsArg;
String args[] = qloc.split("-");
final Locale loc;
final int loclen = args.length;
switch(loclen) {
case 1:
loc = new Locale(args[0]);
case 2:
loc = new Locale(args[0], args[1]);
case 3:
loc = new Locale(args[0], args[1], args[2]);
throw new IllegalArgumentException("invalid collation uri: " + uri);
final Collator col = Collator.getInstance(loc);
if(sharppos > 0) {
if(getsArg.indexOf("#primary") > 0) {
} else if(getsArg.indexOf("#secondary") > 0) {
} else if(getsArg.indexOf("#tertiary") > 0) {
} else if(getsArg.indexOf("#identical") > 0) {
} else if(getsArg.indexOf("#canonical") > 0 || getsArg.indexOf("#NFD") > 0) {
} else if(getsArg.indexOf("#full") > 0 || getsArg.indexOf("#NFKD") > 0) {
return col;
} else {
final Collator col;
try {
URI baseuri = statEnv.getBaseURI();
final URI targetUri;
if(baseuri == null) {
targetUri = URI.create(uri);
} else {
targetUri = baseuri.resolve(uri);
URL targetUrl = targetUri.toURL();
InputStream is = targetUrl.openStream();
String rules = IOUtils.toString(is);
col = new RuleBasedCollator(rules);
} catch (Exception e) {
return null;
return col;
public static int indexOf(final String src, final String pattern, final RuleBasedCollator collator) {
if(collator == null) {
return src.indexOf(pattern);
} else {
return indexOf(src, pattern, collator, false);
public static boolean endsWith(final String src, final String pattern, final RuleBasedCollator collator) {
if(collator == null) {
return src.endsWith(pattern);
} else {
return indexOf(src, pattern, collator, true) != -1;
/**
 * Collation-aware search shared by the public {@code indexOf} and {@code endsWith}:
 * walks the collation elements of {@code src} and compares them with the
 * collation elements of {@code pattern}.
 *
 * NOTE(review): in this listing the closing braces (and possibly other
 * statements, e.g. an update of {@code seeked} other than the reset to 0) are
 * not visible, so the comments below describe only the statements that are
 * shown and do not assert the exact nesting.
 *
 * @param src the string whose collation elements are scanned
 * @param pattern the string whose collation elements are matched against
 * @param collator the collator producing the element iterators; asserted non-null
 * @param endsWith when true, a candidate match is turned into -1 if the source
 *        iterator still yields another element after it
 * @return a value derived from the loop counter {@code i} ({@code i} or {@code i - 1}),
 *         or -1; note that {@code i} advances once per source collation element
 *         taken by the loop header, not per char
 */
private static int indexOf(final String src, final String pattern, final RuleBasedCollator collator, boolean endsWith) {
assert (collator != null);
CollationElementIterator srcItor = collator.getCollationElementIterator(src);
CollationElementIterator patternItor = collator.getCollationElementIterator(pattern);
// NOTE(review): getOffset() is read before any next() call on patternItor, so this is
// presumably the iterator's starting offset rather than the pattern length — confirm.
final int ptnlen = patternItor.getOffset();
int seeked = 0;
// 'more' gates whether the next pattern element is fetched on this iteration.
boolean more = true;
int srccp, ptncp = -1;
// One source collation element is consumed per iteration until NULLORDER is returned.
for(int i = 0; (srccp = srcItor.next()) != CollationElementIterator.NULLORDER; i++) {
if(more) {
ptncp = patternItor.next();
// NULLORDER from the pattern iterator means the pattern has no further elements.
if(ptncp == CollationElementIterator.NULLORDER) {
if(endsWith) {
// This next() call also consumes a source element.
if(srcItor.next() != CollationElementIterator.NULLORDER) {
return -1;
return i - 1;
if(srccp != ptncp) {
// Mismatch: stop fetching pattern elements and reset the match counter.
more = false;
seeked = 0;
} else {
if(seeked == ptnlen) {
if(endsWith) {
// Same trailing-element check as above.
if(srcItor.next() != CollationElementIterator.NULLORDER) {
return -1;
return i;
more = true;
return -1;