/* eXist Native XML Database
* Copyright (C) 2006-2009, The eXist Project
* http://exist-db.org/
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Library General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* $Id$
*/
package org.exist.xquery.functions.fn;
import org.apache.log4j.Logger;
import org.exist.dom.QName;
import org.exist.xquery.Cardinality;
import org.exist.xquery.Dependency;
import org.exist.xquery.ErrorCodes;
import org.exist.xquery.Function;
import org.exist.xquery.FunctionSignature;
import org.exist.xquery.Profiler;
import org.exist.xquery.XPathException;
import org.exist.xquery.XQueryContext;
import org.exist.xquery.value.FunctionParameterSequenceType;
import org.exist.xquery.value.FunctionReturnSequenceType;
import org.exist.xquery.value.Item;
import org.exist.xquery.value.Sequence;
import org.exist.xquery.value.SequenceType;
import org.exist.xquery.value.StringValue;
import org.exist.xquery.value.Type;
import java.lang.reflect.Constructor;
import java.lang.reflect.Field;
import java.lang.reflect.Method;
import java.util.Locale;
/**
 * Implements fn:normalize-unicode().
 *
 * <p>Uses icu4j by introspection: the class {@code com.ibm.icu.text.Normalizer}
 * is looked up by name at evaluation time, so there is no compile-time
 * dependency on the library.</p>
 *
 * <p>The reflective handles (class, mode field, constructor, method) are
 * cached in plain, unsynchronized instance fields.</p>
 *
 * @author perig
 *
 */
public class FunNormalizeUnicode extends Function {

    protected static final Logger logger = Logger.getLogger(FunNormalizeUnicode.class);

    /** Fully qualified name of the ICU4J normalizer, resolved reflectively. */
    private static final String NORMALIZER_CLASS_NAME = "com.ibm.icu.text.Normalizer";

    /** Binary name of the nested type that the normalization form fields must be declared as. */
    private static final String MODE_CLASS_NAME = NORMALIZER_CLASS_NAME + "$Mode";

    /** Normalization form applied when no second argument is supplied. */
    private static final String DEFAULT_NORMALIZATION_FORM = "NFC";

    /** Prefix of the message reported when the reflective call into ICU4J fails. */
    private static final String ICU_FAILURE_MESSAGE = "Can not find the ICU4J library in the classpath ";

    /** Third ("options") argument handed to the Normalizer constructor. */
    private static final Integer DUMMY_INTEGER = Integer.valueOf(0);

    /** Name of the normalization form that {@link #modeField} / {@link #modeObject} currently describe. */
    private String normalizationForm = null;
    /** Cached {@code com.ibm.icu.text.Normalizer} class. */
    private Class<?> clazz = null;
    /** Cached static field of the Normalizer class holding the current mode. */
    private Field modeField = null;
    /** Cached value of {@link #modeField} (a {@code com.ibm.icu.text.Normalizer.Mode}). */
    private Object modeObject = null;
    /** Cached {@code Normalizer(String, Mode, int)} constructor. */
    private Constructor<?> constructor = null;
    /** Cached {@code Normalizer.getText()} method. */
    private Method method = null;

    protected static final String FUNCTION_DESCRIPTION_0_PARAM =
        "Returns the value of $arg normalized according to the " +
        "normalization form \"NFC\"\n\n";

    protected static final String FUNCTION_DESCRIPTION_1_PARAM =
        "Returns the value of $arg normalized according to the " +
        "normalization criteria for a normalization form identified " +
        "by the value of $normalization-form. The effective value of " +
        "the $normalization-form is computed by removing leading and " +
        "trailing blanks, if present, and converting to upper case.\n\n" +
        "If the value of $arg is the empty sequence, returns the zero-length string.\n\n" +
        "See [Character Model for the World Wide Web 1.0: Normalization] " +
        "for a description of the normalization forms.\n\n" +
        "- If the effective value of $normalization-form is \"NFC\", then the value " +
        "returned by the function is the value of $arg in Unicode Normalization Form C (NFC).\n" +
        "- If the effective value of $normalization-form is \"NFD\", then the value " +
        "returned by the function is the value of $arg in Unicode Normalization Form D (NFD).\n" +
        "- If the effective value of $normalization-form is \"NFKC\", then the value " +
        "returned by the function is the value of $arg in Unicode Normalization Form KC (NFKC).\n" +
        "- If the effective value of $normalization-form is \"NFKD\", then the value " +
        "returned by the function is the value of $arg in Unicode Normalization Form KD (NFKD).\n" +
        "- If the effective value of $normalization-form is \"FULLY-NORMALIZED\", then the value " +
        "returned by the function is the value of $arg in the fully normalized form.\n" +
        "- If the effective value of $normalization-form is the zero-length string, " +
        "no normalization is performed and $arg is returned.\n\n" +
        "Conforming implementations must support normalization form \"NFC\" and may " +
        "support normalization forms \"NFD\", \"NFKC\", \"NFKD\", \"FULLY-NORMALIZED\". " +
        "They may also support other normalization forms with implementation-defined semantics. " +
        "If the effective value of the $normalization-form is other than one of the values " +
        "supported by the implementation, then an error is raised [err:FOCH0003].";

    protected static final FunctionParameterSequenceType ARG_PARAM = new FunctionParameterSequenceType("arg", Type.STRING, Cardinality.ZERO_OR_ONE, "The unicode string to normalize");
    protected static final FunctionParameterSequenceType NF_PARAM = new FunctionParameterSequenceType("normalization-form", Type.STRING, Cardinality.ONE, "The normalization form");
    protected static final FunctionReturnSequenceType RETURN_TYPE = new FunctionReturnSequenceType(Type.STRING, Cardinality.ONE, "the normalized text");

    /** The one-argument and two-argument signatures of fn:normalize-unicode. */
    public final static FunctionSignature signatures [] = {
        new FunctionSignature(
            new QName("normalize-unicode", Function.BUILTIN_FUNCTION_NS),
            FUNCTION_DESCRIPTION_0_PARAM,
            new SequenceType[] { ARG_PARAM },
            RETURN_TYPE
        ),
        new FunctionSignature (
            new QName("normalize-unicode", Function.BUILTIN_FUNCTION_NS),
            FUNCTION_DESCRIPTION_1_PARAM,
            new SequenceType[] { ARG_PARAM, NF_PARAM },
            RETURN_TYPE
        )
    };

    public FunNormalizeUnicode(XQueryContext context, FunctionSignature signature) {
        super(context, signature);
    }

    /**
     * Evaluates the function.
     *
     * <p>An empty first argument yields the zero-length string. The
     * normalization form defaults to "NFC"; when a second argument is given
     * it is upper-cased and trimmed first. An empty form returns the string
     * value of the first argument without normalizing it.</p>
     *
     * @param contextSequence the context sequence; replaced by
     *        {@code contextItem.toSequence()} when {@code contextItem} is not null
     * @param contextItem the context item, may be null
     * @return the resulting string value
     * @throws XPathException with error code FOCH0003 when the normalization
     *         form is not a mode of the ICU4J Normalizer, or without a specific
     *         code when the reflective call into ICU4J fails
     */
    public Sequence eval(Sequence contextSequence, Item contextItem) throws XPathException {
        if (context.getProfiler().isEnabled()) {
            context.getProfiler().start(this);
            context.getProfiler().message(this, Profiler.DEPENDENCIES, "DEPENDENCIES", Dependency.getDependenciesName(this.getDependencies()));
            if (contextSequence != null) {
                context.getProfiler().message(this, Profiler.START_SEQUENCES, "CONTEXT SEQUENCE", contextSequence);
            }
            if (contextItem != null) {
                context.getProfiler().message(this, Profiler.START_SEQUENCES, "CONTEXT ITEM", contextItem.toSequence());
            }
        }

        if (contextItem != null) {
            contextSequence = contextItem.toSequence();
        }

        final Sequence result;
        final Sequence s1 = getArgument(0).eval(contextSequence);
        if (s1.isEmpty()) {
            result = StringValue.EMPTY_STRING;
        } else {
            String newNormalizationForm = DEFAULT_NORMALIZATION_FORM;
            if (getArgumentCount() > 1) {
                // Locale.ROOT keeps the case mapping independent of the JVM's
                // default locale (e.g. the Turkish dotted/dotless i).
                newNormalizationForm = getArgument(1).eval(contextSequence)
                        .getStringValue().toUpperCase(Locale.ROOT).trim();
            }
            //TODO : handle the "FULLY-NORMALIZED" string...
            if ("".equals(newNormalizationForm)) {
                result = new StringValue(s1.getStringValue());
            } else {
                result = new StringValue(normalize(s1.getStringValue(), newNormalizationForm));
            }
        }

        if (context.getProfiler().isEnabled()) {
            context.getProfiler().end(this, "", result);
        }
        return result;
    }

    /**
     * Normalizes {@code text} reflectively; the equivalent direct code would be
     * {@code new Normalizer(text, Normalizer.<form>, 0).getText()}.
     *
     * @param text the string to normalize
     * @param form the effective (upper-cased, trimmed, non-empty) normalization form
     * @return the value returned by {@code Normalizer.getText()}
     * @throws XPathException FOCH0003 for an unknown form; a generic error when
     *         the reflective access to ICU4J fails
     */
    private String normalize(final String text, final String form) throws XPathException {
        final Object normalized;
        try {
            if (clazz == null) {
                clazz = Class.forName(NORMALIZER_CLASS_NAME);
            }
            if (modeField == null || !form.equals(normalizationForm)) {
                selectMode(form);
            }
            if (constructor == null) {
                // selectMode() only accepts fields declared as Normalizer.Mode,
                // so the second parameter type is the same for every form.
                constructor = clazz.getConstructor(
                        new Class<?>[] { String.class, modeField.getType(), Integer.TYPE });
            }
            if (method == null) {
                method = clazz.getMethod("getText");
            }
            final Object instance = constructor.newInstance(
                    new Object[] { text, modeObject, DUMMY_INTEGER });
            normalized = method.invoke(instance);
        } catch (final XPathException e) {
            // FOCH0003 must reach the caller with its error code intact instead
            // of being re-wrapped as a missing-library error below.
            throw e;
        } catch (final Exception e) {
            logger.error(ICU_FAILURE_MESSAGE + e.getMessage(), e);
            throw new XPathException(this, ICU_FAILURE_MESSAGE + e.getMessage(), e);
        }
        return (String) normalized;
    }

    /**
     * Looks up the public field named {@code form} on the Normalizer class and
     * caches it together with its value. The cached state is only replaced
     * once the field is known to be declared as a {@code Normalizer.Mode};
     * other public fields of the class are rejected.
     *
     * @param form the effective normalization form, used as the field name
     * @throws XPathException FOCH0003 when no such field exists or the field
     *         is not declared as a Normalizer mode
     * @throws IllegalAccessException if the field value cannot be read
     */
    private void selectMode(final String form) throws XPathException, IllegalAccessException {
        final Field candidate;
        try {
            candidate = clazz.getField(form);
        } catch (final NoSuchFieldException e) {
            throw unknownNormalizationForm();
        }
        if (!MODE_CLASS_NAME.equals(candidate.getType().getName())) {
            throw unknownNormalizationForm();
        }
        //com.ibm.icu.text.Normalizer.Mode
        final Object candidateMode = candidate.get(null);
        modeField = candidate;
        modeObject = candidateMode;
        normalizationForm = form;
    }

    /**
     * Logs and builds (but does not throw) the err:FOCH0003 exception.
     *
     * @return the exception to be thrown by the caller
     */
    private XPathException unknownNormalizationForm() {
        logger.error("err:FOCH0003: unknown normalization form");
        return new XPathException(this, ErrorCodes.FOCH0003, "unknown normalization form");
    }
}