Package org.exist.xquery.functions.fn

Source Code of org.exist.xquery.functions.fn.FunNormalizeUnicode

/* eXist Native XML Database
* Copyright (C) 2006-2009, The eXist Project
* http://exist-db.org/
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
* GNU Library General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* $Id$
*/

package org.exist.xquery.functions.fn;

import org.apache.log4j.Logger;

import org.exist.dom.QName;
import org.exist.xquery.Cardinality;
import org.exist.xquery.Dependency;
import org.exist.xquery.ErrorCodes;
import org.exist.xquery.Function;
import org.exist.xquery.FunctionSignature;
import org.exist.xquery.Profiler;
import org.exist.xquery.XPathException;
import org.exist.xquery.XQueryContext;
import org.exist.xquery.value.FunctionParameterSequenceType;
import org.exist.xquery.value.FunctionReturnSequenceType;
import org.exist.xquery.value.Item;
import org.exist.xquery.value.Sequence;
import org.exist.xquery.value.SequenceType;
import org.exist.xquery.value.StringValue;
import org.exist.xquery.value.Type;

import java.lang.reflect.Constructor;
import java.lang.reflect.Field;
import java.lang.reflect.Method;
import java.util.Locale;

/**
* Implements fn:normalize-unicode()
* Uses icu4j by introspection
*
* @author perig
*
*/
public class FunNormalizeUnicode extends Function {
    protected static final Logger logger = Logger.getLogger(FunNormalizeUnicode.class);

  private String normalizationForm = null;
  private Class<?> clazz = null
  private Field  modeField = null;
  private Object modeObject = null;
  private static final Integer DUMMY_INTEGER = Integer.valueOf(0);
  private Constructor<?> constructor = null;
  private Method method = null;
 
  protected static final String FUNCTION_DESCRIPTION_0_PARAM =
        "Returns the value of the context item normalized according to the " +
    "nomalization form \"NFC\"\n\n";
  protected static final String FUNCTION_DESCRIPTION_1_PARAM =
    "Returns the value of $arg normalized according to the " +
    "normalization criteria for a normalization form identified " +
    "by the value of $normalization-form. The effective value of " +
    "the $normalization-form is computed by removing leading and " +
    "trailing blanks, if present, and converting to upper case.\n\n" +
    "If the value of $arg is the empty sequence, returns the zero-length string.\n\n" +
    "See [Character Model for the World Wide Web 1.0: Normalization] " +
    "for a description of the normalization forms.\n\n" +

    "- If the effective value of $normalization-form is \"NFC\", then the value " +
    "returned by the function is the value of $arg in Unicode Normalization Form C (NFC).\n" +
    "- If the effective value of $normalization-form is \"NFD\", then the value " +
    "returned by the function is the value of $arg in Unicode Normalization Form D (NFD).\n" +
    "- If the effective value of $normalization-form is \"NFKC\", then the value " +
    "returned by the function is the value of $arg in Unicode Normalization Form KC (NFKC).\n" +
    "- If the effective value of $normalization-form is \"NFKD\", then the value " +
    "returned by the function is the value of $arg in Unicode Normalization Form KD (NFKD).\n" +
    "- If the effective value of $normalization-form is \"FULLY-NORMALIZED\", then the value " +
    "returned by the function is the value of $arg in the fully normalized form.\n" +
    "- If the effective value of $normalization-form is the zero-length string, " +
    "no normalization is performed and $arg is returned.\n\n" +
    "Conforming implementations must support normalization form \"NFC\" and may " +
    "support normalization forms \"NFD\", \"NFKC\", \"NFKD\", \"FULLY-NORMALIZED\". " +
    "They may also support other normalization forms with implementation-defined semantics. " +
    "If the effective value of the $normalization-form is other than one of the values " +
    "supported by the implementation, then an error is raised [err:FOCH0003].";

  protected static final FunctionParameterSequenceType ARG_PARAM = new FunctionParameterSequenceType("arg", Type.STRING, Cardinality.ZERO_OR_ONE, "The unicode string to normalize");
  protected static final FunctionParameterSequenceType NF_PARAM = new FunctionParameterSequenceType("normalization-form", Type.STRING, Cardinality.ONE, "The normalization form");
  protected static final FunctionReturnSequenceType RETURN_TYPE = new FunctionReturnSequenceType(Type.STRING, Cardinality.ONE, "the normalized text");

  public final static FunctionSignature signatures [] = {
      new FunctionSignature(
        new QName("normalize-unicode", Function.BUILTIN_FUNCTION_NS),
        FUNCTION_DESCRIPTION_0_PARAM,
        new SequenceType[] { ARG_PARAM },
        RETURN_TYPE
      ),
      new FunctionSignature (
          new QName("normalize-unicode", Function.BUILTIN_FUNCTION_NS),
        FUNCTION_DESCRIPTION_1_PARAM,
        new SequenceType[] { ARG_PARAM, NF_PARAM },
        RETURN_TYPE
    )
  };

    public FunNormalizeUnicode(XQueryContext context, FunctionSignature signature) {
      super(context, signature);
    }

    public Sequence eval(Sequence contextSequence, Item contextItem) throws XPathException {
        if (context.getProfiler().isEnabled()) {
            context.getProfiler().start(this);      
            context.getProfiler().message(this, Profiler.DEPENDENCIES, "DEPENDENCIES", Dependency.getDependenciesName(this.getDependencies()));
            if (contextSequence != null)
                {context.getProfiler().message(this, Profiler.START_SEQUENCES, "CONTEXT SEQUENCE", contextSequence);}
            if (contextItem != null)
                {context.getProfiler().message(this, Profiler.START_SEQUENCES, "CONTEXT ITEM", contextItem.toSequence());}
        }
       
    if (contextItem != null)
        {contextSequence = contextItem.toSequence();}
   
        Sequence result;

        final Sequence s1 = getArgument(0).eval(contextSequence);
        if (s1.isEmpty())
            {result = StringValue.EMPTY_STRING;}
        else {
            String newNormalizationForm = "NFC";
      if (getArgumentCount() > 1)
        {newNormalizationForm = getArgument(1).eval(contextSequence).getStringValue().toUpperCase().trim();}
      //TODO : handle the "FULLY-NORMALIZED" string...
      if ("".equals(newNormalizationForm))
        {result =  new StringValue(s1.getStringValue());}
      else {
        Object returnedObject = null;
        try {
              if (clazz == null)
                {clazz = Class.forName("com.ibm.icu.text.Normalizer");}
              if (modeField == null || !normalizationForm.equals(newNormalizationForm)) {
                try {
                  modeField = clazz.getField(newNormalizationForm);
                } catch (final NoSuchFieldException e) {
                            logger.error("err:FOCH0003: unknown normalization form");
                  throw new XPathException(this, ErrorCodes.FOCH0003, "unknown normalization form");
                }
                  //com.ibm.icu.text.Normalizer.Mode
                  modeObject = modeField.get(null);
                normalizationForm = newNormalizationForm;
              }
              if (constructor == null)
                //Second argument shouldn't be a problem : modeField always has the same type
                  {constructor = clazz.getConstructor(
                      new Class[] { String.class, modeField.getType(), Integer.TYPE}
                    );}
              final Object[] args = new Object[] { s1.getStringValue(), modeObject, DUMMY_INTEGER };
              if (method == null)
                {method = clazz.getMethod( "getText", (Class[])null );}
 
              //Normalizer n = new Normalizer(s1.getStringValue(), Normalizer.NFC, 0);
              final Object instance = constructor.newInstance(args);
              //result = new StringValue(n.getText());
              returnedObject = method.invoke( instance, (Object[])null );
            } catch (final Exception e) {
                    logger.error("Can not find the ICU4J library in the classpath " + e.getMessage());
              throw new XPathException(this, "Can not find the ICU4J library in the classpath " + e.getMessage());
            }
            result = new StringValue((String)returnedObject);
      }
        }
       
        if (context.getProfiler().isEnabled())
            {context.getProfiler().end(this, "", result);}
       
        return result;       
    }

}
TOP

Related Classes of org.exist.xquery.functions.fn.FunNormalizeUnicode

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.