Source Code of com.google.caja.parser.Normalizer

// Copyright (C) 2008 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.


package com.google.caja.parser;


import com.google.caja.SomethingWidgyHappenedError;


import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;


/**
 * Provides a similar interface to {@code java.text.Normalizer}, but will
 * compile and run on JDK1.5 in a stricter mode.
 *
 * @author mikesamuel@gmail.com
 */
final class Normalizer {
  private static final Method IS_NORMALIZED;
  private static final Object NORMAL_FORM_C;


  static {
    Method isNormalized = null;
    Object normalFormC = null;
    try {
      Class<?> normalizer = Class.forName("java.text.Normalizer");
      Class<?> normalizerForm = Class.forName("java.text.Normalizer$Form");


      isNormalized = normalizer.getMethod(
          "isNormalized", CharSequence.class, normalizerForm);
      normalFormC = normalizerForm.getField("NFC").get(null);
    } catch (ClassNotFoundException ex) {
      // JVM versions < 1.5 don't provide Normalizer.
      // Use heuristic below.
    } catch (IllegalAccessException ex) {
      throw new SomethingWidgyHappenedError(
          "Normalizer exists but is unexpectedly inaccessible", ex);
    } catch (NoSuchFieldException ex) {
      // AppEngine doesn't provide Normalizer.Form.
      // Use heuristic below.
      throw new SomethingWidgyHappenedError(
          "Normalizer.Form unexpectedly missing", ex);
    } catch (NoSuchMethodException ex) {
        // Don't use the normalizer.
        // Use heuristic below.
      throw new SomethingWidgyHappenedError(
          "Normalizer unexpectedly missing methods", ex);
    }


    IS_NORMALIZED = isNormalized;
    NORMAL_FORM_C = normalFormC;
  }


  /**
   * A conservative heuristic as to whether s is normalized according to Unicode
   * Normal Form C.  It is heuristic, because Caja needs to run with versions
   * of the Java standard libraries that do not include normalization.
   * @return false if s is not normalized.
   */
  public static boolean isNormalized(CharSequence s) {
    if (IS_NORMALIZED != null) {
      try {
        return ((Boolean) IS_NORMALIZED.invoke(null, s, NORMAL_FORM_C))
            .booleanValue();
      } catch (IllegalAccessException ex) {
        throw new SomethingWidgyHappenedError(
            "Normalizer unexpectedly uninvokable", ex);
      } catch (InvocationTargetException ex) {
        Throwable th = ex.getTargetException();
          throw new SomethingWidgyHappenedError(
              "Normalizer unexpectedly uninvokable", th);
      }
    }


    // From http://unicode.org/reports/tr15/#D6
    // Legacy character sets are classified into three categories
    // based on their normalization behavior with accepted
    // transcoders.
    // 1. Prenormalized. Any string in the character set is already in
    //    Normalization Form X.
    //    For example, ISO 8859-1 is prenormalized in NFC.
    // ...
    for (int i = s.length(); --i >= 0;) {
      char ch = s.charAt(i);
      // Codepoints in [32, 126] U [160, 255] are identical in both Unicode and
      // ISO 8859-1.
      // Codepoints in [0, 31] and [127, 159] are not part of ISO 8859-1.  They
      // are control characters in Unicode, and disallowed in identifiers so
      // will never reach here.
      if (ch >= 256) { return false; }
    }
    return true;
  }
}
Source Code of com.google.caja.parser.Normalizer

Related Classes of com.google.caja.parser.Normalizer