Package com.ibm.icu.impl

Source Code of com.ibm.icu.impl.Trie2_16

/*
*******************************************************************************
* Copyright (C) 2009-2010, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*/
package com.ibm.icu.impl;

import java.io.DataOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

/**
* @author aheninger
*
*         A read-only Trie2, holding 16 bit data values.
*
*         A Trie2 is a highly optimized data structure for mapping from Unicode code points (values ranging from 0 to 0x10ffff) to a 16 or
*         32 bit value.
*
*         See class Trie2 for descriptions of the API for accessing the contents of a trie.
*
*         The fundamental data access methods are declared final in this class, with the intent that applications might gain a little extra
*         performance, when compared with calling the same methods via the abstract UTrie2 base class.
*/
public final class Trie2_16 extends Trie2 {

  /**
   * Internal constructor, not for general use.
   */
  Trie2_16() {
  }

  /**
   * Create a Trie2 from its serialized form. Inverse of utrie2_serialize(). The serialized format is identical between ICU4C and ICU4J,
   * so this function will work with serialized Trie2s from either.
   *
   * The serialized Trie2 on the stream may be in either little or big endian byte order. This allows using serialized Tries from ICU4C
   * without needing to consider the byte order of the system that created them.
   *
   * @param is
   *            an input stream to the serialized form of a UTrie2.
   * @return An unserialized Trie_16, ready for use.
   * @throws IllegalArgumentException
   *             if the stream does not contain a serialized Trie2.
   * @throws IOException
   *             if a read error occurs on the InputStream.
   * @throws ClassCastException
   *             if the stream contains a serialized Trie2_32
   */
  public static Trie2_16 createFromSerialized(InputStream is) throws IOException {
    return (Trie2_16) Trie2.createFromSerialized(is);
  }

  /**
   * Get the value for a code point as stored in the Trie2.
   *
   * @param codePoint
   *            the code point
   * @return the value
   */
  @Override
  public final int get(int codePoint) {
    int value;
    int ix;

    if (codePoint >= 0) {
      if (codePoint < 0x0d800 || (codePoint > 0x0dbff && codePoint <= 0x0ffff)) {
        // Ordinary BMP code point, excluding leading surrogates.
        // BMP uses a single level lookup.  BMP index starts at offset 0 in the Trie2 index.
        // 16 bit data is stored in the index array itself.
        ix = index[codePoint >> UTRIE2_SHIFT_2];
        ix = (ix << UTRIE2_INDEX_SHIFT) + (codePoint & UTRIE2_DATA_MASK);
        value = index[ix];
        return value;
      }
      if (codePoint <= 0xffff) {
        // Lead Surrogate Code Point.  A Separate index section is stored for
        // lead surrogate code units and code points.
        //   The main index has the code unit data.
        //   For this function, we need the code point data.
        // Note: this expression could be refactored for slightly improved efficiency, but
        //       surrogate code points will be so rare in practice that it's not worth it.
        ix = index[UTRIE2_LSCP_INDEX_2_OFFSET + ((codePoint - 0xd800) >> UTRIE2_SHIFT_2)];
        ix = (ix << UTRIE2_INDEX_SHIFT) + (codePoint & UTRIE2_DATA_MASK);
        value = index[ix];
        return value;
      }
      if (codePoint < highStart) {
        // Supplemental code point, use two-level lookup.
        ix = (UTRIE2_INDEX_1_OFFSET - UTRIE2_OMITTED_BMP_INDEX_1_LENGTH) + (codePoint >> UTRIE2_SHIFT_1);
        ix = index[ix];
        ix += (codePoint >> UTRIE2_SHIFT_2) & UTRIE2_INDEX_2_MASK;
        ix = index[ix];
        ix = (ix << UTRIE2_INDEX_SHIFT) + (codePoint & UTRIE2_DATA_MASK);
        value = index[ix];
        return value;
      }
      if (codePoint <= 0x10ffff) {
        value = index[highValueIndex];
        return value;
      }
    }

    // Fall through.  The code point is outside of the legal range of 0..0x10ffff.
    return errorValue;
  }

  /**
   * Get a Trie2 value for a UTF-16 code unit.
   *
   * This function returns the same value as get() if the input character is outside of the lead surrogate range
   *
   * There are two values stored in a Trie2 for inputs in the lead surrogate range. This function returns the alternate value, while
   * Trie2.get() returns the main value.
   *
   * @param codeUnit
   *            a 16 bit code unit or lead surrogate value.
   * @return the value
   */
  @Override
  public int getFromU16SingleLead(char codeUnit) {
    int value;
    int ix;

    // Because the input is a 16 bit char, we can skip the tests for it being in
    // the BMP range.  It is.
    ix = index[codeUnit >> UTRIE2_SHIFT_2];
    ix = (ix << UTRIE2_INDEX_SHIFT) + (codeUnit & UTRIE2_DATA_MASK);
    value = index[ix];
    return value;
  }

  /**
   * Serialize a Trie2_16 onto an OutputStream.
   *
   * A Trie2 can be serialized multiple times. The serialized data is compatible with ICU4C UTrie2 serialization. Trie2 serialization is
   * unrelated to Java object serialization.
   *
   * @param os
   *            the stream to which the serialized Trie2 data will be written.
   * @return the number of bytes written.
   * @throw IOException on an error writing to the OutputStream.
   */
  public int serialize(OutputStream os) throws IOException {
    DataOutputStream dos = new DataOutputStream(os);
    int bytesWritten = 0;

    bytesWritten += serializeHeader(dos);
    for (int i = 0; i < dataLength; i++) {
      dos.writeChar(index[data16 + i]);
    }
    bytesWritten += dataLength * 2;
    return bytesWritten;
  }

  /**
   * @return the number of bytes of the serialized trie
   */
  public int getSerializedLength() {
    return 16 + (header.indexLength + dataLength) * 2;
  }

  /**
   * Given a starting code point, find the last in a range of code points, all with the same value.
   *
   * This function is part of the implementation of iterating over the Trie2's contents.
   *
   * @param startingCP
   *            The code point at which to begin looking.
   * @return The last code point with the same value as the starting code point.
   */
  @Override
  int rangeEnd(int startingCP, int limit, int value) {
    int cp = startingCP;
    int block = 0;
    int index2Block = 0;

    // Loop runs once for each of
    //   - a partial data block
    //   - a reference to the null (default) data block.
    //   - a reference to the index2 null block

    outerLoop: for (;;) {
      if (cp >= limit) {
        break;
      }
      if (cp < 0x0d800 || (cp > 0x0dbff && cp <= 0x0ffff)) {
        // Ordinary BMP code point, excluding leading surrogates.
        // BMP uses a single level lookup.  BMP index starts at offset 0 in the Trie2 index.
        // 16 bit data is stored in the index array itself.
        index2Block = 0;
        block = index[cp >> UTRIE2_SHIFT_2] << UTRIE2_INDEX_SHIFT;
      } else if (cp < 0xffff) {
        // Lead Surrogate Code Point, 0xd800 <= cp < 0xdc00
        index2Block = UTRIE2_LSCP_INDEX_2_OFFSET;
        block = index[index2Block + ((cp - 0xd800) >> UTRIE2_SHIFT_2)] << UTRIE2_INDEX_SHIFT;
      } else if (cp < highStart) {
        // Supplemental code point, use two-level lookup.
        int ix = (UTRIE2_INDEX_1_OFFSET - UTRIE2_OMITTED_BMP_INDEX_1_LENGTH) + (cp >> UTRIE2_SHIFT_1);
        index2Block = index[ix];
        block = index[index2Block + ((cp >> UTRIE2_SHIFT_2) & UTRIE2_INDEX_2_MASK)] << UTRIE2_INDEX_SHIFT;
      } else {
        // Code point above highStart.
        if (value == index[highValueIndex]) {
          cp = limit;
        }
        break;
      }

      if (index2Block == index2NullOffset) {
        if (value != initialValue) {
          break;
        }
        cp += UTRIE2_CP_PER_INDEX_1_ENTRY;
      } else if (block == dataNullOffset) {
        // The block at dataNullOffset has all values == initialValue.
        // Because Trie2 iteration always proceeds in ascending order, we will always
        //   encounter a null block at its beginning, and can skip over
        //   a number of code points equal to the length of the block.
        if (value != initialValue) {
          break;
        }
        cp += UTRIE2_DATA_BLOCK_LENGTH;
      } else {
        // Current position refers to an ordinary data block.
        // Walk over the data entries, checking the values.
        int startIx = block + (cp & UTRIE2_DATA_MASK);
        int limitIx = block + UTRIE2_DATA_BLOCK_LENGTH;
        for (int ix = startIx; ix < limitIx; ix++) {
          if (index[ix] != value) {
            // We came to an entry with a different value.
            //   We are done.
            cp += (ix - startIx);
            break outerLoop;
          }
        }
        // The ordinary data block contained our value until its end.
        //  Advance the current code point, and continue the outerloop.
        cp += limitIx - startIx;
      }
    }
    if (cp > limit) {
      cp = limit;
    }

    return cp - 1;
  }
}
TOP

Related Classes of com.ibm.icu.impl.Trie2_16

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.