Package java.text

Source Code of java.text.BreakIterator$BreakIteratorCache

* @(#)  1.43 06/06/08
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* SUN PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.

* (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
* (C) Copyright IBM Corp. 1996 - 1998 - All Rights Reserved
* The original version of this source code and documentation
* is copyrighted and owned by Taligent, Inc., a wholly-owned
* subsidiary of IBM. These materials are provided under terms
* of a License Agreement between Taligent and Sun. This technology
* is protected by multiple US and International patents.
* This notice and attribution to Taligent may not be removed.
* Taligent is a registered trademark of Taligent, Inc.

package java.text;

import java.lang.ref.SoftReference;
import java.security.AccessController;
import java.security.PrivilegedAction;
import java.text.CharacterIterator;
import java.text.StringCharacterIterator;
import java.text.spi.BreakIteratorProvider;
import java.util.Locale;
import java.util.MissingResourceException;
import java.util.ResourceBundle;
import java.util.spi.LocaleServiceProvider;
import sun.util.LocaleServiceProviderPool;
import sun.util.resources.LocaleData;

* The <code>BreakIterator</code> class implements methods for finding
* the location of boundaries in text. Instances of <code>BreakIterator</code>
* maintain a current position and scan over text
* returning the index of characters where boundaries occur.
* Internally, <code>BreakIterator</code> scans text using a
* <code>CharacterIterator</code>, and is thus able to scan text held
* by any object implementing that protocol. A <code>StringCharacterIterator</code>
* is used to scan <code>String</code> objects passed to <code>setText</code>.
* <p>
* You use the factory methods provided by this class to create
* instances of various types of break iterators. In particular,
* use <code>getWordInstance</code>, <code>getLineInstance</code>,
* <code>getSentenceInstance</code>, and <code>getCharacterInstance</code>
* to create <code>BreakIterator</code>s that perform
* word, line, sentence, and character boundary analysis respectively.
* A single <code>BreakIterator</code> can work only on one unit
* (word, line, sentence, and so on). You must use a different iterator
* for each unit boundary analysis you wish to perform.
* <p><a name="line"></a>
* Line boundary analysis determines where a text string can be
* broken when line-wrapping. The mechanism correctly handles
* punctuation and hyphenated words. Actual line breaking needs
* to also consider the available line width and is handled by
* higher-level software.
* <p><a name="sentence"></a>
* Sentence boundary analysis allows selection with correct interpretation
* of periods within numbers and abbreviations, and trailing punctuation
* marks such as quotation marks and parentheses.
* <p><a name="word"></a>
* Word boundary analysis is used by search and replace functions, as
* well as within text editing applications that allow the user to
* select words with a double click. Word selection provides correct
* interpretation of punctuation marks within and following
* words. Characters that are not part of a word, such as symbols
* or punctuation marks, have word-breaks on both sides.
* <p><a name="character"></a>
* Character boundary analysis allows users to interact with characters
* as they expect to, for example, when moving the cursor through a text
* string. Character boundary analysis provides correct navigation
* through character strings, regardless of how the character is stored.
* The boundaries returned may be those of supplementary characters,
* combining character sequences, or ligature clusters.
* For example, an accented character might be stored as a base character
* and a diacritical mark. What users consider to be a character can
* differ between languages.
* <p>
* The <code>BreakIterator</code> instances returned by the factory methods
* of this class are intended for use with natural languages only, not for
* programming language text. It is however possible to define subclasses
* that tokenize a programming language.
* <P>
* <strong>Examples</strong>:<P>
* Creating and using text boundaries:
* <blockquote>
* <pre>
* public static void main(String args[]) {
*      if (args.length == 1) {
*          String stringToExamine = args[0];
*          //print each word in order
*          BreakIterator boundary = BreakIterator.getWordInstance();
*          boundary.setText(stringToExamine);
*          printEachForward(boundary, stringToExamine);
*          //print each sentence in reverse order
*          boundary = BreakIterator.getSentenceInstance(Locale.US);
*          boundary.setText(stringToExamine);
*          printEachBackward(boundary, stringToExamine);
*          printFirst(boundary, stringToExamine);
*          printLast(boundary, stringToExamine);
*      }
* }
* </pre>
* </blockquote>
* Print each element in order:
* <blockquote>
* <pre>
* public static void printEachForward(BreakIterator boundary, String source) {
*     int start = boundary.first();
*     for (int end = boundary.next();
*          end != BreakIterator.DONE;
*          start = end, end = boundary.next()) {
*          System.out.println(source.substring(start,end));
*     }
* }
* </pre>
* </blockquote>
* Print each element in reverse order:
* <blockquote>
* <pre>
* public static void printEachBackward(BreakIterator boundary, String source) {
*     int end = boundary.last();
*     for (int start = boundary.previous();
*          start != BreakIterator.DONE;
*          end = start, start = boundary.previous()) {
*         System.out.println(source.substring(start,end));
*     }
* }
* </pre>
* </blockquote>
* Print first element:
* <blockquote>
* <pre>
* public static void printFirst(BreakIterator boundary, String source) {
*     int start = boundary.first();
*     int end = boundary.next();
*     System.out.println(source.substring(start,end));
* }
* </pre>
* </blockquote>
* Print last element:
* <blockquote>
* <pre>
* public static void printLast(BreakIterator boundary, String source) {
*     int end = boundary.last();
*     int start = boundary.previous();
*     System.out.println(source.substring(start,end));
* }
* </pre>
* </blockquote>
* Print the element at a specified position:
* <blockquote>
* <pre>
* public static void printAt(BreakIterator boundary, int pos, String source) {
*     int end = boundary.following(pos);
*     int start = boundary.previous();
*     System.out.println(source.substring(start,end));
* }
* </pre>
* </blockquote>
* Find the next word:
* <blockquote>
* <pre>
* public static int nextWordStartAfter(int pos, String text) {
*     BreakIterator wb = BreakIterator.getWordInstance();
*     wb.setText(text);
*     int last = wb.following(pos);
*     int current = wb.next();
*     while (current != BreakIterator.DONE) {
*         for (int p = last; p < current; p++) {
*             if (Character.isLetter(text.codePointAt(p)))
*                 return last;
*         }
*         last = current;
*         current = wb.next();
*     }
*     return BreakIterator.DONE;
* }
* </pre>
* (The iterator returned by BreakIterator.getWordInstance() is unique in that
* the break positions it returns don't represent both the start and end of the
* thing being iterated over.  That is, a sentence-break iterator returns breaks
* that each represent the end of one sentence and the beginning of the next.
* With the word-break iterator, the characters between two boundaries might be a
* word, or they might be the punctuation or whitespace between two words.  The
* above code uses a simple heuristic to determine which boundary is the beginning
* of a word: If the characters between this boundary and the next boundary
* include at least one letter (this can be an alphabetical letter, a CJK ideograph,
* a Hangul syllable, a Kana character, etc.), then the text between this boundary
* and the next is a word; otherwise, it's the material between words.)
* </blockquote>
* @see CharacterIterator

public abstract class BreakIterator implements Cloneable
     * Constructor. BreakIterator is stateless and has no default behavior.
    protected BreakIterator()

     * Create a copy of this iterator
     * @return A copy of this
    public Object clone()
        try {
            return super.clone();
        catch (CloneNotSupportedException e) {
            throw new InternalError();

     * DONE is returned by previous(), next(), next(int), preceding(int)
     * and following(int) when either the first or last text boundary has been
     * reached.
    public static final int DONE = -1;

     * Returns the first boundary. The iterator's current position is set
     * to the first text boundary.
     * @return The character index of the first text boundary.
    public abstract int first();

     * Returns the last boundary. The iterator's current position is set
     * to the last text boundary.
     * @return The character index of the last text boundary.
    public abstract int last();

     * Returns the nth boundary from the current boundary. If either
     * the first or last text boundary has been reached, it returns
     * <code>BreakIterator.DONE</code> and the current position is set to either
     * the first or last text boundary depending on which one is reached. Otherwise,
     * the iterator's current position is set to the new boundary.
     * For example, if the iterator's current position is the mth text boundary 
     * and three more boundaries exist from the current boundary to the last text
     * boundary, the next(2) call will return m + 2. The new text position is set
     * to the (m + 2)th text boundary. A next(4) call would return
     * <code>BreakIterator.DONE</code> and the last text boundary would become the
     * new text position.
     * @param n which boundary to return.  A value of 0
     * does nothing.  Negative values move to previous boundaries
     * and positive values move to later boundaries.
     * @return The character index of the nth boundary from the current position
     * or <code>BreakIterator.DONE</code> if either first or last text boundary
     * has been reached.
    public abstract int next(int n);

     * Returns the boundary following the current boundary. If the current boundary
     * is the last text boundary, it returns <code>BreakIterator.DONE</code> and
     * the iterator's current position is unchanged. Otherwise, the iterator's
     * current position is set to the boundary following the current boundary.
     * @return The character index of the next text boundary or
     * <code>BreakIterator.DONE</code> if the current boundary is the last text
     * boundary. 
     * Equivalent to next(1).
     * @see #next(int)
    public abstract int next();

     * Returns the boundary preceding the current boundary. If the current boundary
     * is the first text boundary, it returns <code>BreakIterator.DONE</code> and
     * the iterator's current position is unchanged. Otherwise, the iterator's
     * current position is set to the boundary preceding the current boundary.
     * @return The character index of the previous text boundary or
     * <code>BreakIterator.DONE</code> if the current boundary is the first text
     * boundary.
    public abstract int previous();

     * Returns the first boundary following the specified character offset. If the
     * specified offset is equal to the last text boundary, it returns
     * <code>BreakIterator.DONE</code> and the iterator's current position is unchanged.
     * Otherwise, the iterator's current position is set to the returned boundary.
     * The value returned is always greater than the offset or the value
     * <code>BreakIterator.DONE</code>.
     * @param offset the character offset to begin scanning.
     * @return The first boundary after the specified offset or
     * <code>BreakIterator.DONE</code> if the last text boundary is passed in
     * as the offset.
     * @exception  IllegalArgumentException if the specified offset is less than
     * the first text boundary or greater than the last text boundary.
    public abstract int following(int offset);

     * Returns the last boundary preceding the specified character offset. If the
     * specified offset is equal to the first text boundary, it returns
     * <code>BreakIterator.DONE</code> and the iterator's current position is unchanged.
     * Otherwise, the iterator's current position is set to the returned boundary.
     * The value returned is always less than the offset or the value
     * <code>BreakIterator.DONE</code>.
     * @param offset the character offset to begin scanning.
     * @return The last boundary before the specified offset or
     * <code>BreakIterator.DONE</code> if the first text boundary is passed in
     * as the offset.
     * @exception   IllegalArgumentException if the specified offset is less than
     * the first text boundary or greater than the last text boundary.
     * @since 1.2
    public int preceding(int offset) {
        // NOTE:  This implementation is here solely because we can't add new
        // abstract methods to an existing class.  There is almost ALWAYS a
        // better, faster way to do this.
        int pos = following(offset);
        while (pos >= offset && pos != DONE)
            pos = previous();
        return pos;

     * Returns true if the specified character offset is a text boundary.
     * @param offset the character offset to check.
     * @return <code>true</code> if "offset" is a boundary position,
     * <code>false</code> otherwise.
     * @since 1.2
    public boolean isBoundary(int offset) {
        // NOTE: This implementation probably is wrong for most situations
        // because it fails to take into account the possibility that a
        // CharacterIterator passed to setText() may not have a begin offset
        // of 0.  But since the abstract BreakIterator doesn't have that
        // knowledge, it assumes the begin offset is 0.  If you subclass
        // BreakIterator, copy the SimpleTextBoundary implementation of this
        // function into your subclass.  [This should have been abstract at
        // this level, but it's too late to fix that now.]
        if (offset == 0)
            return true;
            return following(offset - 1) == offset;

     * Returns character index of the text boundary that was most
     * recently returned by next(), next(int), previous(), first(), last(),
     * following(int) or preceding(int). If any of these methods returns
     * <code>BreakIterator.DONE</code> because either first or last text boundary
     * has been reached, it returns the first or last text boundary depending on
     * which one is reached.
     * @return The text boundary returned from the above methods, first or last
     * text boundary.
     * @see #next()
     * @see #next(int)
     * @see #previous()
     * @see #first()
     * @see #last()
     * @see #following(int)
     * @see #preceding(int)
    public abstract int current();

     * Get the text being scanned
     * @return the text being scanned
    public abstract CharacterIterator getText();

     * Set a new text string to be scanned.  The current scan
     * position is reset to first().
     * @param newText new text to scan.
    public void setText(String newText)
        setText(new StringCharacterIterator(newText));

     * Set a new text for scanning.  The current scan
     * position is reset to first().
     * @param newText new text to scan.
    public abstract void setText(CharacterIterator newText);

    // Selectors for the four kinds of break iterator. They are the case
    // labels in BreakIteratorGetter.getObject() and the values expected for
    // the "type" argument that indexes iterCache below.
    private static final int CHARACTER_INDEX = 0;
    private static final int WORD_INDEX = 1;
    private static final int LINE_INDEX = 2;
    private static final int SENTENCE_INDEX = 3;
    // One softly referenced BreakIteratorCache slot per selector above;
    // read and refilled by getBreakInstance().
    private static final SoftReference[] iterCache = new SoftReference[4];

     * Returns a new <code>BreakIterator</code> instance
     * for <a href="#word">word breaks</a>
     * for the {@linkplain Locale#getDefault() default locale}.
     * @return A break iterator for word breaks
    public static BreakIterator getWordInstance()
        return getWordInstance(Locale.getDefault());

     * Returns a new <code>BreakIterator</code> instance
     * for <a href="#word">word breaks</a>
     * for the given locale.
     * @param locale the desired locale
     * @return A break iterator for word breaks
     * @exception NullPointerException if <code>locale</code> is null
    public static BreakIterator getWordInstance(Locale locale)
        return getBreakInstance(locale,

     * Returns a new <code>BreakIterator</code> instance
     * for <a href="#line">line breaks</a>
     * for the {@linkplain Locale#getDefault() default locale}.
     * @return A break iterator for line breaks
    public static BreakIterator getLineInstance()
        return getLineInstance(Locale.getDefault());

     * Returns a new <code>BreakIterator</code> instance
     * for <a href="#line">line breaks</a>
     * for the given locale.
     * @param locale the desired locale
     * @return A break iterator for line breaks
     * @exception NullPointerException if <code>locale</code> is null
    public static BreakIterator getLineInstance(Locale locale)
        return getBreakInstance(locale,

     * Returns a new <code>BreakIterator</code> instance
     * for <a href="#character">character breaks</a>
     * for the {@linkplain Locale#getDefault() default locale}.
     * @return A break iterator for character breaks
    public static BreakIterator getCharacterInstance()
        return getCharacterInstance(Locale.getDefault());

     * Returns a new <code>BreakIterator</code> instance
     * for <a href="#character">character breaks</a>
     * for the given locale.
     * @param locale the desired locale
     * @return A break iterator for character breaks
     * @exception NullPointerException if <code>locale</code> is null
    public static BreakIterator getCharacterInstance(Locale locale)
        return getBreakInstance(locale,

     * Returns a new <code>BreakIterator</code> instance
     * for <a href="#sentence">sentence breaks</a>
     * for the {@linkplain Locale#getDefault() default locale}.
     * @return A break iterator for sentence breaks
    public static BreakIterator getSentenceInstance()
        return getSentenceInstance(Locale.getDefault());

     * Returns a new <code>BreakIterator</code> instance
     * for <a href="#sentence">sentence breaks</a>
     * for the given locale.
     * @param locale the desired locale
     * @return A break iterator for sentence breaks
     * @exception NullPointerException if <code>locale</code> is null
    public static BreakIterator getSentenceInstance(Locale locale)
        return getBreakInstance(locale,

    private static BreakIterator getBreakInstance(Locale locale,
                                                  int type,
                                                  String dataName,
                                                  String dictionaryName) {
        if (iterCache[type] != null) {
            BreakIteratorCache cache = (BreakIteratorCache) iterCache[type].get();
            if (cache != null) {
                if (cache.getLocale().equals(locale)) {
                    return cache.createBreakInstance();

        BreakIterator result = createBreakInstance(locale,
        BreakIteratorCache cache = new BreakIteratorCache(locale, result);
        iterCache[type] = new SoftReference(cache);
        return result;

    private static ResourceBundle getBundle(final String baseName, final Locale locale) {
         return (ResourceBundle) AccessController.doPrivileged(new PrivilegedAction() {
            public Object run() {
                return ResourceBundle.getBundle(baseName, locale);

    private static BreakIterator createBreakInstance(Locale locale,
                                                     int type,
                                                     String dataName,
                                                     String dictionaryName) {

        // Check whether a provider can provide an implementation that's closer
        // to the requested locale than what the Java runtime itself can provide.
        LocaleServiceProviderPool pool =
        if (pool.hasProviders()) {
            BreakIterator providersInstance = pool.getLocalizedObject(
                                                    locale, type);
            if (providersInstance != null) {
                return providersInstance;

        ResourceBundle bundle = getBundle(
                        "sun.text.resources.BreakIteratorInfo", locale);
        String[] classNames = bundle.getStringArray("BreakIteratorClasses");
        String dataFile = bundle.getString(dataName);

        try {
            if (classNames[type].equals("RuleBasedBreakIterator")) {
                return new RuleBasedBreakIterator(dataFile);
            else if (classNames[type].equals("DictionaryBasedBreakIterator")) {
                String dictionaryFile = bundle.getString(dictionaryName);
                return new DictionaryBasedBreakIterator(dataFile, dictionaryFile);
            else {
                throw new IllegalArgumentException("Invalid break iterator class \"" +
                                classNames[type] + "\"");
        catch (Exception e) {
            throw new InternalError(e.toString());

     * Returns an array of all locales for which the
     * <code>get*Instance</code> methods of this class can return
     * localized instances.
     * The returned array represents the union of locales supported by the Java
     * runtime and by installed
     * {@link java.text.spi.BreakIteratorProvider BreakIteratorProvider} implementations. 
     * It must contain at least a <code>Locale</code>
     * instance equal to {@link java.util.Locale#US Locale.US}.
     * @return An array of locales for which localized
     *         <code>BreakIterator</code> instances are available.
    public static synchronized Locale[] getAvailableLocales()
        LocaleServiceProviderPool pool =
        return pool.getAvailableLocales();

    private static final class BreakIteratorCache {

        private BreakIterator iter;
        private Locale locale;

        BreakIteratorCache(Locale locale, BreakIterator iter) {
            this.locale = locale;
            this.iter = (BreakIterator) iter.clone();

        Locale getLocale() {
            return locale;

        BreakIterator createBreakInstance() {
            return (BreakIterator) iter.clone();

    static long getLong(byte[] buf, int offset) {
        long num = buf[offset]&0xFF;
        for (int i = 1; i < 8; i++) {
            num = num<<8 | (buf[offset+i]&0xFF);
        return num;

    static int getInt(byte[] buf, int offset) {
        int num = buf[offset]&0xFF;
        for (int i = 1; i < 4; i++) {
            num = num<<8 | (buf[offset+i]&0xFF);
        return num;

    static short getShort(byte[] buf, int offset) {
        short num = (short)(buf[offset]&0xFF);
        num = (short)(num<<8 | (buf[offset+1]&0xFF));
        return num;

     * Obtains a BreakIterator instance from a BreakIteratorProvider
     * implementation.
    private static class BreakIteratorGetter
        implements LocaleServiceProviderPool.LocalizedObjectGetter<BreakIteratorProvider, BreakIterator> {
        private static final BreakIteratorGetter INSTANCE =
            new BreakIteratorGetter();

        public BreakIterator getObject(BreakIteratorProvider breakIteratorProvider,
                                Locale locale,
                                String key,
                                Object... params) {
            assert params.length == 1;

      switch ((Integer)params[0]) {
    return breakIteratorProvider.getCharacterInstance(locale);
      case WORD_INDEX:
    return breakIteratorProvider.getWordInstance(locale);
      case LINE_INDEX:
    return breakIteratorProvider.getLineInstance(locale);
            case SENTENCE_INDEX:
                return breakIteratorProvider.getSentenceInstance(locale);
    assert false : "should not happen";
            return null;

Related Classes of java.text.BreakIterator$BreakIteratorCache

Copyright © 2018 All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact