Package com.ibm.icu.dev.test.rbbi

Source Code of com.ibm.icu.dev.test.rbbi.SimpleBITest

/*
*******************************************************************************
* Copyright (C) 1996-2006, International Business Machines Corporation and    *
* others. All Rights Reserved.                                                *
*******************************************************************************
*/
package com.ibm.icu.dev.test.rbbi;

import java.io.IOException;
import java.io.InputStream;
import java.util.ListResourceBundle;
import java.util.MissingResourceException;

import com.ibm.icu.dev.test.TestFmwk;
import com.ibm.icu.text.BreakIterator;
import com.ibm.icu.text.DictionaryBasedBreakIterator;
import com.ibm.icu.text.RuleBasedBreakIterator;

// TODO: {dlf} this test currently doesn't test anything!
// You'll notice that the resource that uses the dictionary isn't even on the resource path,
// so the dictionary never gets used.  Good thing, too, because it would throw a security
// exception if run with a security manager.  Not that it would matter, the dictionary
// resource isn't even in the icu source tree!
// In order to fix this:
// 1) make sure english.dict matches the current dictionary format required by dbbi
// 2) make sure english.dict gets included in icu4jtests.jar
// 3) have this test use getResourceAsStream to get a stream on the dictionary, and
//    directly instantiate a DictionaryBasedBreakIterator.  It can use the rules from
//    the appropriate section of ResourceBundle_en_US_TEST.  I'd suggest just copying
//    the rules into this file.
// 4) change the test text by inserting '|' at word breaks, and '||' at line breaks. 
// 5) process this text to a) create tables of break indices, and b) clean up the test
//    for the break iterator to work on
//
// This would NOT test the ability to load dictionary-based break iterators through our
// normal resource mechanism.  One could install such a break iterator and its
// resources into the icu4j jar, and it would work, but there's no way to register entire
// resources from outside yet.  Even if there were, the access restrictions are a bit
// difficult to manage, if one wanted to register a break iterator whose code and data
// resides outside the icu4j jar.  Since the code to instantiate would be going through
// two protection domains, each domain would have to allow access to the data-- but
// icu4j's domain wouldn't know about ours.  So we could instantiate before registering
// the break iterator, but this would mean we'd have to fully initialize the dictionary(s)
// at instantiation time, rather than let this be deferred until they are actually needed.
//
// I've done items 2 and 3 above.  Unfortunately, since I haven't done item 1, the
// dictionary builder crashes.  So for now I'm disabling this test.  This is not
// that important, since we have a thai dictionary that we do test thoroughly.
//

public class SimpleBITest extends TestFmwk{
    /**
     * Text that doTest hands to each break iterator via setText.
     *
     * The first fragment is ordinary prose with spaces; every following fragment has
     * had its spaces removed, and paragraphs are separated by a newline character.
     * NOTE(review): the space-free form is presumably meant to force the iterator to
     * find word boundaries through the dictionary -- confirm against the rules bundle.
     *
     * The literal is test data: its contents (including any misspellings) determine
     * the break positions and must not be "corrected" casually.  The commented-out
     * lines below are alternative sample texts left in place by the original author.
     */
    public static final String testText =
//        "The rain in Spain stays mainly on the plain.  The plains in Spain are mainly pained with rain.";
//"one-two now--  Hah!  You owe me exactly $1,345.67...  Pay up, huh?  By the way, why don't I send you my re\u0301sume\u0301?  This is a line\r\nbreak.";
//"nowisthetimeforallgoodmen...  tocometothehelpoftheircountry";
"When, in the course of human events, it becomes necessary for one people to dissolve the political bonds which have "
//"When,inthecourseofhumanevents,itbecomesnecessaryforonepeopletodissolvethepoliticalbondswhichhave"
+ "connectedthemwithanother,andtoassumeamongthepowersoftheearth,theseparateandequalstationtowhichthelaws"
+ "ofnatureandofnature'sGodentitlethem,adecentrespecttotheopinionsofmankindrequiresthattheyshoulddeclarethe"
+ "causeswhichimpelthemtotheseparation\n"
+ "Weholdthesetruthstobeself-evident,thatallmenarecreatedequal,thattheyareendowedbytheirCreatorwithcertain"
+ "unalienablerights,thatamongthesearelife,libertyandthepursuitofhappiness.Thattosecuretheserights,governmentsare"
+ "institutedamongmen,derivingtheirjustpowersfromtheconsentofthegoverned.Thatwheneveranyformofgovernment"
+ "becomesdestructivetotheseends,itistherightofthepeopletoalterortoabolishit,andtoinstitutenewgovernment,laying"
+ "itsfoundationonsuchprinciplesandorganizingitspowersinsuchform,astothemshallseemmostlikelytoeffecttheirsafety"
+ "andhappiness.Prudence,indeed,willdictatethatgovernmentslongestablishedshouldnotbechangedforlightandtransient"
+ "causes;andaccordinglyallexperiencehathshownthatmankindaremoredisposedtosuffer,whileevilsaresufferable,than"
+ "torightthemselvesbyabolishingtheformstowhichtheyareaccustomed.Butwhenalongtrainofabusesandusurpations,"
+ "pursuinginvariablythesameobjectevincesadesigntoreducethemunderabsolutedespotism,itistheirright,itistheirduty,"
+ "tothrowoffsuchgovernment,andtoprovidenewguardsfortheirfuturesecurity.--Suchhasbeenthepatientsufferanceof"
+ "thesecolonies;andsuchisnowthenecessitywhichconstrainsthemtoaltertheirformersystemsofgovernment.Thehistory"
+ "ofthepresentKingofGreatBritainisahistoryofrepeatedinjuriesandusurpations,allhavingindirectobjectthe"
+ "establishmentofanabsolutetyrannyoverthesestates.Toprovethis,letfactsbesubmittedtoacandidworld.\n"
+ "Hehasrefusedhisassenttolaws,themostwholesomeandnecessaryforthepublicgood.\n"
+ "Hehasforbiddenhisgovernorstopasslawsofimmediateandpressingimportance,unlesssuspendedintheiroperationtill"
+ "hisassentshouldbeobtained;andwhensosuspended,hehasutterlyneglectedtoattendtothem.\n"
+ "Hehasrefusedtopassotherlawsfortheaccommodationoflargedistrictsofpeople,unlessthosepeoplewouldrelinquish"
+ "therightofrepresentationinthelegislature,arightinestimabletothemandformidabletotyrantsonly.\n"
+ "Hehascalledtogetherlegislativebodiesatplacesunusual,uncomfortable,anddistantfromthedepositoryoftheirpublic"
+ "records,forthesolepurposeoffatiguingthemintocompliancewithhismeasures.\n"
+ "Hehasdissolvedrepresentativehousesrepeatedly,foropposingwithmanlyfirmnesshisinvasionsontherightsofthepeople.\n"
+ "Hehasrefusedforalongtime,aftersuchdissolutions,tocauseotherstobeelected;wherebythelegislativepowers,"
+ "incapableofannihilation,havereturnedtothepeopleatlargefortheirexercise;thestateremaininginthemeantimeexposed"
+ "toallthedangersofinvasionfromwithout,andconvulsionswithin.\n"
+ "Hehasendeavoredtopreventthepopulationofthesestates;forthatpurposeobstructingthelawsfornaturalizationof"
+ "foreigners;refusingtopassotherstoencouragetheirmigrationhither,andraisingtheconditionsofnewappropriationsof"
+ "lands.\n"
+ "Hehasobstructedtheadministrationofjustice,byrefusinghisassenttolawsforestablishingjudiciarypowers.\n"
+ "Hehasmadejudgesdependentonhiswillalone,forthetenureoftheiroffices,andtheamountandpaymentoftheirsalaries.\n"
+ "Hehaserectedamultitudeofnewoffices,andsenthitherswarmsofofficerstoharassourpeople,andeatouttheir"
+ "substance.\n"
+ "Hehaskeptamongus,intimesofpeace,standingarmieswithouttheconsentofourlegislature.\n"
+ "Hehasaffectedtorenderthemilitaryindependentofandsuperiortocivilpower.\n"
+ "Hehascombinedwithotherstosubjectustoajurisdictionforeigntoourconstitution,andunacknowledgedbyourlaws;"
+ "givinghisassenttotheiractsofpretendedlegislation:\n"
+ "Forquarteringlargebodiesofarmedtroopsamongus:\n"
+ "Forprotectingthem,bymocktrial,frompunishmentforanymurderswhichtheyshouldcommitontheinhabitantsofthese"
+ "states:\n"
+ "Forcuttingoffourtradewithallpartsoftheworld:\n"
+ "Forimposingtaxesonuswithoutourconsent:\n"
+ "Fordeprivingusinmanycases,ofthebenefitsoftrialbyjury:\n"
+ "Fortransportingusbeyondseastobetriedforpretendedoffenses:\n"
+ "ForabolishingthefreesystemofEnglishlawsinaneighboringprovince,establishingthereinanarbitrarygovernment,and"
+ "enlargingitsboundariessoastorenderitatonceanexampleandfitinstrumentforintroducingthesameabsoluteruleinthese"
+ "colonies:\n"
+ "Fortakingawayourcharters,abolishingourmostvaluablelaws,andalteringfundamentallytheformsofourgovernments:\n"
+ "Forsuspendingourownlegislatures,anddeclaringthemselvesinvestedwithpowertolegislateforusinallcaseswhatsoever.\n"
+ "Hehasabdicatedgovernmenthere,bydeclaringusoutofhisprotectionandwagingwaragainstus.\n"
+ "Hehasplunderedourseas,ravagedourcoasts,burnedourtowns,anddestroyedthelivesofourpeople.\n"
+ "Heisatthistimetransportinglargearmiesofforeignmercenariestocompletetheworksofdeath,desolationandtyranny,"
+ "alreadybegunwithcircumstancesofcrueltyandperfidyscarcelyparalleledinthemostbarbarousages,andtotalyunworth"
+ "theheadofacivilizednation.\n"
+ "Hehasconstrainedourfellowcitizenstakencaptiveonthehighseastobeararmsagainsttheircountry,tobecomethe"
+ "executionersoftheirfriendsandbrethren,ortofallthemselvesbytheirhands.\n"
+ "Hehasexciteddomesticinsurrectionsamongstus,andhasendeavoredtobringontheinhabitantsofourfrontiers,the"
+ "mercilessIndiansavages,whoseknownruleofwarfare,isundistinguisheddestructionofallages,sexesandconditions.\n"
+ "Ineverystageoftheseoppressionswehavepetitionedforredressinthemosthumbleterms:ourrepeatedpetitionshave"
+ "beenansweredonlybyrepeatedinjury.Aprince,whosecharacteristhusmarkedbyeveryactwhichmaydefineatyrant,is"
+ "unfittobetherulerofafreepeople.\n"
+ "NorhavewebeenwantinginattentiontoourBritishbrethren.Wehavewarnedthemfromtimetotimeofattemptsbytheir"
+ "legislaturetoextendanunwarrantablejurisdictionoverus.Wehaveremindedthemofthecircumstancesofouremigration"
+ "andsettlementhere.Wehaveappealedtotheirnativejusticeandmagnanimity,andwehaveconjuredthembythetiesofour"
+ "commonkindredtodisavowtheseusurpations,which,wouldinevitablyinterruptourconnectionsandcorrespondence.We"
+ "must,therefore,acquiesceinthenecessity,whichdenouncesourseparation,andholdthem,asweholdtherestofmankind,"
+ "enemiesinwar,inpeacefriends.\n"
+ "We,therefore,therepresentativesoftheUnitedStatesofAmerica,inGeneralCongress,assembled,appealingtothe"
+ "SupremeJudgeoftheworldfortherectitudeofourintentions,do,inthename,andbytheauthorityofthegoodpeopleof"
+ "thesecolonies,solemnlypublishanddeclare,thattheseunitedcoloniesare,andofrightoughttobefreeandindependent"
+ "states;thattheyareabsolvedfromallallegiancetotheBritishCrown,andthatallpoliticalconnectionbetweenthemandthe"
+ "stateofGreatBritain,isandoughttobetotallydissolved;andthatasfreeandindependentstates,theyhavefullpowerto"
+ "leveywar,concludepeace,contractalliances,establishcommerce,andtodoallotheractsandthingswhichindependent"
+ "statesmayofrightdo.Andforthesupportofthisdeclaration,withafirmrelianceontheprotectionofDivineProvidence,we"
+ "mutuallypledgetoeachotherourlives,ourfortunesandoursacredhonor.\n";

    public static void main(String[] args) throws Exception {
        new SimpleBITest().run(args);
    }
   
    protected boolean validate() {
        // TODO: remove when english.dict gets fixed
        return false;
    }

    private BreakIterator createTestIterator(int kind) {
        final String bname = "com.ibm.icu.dev.test.rbbi.BreakIteratorRules_en_US_TEST";

        BreakIterator iter = null;

        ListResourceBundle bundle = null;
        try {
            Class cls = Class.forName(bname);
            bundle = (ListResourceBundle)cls.newInstance();
        }
        catch (Exception e) {
            errln("could not create bundle: " + bname + "exception: " + e.getMessage());
            return null;
        }
       
        final String[] kindNames = {
            "Character", "Word", "Line", "Sentence"
        };
        String rulesName = kindNames[kind] + "BreakRules";
        String dictionaryName = kindNames[kind] + "BreakDictionary";
       
        String[] classNames = bundle.getStringArray("BreakIteratorClasses");
        String rules = bundle.getString(rulesName);
        if (classNames[kind].equals("RuleBasedBreakIterator")) {
            iter = new RuleBasedBreakIterator(rules);
        }
        else if (classNames[kind].equals("DictionaryBasedBreakIterator")) {
            try {
                String dictionaryPath = bundle.getString(dictionaryName);
                InputStream dictionary = bundle.getClass().getResourceAsStream(dictionaryPath);
                System.out.println("looking for " + dictionaryPath + " from " + bundle.getClass() + " returned " + dictionary);
                iter = new DictionaryBasedBreakIterator(rules, dictionary);
            }
            catch(IOException e) {
                e.printStackTrace();
                errln(e.getMessage());
                System.out.println(e); // debug
            }
            catch(MissingResourceException e) {
                errln(e.getMessage());
                System.out.println(e); // debug
            }
        }
        if (iter == null) {
            errln("could not create iterator");
        }
       
        return iter;
    }
   
    public void testWordBreak() throws Exception {
        BreakIterator wordBreak = createTestIterator(BreakIterator.KIND_WORD);
        int breaks = doTest(wordBreak);
        logln(String.valueOf(breaks));
    }

    public void testLineBreak() throws Exception {
        BreakIterator lineBreak = createTestIterator(BreakIterator.KIND_LINE);
        int breaks = doTest(lineBreak);
        logln(String.valueOf(breaks));
    }

    public void testSentenceBreak() throws Exception {
        BreakIterator sentenceBreak = createTestIterator(BreakIterator.KIND_SENTENCE);
        int breaks = doTest(sentenceBreak);
        logln(String.valueOf(breaks));
    }

    private int doTest(BreakIterator bi) {
        // forward
        bi.setText(testText);
        int p = bi.first();
        int lastP = p;
        String fragment;
        int breaks = 0;
        logln("Forward...");
        while (p != BreakIterator.DONE) {
            p = bi.next();
            if (p != BreakIterator.DONE) {
                fragment = testText.substring(lastP, p);
            } else {
                fragment = testText.substring(lastP);
            }
            debugPrintln(": >" + fragment + "<");
            ++breaks;
            lastP = p;
        }
        return breaks;
    }

    private void debugPrintln(String s) {
        final String zeros = "0000";
        String temp;
        StringBuffer out = new StringBuffer();
        for (int i = 0; i < s.length(); i++) {
            char c = s.charAt(i);
            if (c >= ' ' && c < '\u007f')
                out.append(c);
            else {
                out.append("\\u");
                temp = Integer.toHexString((int)c);
                out.append(zeros.substring(0, 4 - temp.length()));
                out.append(temp);
            }
        }
        logln(out.toString());
    }

/*    private void debugPrintln2(String s) {
        StringBuffer out = new StringBuffer();
        for (int i = 0; i < s.length(); i++) {
            char c = s.charAt(i);
            if (c >= '\u0100')
                out.append("<" + ((int)c - 0x100) + ">");
            else
                out.append(c);
        }
        logln(out.toString());
    }*/
}

 
TOP

Related Classes of com.ibm.icu.dev.test.rbbi.SimpleBITest

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.