Package opennlp.tools.util

Examples of opennlp.tools.util.StringList


   * Tests the lookup of tokens of different case.
   */
  @Test
  public void testDifferentCaseLookupCaseSensitive() {

    StringList entry1 = new StringList(new String[]{"1a", "1b"});
    StringList entry2 = new StringList(new String[]{"1A", "1B"});

    Dictionary dict = getCaseSensitive();

    dict.put(entry1);

View Full Code Here


  private Dictionary getDict() {
    return new Dictionary(true);
  }
 
  private StringList asSL(String str) {
    return new StringList(str);
  }
View Full Code Here

  private Dictionary getDict() {
    return new Dictionary(false);
  }
 
  private StringList asSL(String str) {
    return new StringList(str);
  }
View Full Code Here

  @Test
  public void testParsingEnglishSample() throws IOException {

    ObjectStream<StringList> sampleStream = openData("census90.sample");

    StringList personName = sampleStream.read();

    // verify the first 5 taken from the Surname data
    assertNotNull(personName);
    assertEquals("Smith", personName.getToken(0));
    personName = sampleStream.read();
    assertNotNull(personName);
    assertEquals("Johnson", personName.getToken(0));
    personName = sampleStream.read();
    assertNotNull(personName);
    assertEquals("Williams", personName.getToken(0));
    personName = sampleStream.read();
    assertNotNull(personName);
    assertEquals("Jones", personName.getToken(0));
    personName = sampleStream.read();
    assertNotNull(personName);
    assertEquals("Brown", personName.getToken(0));

    // verify the next 5 taken from the female names
    personName = sampleStream.read();
    assertNotNull(personName);
    assertEquals("Mary", personName.getToken(0));
    personName = sampleStream.read();
    assertNotNull(personName);
    assertEquals("Patricia", personName.getToken(0));
    personName = sampleStream.read();
    assertNotNull(personName);
    assertEquals("Linda", personName.getToken(0));
    personName = sampleStream.read();
    assertNotNull(personName);
    assertEquals("Barbara", personName.getToken(0));
    personName = sampleStream.read();
    assertNotNull(personName);
    assertEquals("Elizabeth", personName.getToken(0));

    // verify the last 5 taken from the male names
    personName = sampleStream.read();
    assertNotNull(personName);
    assertEquals("James", personName.getToken(0));
    personName = sampleStream.read();
    assertNotNull(personName);
    assertEquals("John", personName.getToken(0));
    personName = sampleStream.read();
    assertNotNull(personName);
    assertEquals("Robert", personName.getToken(0));
    personName = sampleStream.read();
    assertNotNull(personName);
    assertEquals("Michael", personName.getToken(0));
    personName = sampleStream.read();
    assertNotNull(personName);
    assertEquals("William", personName.getToken(0));

    // verify the end of the file.
    personName = sampleStream.read();
    assertNull(personName);
  }
View Full Code Here

      String text = nameAnnotation.getCoveredText();

      // if possible replace text with normalization from dictionary
      if (mLookupDictionary != null) {

        StringList tokens = new StringList(text);

        String normalizedText = mLookupDictionary.get(tokens);

        if (normalizedText != null) {
          text = normalizedText;
View Full Code Here

   * @throws IOException
   */
  public static Dictionary createDictionary(ObjectStream<StringList> sampleStream) throws IOException {

    Dictionary mNameDictionary = new Dictionary(true);
    StringList entry;

    entry = sampleStream.read();
    while (entry != null) {
      if (!mNameDictionary.contains(entry)) {
        mNameDictionary.put(entry);
View Full Code Here

      //add all uni-grams
      for (int wi=0;wi<words.length;wi++) {
        words[wi] = pwords[wi].toString();
      }

      mdict.add(new StringList(words), 1, 1);
      //add tri-grams and bi-grams for inital sequence
      Parse[] chunks = collapsePunctuation(ParserEventStream.getInitialChunks(p),rules.getPunctuationTags());
      String[] cwords = new String[chunks.length];
      for (int wi=0;wi<cwords.length;wi++) {
        cwords[wi] = chunks[wi].getHead().toString();
      }
      mdict.add(new StringList(cwords), 2, 3);

      //emulate reductions to produce additional n-grams
      int ci = 0;
      while (ci < chunks.length) {
        //System.err.println("chunks["+ci+"]="+chunks[ci].getHead().toString()+" chunks.length="+chunks.length);
        if (lastChild(chunks[ci], chunks[ci].getParent(),rules.getPunctuationTags())) {
          //perform reduce
          int reduceStart = ci;
          while (reduceStart >=0 && chunks[reduceStart].getParent() == chunks[ci].getParent()) {
            reduceStart--;
          }
          reduceStart++;
          chunks = ParserEventStream.reduceChunks(chunks,ci,chunks[ci].getParent());
          ci = reduceStart;
          if (chunks.length != 0) {
            String[] window = new String[5];
            int wi = 0;
            if (ci-2 >= 0) window[wi++] = chunks[ci-2].getHead().toString();
            if (ci-1 >= 0) window[wi++] = chunks[ci-1].getHead().toString();
            window[wi++] = chunks[ci].getHead().toString();
            if (ci+1 < chunks.length) window[wi++] = chunks[ci+1].getHead().toString();
            if (ci+2 < chunks.length) window[wi++] = chunks[ci+2].getHead().toString();
            if (wi < 5) {
              String[] subWindow = new String[wi];
              for (int swi=0;swi<wi;swi++) {
                subWindow[swi]=window[swi];
              }
              window = subWindow;
            }
            if (window.length >=3) {
              mdict.add(new StringList(window), 2, 3);
            }
            else if (window.length == 2) {
              mdict.add(new StringList(window), 2, 2);
            }
          }
          ci=reduceStart-1; //ci will be incremented at end of loop
        }
        ci++;
View Full Code Here

        return mDictionaryIterator.hasNext();
      }

      public Entry next() {

        StringList tokens = mDictionaryIterator.next();

        Attributes attributes = new Attributes();

        attributes.setValue("value", get(tokens));
View Full Code Here

    POSSample sample;
    while((sample = samples.read()) != null) {
      String[] words = sample.getSentence();
     
      if (words.length > 0)
        ngramModel.add(new StringList(words), 1, 1);
    }
   
    ngramModel.cutoff(cutoff, Integer.MAX_VALUE);
   
    return ngramModel.toDictionary(true);
View Full Code Here

    List<String> e = new ArrayList<String>();
    e.add("default");
    // add the word itself
    e.add("w=" + lex);
    dictGram[0] = lex;
    if (dict == null || !dict.contains(new StringList(dictGram))) {
      // do some basic suffix analysis
      String[] suffs = getSuffixes(lex);
      for (int i = 0; i < suffs.length; i++) {
        e.add("suf=" + suffs[i]);
      }
View Full Code Here

TOP

Related Classes of opennlp.tools.util.StringList

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.