Package opennlp.tools.util

Examples of opennlp.tools.util.StringList
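The snippets below are lifted from the OpenNLP sources. For orientation, here is a minimal, self-contained sketch of the basic StringList API they all rely on; the class name StringListBasics and the token values are illustrative, not part of any harvested example.

  import java.util.Iterator;

  import opennlp.tools.util.StringList;

  public class StringListBasics {

    public static void main(String[] args) {
      // an immutable token list, built from varargs
      StringList name = new StringList("John", "Smith");

      System.out.println(name.getToken(0));   // "John"
      System.out.println(name.size());        // 2

      // iterate over the tokens
      for (Iterator<String> it = name.iterator(); it.hasNext(); ) {
        System.out.println(it.next());
      }

      // case-insensitive comparison with another StringList
      System.out.println(name.compareToIgnoreCase(new StringList("john", "smith")));   // true
    }
  }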


  // helper: creates a case-insensitive Dictionary (the constructor flag is caseSensitive)
  private Dictionary getDict() {
    return new Dictionary(false);
  }

  // helper: wraps a single token in a StringList
  private StringList asSL(String str) {
    return new StringList(str);
  }
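These two helpers come from a Dictionary test. Below is a hedged sketch, not taken from that test, of what a case-insensitive Dictionary built with new Dictionary(false) supports; the entry value is illustrative.

  import opennlp.tools.dictionary.Dictionary;
  import opennlp.tools.util.StringList;

  public class CaseInsensitiveLookupSketch {

    public static void main(String[] args) {
      Dictionary dict = new Dictionary(false);        // false -> case-insensitive
      dict.put(new StringList("McKenzie"));

      // the lookup ignores case because the dictionary was created case-insensitively
      System.out.println(dict.contains(new StringList("mckenzie")));   // true
      System.out.println(dict.size());                                 // 1
    }
  }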


  @Test
  public void testParsingEnglishSample() throws IOException {

    ObjectStream<StringList> sampleStream = openData("census90.sample");

    StringList personName = sampleStream.read();

    // verify the first 5 taken from the Surname data
    assertNotNull(personName);
    assertEquals("Smith", personName.getToken(0));
    personName = sampleStream.read();
    assertNotNull(personName);
    assertEquals("Johnson", personName.getToken(0));
    personName = sampleStream.read();
    assertNotNull(personName);
    assertEquals("Williams", personName.getToken(0));
    personName = sampleStream.read();
    assertNotNull(personName);
    assertEquals("Jones", personName.getToken(0));
    personName = sampleStream.read();
    assertNotNull(personName);
    assertEquals("Brown", personName.getToken(0));

    // verify the next 5 taken from the female names
    personName = sampleStream.read();
    assertNotNull(personName);
    assertEquals("Mary", personName.getToken(0));
    personName = sampleStream.read();
    assertNotNull(personName);
    assertEquals("Patricia", personName.getToken(0));
    personName = sampleStream.read();
    assertNotNull(personName);
    assertEquals("Linda", personName.getToken(0));
    personName = sampleStream.read();
    assertNotNull(personName);
    assertEquals("Barbara", personName.getToken(0));
    personName = sampleStream.read();
    assertNotNull(personName);
    assertEquals("Elizabeth", personName.getToken(0));

    // verify the last 5 taken from the male names
    personName = sampleStream.read();
    assertNotNull(personName);
    assertEquals("James", personName.getToken(0));
    personName = sampleStream.read();
    assertNotNull(personName);
    assertEquals("John", personName.getToken(0));
    personName = sampleStream.read();
    assertNotNull(personName);
    assertEquals("Robert", personName.getToken(0));
    personName = sampleStream.read();
    assertNotNull(personName);
    assertEquals("Michael", personName.getToken(0));
    personName = sampleStream.read();
    assertNotNull(personName);
    assertEquals("William", personName.getToken(0));

    // verify the end of the file.
    personName = sampleStream.read();
    assertNull(personName);
  }
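The test above depends only on ObjectStream.read() returning the next StringList and null once the data is exhausted. A hedged sketch of draining such a stream into a Dictionary; the collect helper is illustrative and the stream source is left abstract.

  import java.io.IOException;

  import opennlp.tools.dictionary.Dictionary;
  import opennlp.tools.util.ObjectStream;
  import opennlp.tools.util.StringList;

  public class StreamToDictionarySketch {

    // collect every StringList read from the stream into a case-insensitive Dictionary
    static Dictionary collect(ObjectStream<StringList> samples) throws IOException {
      Dictionary dict = new Dictionary(false);

      StringList name;
      while ((name = samples.read()) != null) {   // read() returns null at the end of the stream
        dict.put(name);
      }

      return dict;
    }
  }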

      //add all uni-grams
      for (int wi=0;wi<words.length;wi++) {
        words[wi] = pwords[wi].getCoveredText();
      }

      mdict.add(new StringList(words), 1, 1);
      // add tri-grams and bi-grams for the initial sequence
      Parse[] chunks = collapsePunctuation(ParserEventStream.getInitialChunks(p),rules.getPunctuationTags());
      String[] cwords = new String[chunks.length];
      for (int wi=0;wi<cwords.length;wi++) {
        cwords[wi] = chunks[wi].getHead().getCoveredText();
      }
      mdict.add(new StringList(cwords), 2, 3);

      //emulate reductions to produce additional n-grams
      int ci = 0;
      while (ci < chunks.length) {
        //System.err.println("chunks["+ci+"]="+chunks[ci].getHead().getCoveredText()+" chunks.length="+chunks.length + "  " + chunks[ci].getParent());

        if (chunks[ci].getParent() == null) {
          chunks[ci].show();
        }
        if (lastChild(chunks[ci], chunks[ci].getParent(),rules.getPunctuationTags())) {
          //perform reduce
          int reduceStart = ci;
          while (reduceStart >=0 && chunks[reduceStart].getParent() == chunks[ci].getParent()) {
            reduceStart--;
          }
          reduceStart++;
          chunks = ParserEventStream.reduceChunks(chunks,ci,chunks[ci].getParent());
          ci = reduceStart;
          if (chunks.length != 0) {
            String[] window = new String[5];
            int wi = 0;
            if (ci-2 >= 0) window[wi++] = chunks[ci-2].getHead().getCoveredText();
            if (ci-1 >= 0) window[wi++] = chunks[ci-1].getHead().getCoveredText();
            window[wi++] = chunks[ci].getHead().getCoveredText();
            if (ci+1 < chunks.length) window[wi++] = chunks[ci+1].getHead().getCoveredText();
            if (ci+2 < chunks.length) window[wi++] = chunks[ci+2].getHead().getCoveredText();
            if (wi < 5) {
              String[] subWindow = new String[wi];
              for (int swi=0;swi<wi;swi++) {
                subWindow[swi]=window[swi];
              }
              window = subWindow;
            }
            if (window.length >=3) {
              mdict.add(new StringList(window), 2, 3);
            }
            else if (window.length == 2) {
              mdict.add(new StringList(window), 2, 2);
            }
          }
          ci=reduceStart-1; //ci will be incremented at end of loop
        }
        ci++;
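The excerpt above feeds token windows into mdict with minimum and maximum n-gram lengths. Assuming mdict is an opennlp.tools.ngram.NGramModel (its declaration is not part of the excerpt), here is a hedged sketch of that add(StringList, min, max) pattern with illustrative tokens.

  import opennlp.tools.ngram.NGramModel;
  import opennlp.tools.util.StringList;

  public class NGramWindowSketch {

    public static void main(String[] args) {
      NGramModel ngrams = new NGramModel();

      // register all bi-grams and tri-grams of the window, as the excerpt does for its head words
      StringList window = new StringList("the", "quick", "brown", "fox");
      ngrams.add(window, 2, 3);

      System.out.println(ngrams.getCount(new StringList("quick", "brown")));   // 1
      System.out.println(ngrams.size());   // number of distinct n-grams collected
    }
  }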

        String word = iterator.next();

        Attributes tagAttribute = new Attributes();
        tagAttribute.setValue("tags", tagsToString(getTags(word)));

        return new Entry(new StringList(word), tagAttribute);
      }

      public void remove() {
        throw new UnsupportedOperationException();
      }
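The iterator above wraps each word and its tag string into a serializer Entry. A hedged sketch of that Entry/Attributes pairing with illustrative values (the real code derives the tag string via its tagsToString and getTags helpers):

  import opennlp.tools.dictionary.serializer.Attributes;
  import opennlp.tools.dictionary.serializer.Entry;
  import opennlp.tools.util.StringList;

  public class EntryWithTagsSketch {

    public static void main(String[] args) {
      Attributes tagAttribute = new Attributes();
      tagAttribute.setValue("tags", "NN NNS");                 // illustrative tag string

      Entry entry = new Entry(new StringList("house"), tagAttribute);

      System.out.println(entry.getTokens().getToken(0));            // "house"
      System.out.println(entry.getAttributes().getValue("tags"));   // "NN NNS"
    }
  }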

        String tagString = entry.getAttributes().getValue("tags");

        String[] tags = tagString.split(" ");

        StringList word = entry.getTokens();

        if (word.size() != 1)
          throw new InvalidFormatException("Each entry must have exactly one token! "+word);

        newPosDict.dictionary.put(word.getToken(0), tags);
      }});

    newPosDict.caseSensitive = isCaseSensitive;

    // TODO: The dictionary API needs to be improved to do this better!
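The parsing code above rebuilds a word-to-tags map from single-token entries; it appears to sit inside POSDictionary.create(...). Assuming that is the case, a hedged sketch of loading and querying such a tag dictionary follows; the file name and word are illustrative.

  import java.io.FileInputStream;
  import java.io.IOException;

  import opennlp.tools.postag.POSDictionary;
  import opennlp.tools.util.InvalidFormatException;

  public class TagDictionarySketch {

    public static void main(String[] args) throws IOException, InvalidFormatException {
      POSDictionary posDict = POSDictionary.create(new FileInputStream("en-tagdict.xml"));

      String[] tags = posDict.getTags("house");   // null if the word is not in the dictionary
      if (tags != null) {
        for (String tag : tags) {
          System.out.println(tag);
        }
      }
    }
  }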

          return dictionaryIterator.hasNext();
        }

        public Entry next() {

          StringList tokens = dictionaryIterator.next();

          return new Entry(tokens, new Attributes());
        }

        public void remove() {

        int tokenIndex = 0;
        while (whiteSpaceTokenizer.hasMoreTokens()) {
          tokens[tokenIndex++] = whiteSpaceTokenizer.nextToken();
        }

        dictionary.put(new StringList(tokens));
      }
    }

    return dictionary;
  }

        boolean result = false;

        if (obj instanceof String) {
          String str = (String) obj;

          result = entrySet.contains(new StringListWrapper(new StringList(str)));

        }

        return result;
      }
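This contains(Object) override matches the single-token String view that Dictionary exposes through asStringSet(); assuming that is the surrounding method, a hedged sketch of using the view:

  import java.util.Set;

  import opennlp.tools.dictionary.Dictionary;
  import opennlp.tools.util.StringList;

  public class StringSetViewSketch {

    public static void main(String[] args) {
      Dictionary dict = new Dictionary(false);
      dict.put(new StringList("token"));

      Set<String> view = dict.asStringSet();
      System.out.println(view.contains("token"));   // true, via the wrapper-based contains above
    }
  }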

              "", entry.getAttributes().getValue(key));
    }

    hd.startElement("", "", ENTRY_ELEMENT, entryAttributes);

    StringList tokens = entry.getTokens();

    for (Iterator<String> it = tokens.iterator(); it.hasNext(); ) {

      hd.startElement("", "", TOKEN_ELEMENT, new AttributesImpl());

      String token = it.next();
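The serializer above writes each Entry as an entry element with nested token elements. A hedged sketch of the round trip at the Dictionary level, which uses that same XML format; the entry value is illustrative.

  import java.io.ByteArrayInputStream;
  import java.io.ByteArrayOutputStream;
  import java.io.IOException;

  import opennlp.tools.dictionary.Dictionary;
  import opennlp.tools.util.InvalidFormatException;
  import opennlp.tools.util.StringList;

  public class DictionaryRoundTripSketch {

    public static void main(String[] args) throws IOException, InvalidFormatException {
      Dictionary dict = new Dictionary(false);
      dict.put(new StringList("New", "York"));

      ByteArrayOutputStream out = new ByteArrayOutputStream();
      dict.serialize(out);                           // writes the entry/token XML format

      Dictionary reloaded = new Dictionary(new ByteArrayInputStream(out.toByteArray()));
      System.out.println(reloaded.contains(new StringList("New", "York")));   // true
    }
  }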

       else if (ENTRY_ELEMENT.equals(localName)) {

         String[] tokens = mTokenList.toArray(
             new String[mTokenList.size()]);

         Entry entry = new Entry(new StringList(tokens), mAttributes);

         try {
           mInserter.insert(entry);
         } catch (InvalidFormatException e) {
           throw new SAXException("Invalid dictionary format!", e);
