Package opennlp.tools.util

Examples of opennlp.tools.util.StringList


    // Dictionary look-ups for the head strings of p0 and its neighbours.
    // Flag naming, as established by the assignments below: u*/b*/t* hold the
    // result of dict.contains(...) for a unigram/bigram/trigram, and the suffix
    // lists the parse variables involved (e.g. t_2_10 is built from p_2, p_1, p0;
    // b12 from p1, p2). Nothing in this block runs when dict is null.
    // The unigram/bigram/trigram arrays are overwritten in place before each look-up.
    if (dict != null) {

      if (p_2 != null) {
        unigram[0] = p_2.getHead().toString();
        u_2 = dict.contains(new StringList(unigram));
      }

      if (p2 != null) {
        unigram[0] = p2.getHead().toString();
        u2 = dict.contains(new StringList(unigram));
      }

      // p0 is dereferenced without a null check, unlike its neighbours.
      unigram[0] = p0.getHead().toString();
      u0 = dict.contains(new StringList(unigram));

      if (p_2 != null && p_1 != null) {
        bigram[0] = p_2.getHead().toString();
        bigram[1] = p_1.getHead().toString();
        b_2_1 = dict.contains(new StringList(bigram));

        trigram[0] = p_2.getHead().toString();
        trigram[1] = p_1.getHead().toString();
        trigram[2] = p0.getHead().toString();
        t_2_10 = dict.contains(new StringList(trigram));
      }
      if (p_1 != null && p1 != null) {
        trigram[0] = p_1.getHead().toString();
        trigram[1] = p0.getHead().toString();
        trigram[2] = p1.getHead().toString();
        t_101 = dict.contains(new StringList(trigram));
      }
      if (p_1 != null) {
        unigram[0] = p_1.getHead().toString();
        u_1 = dict.contains(new StringList(unigram));

        //extra check for 2==null case
        // An n-gram flag only stays true if each of its unigram flags is true too.
        // '&' binds tighter than '&&'; on boolean operands the result is the
        // same as an all-'&&' chain, only without short-circuiting.
        b_2_1 = b_2_1 && u_1 & u_2;
        t_2_10 = t_2_10 && u_1 & u_2 & u0;
        // NOTE(review): u1 is not assigned in this block until the p1 != null
        // branch further down; here it still holds whatever value it had on
        // entry. If that value is false, t_101 is cleared here and the later
        // "t_101 && ..." cannot restore it -- confirm this is intended.
        t_101 = t_101 && u_1 & u0 && u1;

        bigram[0] = p_1.getHead().toString();
        bigram[1] = p0.getHead().toString();
        b_10 = dict.contains(new StringList(bigram)) && u_1 && u0;
      }
      if (p1 != null && p2 != null) {
        bigram[0] = p1.getHead().toString();
        bigram[1] = p2.getHead().toString();
        b12 = dict.contains(new StringList(bigram));

        trigram[0] = p0.getHead().toString();
        trigram[1] = p1.getHead().toString();
        trigram[2] = p2.getHead().toString();
        t012 = dict.contains(new StringList(trigram));
      }
      if (p1 != null) {
        unigram[0] = p1.getHead().toString();
        u1 = dict.contains(new StringList(unigram));

        //extra check for 2==null case
        // Same gating as above, for the n-grams that involve p1.
        b12 = b12 && u1 && u2;
        t012 = t012 && u1 && u2 && u0;
        t_101 = t_101 && u0 && u_1 && u1;

        bigram[0] = p0.getHead().toString();
        bigram[1] = p1.getHead().toString();
        b01 = dict.contains(new StringList(bigram));
        b01 = b01 && u0 && u1;
      }
    }

    String consp_2 = cons(p_2, -2);
View Full Code Here


              "", entry.getAttributes().getValue(key));
    }

    hd.startElement("", "", ENTRY_ELEMENT, entryAttributes);

    StringList tokens = entry.getTokens();

    for (Iterator<String> it = tokens.iterator(); it.hasNext(); ) {

      hd.startElement("", "", TOKEN_ELEMENT, new AttributesImpl());

      String token = it.next();
View Full Code Here

      for (int wi=0;wi<words.length;wi++) {
        words[wi] =
            tt[wi].substring(0,tt[wi].lastIndexOf('_'));
      }

      ngramModel.add(new StringList(words), 1, 1);
    }

    System.out.println("Saving the dictionary");

    ngramModel.cutoff(cutoff, Integer.MAX_VALUE);
View Full Code Here

       else if (ENTRY_ELEMENT.equals(localName)) {

         String[] tokens = mTokenList.toArray(
             new String[mTokenList.size()]);

         Entry entry = new Entry(new StringList(tokens), mAttributes);

         try {
           mInserter.insert(entry);
         } catch (InvalidFormatException e) {
           throw new SAXException("Invalid dictionary format!", e);
View Full Code Here

  private static Dictionary readNames(String nameFile) throws IOException {
    Dictionary names = new Dictionary();

    BufferedReader nameReader = new BufferedReader(new FileReader(nameFile));
    for (String line = nameReader.readLine(); line != null; line = nameReader.readLine()) {
      names.put(new StringList(line));
    }

    return names;
  }
View Full Code Here

      //add all uni-grams
      for (int wi=0;wi<words.length;wi++) {
        words[wi] = pwords[wi].getCoveredText();
      }

      mdict.add(new StringList(words), 1, 1);
      //add tri-grams and bi-grams for inital sequence
      Parse[] chunks = collapsePunctuation(ParserEventStream.getInitialChunks(p),rules.getPunctuationTags());
      String[] cwords = new String[chunks.length];
      for (int wi=0;wi<cwords.length;wi++) {
        cwords[wi] = chunks[wi].getHead().getCoveredText();
      }
      mdict.add(new StringList(cwords), 2, 3);

      //emulate reductions to produce additional n-grams
      int ci = 0;
      while (ci < chunks.length) {
        //System.err.println("chunks["+ci+"]="+chunks[ci].getHead().getCoveredText()+" chunks.length="+chunks.length);
        if (lastChild(chunks[ci], chunks[ci].getParent(),rules.getPunctuationTags())) {
          //perform reduce
          int reduceStart = ci;
          while (reduceStart >=0 && chunks[reduceStart].getParent() == chunks[ci].getParent()) {
            reduceStart--;
          }
          reduceStart++;
          chunks = ParserEventStream.reduceChunks(chunks,ci,chunks[ci].getParent());
          ci = reduceStart;
          if (chunks.length != 0) {
            String[] window = new String[5];
            int wi = 0;
            if (ci-2 >= 0) window[wi++] = chunks[ci-2].getHead().getCoveredText();
            if (ci-1 >= 0) window[wi++] = chunks[ci-1].getHead().getCoveredText();
            window[wi++] = chunks[ci].getHead().getCoveredText();
            if (ci+1 < chunks.length) window[wi++] = chunks[ci+1].getHead().getCoveredText();
            if (ci+2 < chunks.length) window[wi++] = chunks[ci+2].getHead().getCoveredText();
            if (wi < 5) {
              String[] subWindow = new String[wi];
              for (int swi=0;swi<wi;swi++) {
                subWindow[swi]=window[swi];
              }
              window = subWindow;
            }
            if (window.length >=3) {
              mdict.add(new StringList(window), 2, 3);
            }
            else if (window.length == 2) {
              mdict.add(new StringList(window), 2, 2);
            }
          }
          ci=reduceStart-1; //ci will be incremented at end of loop
        }
        ci++;
View Full Code Here

    // Dictionary look-ups for the covered text of the heads of p0 and its
    // neighbours. Flag naming, as established by the assignments below:
    // u*/b*/t* hold the result of dict.contains(...) for a unigram/bigram/trigram,
    // and the suffix lists the parse variables involved (e.g. t_2_10 is built
    // from p_2, p_1, p0; b12 from p1, p2). Nothing in this block runs when dict is null.
    // The unigram/bigram/trigram arrays are overwritten in place before each look-up.
    if (dict != null) {

      if (p_2 != null) {
        unigram[0] = p_2.getHead().getCoveredText();
        u_2 = dict.contains(new StringList(unigram));
      }

      if (p2 != null) {
        unigram[0] = p2.getHead().getCoveredText();
        u2 = dict.contains(new StringList(unigram));
      }

      // p0 is dereferenced without a null check, unlike its neighbours.
      unigram[0] = p0.getHead().getCoveredText();
      u0 = dict.contains(new StringList(unigram));

      if (p_2 != null && p_1 != null) {
        bigram[0] = p_2.getHead().getCoveredText();
        bigram[1] = p_1.getHead().getCoveredText();
        b_2_1 = dict.contains(new StringList(bigram));

        trigram[0] = p_2.getHead().getCoveredText();
        trigram[1] = p_1.getHead().getCoveredText();
        trigram[2] = p0.getHead().getCoveredText();
        t_2_10 = dict.contains(new StringList(trigram));
      }
      if (p_1 != null && p1 != null) {
        trigram[0] = p_1.getHead().getCoveredText();
        trigram[1] = p0.getHead().getCoveredText();
        trigram[2] = p1.getHead().getCoveredText();
        t_101 = dict.contains(new StringList(trigram));
      }
      if (p_1 != null) {
        unigram[0] = p_1.getHead().getCoveredText();
        u_1 = dict.contains(new StringList(unigram));

        //extra check for 2==null case
        // An n-gram flag only stays true if each of its unigram flags is true too.
        // '&' binds tighter than '&&'; on boolean operands the result is the
        // same as an all-'&&' chain, only without short-circuiting.
        b_2_1 = b_2_1 && u_1 & u_2;
        t_2_10 = t_2_10 && u_1 & u_2 & u0;
        // NOTE(review): u1 is not assigned in this block until the p1 != null
        // branch further down; here it still holds whatever value it had on
        // entry. If that value is false, t_101 is cleared here and the later
        // "t_101 && ..." cannot restore it -- confirm this is intended.
        t_101 = t_101 && u_1 & u0 && u1;

        bigram[0] = p_1.getHead().getCoveredText();
        bigram[1] = p0.getHead().getCoveredText();
        b_10 = dict.contains(new StringList(bigram)) && u_1 && u0;
      }
      if (p1 != null && p2 != null) {
        bigram[0] = p1.getHead().getCoveredText();
        bigram[1] = p2.getHead().getCoveredText();
        b12 = dict.contains(new StringList(bigram));

        trigram[0] = p0.getHead().getCoveredText();
        trigram[1] = p1.getHead().getCoveredText();
        trigram[2] = p2.getHead().getCoveredText();
        t012 = dict.contains(new StringList(trigram));
      }
      if (p1 != null) {
        unigram[0] = p1.getHead().getCoveredText();
        u1 = dict.contains(new StringList(unigram));

        //extra check for 2==null case
        // Same gating as above, for the n-grams that involve p1.
        b12 = b12 && u1 && u2;
        t012 = t012 && u1 && u2 && u0;
        t_101 = t_101 && u0 && u_1 && u1;

        bigram[0] = p0.getHead().getCoveredText();
        bigram[1] = p1.getHead().getCoveredText();
        b01 = dict.contains(new StringList(bigram));
        b01 = b01 && u0 && u1;
      }
    }

    String consp_2 = cons(p_2, -2);
View Full Code Here

   */
  public Index(Iterator<StringList> tokenLists) {

    while (tokenLists.hasNext()) {

      StringList tokens = tokenLists.next();

      for (int i = 0; i < tokens.size(); i++) {
        this.tokens.add(tokens.getToken(i));
      }
    }
  }
View Full Code Here

        for (int i = textIndex; i < textIndex + lengthIndex; i++) {
          grams[i - textIndex] = ngram.getToken(i);
        }

        add(new StringList(grams));
      }
    }
  }
View Full Code Here

          textIndex + lengthIndex - 1 < chars.length(); textIndex++) {

        String gram =
            chars.substring(textIndex, textIndex + lengthIndex).toLowerCase();

        add(new StringList(new String[]{gram}));
      }
    }
  }
View Full Code Here

TOP

Related Classes of opennlp.tools.util.StringList

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.