Package opennlp.tools.util

Examples of opennlp.tools.util.StringList


    if (cutoffUnder > 0 || cutoffOver < Integer.MAX_VALUE) {

      for (Iterator<StringList> it = iterator(); it.hasNext();) {

        StringList ngram = it.next();

        int count = getCount(ngram);

        if (count < cutoffUnder ||
            count > cutoffOver) {
View Full Code Here


            return mDictionaryIterator.hasNext();
          }

          public Entry next() {

            StringList tokens = mDictionaryIterator.next();

            Attributes attributes = new Attributes();

            attributes.setValue(COUNT, Integer.toString(getCount(tokens)));
View Full Code Here

    DictionarySerializer.create(in, new EntryInserter() {
      public void insert(Entry entry) throws InvalidFormatException {

        String operationString = entry.getAttributes().getValue("operation");

        StringList word = entry.getTokens();

        if (word.size() != 1)
          throw new InvalidFormatException("Each entry must have exactly one token! "+word);
       
        // parse operation
        Operation operation = Operation.parse(operationString);
       
        if (operation == null)
            throw new InvalidFormatException("Unkown operation type: " + operationString);
       
        operationTable.put(word.getToken(0), operation);
      }});
  }
View Full Code Here

    this.lineStream = new PlainTextByLineStream(in, this.encoding);
  }

  public StringList read() throws IOException {
    String line = lineStream.read();
    StringList name = null;

    if ((line != null) &&
        (!StringUtil.isEmpty(line))) {
      String name2;
      // find the location of the name separator in the line of data.
      int pos = line.indexOf(' ');
      if ((pos != -1)) {
        String parsed = line.substring(0, pos);
        // the data is in ALL CAPS ... so the easiest way is to convert
        // back to standard mixed case.
        if ((parsed.length() > 2) &&
            (parsed.startsWith("MC"))) {
          name2 = parsed.substring(0,1).toUpperCase(locale) +
                  parsed.substring(1,2).toLowerCase(locale) +
                  parsed.substring(2,3).toUpperCase(locale) +
                  parsed.substring(3).toLowerCase(locale);
        } else {
          name2 = parsed.substring(0,1).toUpperCase(locale) +
                  parsed.substring(1).toLowerCase(locale);
        }
        name = new StringList(new String[]{name2});
      }
    }

    return name;
  }
View Full Code Here

        String token = iterator.next();

        Attributes attributes = new Attributes();
        attributes.setValue("operation", getOperation(token).toString());

        return new Entry(new StringList(token), attributes);
      }

      /**
       * Not supported; always throws.
       *
       * @throws UnsupportedOperationException on every call
       */
      public void remove() {
        throw new UnsupportedOperationException();
      }
View Full Code Here

   */
  public Index(Iterator<StringList> tokenLists) {

    while (tokenLists.hasNext()) {

      StringList tokens = tokenLists.next();

      for (int i = 0; i < tokens.size(); i++) {
        this.tokens.add(tokens.getToken(i));
      }
    }
  }
View Full Code Here

          return dictionaryIterator.hasNext();
        }

        public Entry next() {

          StringList tokens = dictionaryIterator.next();
         
          return new Entry(tokens, new Attributes());
        }

        public void remove() {
View Full Code Here

        int tokenIndex = 0;
        while (whiteSpaceTokenizer.hasMoreTokens()) {
          tokens[tokenIndex++] = whiteSpaceTokenizer.nextToken();
        }

        dictionary.put(new StringList(tokens));
      }
    }

    return dictionary;
  }
View Full Code Here

        boolean result = false;

        if (obj instanceof String) {
          String str = (String) obj;

          result = entrySet.contains(new StringListWrapper(new StringList(str)));

        }

        return result;
      }
View Full Code Here

    NGramModel model = new NGramModel();
    model.add(tokens[index], minLength, maxLength);

    for (Iterator<StringList> it = model.iterator(); it.hasNext();) {

      StringList tokenList = it.next();

      if (tokenList.size() > 0) {
        features.add("ng=" + tokenList.getToken(0).toLowerCase());
      }
    }
  }
View Full Code Here

TOP

Related Classes of opennlp.tools.util.StringList

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.