Package edu.stanford.nlp.ling

Examples of edu.stanford.nlp.ling.TaggedWord


      // Example: parse a sentence whose tokens are supplied together with their tags.
      // sent3 and tag3 are parallel arrays: tag3[i] is the tag given for sent3[i].
      String[] sent3 = { "It", "can", "can", "it", "." };
      String[] tag3 = { "PRP", "MD", "VB", "PRP", "." }; // Parser gets second "can" wrong without help
      List<TaggedWord> sentence3 = new ArrayList<TaggedWord>();
      // Wrap each token/tag pair in a TaggedWord so the tag travels with its word.
      for (int i = 0; i < sent3.length; i++) {
        sentence3.add(new TaggedWord(sent3[i], tag3[i]));
      }
      // lp is declared outside this excerpt (presumably the parser instance -- confirm).
      Tree parse = lp.parse(sentence3);
      // Print the resulting tree (presumably in Penn Treebank bracket notation -- confirm).
      parse.pennPrint();

      List<List<? extends HasWord>> tmp =
View Full Code Here


        throw new IllegalArgumentException("File " + filename + " line #" +
                                           linesRead + " too short");
      }
      String word = pieces[wordColumn];
      String tag = pieces[tagColumn];
      next.add(new TaggedWord(word, tag));
      try {
        line = reader.readLine();
        ++linesRead;
      } catch (IOException e) {
        throw new RuntimeException(e);
View Full Code Here

      if (indexUnd < 0) {
        throw new IllegalArgumentException("Data format error: can't find delimiter \"" + tagSeparator + "\" in word \"" + token + "\" (line " + (numSentences+1) + " of " + filename + ')');
      }
      String word = token.substring(0, indexUnd).intern();
      String tag = token.substring(indexUnd + 1).intern();
      next.add(new TaggedWord(word, tag));
    }
  }
View Full Code Here

    int counter = 0;
    for (Tree tree : trees) {
      counter++;
      List<TaggedWord> taggedWords = tree.taggedYield();
      for (int i = 0, size = taggedWords.size(); i < size; i++) {
        TaggedWord taggedWord = taggedWords.get(i);
        String word = taggedWord.word();
        if (word.equals(Lexicon.BOUNDARY)) {
          continue;
        }
        wordCounter.incrementCount(taggedWord);
        wordLengthCounter.incrementCount(Integer.valueOf(word.length()));
        for (int j = 0, length = word.length(); j < length; j++) {
          Symbol sym = Symbol.cannonicalSymbol(word.charAt(j));
          charCounter.incrementCount(sym);
        }
        charCounter.incrementCount(Symbol.END_WORD);
      }
    }

    Set<Symbol> singletonChars = Counters.keysBelow(charCounter, 1.5);
    Set<TaggedWord> singletonWords = Counters.keysBelow(wordCounter, 1.5);

    ClassicCounter<String> singletonWordPOSes = new ClassicCounter<String>();
    for (TaggedWord taggedWord : singletonWords) {
      singletonWordPOSes.incrementCount(taggedWord.tag());
    }
    Distribution<String> singletonWordPOSDist = Distribution.getDistribution(singletonWordPOSes);

    ClassicCounter<Character> singletonCharRads = new ClassicCounter<Character>();
    for (Symbol s : singletonChars) {
View Full Code Here

            sentenceB.add((HasWord) newLabel);
          } else {
            throw new AssertionError("This should have been a HasWord");
          }
        } else if (word instanceof HasTag) {
          TaggedWord tw = new TaggedWord(word.word(), ((HasTag) word).tag());
          sentenceB.add(tw);
        } else {
          sentenceB.add(new Word(word.word()));
        }
      }
      for (HasWord word : sentenceB) {
        word.setWord(op.wordFunction.apply(word.word()));
      }
    } else {
      sentenceB = new ArrayList<HasWord>(sentence);
    }

    if (op.testOptions.addMissingFinalPunctuation) {
      addedPunct = addSentenceFinalPunctIfNeeded(sentenceB, length);
    }
    if (length > op.testOptions.maxLength) {
      parseSkipped = true;
      throw new UnsupportedOperationException("Sentence too long: length " + length);
    }
    TreePrint treePrint = getTreePrint();
    PrintWriter pwOut = op.tlpParams.pw();

    //Insert the boundary symbol
    if(sentence.get(0) instanceof CoreLabel) {
      CoreLabel boundary = new CoreLabel();
      boundary.setWord(Lexicon.BOUNDARY);
      boundary.setValue(Lexicon.BOUNDARY);
      boundary.setTag(Lexicon.BOUNDARY_TAG);
      boundary.setIndex(sentence.size()+1);//1-based indexing used in the parser
      sentenceB.add(boundary);
    } else {
      sentenceB.add(new TaggedWord(Lexicon.BOUNDARY, Lexicon.BOUNDARY_TAG));
    }

    if (Thread.interrupted()) {
      throw new RuntimeInterruptedException();
    }
View Full Code Here

   */
  public <X extends List<TaggedWord>> X taggedYield(X ty) {
    Tree[] kids = children();
    // this inlines the content of isPreTerminal()
    if (kids.length == 1 && kids[0].isLeaf()) {
      ty.add(new TaggedWord(kids[0].label(), label()));
    } else {
      for (Tree kid : kids) {
        kid.taggedYield(ty);
      }
    }
View Full Code Here

    final boolean hasOffset;
    hasOffset = origWords != null && origWords.size() > 0 && (origWords.get(0) instanceof HasOffset);
    ArrayList<TaggedWord> taggedSentence = new ArrayList<TaggedWord>();
    for (int j = 0; j < size - 1; j++) {
      String tag = finalTags[j];
      TaggedWord w = new TaggedWord(sent.get(j), tag);
      if (hasOffset) {
        HasOffset offset = (HasOffset) origWords.get(j);
        w.setBeginPosition(offset.beginPosition());
        w.setEndPosition(offset.endPosition());
      }
      taggedSentence.add(w);
    }
    return taggedSentence;
  }
View Full Code Here

      if (totalCount == 0) {
        ++loc;
        continue;
      }
      for (String tag : counts.keySet()) {
        TaggedWord newTW = new TaggedWord(tw.word(), tag);
        train(newTW, loc, weight * counts.getCount(tag) / totalCount);
      }
      ++loc;
    }
  }
View Full Code Here

  public TaggedWord toTaggedWord(Index<String> wordIndex,
                                 Index<String> tagIndex) {
    String wordStr = wordString(wordIndex);
    String tagStr = tagString(tagIndex);
    return new TaggedWord(wordStr, tagStr);
  }
View Full Code Here

    for (List<TaggedWord> sentence : reader) {
      if (maxentTagger.wordFunction != null) {
        List<TaggedWord> newSentence =
          new ArrayList<TaggedWord>(sentence.size());
        for (TaggedWord word : sentence) {
          TaggedWord newWord =
            new TaggedWord(maxentTagger.wordFunction.apply(word.word()),
                           word.tag());
          newSentence.add(newWord);
        }
        sentence = newSentence;
      }
View Full Code Here

TOP

Related Classes of edu.stanford.nlp.ling.TaggedWord

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.