Package edu.stanford.nlp.ling

Examples of edu.stanford.nlp.ling.CoreLabel


          System.err.println("Length:\t" + sentence.size());
        }
        boolean printSpace = false;
        for (HasWord word : sentence) {
          if (printOriginalText) {
            CoreLabel cl = (CoreLabel) word;
            if ( ! printSpace) {
              pw.print(cl.get(CoreAnnotations.BeforeAnnotation.class));
              printSpace = true;
            }
            pw.print(cl.get(CoreAnnotations.OriginalTextAnnotation.class));
            pw.print(cl.get(CoreAnnotations.AfterAnnotation.class));
          } else {
            if (printSpace) pw.print(" ");
            printSpace = true;
            pw.print(word.word());
          }
View Full Code Here


    for (Entry<String, List<CoreLabel>> en : sents.entrySet()) {
      List<CoreLabel> value = en.getValue();
      CoreLabel[] sent = value.toArray(new CoreLabel[value.size()]);

      for (int i = 0; i < sent.length; i++) {
        CoreLabel l = sent[i];

        boolean chooseThis = false;
        boolean ignoreclass = false;
        Boolean datumlabel = false;
        for (Class cl : otherIgnoreClasses.keySet()) {
          if ((Boolean) l.get(cl)) {    // cast is needed for jdk 1.6
            ignoreclass = true;
          }
        }
        if (l.get(answerClass).equals(answerLabel)) {
          datumlabel = true;
          chooseThis = true;
          numpos++;
        }
        if (chooseThis) {
          chosen.add(new Pair<String, Integer>(en.getKey(), i));

          Counter<ScorePhraseMeasures> feat = null;
          if (forLearningPattern) {
            feat = getPhraseFeaturesForPattern(label, l.word());
          } else {
            feat = getFeatures(label, l.word(), wordsPatExtracted.getCounter(l.word()), allSelectedPatterns);
          }
          RVFDatum<String, ScorePhraseMeasures> datum = new RVFDatum<String, ScorePhraseMeasures>(feat, datumlabel.toString());
          dataset.add(datum);
        }
      }

      for (int i = 0; i < sent.length; i++) {
        CoreLabel l = sent[i];
        if (numneg >= numpos)
          break;
        boolean chooseThis = false;
        boolean ignoreclass = false;
        Boolean datumlabel = false;
        if (l.get(answerClass).equals(answerLabel)) {
          continue;
        } else if ((ignoreclass || negativeWords.contains(l.word().toLowerCase())) && getRandomBoolean(rneg, perSelectNeg)) {
          chooseThis = true;
          datumlabel = false;
          numneg++;
        } else if (getRandomBoolean(r, perSelectRand)) {
          chooseThis = true;
          datumlabel = false;
          numneg++;
        } else
          chooseThis = false;
        if (chooseThis) {
          chosen.add(new Pair<String, Integer>(en.getKey(), i));
          Counter<ScorePhraseMeasures> feat = null;
          if (forLearningPattern) {
            feat = getPhraseFeaturesForPattern(label, l.word());
          } else {
            feat = getFeatures(label, l.word(), wordsPatExtracted.getCounter(l.word()), allSelectedPatterns);
          }
          RVFDatum<String, ScorePhraseMeasures> datum = new RVFDatum<String, ScorePhraseMeasures>(feat, datumlabel.toString());
          dataset.add(datum);
        }
      }
View Full Code Here

  }


  protected Collection<String> featuresC(PaddedList<? extends CoreLabel> cInfo, int loc) {
    Collection<String> features = new ArrayList<String>();
    CoreLabel c = cInfo.get(loc);
    CoreLabel c2 = cInfo.get(loc + 1);
    CoreLabel c3 = cInfo.get(loc + 2);
    CoreLabel p = cInfo.get(loc - 1);
    CoreLabel p2 = cInfo.get(loc - 2);
    CoreLabel p3 = cInfo.get(loc - 3);
    String charc = c.getString(CoreAnnotations.CharAnnotation.class);
    String charc2 = c2.getString(CoreAnnotations.CharAnnotation.class);
    String charc3 = c3.getString(CoreAnnotations.CharAnnotation.class);
    String charp = p.getString(CoreAnnotations.CharAnnotation.class);
    String charp2 = p2.getString(CoreAnnotations.CharAnnotation.class);
    String charp3 = p3.getString(CoreAnnotations.CharAnnotation.class);
    Integer cI = c.get(CoreAnnotations.UTypeAnnotation.class);
    String uTypec = (cI != null ? cI.toString() : "");
    Integer c2I = c2.get(CoreAnnotations.UTypeAnnotation.class);
    String uTypec2 = (c2I != null ? c2I.toString() : "");
    Integer c3I = c3.get(CoreAnnotations.UTypeAnnotation.class);
View Full Code Here

  }


  protected Collection<String> featuresCpC(PaddedList<? extends CoreLabel> cInfo, int loc) {
    Collection<String> features = new ArrayList<String>();
    CoreLabel c = cInfo.get(loc);
    CoreLabel c2 = cInfo.get(loc + 1);
    CoreLabel c3 = cInfo.get(loc + 2);
    CoreLabel p = cInfo.get(loc - 1);
    CoreLabel p2 = cInfo.get(loc - 2);
    CoreLabel p3 = cInfo.get(loc - 3);

    String charc = c.getString(CoreAnnotations.CharAnnotation.class);
    String charc2 = c2.getString(CoreAnnotations.CharAnnotation.class);
    String charc3 = c3.getString(CoreAnnotations.CharAnnotation.class);
    String charp = p.getString(CoreAnnotations.CharAnnotation.class);
    String charp2 = p2.getString(CoreAnnotations.CharAnnotation.class);
    String charp3 = p3.getString(CoreAnnotations.CharAnnotation.class);

    Integer cI = c.get(CoreAnnotations.UTypeAnnotation.class);
    String uTypec = (cI != null ? cI.toString() : "");
    Integer c2I = c2.get(CoreAnnotations.UTypeAnnotation.class);
    String uTypec2 = (c2I != null ? c2I.toString() : "");
View Full Code Here

   @return Collection of String features (sparse set of boolean features)
   */
  protected Collection<String> featuresCnC(PaddedList<? extends CoreLabel> cInfo, int loc) {
    Collection<String> features = new ArrayList<String>();
    if (flags.useWordn) {
      CoreLabel c = cInfo.get(loc);
      CoreLabel c2 = cInfo.get(loc + 1);
      CoreLabel p = cInfo.get(loc - 1);
      CoreLabel p2 = cInfo.get(loc - 2);
      String charc = c.getString(CoreAnnotations.CharAnnotation.class);
      String charc2 = c2.getString(CoreAnnotations.CharAnnotation.class);
      String charp = p.getString(CoreAnnotations.CharAnnotation.class);
      String charp2 = p2.getString(CoreAnnotations.CharAnnotation.class);

      features.add(charc +"c");
      features.add(charc2+"c2");
      features.add(charp +"p");
      features.add(charp2 + "p2");
View Full Code Here

   @param loc Position of c in list
   *  @return Collection of String features (sparse set of boolean features)
   */
  protected Collection<String> featuresCpCp2C(PaddedList<? extends CoreLabel> cInfo, int loc) {
    Collection<String> features = new ArrayList<String>();
    CoreLabel c = cInfo.get(loc);
    CoreLabel c2 = cInfo.get(loc + 1);
    CoreLabel c3 = cInfo.get(loc + 2);
    CoreLabel p = cInfo.get(loc - 1);
    CoreLabel p2 = cInfo.get(loc - 2);
    CoreLabel p3 = cInfo.get(loc - 3);

    String charc = c.getString(CoreAnnotations.CharAnnotation.class);
    String charc2 = c2.getString(CoreAnnotations.CharAnnotation.class);
    String charc3 = c3.getString(CoreAnnotations.CharAnnotation.class);
    String charp = p.getString(CoreAnnotations.CharAnnotation.class);
    String charp2 = p2.getString(CoreAnnotations.CharAnnotation.class);
    String charp3 = p3.getString(CoreAnnotations.CharAnnotation.class);

    // N-gram features. N is up to 3
    if (flags.useWord3) {
      features.add(charc +"::c");
      features.add(charc2+"::n");
View Full Code Here


  protected Collection<String> featuresCpCp2Cp3C(PaddedList<? extends CoreLabel> cInfo, int loc) {
    Collection<String> features = new ArrayList<String>();
    if (flags.use4Clique && flags.maxLeft >= 3) {
      CoreLabel c = cInfo.get(loc);
      CoreLabel c2 = cInfo.get(loc + 1);
      CoreLabel p = cInfo.get(loc - 1);
      CoreLabel p2 = cInfo.get(loc - 2);
      CoreLabel p3 = cInfo.get(loc - 3);
      String charc = c.getString(CoreAnnotations.CharAnnotation.class);
      String charp = p.getString(CoreAnnotations.CharAnnotation.class);
      String charp2 = p2.getString(CoreAnnotations.CharAnnotation.class);
      String charp3 = p3.getString(CoreAnnotations.CharAnnotation.class);
      Integer cI = c.get(CoreAnnotations.UTypeAnnotation.class);
      String uTypec = (cI != null ? cI.toString() : "");
      Integer c2I = c2.get(CoreAnnotations.UTypeAnnotation.class);
      String uTypec2 = (c2I != null ? c2I.toString() : "");
      Integer pI = p.get(CoreAnnotations.UTypeAnnotation.class);
      String uTypep = (pI != null ? pI.toString() : "");
      Integer p2I = p2.get(CoreAnnotations.UTypeAnnotation.class);
      String uTypep2 = (p2I != null ? p2I.toString() : "");
      Integer p3I = p3.get(CoreAnnotations.UTypeAnnotation.class);
      String uTypep3 = (p3I != null ? p3I.toString() : "");


      if (flags.useLongSequences) {
        features.add(charp3 + charp2 + charp + charc + "p3p2pc");
View Full Code Here

        //to make sure we discard phrases with stopwords in between, but include the ones in which stop words were removed at the ends if removeStopWordsFromSelectedPhrases is true
        boolean[] addedindices = new boolean[e-s];
        Arrays.fill(addedindices, false);
       
        for (int i = s; i < e; i++) {
          CoreLabel l = sent.get(i);
          l.set(PatternsAnnotations.MatchedPattern.class, true);

          if(!l.containsKey(PatternsAnnotations.MatchedPatterns.class))
            l.set(PatternsAnnotations.MatchedPatterns.class, new HashSet<Pattern>());
          l.get(PatternsAnnotations.MatchedPatterns.class).add(matchedPat);

          // if (restrictToMatched) {
          // tokensMatchedPattern.add(sentid, i);
          // }
          for (Entry<Class, Object> ig : constVars.getIgnoreWordswithClassesDuringSelection().get(label).entrySet()) {
            if (l.containsKey(ig.getKey()) && l.get(ig.getKey()).equals(ig.getValue())) {
              doNotUse = true;
            }
          }
          boolean containsStop = containsStopWord(l, constVars.getCommonEngWords(), PatternFactory.ignoreWordRegex);
          if (removePhrasesWithStopWords && containsStop) {
            doNotUse = true;
          } else {
            if (!containsStop || !removeStopWordsFromSelectedPhrases) {
             
              if (label == null || l.get(constVars.getAnswerClass().get(label)) == null || !l.get(constVars.getAnswerClass().get(label)).equals(label.toString())) {
                useWordNotLabeled = true;
              }
              phrase += " " + l.word();
              phraseLemma += " " + l.lemma();
              addedindices[i-s] = true;
            }
          }
        }
       
View Full Code Here

  /** Matches a run of one or more ASCII digits ({@code 0-9}). */
  private static final Pattern numberPattern = Pattern.compile("[0-9]+");
  /** Matches an English ordinal suffix ({@code st}, {@code nd}, {@code rd} or {@code th}), ignoring case. */
  private static final Pattern ordinalEndPattern = Pattern.compile("(?:st|nd|rd|th)", Pattern.CASE_INSENSITIVE);

  private boolean isOrdinal(List<? extends CoreLabel> wordInfos, int pos) {
    CoreLabel c = wordInfos.get(pos);
    String cWord = getWord(c);
    Matcher m = ordinalPattern.matcher(cWord);
    if (m.matches()) { return true; }
    m = numberPattern.matcher(cWord);
    if (m.matches()) {
      if (pos+1 < wordInfos.size()) {
        CoreLabel n = wordInfos.get(pos+1);
        m = ordinalEndPattern.matcher(getWord(n));
        if (m.matches()) { return true; }
      }
      return false;
    }

    m = ordinalEndPattern.matcher(cWord);
    if (m.matches()) {
      if (pos > 0) {
        CoreLabel p = wordInfos.get(pos-1);
        m = numberPattern.matcher(getWord(p));
        if (m.matches()) { return true; }
      }
    }
    if (cWord.equals("-")) {
      if (pos+1 < wordInfos.size() && pos > 0) {
        CoreLabel p = wordInfos.get(pos-1);
        CoreLabel n = wordInfos.get(pos+1);
        m = ordinalPattern.matcher(getWord(p));
        if (m.matches()) {
          m = ordinalPattern.matcher(getWord(n));
          if (m.matches()) {
            return true;
View Full Code Here

  protected HasWord getNext() {
    while (wordIter == null || ! wordIter.hasNext()) {
      if ( ! tok.hasNext()) {
        return null;
      }
      CoreLabel token = tok.next();
      String s = token.word();
      if (s == null) {
        return null;
      }
      if (s.equals(WhitespaceLexer.NEWLINE)) {
        // if newlines were significant, we should make sure to return
View Full Code Here

TOP

Related Classes of edu.stanford.nlp.ling.CoreLabel

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.