Package edu.stanford.nlp.ling

Examples of edu.stanford.nlp.ling.CoreLabel


        int absPos = pos - leftWindow() + i;
        if (absPos < 0) {
          continue;
        }
        answers[i] = tagIndex.get(tags[absPos]);
        CoreLabel li = lineInfos.get(absPos);
        li.set(CoreAnnotations.AnswerAnnotation.class, answers[i]);
        if (i < leftWindow()) {
          pre[i] = answers[i];
        }
      }
      double[] scores = new double[tagIndex.size()];
View Full Code Here


  private int sample(Map<String, List<CoreLabel>> sents, Random r, Random rneg, double perSelectNeg, double perSelectRand, int numrand, List<Pair<String, Integer>> chosen, RVFDataset<String, String> dataset){
    for (Entry<String, List<CoreLabel>> en : sents.entrySet()) {
      CoreLabel[] sent = en.getValue().toArray(new CoreLabel[0]);

      for (int i = 0; i < sent.length; i++) {
        CoreLabel l = sent[i];

        boolean chooseThis = false;

        if (l.get(answerClass).equals(answerLabel)){
          chooseThis = true;
          }
        else if ((!l.get(answerClass).equals("O") || negativeWords.contains(l
            .word().toLowerCase())) && getRandomBoolean(r, perSelectNeg)) {
          chooseThis = true;
        } else if (getRandomBoolean(r, perSelectRand)) {
          numrand++;
          chooseThis = true;
View Full Code Here

    return features;
  }

  private RVFDatum<String, String> getDatum(CoreLabel[] sent, int i) {
    Counter<String> feat = new ClassicCounter<String>();
    CoreLabel l = sent[i];

    String label;
    if (l.get(answerClass).toString().equals(answerLabel))
      label = answerLabel;
    else
      label = "O";
   
      CollectionValuedMap<String, String> matchedPhrases = l
          .get(PatternsAnnotations.MatchedPhrases.class);
      if (matchedPhrases == null) {
        matchedPhrases = new CollectionValuedMap<String, String>();
        matchedPhrases.add(label, l.word());
      }

      for (String w : matchedPhrases.allValues()) {
        Integer num = this.clusterIds.get(w);
        if (num == null)
          num = -1;
        feat.setCount("Cluster-" + num, 1.0);
      }

   

    // feat.incrementCount("WORD-" + l.word());
    // feat.incrementCount("LEMMA-" + l.lemma());
    // feat.incrementCount("TAG-" + l.tag());
    int window = 0;
    for (int j = Math.max(0, i - window); j < i; j++) {
      CoreLabel lj = sent[j];
      feat.incrementCount("PREV-" + "WORD-" + lj.word());
      feat.incrementCount("PREV-" + "LEMMA-" + lj.lemma());
      feat.incrementCount("PREV-" + "TAG-" + lj.tag());
    }

    for (int j = i + 1; j < sent.length && j <= i + window; j++) {
      CoreLabel lj = sent[j];
      feat.incrementCount("NEXT-" + "WORD-" + lj.word());
      feat.incrementCount("NEXT-" + "LEMMA-" + lj.lemma());
      feat.incrementCount("NEXT-" + "TAG-" + lj.tag());
    }


    // System.out.println("adding " + l.word() + " as " + label);
    return new RVFDatum<String, String>(feat, label);
View Full Code Here

      //line = line.replaceAll(" +"," ");
      //System.err.println("pichuan: processing line = "+line);
     
      String[] toks = line.split(" ");
      for (String word : toks) {
        CoreLabel wi = new CoreLabel();
        Matcher lowerMatcher = allLower.matcher(word);
       
        if (lowerMatcher.matches()) {
          wi.set(CoreAnnotations.AnswerAnnotation.class, "LOWER");
          wi.set(CoreAnnotations.GoldAnswerAnnotation.class, "LOWER");
        } else {
          Matcher upperMatcher = allUpper.matcher(word);
          if (!THREE_CLASSES && upperMatcher.matches()) {
            wi.set(CoreAnnotations.AnswerAnnotation.class, "UPPER");
            wi.set(CoreAnnotations.GoldAnswerAnnotation.class, "UPPER");
          } else {
            Matcher startUpperMatcher = startUpper.matcher(word);

            boolean isINIT_UPPER; // = false;
            if (word.length() > 1) {
              String w2 = word.substring(1);
              String lcw2 = w2.toLowerCase();
              isINIT_UPPER = w2.equals(lcw2);
            } else {
              isINIT_UPPER = false;
            }

            if (startUpperMatcher.matches() && isINIT_UPPER) {
              wi.set(CoreAnnotations.AnswerAnnotation.class, "INIT_UPPER");
              wi.set(CoreAnnotations.GoldAnswerAnnotation.class, "INIT_UPPER");
            } else {
              wi.set(CoreAnnotations.AnswerAnnotation.class, "O");
              wi.set(CoreAnnotations.GoldAnswerAnnotation.class, "O");
            }
          }
        }
       
        wi.setWord(word.toLowerCase());
        wi.set(CoreAnnotations.PositionAnnotation.class, pos + "");
        doc.add(wi);
        pos++;
      }
      return doc;
    }
View Full Code Here

  private Object getNext(String txt, String originalText, String annotation) {
    txt = removeSoftHyphens(txt);
    Label w = (Label) tokenFactory.makeToken(txt, yychar, yylength());
    if (invertible || annotation != null) {
      CoreLabel word = (CoreLabel) w;
      if (invertible) {
        String str = prevWordAfter.toString();
        prevWordAfter.setLength(0);
        word.set(CoreAnnotations.OriginalTextAnnotation.class, originalText);
        word.set(CoreAnnotations.BeforeAnnotation.class, str);
        prevWord.set(CoreAnnotations.AfterAnnotation.class, str);
        prevWord = word;
      }
      if (annotation != null) {
        word.set(CoreAnnotations.ParentAnnotation.class, annotation);
      }
    }
    return w;
  }
View Full Code Here

        if (line.trim().length() == 0) {
          continue;
        }
        String[] info = whitePattern.split(line);
        // todo: We could speed things up here by having one time only having converted map into an array of CoreLabel keys (Class<? extends CoreAnnotation<?>>) and then instantiating them. Need new constructor.
        CoreLabel wi;
        try {
          wi = new CoreLabel(map, info);
        } catch (RuntimeException e) {
          System.err.println("Error on line " + lineCount + ": " + line);
          throw e;
        }
        words.add(wi);
View Full Code Here

        if (line.trim().length() < 1)
          continue;
        int idx = line.lastIndexOf(" ");
        if(idx < 0)
          throw new RuntimeException("Bad line: "+line);
        CoreLabel wi = new CoreLabel();
        wi.setWord(line.substring(0,idx));
        wi.set(CoreAnnotations.AnswerAnnotation.class, line.substring(idx+1));
        wi.set(CoreAnnotations.GoldAnswerAnnotation.class, line.substring(idx+1));
        words.add(wi);
      }
      return words;
    }
View Full Code Here

              (T) lexer.next();
      } while (nextToken != null && nextToken.word().length() == 0);

      // Check for compounds to split
      if (splitAny && nextToken instanceof CoreLabel) {
        CoreLabel cl = (CoreLabel) nextToken;
  if (cl.containsKey(ParentAnnotation.class)) {
      if(splitCompounds && cl.get(ParentAnnotation.class).equals(SpanishLexer.COMPOUND_ANNOTATION))
    nextToken = (T) processCompound(cl);
      else if (splitVerbs && cl.get(ParentAnnotation.class).equals(SpanishLexer.VB_PRON_ANNOTATION))
    nextToken = (T) processVerb(cl);
      else if (splitContractions && cl.get(ParentAnnotation.class).equals(SpanishLexer.CONTR_ANNOTATION))
    nextToken = (T) processContraction(cl);
  }
      }

      return nextToken;
View Full Code Here

  }


  /* Copies the CoreLabel cl with the new word part */
  private CoreLabel copyCoreLabel(CoreLabel cl, String part) {
      CoreLabel newLabel = new CoreLabel(cl);
      newLabel.setWord(part);
      newLabel.setValue(part);
      newLabel.set(OriginalTextAnnotation.class, part);
      return newLabel;
  }
View Full Code Here

   */
  private CoreLabel processCompound(CoreLabel cl) {
    cl.remove(ParentAnnotation.class);
    String[] parts = cl.word().replaceAll("\\-", " - ").split("\\s+");
    for (String part : parts) {
      CoreLabel newLabel = new CoreLabel(cl);
      newLabel.setWord(part);
      newLabel.setValue(part);
      newLabel.set(OriginalTextAnnotation.class, part);
      compoundBuffer.add(newLabel);
    }
    return compoundBuffer.remove(0);
  }
View Full Code Here

TOP

Related Classes of edu.stanford.nlp.ling.CoreLabel

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.