Package edu.stanford.nlp.sequences

Examples of edu.stanford.nlp.sequences.SequenceModel


    }

    // cdm Aug 2005: why is this next line needed?  Seems really ugly!!!  [2006: it broke things! removed]
    // document.add(0, new CoreLabel());

    SequenceModel ts = new Scorer<IN>(document,
                                      classIndex,
                                      this,
                                      (!flags.useTaggySequences ? (flags.usePrevSequences ? 1 : 0) : flags.maxLeft),
                                      (flags.useNextSequences ? 1 : 0),
                                      answerArrays);
View Full Code Here


    if (document.isEmpty()) {
      return;
    }

    SequenceModel ts = getSequenceModel(document);

    //    TagScorer ts = new PrevOnlyScorer(document, tagIndex, this, (!flags.useTaggySequences ? (flags.usePrevSequences ? 1 : 0) : flags.maxLeft), 0, answerArrays);

    int[] tags;
    //System.err.println("***begin test***");
    if (flags.useViterbi) {
      ExactBestSequenceFinder ti = new ExactBestSequenceFinder();
      tags = ti.bestSequence(ts);
    } else {
      BeamBestSequenceFinder ti = new BeamBestSequenceFinder(flags.beamSize, true, true);
      tags = ti.bestSequence(ts, document.size());
    }
    //System.err.println("***end test***");

    // used to improve recall in task 1b
    if (flags.lowerNewgeneThreshold) {
      System.err.println("Using NEWGENE threshold: " + flags.newgeneThreshold);

      int[] copy = new int[tags.length];
      System.arraycopy(tags, 0, copy, 0, tags.length);

      // for each sequence marked as NEWGENE in the gazette
      // tag the entire sequence as NEWGENE and sum the score
      // if the score is greater than newgeneThreshold, accept
      int ngTag = classIndex.indexOf("G");
      //int bgTag = classIndex.indexOf(BACKGROUND);
      int bgTag = classIndex.indexOf(flags.backgroundSymbol);

      for (int i = 0, dSize = document.size(); i < dSize; i++) {
        CoreLabel wordInfo =document.get(i);

        if ("NEWGENE".equals(wordInfo.get(CoreAnnotations.GazAnnotation.class))) {
          int start = i;
          int j;
          for (j = i; j < document.size(); j++) {
            wordInfo = document.get(j);
            if (!"NEWGENE".equals(wordInfo.get(CoreAnnotations.GazAnnotation.class))) {
              break;
            }
          }
          int end = j;
          //int end = i + 1;

          int winStart = Math.max(0, start - 4);
          int winEnd = Math.min(tags.length, end + 4);
          // clear a window around the sequences
          for (j = winStart; j < winEnd; j++) {
            copy[j] = bgTag;
          }

          // score as nongene
          double bgScore = 0.0;
          for (j = start; j < end; j++) {
            double[] scores = ts.scoresOf(copy, j);
            scores = Scorer.recenter(scores);
            bgScore += scores[bgTag];
          }

          // first pass, compute all of the scores
          ClassicCounter<Pair<Integer,Integer>> prevScores = new ClassicCounter<Pair<Integer,Integer>>();
          for (j = start; j < end; j++) {
            // clear the sequence
            for (int k = start; k < end; k++) {
              copy[k] = bgTag;
            }

            // grow the sequence from j until the end
            for (int k = j; k < end; k++) {
              copy[k] = ngTag;
              // score the sequence
              double ngScore = 0.0;
              for (int m = start; m < end; m++) {
                double[] scores = ts.scoresOf(copy, m);
                scores = Scorer.recenter(scores);
                ngScore += scores[tags[m]];
              }
              prevScores.incrementCount(new Pair<Integer,Integer>(Integer.valueOf(j), Integer.valueOf(k)), ngScore - bgScore);
            }
View Full Code Here

TOP

Related Classes of edu.stanford.nlp.sequences.SequenceModel

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact: coftware#gmail.com.