Package cc.mallet.types

Examples of cc.mallet.types.Sequence


            testData = new InstanceList(p);
            testData.addThruPipe(new LineGroupIterator(new StringReader(line),
                                                       pattern, true));

            for (int i = 0; i < testData.size(); i++) {
                Sequence input = (Sequence)testData.get(i).getData();
                Sequence[] outputs = apply(crf, input, nBestOption.value);
                int k = outputs.length;
                boolean error = false;
                for (int a = 0; a < k; a++) {
                    if (outputs[a].size() != input.size()) {
                        logger.info("Failed to decode input sequence " + i + ", answer " + a);
                        error = true;
                    }
                }
                if (!error) {             
                  ConstrainedForwardBackwardConfidenceEstimator cfb = new ConstrainedForwardBackwardConfidenceEstimator(crf);
                  SumLatticeDefault lattice = new SumLatticeDefault (cfb.getTransducer(), input);
                    double conf = 0;
                   
                    for (int j = 0; j < input.size(); j++) {
                        StringBuffer buf = new StringBuffer();
                        for (int a = 0; a < k; a++) {
                            String tag = outputs[a].get(j).toString();
                         
                          // Let's get the confidence if this is an entity
                          if(tag.startsWith("B-")) {
                            int endTagIdx = j+1;
                            while(endTagIdx < input.size() && outputs[a].get(endTagIdx).toString().startsWith("I-")) {
                              endTagIdx++;
                            }
                            endTagIdx -= 1;
                           
                            Segment s = new Segment(input, outputs[a], outputs[a], j, endTagIdx, tag, outputs[a].get(endTagIdx));
                            conf = cfb.estimateConfidenceFor(s, lattice);
                          }
                         
                          if(!tag.equals("O")) {
                            tag = tag + ":" + Double.toString(conf);
                          }
                         
                          buf.append(tag).append(" ");
                        }
                        if (includeInput) {
                            FeatureVector fv = (FeatureVector)input.get(j);
                            buf.append(fv.toString(true));               
                        }
                        System.out.println(buf.toString());
                    }
                    //System.out.println();
View Full Code Here


    totalTokens = numCorrectTokens = 0;
    for (int n = 0; n < numTrueSegments.length; n++)
      numTrueSegments[n] = numPredictedSegments[n] = numCorrectSegments[n] = 0;
    for (int i = 0; i < data.size(); i++) {
      Instance instance = data.get(i);
      Sequence input = (Sequence) instance.getData();
      //String tokens = null;
      //if (instance.getSource() != null)
      //tokens = (String) instance.getSource().toString();
      Sequence trueOutput = (Sequence) instance.getTarget();
      assert (input.size() == trueOutput.size());
      Sequence predOutput = model.transduce (input);
      assert (predOutput.size() == trueOutput.size());
      int trueStart, predStart;        // -1 for non-start, otherwise index into segmentStartTag
      for (int j = 0; j < trueOutput.size(); j++) {
        totalTokens++;
        if (trueOutput.get(j).equals(predOutput.get(j)))
          numCorrectTokens++;
        trueStart = predStart = -1;
        // Count true segment starts
        for (int n = 0; n < segmentStartTags.length; n++) {
          if (segmentStartTags[n].equals(trueOutput.get(j))) {
            numTrueSegments[n]++;
            numTrueSegments[allIndex]++;
            numTrueSegments[anyTypeIndex]++;
            trueStart = n;
            break;
          }
        }
        // Count predicted segment starts
        for (int n = 0; n < segmentStartTags.length; n++) {
          if (segmentStartTags[n].equals(predOutput.get(j))) {
            numPredictedSegments[n]++;
            numPredictedSegments[allIndex]++;
            numPredictedSegments[anyTypeIndex]++;
            predStart = n;
          }
        }
        //if (trueStart != -1 && trueStart == predStart) {
        if (trueStart != -1 && predStart != -1) {
          // Truth and Prediction both agree that the same segment tag-type is starting now
          int m;
          boolean trueContinue = false;
          boolean predContinue = false;
          for (m = j+1; m < trueOutput.size(); m++) {
            trueContinue = segmentContinueTags[predStart].equals (trueOutput.get(m));
            predContinue = segmentContinueTags[predStart].equals (predOutput.get(m));
            if (!trueContinue || !predContinue) {
              if (trueContinue == predContinue) {
                // They agree that a segment is ending somehow
              numCorrectSegments[anyTypeIndex]++;
              if(trueStart == predStart) {
View Full Code Here

    double[] trueCounts = new double[instances.getTargetAlphabet().size()];

    int total = 0;
    for (int i = 0; i < instances.size(); i++) {
      Instance instance = instances.get(i);
      Sequence trueOutput = (Sequence) instance.getTarget();
      Sequence predOutput = (Sequence) transducer.getTransducer().transduce((Sequence)instance.getData());
      for (int j = 0; j < predOutput.size(); j++) {
        total++;
        predCounts[instances.getTargetAlphabet().lookupIndex(predOutput.get(j))]++;
        trueCounts[instances.getTargetAlphabet().lookupIndex(trueOutput.get(j))]++;
      }
    }

    for (int li = 0; li < predCounts.length; li++) {
View Full Code Here

      numTrueSegments[n] = numPredictedSegments[n] = numCorrectSegments[n] = 0;
    for (int i = 0; i < data.size(); i++) {
      if (viterbiOutputStream != null)
        viterbiOutputStream.println ("Viterbi path for "+description+" instance #"+i);
      Instance instance = data.get(i);
      Sequence input = (Sequence) instance.getData();     
      //String tokens = null;
      //if (instance.getSource() != null)
      //tokens = (String) instance.getSource().toString();
      Sequence trueOutput = (Sequence) instance.getTarget();
      assert (input.size() == trueOutput.size());
      Sequence predOutput = (Sequence) predictedSequences.get (i);
      if (predOutput == null) // skip this instance
        continue;
      assert (predOutput.size() == trueOutput.size());
      int trueStart, predStart;        // -1 for non-start, otherwise index into segmentStartTag
      for (int j = 0; j < trueOutput.size(); j++) {
        totalTokens++;
        if (trueOutput.get(j).equals(predOutput.get(j)))
          numCorrectTokens++;
        trueStart = predStart = -1;
        // Count true segment starts
        for (int n = 0; n < segmentStartTags.length; n++) {
          if (segmentStartTags[n].equals(trueOutput.get(j))) {
            numTrueSegments[n]++;
            numTrueSegments[allIndex]++;
            trueStart = n;
            break;
          }
        }
        // Count predicted segment starts
        for (int n = 0; n < segmentStartTags.length; n++) {
          if (segmentStartTags[n].equals(predOutput.get(j))) {
            numPredictedSegments[n]++;
            numPredictedSegments[allIndex]++;
            predStart = n;
          }
        }
        if (trueStart != -1 && trueStart == predStart) {
          // Truth and Prediction both agree that the same segment tag-type is starting now
          int m;
          boolean trueContinue = false;
          boolean predContinue = false;
          for (m = j+1; m < trueOutput.size(); m++) {
            trueContinue = segmentContinueTags[predStart].equals (trueOutput.get(m));
            predContinue = segmentContinueTags[predStart].equals (predOutput.get(m));
            if (!trueContinue || !predContinue) {
              if (trueContinue == predContinue) {
                // They agree that a segment is ending somehow
                numCorrectSegments[predStart]++;
                numCorrectSegments[allIndex]++;
              }
              break;
            }
          }
          // for the case of the end of the sequence
          if (m == trueOutput.size()) {
            if (trueContinue == predContinue) {
              numCorrectSegments[predStart]++;
              numCorrectSegments[allIndex]++;
            }
          }
        }

        if (viterbiOutputStream != null) {
          FeatureVector fv = (FeatureVector) input.get(j);
          //viterbiOutputStream.println (tokens.charAt(j)+" "+trueOutput.get(j).toString()+
          //'/'+predOutput.get(j).toString()+"  "+ fv.toString(true));
          if (sourceTokenSequence != null)
            viterbiOutputStream.print (sourceTokenSequence.get(j).getText()+": ");
          viterbiOutputStream.println (trueOutput.get(j).toString()+
                                       '/'+predOutput.get(j).toString()+"  "+ fv.toString(true));
           
        }
      }
    }
    DecimalFormat f = new DecimalFormat ("0.####");
View Full Code Here

TOP

Related Classes of cc.mallet.types.Sequence

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.