Package cc.mallet.types

Examples of cc.mallet.types.Sequence


      System.out.println("Testing  Accuracy after training = "
          + crf.averageTokenAccuracy(lists[1]));
      System.out.println("Training results:");
      for (int i = 0; i < lists[0].size(); i++) {
        Instance inst = lists[0].get(i);
        Sequence input = (Sequence) inst.getData();
        Sequence output = crf.transduce(input);
        System.out.println(output);
      }
      System.out.println("Testing results:");
      for (int i = 0; i < lists[1].size(); i++) {
        Instance inst = lists[1].get(i);
        Sequence input = (Sequence) inst.getData();
        Sequence output = crf.transduce(input);
        System.out.println(output);
      }
    }
  }
View Full Code Here


    CRFTrainerByLabelLikelihood crft1 = new CRFTrainerByLabelLikelihood(
        crf1);
    crft1.train(instances, 10); // Let's get some parameters

    Instance inst = instances.get(0);
    Sequence input = (Sequence) inst.getData();
    SumLatticeDefault lattice = new SumLatticeDefault(crf1, input,
        (Sequence) inst.getTarget(), null, true);
    for (int ip = 0; ip < lattice.length() - 1; ip++) {
      for (int i = 0; i < crf1.numStates(); i++) {
        Transducer.State state = crf1.getState(i);
View Full Code Here

    crf.addFullyConnectedStatesForLabels();
    CRFTrainerByLabelLikelihood crft = new CRFTrainerByLabelLikelihood(crf);
    crft.trainIncremental(training);

    // Check that the notstart state is used at test time
    Sequence input = (Sequence) training.get(0).getData();
    Sequence output = new MaxLatticeDefault(crf, input)
        .bestOutputSequence();

    boolean notstartFound = false;
    for (int i = 0; i < output.size(); i++) {
      if (output.get(i).toString().equals("notstart")) {
        notstartFound = true;
      }
    }
    System.err.println(output.toString());
    assertTrue(notstartFound);

    // Now add -infinite weight onto a transition, and make sure that it's
    // honored.
    CRF.State state = crf.getState("notstart");
    int widx = crf.getWeightsIndex("BadBad");
    int numFeatures = crf.getInputAlphabet().size();
    SparseVector w = new SparseVector(new double[numFeatures]);
    w.setAll(Double.NEGATIVE_INFINITY);
    crf.setWeights(widx, w);

    state.addWeight(0, "BadBad");
    state.addWeight(1, "BadBad");

    // Verify that this effectively prevents the notstart state from being
    // used
    output = new MaxLatticeDefault(crf, input).bestOutputSequence();
    notstartFound = false;
    for (int i = 0; i < output.size() - 1; i++) {
      if (output.get(i).toString().equals("notstart")) {
        notstartFound = true;
      }
    }
    assertTrue(!notstartFound);
  }
View Full Code Here

  public void skiptestOldCrf() {
    CRF crf = (CRF) FileUtils.readObject(new File(oldCrfFile));
    Instance inst = crf.getInputPipe().instanceFrom(
        new Instance(testString, null, null, null));
    Sequence output = crf.transduce((Sequence) inst.getData());
    String std = output.toString();
    assertEquals(" B-PER I-PER O O", std);
  }
View Full Code Here

    Transducer transducer = trainer.getTransducer();
    totalTokens = numCorrectTokens = 0;
    for (int i = 0; i < instances.size(); i++) {
      Instance instance = instances.get(i);
      Sequence input = (Sequence) instance.getData();
      Sequence trueOutput = (Sequence) instance.getTarget();
      assert (input.size() == trueOutput.size());
      //System.err.println ("TokenAccuracyEvaluator "+i+" length="+input.size());
      Sequence predOutput = transducer.transduce (input);
      assert (predOutput.size() == trueOutput.size());

      for (int j = 0; j < trueOutput.size(); j++) {
        totalTokens++;
        if (trueOutput.get(j).equals(predOutput.get(j)))
          numCorrectTokens++;
      }
      //System.err.println ("TokenAccuracyEvaluator "+i+" numCorrectTokens="+numCorrectTokens+" totalTokens="+totalTokens+" accuracy="+((double)numCorrectTokens)/totalTokens);
    }
    double acc = ((double)numCorrectTokens)/totalTokens;
View Full Code Here

  public void evaluateInstanceList (TransducerTrainer tt, InstanceList data, String description)
  {
    int correct = 0;
    for (int i = 0; i < data.size(); i++) {
      Instance instance = data.get(i);
      Sequence input = (Sequence) instance.getData();
      Sequence trueOutput = (Sequence) instance.getTarget();
      assert (input.size() == trueOutput.size());
      Sequence predOutput = tt.getTransducer().transduce (input);
      assert (predOutput.size() == trueOutput.size());
      if (sequencesMatch (trueOutput, predOutput))
        correct++;
      }
    double acc = ((double)correct) / data.size();
    accuracy.put(description, acc);
View Full Code Here

    double loglik = 0.0;
    for (int i = 0; i < trainingSample.size(); i++) {
      Instance trainingInstance = trainingSample.get(i);
      FeatureVectorSequence fvs = (FeatureVectorSequence) trainingInstance
          .getData();
      Sequence labelSequence = (Sequence) trainingInstance.getTarget();
      loglik += new SumLatticeDefault(crf, fvs, labelSequence, null)
          .getTotalWeight();
      loglik -= new SumLatticeDefault(crf, fvs, null, null)
          .getTotalWeight();
    }
View Full Code Here

    double singleLoglik;
    constraints.zero();
    expectations.zero();
    FeatureVectorSequence fvs = (FeatureVectorSequence) trainingInstance
        .getData();
    Sequence labelSequence = (Sequence) trainingInstance.getTarget();
    singleLoglik = new SumLatticeDefault(crf, fvs, labelSequence,
        constraints.new Incrementor()).getTotalWeight();
    singleLoglik -= new SumLatticeDefault(crf, fvs, null,
        expectations.new Incrementor()).getTotalWeight();
    // Calculate parameter gradient given these instances: (constraints -
View Full Code Here

    totalTokens = numCorrectTokens = 0;
    for (int n = 0; n < numTrueSegments.length; n++)
      numTrueSegments[n] = numPredictedSegments[n] = numCorrectSegments[n] = 0;
    for (int i = 0; i < data.size(); i++) {
      Instance instance = data.get(i);
      Sequence input = (Sequence) instance.getData();
      //String tokens = null;
      //if (instance.getSource() != null)
      //tokens = (String) instance.getSource().toString();
      Sequence trueOutput = (Sequence) instance.getTarget();
      assert (input.size() == trueOutput.size());
      Sequence predOutput = model.transduce (input);
      assert (predOutput.size() == trueOutput.size());
      int trueStart, predStart;        // -1 for non-start, otherwise index into segmentStartTag
      for (int j = 0; j < trueOutput.size(); j++) {
        totalTokens++;
        if (trueOutput.get(j).equals(predOutput.get(j)))
          numCorrectTokens++;
        trueStart = predStart = -1;
        // Count true segment starts
        for (int n = 0; n < segmentStartTags.length; n++) {
          if (segmentStartTags[n].equals(trueOutput.get(j))) {
            numTrueSegments[n]++;
            numTrueSegments[allIndex]++;
            trueStart = n;
            break;
          }
        }
        // Count predicted segment starts
        for (int n = 0; n < segmentStartTags.length; n++) {
          if (segmentStartTags[n].equals(predOutput.get(j))) {
            numPredictedSegments[n]++;
            numPredictedSegments[allIndex]++;
            predStart = n;
          }
        }
        if (trueStart != -1 && trueStart == predStart) {
          // Truth and Prediction both agree that the same segment tag-type is starting now
          int m;
          boolean trueContinue = false;
          boolean predContinue = false;
          for (m = j+1; m < trueOutput.size(); m++) {
            trueContinue = segmentContinueTags[predStart].equals (trueOutput.get(m));
            predContinue = segmentContinueTags[predStart].equals (predOutput.get(m));
            if (!trueContinue || !predContinue) {
              if (trueContinue == predContinue) {
                // They agree about a segment is ending somehow
                numCorrectSegments[predStart]++;
                numCorrectSegments[allIndex]++;
View Full Code Here

      numTrueSegments[n] = numPredictedSegments[n] = numCorrectSegments[n] = 0;
    for (int i = 0; i < data.size(); i++) {
      if (viterbiOutputStream != null)
        viterbiOutputStream.println ("Viterbi path for "+description+" instance #"+i);
      Instance instance = data.get(i);
      Sequence input = (Sequence) instance.getData();     
      //String tokens = null;
      //if (instance.getSource() != null)
      //tokens = (String) instance.getSource().toString();
      Sequence trueOutput = (Sequence) instance.getTarget();
      assert (input.size() == trueOutput.size());
      Sequence predOutput = (Sequence) predictedSequences.get (i);
      if (predOutput == null) // skip this instance
        continue;
      assert (predOutput.size() == trueOutput.size());
      int trueStart, predStart;        // -1 for non-start, otherwise index into segmentStartTag
      for (int j = 0; j < trueOutput.size(); j++) {
        totalTokens++;
        if (trueOutput.get(j).equals(predOutput.get(j)))
          numCorrectTokens++;
        trueStart = predStart = -1;
        // Count true segment starts
        for (int n = 0; n < segmentStartTags.length; n++) {
          if (segmentStartTags[n].equals(trueOutput.get(j))) {
            numTrueSegments[n]++;
            numTrueSegments[allIndex]++;
            trueStart = n;
            break;
          }
        }
        // Count predicted segment starts
        for (int n = 0; n < segmentStartTags.length; n++) {
          if (segmentStartTags[n].equals(predOutput.get(j))) {
            numPredictedSegments[n]++;
            numPredictedSegments[allIndex]++;
            predStart = n;
          }
        }
        if (trueStart != -1 && trueStart == predStart) {
          // Truth and Prediction both agree that the same segment tag-type is starting now
          int m;
          boolean trueContinue = false;
          boolean predContinue = false;
          for (m = j+1; m < trueOutput.size(); m++) {
            trueContinue = segmentContinueTags[predStart].equals (trueOutput.get(m));
            predContinue = segmentContinueTags[predStart].equals (predOutput.get(m));
            if (!trueContinue || !predContinue) {
              if (trueContinue == predContinue) {
                // They agree about a segment is ending somehow
                numCorrectSegments[predStart]++;
                numCorrectSegments[allIndex]++;
              }
              break;
            }
          }
          // for the case of the end of the sequence
          if (m == trueOutput.size()) {
            if (trueContinue == predContinue) {
              numCorrectSegments[predStart]++;
              numCorrectSegments[allIndex]++;
            }
          }
        }

        if (viterbiOutputStream != null) {
          FeatureVector fv = (FeatureVector) input.get(j);
          //viterbiOutputStream.println (tokens.charAt(j)+" "+trueOutput.get(j).toString()+
          //'/'+predOutput.get(j).toString()+"  "+ fv.toString(true));
          if (sourceTokenSequence != null)
            viterbiOutputStream.print (sourceTokenSequence.get(j).getText()+": ");
          viterbiOutputStream.println (trueOutput.get(j).toString()+
                                       '/'+predOutput.get(j).toString()+"  "+ fv.toString(true));
           
        }
      }
    }
    DecimalFormat f = new DecimalFormat ("0.####");
View Full Code Here

TOP

Related Classes of cc.mallet.types.Sequence

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.