Examples of cc.mallet.types.LabelAlphabet

cc.mallet.types.LabelAlphabet
A mapping from arbitrary objects (usually Strings) to integers (and corresponding Label objects) and back. @author Andrew McCallum (mccallum@cs.umass.edu)

  public void evaluate (String description, Extraction extraction, PrintStream out)
  {
    int numDocs = extraction.getNumDocuments ();
    assert numDocs == extraction.getNumRecords ();


    LabelAlphabet dict = extraction.getLabelAlphabet();
    int numLabels = dict.size();
    int[] numCorr = new int [numLabels];
    int[] numPred = new int [numLabels];
    int[] numTrue = new int [numLabels];


    for (int docnum = 0; docnum < numDocs; docnum++) {
      Record extracted = extraction.getRecord (docnum);
      Record target = extraction.getTargetRecord (docnum);


      // Calc precision
      Iterator it = extracted.fieldsIterator ();
      while (it.hasNext ()) {
        Field predField = (Field) it.next ();
        Label name = predField.getName ();
        Field trueField = target.getField (name);
        int idx = name.getIndex ();


        for (int j = 0; j < predField.numValues(); j++) {
          numPred [idx]++;
          if (trueField != null && trueField.isValue (predField.value (j), comparator)) {
            numCorr [idx]++;
          } else {
            // We have an error, report if necessary (this should be moved to the per-field rather than per-filler level.)
            if (errorOutputStream != null) {
              //xxx TODO: Display name of supporting document
              errorOutputStream.println ("Error in extraction!");
              errorOutputStream.println ("Predicted "+predField);
              errorOutputStream.println ("True "+trueField);
              errorOutputStream.println ();
            }
          }


        }
      }


      // Calc true
      it = target.fieldsIterator ();
      while (it.hasNext ()) {
        Field trueField = (Field) it.next ();
        Label name = trueField.getName ();
        numTrue [name.getIndex ()] += trueField.numValues ();
      }
    }


    out.println (description+" SEGMENT counts");
    out.println ("Name\tCorrect\tPred\tTarget");
    for (int i = 0; i < numLabels; i++) {
      Label name = dict.lookupLabel (i);
      out.println (name+"\t"+numCorr[i]+"\t"+numPred[i]+"\t"+numTrue[i]);
    }
    out.println ();


    DecimalFormat f = new DecimalFormat ("0.####");


    double totalF1 = 0;
    int totalFields = 0;
    out.println (description+" per-field F1");
    out.println ("Name\tP\tR\tF1");
    for (int i = 0; i < numLabels; i++) {
      double P = (numPred[i] == 0) ? 0 : ((double)numCorr[i]) / numPred [i];
      double R = (numTrue[i] == 0) ? 1 : ((double)numCorr[i]) / numTrue [i];
      double F1 = (P + R == 0) ? 0 : (2 * P * R) / (P + R);
      if ((numPred[i] > 0) || (numTrue[i] > 0)) {
        totalF1 += F1;
        totalFields++;
      }
      Label name = dict.lookupLabel (i);
      out.println (name+"\t"+f.format(P)+"\t"+f.format(R)+"\t"+f.format(F1));
    }


    int totalCorr = MatrixOps.sum (numCorr);
    int totalPred = MatrixOps.sum (numPred);

View Full Code Here




  private static LabelAlphabet createBlankAlphabet (int numOutcomes)
  {
    if (numOutcomes > 0) {
      LabelAlphabet outcomes = new LabelAlphabet ();
      /* Setup default outcomes */
      for (int i = 0; i < numOutcomes; i++) {
        outcomes.lookupIndex (new Integer (i));
      }
      return outcomes;
    } else return null;
  }

View Full Code Here

  private LabelAlphabet ld;
  private LabelVector lv;


  protected void setUp ()
  {
    ld = new LabelAlphabet ();
    lv = new LabelVector (ld,
                          new int[] {
                            ld.lookupIndex ("a"),
                            ld.lookupIndex ("b"),
                            ld.lookupIndex ("c"),

View Full Code Here

  public void evaluate (String description, Extraction extraction, PrintWriter out)
  {
    int numDocs = extraction.getNumDocuments ();
    assert numDocs == extraction.getNumRecords ();


    LabelAlphabet dict = extraction.getLabelAlphabet();
    int numLabels = dict.size();
    int[] numCorr = new int [numLabels];
    int[] numPred = new int [numLabels];
    int[] numTrue = new int [numLabels];


    for (int docnum = 0; docnum < numDocs; docnum++) {
      Record extracted = extraction.getRecord (docnum);
      Record target = extraction.getTargetRecord (docnum);


      // Calc precision
      Iterator it = extracted.fieldsIterator ();
      while (it.hasNext ()) {
        Field predField = (Field) it.next ();
        Label name = predField.getName ();
        Field trueField = target.getField (name);
        int idx = name.getIndex ();


        numPred [idx]++;


        if (predField.numValues() > 1)
          System.err.println ("Warning: Field "+predField+" has more than one extracted value. Picking arbitrarily...");
        if (trueField != null && trueField.isValue (predField.value (0), comparator)) {
          numCorr [idx]++;
        } else {
          // We have an error, report if necessary
          if (errorOutputStream != null) {
            //xxx TODO: Display name of supporting document
            errorOutputStream.println ("Error in extraction! Document "+extraction.getDocumentExtraction (docnum).getName ());
            errorOutputStream.println ("Predicted "+predField);
            errorOutputStream.println ("True "+trueField);
            errorOutputStream.println ();
          }
        }
      }


      // Calc true
      it = target.fieldsIterator ();
      while (it.hasNext ()) {
        Field trueField = (Field) it.next ();
        Label name = trueField.getName ();
        numTrue [name.getIndex ()]++;
      }
    }


    DecimalFormat f = new DecimalFormat ("0.####");


    double totalF1 = 0;
    int totalFields = 0;
    out.println (description+" per-document F1");
    out.println ("Name\tP\tR\tF1");
    for (int i = 0; i < numLabels; i++) {
      double P = (numPred[i] == 0) ? 0 : ((double)numCorr[i]) / numPred [i];
      double R = (numTrue[i] == 0) ? 1 : ((double)numCorr[i]) / numTrue [i];
      double F1 = (P + R == 0) ? 0 : (2 * P * R) / (P + R);
      if ((numPred[i] > 0) || (numTrue[i] > 0)) {
        totalF1 += F1;
        totalFields++;
      }
      Label name = dict.lookupLabel (i);
      out.println (name+"\t"+f.format(P)+"\t"+f.format(R)+"\t"+f.format(F1));
    }


    int totalCorr = MatrixOps.sum (numCorr);
    int totalPred = MatrixOps.sum (numPred);

View Full Code Here

          maxidx = assn.get (var);
        } else {
          maxidx = 0;
        }


        LabelAlphabet dict = labelOfVar (var).getLabelAlphabet ();
        theseLabels[i] = dict.lookupLabel (maxidx);
      }


      lbls[t] = new Labels (theseLabels);
    }

View Full Code Here

    out.println (".pred { background-color:#FFFF66 }");
    out.close ();




    //PRED css
    LabelAlphabet dict = extraction.getLabelAlphabet ();
    String[] fields = determineFieldNames (dict);
    String[] colors = ColorUtils.rainbow (fields.length, (float) SATURATION, 1);
    out = new PrintWriter (new FileWriter (new File (directory, DOC_ERRS_PRED_CSS_FNAME)));
    out.println (".class_legend { border-style: dashed; border-width: 2px; padding: 10px; padding-top: 0ex; float: right; margin:2em; }");
    out.println (".tf_legend { visibility: hidden; }");

View Full Code Here

  private class OverlappingFeaturePipe extends Pipe {


    private static final long serialVersionUID = 1L;


    /**
     * Creates the pipe, passing a newly constructed {@code Alphabet} and a
     * newly constructed {@code LabelAlphabet} to the {@code Pipe} superclass
     * constructor.
     */
    public OverlappingFeaturePipe () {
      // NOTE(review): presumably the first argument is the data alphabet and
      // the second the target alphabet — confirm against Pipe's constructor.
      super (new Alphabet(), new LabelAlphabet());      
    }

View Full Code Here

            positive = false;
            break;
          }
        }
      }
      LabelAlphabet ldict = (LabelAlphabet)getTargetAlphabet();
      String label = positive ? "YES" : "NO";      
      carrier.setTarget(ldict.lookupLabel(label));
      return carrier;
    }

View Full Code Here

        char type = in.readChar ();
        Object obj;
        
        switch (type) {
        case TYPE_LABEL:
            LabelAlphabet ldict = (LabelAlphabet) getTargetAlphabet ();
            String name = (String) in.readObject ();
            obj = ldict.lookupLabel (name);
            break;
        case TYPE_FEATURE_VECTOR:
            int[] indices = (int[]) in.readObject ();
            double[] values = (double[]) in.readObject ();
            obj = new FeatureVector(getDataAlphabet (), indices, values);

View Full Code Here

    super (name);
  }


  public void testSerializable () throws IOException, ClassNotFoundException
  {
    LabelAlphabet dict = new LabelAlphabet ();
    Labels lbls1 = new Labels (new Label[] {
      dict.lookupLabel ("A"),
      dict.lookupLabel ("B"),
    });
    Labels lbls2 = new Labels (new Label[] {
      dict.lookupLabel ("C"),
      dict.lookupLabel ("A"),
    });
    LabelsSequence lblseq  = new LabelsSequence (new Labels[] { lbls1, lbls2 });
    LabelsSequence lblseq2 = (LabelsSequence) TestSerializable.cloneViaSerialization (lblseq);
    assertEquals (lblseq.size(), lblseq2.size());
    assertEquals (lblseq.getLabels(0).toString(), lblseq2.getLabels(0).toString ());

View Full Code Here

0 1 2 3 4 5

TOP

Related Classes of cc.mallet.types.LabelAlphabet

cc.mallet.classify.evaluate.ConfusionMatrix

cc.mallet.classify.MaxEnt

cc.mallet.classify.MaxEntOptimizableByLabelDistribution

cc.mallet.classify.MaxEntOptimizableByLabelLikelihood

cc.mallet.classify.MCMaxEnt

cc.mallet.classify.MCMaxEntTrainer$MaximizableTrainer

cc.mallet.classify.RankMaxEnt

cc.mallet.classify.RankMaxEntTrainer$MaximizableTrainer

cc.mallet.cluster.examples.FirstOrderClusterExample$OverlappingFeaturePipe

cc.mallet.cluster.tui.Clusterings2Clusterer$ClusteringPipe

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact: coftware#gmail.com.