Package cc.mallet.types

Examples of cc.mallet.types.InfoGain


    ClassifyingNeighborEvaluator eval =
      new ClassifyingNeighborEvaluator(me, "YES");
                                          
    Trial trial = new Trial(me, trainList);
    System.err.println(new ConfusionMatrix(trial));
    InfoGain ig = new InfoGain(trainList);
    ig.print();

//     Clusterer clusterer = new GreedyAgglomerative(training.getInstances().getPipe(),
//                                                   eval, 0.5);
    Clusterer clusterer = new GreedyAgglomerativeByDensity(training.getInstances().getPipe(),
                                                           eval, 0.5, false,
View Full Code Here


   * @return List of features with the highest information gains.
   */
  public static ArrayList<Integer> selectFeaturesByInfoGain(InstanceList list, int numFeatures) {
    ArrayList<Integer> features = new ArrayList<Integer>();
   
    InfoGain infogain = new InfoGain(list);
    for (int rank = 0; rank < numFeatures; rank++) {
      features.add(infogain.getIndexAtRank(rank));
    }
    return features;
  }
View Full Code Here

   
    int numLabels = list.getTargetAlphabet().size();
   
    int minRank = 100 * numLabels;
   
    InfoGain infogain = new InfoGain(list);
    double sum = 0;
    for (int rank = 0; rank < minRank; rank++) {
      sum += infogain.getValueAtRank(rank);
    }
    double mean = sum / minRank;
   
    for (int i = 0; i < features.size(); i++) {
      int fi = features.get(i);
     
      // reject features with infogain
      // less than cutoff
      if (reject && infogain.value(fi) < mean) {
        //System.err.println("Oracle labeler rejected labeling: " + list.getDataAlphabet().lookupObject(fi));
        logger.info("Oracle labeler rejected labeling: " + list.getDataAlphabet().lookupObject(fi));
        continue;
      }
     
View Full Code Here

      }
      logger.info("generated " + trainingInstances.size()
          + " training instances");
      Classifier classifier = new MaxEntTrainer().train(trainingInstances);
      logger.info("InfoGain:\n");
      new InfoGain(trainingInstances).printByRank(System.out);
      logger.info("pairwise training accuracy="
          + new Trial(classifier, trainingInstances).getAccuracy());
      NeighborEvaluator neval = new PairwiseEvaluator(classifier, "YES",
          new PairwiseEvaluator.Average(), true);       
      clusterer = new GreedyAgglomerativeByDensity(
View Full Code Here

    // xxx Also calculate some sort of inverted entropy for feature induction,
    // in order to find the class that needs a new feature with a negative weight.

    // Builds a decision-tree node over ilist, choosing its split feature by
    // information gain restricted to the features allowed by fs.
    public Node (InstanceList ilist, Node parent, FeatureSelection fs)
    {
      // Rank all features by information gain with respect to ilist's labels.
      InfoGain ig = new InfoGain (ilist);
      // Split on the highest-gain feature among those permitted by fs.
      this.featureIndex = ig.getMaxValuedIndexIn (fs);
      this.infoGain = ig.value(featureIndex);
      this.ilist = ilist;
      this.dictionary = ilist.getDataAlphabet();
      this.parent = parent;  // NOTE(review): presumably null for the root node -- confirm with callers
      // Label distribution / entropy of the whole node before any split.
      this.labeling = ig.getBaseLabelDistribution();
      this.labelEntropy = ig.getBaseEntropy();
      // Leaf until children are created by a later split.
      this.child0 = this.child1 = null;
    }
View Full Code Here

   * @return List of features with the highest information gains.
   */
  public static ArrayList<Integer> selectFeaturesByInfoGain(InstanceList list, int numFeatures) {
    ArrayList<Integer> features = new ArrayList<Integer>();
   
    InfoGain infogain = new InfoGain(list);
    for (int rank = 0; rank < numFeatures; rank++) {
      features.add(infogain.getIndexAtRank(rank));
    }
    return features;
  }
View Full Code Here

   
    int numLabels = list.getTargetAlphabet().size();
   
    int minRank = 100 * numLabels;
   
    InfoGain infogain = new InfoGain(list);
    double sum = 0;
    for (int rank = 0; rank < minRank; rank++) {
      sum += infogain.getValueAtRank(rank);
    }
    double mean = sum / minRank;
   
    for (int i = 0; i < features.size(); i++) {
      int fi = features.get(i);
     
      // reject features with infogain
      // less than cutoff
      if (infogain.value(fi) < mean) {
        logger.info("Oracle labeler rejected labeling: " + list.getDataAlphabet().lookupObject(fi));
        continue;
      }
     
      double[] prob = featureLabelCounts[fi];
View Full Code Here

TOP

Related Classes of cc.mallet.types.InfoGain

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.