Package weka.classifiers

Examples of weka.classifiers.Evaluation
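As a starting point, here is a minimal, self-contained sketch of typical Evaluation usage (assuming Weka 3.7+; the J48 classifier and the iris.arff path are illustrative placeholders, not part of the snippets below):

    import java.util.Random;

    import weka.classifiers.Evaluation;
    import weka.classifiers.trees.J48;
    import weka.core.Instances;
    import weka.core.converters.ConverterUtils.DataSource;

    public class EvaluationDemo {
      public static void main(String[] args) throws Exception {
        // Load a dataset; the path is a placeholder.
        Instances data = DataSource.read("iris.arff");
        data.setClassIndex(data.numAttributes() - 1);

        // 10-fold cross-validation of a J48 decision tree.
        Evaluation eval = new Evaluation(data);
        eval.crossValidateModel(new J48(), data, 10, new Random(1));

        System.out.println(eval.toSummaryString());
        System.out.println(eval.toMatrixString());
      }
    }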


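The first example evaluates a trained FilteredClassifier on a test set, writes the per-instance predictions into a PlainText buffer, parses each prediction line back into a class label and probability, and saves classifications, probabilities, predictions, and meta information to files.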
    AbstractOutput output = new PlainText();
    output.setBuffer(new StringBuffer());
    output.setHeader(test);
    output.setAttributes("first");

    Evaluation eval = new Evaluation(train);
    eval.evaluateModel(filteredClassifier, test, output);

    // Convert predictions to CSV
    // Format: inst#, actual, predicted, error, probability, (ID)
    String[] scores = new String[(int) eval.numInstances()];
    double[] probabilities = new double[(int) eval.numInstances()];
    for (String line : output.getBuffer().toString().split("\n"))
    {
      String[] linesplit = line.split("\\s+");

      // If the prediction was wrong, the error flag "+" adds one token,
      // shifting the probability and ID columns one position to the right.

      int id;
      String expectedValue, classification;
      double probability;

      if (line.contains("+"))
      {
        id = Integer.parseInt(linesplit[6].substring(1, linesplit[6].length() - 1));
        expectedValue = linesplit[2].substring(2);
        classification = linesplit[3].substring(2);
        probability = Double.parseDouble(linesplit[5]);
      } else {
        id = Integer.parseInt(linesplit[5].substring(1, linesplit[5].length() - 1));
        expectedValue = linesplit[2].substring(2);
        classification = linesplit[3].substring(2);
        probability = Double.parseDouble(linesplit[4]);
      }

      scores[id - 1] = classification;
      probabilities[id - 1] = probability;
    }

    System.out.println(eval.toSummaryString());
    System.out.println(eval.toMatrixString());

    // Output classifications
    StringBuilder sb = new StringBuilder();
    for (String score : scores)
      sb.append(score).append(LF);

    FileUtils.writeStringToFile(
      new File(OUTPUT_DIR + "/" + testDataset.toString() + "/" + wekaClassifier.toString() + "/" + testDataset.toString() + ".csv"),
      sb.toString());

    // Output probabilities
    sb = new StringBuilder();
    for (double probability : probabilities)
      sb.append(probability).append(LF);

    FileUtils.writeStringToFile(
      new File(OUTPUT_DIR + "/" + testDataset.toString() + "/" + wekaClassifier.toString() + "/" + testDataset.toString() + ".probabilities.csv"),
      sb.toString());

    // Output predictions
    FileUtils.writeStringToFile(
      new File(OUTPUT_DIR + "/" + testDataset.toString() + "/" + wekaClassifier.toString() + "/" + testDataset.toString() + ".predictions.txt"),
      output.getBuffer().toString());

    // Output meta information
    sb = new StringBuilder();
    sb.append(classifier.toString()).append(LF);
    sb.append(eval.toSummaryString()).append(LF);
    sb.append(eval.toMatrixString()).append(LF);

    FileUtils.writeStringToFile(
      new File(OUTPUT_DIR + "/" + testDataset.toString() + "/" + wekaClassifier.toString() + "/" + testDataset.toString() + ".meta.txt"),
      sb.toString());
  }


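The next example runs a manual cross-validation: for each fold it copies the base classifier, wraps it in a FilteredClassifier (removing an ID attribute), evaluates it, and uses the AddClassification filter to gather per-instance predictions into one Instances object for output.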
    // Randomize the data
    data.randomize(random);

    // Perform cross-validation
    Instances predictedData = null;
    Evaluation eval = new Evaluation(data);

    for (int n = 0; n < folds; n++)
    {
      Instances train = data.trainCV(folds, n, random);
      Instances test = data.testCV(folds, n);

      // Apply log filter
      // Filter logFilter = new LogFilter();
      // logFilter.setInputFormat(train);
      // train = Filter.useFilter(train, logFilter);
      // logFilter.setInputFormat(test);
      // test = Filter.useFilter(test, logFilter);

      // Copy the classifier
      Classifier classifier = AbstractClassifier.makeCopy(baseClassifier);

      // Instantiate the FilteredClassifier
      FilteredClassifier filteredClassifier = new FilteredClassifier();
      filteredClassifier.setFilter(removeIDFilter);
      filteredClassifier.setClassifier(classifier);

      // Build the classifier
      filteredClassifier.buildClassifier(train);

      // Evaluate
      eval.evaluateModel(filteredClassifier, test);

      // Add predictions
      AddClassification filter = new AddClassification();
      filter.setClassifier(classifier);
      filter.setOutputClassification(true);
      filter.setOutputDistribution(false);
      filter.setOutputErrorFlag(true);
      filter.setInputFormat(train);
      Filter.useFilter(train, filter); // trains the classifier

      Instances pred = Filter.useFilter(test, filter); // performs predictions on test set
      if (predictedData == null)
        predictedData = new Instances(pred, 0);
      for (int j = 0; j < pred.numInstances(); j++)
        predictedData.add(pred.instance(j));
    }

    System.out.println(eval.toSummaryString());
    System.out.println(eval.toMatrixString());

    // Prepare output scores
    String[] scores = new String[predictedData.numInstances()];

    for (Instance predInst : predictedData)
    {
      // The ID attribute is 1-based; convert it to a 0-based array index
      int id = (int) predInst.value(predInst.attribute(0)) - 1;

      // The predicted class added by AddClassification is the second-to-last
      // attribute (the error flag is last)
      int valueIdx = predictedData.numAttributes() - 2;

      String value = predInst.stringValue(predInst.attribute(valueIdx));

      scores[id] = value;
    }

    // Output classifications
    StringBuilder sb = new StringBuilder();
    for (String score : scores)
      sb.append(score).append(LF);

    FileUtils.writeStringToFile(
      new File(OUTPUT_DIR + "/" + dataset.toString() + "/" + wekaClassifier.toString() + "/" + dataset.toString() + ".csv"),
      sb.toString());

    // Output prediction arff
    DataSink.write(
      OUTPUT_DIR + "/" + dataset.toString() + "/" + wekaClassifier.toString() + "/" + dataset.toString() + ".predicted.arff",
      predictedData);

    // Output meta information
    sb = new StringBuilder();
    sb.append(baseClassifier.toString()).append(LF);
    sb.append(eval.toSummaryString()).append(LF);
    sb.append(eval.toMatrixString()).append(LF);

    FileUtils.writeStringToFile(
      new File(OUTPUT_DIR + "/" + dataset.toString() + "/" + wekaClassifier.toString() + "/" + dataset.toString() + ".meta.txt"),
      sb.toString());
  }

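Here Evaluation consumes precomputed class distributions: evaluateModelOnceAndRecordPrediction records one prediction per instance, and a single metric is then extracted from the accumulated statistics.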
   * @throws Exception if something goes wrong
   */
  private double evaluatePredictions(Instances instances,
      double[][] temp_predictions, int metric) throws Exception {
   
    Evaluation eval = new Evaluation(instances);
    for (int i = 0; i < instances.numInstances(); ++i) {
      eval.evaluateModelOnceAndRecordPrediction(temp_predictions[i],
          instances.instance(i));
    }
    return EnsembleMetricHelper.getMetric(eval, metric);
  }

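This fragment shows only the setup of a feature-subset evaluation: a fresh Evaluation is created over the training instances before the subset is scored.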
   * @throws Exception if subset can't be evaluated
   */
  protected double estimatePerformance(BitSet feature_set, int num_atts)
  throws Exception {

    m_evaluation = new Evaluation(m_theInstances);
    int i;
    int [] fs = new int [num_atts];

    double [] instA = new double [num_atts];
    int classI = m_theInstances.classIndex();

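From a grid-search routine: each grid point is either fetched from a cache or evaluated via crossValidateModel after filtering the data, and the resulting Performance objects are collected.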
    int i;
    Enumeration<PointDouble> enm;
    Vector<Performance> performances;
    PointDouble values;
    Instances data;
    Evaluation eval;
    PointDouble result;
    Classifier classifier;
    Filter filter;
    int size;
    boolean cached;
    boolean allCached;
    Performance p1;
    Performance p2;
    double x;
    double y;

    performances = new Vector<Performance>();

    log("Determining best pair with " + cv + "-fold CV in Grid:\n" + grid + "\n");

    if (m_Traversal == TRAVERSAL_BY_COLUMN)
      size = grid.width();
    else
      size = grid.height();

    allCached = true;

    for (i = 0; i < size; i++) {
      if (m_Traversal == TRAVERSAL_BY_COLUMN)
        enm = grid.column(i);
      else
        enm = grid.row(i);

      filter = null;
      data = null;

      while (enm.hasMoreElements()) {
        values = enm.nextElement();

        // already calculated?
        cached = m_Cache.isCached(cv, values);
        if (cached) {
          performances.add(m_Cache.get(cv, values));
        }
        else {
          allCached = false;

          x = evaluate(values.getX(), true);
          y = evaluate(values.getY(), false);

          // pass the data through the filter
          if (filter == null) {
            filter = (Filter) setup(getFilter(), x, y);
            filter.setInputFormat(inst);
            data = Filter.useFilter(inst, filter);
            // make sure that the numbers don't get too small - otherwise NaNs!
            Filter cleaner = new NumericCleaner();
            cleaner.setInputFormat(data);
            data = Filter.useFilter(data, cleaner);
          }

          // setup the classifier
          classifier = (Classifier) setup(getClassifier(), x, y);

          // evaluate
          eval = new Evaluation(data);
          eval.crossValidateModel(classifier, data, cv, new Random(getSeed()));
          performances.add(new Performance(values, eval));

          // add to cache
          m_Cache.add(cv, new Performance(values, eval));
        }

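From Weka's BFTree pre-pruning: each expansion of the tree is scored per fold with a fresh Evaluation (error rate or RMSE), and an optional one-standard-error rule decides when to stop expanding.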
        // compute average error
        double expansionError = 0;
        int count = 0;

        for (int i = 0; i < m_numFoldsPruning; i++) {
          Evaluation eval;

          // calculate error rate if only root node
          if (expansion == 0) {
            m_roots[i].m_isLeaf = true;
            eval = new Evaluation(test[i]);
            eval.evaluateModel(m_roots[i], test[i]);
            if (m_UseErrorRate) expansionError += eval.errorRate();
            else expansionError += eval.rootMeanSquaredError();
            count++;
          }

          // make tree - expand one node at a time
          else {
            if (m_roots[i] == null) continue; // if the tree cannot be expanded, go to next fold
            m_roots[i].m_isLeaf = false;
            BFTree nodeToSplit = (BFTree)
              (((FastVector) (parallelBFElements[i].elementAt(0))).elementAt(0));
            if (!m_roots[i].makeTree(parallelBFElements[i], m_roots[i], train[i],
                nodeToSplit.m_SortedIndices, nodeToSplit.m_Weights,
                nodeToSplit.m_Dists, nodeToSplit.m_ClassProbs,
                nodeToSplit.m_TotalWeight, nodeToSplit.m_Props, m_minNumObj,
                m_Heuristic, m_UseGini)) {
              m_roots[i] = null; // cannot be expanded
              continue;
            }
            eval = new Evaluation(test[i]);
            eval.evaluateModel(m_roots[i], test[i]);
            if (m_UseErrorRate) expansionError += eval.errorRate();
            else expansionError += eval.rootMeanSquaredError();
            count++;
          }
        }

        // no tree can be expanded any more
        if (count == 0) break;

        expansionError /= count;
        errorList.addElement(new Double(expansionError));
        currentError = expansionError;

        if (!m_UseOneSE) {
          if (currentError > previousError)
            break;
        }

        else {
          if (expansionError < minError) {
            minError = expansionError;
            minExpansion = expansion;
          }

          if (currentError > previousError) {
            double oneSE = Math.sqrt(minError * (1 - minError) /
                data.numInstances());
            if (currentError > minError + oneSE) {
              break;
            }
          }
        }

        expansion++;
        previousError = currentError;
      }

      if (!m_UseOneSE) expansion = expansion - 1;
      else {
        double oneSE = Math.sqrt(minError * (1 - minError) / data.numInstances());
        for (int i = 0; i < errorList.size(); i++) {
          double error = ((Double) (errorList.elementAt(i))).doubleValue();
          if (error <= minError + oneSE) { // && counts[i]>=m_numFoldsPruning/2) {
            expansion = i;
            break;
          }
        }
      }
    }

    // build a postpruned tree
    else {
      FastVector[] modelError = new FastVector[m_numFoldsPruning];

      // calculate error of each expansion for each fold
      for (int i = 0; i < m_numFoldsPruning; i++) {
        modelError[i] = new FastVector();

        m_roots[i].m_isLeaf = true;
        Evaluation eval = new Evaluation(test[i]);
        eval.evaluateModel(m_roots[i], test[i]);
        double error;
        if (m_UseErrorRate) error = eval.errorRate();
        else error = eval.rootMeanSquaredError();
        modelError[i].addElement(new Double(error));

        m_roots[i].m_isLeaf = false;
        BFTree nodeToSplit = (BFTree)
          (((FastVector) (parallelBFElements[i].elementAt(0))).elementAt(0));

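The same pattern appears in BFTree's post-pruning: after a split's successors are turned into leaves, the whole tree is re-evaluated on the test fold and the error recorded.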
          subsetWeights, dists, m_Attribute, useHeuristic, useGini);
      for (int i = 0; i < 2; i++) {
        m_Successors[i].makeLeaf(train);
      }

      Evaluation eval = new Evaluation(test);
      eval.evaluateModel(root, test);
      double error;
      if (useErrorRate) error = eval.errorRate();
      else error = eval.rootMeanSquaredError();
      modelError.addElement(new Double(error));
    }

    if (BestFirstElements.size() != 0) {
      FastVector nextSplitElement = (FastVector) BestFirstElements.elementAt(0);

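Attribute-selection code: with the non-selected attributes removed, the base classifier is cross-validated up to five times and the configured evaluation measure is read off each run.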
    delTransform.setInputFormat(trainCopy);
    trainCopy = Filter.useFilter(trainCopy, delTransform);

    // max of 5 repetitions of cross validation
    for (i = 0; i < 5; i++) {
      m_Evaluation = new Evaluation(trainCopy);
      m_Evaluation.crossValidateModel(m_BaseClassifier, trainCopy, m_folds, Rnd);
     
      switch (m_evaluationMeasure) {
      case EVAL_DEFAULT:
        repError[i] = m_Evaluation.errorRate();

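From CVParameterSelection: for every candidate parameter value, the classifier options are rebuilt and an n-fold cross-validation is run with a fixed seed so that every setting sees identical folds.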
        cvParam.m_ParamValue += increment) {
        findParamsByCrossValidation(depth + 1, trainData, random);
      }
    } else {

      Evaluation evaluation = new Evaluation(trainData);

      // Set the classifier options
      String[] options = createOptions();
      if (m_Debug) {
        System.err.print("Setting options for "
            + m_Classifier.getClass().getName() + ":");
        for (int i = 0; i < options.length; i++) {
          System.err.print(" " + options[i]);
        }
        System.err.println("");
      }
      ((OptionHandler) m_Classifier).setOptions(options);
      for (int j = 0; j < m_NumFolds; j++) {

        // We want to randomize the data the same way for every
        // learning scheme.
        Instances train = trainData.trainCV(m_NumFolds, j, new Random(1));
        Instances test = trainData.testCV(m_NumFolds, j);
        m_Classifier.buildClassifier(train);
        evaluation.setPriors(train);
        evaluation.evaluateModel(m_Classifier, test);
      }
      double error = evaluation.errorRate();
      if (m_Debug) {
        System.err.println("Cross-validated error rate: "
            + Utils.doubleToString(error, 6, 4));
      }
      if ((m_BestPerformance == -99) || (error < m_BestPerformance)) {

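OneR-based attribute evaluation: each attribute is isolated together with the class, a OneR classifier is built on the training data (or cross-validated), and 1 minus the error rate is returned as a percentage.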
   */
  public double evaluateAttribute (int attribute)
    throws Exception {
    int[] featArray = new int[2]; // feat + class
    double errorRate;
    Evaluation o_Evaluation;
    Remove delTransform = new Remove();
    delTransform.setInvertSelection(true);
    // copy the instances
    Instances trainCopy = new Instances(m_trainInstances);
    featArray[0] = attribute;
    featArray[1] = trainCopy.classIndex();
    delTransform.setAttributeIndicesArray(featArray);
    delTransform.setInputFormat(trainCopy);
    trainCopy = Filter.useFilter(trainCopy, delTransform);
    o_Evaluation = new Evaluation(trainCopy);
    String [] oneROpts = { "-B", ""+getMinimumBucketSize()};
    Classifier oneR = AbstractClassifier.forName("weka.classifiers.rules.OneR", oneROpts);
    if (m_evalUsingTrainingData) {
      oneR.buildClassifier(trainCopy);
      o_Evaluation.evaluateModel(oneR, trainCopy);
    } else {
      /*      o_Evaluation.crossValidateModel("weka.classifiers.rules.OneR",
              trainCopy, 10,
              null, new Random(m_randomSeed)); */
      o_Evaluation.crossValidateModel(oneR, trainCopy, m_folds, new Random(m_randomSeed));
    }
    errorRate = o_Evaluation.errorRate();
    return (1 - errorRate) * 100.0;
  }
