Package etc.aloe.data

Source Code of etc.aloe.data.ExampleSet

/*
* This file is part of ALOE.
*
* ALOE is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.

* ALOE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
* GNU General Public License for more details.

* You should have received a copy of the GNU General Public License
* along with ALOE.  If not, see <http://www.gnu.org/licenses/>.
*
* Copyright (c) 2012 SCCL, University of Washington (http://depts.washington.edu/sccl)
*/
package etc.aloe.data;

import weka.core.Attribute;
import weka.core.Instance;
import weka.core.Instances;
import weka.filters.Filter;
import weka.filters.unsupervised.instance.RemoveWithValues;

/**
* ExampleSet contains information about data points that have features
* extracted. These data points are ready for labeling by a model.
*
* Instances in an ExampleSet always have at least these attributes: 'message' -
* which contains the message text. '*id' - which is a unique integer
* identifying the message. 'label' - the ground truth label for the instance (0
* or 1)
*
* @author Michael Brooks <mjbrooks@uw.edu>
*/
public class ExampleSet {

    public final static String ID_ATTR_NAME = "*id";
    public final static String MESSAGE_ATTR_NAME = "message";
    public final static String LABEL_ATTR_NAME = "label";
    public final static String PARTICIPANT_ATTR_NAME = "participant";
   
    private Instances instances;

    /**
     * Construct an ExampleSet containing the given instances.
     *
     * @param instances
     */
    public ExampleSet(Instances instances) {
        this.instances = instances;
    }

    /**
     * Make a copy of the ExampleSet, copying the underlying instances.
     *
     * @return
     */
    public ExampleSet copy() {
        return new ExampleSet(new Instances(instances));
    }

    /**
     * The size of the example set.
     *
     * @return
     */
    public int size() {
        return instances.size();
    }

    /**
     * Returns a new example set containing only those examples with labels.
     *
     * @return
     */
    public ExampleSet onlyLabeled() {
        RemoveWithValues filter = new RemoveWithValues();
        filter.setAttributeIndex("" + (instances.classIndex() + 1));
        filter.setMatchMissingValues(true);
        filter.setInvertSelection(true);

        try {
            filter.setInputFormat(instances);
            Instances result = Filter.useFilter(instances, filter);
            ExampleSet resultSet = new ExampleSet(result);
            return resultSet;
        } catch (Exception ex) {
            System.err.println("Unable to apply filter!");
            return null;
        }
    }

    /**
     * Get the ith instance.
     *
     * @param i
     * @return
     */
    public Instance get(int i) {
        return instances.get(i);
    }

    /**
     * Get the underlying instances.
     *
     * @return
     */
    public Instances getInstances() {
        return instances;
    }

    /**
     * Gets the actual label of the given example. If the example is unlabeled,
     * returns null;
     *
     * @param i
     * @return
     */
    public Boolean getTrueLabel(int i) {
        Instance instance = instances.get(i);
        return getClassLabel(instance.classValue());
    }

    /**
     * Converts a double class value into a boolean given the string labels for
     * the class attribute in this data set. Returns null if the class value is
     * weka missing.
     *
     * @param classValue
     * @return
     */
    public Boolean getClassLabel(double classValue) {
        if (Double.isNaN(classValue)) {
            return null;
        }

        Attribute classAttr = instances.classAttribute();
        String classValueStr = classAttr.value((int) classValue);
        return Boolean.parseBoolean(classValueStr);
    }

    /**
     * Gets the confidence in the positive class.
     * @param classDistribution
     * @param classValue
     * @return
     */
    public Double getConfidence(double[] classDistribution, double classValue) {
        if (Double.isNaN(classValue)) {
            return null;
        }
       
        return classDistribution[(int) classValue];
    }

    /**
     * Set the underlying instances.
     *
     * @param instances
     */
    public void setInstances(Instances instances) {
        this.instances = instances;
    }
}
TOP

Related Classes of etc.aloe.data.ExampleSet

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.