* This file is part of ALOE.
* ALOE is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* ALOE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
* You should have received a copy of the GNU General Public License
* along with ALOE. If not, see <http://www.gnu.org/licenses/>.
* Copyright (c) 2012 SCCL, University of Washington (http://depts.washington.edu/sccl)
package etc.aloe.data;
import weka.core.Attribute;
import weka.core.Instance;
import weka.core.Instances;
import weka.filters.Filter;
import weka.filters.unsupervised.instance.RemoveWithValues;
* ExampleSet contains information about data points that have features
* extracted. These data points are ready for labeling by a model.
* Instances in an ExampleSet always have at least these attributes: 'message' -
* which contains the message text. '*id' - which is a unique integer
* identifying the message. 'label' - the ground truth label for the instance (0
* or 1)
* @author Michael Brooks <mjbrooks@uw.edu>
public class ExampleSet {
public final static String ID_ATTR_NAME = "*id";
public final static String MESSAGE_ATTR_NAME = "message";
public final static String LABEL_ATTR_NAME = "label";
public final static String PARTICIPANT_ATTR_NAME = "participant";
private Instances instances;
* Construct an ExampleSet containing the given instances.
* @param instances
public ExampleSet(Instances instances) {
this.instances = instances;
* Make a copy of the ExampleSet, copying the underlying instances.
* @return
public ExampleSet copy() {
return new ExampleSet(new Instances(instances));
* The size of the example set.
* @return
public int size() {
return instances.size();
* Returns a new example set containing only those examples with labels.
* @return
public ExampleSet onlyLabeled() {
RemoveWithValues filter = new RemoveWithValues();
filter.setAttributeIndex("" + (instances.classIndex() + 1));
try {
Instances result = Filter.useFilter(instances, filter);
ExampleSet resultSet = new ExampleSet(result);
return resultSet;
} catch (Exception ex) {
System.err.println("Unable to apply filter!");
return null;
* Get the ith instance.
* @param i
* @return
public Instance get(int i) {
return instances.get(i);
* Get the underlying instances.
* @return
public Instances getInstances() {
return instances;
* Gets the actual label of the given example. If the example is unlabeled,
* returns null;
* @param i
* @return
public Boolean getTrueLabel(int i) {
Instance instance = instances.get(i);
return getClassLabel(instance.classValue());
* Converts a double class value into a boolean given the string labels for
* the class attribute in this data set. Returns null if the class value is
* weka missing.
* @param classValue
* @return
public Boolean getClassLabel(double classValue) {
if (Double.isNaN(classValue)) {
return null;
Attribute classAttr = instances.classAttribute();
String classValueStr = classAttr.value((int) classValue);
return Boolean.parseBoolean(classValueStr);
* Gets the confidence in the positive class.
* @param classDistribution
* @param classValue
* @return
public Double getConfidence(double[] classDistribution, double classValue) {
if (Double.isNaN(classValue)) {
return null;
return classDistribution[(int) classValue];
* Set the underlying instances.
* @param instances
public void setInstances(Instances instances) {
this.instances = instances;