}
// The removal filter has not been built yet: decide which attributes to drop,
// configure a Remove filter for them, and run the input-format data through it.
if (m_removeFilter == null) {
// establish attributes to remove from first batch
Instances toFilter = getInputFormat();
// Scratch buffer sized for the worst case (every attribute removed); only the
// first numToDelete entries are meaningful.
int[] attsToDelete = new int[toFilter.numAttributes()];
int numToDelete = 0;
for(int i = 0; i < toFilter.numAttributes(); i++) {
if (i==toFilter.classIndex()) continue; // skip class
AttributeStats stats = toFilter.attributeStats(i);
if (stats.distinctCount < 2) {
// remove constant attributes
// (fewer than two distinct values, whatever the attribute type)
attsToDelete[numToDelete++] = i;
} else if (toFilter.attribute(i).isNominal()) {
// remove nominal attributes that vary too much
// The measure is the number of distinct values expressed as a percentage of
// the non-missing values (totalCount - missingCount).
// NOTE(review): the denominator is zero when every value is missing;
// presumably distinctCount is 0 in that case so the branch above handles it
// before this division is reached -- confirm against AttributeStats.
double variancePercent = (double) stats.distinctCount
/ (double)(stats.totalCount - stats.missingCount) * 100.0;
if (variancePercent > m_maxVariancePercentage) {
attsToDelete[numToDelete++] = i;
}
}
}
// Trim the scratch buffer down to the indices actually collected.
int[] finalAttsToDelete = new int[numToDelete];
System.arraycopy(attsToDelete, 0, finalAttsToDelete, 0, numToDelete);
// Configure the Remove filter with the collected indices, selection not
// inverted, and toFilter as its input format.
m_removeFilter = new Remove();
m_removeFilter.setAttributeIndicesArray(finalAttsToDelete);
m_removeFilter.setInvertSelection(false);
m_removeFilter.setInputFormat(toFilter);
// Feed every instance held by toFilter into the filter, then signal the end of
// the batch.
for (int i = 0; i < toFilter.numInstances(); i++) {
m_removeFilter.input(toFilter.instance(i));
}
m_removeFilter.batchFinished();
Instance processed;
Instances outputDataset = m_removeFilter.getOutputFormat();
// restore old relation name to hide attribute filter stamp
outputDataset.setRelationName(toFilter.relationName());
setOutputFormat(outputDataset);
// Drain the Remove filter's output until it returns null, attaching each
// instance to outputDataset before handing it to push().
while ((processed = m_removeFilter.output()) != null) {
processed.setDataset(outputDataset);
push(processed);