List<String> targets, String timeStampName, boolean dateOnly,
Periodicity userHint, String skipEntries, Object... missingReport) {
Instances result = toReplace;
Attribute timeStampAtt = null;
TSLagMaker.PeriodicityHandler detected = null;
List<Integer> missingTargetList = null;
List<Integer> missingTimeStampList = null;
List<String> missingTimeStampRows = null;
if (missingReport.length > 0) {
missingTargetList = (List<Integer>) missingReport[0];
if (missingReport.length == 2) {
missingTimeStampList = (List<Integer>) missingReport[1];
}
if (missingReport.length == 3) {
missingTimeStampRows = (List<String>) missingReport[2];
}
}
if (timeStampName != null && timeStampName.length() > 0) {
timeStampAtt = toReplace.attribute(timeStampName);
// must be a non-artificial time stamp
if (timeStampAtt != null) {
detected = weka.classifiers.timeseries.core.TSLagMaker
.determinePeriodicity(result, timeStampName, userHint);
// check insertMissing (if periodicity is not UNKNOWN)
/*
* If we do this first, then we can interpolate the missing target
* values that will be created for the rows that get inserted
*/
if (detected.getPeriodicity() != Periodicity.UNKNOWN) {
insertMissing(toReplace, timeStampAtt, detected, skipEntries,
missingTimeStampRows);
}
}
}
// do a quick check to see if we need to replace any missing values
boolean ok = true;
for (int i = 0; i < toReplace.numInstances(); i++) {
if (toReplace.instance(i).hasMissingValue()) {
// now check against targets and possibly date
if (!dateOnly) {
for (String target : targets) {
int attIndex = toReplace.attribute(target).index();
if (toReplace.instance(i).isMissing(attIndex)) {
ok = false;
break;
}
}
if (!ok) {
break; // outer loop
}
}
// check date if necessary
if (timeStampAtt != null) {
if (toReplace.instance(i).isMissing(timeStampAtt)) {
ok = false;
break;
}
}
}
}
if (ok) {
// nothing to do
return result;
}
// process the target(s) first
if (!dateOnly) {
for (String target : targets) {
if (result.attribute(target) != null) {
int attIndex = result.attribute(target).index();
double lastNonMissing = weka.core.Utils.missingValue();
// We won't handle missing target values at the start or end
// as experiments with using simple linear regression to fill
// the missing values that are created by default by the lagging
// process showed inferior performance compared to just letting
// Weka take care of it via mean/mode replacement
for (int i = 0; i < result.numInstances(); i++) {
Instance current = result.instance(i);
if (current.isMissing(attIndex)) {
if (!weka.core.Utils.isMissingValue(lastNonMissing)) {
// Search forward to the next non missing value (if any)
double futureNonMissing = weka.core.Utils.missingValue();
double x2 = 2; // number of x steps (lastNonMissing is at 0 on x
// axis)
for (int j = i + 1; j < result.numInstances(); j++) {
if (!result.instance(j).isMissing(attIndex)) {
futureNonMissing = result.instance(j).value(attIndex);
break;
}
x2++;
}
if (!weka.core.Utils.isMissingValue(futureNonMissing)) {
// Now do the linear interpolation
double offset = lastNonMissing;
double slope = (futureNonMissing - lastNonMissing) / x2;
// fill in the missing values
for (int j = i; j < i + x2; j++) {
if (result.instance(j).isMissing(attIndex)) {
double interpolated = (((j - i) + 1) * slope) + offset;
result.instance(j).setValue(attIndex, interpolated);
if (missingTargetList != null) {
missingTargetList.add(new Integer(j + 1));
}
}
}
}
} else {
// won't do anything with start/end missing values
}
} else {
lastNonMissing = current.value(attIndex);
}
}
}
}
}
// now check for missing date values (if necessary)
if (timeStampAtt != null) {
int attIndex = timeStampAtt.index(); // result.attribute(timeStampName).index();
double firstNonMissing = result.instance(0).value(attIndex);
double previousNonMissing = firstNonMissing;
int firstNonMissingIndex = -1;
boolean leadingMissingDates = weka.core.Utils