Package weka.classifiers.timeseries.core

Source Code of weka.classifiers.timeseries.core.TSLagMaker$PeriodicityHandler

/*
* Copyright (c) 2010 Pentaho Corporation.  All rights reserved.
* This software was developed by Pentaho Corporation and is provided under the terms
* of the GNU Lesser General Public License, Version 2.1. You may not use
* this file except in compliance with the license. If you need a copy of the license,
* please go to http://www.gnu.org/licenses/lgpl-2.1.txt. The Original Code is Time Series
* Forecasting.  The Initial Developer is Pentaho Corporation.
*
* Software distributed under the GNU Lesser Public License is distributed on an "AS IS"
* basis, WITHOUT WARRANTY OF ANY KIND, either express or  implied. Please refer to
* the license for the specific language governing your rights and limitations.
*/

/*
*    TSLagMaker.java
*    Copyright (C) 2010 Pentaho Corporation
*/

package weka.classifiers.timeseries.core;

import java.io.Serializable;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.Enumeration;
import java.util.GregorianCalendar;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Vector;

import weka.classifiers.Classifier;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.Range;
import weka.core.SelectedTag;
import weka.core.Utils;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.Add;
import weka.filters.unsupervised.attribute.AddExpression;
import weka.filters.unsupervised.attribute.AddID;
import weka.filters.unsupervised.attribute.Copy;
import weka.filters.unsupervised.attribute.MathExpression;
import weka.filters.unsupervised.attribute.Remove;
import weka.filters.unsupervised.attribute.RenameAttribute;

/**
* A class for creating lagged versions of target variable(s) for use in time
* series forecasting. Uses the TimeseriesTranslate filter. Has options for
* creating averages of consecutive lagged variables (which can be useful for
* long lagged variables). Some polynomials of time are also created (if there
* is a time stamp), such as time^2 and time^3. Also creates cross products
* between time and the lagged and averaged lagged variables. If there is no
* date time stamp in the data then the user has the option of having an
* artificial time stamp created. Time stamps, real or otherwise, are used for
* modeling trends rather than using a differencing-based approach.
*
* Also has routines for dealing with a date timestamp - i.e. it can detect a
* monthly time period (because months are different lengths) and maps date time
* stamps to equal spaced time intervals. For example, in general, a date time
* stamp is remapped by subtracting the first observed value and adding this
* value divided by the constant delta (difference between consecutive steps) to
* the result. In the case of a detected monthly time period, the remapping
* involves subtracting the base year and then adding to this the number of the
* month within the current year plus twelve times the number of intervening
* years since the base year.
*
* Also has routines for adding new attributes derived from a date time stamp to
* the data - e.g. AM indicator, day of the week, month, quarter etc. In the
* case where there is no real data time stamp, the user may specify a nominal
* periodic variable (if one exists in the data). For example, month might be
* coded as a nominal value. In this case it can be specified as the primary
* periodic variable. The point is, that in all these cases (nominal periodic
* and date-derived periodics), we are able to determine what the value of these
* variables will be in future instances (as computed from the last known
* historic instance).
*
* @author Mark Hall (mhall{[at]}pentaho{[dot]}com)
* @version $Revision: 51180 $
*/
public class TSLagMaker implements Serializable {

  /** For serialization */
  private static final long serialVersionUID = -1697901820770907975L;

  /** The names of the fields to create lagged variables for */
  protected List<String> m_fieldsToLag = null;

  /**
   * The names of the fields to be considered "overlay" data - i.e. we will be
   * supplied with values for these for future instances.
   */
  protected List<String> m_overlayFields = null;

  /** The minimum lag (default 1) */
  protected int m_minLag = 1;

  /** The maximum lag (default 12) */
  protected int m_maxLag = 12;

  /**
   * Optionally fine tune the selection of lagged attributes within the min and
   * max via a range string. The empty string means no fine tuning.
   */
  protected String m_lagFineTune = "";

  /**
   * Whether to replace a number of consecutive long lagged variables with their
   * average.
   */
  protected boolean m_averageConsecutiveLongLags = false;

  /**
   * If replacing long lagged variables with averages, do so for those long
   * lagged variables with lag greater than this
   */
  protected int m_averageLagsAfter = 2;

  /**
   * How many consecutive lagged variables to average, if averaging long lagged
   * variables
   */
  protected int m_numConsecutiveToAverage = 2;

  /**
   * The name of the timestamp attribute (if there is one). Initially the empty
   * string.
   */
  protected String m_timeStampName = "";

  /**
   * Whether to adjust for trends. If a timestamp attribute is named then
   * adjusting for trends will occur. If there is no timestamp attribute in the
   * data, then turning this on will result in an artificial timestamp attribute
   * getting added to the data.
   */
  protected boolean m_adjustForTrends = true;

  /**
   * Whether to stabilize the variance in the field to be forecast by applying a
   * log transform
   */
  protected boolean m_adjustForVariance = false;

  /** True if an artificial time index has been added to the data */
  protected boolean m_useArtificialTimeIndex = false;

  /** Include time/lag interaction terms? */
  protected boolean m_includeTimeLagCrossProducts = true;

  /**
   * The last known time value (artificial time index or real time stamp).
   * Starts at -1 and is set back to -1 by reset().
   */
  protected double m_lastTimeValue = -1;

  /**
   * Used to add an artificial time attribute to the data if the user has
   * selected to adjust for trends and there isn't a time stamp in the data
   */
  protected AddID m_artificialTimeMaker;

  /**
   * Filters for creating the various lagged and derived attributes. All of
   * these are set to null by reset().
   */
  protected List<Filter> m_varianceAdjusters;
  protected List<Filter> m_lagMakers;
  protected List<Filter> m_averagedLagMakers;
  protected List<Filter> m_timeIndexMakers;
  protected List<Filter> m_timeLagCrossProductMakers;
  protected Remove m_extraneousAttributeRemover;

  /** The name of the primary periodic attribute. Initially the empty string. */
  protected String m_primaryPeriodicName = "";

  /**
   * Holds a map of primary periodic values as keys and their immediate
   * successors (chronologically) as values. The primary periodic attribute (if
   * available) should relate to the time interval of the instances (e.g.
   * hourly, daily, monthly etc.).
   */
  protected Map<String, String> m_primaryPeriodicSequence;

  /**
   * A map (keyed by attribute) of maps for looking up the values of secondary
   * periodic attribute values that correspond to the values of the primary
   * periodic attribute
   */
  protected Map<Attribute, Map<String, String>> m_secondaryPeriodicLookups;

  // NOTE(review): presumably the header (structure) of the original,
  // untransformed data - confirm against where this field is assigned.
  protected Instances m_originalHeader;

  /**
   * This holds the most recent (time wise) training or primed instance. We can
   * use it to determine the t+1 periodic value for the primary periodic
   * attribute
   */
  protected Instance m_lastHistoricInstance;

  /**
   * Pre-defined fields that can be derived from a genuine date time stamp.
   * Each flag is toggled through the corresponding setAdd...() method and is
   * off by default.
   */
  protected boolean m_am = false;
  protected boolean m_dayOfWeek = false;
  protected boolean m_weekend = false;
  protected boolean m_monthOfYear = false;
  protected boolean m_quarter = false;
  protected boolean m_dayOfMonth = false;
  protected boolean m_numDaysInMonth = false;

  /**
   * Custom defined fields that can be derived from a genuine date time stamp,
   * keyed by field name. Null until a custom periodic is added or a map is
   * set, and null again after clearCustomPeriodics().
   */
  protected Map<String, ArrayList<CustomPeriodicTest>> m_customPeriodics;

  /** Set to null by reset(), along with the other filter lists. */
  protected List<Filter> m_derivedPeriodicMakers;
  // protected boolean m_advanceTimeStampByMonth = false;
  // NOTE(review): the two fields below look like the periodicity detected
  // from the date time stamp and a user-supplied hint respectively - confirm
  // against their use elsewhere in this class.
  protected PeriodicityHandler m_dateBasedPeriodicity = new PeriodicityHandler();
  protected Periodicity m_userHintPeriodicity = Periodicity.UNKNOWN;

  /**
   * Delete instances from the start of the transformed series where lagged
   * variables are missing? Default leaves missing value handling to the base
   * learner.
   */
  protected boolean m_deleteMissingFromStartOfSeries = false;

  /** Stores the first time stamp value in the data */
  protected long m_dateTimeStampBase;
  // NOTE(review): undocumented Add filter; its purpose is not visible from
  // the declaration - confirm where it is created and applied.
  protected Add m_addDateMap;

  /**
   * (Disabled - the m_deltaTime field this comment describes is commented out
   * below.)
   *
   * Holds the difference between the time stamps for the two most recent
   * training instances or the average difference over consecutive training
   * instances if the differences are not constant. Either this or date
   * arithmetic (to advance time stamp by month) is used to advance the
   * timestamp for future instances.
   */
  // protected double m_deltaTime = -1;

  /**
   * Date time stamps that should be skipped - i.e. not considered as an
   * increment. E.g financial markets don't trade on the weekend, so the
   * difference between friday closing and the following monday closing is one
   * time unit (and not three). Can accept strings such as "sat", "sunday",
   * "jan", "august", or explicit dates (with optional formatting string) such
   * as "2011-07-04@yyyy-MM-dd", or integers. Integers are interpreted with
   * respect to the periodicity - e.g for daily data they are interpreted as day
   * of the year; for hourly data, hour of the day; weekly data, week of the
   * year.
   */
  protected String m_skipEntries;

  /** Default formatting string for explicit dates in the skip list */
  protected String m_dateFormat = "yyyy-MM-dd'T'HH:mm:ss";

  /**
   * Reset the lag maker.
   */
  public void reset() {
    m_artificialTimeMaker = null;
    m_varianceAdjusters = null;
    m_lagMakers = null;
    m_averagedLagMakers = null;
    m_timeIndexMakers = null;
    m_timeLagCrossProductMakers = null;
    m_derivedPeriodicMakers = null;
    m_extraneousAttributeRemover = null;
    m_lastTimeValue = -1;
    // m_deltaTime = -1;
    // m_dateBasedPeriodicity = Periodicity.UNKNOWN;
    // m_skipEntries = null;
    // m_dateFormat = "yyyy-MM-dd'T'HH:mm:ss";
  }

  /**
   * Returns an enumeration describing the available options.
   *
   * @return an enumeration of all the available options.
   */
  public Enumeration<Option> listOptions() {
    Vector<Option> newVector = new Vector<Option>();

    newVector.add(new Option("\tSet the fields to lag.", "F", 1,
        "-F <comma separated list of names>"));
    newVector.add(new Option("\tSet the fields to be considered "
        + "as overlay data.", "overlay", 1,
        "-overlay <comma separated list of names>"));

    newVector.add(new Option("\tSet the minimum lag length to generate."
        + "\n\t(default = 1)", "L", 1, "-L <num>"));
    newVector.add(new Option("\tSet the maximum lag length to generate."
        + "\n\t(default = 12)", "M", 1, "-M <num>"));
    newVector.add(new Option("\tAverage consecutive long lags.", "A", 0, "-A"));
    newVector.add(new Option("\tAverage those lags longer than this number of"
        + "time steps.\n\tUse in conjuction with -A is selected.\n\t"
        + "(default = 2)", "B", 1, "-B <num>"));
    newVector.add(new Option("\tFine tune selection of lags within min and "
        + "max by specifying" + " ranges", "R", 1, "-R <ranges>"));
    newVector.add(new Option("\tAverage this many consecutive long lags.\n\t"
        + "Use in conjuction with -B (default = 2)", "C", 1, "-C <num>"));
    newVector.add(new Option("\tDon't adjust for trends.", "Z", 0, "-Z"));
    newVector.add(new Option("\tSpecify the name of the timestamp field", "G",
        1, "-G <timestamp name>"));
    newVector.add(new Option("\tAdjust for variance.", "V", 0, "-V"));
    newVector.add(new Option(
        "\tAdd an AM/PM indicator (requires a date timestamp)", "am-pm", 0,
        "-am-pm"));
    newVector.add(new Option("\tAdd a day of the week field (requres a date"
        + " timestamp)", "dayofweek", 0, "-dayofweek"));
    newVector.add(new Option("\tAdd a day of the month field (requres a date"
        + " timestamp)", "dayofmonth", 0, "-dayofmonth"));
    newVector.add(new Option(
        "\tAdd a number of days in the month field (requres a date"
            + " timestamp)", "numdaysinmonth", 0, "-numdaysinmonth"));
    newVector.add(new Option(
        "\tAdd a weekend indicator (requires a date timestamp)", "weekend", 0,
        "-weekend"));
    newVector.add(new Option("\tAdd a month field (requires a date timestamp)",
        "month", 0, "-month"));
    newVector.add(new Option("\tAdd a quarter of the year field ("
        + "requires a date timestamp)", "quarter", 0, "-quarter"));
    newVector.add(new Option("\tAdd a custom date-derived boolean field ("
        + "requires a date timestamp).\n\tFormat: \"fieldName="
        + "Test Test|Test Test| ...\n\twhere "
        + "Test=OPERATORyear:month:week-of-yr:week-of-month:"
        + "day-of-yr:day-of-month:day-of-week:hour:min:second\n\te.g."
        + "XmasHoliday=>:dec::::24::: <:jan::::3:::\n\t"
        + "Legal OPERATORs are =,>,<,>=,<=. For = operator only\n\t"
        + "one Test is needed rather than a pair.\n\tThis option may"
        + " be specified more than once on the command line\n\t"
        + "in order to define multiple variables.", "custom", 1, "-custom"));
    newVector
        .add(new Option(
            "\tAdd a comma-separated 'skip' list of dates that should not\n\t"
                + "be considered as a time step. Days of the week,\n\t"
                + "months of the year, 'weekend', integers (indicating day of year\n\t"
                + ", hour of day etc.) or specific dates are all valid entries.\n\t"
                + "E.g sat,sun,27-08-2011,28-08-2011", "skip", 1, "-skip"));

    return newVector.elements();
  }

  /**
   * Creates a Range object for the user-specified lag range String
   *
   * @param lagRange a range as a String
   * @return a Range object
   * @throws Exception if the supplied range is illegal with respect to the min
   *           and max lag values.
   */
  protected Range getLagRangeSelection(String lagRange) throws Exception {
    Range r = new Range(lagRange);
    try {
      r.setUpper(m_maxLag);
    } catch (IllegalArgumentException e) {
      throw new Exception("The lag selection range '" + lagRange + "' is"
          + "illegal with respect to the specified min and max" + "lags.");
    }

    // still need to check against the min
    int[] selectedIndexes = r.getSelection();
    int max = selectedIndexes[Utils.maxIndex(selectedIndexes)] + 1;
    int min = selectedIndexes[Utils.minIndex(selectedIndexes)] + 1;
    if (max < m_minLag || min > m_maxLag) {
      throw new Exception("The lag selection range '" + lagRange + "' is"
          + "illegal with respect to the specified min and max" + "lags.");
    }

    return r;
  }

  /**
   * Parses a given list of options.
   *
   * @param options the list of options as an array of strings
   * @exception Exception if an option is not supported
   */
  public void setOptions(String[] options) throws Exception {
    String fieldsToLag = Utils.getOption('F', options);
    if (fieldsToLag.length() == 0) {
      throw new Exception("Must specify the name of at least one field "
          + "to create lags for!");
    }
    String[] fieldNames = fieldsToLag.split(",");
    List<String> fieldList = new ArrayList<String>();
    for (String f : fieldNames) {
      fieldList.add(f);
    }
    setFieldsToLag(fieldList);

    String overlayFields = Utils.getOption("overlay", options);
    if (overlayFields.length() > 0) {
      String[] names = overlayFields.split(",");
      List<String> nameList = new ArrayList<String>();
      for (String f : names) {
        nameList.add(f);
      }

      setOverlayFields(nameList);
    }

    String minL = Utils.getOption('L', options);
    if (minL.length() > 0) {
      int mL = Integer.parseInt(minL);
      setMinLag(mL);
      if (mL < 1) {
        throw new Exception("Minimum lag can't be less than 1!");
      }
    }

    String maxL = Utils.getOption('M', options);
    if (maxL.length() > 0) {
      int mL = Integer.parseInt(maxL);
      setMaxLag(mL);
    }

    if (getMaxLag() < getMinLag()) {
      throw new Exception("Can't have the maximum lag set lower than the "
          + "minimum lag!");
    }

    String lagRange = Utils.getOption('R', options);
    m_lagFineTune = lagRange;
    if (m_lagFineTune.length() > 0) {

      // check the range for consistency with respect to min and max
      getLagRangeSelection(lagRange);
    }

    boolean avLongLags = Utils.getFlag('A', options);
    setAverageConsecutiveLongLags(avLongLags);

    String avLongerThan = Utils.getOption('B', options);
    if (avLongerThan.length() > 0) {
      int avL = Integer.parseInt(avLongerThan);
      if (avL < getMinLag() || avL > getMaxLag()) {
        throw new Exception("Average consecutive long lags value can't "
            + "be less than the minimum lag or greater than the "
            + "maximum lag!");
      }
      setAverageLagsAfter(avL);
    }

    String consecutiveLongLagS = Utils.getOption('C', options);
    if (consecutiveLongLagS.length() > 0) {
      int consecutive = Integer.parseInt(consecutiveLongLagS);
      if (consecutive < 1 || consecutive > (getMaxLag() - getMinLag())) {
        throw new Exception("Number of consecutive long lags to average "
            + "must be greater than 0 and less than "
            + (getMaxLag() - getMinLag()));
      }
      setNumConsecutiveLongLagsToAverage(consecutive);
    }

    boolean dontAdjTrends = Utils.getFlag('Z', options);
    setAdjustForTrends(!dontAdjTrends);

    boolean adjVariance = Utils.getFlag("V", options);
    setAdjustForVariance(adjVariance);

    String timeStampF = Utils.getOption('G', options);
    if (timeStampF.length() > 0) {
      setTimeStampField(timeStampF);
    }

    setAddAMIndicator(Utils.getFlag("am-pm", options));
    setAddDayOfWeek(Utils.getFlag("dayofweek", options));
    setAddDayOfMonth(Utils.getFlag("dayofmonth", options));
    setAddNumDaysInMonth(Utils.getFlag("numdaysinmonth", options));
    setAddWeekendIndicator(Utils.getFlag("weekend", options));
    setAddMonthOfYear(Utils.getFlag("month", options));
    setAddQuarterOfYear(Utils.getFlag("quarter", options));

    // custom date-derived periodic fields
    String customPeriodic = Utils.getOption("custom", options);
    while (customPeriodic.length() > 0) {
      addCustomPeriodic(customPeriodic);
    }

    String primaryPeriodicN = Utils.getOption("periodic", options);
    if (primaryPeriodicN.length() > 0) {
      setPrimaryPeriodicFieldName(primaryPeriodicN);
    }

    String skipString = Utils.getOption("skip", options);
    if (skipString.length() > 0) {
      setSkipEntries(skipString);
    }
  }

  /**
   * Gets the current settings of the LagMaker.
   *
   * @return an array of strings suitable for passing to setOptions
   */
  public String[] getOptions() {
    ArrayList<String> options = new ArrayList<String>();

    List<String> fieldsToLag = getFieldsToLag();
    options.add("-F");
    options.add(fieldsToLag.toString());

    if (getOverlayFields() != null && getOverlayFields().size() > 0) {
      options.add("-O");
      options.add(getOverlayFields().toString());
    }

    options.add("-L");
    options.add("" + getMinLag());
    options.add("-M");
    options.add("" + getMaxLag());

    if (m_lagFineTune.length() > 0) {
      options.add("-R");
      options.add(getLagRange());
    }

    if (getAverageConsecutiveLongLags()) {
      options.add("-A");

      options.add("-B");
      options.add("" + getAverageLagsAfter());
      options.add("-C");
      options.add("" + getNumConsecutiveLongLagsToAverage());
    }

    if (!getAdjustForTrends()) {
      options.add("-Z");
    }

    if (getAdjustForVariance()) {
      options.add("-V");
    }

    if (getTimeStampField() != null && getTimeStampField().length() > 0) {
      options.add("-G");
      options.add(getTimeStampField());
    }

    if (getAddAMIndicator()) {
      options.add("-am-pm");
    }

    if (getAddDayOfWeek()) {
      options.add("-dayofweek");
    }

    if (getAddDayOfMonth()) {
      options.add("-dayofmonth");
    }

    if (getAddNumDaysInMonth()) {
      options.add("-numdaysinmonth");
    }

    if (getAddWeekendIndicator()) {
      options.add("-weekend");
    }

    if (getAddMonthOfYear()) {
      options.add("-month");
    }

    if (getAddQuarterOfYear()) {
      options.add("-quarter");
    }

    if (getSkipEntries() != null && getSkipEntries().length() > 0) {
      options.add("-skip");
      options.add(getSkipEntries());
    }

    if (m_customPeriodics != null && m_customPeriodics.keySet().size() > 0) {
      for (String name : m_customPeriodics.keySet()) {
        List<CustomPeriodicTest> tests = m_customPeriodics.get(name);

        options.add("-custom");
        StringBuffer tempBuff = new StringBuffer();
        tempBuff.append("\"");
        for (int i = 0; i < tests.size(); i++) {
          tempBuff.append(tests.get(i).toString());
          if (i < tests.size() - 1) {
            tempBuff.append("|");
          } else {
            tempBuff.append("\"");
          }
        }
        options.add(tempBuff.toString());
      }
    }

    return options.toArray(new String[1]);
  }

  /**
   * Get the date-derived custom periodic attributes in use.
   *
   * @return a Map, keyed by field name, of custom date-derived periodic fields.
   */
  public Map<String, ArrayList<CustomPeriodicTest>> getCustomPeriodics() {
    return m_customPeriodics;
  }

  /**
   * Add a custom date-derived periodic
   *
   * @param customPeriodic the new custom date-derived periodic in textual form.
   */
  public void addCustomPeriodic(String customPeriodic) {
    if (m_customPeriodics == null) {
      m_customPeriodics = new HashMap<String, ArrayList<CustomPeriodicTest>>();
    }

    ArrayList<CustomPeriodicTest> tests = new ArrayList<CustomPeriodicTest>();
    int nameSplit = customPeriodic.indexOf('=');
    String fieldName = customPeriodic.substring(0, nameSplit);
    customPeriodic = customPeriodic.substring(nameSplit + 1,
        customPeriodic.length());
    String[] parts = customPeriodic.split("|");
    for (String p : parts) {
      CustomPeriodicTest c = new CustomPeriodicTest(p);
      tests.add(c);
    }
    m_customPeriodics.put(fieldName, tests);
  }

  /**
   * Clear all custom date-derived periodic fields.
   */
  public void clearCustomPeriodics() {
    m_customPeriodics = null;
  }

  /**
   * Set the date-derived custom periodic fields to use/compute
   *
   * @param custom a Map, keyed by field name, of custom date-derived periodic
   *          fields to use.
   */
  public void setCustomPeriodics(
      Map<String, ArrayList<CustomPeriodicTest>> custom) {
    m_customPeriodics = custom;
  }

  /**
   * Set the names of the fields to create lagged variables for
   *
   * @param names a List of field names for which to create lagged variables
   * @throws Exception if a problem occurs
   */
  public void setFieldsToLag(List<String> names) throws Exception {
    m_fieldsToLag = names;
  }

  /**
   * Get the names of the fields to create lagged variables for.
   *
   * @return a List of field names for which lagged variables will be created.
   */
  public List<String> getFieldsToLag() {
    return m_fieldsToLag;
  }

  /**
   * Set the names of fields in the data that are to be considered "overlay"
   * fields - i.e. they will be externally provided for future instances.
   *
   * @param overlayNames the names of the fields that are to be considered
   *          "overlay" fields
   */
  public void setOverlayFields(List<String> overlayNames) {
    m_overlayFields = overlayNames;
  }

  /**
   * Get overlay fields
   *
   * @return a list of field names that are set as "overlay" fields
   */
  public List<String> getOverlayFields() {
    return m_overlayFields;
  }

  /**
   * Set the name of the time stamp field in the data
   *
   * @param name the name of the time stamp field
   */
  public void setTimeStampField(String name) {
    m_timeStampName = name;
    /*
     * if (name == null || name.length() == 0) { m_useArtificialTimeIndex =
     * false; } else { m_useArtificialTimeIndex = true; }
     */
  }

  /**
   * Get the name of the time stamp field.
   *
   * @return the name of the time stamp field or null if one hasn't been
   *         specified.
   */
  public String getTimeStampField() {
    return m_timeStampName;
  }

  /**
   * Set whether to adjust for trends or not. If there is no time stamp field
   * specified, and this is set to true, then an artificial time stamp will be
   * created.
   *
   * @param a true if we are to adjust for trends via a real or artificial time
   *          stamp
   */
  public void setAdjustForTrends(boolean a) {
    m_adjustForTrends = a;
  }

  /**
   * Returns true if we are adjusting for trends via a real or artificial time
   * stamp.
   *
   * @return true if we are adjusting for trends via a real or artificial time
   *         stamp in the data.
   */
  public boolean getAdjustForTrends() {
    return m_adjustForTrends;
  }

  /**
   * Set whether to adjust for variance in the data by taking the log of the
   * target(s).
   *
   * @param v true to adjust for variance by taking the log of the target(s).
   */
  public void setAdjustForVariance(boolean v) {
    m_adjustForVariance = v;
  }

  /**
   * Returns true if we are adjusting for variance by taking the log of the
   * target(s).
   *
   * @return true if we are adjusting for variance.
   */
  public boolean getAdjustForVariance() {
    return m_adjustForVariance;
  }

  /**
   * Set ranges by which to fine-tune the creation of lagged attributes.
   *
   * @param ranges a list of ranges as a string
   */
  public void setFineTuneLags(String ranges) {
    m_lagFineTune = ranges;
  }

  /**
   * Get the ranges used to fine tune the creation of lagged attributes.
   *
   * @return the ranges as a string
   */
  public String getFineTuneLags() {
    return m_lagFineTune;
  }

  /**
   * Set the minimum lag to create (default = 1, i.e. t-1).
   *
   * @param min the minimum lag to create
   */
  public void setMinLag(int min) {
    m_minLag = min;
  }

  /**
   * Get the minimum lag to create.
   *
   * @return the minimum lag to create.
   */
  public int getMinLag() {
    return m_minLag;
  }

  /**
   * Set the maximum lag to create (default = 12, i.e. t-12).
   *
   * @param max the maximum lag to create.
   */
  public void setMaxLag(int max) {
    m_maxLag = max;
  }

  /**
   * Get the maximum lag to create.
   *
   * @return the maximum lag to create.
   */
  public int getMaxLag() {
    return m_maxLag;
  }

  /**
   * Set ranges to fine tune lag selection.
   *
   * @param lagRange a set of ranges (e.g. 2,3,4,7-9).
   */
  public void setLagRange(String lagRange) {
    m_lagFineTune = lagRange;
  }

  /**
   * Get the ranges used to fine tune lag selection
   *
   * @return the ranges (if any) used to fine tune lag selection
   */
  public String getLagRange() {
    return m_lagFineTune;
  }

  /**
   * Sets whether to average consecutive long lagged variables. Setting this to
   * true creates new variables that are averages of long lags and the original
   * lagged variables involved are removed.
   *
   * @param avg true if consecutive long lags are to be averaged.
   */
  public void setAverageConsecutiveLongLags(boolean avg) {
    m_averageConsecutiveLongLags = avg;
  }

  /**
   * Returns true if consecutive long lagged variables are to be averaged.
   *
   * @return true if consecutive long lagged variables are to be averaged.
   */
  public boolean getAverageConsecutiveLongLags() {
    return m_averageConsecutiveLongLags;
  }

  /**
   * Set at which point consecutive long lagged variables are to be averaged
   * (default = 2, i.e. start replacing lagged variables after t-2 with
   * averages).
   *
   * @param a the point at which to start averaging consecutive long lagged
   *          variables.
   */
  public void setAverageLagsAfter(int a) {
    m_averageLagsAfter = a;
  }

  /**
   * Return the point after which long lagged variables will be averaged.
   *
   * @return the point after which long lagged variables will be averaged.
   */
  public int getAverageLagsAfter() {
    return m_averageLagsAfter;
  }

  /**
   * Set the number of long lagged variables to average for each averaged
   * variable created (default = 2, e.g. a set average after value of 2 and a
   * num consecutive to average = 2 will average t-3 and t-4 into a new
   * variable, t-5 and t-6 into a new variable ect.
   *
   * @param c the number of consecutive long lagged variables to average.
   */
  public void setNumConsecutiveLongLagsToAverage(int c) {
    m_numConsecutiveToAverage = c;
  }

  /**
   * Get the number of consecutive long lagged variables to average.
   *
   * @return the number of long lagged variables to average.
   */
  public int getNumConsecutiveLongLagsToAverage() {
    return m_numConsecutiveToAverage;
  }

  /**
   * Set the name of a periodic attribute in the data. This attribute has to be
   * nominal and cyclic so that it is possible to know what the value will be
   * given the current one.
   *
   * @param p the name of the primary periodic attribute (if any) in the data.
   */
  public void setPrimaryPeriodicFieldName(String p) {
    m_primaryPeriodicName = p;
  }

  /**
   * The name of the primary periodic attribute or null if one hasn't been
   * specified.
   *
   * @return the name of the primary periodic attribute or null if one hasn't
   *         been specified.
   */
  public String getPrimaryPeriodicFieldName() {
    return m_primaryPeriodicName;
  }

  /**
   * Set whether to create an AM indicator attribute. Has no effect if there
   * isn't a date-based time stamp in the data.
   *
   * @param am true if an AM indicator attribute is to be created.
   */
  public void setAddAMIndicator(boolean am) {
    m_am = am;
  }

  /**
   * Return true if an AM indicator attribute is to be created.
   *
   * @return true if an AM indiciator attribute is to be created.
   */
  public boolean getAddAMIndicator() {
    return m_am;
  }

  /**
   * Set whether to create a day of the week attribute. Has no effect if there
   * isn't a date-based time stamp in the data.
   *
   * @param d true if a day of the week attribute is to be created.
   */
  public void setAddDayOfWeek(boolean d) {
    m_dayOfWeek = d;
  }

  /**
   * Return true if a day of the week attribute is to be created.
   *
   * @return true if a day of the week attribute is to be created.
   */
  public boolean getAddDayOfWeek() {
    return m_dayOfWeek;
  }

  /**
   * Set whether to create a day of the month attribute. Has no effect if there
   * isn't a date-based time stamp in the data.
   *
   * @param d true if a day of the month attribute is to be created.
   */
  public void setAddDayOfMonth(boolean d) {
    m_dayOfMonth = d;
  }

  /**
   * Return true if a day of the month attribute is to be created.
   *
   * @return true if a day of the month attribute is to be created.
   */
  public boolean getAddDayOfMonth() {
    return m_dayOfMonth;
  }

  /**
   * Set whether to create a numeric attribute that holds the number of days in
   * the month.
   *
   * @param d true if a num days in month attribute is to be created.
   */
  public void setAddNumDaysInMonth(boolean d) {
    m_numDaysInMonth = d;
  }

  /**
   * Return true if a num days in the month attribute is to be created.
   *
   * @return true if a num days in the month attribute is to be created.
   */
  public boolean getAddNumDaysInMonth() {
    return m_numDaysInMonth;
  }

  /**
   * Set whether to create a weekend indicator attribute. Has no effect if there
   * isn't a date-based time stamp in the data.
   *
   * @param w true if a weekend indicator attribute is to be created.
   */
  public void setAddWeekendIndicator(boolean w) {
    m_weekend = w;
  }

  /**
   * Returns true if a weekend indicator attribute is to be created.
   *
   * @return true if a weekend indicator attribute is to be created.
   */
  public boolean getAddWeekendIndicator() {
    return m_weekend;
  }

  /**
   * Set whether to create a month of the year attribute. Has no effect if there
   * isn't a date-based time stamp in the data.
   *
   * @param m true if a month of the year attribute is to be created.
   */
  public void setAddMonthOfYear(boolean m) {
    m_monthOfYear = m;
  }

  /**
   * Returns true if a month of the year attribute is to be created.
   *
   * @return true if a month of the year attribute is to be created.
   */
  public boolean getAddMonthOfYear() {
    return m_monthOfYear;
  }

  /**
   * Set whether to create a quarter attribute. Has no effect if there isn't a
   * date-based time stamp in the data.
   *
   * @param q true if a quarter attribute is to be added.
   */
  public void setAddQuarterOfYear(boolean q) {
    m_quarter = q;
  }

  /**
   * Returns true if a quarter attribute is to be created.
   *
   * @return true if a quarter attribute is to be created.
   */
  public boolean getAddQuarterOfYear() {
    return m_quarter;
  }

  /**
   * Returns true if an artificial time index is in use.
   *
   * @return true if an artificial time index is in use.
   */
  public boolean isUsingAnArtificialTimeIndex() {
    return m_useArtificialTimeIndex;
  }

  /**
   * Set the starting value for the artificial time stamp.
   *
   * @param value the value to initialize the artificial time stamp with.
   * @throws Exception if an artificial time stamp is not being used.
   */
  public void setArtificialTimeStartValue(double value) throws Exception {
    if (isUsingAnArtificialTimeIndex()) {
      m_lastTimeValue = value;
    } else {
      throw new Exception("Not using an artificial time index");
    }
  }

  /**
   * Returns the current value of the artificial time stamp. After training,
   * after priming, and prior to forecasting, this will be equal to the number
   * of training instances seen.
   *
   * @return the current value of the artificial time stamp.
   * @throws Exception if an artificial time stamp is not being used.
   */
  public double getArtificialTimeStartValue() throws Exception {
    if (!isUsingAnArtificialTimeIndex()) {
      throw new Exception("Not using an artificial time index!");
    }

    return m_lastTimeValue;
  }

  /**
   * Returns the current (i.e. most recent) time stamp value. Unlike an
   * artificial time stamp, the value after training, after priming and before
   * forecasting, will be equal to the time stamp of the most recent priming
   * instance.
   *
   * @return the current time stamp value
   * @throws Exception if the lag maker is not adjusting for trends or no time
   *           stamp attribute has been specified.
   */
  public double getCurrentTimeStampValue() throws Exception {
    if (m_adjustForTrends && m_timeStampName.length() > 0) {
      return m_lastTimeValue;
    }

    throw new Exception("Not using a time stamp!");
  }

  /**
   * Increment the artificial time value with the supplied incrememt value.
   *
   * @param increment the value to increment by.
   */
  public void incrementArtificialTimeValue(int increment) {
    m_lastTimeValue += increment;
  }

  /**
   * Return the difference between time values. This may be only approximate for
   * periods based on dates. It is best to used date-based arithmetic in this
   * case for incrementing/decrementing time stamps.
   *
   * @return the (average) difference between time values.
   */
  public double getDeltaTime() {
    return m_dateBasedPeriodicity.deltaTime(); // m_deltaTime;
  }

  /**
   * Gets the Periodicity representing the time stamp in use for this lag maker.
   * If the lag maker is not adjusting for trends, or an artificial time stamp
   * is being used, then null is returned.
   *
   * @return the Periodicity in use, or null if the lag maker is not adjusting
   *         for trends or is using an artificial time stamp.
   */
  public Periodicity getPeriodicity() {
    if (!m_adjustForTrends || m_useArtificialTimeIndex) {
      return null;
    }

    return m_dateBasedPeriodicity.getPeriodicity();
  }

  /**
   * Set the periodicity for the data. This is ignored if the lag maker is not
   * adjusting for trends or is using an artificial time stamp. If not specified
   * or set to Periodicity.UNKNOWN (the default) then heuristics will be used to
   * try and automatically determine the periodicity.
   *
   * @param toUse the periodicity to use
   */
  public void setPeriodicity(Periodicity toUse) {
    m_userHintPeriodicity = toUse;
  }

  /**
   * Set the list of time units to be 'skipped' - i.e. not considered as an
   * increment. E.g financial markets don't trade on the weekend, so the
   * difference between friday closing and the following monday closing is one
   * time unit (and not three). Can accept strings such as "sat", "sunday",
   * "jan", "august", or explicit dates (with optional formatting string) such
   * as "2011-07-04@yyyy-MM-dd", or integers. Integers are interpreted with
   * respect to the periodicity - e.g for daily data they are interpreted as day
   * of the year; for hourly data, hour of the day; weekly data, week of the
   * year.
   *
   * @param skipEntries a comma separated list of strings, explicit dates and
   *          integers.
   */
  public void setSkipEntries(String skipEntries) {
    m_skipEntries = skipEntries;
  }

  /**
   * Get a list of time units to be 'skipped' - i.e. not considered as an
   * increment. E.g financial markets don't trade on the weekend, so the
   * difference between friday closing and the following monday closing is one
   * time unit (and not three). Can accept strings such as "sat", "sunday",
   * "jan", "august", or explicit dates (with optional formatting string) such
   * as "2011-07-04@yyyy-MM-dd", or integers. Integers are interpreted with
   * respect to the periodicity - e.g for daily data they are interpreted as day
   * of the year; for hourly data, hour of the day; weekly data, week of the
   * year.
   *
   * @return a comma-separated list of strings, explicit dates and integers
   */
  public String getSkipEntries() {
    return m_skipEntries;
  }

  private List<Object> createLagFiller(Instances insts, String targetName)
      throws Exception {
    // Classifier lagFiller = new weka.classifiers.functions.LeastMedSq();
    Classifier lagFiller = new weka.classifiers.functions.LinearRegression();

    ArrayList<Attribute> atts = new ArrayList<Attribute>();
    atts.add(new Attribute("time"));
    atts.add(new Attribute("target"));
    Instances simple = new Instances("simple", atts, insts.numInstances());
    int targetIndex = insts.attribute(targetName).index();
    for (int i = 0; i < insts.numInstances(); i++) {
      double targetValue = insts.instance(i).value(targetIndex);
      double time = i;
      double[] vals = new double[2];
      vals[0] = time;
      vals[1] = targetValue;
      DenseInstance d = new DenseInstance(1.0, vals);
      simple.add(d);
    }

    simple.setClassIndex(1);
    lagFiller.buildClassifier(simple);
    System.err.println(lagFiller);
    simple = new Instances(simple, 0);

    List<Object> results = new ArrayList<Object>();
    results.add(lagFiller);
    results.add(simple);

    return results;
  }

  private Instances createLags(Instances insts) throws Exception {
    if (m_fieldsToLag == null || m_fieldsToLag.get(0).length() == 0) {
      throw new Exception("Field to forecast is not specified!");
    }

    m_lagMakers = new ArrayList<Filter>();

    // do we have a fine tuning range for lags?
    Range r = null;
    int[] rangeIndexes = null;
    if (m_lagFineTune.length() > 0) {
      r = getLagRangeSelection(m_lagFineTune);
      rangeIndexes = r.getSelection();
    }

    for (int j = 0; j < m_fieldsToLag.size(); j++) {
      int classIndex = insts.attribute(m_fieldsToLag.get(j)).index();
      if (classIndex < 0) {
        throw new Exception("Can't find field '" + m_fieldsToLag.get(j) + "'!");
      }

      // ---------------------
      // List<Object> lagFillerHolder = createLagFiller(insts,
      // m_fieldsToLag.get(j));
      // Classifier missingLagFiller = (Classifier)lagFillerHolder.get(0);
      // Instances lagFillerHeader = (Instances)lagFillerHolder.get(1);
      // ---------------------

      for (int i = m_minLag; i <= m_maxLag; i++) {

        // check against fine tuning ranges if set
        if (rangeIndexes != null) {
          boolean ok = false;
          for (int z = 0; z < rangeIndexes.length; z++) {
            if (rangeIndexes[z] + 1 == i) {
              ok = true;
              break;
            }
          }

          if (!ok) {
            continue;
          }
        }

        Copy c = new Copy();
        c.setAttributeIndices("" + (classIndex + 1));
        c.setInputFormat(insts);
        insts = Filter.useFilter(insts, c);
        m_lagMakers.add(c);
        RenameAttribute rename = new RenameAttribute();
        rename.setAttributeIndices("last");
        rename.setReplace("Lag_" + m_fieldsToLag.get(j));
        rename.setInputFormat(insts);
        insts = Filter.useFilter(insts, rename);
        m_lagMakers.add(rename);

        // AddExpression is convenient to make a copy and set a new name
        /*
         * AddExpression addE = new AddExpression(); addE.setName("Lag_" +
         * m_fieldsToLag.get(j)); addE.setExpression("a" + (classIndex + 1) +
         * "*1"); addE.setInputFormat(insts); insts = Filter.useFilter(insts,
         * addE); m_lagMakers.add(addE);
         */

        // now time shift it
        TimeSeriesTranslate timeS = new TimeSeriesTranslate();
        timeS.setAttributeIndices("last");
        timeS.setInstanceRange(-i);
        timeS.setInputFormat(insts);
        insts = Filter.useFilter(insts, timeS);
        m_lagMakers.add(timeS);

        // --------------
        // now use the missingLagFiller to project back and fill in
        // the unknown values for lag elements before the beginning
        // of the series. Our artificial time begins at the start of
        // the series at 0.
        /*
         * int count = 0; int lagIndex = insts.numAttributes() - 1; for (int z =
         * -i; z < 0; z++) { double time = z; double[] vals = new double[2];
         * vals[0] = time; vals[1] = Utils.missingValue(); DenseInstance d = new
         * DenseInstance(1.0, vals); d.setDataset(lagFillerHeader); double
         * predictedTarget = missingLagFiller.classifyInstance(d); if
         * (insts.instance(count).isMissing(lagIndex)) {
         * insts.instance(count).setValue(lagIndex, predictedTarget); } else {
         * System
         * .err.println("***** lag value is not missing!! (project missing lags)"
         * ); } count++; }
         */

        // --------------
      }
    }
    // System.err.println(insts);
    return insts;
  }

  private Instances createAveragedLags(Instances insts) throws Exception {

    if (!m_averageConsecutiveLongLags) {
      m_averagedLagMakers = null;
      return insts;
    }

    if (m_numConsecutiveToAverage > getMaxLag() - getAverageLagsAfter()) {
      if (getMaxLag() - getAverageLagsAfter() > 1) {
        m_numConsecutiveToAverage = getMaxLag() - getAverageLagsAfter();
      } else {
        m_averagedLagMakers = null;
        return insts;
      }
    }

    m_averagedLagMakers = new ArrayList<Filter>();
    int numAtts = insts.numAttributes();

    String removeLongLagIndexes = "";
    for (int z = 0; z < m_fieldsToLag.size(); z++) {
      int firstLagIndex = -1;
      // locate the first lagged attribute
      for (int i = 0; i < insts.numAttributes(); i++) {
        if (insts.attribute(i).name().startsWith("Lag_" + m_fieldsToLag.get(z))) {
          firstLagIndex = i;
          break;
        }
      }

      if (firstLagIndex < 0) {
        throw new Exception("Can't find the first lag attribute for "
            + m_fieldsToLag.get(z) + "!");
      }

      for (int i = firstLagIndex; i < numAtts;) {
        if (!insts.attribute(i).name()
            .startsWith("Lag_" + m_fieldsToLag.get(z))) {
          // finished
          break;
        }

        // need to parse the lag number out of the name
        String lagNumS = insts.attribute(i).name()
            .replace("Lag_" + m_fieldsToLag.get(z) + "-", "");
        int lagNum = Integer.parseInt(lagNumS);
        int lastLagNum = lagNum;

        if (/* (i - firstLagIndex + 1) */lagNum > m_averageLagsAfter) {
          int attNumber = i + 1;
          removeLongLagIndexes += (i + 1) + ",";
          String avExpression = "(a" + attNumber;
          String avAttName = "Avg(" + insts.attribute(i).name();
          int denom = 1;
          // build the expression
          for (int j = 1; j < m_numConsecutiveToAverage; j++) {
            if ((i + j) < insts.numAttributes()
                && insts.attribute(i + j).name()
                    .startsWith("Lag_" + m_fieldsToLag.get(z))) {
              String currNumS = insts.attribute(i + j).name()
                  .replace("Lag_" + m_fieldsToLag.get(z) + "-", "");
              int currentLagNum = Integer.parseInt(currNumS);

              // only average consecutive long lags (so truncate
              // if there is a jump of more than 1
              if (currentLagNum - lastLagNum == 1) {
                avExpression += " + a" + (attNumber + j);
                avAttName += "," + insts.attribute(i + j).name();
                denom++;
                removeLongLagIndexes += (i + j + 1) + ",";
                lastLagNum = currentLagNum;
              } else {
                break;
              }
            } else {
              break;
            }
          }

          avExpression += ")/" + denom;
          avAttName += ")";
          AddExpression addE = new AddExpression();
          addE.setName(avAttName);
          addE.setExpression(avExpression);
          addE.setInputFormat(insts);
          insts = Filter.useFilter(insts, addE);
          m_averagedLagMakers.add(addE);

          i += denom;
        } else {
          i++;
        }
      }
    }

    if (removeLongLagIndexes.length() > 0) {
      removeLongLagIndexes = removeLongLagIndexes.substring(0,
          removeLongLagIndexes.lastIndexOf(','));
      Remove r = new Remove();
      r.setAttributeIndices(removeLongLagIndexes);
      r.setInputFormat(insts);
      insts = Filter.useFilter(insts, r);
      m_averagedLagMakers.add(r);
    }

    return insts;
  }

  private Instances createTimeIndexes(Instances insts) throws Exception {

    m_timeIndexMakers = null;
    if (m_timeStampName != null && m_timeStampName.length() > 0
        && m_adjustForTrends) {
      int timeStampIndex = insts.attribute(m_timeStampName).index();
      if (timeStampIndex < 0) {
        throw new Exception("Can't find time stamp attribute '"
            + m_timeStampName + "' in the data!");
      }
      String timeStampName = m_timeStampName;

      if (insts.attribute(timeStampIndex).isDate()) {
        // we'll use the remapped one
        timeStampIndex = insts.attribute(m_timeStampName + "-remapped").index();
        timeStampName += "-remapped";
      }

      if (!insts.attribute(timeStampIndex).isNumeric()) {
        throw new Exception("Time stamp attribute '" + m_timeStampName
            + "' is not numeric!");
      }

      /*
       * Instance first = insts.instance(insts.numInstances() - 1); Instance two
       * = insts.instance(insts.numInstances() - 2); m_deltaTime =
       * first.value(timeStampIndex) - two.value(timeStampIndex);
       */

      m_timeIndexMakers = new ArrayList<Filter>();
      AddExpression addE = new AddExpression();
      addE.setName(timeStampName + "^2");
      addE.setExpression("a" + (timeStampIndex + 1) + "^2");
      addE.setInputFormat(insts);
      insts = Filter.useFilter(insts, addE);
      m_timeIndexMakers.add(addE);

      addE = new AddExpression();
      addE.setName(timeStampName + "^3");
      addE.setExpression("a" + (timeStampIndex + 1) + "^3");
      addE.setInputFormat(insts);
      insts = Filter.useFilter(insts, addE);
      m_timeIndexMakers.add(addE);
    }

    return insts;
  }

  public Instances createTimeLagCrossProducts(Instances insts) throws Exception {
    m_timeLagCrossProductMakers = null;

    if (m_timeStampName == null || m_timeStampName.length() == 0
        || !m_adjustForTrends) {
      return insts;
    }

    int numAtts = insts.numAttributes();
    int firstLagIndex = -1;
    // locate the first lagged attribute
    for (int i = 0; i < numAtts; i++) {
      if (insts.attribute(i).name().startsWith("Lag_")) {
        firstLagIndex = i;
        break;
      }
    }

    if (firstLagIndex < 0) {
      m_timeLagCrossProductMakers = null;
      return insts;
    }

    int timeStampIndex = insts.attribute(m_timeStampName).index();
    if (timeStampIndex < 0) {
      return insts;
    }
    String timeStampName = m_timeStampName;

    if (insts.attribute(timeStampIndex).isDate()) {
      // use the remapped one
      timeStampIndex = insts.attribute(m_timeStampName + "-remapped").index();
      timeStampName += "-remapped";
    }

    m_timeLagCrossProductMakers = new ArrayList<Filter>();
    for (int i = firstLagIndex; i < insts.numAttributes(); i++) {
      if (!(insts.attribute(i).name().startsWith("Lag_") || insts.attribute(i)
          .name().startsWith("Avg("))) {
        break;
      }

      AddExpression addE = new AddExpression();
      addE.setName(timeStampName + "*" + insts.attribute(i).name());
      addE.setExpression("a" + (timeStampIndex + 1) + "*a" + (i + 1));
      addE.setInputFormat(insts);
      insts = Filter.useFilter(insts, addE);
      m_timeLagCrossProductMakers.add(addE);
    }

    return insts;
  }

  private Instances createVarianceAdjusters(Instances insts) throws Exception {
    if (!m_adjustForVariance) {
      return insts;
    }

    if (m_fieldsToLag == null || m_fieldsToLag.get(0).length() == 0) {
      throw new Exception("Fields to lag is not specified!");
    }

    m_varianceAdjusters = new ArrayList<Filter>();
    for (String field : m_fieldsToLag) {
      int index = insts.attribute(field).index();
      if (index < 0) {
        throw new Exception("Can't find field '" + field + "'!");
      }

      MathExpression mathE = new MathExpression();
      mathE.setIgnoreRange("" + (index + 1));
      mathE.setInvertSelection(true);
      mathE.setExpression("log(A)");
      mathE.setInputFormat(insts);
      insts = Filter.useFilter(insts, mathE);
      m_varianceAdjusters.add(mathE);
    }

    return insts;
  }

  // this is useful for reducing the scale of a date timestamp. Since dates
  // are stored internally in elapsed milliseconds, they are large numbers and
  // any model coefficient computed for the timestamp is likely to be extremely
  // small (appearing as 0 in output due to 4 decimal places precision).
  // Furthermore,
  // date timestamps with a periodicity of a month are not a constant number of
  // milliseconds in length from one month to the next - remapping corrects this
  protected Instances createDateTimestampRemap(Instances insts)
      throws Exception {
    Instances result = insts;

    if (m_adjustForTrends && !m_useArtificialTimeIndex
        && m_timeStampName != null && m_timeStampName.length() > 0) {
      if (result.attribute(m_timeStampName).isDate()) {
        int origIndex = result.attribute(m_timeStampName).index();

        // find first non-missing date and set as base
        GregorianCalendar c = new GregorianCalendar();
        for (int i = 0; i < result.numInstances(); i++) {
          if (!result.instance(i).isMissing(origIndex)) {
            if (m_dateBasedPeriodicity.getPeriodicity() == Periodicity.MONTHLY
                || m_dateBasedPeriodicity.getPeriodicity() == Periodicity.WEEKLY
                || m_dateBasedPeriodicity.getPeriodicity() == Periodicity.QUARTERLY) {
              Date d = new Date((long) result.instance(i).value(origIndex));
              c.setTime(d);
              m_dateTimeStampBase = c.get(Calendar.YEAR);
            } else {
              m_dateTimeStampBase = (long) result.instance(i).value(origIndex);
            }
            break;
          }
        }
        m_addDateMap = new Add();
        m_addDateMap.setAttributeName(m_timeStampName + "-remapped");
        m_addDateMap.setInputFormat(result);
        result = Filter.useFilter(result, m_addDateMap);

        Instance previous = result.instance(0);
        // now loop through and compute remapped date
        for (int i = 0; i < result.numInstances(); i++) {
          Instance current = result.instance(i);

          current = m_dateBasedPeriodicity.remapDateTimeStamp(current,
              previous, m_timeStampName);
          previous = current;
          /*
           * if (!current.isMissing(origIndex)) { if (m_dateBasedPeriodicity ==
           * Periodicity.MONTHLY || m_dateBasedPeriodicity == Periodicity.WEEKLY
           * || m_dateBasedPeriodicity == Periodicity.QUARTERLY) { Date d = new
           * Date((long)current.value(origIndex)); c.setTime(d); long year =
           * c.get(Calendar.YEAR); long month = c.get(Calendar.MONTH); long week
           * = c.get(Calendar.WEEK_OF_YEAR); long remapped = 0; if
           * (m_dateBasedPeriodicity == Periodicity.MONTHLY) { remapped = ((year
           * - m_dateTimeStampBase) * 12) + month; } else if
           * (m_dateBasedPeriodicity == Periodicity.WEEKLY) { remapped = ((year
           * - m_dateTimeStampBase) * 52) + week;
           *
           * // adjust for the case where week 1 of the year actually starts //
           * in the last week of December if (month == Calendar.DECEMBER && week
           * == 1) { remapped += 52; } } else if (m_dateBasedPeriodicity ==
           * Periodicity.QUARTERLY) { remapped = ((year - m_dateTimeStampBase) *
           * 4) + ((month / 3L) + 1L); }
           * current.setValue(current.numAttributes() - 1, (double)remapped); }
           * else { double remapped = current.value(origIndex) -
           * m_dateTimeStampBase; remapped /=
           * m_dateBasedPeriodicity.deltaTime();//m_deltaTime;
           * current.setValue(current.numAttributes() - 1, remapped); } }
           */
        }
      }
    }

    return result;
  }

  protected Instance remapDateTimeStamp(Instance inst) throws Exception {
    Instance result = inst;

    if (m_addDateMap != null) {
      m_addDateMap.input(result);
      result = m_addDateMap.output();

      result = m_dateBasedPeriodicity.remapDateTimeStamp(result, null,
          m_timeStampName);

      /*
       * int origIndex = result.dataset().attribute(m_timeStampName).index();
       * Calendar c = new GregorianCalendar();
       *
       * if (!result.isMissing(origIndex)) { if (m_dateBasedPeriodicity ==
       * Periodicity.MONTHLY || m_dateBasedPeriodicity == Periodicity.WEEKLY ||
       * m_dateBasedPeriodicity == Periodicity.QUARTERLY) { Date d = new
       * Date((long)result.value(origIndex)); c.setTime(d); long year =
       * c.get(Calendar.YEAR); long month = c.get(Calendar.MONTH); long week =
       * c.get(Calendar.WEEK_OF_YEAR); long remapped = 0; if
       * (m_dateBasedPeriodicity == Periodicity.MONTHLY) { remapped = ((year -
       * m_dateTimeStampBase) * 12) + month; } else if (m_dateBasedPeriodicity
       * == Periodicity.WEEKLY) { remapped = ((year - m_dateTimeStampBase) * 52)
       * + week;
       *
       * // adjust for the case where week 1 of the year actually starts // in
       * the last week of December if (month == Calendar.DECEMBER && week == 1)
       * { remapped += 52; } } else if (m_dateBasedPeriodicity ==
       * Periodicity.QUARTERLY) { remapped = ((year - m_dateTimeStampBase) * 4)
       * + ((month / 3L) + 1L); } result.setValue(result.numAttributes() - 1,
       * (double)remapped); } else { double remapped = result.value(origIndex) -
       * m_dateTimeStampBase; remapped /=
       * m_dateBasedPeriodicity.deltaTime();//m_deltaTime;
       * result.setValue(result.numAttributes() - 1, remapped); } }
       */
    }

    return result;
  }

  /**
   * Enum defining periodicity
   */
  public static enum Periodicity {
    UNKNOWN, HOURLY, DAILY, WEEKLY, MONTHLY, QUARTERLY, YEARLY;

    private double m_deltaTime;

    public double deltaTime() {
      return m_deltaTime;
    }

    public void setDeltaTime(double deltaTime) {
      m_deltaTime = deltaTime;
    }
  }

  /**
   * Helper class to manage time stamp manipulation with respect to various
   * periodicities. Has a routine to remap the time stamp, which is useful for
   * date time stamps. Since dates are just manipulated internally as the number
   * of milliseconds elapsed since the epoch, and any global trend modelling in
   * regression functions results in enormous coefficients for this variable -
   * remapping to a more reasonable scale prevents this. It also makes it easier
   * to handle the case where there are time periods that shouldn't be
   * considered as a time unit increment, e.g. weekends and public holidays for
   * financial trading data. These "holes" in the data can be accommodated by
   * accumulating a negative offset for the remapped date when a particular
   * date/time occurs in a user-specified "skip" list.
   *
   * @author Mark Hall (mhall{[at]}pentaho{[dot]}com)
   */
  public static class PeriodicityHandler implements Serializable {

    /**
     * For serialization
     */
    private static final long serialVersionUID = 6330232772323425050L;

    /** Periodicity of this handler */
    protected Periodicity m_handlerPeriodicity = Periodicity.UNKNOWN;

    /** Delta time between consecutive units */
    private double m_deltaTime;

    /** True if we are managing a date-based periodicity */
    private boolean m_isDateBased;

    /** first date time stamp seen in batch training */
    private long m_dateTimeStampInitialVal;

    /** last date time stamp value seen in batch training */
    private long m_dateTimeStampFinalVal;

    /**
     * = year of initial time stamp val for weekly, monthly or quarterly
     * periodicities, otherwise is the same as initial time stamp val
     **/
    private long m_dateTimeStampBaseVal;

    /** holds the date-based entries that should be 'skipped' */
    private List<Object> m_skipList;

    /**
     * any adjustment for remapped date values accumulated via time unit skips
     * that occur during the training data time frame
     */
    private long m_trainingRemapSkipAdjust = 0;

    /**
     * Set periodicity to manage
     *
     * @param p the periodicity to manage
     */
    public void setPeriodicity(Periodicity p) {
      m_handlerPeriodicity = p;
    }

    /**
     * Get periodicity being managed
     *
     * @return the periodicity being managed
     */
    public Periodicity getPeriodicity() {
      return m_handlerPeriodicity;
    }

    /**
     * Set a list of skip entries
     *
     * @param aList a comma separated list of date-based entries. May include
     *          strings such as 'sat' or 'june', specific dates (with optional
     *          format string) such as '2011-08-22@yyyy-MM-dd' or integers
     *          (which get interpreted differently depending on the periodicity)
     *
     * @param dateFormat a default date format to use for parsing dates
     * @throws Exception if an entry in the list is unparsable or unrecognized
     */
    public void setSkipList(String aList, String dateFormat) throws Exception {
      if (aList != null && aList.length() > 0) {
        // reset skip list and skip adjust
        m_skipList = new ArrayList<Object>();
        m_trainingRemapSkipAdjust = 0;

        String[] parts = aList.split(",");

        for (String p : parts) {
          p = p.trim();
          // try as day of week or month of the year first
          if (m_handlerPeriodicity == Periodicity.UNKNOWN
              || m_handlerPeriodicity == Periodicity.HOURLY
              || m_handlerPeriodicity == Periodicity.DAILY
              || m_handlerPeriodicity == Periodicity.MONTHLY) {
            if (p.equalsIgnoreCase("mon") || p.equalsIgnoreCase("monday")) {
              if (m_handlerPeriodicity != Periodicity.MONTHLY)
                m_skipList.add("mon");
              continue;
            } else if (p.equalsIgnoreCase("tue")
                || p.equalsIgnoreCase("tuesday")) {
              if (m_handlerPeriodicity != Periodicity.MONTHLY)
                m_skipList.add("tue");
              continue;
            } else if (p.equalsIgnoreCase("wed")
                || p.equalsIgnoreCase("wednesday")) {
              if (m_handlerPeriodicity != Periodicity.MONTHLY)
                m_skipList.add("wed");
              continue;
            } else if (p.equalsIgnoreCase("thu")
                || p.equalsIgnoreCase("thursday")) {
              if (m_handlerPeriodicity != Periodicity.MONTHLY)
                m_skipList.add("thu");
              continue;
            } else if (p.equalsIgnoreCase("fri")
                || p.equalsIgnoreCase("friday")) {
              if (m_handlerPeriodicity != Periodicity.MONTHLY)
                m_skipList.add("fri");
              continue;
            } else if (p.equalsIgnoreCase("sat")
                || p.equalsIgnoreCase("saturday")) {
              if (m_handlerPeriodicity != Periodicity.MONTHLY)
                m_skipList.add("sat");
              continue;
            } else if (p.equalsIgnoreCase("sun")
                || p.equalsIgnoreCase("sunday")) {
              if (m_handlerPeriodicity != Periodicity.MONTHLY)
                m_skipList.add("sun");
              continue;
            } else if (p.equalsIgnoreCase("weekend")) {
              if (m_handlerPeriodicity != Periodicity.MONTHLY) {
                m_skipList.add("sat");
                m_skipList.add("sun");
                continue;
              }
            } else if (p.equalsIgnoreCase("jan")
                || p.equalsIgnoreCase("january")) {
              m_skipList.add("jan");
              continue;
            } else if (p.equalsIgnoreCase("feb")
                || p.equalsIgnoreCase("february")) {
              m_skipList.add("feb");
              continue;
            } else if (p.equalsIgnoreCase("mar") || p.equalsIgnoreCase("march")) {
              m_skipList.add("mar");
              continue;
            } else if (p.equalsIgnoreCase("apr") || p.equalsIgnoreCase("april")) {
              m_skipList.add("apr");
              continue;
            } else if (p.equalsIgnoreCase("may")) {
              m_skipList.add("may");
              continue;
            } else if (p.equalsIgnoreCase("jun") || p.equalsIgnoreCase("june")) {
              m_skipList.add("jun");
              continue;
            } else if (p.equalsIgnoreCase("jul") || p.equalsIgnoreCase("july")) {
              m_skipList.add("jul");
              continue;
            } else if (p.equalsIgnoreCase("aug")
                || p.equalsIgnoreCase("august")) {
              m_skipList.add("aug");
              continue;
            } else if (p.equalsIgnoreCase("sep")
                || p.equalsIgnoreCase("september")) {
              m_skipList.add("sep");
              continue;
            } else if (p.equalsIgnoreCase("oct")
                || p.equalsIgnoreCase("october")) {
              m_skipList.add("oct");
              continue;
            } else if (p.equalsIgnoreCase("nov")
                || p.equalsIgnoreCase("november")) {
              m_skipList.add("nov");
              continue;
            } else if (p.equalsIgnoreCase("dec")
                || p.equalsIgnoreCase("december")) {
              m_skipList.add("dec");
              continue;
            }
          }

          // try as a number (no checking is done for numbers out of
          // range with respect to a given periodicity)
          try {
            int num = Integer.parseInt(p);
            m_skipList.add(new Integer(p));
            continue;
          } catch (NumberFormatException n) {
          }

          // last of all try as a specific date (if we have a date formatting
          // string)
          if (dateFormat != null && dateFormat.length() > 0) {
            // first check to see if there is a custom format attached to this
            // entry
            String datePart = p;
            if (p.indexOf('@') > 0) {
              String[] dateParts = p.split("@");
              datePart = dateParts[0];
              dateFormat = dateParts[1];
            }

            SimpleDateFormat sdf = new SimpleDateFormat();
            sdf.applyPattern(dateFormat);
            try {
              Date d = sdf.parse(datePart);
              m_skipList.add(d);
              continue;
            } catch (ParseException e) {
            }
          }

          throw new Exception("Unrecognized skip entry string : " + p);
        }
      }
    }

    /**
     * Get the delta time of the periodicity being managed
     *
     * @return the delta time
     */
    public double deltaTime() {
      return m_deltaTime;
    }

    /**
     * Set the delta time for the periodicity being managed
     *
     * @param deltaTime the delta time to use
     */
    public void setDeltaTime(double deltaTime) {
      m_deltaTime = deltaTime;
      m_handlerPeriodicity.setDeltaTime(m_deltaTime);
    }

    /**
     * Set the first date time stamp value in the batch training data
     *
     * @param tsbase the first date time stamp value in the batch training data
     *          as a long (num milliseconds since epoch)
     */
    public void setDateTimeStampInitial(long tsbase) {
      m_isDateBased = true;
      m_dateTimeStampInitialVal = tsbase;
      GregorianCalendar c = new GregorianCalendar();

      Date d = new Date(m_dateTimeStampInitialVal);
      c.setTime(d);

      if (m_handlerPeriodicity == Periodicity.MONTHLY
          || m_handlerPeriodicity == Periodicity.WEEKLY
          || m_handlerPeriodicity == Periodicity.QUARTERLY) {
        m_dateTimeStampBaseVal = c.get(Calendar.YEAR);
      } else {
        m_dateTimeStampBaseVal = m_dateTimeStampInitialVal;
      }
    }

    /**
     * Get the first date time stamp value in the batch training data
     *
     * @return the first date time stamp value in the batch training data
     * @throws Exception if the periodicity being managed is not date
     *           timestamp-based
     */
    public long getDateTimeStampInitial() throws Exception {
      if (!isDateBased()) {
        throw new Exception("This periodicity is not date timestamp-based");
      }
      return m_dateTimeStampInitialVal;
    }

    /**
     * Set the last date timestamp value in the batch training data
     *
     * @param tsfinal the last date timestamp value in the batch training data
     *          as a long (num milliseconds since the epoch).
     */
    public void setDateTimeStampFinal(long tsfinal) {
      m_isDateBased = true;
      m_dateTimeStampFinalVal = tsfinal;
    }

    /**
     * Get the last date timestamp value in the batch training data
     *
     * @return the last date timestamp value in the batch training data
     * @throws Exception if the periodicity being managed is not date
     *           timestamp-based
     */
    public long getDateTimeStampFinal() throws Exception {
      if (!isDateBased()) {
        throw new Exception("This periodicity is not date timestamp-based");
      }
      return m_dateTimeStampFinalVal;
    }

    /**
     * Set whether the periodicity being managed is date timestamp-based
     *
     * @param isDateBased true if the periodicity being managed is date
     *          timestamp-based
     */
    public void setIsDateBased(boolean isDateBased) {
      m_isDateBased = isDateBased;
    }

    /**
     * Returns true if the periodicity being managed is date timestamp-based
     *
     * @return true if the periodicity being managed is date timestamp-based
     */
    public boolean isDateBased() {
      return m_isDateBased;
    }

    /**
     * Checks to see if the supplied date is in the list of time units to skip
     * (i.e. should not be considered as a time increment).
     *
     * @param toCheck the date to check
     * @return true if the date is in the skip list
     */
    public boolean dateInSkipList(Date toCheck) {
      if (m_skipList == null || m_skipList.size() == 0) {
        return false;
      }

      GregorianCalendar c = new GregorianCalendar();
      c.setTime(toCheck);
      for (Object o : m_skipList) {
        if (o instanceof String) {
          if (o.toString().equals("mon")) {
            if (c.get(Calendar.DAY_OF_WEEK) == Calendar.MONDAY) {
              return true;
            }
          }
          if (o.toString().equals("tue")) {
            if (c.get(Calendar.DAY_OF_WEEK) == Calendar.TUESDAY) {
              return true;
            }
          }
          if (o.toString().equals("wed")) {
            if (c.get(Calendar.DAY_OF_WEEK) == Calendar.WEDNESDAY) {
              return true;
            }
          }
          if (o.toString().equals("thu")) {
            if (c.get(Calendar.DAY_OF_WEEK) == Calendar.THURSDAY) {
              return true;
            }
          }
          if (o.toString().equals("fri")) {
            if (c.get(Calendar.DAY_OF_WEEK) == Calendar.FRIDAY) {
              return true;
            }
          }
          if (o.toString().equals("sat") || o.toString().equals("weekend")) {
            if (c.get(Calendar.DAY_OF_WEEK) == Calendar.SATURDAY) {
              return true;
            }
          }
          if (o.toString().equals("sun") || o.toString().equals("weekend")) {
            if (c.get(Calendar.DAY_OF_WEEK) == Calendar.SUNDAY) {
              return true;
            }
          }
          if (o.toString().equals("jan")) {
            if (c.get(Calendar.MONTH) == Calendar.JANUARY) {
              return true;
            }
          }
          if (o.toString().equals("feb")) {
            if (c.get(Calendar.MONTH) == Calendar.FEBRUARY) {
              return true;
            }
          }
          if (o.toString().equals("mar")) {
            if (c.get(Calendar.MONTH) == Calendar.MARCH) {
              return true;
            }
          }
          if (o.toString().equals("apr")) {
            if (c.get(Calendar.MONTH) == Calendar.APRIL) {
              return true;
            }
          }
          if (o.toString().equals("may")) {
            if (c.get(Calendar.MONTH) == Calendar.MAY) {
              return true;
            }
          }
          if (o.toString().equals("jun")) {
            if (c.get(Calendar.MONTH) == Calendar.JUNE) {
              return true;
            }
          }
          if (o.toString().equals("jul")) {
            if (c.get(Calendar.MONTH) == Calendar.JULY) {
              return true;
            }
          }
          if (o.toString().equals("aug")) {
            if (c.get(Calendar.MONTH) == Calendar.AUGUST) {
              return true;
            }
          }
          if (o.toString().equals("sep")) {
            if (c.get(Calendar.MONTH) == Calendar.SEPTEMBER) {
              return true;
            }
          }
          if (o.toString().equals("oct")) {
            if (c.get(Calendar.MONTH) == Calendar.OCTOBER) {
              return true;
            }
          }
          if (o.toString().equals("nov")) {
            if (c.get(Calendar.MONTH) == Calendar.NOVEMBER) {
              return true;
            }
          }
          if (o.toString().equals("dec")) {
            if (c.get(Calendar.MONTH) == Calendar.DECEMBER) {
              return true;
            }
          }
        } else if (o instanceof Integer) {
          if (m_handlerPeriodicity == Periodicity.DAILY
              || m_handlerPeriodicity == Periodicity.UNKNOWN) {
            // assume value is day of year
            if (c.get(Calendar.DAY_OF_YEAR) == ((Integer) o).intValue()) {
              return true;
            }
          } else if (m_handlerPeriodicity == Periodicity.HOURLY) {
            // assume value is hour of day
            if (c.get(Calendar.HOUR_OF_DAY) == ((Integer) o).intValue()) {
              return true;
            }
          } else if (m_handlerPeriodicity == Periodicity.WEEKLY) {
            // assume value is week of year
            if (c.get(Calendar.WEEK_OF_YEAR) == ((Integer) o).intValue()) {
              return true;
            }
          } else if (m_handlerPeriodicity == Periodicity.MONTHLY) {
            // assume value is month of year
            if (c.get(Calendar.MONTH) == ((Integer) o).intValue()) {
              return true;
            }
          }
        } else if (o instanceof Date) {
          if (((Date) o).equals(toCheck)) {
            return true;
          }
        }
      }

      return false;
    }

    /**
     * Remaps a date timestamp to a number counted in time units relative to
     * the start of the training data. This makes any coefficients produced by
     * a regression model for the timestamp (global trend modelling) of
     * reasonable scale. It is also useful for dealing with time units that
     * shouldn't be considered an increment, as a negative adjustment can be
     * accumulated for these.
     * <p>
     * For MONTHLY, WEEKLY and QUARTERLY periodicities the remapped value is
     * computed from calendar fields relative to the base year; for all other
     * periodicities it is the difference to the initial training time stamp
     * divided by the delta time. The remapped value is written into the last
     * attribute of the instance (index numAttributes() - 1), not into the
     * original time stamp attribute. Nothing is written if the time stamp is
     * missing.
     * <p>
     * When a skip list is in use and a previous instance is supplied (training
     * context), the running training skip adjustment of this handler is
     * decremented for every skipped time unit found between the two instances.
     * When a skip list is in use and previous is null (priming/forecasting
     * context), a skip adjustment local to this call is computed instead.
     *
     * @param inst the instance containing a date timestamp to be remapped
     * @param previous the immediately previous instance in the sequence (may be
     *          null).
     * @param timeStampName the name of the timestamp attribute
     * @return the supplied instance (the same object, modified in place)
     * @throws Exception if this handler is not date-based, if the time stamp
     *           is a member of the skip list, if the gap to the previous
     *           instance contains a time unit that is not in the skip list, if
     *           the data is not sorted in ascending time stamp order, or if
     *           (with previous == null) the time stamp lies before the first
     *           training time stamp
     */
    public Instance remapDateTimeStamp(Instance inst, Instance previous,
        String timeStampName) throws Exception {
      Instance result = inst;

      if (!isDateBased()) {
        throw new Exception("This periodicity is not date timestamp-based");
      }

      int origIndex = result.dataset().attribute(timeStampName).index();
      Calendar c = new GregorianCalendar();

      // whether the adjustment accumulated over the training data is added
      boolean applyTrainingSkipAdjust = true;
      // adjustment computed for this call only (priming/forecasting context)
      long localSkipAdjust = 0;

      if (!result.isMissing(origIndex)) {

        Date d = new Date((long) result.value(origIndex));
        double origValue = result.value(origIndex);
        if (m_skipList != null && m_skipList.size() > 0 && previous != null) {
          // check this instance's date time stamp against the skip list - our
          // fundamental assumption (for the training data) is that these dates
          // are not actually present in the data (i.e. sat and sun for stock
          // market data). If they are in the data (but with missing targets)
          // then the missing value interpolation routine will have filled them
          // in, which is the wrong thing to do if they are supposed to be
          // skipped over
          if (dateInSkipList(d)) {
            throw new Exception(
                "This instance contains a date time stamp that is "
                    + "a member of the skip list - skip list entries are not time "
                    + "units with respect to the model and should not be present : "
                    + inst.toString());
          }

          if (!previous.isMissing(origIndex)) {
            if (result.value(origIndex) >= previous.value(origIndex)) {
              // compared to the previous date are we more than one time unit
              // ahead? Step forward one time unit at a time and account for
              // every intervening unit
              double start = previous.value(origIndex);
              double end = origValue;
              while (start < end) {
                start = weka.classifiers.timeseries.core.Utils
                    .advanceSuppliedTimeValue(start, this);
                if (start < end) {
                  if (dateInSkipList(new Date((long) start))) {
                    // a skipped unit: accumulate a negative adjustment
                    m_trainingRemapSkipAdjust--;
                  } else {
                    // the difference between the current and previous
                    // instance is more than one time step but the
                    // intervening step(s) are not in the skip list!
                    throw new Exception("There is an increment of more than "
                        + "one time step between\n" + previous.toString()
                        + "\nand\n" + inst.toString() + "\n but none of the "
                        + "intervening time steps are in the " + "skip list.");
                  }
                }
              }
            } else {
              // we have a problem here - data is not sorted in ascending order
              // of the date time stamp!
              throw new Exception(
                  "The data does not seem to be sorted in ascending order "
                      + "of the date time stamp!");
            }
          }
        }

        if (m_skipList != null && m_skipList.size() > 0 && previous == null) {
          // this case indicates that we are being invoked in a
          // priming/forecasting context

          // check that this instance does not occur before the first training
          // instance!!
          if (origValue < m_dateTimeStampInitialVal) {
            throw new Exception(
                "The timestamp for this instance occurs before the "
                    + "timestamp of the first training instance!");
          }
          // can't prime/forecast for values that occurred before the training
          // data.

          double end = result.value(origIndex);
          // first advance end until it is not in the skip list (this won't
          // be needed for priming instances that are within the training
          // date range), but might occur for closed-loop forecasting when
          // the date is advanced one time unit for each step
          while (dateInSkipList(new Date((long) end))) {
            end = weka.classifiers.timeseries.core.Utils
                .advanceSuppliedTimeValue(end, this);
          }

          double start = 0;
          if (end < m_dateTimeStampFinalVal) {
            // priming/forecasting within the range of the training data -
            // will have to recompute all skips from the initial training
            // time stamp up to this instance and not apply the pre-computed
            // skip total for the full training period
            applyTrainingSkipAdjust = false;
            start = m_dateTimeStampInitialVal;
          } else {
            // priming/forecasting beyond the last training date time stamp seen
            start = m_dateTimeStampFinalVal;
          }

          // now compute local skip adjust from start up to end
          while (start < end) {
            start = weka.classifiers.timeseries.core.Utils
                .advanceSuppliedTimeValue(start, this);
            if (start < end) {
              if (dateInSkipList(new Date((long) start))) {
                localSkipAdjust--;
              }
            }
          }
          // set end as the current value
          d = new Date((long) end);
          origValue = end;
        }

        if (m_handlerPeriodicity == Periodicity.MONTHLY
            || m_handlerPeriodicity == Periodicity.WEEKLY
            || m_handlerPeriodicity == Periodicity.QUARTERLY) {
          // calendar-based remapping: count units relative to the base year
          c.setTime(d);
          long year = c.get(Calendar.YEAR);
          long month = c.get(Calendar.MONTH);
          long week = c.get(Calendar.WEEK_OF_YEAR);
          long remapped = 0;
          if (m_handlerPeriodicity == Periodicity.MONTHLY) {
            remapped = ((year - m_dateTimeStampBaseVal) * 12) + month;
          } else if (m_handlerPeriodicity == Periodicity.WEEKLY) {
            remapped = ((year - m_dateTimeStampBaseVal) * 52) + week;

            // adjust for the case where week 1 of the year actually starts
            // in the last week of December
            if (month == Calendar.DECEMBER && week == 1) {
              remapped += 52;
            }
          } else if (m_handlerPeriodicity == Periodicity.QUARTERLY) {
            // Calendar.MONTH is zero-based, so month / 3 + 1 gives 1..4
            remapped = ((year - m_dateTimeStampBaseVal) * 4)
                + ((month / 3L) + 1L);
          }

          if (m_skipList != null && m_skipList.size() > 0) {
            remapped += (applyTrainingSkipAdjust) ? m_trainingRemapSkipAdjust
                : 0;
            remapped += localSkipAdjust;
          }

          // the remapped value goes into the last attribute of the instance
          result.setValue(result.numAttributes() - 1, remapped);
        } else {
          // fixed-delta remapping: elapsed time since the first training
          // time stamp, expressed in units of the delta time
          double remapped = origValue - m_dateTimeStampInitialVal;
          remapped /= deltaTime();// m_deltaTime;

          // it might (or might not) make sense to take the floor here. For
          // daily data I have the feeling that date arithmetic (adding 1 to
          // day of the year) may actually add slightly more than a day at
          // certain times (to account for leap seconds/years)
          // remapped = Math.floor(remapped);
          if (m_skipList != null && m_skipList.size() > 0) {
            remapped += (applyTrainingSkipAdjust) ? m_trainingRemapSkipAdjust
                : 0;
            remapped += localSkipAdjust;
          }
          // the remapped value goes into the last attribute of the instance
          result.setValue(result.numAttributes() - 1, remapped);
        }
      }
      return result;
    }
  }

  /**
   * Utility method that uses heuristics to identify the periodicity of the data
   * with respect to a time stamp. If the time stamp is not a date then the
   * periodicity is UNKNOWN with a delta set by computing the average difference
   * between consecutive time stamp values. Configures the periodicity with
   * first and last time stamp entries in the data.
   *
   * @param insts the instances to determine the periodicity from
   * @param timeName the name of the time stamp attribute
   * @param userHint a specific periodicity to defer to. The user should provide
   *          a specific periodicity when the data has non-constant differences
   *          in time between consecutive elements and a skip list will be used
   *          to correct for this. Specifying UNKNOWN as the periodicity here
   *          will result in the heuristic detection routine being applied.
   * @return the configured Periodicity of the data.
   */
  public static PeriodicityHandler determinePeriodicity(Instances insts,
      String timeName, Periodicity userHint) {

    double fiveMins = 300000D;
    double oneHour = 3600000D;
    double oneDay = oneHour * 24D;
    double oneWeek = oneDay * 7D;
    double thirtyDays = oneHour * 24D * 30D;
    double approxQuarter = thirtyDays * 3D;
    double oneYear = oneDay * 365D;

    double averageDelta = Utils.missingValue();
    int timeIndex = insts.attribute(timeName).index();
    PeriodicityHandler result = new PeriodicityHandler();

    if (timeIndex < 0) {
      result.setPeriodicity(Periodicity.UNKNOWN);
      result.setDeltaTime(Utils.missingValue());
      return result;
    }

    if (userHint != Periodicity.UNKNOWN && insts.attribute(timeIndex).isDate()) {
      // trust the user's indication
      result.setPeriodicity(userHint);
      switch (userHint) {
      case HOURLY:
        result.setDeltaTime(oneHour);
        break;
      case DAILY:
        result.setDeltaTime(oneDay);
        break;
      case WEEKLY:
        result.setDeltaTime(oneWeek);
        break;
      case YEARLY:
        result.setDeltaTime(oneYear);
        break;

      // others don't matter as date arithmetic is used
      }

      long initialTS = (long) insts.instance(0).value(timeIndex);
      long finalTS = (long) insts.instance(insts.numInstances() - 1).value(
          timeIndex);
      result.setDateTimeStampInitial(initialTS);
      result.setDateTimeStampFinal(finalTS);
      return result;
    }

    List<Double> deltas = new ArrayList<Double>();
    for (int i = 1; i < insts.numInstances(); i++) {
      if (!insts.instance(i).isMissing(timeIndex)
          && !insts.instance(i - 1).isMissing(timeIndex)) {
        deltas.add(new Double(insts.instance(i).value(timeIndex)
            - insts.instance(i - 1).value(timeIndex)));
      }
    }

    double previousDelta = -1;
    double deltaSum = 0;
    for (int i = 0; i < deltas.size(); i++) {
      if (i == 0) {
        previousDelta = deltas.get(i);
        deltaSum += previousDelta;
      } else {
        double currentDelta = deltas.get(i);
        if (currentDelta - previousDelta != 0) {
          // nonConstant = true;
        }
        previousDelta = currentDelta;
        deltaSum += currentDelta;
      }
    }
    averageDelta = deltaSum /= deltas.size();

    if (insts.attribute(timeIndex).isDate()) {
      long initialTS = (long) insts.instance(0).value(timeIndex);
      long finalTS = (long) insts.instance(insts.numInstances() - 1).value(
          timeIndex);

      // allow +-5mins for hourly
      if (Math.abs(oneHour - averageDelta) <= fiveMins) {
        result.setPeriodicity(Periodicity.HOURLY);
        result.setDeltaTime(oneHour);
        result.setDateTimeStampInitial(initialTS);
        result.setDateTimeStampFinal(finalTS);
        return result;
      }

      // allow += 1 hour for daily
      if (Math.abs(oneDay - averageDelta) <= oneHour) {
        result.setPeriodicity(Periodicity.DAILY);
        result.setDeltaTime(oneDay);
        result.setDateTimeStampInitial(initialTS);
        result.setDateTimeStampFinal(finalTS);
        return result;
      }

      // allow +- 6 hours for weekly
      if (Math.abs(oneWeek - averageDelta) <= (oneDay / 4.0)) {
        result.setPeriodicity(Periodicity.WEEKLY);
        result.setDeltaTime(oneWeek);
        result.setDateTimeStampInitial(initialTS);
        result.setDateTimeStampFinal(finalTS);
        return result;
      }

      // allow +- 3 days for monthly
      if (Math.abs(thirtyDays - averageDelta) <= (oneDay * 3.0)) {
        result.setPeriodicity(Periodicity.MONTHLY);
        result.setDeltaTime(thirtyDays);
        result.setDateTimeStampInitial(initialTS);
        result.setDateTimeStampFinal(finalTS);
        return result;
      }

      // allow +- 1 week for quarterly
      if (Math.abs(approxQuarter - averageDelta) <= oneWeek) {
        result.setPeriodicity(Periodicity.QUARTERLY);
        result.setDeltaTime(approxQuarter);
        result.setDateTimeStampInitial(initialTS);
        result.setDateTimeStampFinal(finalTS);
        return result;
      }

      // allow +- 2 days for yearly
      if (Math.abs(oneYear - averageDelta) <= (oneDay * 2.0)) {
        result.setPeriodicity(Periodicity.YEARLY);
        result.setDeltaTime(oneYear);
        result.setDateTimeStampInitial(initialTS);
        result.setDateTimeStampFinal(finalTS);
        return result;
      }

      // otherwise UNKNOWN but date-based
      result.setPeriodicity(Periodicity.UNKNOWN);
      result.setIsDateBased(true);
      result.setDeltaTime(averageDelta);
      result.setDateTimeStampInitial(initialTS);
      result.setDateTimeStampFinal(finalTS);
      return result;
    }

    // default for non-date-based time stamps
    result.setPeriodicity(Periodicity.UNKNOWN);
    result.setIsDateBased(false);
    result.setDeltaTime(averageDelta);
    return result;
  }

  protected Instances setupDerivedPeriodics(Instances insts) throws Exception {
    Instances result = insts;

    if (m_adjustForTrends && !m_useArtificialTimeIndex) {

      m_dateBasedPeriodicity = determinePeriodicity(insts, m_timeStampName,
          m_userHintPeriodicity);
      if (m_skipEntries != null && m_skipEntries.length() > 0) {
        m_dateBasedPeriodicity.setSkipList(m_skipEntries, m_dateFormat);
      }

      // int timeIndex = insts.attribute(m_timeStampName).index();

      // m_deltaTime = m_dateBasedPeriodicity.deltaTime();
      /*
       * if (m_dateBasedPeriodicity == Periodicity.MONTHLY) {
       * m_advanceTimeStampByMonth = true; }
       */

      if (insts.attribute(m_timeStampName).isDate()) {
        m_derivedPeriodicMakers = new ArrayList<Filter>();
        // now add filters for each requested derived periodic value
        if (m_am) {
          // numeric binary
          Add a = new Add();
          a.setAttributeName("AM");
          a.setInputFormat(insts);
          result = Filter.useFilter(result, a);
          m_derivedPeriodicMakers.add(a);
        }

        if (m_dayOfWeek) {
          // nominal
          Add a = new Add();
          a.setAttributeName("DayOfWeek");
          a.setNominalLabels("sun,mon,tue,wed,thu,fri,sat");
          a.setInputFormat(result);
          result = Filter.useFilter(result, a);
          m_derivedPeriodicMakers.add(a);
        }

        if (m_dayOfMonth) {
          // nominal
          Add a = new Add();
          a.setAttributeName("DayOfMonth");
          a.setNominalLabels("1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,"
              + "20,21,22,23,24,25,26,27,28,29,30,31");
          a.setInputFormat(result);
          result = Filter.useFilter(result, a);
          m_derivedPeriodicMakers.add(a);
        }

        if (m_numDaysInMonth) {
          Add a = new Add();
          a.setAttributeName("NumDaysInMonth");
          a.setInputFormat(insts);
          result = Filter.useFilter(result, a);
          m_derivedPeriodicMakers.add(a);
        }

        if (m_weekend) {
          // numeric binary
          Add a = new Add();
          a.setAttributeName("Weekend");
          a.setInputFormat(result);
          result = Filter.useFilter(result, a);
          m_derivedPeriodicMakers.add(a);
        }

        if (m_monthOfYear) {
          // nominal
          Add a = new Add();
          a.setAttributeName("Month");
          a.setNominalLabels("jan,feb,mar,apr,may,jun,jul,aug,sep,oct,nov,dec");
          a.setInputFormat(result);
          result = Filter.useFilter(result, a);
          m_derivedPeriodicMakers.add(a);
        }

        if (m_quarter) {
          // nominal
          Add a = new Add();
          a.setAttributeName("Quarter");
          a.setNominalLabels("Q1,Q2,Q3,Q4");
          a.setInputFormat(result);
          result = Filter.useFilter(result, a);
          m_derivedPeriodicMakers.add(a);
        }

        // any custom date-derived periodics?
        if (m_customPeriodics != null) {
          for (String name : m_customPeriodics.keySet()) {
            List<CustomPeriodicTest> l = m_customPeriodics.get(name);
            // check to see if we should create a multi-label nominal
            // attribute instead of a numeric binary attribute. If all
            // test intervals have a non-null label then attribute will
            // be nominal
            boolean binary = false;
            String labels = "";
            Set<String> uniqueLabels = new HashSet<String>();
            for (CustomPeriodicTest t : l) {
              if (t.getLabel() == null || t.getLabel().length() == 0) {
                binary = true;
                break;
              } else {
                if (uniqueLabels.add(t.getLabel())) {
                  labels += t.getLabel() + ",";
                }
              }
            }

            Add a = new Add();
            a.setAttributeName("c_" + name);
            if (!binary) {
              labels = labels.substring(0, labels.lastIndexOf(','));
              a.setAttributeType(new SelectedTag("NOM", Add.TAGS_TYPE));
              a.setNominalLabels(labels);
            }

            a.setInputFormat(result);
            result = Filter.useFilter(result, a);
            m_derivedPeriodicMakers.add(a);
          }
        }

        // set the values for each instance in the data
        for (int i = 0; i < result.numInstances(); i++) {
          setDerivedPeriodicValues(result.instance(i));
        }
      }
    }

    return result;
  }

  protected void setDerivedPeriodicValues(Instance inst) {
    if (m_adjustForTrends && !m_useArtificialTimeIndex) {
      if (inst.dataset().attribute(m_timeStampName).isDate()) {
        int timeIndex = inst.dataset().attribute(m_timeStampName).index();
        long time = (inst.isMissing(timeIndex)) ? -1 : (long) inst
            .value(timeIndex);
        Date instDate = null;
        GregorianCalendar cal = new GregorianCalendar();
        if (time != -1) {
          instDate = new Date(time);
          cal.setTime(instDate);
        }

        if (m_am) {
          if (instDate == null) {
            inst.setMissing(inst.dataset().attribute("AM"));
          } else {
            if (cal.get(Calendar.AM_PM) == Calendar.AM) {
              inst.setValue(inst.dataset().attribute("AM"), 1);
            } else {
              inst.setValue(inst.dataset().attribute("AM"), 0);
            }
          }
        }

        if (m_dayOfWeek || m_weekend) {
          if (instDate == null) {
            if (m_dayOfWeek) {
              inst.setMissing(inst.dataset().attribute("DayOfWeek"));
            }
            if (m_weekend) {
              inst.setMissing(inst.dataset().attribute("Weekend"));
            }
          } else {
            int dow = cal.get(Calendar.DAY_OF_WEEK);
            String day = "";
            switch (dow) {
            case Calendar.SUNDAY:
              day = "sun";
              break;
            case Calendar.MONDAY:
              day = "mon";
              break;
            case Calendar.TUESDAY:
              day = "tue";
              break;
            case Calendar.WEDNESDAY:
              day = "wed";
              break;
            case Calendar.THURSDAY:
              day = "thu";
              break;
            case Calendar.FRIDAY:
              day = "fri";
              break;
            case Calendar.SATURDAY:
              day = "sat";
              break;
            }

            if (day.length() > 0) {
              if (m_dayOfWeek) {
                inst.setValue(inst.dataset().attribute("DayOfWeek"), day);
              }

              if (m_weekend) {
                if (day.equals("sat") || day.equals("sun")) {
                  inst.setValue(inst.dataset().attribute("Weekend"), 1);
                } else {
                  inst.setValue(inst.dataset().attribute("Weekend"), 0);
                }
              }
            } else {
              if (m_dayOfWeek) {
                inst.setMissing(inst.dataset().attribute("DayOfWeek"));
              }

              if (m_weekend) {
                inst.setMissing(inst.dataset().attribute("Weekend"));
              }
            }
          }
        }

        if (m_dayOfMonth) {
          if (instDate == null) {
            inst.setMissing(inst.dataset().attribute("DayOfWeek"));
          } else {
            int dom = cal.get(Calendar.DAY_OF_MONTH);
            inst.setValue(inst.dataset().attribute("DayOfMonth"), (dom - 1));
          }
        }

        if (m_numDaysInMonth) {
          if (instDate == null) {
            inst.setMissing(inst.dataset().attribute("NumDaysInMonth"));
          } else {
            boolean isLeap = cal.isLeapYear(cal.get(Calendar.YEAR));
            int daysInMonth = 0;
            int month = cal.get(Calendar.MONTH);
            if (month == Calendar.FEBRUARY) {
              daysInMonth = 28;
              if (isLeap) {
                daysInMonth++;
              }
            } else if (month == Calendar.APRIL || month == Calendar.JUNE
                || month == Calendar.SEPTEMBER || month == Calendar.NOVEMBER) {
              daysInMonth = 30;
            } else {
              daysInMonth = 31;
            }

            inst.setValue(inst.dataset().attribute("NumDaysInMonth"),
                daysInMonth);
          }
        }

        if (m_monthOfYear || m_quarter) {
          if (instDate == null) {
            if (m_monthOfYear) {
              inst.setMissing(inst.dataset().attribute("Month"));
            }

            if (m_quarter) {
              inst.setMissing(inst.dataset().attribute("Quarter"));
            }
          } else {
            int moy = cal.get(Calendar.MONTH);
            if (m_monthOfYear) {
              String month = inst.dataset().attribute("Month").value(moy);
              inst.setValue(inst.dataset().attribute("Month"), month);
            }

            if (m_quarter) {
              String quarter = "";
              if (moy == 0 || moy == 1 || moy == 2) {
                quarter = "Q1";
              } else if (moy == 3 || moy == 4 || moy == 5) {
                quarter = "Q2";
              } else if (moy == 6 || moy == 7 || moy == 8) {
                quarter = "Q3";
              } else {
                quarter = "Q4";
              }

              inst.setValue(inst.dataset().attribute("Quarter"), quarter);
            }
          }
        }

        if (m_customPeriodics != null) {
          for (String name : m_customPeriodics.keySet()) {
            Attribute att = inst.dataset().attribute("c_" + name);
            if (att != null) {
              if (instDate == null) {
                inst.setMissing(att);
              } else {
                // evaluate for this periodic
                List<CustomPeriodicTest> l = m_customPeriodics.get(name);
                boolean result = false;
                String label = null;
                for (CustomPeriodicTest t : l) {
                  result = (result || t.evaluate(instDate));

                  // match?
                  if (result) {
                    label = t.getLabel();
                    break;
                  } else {
                    label = null;
                  }
                }

                if (result) {
                  if (att.isNominal()) {
                    if (label == null) {
                      // inst.setMissing(att);
                      System.err.println("This shouldn't happen!!");
                    } else {
                      inst.setValue(att, att.indexOfValue(label));
                    }
                  } else {
                    // numeric binary attribute
                    inst.setValue(att, 1);
                  }
                } else {
                  if (att.isNominal()) {
                    inst.setMissing(att);
                  } else {
                    inst.setValue(att, 0);
                  }
                }
              }
            } else {
              System.err.println("WARNING: custom periodic att c_" + name
                  + " not found in instances!");
            }
          }
        }
      }
    }
  }

  protected void setupPeriodicMaps(Instances insts) {
    m_primaryPeriodicSequence = null;
    m_secondaryPeriodicLookups = null;

    if (m_primaryPeriodicName != null && m_primaryPeriodicName.length() > 0) {
      int primaryIndex = insts.attribute(m_primaryPeriodicName).index();

      if (primaryIndex < 0) {
        return;
      }

      m_primaryPeriodicSequence = new HashMap<String, String>();
      for (int i = 0; i < insts.numInstances() - 1; i++) {
        Instance current = insts.instance(i);
        Instance next = insts.instance(i + 1);
        if (!Utils.isMissingValue(current.value(primaryIndex))
            && !Utils.isMissingValue(next.value(primaryIndex))) {
          String key = current.stringValue(primaryIndex);
          String value = next.stringValue(primaryIndex);
          if (m_primaryPeriodicSequence.get(key) == null) {
            m_primaryPeriodicSequence.put(key, value);
          } else {
            // check to see if this value is consistent with
            // what we've seen previously
            String previous = m_primaryPeriodicSequence.get(key);
            if (!previous.equals(value)) {
              // we don't have a consistent sequence, so can't
              // use this as the main periodic sequence
              m_primaryPeriodicSequence = null;
              break;
            }
          }
        }
      }

      if (m_primaryPeriodicSequence != null) {
        // now look for any other nominal attributes that
        // might be secondary periodic sequences at a higher
        // granularity than the primary sequence
        m_secondaryPeriodicLookups = new HashMap<Attribute, Map<String, String>>();

        for (int i = 0; i < insts.numAttributes(); i++) {
          if (insts.attribute(i).isNominal() && i != primaryIndex) {
            Attribute candidate = insts.attribute(i);
            Map<String, String> candidateMap = new HashMap<String, String>();
            for (int j = 0; j < insts.numInstances(); j++) {
              Instance current = insts.instance(j);
              if (!Utils.isMissingValue(current.value(primaryIndex))
                  && !Utils.isMissingValue(j)) {
                String key = current.stringValue(primaryIndex);
                String value = current.stringValue(j);

                if (candidateMap.get(key) == null) {
                  candidateMap.put(key, value);
                } else {
                  // check to see if this value is consistent with what
                  // we've seen previously
                  String previous = candidateMap.get(key);
                  if (!previous.equals(value)) {
                    // we need one unique value of the secondary to occur
                    // in conjunction for each primary (e.g. months of the year
                    // and quarters - each month is associated with only one
                    // quarter
                    // of the year)
                    candidateMap = null;
                    break;
                  }
                }
              }
            }

            if (candidateMap != null) {
              m_secondaryPeriodicLookups.put(candidate, candidateMap);
            }
          }
        }
      }
    }
  }

  private void setPeriodicValues(Instance inst) throws Exception {
    if (m_primaryPeriodicName != null && m_primaryPeriodicName.length() > 0) {
      int primaryIndex = m_originalHeader.attribute(m_primaryPeriodicName)
          .index();

      if (primaryIndex < 0) {
        throw new Exception(
            "Can't find the primary periodic variable in the data!");
      }

      // determine the next value in the sequence
      double lastPeriodicIndex = m_lastHistoricInstance.value(primaryIndex);
      if (!Utils.isMissingValue(lastPeriodicIndex)) {
        String lastPeriodicValue = m_lastHistoricInstance
            .stringValue(primaryIndex);
        String successor = m_primaryPeriodicSequence.get(lastPeriodicValue);
        if (successor != null) {
          // newVals[primaryIndex] =
          // m_originalHeader.attribute(primaryIndex).indexOfValue(successor);
          inst.setValue(primaryIndex, m_originalHeader.attribute(primaryIndex)
              .indexOfValue(successor));

          // now we can look for secondary periodic attributes
          if (m_secondaryPeriodicLookups != null) {
            for (int i = 0; i < m_originalHeader.numAttributes(); i++) {
              Attribute current = m_originalHeader.attribute(i);
              Map<String, String> correspondingL = m_secondaryPeriodicLookups
                  .get(current);
              if (correspondingL != null) {
                String correspondingV = correspondingL.get(successor);
                if (correspondingV != null) {
                  // newVals[i] =
                  // m_originalHeader.attribute(i).indexOfValue(correspondingV);
                  inst.setValue(i,
                      m_originalHeader.attribute(i)
                          .indexOfValue(correspondingV));
                } else {
                  // Set a missing value
                  // newVals[i] = Utils.missingValue();
                  inst.setMissing(i);
                }
              }
            }
          }
        } else {
          // TODO
          // We can either set a missing value here if we don't have a successor
          // in the map
          // or we can look at the order that the values are declared in the
          // header for
          // the primary periodic sequence and assume that this order is
          // correct.
          // newVals[primaryIndex] = Utils.missingValue();
          inst.setMissing(primaryIndex);

        }
      } else {
        // newVals[primaryIndex] = Utils.missingValue();
        inst.setMissing(primaryIndex);
      }
    }
  }

  protected Instances removeExtraneousAttributes(Instances insts)
      throws Exception {
    int primaryIndex = -1;
    String removeList = "";

    if (m_primaryPeriodicName != null && m_primaryPeriodicName.length() > 0) {
      primaryIndex = insts.attribute(m_primaryPeriodicName).index();
    }

    for (int i = 0; i < insts.numAttributes(); i++) {
      if (i == primaryIndex) {
        continue;
      }

      if (m_secondaryPeriodicLookups != null) {
        if (m_secondaryPeriodicLookups.containsKey(insts.attribute(i))) {
          continue;
        }
      }

      boolean target = false;
      for (String s : m_fieldsToLag) {
        if (insts.attribute(i).name().equals(s)) {
          target = true;
          break;
        }
      }

      if (target) {
        continue;
      }

      if (m_overlayFields != null) {
        boolean overlay = false;
        for (String s : m_overlayFields) {
          if (insts.attribute(i).name().equals(s)) {
            overlay = true;
            break;
          }
        }

        if (overlay) {
          continue;
        }
      }

      if (m_adjustForTrends && m_timeStampName != null
          && m_timeStampName.length() > 0) {
        if (i == insts.attribute(m_timeStampName).index()) {
          continue;
        }
      }

      // otherwise, this is some attribute that we are not predicting and
      // wont be able to determine the value for when forecasting future
      // instances. So we can't let the model use it.
      removeList += "" + (i + 1) + ",";
    }

    if (removeList.length() > 0) {
      removeList = removeList.substring(0, removeList.lastIndexOf(','));
      m_extraneousAttributeRemover = new Remove();
      m_extraneousAttributeRemover.setAttributeIndices(removeList);
      m_extraneousAttributeRemover.setInputFormat(insts);
      insts = Filter.useFilter(insts, m_extraneousAttributeRemover);
    }

    return insts;
  }

  /**
   * Creates a transformed data set based on the user's settings
   *
   * @param insts the instances to transform
   * @return a transformed data set
   * @throws Exception if a problem occurs during the creation of lagged and
   *           auxiliary attributes.
   */
  public Instances getTransformedData(Instances insts) throws Exception {
    m_originalHeader = new Instances(insts, 0);
    Instances result = insts;
    m_lastHistoricInstance = result.instance(result.numInstances() - 1);

    setupPeriodicMaps(result);
    result = removeExtraneousAttributes(insts);

    // m_lastArtificialTimeValue = -1;
    m_lastTimeValue = -1;

    if (m_adjustForTrends
        && (m_timeStampName == null || m_timeStampName.length() == 0 || insts
            .attribute(m_timeStampName) == null)) {
      // add an artificial time index. This will be problematic when
      // using the built model to forecast for future time points that do
      // not occur immediately after the last training event. Since the time
      // index is artificial, all we can do for future predictions is assume
      // that the n instances provided to the primeForecaster() method overlap
      // the last n instances of the training data and that future predictions
      // occur from the last known artificial time value + 1.
      m_artificialTimeMaker = new AddID();
      m_artificialTimeMaker.setAttributeName("ArtificialTimeIndex");
      m_artificialTimeMaker.setIDIndex("last");
      m_artificialTimeMaker.setInputFormat(result);
      result = Filter.useFilter(result, m_artificialTimeMaker);
      m_useArtificialTimeIndex = true;
      m_timeStampName = "ArtificialTimeIndex";
      /*
       * m_lastArtificialTimeValue = result.instance(result.numInstances() -
       * 1).value(result.numAttributes() - 1);
       */
      /*
       * m_lastTimeValue = result.instance(result.numInstances() -
       * 1).value(result.numAttributes() - 1);
       */

    } else {
      m_useArtificialTimeIndex = false;
    }

    if (m_adjustForTrends) {
      int timeStampIndex = result.attribute(m_timeStampName).index();

      m_lastTimeValue = result.instance(result.numInstances() - 1).value(
          timeStampIndex);
      Instance last = result.instance(result.numInstances() - 1);
      Instance secondToLast = result.instance(result.numInstances() - 2);
      /*
       * m_deltaTime = last.value(timeStampIndex) -
       * secondToLast.value(timeStampIndex);
       */

      result = setupDerivedPeriodics(result);

      // remap timestamp if it is a date
      result = createDateTimestampRemap(result);
    }

    result = createVarianceAdjusters(result);

    result = createLags(result);
    result = createAveragedLags(result);
    result = createTimeIndexes(result);
    if (m_includeTimeLagCrossProducts) {
      result = createTimeLagCrossProducts(result);
    }

    // remove all instances with missing values at the
    // start of the series?
    if (m_deleteMissingFromStartOfSeries) {
      int start = 0;
      for (int i = 0; i <= m_maxLag; i++) {
        boolean ok = true;
        for (int j = 0; j < result.numAttributes(); j++) {
          if (result.instance(i).isMissing(j)) {
            ok = false;
            break;
          }
        }
        if (!ok) {
          start++;
        } else {
          break;
        }
      }

      System.err.println("******** Discarding " + start
          + " instances from the start.");
      result = new Instances(result, start, result.numInstances() - start);
    }
    // System.err.println(result);
    return result;
  }

  public Instance processInstance(Instance source, boolean incrementTime,
      boolean setAnyPeriodic) throws Exception {
    return processInstance(source, incrementTime, setAnyPeriodic, false);
  }

  public Instance processInstancePreview(Instance source,
      boolean incrementTime, boolean setAnyPeriodic) throws Exception {
    return processInstance(source, incrementTime, setAnyPeriodic, true);
  }

  /**
   * Process an instance in the original format and produce a transformed
   * instance as output. Assumes that the lag maker has been configured and
   * initialized with a call to getTransformedData(), which records the
   * original header that the incoming instance is checked against.
   * <p>
   * Note that the supplied instance is not copied before use: when
   * setAnyPeriodic is true its periodic attribute values are overwritten in
   * place. The last historic instance and (when adjusting for trends) the last
   * time value held by this object are updated on every call, including
   * temporary ones.
   *
   * @param source an instance in original format
   * @param incrementTime true if any time stamp value should be incremented
   *          based on the time stamp value from the last instance seen and set
   *          in the outputted instance
   * @param setAnyPeriodic true if any user-specified periodic value should be
   *          set in the transformed instance based on the value from the last
   *          instance seen.
   * @param temporary true if lag filters of type TimeSeriesTranslate should be
   *          fed via inputOneTemporarily() rather than the regular
   *          input()/output() pair (presumably so that their history is not
   *          permanently advanced - confirm against TimeSeriesTranslate)
   * @return a transformed instance
   * @throws Exception if the structure of the instance differs from the
   *           original header or if something goes wrong in one of the
   *           filters.
   */
  public Instance processInstance(Instance source, boolean incrementTime,
      boolean setAnyPeriodic, boolean temporary) throws Exception {

    // refuse instances whose header does not match the one recorded by
    // getTransformedData()
    String message = null;
    if ((message = source.dataset().equalHeadersMsg(m_originalHeader)) != null) {
      throw new Exception("[TSLagMaker] cannot process instance because the "
          + "structure\ndiffers from what we were configured with:\n\n"
          + message);
    }

    Instance result = source;

    if (setAnyPeriodic) {
      setPeriodicValues(result);
    }

    // remember this instance (still in original format) as the most recent
    // one seen; setPeriodicValues() reads it on the next call
    m_lastHistoricInstance = new DenseInstance(result);
    m_lastHistoricInstance.setDataset(result.dataset());

    if (m_extraneousAttributeRemover != null) {
      m_extraneousAttributeRemover.input(result);
      result = m_extraneousAttributeRemover.output();
    }

    if (m_artificialTimeMaker != null) {
      m_artificialTimeMaker.input(result);
      result = m_artificialTimeMaker.output();

      // set the correct value here - it can't be done after the fact because
      // of other filters that create the product of time and something else.
      if (incrementTime) {
        double newTime = m_lastTimeValue + 1;
        int timeIndex = result.dataset().attribute(m_timeStampName).index();
        result.setValue(timeIndex, newTime);
        m_lastTimeValue = newTime;
      }
    } else {
      // if we have a genuine time stamp field then make sure
      // that we keep track of the most recent time value
      if (m_adjustForTrends) {
        int timeIndex = result.dataset().attribute(m_timeStampName).index();
        if (incrementTime) {

          // advance the last time value by one step of the configured
          // date-based periodicity
          double newTime = weka.classifiers.timeseries.core.Utils
              .advanceSuppliedTimeValue(m_lastTimeValue, m_dateBasedPeriodicity);

          // default to add the delta
          /*
           * double newTime = m_lastTimeValue +
           * m_dateBasedPeriodicity.deltaTime();//m_deltaTime; Date d = new
           * Date((long)m_lastTimeValue); Calendar c = new GregorianCalendar();
           * c.setTime(d); if (m_dateBasedPeriodicity == Periodicity.MONTHLY) {
           * c.add(Calendar.MONTH, 1); newTime = (double)c.getTimeInMillis(); }
           * else if (m_dateBasedPeriodicity == Periodicity.WEEKLY) {
           * c.add(Calendar.WEEK_OF_YEAR, 1); newTime =
           * (double)c.getTimeInMillis(); } else if (m_dateBasedPeriodicity ==
           * Periodicity.QUARTERLY) { c.add(Calendar.MONTH, 3); newTime =
           * (double)c.getTimeInMillis(); } else if (m_dateBasedPeriodicity ==
           * Periodicity.DAILY) { c.add(Calendar.DAY_OF_YEAR, 1); newTime =
           * (double)c.getTimeInMillis(); }
           */
          result.setValue(timeIndex, newTime);
          // NOTE: the last time value is updated even for temporary calls;
          // the guard below is deliberately disabled
          // if (!temporary) {
          m_lastTimeValue = newTime;
          // }
        } else {
          // if (!temporary) {
          // if we have a value, just store it
          if (!result.isMissing(timeIndex)) {
            m_lastTimeValue = result.value(timeIndex);
          }/*
            * else { System.err.println("*****WARNING missing time..."); }
            */
          // }
        }

        // set any derived periodic values: first let each filter add its
        // attribute, then fill in the values
        if (m_derivedPeriodicMakers != null
            && m_derivedPeriodicMakers.size() > 0) {
          for (Filter f : m_derivedPeriodicMakers) {
            f.input(result);
            result = f.output();
          }
          setDerivedPeriodicValues(result);
        }

        // remap the timestamp if necessary
        result = remapDateTimeStamp(result);
      }
    }

    if (m_adjustForVariance) {
      for (Filter f : m_varianceAdjusters) {
        f.input(result);
        result = f.output();
      }
    }

    // lag creation: in temporary mode TimeSeriesTranslate filters are fed
    // through their dedicated temporary entry point
    for (Filter f : m_lagMakers) {
      if (temporary && f instanceof TimeSeriesTranslate) {
        result = ((TimeSeriesTranslate) f).inputOneTemporarily(result);
      } else {
        f.input(result);
        result = f.output();
      }
    }

    if (m_averagedLagMakers != null) {
      for (Filter f : m_averagedLagMakers) {
        f.input(result);
        result = f.output();
      }
    }

    if (m_timeIndexMakers != null) {
      for (Filter f : m_timeIndexMakers) {
        f.input(result);
        result = f.output();
      }
    }

    if (m_includeTimeLagCrossProducts && m_timeLagCrossProductMakers != null) {
      for (Filter f : m_timeLagCrossProductMakers) {
        f.input(result);
        result = f.output();
      }
    }

    return result;
  }

  /**
   * Clears any history accumulated in the lag creating filters.
   *
   * @throws Exception if something goes wrong.
   */
  public void clearLagHistories() throws Exception {

    if (m_artificialTimeMaker != null) {
      m_artificialTimeMaker.batchFinished();
    }

    for (Filter f : m_lagMakers) {
      f.batchFinished();
    }

    if (m_averagedLagMakers != null) {
      for (Filter f : m_averagedLagMakers) {
        f.batchFinished();
      }
    }

    if (m_timeIndexMakers != null) {
      for (Filter f : m_timeIndexMakers) {
        f.batchFinished();
      }
    }

    if (m_includeTimeLagCrossProducts && m_timeLagCrossProductMakers != null) {
      for (Filter f : m_timeLagCrossProductMakers) {
        f.batchFinished();
      }
    }
  }

  /**
   * Utility method to advance a supplied time value by one unit according to
   * the periodicity set for this LagMaker.
   *
   * @param valueToAdvance the time value to advance
   * @return the advanced value or the original value if this lag maker is not
   *         adjusting for trends
   */
  public double advanceSuppliedTimeValue(double valueToAdvance) {
    return weka.classifiers.timeseries.core.Utils.advanceSuppliedTimeValue(
        valueToAdvance, m_dateBasedPeriodicity);
  }

  public double decrementSuppliedTimeValue(double valueToDecrement) {
    return weka.classifiers.timeseries.core.Utils.decrementSuppliedTimeValue(
        valueToDecrement, m_dateBasedPeriodicity);
  }
}
TOP

Related Classes of weka.classifiers.timeseries.core.TSLagMaker$PeriodicityHandler

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.