Package weka.gui.beans

Source Code of weka.gui.beans.TimeSeriesForecasting

/*
* Copyright (c) 2011 Pentaho Corporation.  All rights reserved.
* This software was developed by Pentaho Corporation and is provided under the terms
* of the GNU Lesser General Public License, Version 2.1. You may not use
* this file except in compliance with the license. If you need a copy of the license,
* please go to http://www.gnu.org/licenses/lgpl-2.1.txt. The Original Code is Time Series
* Forecasting.  The Initial Developer is Pentaho Corporation.
*
* Software distributed under the GNU Lesser Public License is distributed on an "AS IS"
* basis, WITHOUT WARRANTY OF ANY KIND, either express or  implied. Please refer to
* the license for the specific language governing your rights and limitations.
*/

/*
*    TimeSeriesForecasting.java
*    Copyright (C) 2011 Pentaho Corporation
*
*/

package weka.gui.beans;

import java.awt.BorderLayout;
import java.beans.EventSetDescriptor;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;

import javax.swing.JPanel;

import org.apache.commons.codec.binary.Base64;

import weka.classifiers.evaluation.NumericPrediction;
import weka.classifiers.timeseries.AbstractForecaster;
import weka.classifiers.timeseries.WekaForecaster;
import weka.classifiers.timeseries.core.TSLagMaker;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Environment;
import weka.core.EnvironmentHandler;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Utils;
import weka.gui.Logger;

/**
* KnowledgeFlow component for producing a forecast using a time series
* forecasting model.
*
* @author Mark hall (mhall{[at]}pentaho{[dot]}com)
* @version $Revision: 49983 $
*
*/
@KFStep(category = "Time Series", toolTipText = "Forecasting using a time series model")
public class TimeSeriesForecasting extends JPanel implements BeanCommon,
    Visible, EventConstraints, EnvironmentHandler, InstanceListener,
    DataSourceListener {

  /**
   * For serialization
   */
  private static final long serialVersionUID = -7826178727365267059L;

  /** The structure of the data used to train the forecaster */
  protected transient Instances m_header;

  /** The forecaster to use for forecasting */
  protected transient WekaForecaster m_forecaster;

  /**
   * The output instances structure - typically the same as the input structure.
   * Will have additional attributes for upper and lower confidence intervals if
   * the forecaster produces them
   */
  protected transient Instances m_outgoingStructure;

  /**
   * The filename to load from - takes precedence over an encoded forecaster if
   * not null, not empty and not equal to "-NONE-"
   */
  protected String m_fileName = "-NONE-";

  /**
   * The file name to save the updated forecaster to if the user has opted to
   * rebuild the forecasting model on the incoming data
   */
  protected String m_saveFileName = "";

  /**
   * Base 64 encoded forecasting model - this allows the model to be embedded
   * in the XML knowledge flow file format rather than loaded from a file at
   * execution time. The value "-NONE-" means that no model is embedded.
   */
  protected String m_encodedForecaster = "-NONE-";

  /**
   * Number of future time steps to forecast (held as a String) - will be
   * ignored if overlay data is being used since the number of instances
   * containing overlay values will dictate the number of forecasted values
   * that can be produced
   */
  protected String m_numberOfStepsToForecast = "1";

  /** True if the forecaster should be rebuilt on incoming data */
  protected boolean m_rebuildForecaster = false;

  /**
   * The number of time units beyond the end of the training data used to train
   * the forecaster that the most recent incoming priming instance is. This is
   * used to adjust the artificial time stamp (if one is being used) to the
   * right value before a forecast is produced. Held as a String.
   */
  protected String m_artificialTimeStartOffset = "0";

  /** Logging object (may be null, in which case the console is used) */
  protected transient Logger m_log;

  /** Environment variables */
  protected transient Environment m_env;

  /** Upstream component sending us data (null when nothing is connected) */
  protected Object m_listenee = null;

  /** Name of the event through which the upstream component is connected */
  protected String m_incomingConnection = "";

  /** Components listening to our output */
  protected ArrayList<InstanceListener> m_instanceListeners = new ArrayList<InstanceListener>();

  /** Execution state of this component, as reported by isBusy() */
  private static enum Status {
    BUSY, IDLE;
  }

  /** Current execution state; stop() resets it to IDLE */
  protected Status m_forecastingStatus = Status.IDLE;

  /**
   * Global about information for this component.
   *
   * @return global information for this component
   */
  public String globalInfo() {
    return "Encapsulates a time series forecasting model and uses it to"
        + " produce forecasts given incoming historical data. Forecaster "
        + "can optionally be rebuilt using the incoming data before a "
        + "forecast is generated.";
  }

  /**
   * Visual representation of this bean - created with the text
   * "TimeSeriesForecasting" and the default classifier icons (static and
   * animated)
   */
  protected BeanVisual m_visual = new BeanVisual("TimeSeriesForecasting",
      BeanVisual.ICON_PATH + "DefaultClassifier.gif", BeanVisual.ICON_PATH
          + "DefaultClassifier_animated.gif");

  /**
   * Constructor
   */
  public TimeSeriesForecasting() {
    setLayout(new BorderLayout());
    useDefaultVisual();
    add(m_visual, BorderLayout.CENTER);
  }

  /**
   * Use the default images for a data source
   *
   */
  public void useDefaultVisual() {
    m_visual.loadIcons(BeanVisual.ICON_PATH + "DefaultClassifier.gif",
        BeanVisual.ICON_PATH + "DefaultClassifier_animated.gif");
  }

  /**
   * Set the logging object to use
   *
   * @param logger the logging object to use
   */
  public void setLog(Logger logger) {
    m_log = logger;
  }

  /**
   * Get the forecaster. Loads the forecaster from a file (if necessary).
   *
   * @return the forecasting model
   * @throws Exception if there is a problem loading the forecaster
   */
  public WekaForecaster getForecaster() throws Exception {
    if (m_forecaster != null) {
      return m_forecaster;
    } else {

      // try and decode the base64 string (if set)
      List<Object> model = getForecaster(m_encodedForecaster);
      if (model != null) {
        m_forecaster = (WekaForecaster) model.get(0);
        m_header = (Instances) model.get(1);
        return m_forecaster;
      }
    }

    return null;
  }

  /**
   * Decodes and returns a forecasting model (list containing the forecaster and
   * Instances object containing the structure of the data used to train the
   * forecaster) from a base 64 string.
   *
   * @param base64encoded a List<Object> containing forecaster and header
   *          encoded as a base 64 string
   *
   * @return the decoded List<Object> containing forecaster and header
   * @throws Exception if there is a problem decoding
   */
  public static List<Object> getForecaster(String base64encoded)
      throws Exception {
    if (base64encoded != null && base64encoded.length() > 0
        && !base64encoded.equals("-NONE-")) {

      byte[] decoded = decodeFromBase64(base64encoded);
      ByteArrayInputStream bis = new ByteArrayInputStream(decoded);
      ObjectInputStream ois = new ObjectInputStream(bis);

      List<Object> model = (List<Object>) ois.readObject();
      ois.close();
      return model;
    }

    return null;
  }

  /**
   * Set the base 64 encoded forecaster.
   *
   * @param encodedForecaster a base 64 encoded List<Object> containing the
   *          forecaster and header
   */
  public void setEncodedForecaster(String encodedForecaster) {
    m_encodedForecaster = encodedForecaster;
  }

  /**
   * Gets the base 64 encoded forecaster
   *
   * @return a base 64 string encoding a List<Object> that contains the
   *         forecasting model and the header
   */
  public String getEncodedForecaster() {
    return m_encodedForecaster;
  }

  /**
   * Set the filename to load from.
   *
   * @param filename the filename to load from
   */
  public void setFilename(String filename) {
    m_fileName = filename;
  }

  /**
   * Get the filename to load from.
   *
   * @return the filename to load from.
   */
  public String getFilename() {
    return m_fileName;
  }

  /**
   * Set the name of the file to save the forecasting model out to if the user
   * has opted to rebuild the forecaster using the incoming data.
   *
   * @param fileName the file name to save to.
   */
  public void setSaveFilename(String fileName) {
    m_saveFileName = fileName;
  }

  /**
   * Get the name of the file to save the forecasting model to if the user has
   * opted to rebuild the forecaster using the incoming data.
   *
   * @return the name of the file to save the forecaster to.
   */
  public String getSaveFilename() {
    return m_saveFileName;
  }

  /**
   * Set whether the forecaster should be rebuilt/re-estimated on the incoming
   * data.
   *
   * @param rebuild true if the forecaster should be rebuilt using the incoming
   *          data
   */
  public void setRebuildForecaster(boolean rebuild) {
    m_rebuildForecaster = rebuild;
  }

  /**
   * Get whether the forecaster will be rebuilt/re-estimated on the incoming
   * data.
   *
   * @return true if the forecaster is to be rebuilt on the incoming data
   */
  public boolean getRebuildForecaster() {
    return m_rebuildForecaster;
  }

  /**
   * Returns true if, at this time, the object will accept a connection
   * according to the supplied EventSetDescriptor
   *
   * @param esd the EventSetDescriptor
   * @return true if the object will accept a connection
   */
  public boolean connectionAllowed(EventSetDescriptor esd) {
    return connectionAllowed(esd.getName());
  }

  /**
   * Returns true if, at this time, the object will accept a connection with
   * respect to the named event
   *
   * @param eventName the event
   * @return true if the object will accept a connection
   */
  public boolean connectionAllowed(String arg0) {
    if (m_listenee != null) {
      return false;
    }
    return true;
  }

  /**
   * Set the number of time steps to forecast beyond the end of the incoming
   * priming data. This will be ignored if the forecaster is using overlay data
   * as the number of instances for which overlay data is present (and targets
   * are missing) in the incoming data will determine how many forecasted values
   * are produced.
   *
   * @param n the number of steps to forecast.
   */
  public void setNumStepsToForecast(String n) {
    m_numberOfStepsToForecast = n;
  }

  /**
   * Get the number of time steps to forecast beyond the end of the incoming
   * priming data. This will be ignored if the forecaster is using overlay data
   * as the number of instances for which overlay data is present (and targets
   * are missing) in the incoming data will determine how many forecasted values
   * are produced.
   *
   * @return the number of steps to forecast.
   */
  public String getNumStepsToForecast() {
    return m_numberOfStepsToForecast;
  }

  /**
   * Set the offset, from the value associated with the last training instance,
   * for the artificial time stamp. Has no effect if an artificial time stamp is
   * not in use by the forecaster. If in use, this needs to be set so that the
   * forecaster knows what time stamp value corresponds to the first requested
   * forecast (i.e. it should be equal to the number of recent historical
   * priming instances that occur after the last training instance in time).
   *
   * @param art the offset from the last artificial time value in the training
   *          data for which the forecast is requested.
   */
  public void setArtificialTimeStartOffset(String art) {
    m_artificialTimeStartOffset = art;
  }

  /**
   * Get the offset, from the value associated with the last training instance,
   * for the artificial time stamp. Has no effect if an artificial time stamp is
   * not in use by the forecaster. If in use, this needs to be set so that the
   * forecaster knows what time stamp value corresponds to the first requested
   * forecast (i.e. it should be equal to the number of recent historical
   * priming instances that occur after the last training instance in time).
   *
   * @return the offset from the last artificial time value in the training data
   *         for which the forecast is requested.
   */
  public String getArtificialTimeStartOffset() {
    return m_artificialTimeStartOffset;
  }

  /**
   * Notify this object that it has been registered as a listener with a source
   * with respect to the named event
   *
   * @param eventName the event
   * @param source the source with which this object has been registered as a
   *          listener
   */
  public void connectionNotification(String eventName, Object source) {
    if (connectionAllowed(eventName)) {
      m_listenee = source;
      m_incomingConnection = eventName;
    }
  }

  /**
   * Notify this object that it has been deregistered as a listener with a
   * source with respect to the supplied event name
   *
   * @param eventName the event
   * @param source the source with which this object has been registered as a
   *          listener
   */
  public void disconnectionNotification(String eventName, Object source) {
    if (m_listenee == source) {
      m_listenee = null;
      m_incomingConnection = "";
    }
  }

  /**
   * Get the custom (descriptive) name for this bean (if one has been set)
   *
   * @return the custom name (or the default name)
   */
  public String getCustomName() {
    return m_visual.getText();
  }

  /**
   * Returns true if. at this time, the bean is busy with some task.
   *
   * @return true if the bean is busy.
   */
  public boolean isBusy() {
    return (m_forecastingStatus == Status.BUSY);
  }

  /**
   * Set a custom (descriptive) name for this bean
   *
   * @param name the name to use
   */
  public void setCustomName(String name) {
    m_visual.setText(name);
  }

  /**
   * Stop the component from executing. Also attempts to tell the immediate
   * upstream component to stop.
   */
  public void stop() {
    m_forecastingStatus = Status.IDLE;
    // tell upstream component to stop
    if (m_listenee != null) {
      if (m_listenee instanceof BeanCommon) {
        ((BeanCommon) m_listenee).stop();
      }
    }
  }

  /**
   * Set environment variables to use.
   *
   * @param env the environment variables to use
   */
  public void setEnvironment(Environment env) {
    m_env = env;
  }

  /**
   * Gets the visual appearance of this wrapper bean
   */
  public BeanVisual getVisual() {
    return m_visual;
  }

  /**
   * Sets the visual appearance of this wrapper bean
   *
   * @param newVisual a <code>BeanVisual</code> value
   */
  public void setVisual(BeanVisual newVisual) {
    m_visual = newVisual;
  }

  /**
   * Encode the model and header into a base 64 string. A List<Object>
   * containing first the model and then the header is encoded.
   *
   * @param model the forecasting model to encode
   * @param header empty instances object containing just the structure of the
   *          data used to train the forecaster
   * @return a base 64 encoded String
   * @throws Exception if a problem occurs.
   */
  public static String encodeForecasterToBase64(WekaForecaster model,
      Instances header) throws Exception {

    if (model != null && header != null) {
      List<Object> modelAndHeader = new ArrayList<Object>();
      modelAndHeader.add(model);
      modelAndHeader.add(header);

      ByteArrayOutputStream bao = new ByteArrayOutputStream();
      BufferedOutputStream bos = new BufferedOutputStream(bao);
      ObjectOutputStream oo = new ObjectOutputStream(bos);
      oo.writeObject(modelAndHeader);
      oo.flush();

      byte[] modelBytes = bao.toByteArray();

      return encodeToBase64(modelBytes);
    } else {
      throw new Exception("[TimeSeriesForecasting] unable to encode model!");
    }

  }

  protected List<Object> loadModel(String filename) {

    List<Object> loaded = new ArrayList<Object>();
    try {
      if (!isEmpty(filename) && !filename.equals("-NONE-")) {

        // replace any environment variables
        if (m_env == null) {
          m_env = Environment.getSystemWide();
        }

        String filenameN = filename;
        try {
          filenameN = m_env.substitute(filename);
        } catch (Exception e) {
          // quietly ignore
        }

        InputStream is = new FileInputStream(filenameN);
        if (filenameN.toLowerCase().endsWith(".gz")) {
          is = new GZIPInputStream(is);
        }
        ObjectInputStream ois = new ObjectInputStream(new BufferedInputStream(
            is));
        WekaForecaster forecaster = (WekaForecaster) ois.readObject();

        Instances header = (Instances) ois.readObject();
        is.close();

        loaded.add(forecaster);
        loaded.add(header);
        return loaded;
      } else {
        logMessage("Model is null or no filename specified to load from!");
        return null;
      }
    } catch (Exception ex) {
      ex.printStackTrace();
      logMessage(ex.getMessage());
      return null;
    }
  }

  /**
   * Utility method to check if a String is null or empty ("").
   *
   * @param aString the String to check.
   * @return true if the supplied String is null or empty.
   */
  protected static boolean isEmpty(String aString) {
    if (aString == null || aString.length() == 0) {
      return true;
    }
    return false;
  }

  /**
   * Logs message to the log or System.out if the log object is not set.
   *
   * @param message the message to log
   */
  protected void logMessage(String message) {
    message = "[TimeSeriesForecasting] " + statusMessagePrefix() + message;
    if (m_log != null) {
      m_log.logMessage(message);
    } else {
      System.out.println(message);
    }
  }

  /**
   * Logs an error message to the log or System.err if the log object is not set
   *
   * @param message the message to log
   * @param ex the exception that generated the error (may be null)
   */
  protected void logError(String message, Exception ex) {
    String exceptionMessage = (ex != null) ? ex.getMessage() : "";
    message = "[TimeSeriesForecasting] " + statusMessagePrefix() + " "
        + message + exceptionMessage;
    if (m_log != null) {
      m_log.logMessage(message);
    } else {
      System.err.println(message);
    }
  }

  /**
   * Prints a message to the status area of the log or to System.out if the log
   * object is not set.
   *
   * @param message the message to output
   */
  protected void statusMessage(String message) {
    message = statusMessagePrefix() + message;
    if (m_log != null) {
      m_log.statusMessage(message);
    } else {
      System.out.println(message);
    }
  }

  /**
   * Prints a fixed error message to the status area of the log
   */
  protected void statusError() {
    String message = statusMessagePrefix() + "ERROR: see log for details";
    if (m_log != null) {
      m_log.statusMessage(message);
    } else {
      System.err.println(message);
    }
  }

  /**
   * Prints a warning message to the status area of the log
   *
   * @param message the message to log
   */
  protected void statusWarning(String message) {
    message = statusMessagePrefix() + "WARNING: " + message;
    if (m_log != null) {
      m_log.statusMessage(message);
    } else {
      System.out.println(message);
    }
  }

  protected static final String encodeToBase64(byte[] val) throws IOException {
    String string;
    if (val == null) {
      string = null;
    } else {
      ByteArrayOutputStream baos = new ByteArrayOutputStream();
      GZIPOutputStream gzos = new GZIPOutputStream(baos);
      BufferedOutputStream bos = new BufferedOutputStream(gzos);
      bos.write(val);
      bos.flush();
      bos.close();

      string = new String(Base64.encodeBase64(baos.toByteArray()));
    }
    return string;
  }

  protected static final byte[] decodeFromBase64(String string)
      throws Exception {

    byte[] bytes;
    if (string == null) {
      bytes = new byte[] {};
    } else {
      bytes = Base64.decodeBase64(string.getBytes());
    }
    if (bytes.length > 0) {
      ByteArrayInputStream bais = new ByteArrayInputStream(bytes);
      GZIPInputStream gzip = new GZIPInputStream(bais);
      BufferedInputStream bi = new BufferedInputStream(gzip);
      byte[] result = new byte[] {};

      byte[] extra = new byte[1000000];
      int nrExtra = bi.read(extra);
      while (nrExtra >= 0) {
        // add it to bytes...
        //
        int newSize = result.length + nrExtra;
        byte[] tmp = new byte[newSize];
        for (int i = 0; i < result.length; i++)
          tmp[i] = result[i];
        for (int i = 0; i < nrExtra; i++)
          tmp[result.length + i] = extra[i];

        // change the result
        result = tmp;
        nrExtra = bi.read(extra);
      }
      bytes = result;
      gzip.close();
    }

    return bytes;
  }

  /**
   * Returns true, if at the current time, the named event could be generated.
   * Assumes that the supplied event name is an event that could be generated by
   * this bean
   *
   * @param eventName the name of the event in question
   * @return true if the named event could be generated at this point in time
   */
  public boolean eventGeneratable(String eventName) {
    if (eventName.equals("instance") || eventName.equals("dataset")) {
      if (m_listenee == null) {
        return false;
      }
    }
    return true;
  }

  /**
   * Buffer for overlay data (if present in the incoming data) - only created
   * when the forecaster is using overlay data
   */
  protected transient Instances m_overlayData;

  /**
   * Holds the incoming data for either priming the forecaster or for rebuilding
   * the forecaster
   */
  protected transient Instances m_bufferedPrimeData;

  /** True if the forecaster is using overlay data */
  protected transient boolean m_isUsingOverlayData;

  /** The lag maker in use by the forecaster */
  protected transient TSLagMaker m_modelLagMaker;

  /**
   * Name of the time stamp attribute - taken from the lag maker when it is
   * adjusting for trends and is not using an artificial time index
   */
  protected transient String m_timeStampName = "";

  /** The names of the fields that the forecaster is predicting */
  protected transient List<String> m_fieldsToForecast;

  private void loadOrDecodeForecaster() {
    // filename takes precedence over encoded forecaster (if any)
    if (!isEmpty(m_fileName) && !m_fileName.equals("-NONE-")) {
      List<Object> loaded = loadModel(m_fileName);
      if (loaded == null) {
        statusError();
        logError("problem loading forecasting model.", null);
        stop();
        return;
      } else {
        m_forecaster = (WekaForecaster) loaded.get(0);
        m_header = (Instances) loaded.get(1);
      }
    } else if (m_encodedForecaster != null && m_encodedForecaster.length() > 0
        && !m_encodedForecaster.equals("-NONE-")) {
      try {
        getForecaster();
      } catch (Exception ex) {
        ex.printStackTrace();
        statusError();
        logError("a problem occurred while decoding the model. See the "
            + "log for details.", ex);
        stop();
        return;
      }
    } else {
      statusError();
      logError("unable to obtain a forecasting model to use.", null);
      stop();
      return;
    }
  }

  /**
   * Accept an incoming instance
   *
   * @param e incoming InstanceEvent encapsulating the instance to process.
   */
  public void acceptInstance(InstanceEvent e) {

    int status = e.getStatus();
    if (status == InstanceEvent.FORMAT_AVAILABLE) {
      Instances dataset = e.getStructure();

      loadOrDecodeForecaster();

      // check the structure
      if (!m_header.equalHeaders(dataset)) {
        statusError();
        logError("incoming instances structure does not match the structure "
            + "of the data used to train the forecaster.", null);
        stop();
        return;
      }

      m_forecastingStatus = Status.BUSY;
      processInstance(null, true);

      return;
    }

    // get the instance, process it
    processInstance(e.getInstance(), false);

    if (status == InstanceEvent.BATCH_FINISHED) {
      processInstance(null, false); // finished

      // generate forecast
      generateForecast();
      m_forecastingStatus = Status.IDLE;
    }
  }

  protected void processInstance(Instance toProcess, boolean first) {

    if (first) {
      m_overlayData = null;
      m_bufferedPrimeData = null;
      statusMessage("configuring forecaster...");

      m_modelLagMaker = m_forecaster.getTSLagMaker();
      if (!m_modelLagMaker.isUsingAnArtificialTimeIndex()
          && m_modelLagMaker.getAdjustForTrends()) {
        m_timeStampName = m_modelLagMaker.getTimeStampField();
      }

      m_isUsingOverlayData = m_forecaster.isUsingOverlayData();

      if (!m_rebuildForecaster) {
        // m_isIncrementallyPrimeable = true;
        logMessage("forecaster will be primed " + "incrementally.");

        // first reset lag histories
        try {
          m_forecaster.primeForecaster(new Instances(m_header, 0));
        } catch (Exception ex) {
          ex.printStackTrace();
          statusError();
          logError("problem during initialization of the " + "priming data.",
              ex);
          stop();
          return;
        }
      } else {
        logMessage("forecaster will be "
            + "rebuilt/re-estimated on incoming data");
      }

      if (m_isUsingOverlayData) {
        logMessage("forecaster is using "
            + "overlay data. We expect to see overlay attribute "
            + "values for the forecasting period.");
        m_overlayData = new Instances(m_header, 0);
      }

      if (m_rebuildForecaster) {
        m_bufferedPrimeData = new Instances(m_header, 0);
      }

      m_fieldsToForecast = AbstractForecaster.stringToList(m_forecaster
          .getFieldsToForecast());

      // notify instance listeners of the structure (same as incoming
      // but may have confidence bounds attributes added)
      m_outgoingStructure = new Instances(m_header);
      if (m_forecaster.isProducingConfidenceIntervals()) {
        ArrayList<Attribute> atts = new ArrayList<Attribute>();
        for (int i = 0; i < m_header.numAttributes(); i++) {
          atts.add((Attribute) m_header.attribute(i).copy());
        }
        for (String f : m_fieldsToForecast) {
          Attribute lb = new Attribute(f + "_lowerBound");
          Attribute ub = new Attribute(f + "_upperBound");
          atts.add(lb);
          atts.add(ub);
        }
        m_outgoingStructure = new Instances(m_header.relationName() + "_"
            + "plus_forecast", atts, 0);
      }
      InstanceEvent ie = new InstanceEvent(this, m_outgoingStructure);
      // notify all listeners
      notifyInstanceListeners(ie);

    } else if (toProcess == null) {
      // no more input. rebuild forecaster if necessary
      if (m_rebuildForecaster && m_bufferedPrimeData.numInstances() > 0) {

        // push out the historical data first
        for (int i = 0; i < m_bufferedPrimeData.numInstances(); i++) {
          InstanceEvent ie = new InstanceEvent(this,
              m_bufferedPrimeData.instance(i), InstanceEvent.INSTANCE_AVAILABLE);
          // notify listeners here
          notifyInstanceListeners(ie);
        }

        // rebuild the forecaster
        try {
          statusMessage("rebuilding the forecasting model...");
          logMessage("rebuilding the forecasting model.");
          m_forecaster.buildForecaster(m_bufferedPrimeData);

          statusMessage("priming the forecasting model...");
          logMessage("priming the forecasting model.");
          // prime the forecaster
          m_forecaster.primeForecaster(m_bufferedPrimeData);
        } catch (Exception e) {
          e.printStackTrace();
          statusError();
          logError("a problem occurred when rebuilding the forecaster", e);
          stop();
        }
      }

      if (m_rebuildForecaster && !isEmpty(m_saveFileName)) {
        // save the forecaster
        String saveName = m_saveFileName;
        if (m_env == null) {
          m_env = Environment.getSystemWide();
        }

        try {
          saveName = m_env.substitute(saveName);
        } catch (Exception ex) {
          // quietly ignore
        }

        statusMessage("Saving rebuilt forecasting model...");
        logMessage("Saving rebuild forecasting model to \"" + saveName + "\"");
        try {
          OutputStream os = new FileOutputStream(saveName);
          if (saveName.toLowerCase().endsWith(".gz")) {
            os = new GZIPOutputStream(os);
          }
          ObjectOutputStream oos = new ObjectOutputStream(
              new BufferedOutputStream(os));
          oos.writeObject(m_forecaster);
          oos.writeObject(m_header);
          oos.close();
        } catch (IOException e) {
          statusError();
          logError("a problem occurred when trying to save rebuilt model.", e);
          stop();
        }
      }

    } else {
      // if we are expecting overlay data, then check this instance to see if
      // all
      // target values predicted by the forecaster are missing. If so, then this
      // *might* indicate the start of the overlay data. We will start buffering
      // instances into the overlay buffer. If we get an instace with all
      // non-missing targets
      // at some future point then we will flush the overlay buffer either into
      // the
      // forecaster as priming instances (if forecaster is incrementally
      // primeable)
      // or into the buffered prime/training data if forecaster is not
      // incrementally
      // primeable or we are rebuilding/re-estimating the model

      if (m_isUsingOverlayData) {
        boolean allMissing = true;
        for (String field : m_fieldsToForecast) {
          if (!toProcess.isMissing(m_header.attribute(field))) {
            allMissing = false;
            break;
          }
        }

        if (allMissing) {
          // add it to the overlay buffer
          m_overlayData.add(toProcess);
          statusMessage("buffering overlay instance");
        } else {
          // check the overlay buffer - if it's not empty then flush it
          // into either the forecaster directly (if incrementally primeable)
          // or into the priming buffer
          if (m_overlayData.numInstances() > 0) {
            // first buffer this one (will get flushed anyway)
            m_overlayData.add(toProcess);
            logMessage("encountered a supposed "
                + "overlay instance with non-missing target values - "
                + "converting buffered overlay data into "
                + (m_rebuildForecaster ? "training" : "priming") + " data...");
            statusMessage("flushing overlay buffer.");

            for (int i = 0; i < m_overlayData.numInstances(); i++) {
              if (!m_rebuildForecaster) {
                try {
                  m_forecaster.primeForecasterIncremental(m_overlayData
                      .instance(i));

                  // output this instance immediately (make sure that we include
                  // any attributes for confidence intervals - these will be
                  // necessarily missing for historical instances)
                  Instance outgoing = convertToOutputFormat(m_overlayData
                      .instance(i));
                  InstanceEvent ie = new InstanceEvent(this, outgoing,
                      InstanceEvent.INSTANCE_AVAILABLE);
                  // notify listeners here
                  notifyInstanceListeners(ie);
                } catch (Exception e) {
                  e.printStackTrace();
                  statusError();
                  logError("problem occurred during priming.", e);
                  stop();
                  return;
                }

              } else {
                // transfer to the priming buffer
                m_bufferedPrimeData.add(m_overlayData.instance(i));
              }
            }

            // clear out the overlay data
            m_overlayData = new Instances(m_header, 0);
          } else {
            // not all missing and overlay buffer is empty then it's a priming
            // instance

            // either buffer it or send it directly to the forecaster (if
            // incrementally
            // primeable
            if (!m_rebuildForecaster) {
              try {
                m_forecaster.primeForecasterIncremental(toProcess);

                // output this instance immediately (make sure that we include
                // any attributes for confidence intervals - these will be
                // necessarily missing for historical instances)
                Instance outgoing = convertToOutputFormat(toProcess);
                InstanceEvent ie = new InstanceEvent(this, outgoing,
                    InstanceEvent.INSTANCE_AVAILABLE);
                // tell listeners
                notifyInstanceListeners(ie);
              } catch (Exception ex) {
                ex.printStackTrace();
                statusError();
                logError("problem occurred during priming.", ex);
                stop();
                return;
              }
            } else {
              // buffer
              m_bufferedPrimeData.add(toProcess);
            }
          }

        }
      } else {
        // not using overlay data
        if (!m_rebuildForecaster) {
          try {
            m_forecaster.primeForecasterIncremental(toProcess);

            // output this instance immediately (make sure that we include
            // any attributes for confidence intervals - these will be
            // necessarily missing for historical instances)
            Instance outgoing = convertToOutputFormat(toProcess);
            InstanceEvent ie = new InstanceEvent(this, outgoing,
                InstanceEvent.INSTANCE_AVAILABLE);
            notifyInstanceListeners(ie);
          } catch (Exception ex) {
            ex.printStackTrace();
            statusError();
            logError("problem occurred during priming.", ex);
            stop();
            return;
          }
        } else {
          // buffer
          m_bufferedPrimeData.add(toProcess);
        }
      }
    }
  }

  /**
   * Accept an incoming data set
   *
   * @param dse incoming DataSetEvent encapsulating the instances to process
   */
  public void acceptDataSet(DataSetEvent dse) {

    loadOrDecodeForecaster();

    Instances data = dse.getDataSet();
    if (!m_header.equalHeaders(data)) {
      statusError();
      logError(
          "Incoming instance structure does not match what the forecaster "
              + "was trained with", null);
      return;
    }

    if (dse.isStructureOnly()) {
      return;
    }

    m_forecastingStatus = Status.BUSY;
    processInstance(null, true);

    for (int i = 0; i < data.numInstances(); i++) {
      processInstance(data.instance(i), false);
    }

    processInstance(null, false); // finished

    // generate forecast
    generateForecast();

    m_forecastingStatus = Status.IDLE;
  }

  private void generateForecast() {

    // doesn't matter if we're not using a time stamp
    double lastTimeFromPrime = -1;

    if (m_modelLagMaker.getAdjustForTrends()
        && m_modelLagMaker.getTimeStampField() != null
        && m_modelLagMaker.getTimeStampField().length() > 0
        && !m_modelLagMaker.isUsingAnArtificialTimeIndex()) {
      try {
        lastTimeFromPrime = m_modelLagMaker.getCurrentTimeStampValue();
      } catch (Exception ex) {
        statusError();
        logError("a problem occurred while establishing the current "
            + "time stamp value", ex);
        stop();
        return;
      }
    } else if (m_modelLagMaker.getAdjustForTrends()
        && m_modelLagMaker.isUsingAnArtificialTimeIndex()) {

      // If an artificial time stamp is in use then we need to set the
      // initial value to whatever offset from training that the user has
      // indicated to be the first forecasted point.
      try {
        String artOff = m_artificialTimeStartOffset;
        if (m_env != null) {
          artOff = m_env.substitute(artOff);
        }
        double artificialStartValue = m_modelLagMaker
            .getArtificialTimeStartValue();
        artificialStartValue += Integer.parseInt(artOff);
        m_modelLagMaker.setArtificialTimeStartValue(artificialStartValue);
      } catch (Exception ex) {
        ex.printStackTrace();
        statusError();
        logError("unable to set the value of the artificial time stamp.", ex);
        stop();
        return;
      }
    }

    boolean overlay = (m_overlayData != null
        && m_overlayData.numInstances() > 0 && m_isUsingOverlayData);

    String numS = m_numberOfStepsToForecast;
    if (m_env != null) {
      try {
        numS = m_env.substitute(numS);
      } catch (Exception e) {
        // quietly ignore
        // e.printStackTrace();
      }
    }

    int numSteps = (overlay) ? m_overlayData.numInstances() : Integer
        .parseInt(numS);

    List<List<NumericPrediction>> forecast = null;

    // TODO adapt the log to PrintStream for the forecasting methods
    try {
      if (overlay) {
        forecast = m_forecaster.forecast(numSteps, m_overlayData);
      } else {
        forecast = m_forecaster.forecast(numSteps);
      }
    } catch (Exception ex) {
      ex.printStackTrace();
      statusError();
      logError("unable to generate a forecast.", ex);
      stop();
      return;
    }

    // now convert the forecast into instances. If we have overlay
    // data then we can just fill in the forecasted values (and
    // potentially add for confidence intervals)
    double time = lastTimeFromPrime;
    int timeStampIndex = -1;
    if (m_timeStampName.length() > 0) {
      Attribute timeStampAtt = m_outgoingStructure.attribute(m_timeStampName);
      if (timeStampAtt == null) {
        statusError();
        logError("couldn't find time stamp: " + m_timeStampName
            + "in the input data", null);
        stop();
        return;
      }
      timeStampIndex = timeStampAtt.index();
    }

    statusMessage("Generating forecast...");
    logMessage("Generating forecast.");
    for (int i = 0; i < numSteps; i++) {
      Instance outputI = null;
      double[] outVals = new double[m_outgoingStructure.numAttributes()];
      for (int j = 0; j < outVals.length; j++) {
        if (overlay) {
          outVals[j] = m_overlayData.instance(i).value(j);
        } else {
          outVals[j] = Utils.missingValue();
        }
      }
      List<NumericPrediction> predsForStep = forecast.get(i);

      if (timeStampIndex != -1) {
        // set time value
        time = m_modelLagMaker.advanceSuppliedTimeValue(time);
        outVals[timeStampIndex] = time;
      }

      for (int j = 0; j < m_fieldsToForecast.size(); j++) {
        String target = m_fieldsToForecast.get(j);
        int targetI = m_outgoingStructure.attribute(target).index();
        NumericPrediction predForTargetAtStep = predsForStep.get(j);
        double y = predForTargetAtStep.predicted();
        double yHigh = y;
        double yLow = y;
        double[][] conf = predForTargetAtStep.predictionIntervals();
        if (!Utils.isMissingValue(y)) {
          outVals[targetI] = y;
        }

        // any confidence bounds?
        if (conf.length > 0) {
          yLow = conf[0][0];
          yHigh = conf[0][1];
          int indexOfLow = m_outgoingStructure
              .attribute(target + "_lowerBound").index();
          int indexOfHigh = m_outgoingStructure.attribute(
              target + "_upperBound").index();
          outVals[indexOfLow] = yLow;
          outVals[indexOfHigh] = yHigh;
        }
      }
      outputI = new DenseInstance(1.0, outVals);
      outputI.setDataset(m_outgoingStructure);

      // notify listeners of output instance
      InstanceEvent ie = new InstanceEvent(this, outputI,
          InstanceEvent.INSTANCE_AVAILABLE);

      if (i == (numSteps - 1)) {
        ie.setStatus(InstanceEvent.BATCH_FINISHED);
      }
      notifyInstanceListeners(ie);
    }
    statusMessage("Finished.");
    logMessage("Finished. Generated " + numSteps + " forecasted values.");
  }

  private String statusMessagePrefix() {
    return getCustomName()
        + "$"
        + hashCode()
        + "|"
        + ((Utils.joinOptions(m_forecaster.getOptions()).length() > 0) ? Utils
            .joinOptions(m_forecaster.getOptions()) + "|" : "");
  }

  private Instance convertToOutputFormat(Instance incoming) {

    Instance output = (Instance) incoming.copy();
    if (m_forecaster.isProducingConfidenceIntervals()) {
      double[] values = new double[incoming.numAttributes()
          + (m_fieldsToForecast.size() * 2)];
      for (int i = 0; i < incoming.numAttributes(); i++) {
        values[i] = incoming.value(i);
      }

      // set all bounds to missing (initially)
      for (int i = incoming.numAttributes(); i < incoming.numAttributes()
          + (m_fieldsToForecast.size() * 2); i++) {
        values[i] = Utils.missingValue();
      }

      output = new DenseInstance(1.0, values);
    }
    output.setDataset(m_outgoingStructure);

    return output;
  }

  private void notifyInstanceListeners(InstanceEvent e) {
    ArrayList<InstanceListener> l = null;
    synchronized (this) {
      l = (ArrayList<InstanceListener>) m_instanceListeners.clone();
    }
    if (l.size() > 0) {
      for (int i = 0; i < l.size(); i++) {
        l.get(i).acceptInstance(e);
      }
    }
  }

  /**
   * Add a listener to be notified of outgoing InstanceEvents. The method is
   * synchronized on this component, the same monitor that is held while the
   * listener list is cloned for event delivery.
   *
   * @param l the listener to register to receive outgoing instance events
   */
  public synchronized void addInstanceListener(InstanceListener l) {
    m_instanceListeners.add(l);
  }

  /**
   * Deregister and remove a listener of InstanceEvents by removing it from
   * the listener list. The method is synchronized on this component, the same
   * monitor that is held while the listener list is cloned for event
   * delivery.
   *
   * @param l the listener to remove.
   */
  public synchronized void removeInstanceListener(InstanceListener l) {
    m_instanceListeners.remove(l);
  }
}
TOP

Related Classes of weka.gui.beans.TimeSeriesForecasting

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.