/*
* Encog(tm) Core v3.0 - Java Version
* http://www.heatonresearch.com/encog/
* http://code.google.com/p/encog-java/
* Copyright 2008-2011 Heaton Research, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* For more information on Heaton Research copyrights, licenses
* and trademarks visit:
* http://www.heatonresearch.com/copyright
*/
package org.encog.neural.networks.training.lma;
import org.encog.mathutil.EncogMath;
import org.encog.mathutil.matrices.Matrix;
import org.encog.mathutil.matrices.decomposition.LUDecomposition;
import org.encog.ml.MLMethod;
import org.encog.ml.TrainingImplementationType;
import org.encog.ml.data.MLData;
import org.encog.ml.data.MLDataPair;
import org.encog.ml.data.MLDataSet;
import org.encog.ml.data.basic.BasicMLData;
import org.encog.ml.data.basic.BasicMLDataPair;
import org.encog.ml.train.BasicTraining;
import org.encog.neural.networks.BasicNetwork;
import org.encog.neural.networks.structure.NetworkCODEC;
import org.encog.neural.networks.training.TrainingError;
import org.encog.neural.networks.training.propagation.TrainingContinuation;
import org.encog.util.validate.ValidateNetwork;
/**
 * Trains a neural network using the Levenberg-Marquardt algorithm (LMA). This
 * training technique is based on the mathematical method of the same name.
*
* http://en.wikipedia.org/wiki/Levenberg%E2%80%93Marquardt_algorithm
*
 * The LMA training technique has some important limitations that you should be
 * aware of before using it.
 *
 * Only neural networks that have a single output neuron can be used with this
 * training technique.
 *
 * The entire training set must be loaded into memory. Because of this, an
 * indexable training set must be used.
*
* However, despite these limitations, the LMA training technique can be a very
* effective training method.
*
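 * A minimal usage sketch, assuming an already-constructed BasicNetwork named
 * network (with a single output neuron) and an in-memory MLDataSet named
 * trainingSet; both names are illustrative placeholders:
 *
 * <pre>
 * LevenbergMarquardtTraining train = new LevenbergMarquardtTraining(network,
 * 		trainingSet);
 * do {
 * 	train.iteration();
 * 	System.out.println("Error: " + train.getError());
 * } while (train.getError() > 0.01);
 * </pre>
 *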
 * References:
 * - http://www-alg.ist.hokudai.ac.jp/~jan/alpha.pdf
 * - http://www.inference.phy.cam.ac.uk/mackay/Bayes_FAQ.html
* ----------------------------------------------------------------
*
 * This implementation of the Levenberg-Marquardt algorithm is based heavily on code
* published in an article by Cesar Roberto de Souza. The original article can be
* found here:
*
* http://crsouza.blogspot.com/2009/11/neural-network-learning-by-levenberg_18.html
*
* Portions of this class are under the following copyright/license.
* Copyright 2009 by Cesar Roberto de Souza, Released under the LGPL.
*
*/
public class LevenbergMarquardtTraining extends BasicTraining {
/**
* The amount to scale the lambda by.
*/
public static final double SCALE_LAMBDA = 10.0;
/**
* The max amount for the LAMBDA.
*/
public static final double LAMBDA_MAX = 1e25;
/**
* Number of points for finite difference.
*/
	public static final int NUM_POINTS = 3;
/**
* Return the sum of the diagonal.
*
* @param m
* The matrix to sum.
* @return The trace of the matrix.
*/
public static double trace(final double[][] m) {
double result = 0.0;
for (int i = 0; i < m.length; i++) {
result += m[i][i];
}
return result;
}
/**
* The network that is to be trained.
*/
private final BasicNetwork network;
/**
* The training set that we are using to train.
*/
private final MLDataSet indexableTraining;
/**
* The training set length.
*/
private final int trainingLength;
/**
* The number of "parameters" in the LMA algorithm. The parameters are what
* the LMA adjusts to achieve the desired outcome. For neural network
* optimization, the parameters are the weights and bias values.
*/
private final int parametersLength;
/**
* The neural network weights and bias values.
*/
private double[] weights;
	/**
	 * The "Hessian" matrix, used by the LMA.
	 */
	private final Matrix hessianMatrix;
	/**
	 * The Jacobian, a matrix of partial derivatives.
	 */
	private final double[][] jacobian;
	/**
	 * The "Hessian" matrix as a 2d array.
	 */
	private final double[][] hessian;
	/**
	 * The beta is multiplied by the sum of the squared errors.
	 */
private final double beta;
/**
* The lambda, or damping factor. This is increased until a desirable
* adjustment is found.
*/
private double lambda;
/**
* The calculated gradients.
*/
private final double[] gradient;
	/**
	 * The diagonal of the Hessian.
	 */
private final double[] diagonal;
/**
* The amount to change the weights by.
*/
private double[] deltas;
/**
* Gamma, used for Bayesian regularization.
*/
private double gamma;
/**
* The training elements.
*/
private final MLDataPair pair;
/**
* The derivative step size for finite difference.
*/
private final double[] derivativeStepSize;
/**
* The coefficients for finite difference.
*/
private final double[][][] differentialCoefficients;
	/**
	 * The derivative step size.
	 */
	private static final double DERIV_STEP = 1e-2;
/**
* The errors.
*/
private final double[] errors;
/**
* Construct the LMA object.
*
* @param network
* The network to train. Must have a single output neuron.
* @param training
* The training data to use. Must be indexable.
*/
public LevenbergMarquardtTraining(final BasicNetwork network,
final MLDataSet training) {
super(TrainingImplementationType.Iterative);
ValidateNetwork.validateMethodToData(network, training);
if (network.getOutputCount() != 1) {
throw new TrainingError(
"Levenberg Marquardt requires an output layer with a single neuron.");
}
setTraining(training);
this.indexableTraining = getTraining();
this.network = network;
this.trainingLength = (int) this.indexableTraining.getRecordCount();
this.parametersLength = this.network.getStructure().calculateSize();
this.hessianMatrix = new Matrix(this.parametersLength,
this.parametersLength);
this.hessian = this.hessianMatrix.getData();
this.beta = 1.0;
this.lambda = 0.1;
this.deltas = new double[this.parametersLength];
this.gradient = new double[this.parametersLength];
this.diagonal = new double[this.parametersLength];
this.errors = new double[this.trainingLength];
this.jacobian = new double[this.trainingLength][this.parametersLength];
final BasicMLData input = new BasicMLData(
this.indexableTraining.getInputSize());
final BasicMLData ideal = new BasicMLData(
this.indexableTraining.getIdealSize());
this.pair = new BasicMLDataPair(input, ideal);
		// set up the coefficient arrays for the finite difference method
		this.differentialCoefficients = createCoefficients(LevenbergMarquardtTraining.NUM_POINTS);
this.derivativeStepSize = new double[this.parametersLength];
		// initialize the per-parameter step sizes
		for (int i = 0; i < this.parametersLength; i++) {
			this.derivativeStepSize[i] = LevenbergMarquardtTraining.DERIV_STEP;
		}
}
	/**
	 * Calculate the gradient and the quasi-Hessian matrix. The gradient is
	 * g = J'e, and the Hessian is approximated from the Jacobian as
	 * H = beta * J'J; the diagonal of H is saved so that the
	 * Levenberg-Marquardt damping term can be reapplied on each attempt.
	 */
public void calculateHessian() {
for (int i = 0; i < this.parametersLength; i++) {
// Compute Jacobian Matrix Errors
double s = 0.0;
for (int j = 0; j < this.trainingLength; j++) {
s += this.jacobian[j][i] * this.errors[j];
}
this.gradient[i] = s;
// Compute Quasi-Hessian Matrix using Jacobian (H = J'J)
for (int j = 0; j < this.parametersLength; j++) {
double c = 0.0;
for (int k = 0; k < this.trainingLength; k++) {
c += this.jacobian[k][i] * this.jacobian[k][j];
}
this.hessian[i][j] = this.beta * c;
}
}
for (int i = 0; i < this.parametersLength; i++) {
this.diagonal[i] = this.hessian[i][i];
}
}
	/**
	 * Calculate half of the sum of the squared weights, as used by Bayesian
	 * regularization.
	 *
	 * @return Half of the sum of the squared weights.
	 */
private double calculateSumOfSquaredWeights() {
double result = 0;
for (final double weight : this.weights) {
result += weight * weight;
}
return result / 2.0;
}
@Override
public boolean canContinue() {
return false;
}
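	/**
	 * Numerically estimate the partial derivative of the network's single
	 * output with respect to one weight, using the precomputed finite
	 * difference coefficients.
	 *
	 * @param inputData
	 *            The input to present to the network.
	 * @param layer
	 *            The "from" layer of the connection being perturbed.
	 * @param neuron
	 *            The "to" neuron of the connection.
	 * @param weight
	 *            The "from" neuron (or bias) of the connection.
	 * @param stepSize
	 *            The per-parameter step sizes; entry jj is updated here.
	 * @param networkOutput
	 *            The network output for the unperturbed weight.
	 * @param jj
	 *            The index of this parameter in the Jacobian row.
	 * @return The estimated partial derivative.
	 */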
private double computeDerivative(final MLData inputData, final int layer,
final int neuron, final int weight, final double[] stepSize,
final double networkOutput, final int jj) {
final int numPoints = this.differentialCoefficients.length;
		double ret = 0.0;
		// Save a copy of the original weight value.
		final double originalValue = this.network.getWeight(layer, weight,
				neuron);
		final double[] points = new double[numPoints];
		// Scale the step size to the magnitude of the weight.
		if (originalValue != 0.0) {
			stepSize[jj] = LevenbergMarquardtTraining.DERIV_STEP
					* Math.abs(originalValue);
		} else {
			stepSize[jj] = LevenbergMarquardtTraining.DERIV_STEP;
		}
final int centerPoint = (numPoints - 1) / 2;
for (int i = 0; i < numPoints; i++) {
if (i != centerPoint) {
final double newValue = originalValue + ((i - centerPoint))
* stepSize[jj];
this.network.setWeight(layer, weight, neuron, newValue);
final MLData output = this.network.compute(inputData);
points[i] = output.getData(0);
} else {
points[i] = networkOutput;
}
}
		// Combine the sampled outputs with the first-derivative coefficients
		// (row 1 of the coefficient table), then divide by the step size
		// raised to the derivative order (1 for a first derivative).
		for (int i = 0; i < this.differentialCoefficients.length; i++) {
			ret += this.differentialCoefficients[centerPoint][1][i] * points[i];
		}
		ret /= stepSize[jj];
		// Change back the modified value.
		this.network.setWeight(layer, weight, neuron, originalValue);
		return ret;
}
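	/**
	 * Create the finite difference coefficient tables. For each possible
	 * center point, a matrix of Taylor series terms is built and inverted;
	 * row 1 of each resulting table holds the coefficients used here to
	 * estimate a first derivative.
	 *
	 * @param points
	 *            The number of points used by the finite difference.
	 * @return The coefficient tables, indexed by center point, derivative
	 *         order, and point.
	 */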
	private double[][][] createCoefficients(final int points) {
final double[][][] coefficients = new double[points][points][points];
for (int i = 0; i < points; i++) {
final Matrix delts = new Matrix(points, points);
final double[][] ptr = delts.getData();
for (int j = 0; j < points; j++) {
final double delt = (j - i);
double hterm = 1.0;
for (int k = 0; k < points; k++) {
ptr[j][k] = hterm / EncogMath.factorial(k);
hterm *= delt;
}
}
final Matrix invMatrix = delts.inverse();
final double dNumPointsFactorial = EncogMath.factorial(points);
for (int j = 0; j < points; j++) {
for (int k = 0; k < points; k++) {
coefficients[i][j][k] = (Math
.round(invMatrix.getData()[j][k]
* dNumPointsFactorial))
/ dNumPointsFactorial;
}
}
}
return coefficients;
}
/**
* @return The trained network.
*/
@Override
public MLMethod getMethod() {
return this.network;
}
	/**
	 * Perform one iteration. The Jacobian is computed by finite differences,
	 * the gradient and quasi-Hessian are formed from it, and the linear
	 * system (H + lambda * I) * delta = g is solved for the weight update.
	 * Lambda is increased until an update is found that lowers the objective
	 * function.
	 */
@Override
public void iteration() {
LUDecomposition decomposition = null;
preIteration();
this.weights = NetworkCODEC.networkToArray(this.network);
double sumOfSquaredErrors = jacobianByFiniteDifference();
calculateHessian();
// Define the objective function
// bayesian regularization objective function
final double objective = this.beta * sumOfSquaredErrors;
double current = objective + 1.0;
		// Start the main Levenberg-Marquardt method
this.lambda /= LevenbergMarquardtTraining.SCALE_LAMBDA;
// We'll try to find a direction with less error
// (or where the objective function is smaller)
while ((current >= objective)
&& (this.lambda < LevenbergMarquardtTraining.LAMBDA_MAX)) {
this.lambda *= LevenbergMarquardtTraining.SCALE_LAMBDA;
// Update diagonal (Levenberg-Marquardt formula)
for (int i = 0; i < this.parametersLength; i++) {
this.hessian[i][i] = this.diagonal[i] + this.lambda;
}
// Decompose to solve the linear system
decomposition = new LUDecomposition(this.hessianMatrix);
			// Check if the damped Hessian has become singular
if (!decomposition.isNonsingular()) {
continue;
}
			// Solve the linear system using the LU decomposition
this.deltas = decomposition.Solve(this.gradient);
// Update weights using the calculated deltas
updateWeights();
// Calculate the new error
sumOfSquaredErrors = 0.0;
for (int i = 0; i < this.trainingLength; i++) {
this.indexableTraining.getRecord(i, this.pair);
final MLData actual = this.network
.compute(this.pair.getInput());
final double e = this.pair.getIdeal().getData(0)
- actual.getData(0);
sumOfSquaredErrors += e * e;
}
sumOfSquaredErrors /= 2.0;
// Update the objective function
current = this.beta * sumOfSquaredErrors;
			// If the objective function is bigger than before, the method
			// is tried again using a greater damping factor.
}
		// If this iteration caused an error drop, then the next iteration
		// will use a smaller damping factor.
this.lambda /= LevenbergMarquardtTraining.SCALE_LAMBDA;
setError(sumOfSquaredErrors);
postIteration();
}
	/**
	 * Calculate the Jacobian matrix using finite differences.
	 *
	 * @return The sum of squared errors of the network, divided by 2.
	 */
private double jacobianByFiniteDifference() {
double e;
double sumOfSquaredErrors = 0;
		int ji = 0;
		// for each training vector
		for (final MLDataPair pair : getTraining()) {
final MLData networkOutput = this.network.compute(pair.getInput());
// Calculate network error to build the residuals vector
e = pair.getIdeal().getData(0) - networkOutput.getData(0);
this.errors[ji] = e;
sumOfSquaredErrors += e * e;
			// Computation of one of the Jacobian matrix rows by numerical
			// differentiation: for each weight wj in the network, we have to
			// compute its partial derivative to build the Jacobian matrix.
int jj = 0;
// So, for each layer:
for (int layer = this.network.getLayerCount() - 1; layer > 0; layer--) {
// for each neuron:
for (int neuron = 0; neuron < this.network
.getLayerNeuronCount(layer); neuron++) {
// for each weight:
for (int weight = 0; weight < this.network
.getLayerTotalNeuronCount(layer - 1); weight++) {
// Compute its partial derivative
this.jacobian[ji][jj] = computeDerivative(
pair.getInput(), layer - 1, neuron, weight,
this.derivativeStepSize,
networkOutput.getData(0), jj);
jj++;
}
}
}
ji++;
}
// returns the sum of squared errors / 2
return sumOfSquaredErrors / 2.0;
}
/**
* {@inheritDoc}
*/
@Override
public TrainingContinuation pause() {
return null;
}
/**
* {@inheritDoc}
*/
@Override
public void resume(final TrainingContinuation state) {
}
	/**
	 * Update the weights by adding the calculated deltas to the current
	 * weights and writing the result back to the network.
	 */
public void updateWeights() {
final double[] w = this.weights.clone();
for (int i = 0; i < w.length; i++) {
w[i] += this.deltas[i];
}
NetworkCODEC.arrayToNetwork(w, this.network);
}
}