final int maxIterations, boolean verbose) {
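// sliding window over the most recent costs; seeding it with
// Double.MAX_VALUE makes the first real cost look like a huge improvement
// instead of triggering a spurious convergence or divergence break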
double[] lastCosts = new double[COST_HISTORY];
Arrays.fill(lastCosts, Double.MAX_VALUE);
final int lastIndex = lastCosts.length - 1;
DoubleVector lastTheta = null;
DoubleVector lastGradient = null;
DoubleVector theta = pInput;
double alpha = this.alpha;
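// alpha is the current learning rate: it starts at the configured initial
// rate and may be adapted below by the bold driver or by annealing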
for (int iteration = 0; iteration < maxIterations; iteration++) {
CostGradientTuple evaluateCost = f.evaluateCost(theta);
if (verbose) {
LOG.info("Iteration " + iteration + " | Cost: "
+ evaluateCost.getCost());
}
shiftLeft(lastCosts);
lastCosts[lastIndex] = evaluateCost.getCost();
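// shiftLeft is not shown in this fragment; it is assumed to drop the
// oldest cost by moving every entry one slot to the left, e.g. a minimal
// sketch:
// private static void shiftLeft(double[] array) {
//   System.arraycopy(array, 1, array, 0, array.length - 1);
// }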
// stop once the cost improvement falls below the convergence threshold
if (converged(lastCosts, breakDifference)) {
break;
}
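// converged is assumed to compare the two most recent costs in the window
// against the given threshold; a minimal sketch of that contract:
// private static boolean converged(double[] lastCosts, double limit) {
//   return Math.abs(lastCosts[lastCosts.length - 2]
//       - lastCosts[lastCosts.length - 1]) < limit;
// }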
// break if the cost keeps increasing, i.e. we are diverging
if (breakOnDivergence && ascending(lastCosts)) {
break;
}
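// ascending is assumed to report whether the cost window is monotonically
// increasing, meaning every recent step made things worse; a minimal
// sketch:
// private static boolean ascending(double[] lastCosts) {
//   for (int i = 1; i < lastCosts.length; i++) {
//     if (lastCosts[i - 1] >= lastCosts[i]) {
//       return false;
//     }
//   }
//   return true;
// }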
DoubleVector gradient = evaluateCost.getGradient();
// check the bold driver
if (boldDriver) {
if (lastGradient != null) {
double costDifference = getCostDifference(lastCosts);
if (costDifference < 0) {
// the cost decreased, so we can be bolder and grow the learning rate
// (note: the percentage fields are named after the observed cost change,
// not after what happens to alpha)
alpha += (alpha * boldDecreasePercentage);
} else {
// the cost increased, so we undo the last theta change and shrink the
// learning rate
theta = lastTheta;
gradient = lastGradient;
alpha -= (alpha * boldIncreasePercentage);
}
if (verbose) {
LOG.info("Iteration " + iteration + " | Alpha: " + alpha + "\n");
}
}
lastGradient = gradient;
}
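// getCostDifference is assumed to return the newest cost minus the
// previous one, so a negative value means the last step improved the cost:
// private static double getCostDifference(double[] lastCosts) {
//   return lastCosts[lastCosts.length - 1]
//       - lastCosts[lastCosts.length - 2];
// }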
// check annealing
if (annealingIteration > 0) {
// anneal the learning rate, always starting from the initial alpha:
// alpha_t = alpha_0 / (1 + t / annealingIteration); the cast guards
// against integer division truncating the schedule
alpha = this.alpha / (1d + iteration / (double) annealingIteration);
}
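// e.g. with an initial alpha of 0.1 and annealingIteration = 100 this
// yields 0.1 at iteration 0, 0.05 at iteration 100 and 0.025 at
// iteration 300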
// remember the theta from the previous iteration for the momentum term;
// assigning lastTheta before this point made the null check below dead
// code and turned momentum into a plain damping of the current step
DoubleVector previousTheta = lastTheta;
// save the current parameters, so the bold driver can undo this step
lastTheta = theta;
// gradient descent step: subtract the gradient scaled by the learning rate
theta = theta.subtract(gradient.multiply(alpha));
if (previousTheta != null && momentum != 0d) {
// classical momentum: keep moving along the direction of the previous
// update, scaled by the momentum parameter
theta = theta.add((lastTheta.subtract(previousTheta)).multiply(momentum));
}
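// in update form: theta_{t+1} = theta_t - alpha * gradient_t
//                             + momentum * (theta_t - theta_{t-1})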