/**
 * Performs one mini-batch gradient update for the back-propagation network:
 * feed-forward, error back-propagation (optionally with a KL sparsity penalty),
 * then a gradient-descent step on the weights and biases. The reconstruction
 * targets are the inputs themselves (auto-encoder style), so y_samples is not used here.
 */
protected void gradientUpdateMiniBatch(SGDTrainConfig config, DoubleMatrix x_samples, DoubleMatrix y_samples, SGDParam curr_param) {
int nbr_sample = x_samples.rows;
BPParam curr_pbparam = (BPParam)curr_param;
DoubleMatrix[] activation = new DoubleMatrix[curr_pbparam.nl];
DoubleMatrix[] l_bias = new DoubleMatrix[curr_pbparam.nl];
DoubleMatrix avg_hidden = null;
/**
* feedforward
*/
activation[0] = x_samples;
for(int i = 1; i < curr_pbparam.nl; i++) {
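// a_i = sigmoid(a_{i-1} * W_{i-1}^T + b_{i-1}); MathUtil.sigmod is assumed to apply the sigmoid in place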
activation[i] = activation[i - 1].mmul(curr_pbparam.w[i - 1].transpose()).addiRowVector(curr_pbparam.b[i - 1]);
MathUtil.sigmod(activation[i]);
}
// sparsity: average activation (rho_hat) of the first hidden layer over the mini-batch;
// note that this same rho_hat is reused for every hidden layer in the backward pass below,
// which is exact only when there is a single hidden layer
if(config.isForceSparsity()) {
avg_hidden = activation[1].columnSums().divi(nbr_sample);
}
/**
* backward
*/
// 1. output-layer delta for the squared-error reconstruction: (a - x) .* a .* (1 - a)
DoubleMatrix ai = activation[curr_pbparam.nl - 1];
l_bias[curr_pbparam.nl - 1] = ai.sub(x_samples).muli(ai).muli(ai.neg().addi(1));
// 2. propagate the deltas back through the hidden layers
for(int i = curr_pbparam.nl - 2; i >= 1; i--) {
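// delta_i = delta_{i+1} * W_i; the sparsity term and the sigmoid derivative are applied below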
l_bias[i] = l_bias[i + 1].mmul(curr_pbparam.w[i]);
if(config.isForceSparsity()) {
// gradient of the KL sparsity penalty: beta * ((1 - rho) / (1 - rho_hat_k) - rho / rho_hat_k)
double rho = config.getSparsity();
DoubleMatrix sparsity_v = avg_hidden.dup();
for(int k = 0; k < sparsity_v.columns; k++) {
double rho_hat_k = sparsity_v.get(0, k);
sparsity_v.put(0, k, config.getSparsityBeta() * ((1 - rho) / (1 - rho_hat_k) - rho / rho_hat_k));
}
l_bias[i].addiRowVector(sparsity_v);
}
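// multiply by the sigmoid derivative: delta_i .*= a_i .* (1 - a_i)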
ai = activation[i];
l_bias[i].muli(ai).muli(ai.neg().addi(1));
}
/**
 * gradient step: update weights and biases with the averaged mini-batch gradients
 */
for(int i = 0; i < curr_pbparam.w.length; i++) {
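// dW_i = delta_{i+1}^T * a_i, averaged over the mini-batch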
DoubleMatrix delta_wi = l_bias[i + 1].transpose().mmul(activation[i]).divi(nbr_sample);
if(config.isUseRegularization()) {
// for BP only the L2 penalty (weight decay) is applied
if(0 != config.getLamada2()) {
delta_wi.addi(curr_pbparam.w[i].mul(config.getLamada2()));
}
}
curr_pbparam.w[i].subi(delta_wi.muli(config.getLearningRate()));
}
for(int i = 0; i < curr_pbparam.b.length; i++) {
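// db_i = column-wise sum of delta_{i+1}, averaged over the mini-batch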
DoubleMatrix delta_bi = l_bias[i + 1].columnSums().divi(nbr_sample);
curr_pbparam.b[i].subi(delta_bi.transpose().muli(config.getLearningRate()));
}
}