// Byte order reported by the OpenCL context; every direct NIO buffer below is allocated with it.
ByteOrder byteOrder = context.getByteOrder();
// Command queue on which all kernels and buffer reads are enqueued.
CLQueue queue = context.createDefaultQueue();
// Build one input CLBuffer per row of 'entradas' up front, so the training loop
// does not have to allocate and fill a buffer for every sample.
ArrayList<CLBuffer<Double>> clBufferEntradas = new ArrayList<CLBuffer<Double>>(entradas.length);
for (int idx = 0; idx < entradas.length; idx++) {
    // Host-side staging buffer with one slot per value of this row.
    DoubleBuffer hostEntrada = NIOUtils.directDoubles(entradas[idx].length, byteOrder);
    FuncoesGPU.preencheBuffer(hostEntrada, entradas[idx]);
    // NOTE(review): the trailing 'true' is presumably JavaCL's "copy" flag - confirm against the API docs.
    CLBuffer<Double> deviceEntrada = context.createDoubleBuffer(CLMem.Usage.Input, hostEntrada, true);
    clBufferEntradas.add(deviceEntrada);
}
// Input buffer of the sample currently being processed; assigned inside the training loop.
CLBuffer<Double> clBufferEntrada = null;
// Read the OpenCL source text from "matvec.cl" and create the program from it.
String src = IOUtils.readText(new File("matvec.cl"));
CLProgram program = context.createProgram(src);
// Kernels are created once here and reused on every iteration of the training loop.
// Forward pass:
CLKernel kernelProdEscalar = program.createKernel("prod_escalar");
// Backpropagation terms for layers 1 and 2:
CLKernel kernelS1 = program.createKernel("s1");
CLKernel kernelS2 = program.createKernel("s2");
// Weight updates, one kernel per layer:
CLKernel kernelAtualizaPesos1 = program.createKernel("atualiza_pesos_1");
CLKernel kernelAtualizaPesos2 = program.createKernel("atualiza_pesos_2");
CLKernel kernelAtualizaPesos3 = program.createKernel("atualiza_pesos_3");
//----------------------------LAYER 1 VARIABLES
int qtdNeuronios_1 = 12;
// Weights are generated as a flat vector (qtdNeuronios_1 * qtdNeuronios_1 entries)
// to make them easier to use in the kernel.
double[] pesos_1 = FuncoesCPU.gerarVetorAleatorio(qtdNeuronios_1 * qtdNeuronios_1, Param.min, Param.max);
double[] pesos_1_bias = FuncoesCPU.gerarVetorAleatorio(qtdNeuronios_1, Param.min, Param.max);
// Host staging buffer with exactly one slot per weight, like the layer 2 and 3 buffers.
// Sizing it to pesos_1.length squared would allocate, and upload to the device,
// far more memory than the weights occupy.
DoubleBuffer dBufferPesos1 = NIOUtils.directDoubles(pesos_1.length, byteOrder);
DoubleBuffer dBufferPesosBias1 = NIOUtils.directDoubles(pesos_1_bias.length, byteOrder);
FuncoesGPU.preencheBuffer(dBufferPesos1, pesos_1);
FuncoesGPU.preencheBuffer(dBufferPesosBias1, pesos_1_bias);
// Device buffers for weights and biases, created from the filled host buffers.
// They are InputOutput because the weight-update kernels are given them as arguments.
CLBuffer<Double> clBufferPesos1 = context.createDoubleBuffer(CLMem.Usage.InputOutput, dBufferPesos1, true);
CLBuffer<Double> clBufferPesosBias1 = context.createDoubleBuffer(CLMem.Usage.InputOutput, dBufferPesosBias1, true);
// Layer 1 output: one value per neuron.
CLBuffer<Double> clBufferSaida1 = context.createDoubleBuffer(CLMem.Usage.InputOutput, qtdNeuronios_1);
//----------------------------LAYER 2 VARIABLES
int qtdNeuronios_2 = 6;
// Random initial weights (flat vector, qtdNeuronios_2 * qtdNeuronios_1 entries) and biases.
double[] pesos_2 = FuncoesCPU.gerarVetorAleatorio(qtdNeuronios_2 * qtdNeuronios_1, Param.min, Param.max);
double[] pesos_2_bias = FuncoesCPU.gerarVetorAleatorio(qtdNeuronios_2, Param.min, Param.max);

// Weights: host staging buffer -> fill -> device buffer.
DoubleBuffer dBufferPesos2 = NIOUtils.directDoubles(qtdNeuronios_2 * qtdNeuronios_1, byteOrder);
FuncoesGPU.preencheBuffer(dBufferPesos2, pesos_2);
CLBuffer<Double> clBufferPesos2 = context.createDoubleBuffer(CLMem.Usage.InputOutput, dBufferPesos2, true);

// Biases: host staging buffer -> fill -> device buffer.
DoubleBuffer dBufferPesosBias2 = NIOUtils.directDoubles(pesos_2_bias.length, byteOrder);
FuncoesGPU.preencheBuffer(dBufferPesosBias2, pesos_2_bias);
CLBuffer<Double> clBufferPesosBias2 = context.createDoubleBuffer(CLMem.Usage.InputOutput, dBufferPesosBias2, true);

// Layer 2 output: one value per neuron.
CLBuffer<Double> clBufferSaida2 = context.createDoubleBuffer(CLMem.Usage.InputOutput, qtdNeuronios_2);
//----------------------------LAYER 3 VARIABLES
int qtdNeuronios_3 = 1;
// Random initial weights (flat vector, qtdNeuronios_3 * qtdNeuronios_2 entries) and biases.
double[] dvPesos3 = FuncoesCPU.gerarVetorAleatorio(qtdNeuronios_3 * qtdNeuronios_2, Param.min, Param.max);
double[] dvPesosBias3 = FuncoesCPU.gerarVetorAleatorio(qtdNeuronios_3, Param.min, Param.max);

// Weights: host staging buffer -> fill -> device buffer.
DoubleBuffer dBufferPesos3 = NIOUtils.directDoubles(qtdNeuronios_3 * qtdNeuronios_2, byteOrder);
FuncoesGPU.preencheBuffer(dBufferPesos3, dvPesos3);
CLBuffer<Double> clBufferPesos3 = context.createDoubleBuffer(CLMem.Usage.InputOutput, dBufferPesos3, true);

// Biases: the host buffer is kept because the training loop updates the layer 3 bias
// on the CPU and re-creates the device buffer from it.
DoubleBuffer dBufferPesosBias3 = NIOUtils.directDoubles(dvPesosBias3.length, byteOrder);
FuncoesGPU.preencheBuffer(dBufferPesosBias3, dvPesosBias3);
CLBuffer<Double> clBufferPesosBias3 = context.createDoubleBuffer(CLMem.Usage.InputOutput, dBufferPesosBias3, true);

// Final network output: one value per layer 3 neuron.
CLBuffer<Double> clBufferSaida3 = context.createDoubleBuffer(CLMem.Usage.Output, qtdNeuronios_3);
// BACKPROPAGATION VARIABLES
// Device buffers written by the 's1' and 's2' kernels: one value per neuron of the matching layer.
CLBuffer<Double> clBufferS1 = context.createDoubleBuffer(CLMem.Usage.InputOutput, qtdNeuronios_1);
CLBuffer<Double> clBufferS2 = context.createDoubleBuffer(CLMem.Usage.InputOutput, qtdNeuronios_2);
// Per-sample scalars computed on the host inside the training loop.
double dSaidaFinal = 0.0, erro = 0.0, s3 = 0.0;
double percentualErro = 0.0;
// Training configuration: number of epochs, and the first 85% of the samples used for training.
int epocas = 1000;
int tamanhoTreinamento = (int) (entradas.length * 0.85);
// Index of the first sample after the training portion.
int indiceTeste = tamanhoTreinamento;
// Wall-clock timestamp (ms) taken right before the training loop starts.
long init = System.currentTimeMillis();
for (int epoca = 0; epoca < epocas; epoca++) {
for (int e = 0; e < tamanhoTreinamento; e++) {
//TODO possible latency hotspot: every input would need another
// 'for' loop just to fill its buffer.
//DoubleBuffer dBufferEntrada = NIOUtils.directDoubles(entradas[e].length, byteOrder);
//FuncoesGPU.preencheBuffer(dBufferEntrada, entradas[e]);
//CLBuffer<Double> clBufferEntrada = context.createDoubleBuffer(CLMem.Usage.Input, dBufferEntrada, true);
clBufferEntrada = clBufferEntradas.get(0);
//kernel = program.createKernel("prod_escalar");
/*args
* input
* pesos
* pesos bias
* result
* quantidade de neuronios
* quantidade pesos por neuronio
*/
// PRIMEIRA CAMADA
kernelProdEscalar.setArgs(clBufferEntrada, clBufferPesos1, clBufferPesosBias1,
clBufferSaida1, qtdNeuronios_1, qtdNeuronios_1);
//aqui diz quantos work itens trabalharao para executar o kernel
CLEvent prodEvt = kernelProdEscalar.enqueueNDRange(queue, new int[]{qtdNeuronios_1});
//faz a leitura do 'result'
//DoubleBuffer dBufferResSaida1 = clBufferSaida1.read(queue,prodEvt);
// SEGUNDA CAMADA
clBufferSaida1 = context.createDoubleBuffer(CLMem.Usage.Input,
(DoubleBuffer) clBufferSaida1.read(queue, prodEvt), true);
//TODO this passes more neurons than the layer has. Verify later,
//since it worked in testing. Applies to both the arguments and the kernel.
kernelProdEscalar.setArgs(clBufferSaida1, clBufferPesos2, clBufferPesosBias2,
clBufferSaida2, qtdNeuronios_1, qtdNeuronios_1);
prodEvt = kernelProdEscalar.enqueueNDRange(queue, new int[]{qtdNeuronios_1});
//DoubleBuffer dBufferResSaida2 = clBufferSaida2.read(queue, prodEvt);
//TERCEIRA CAMADA
clBufferSaida2 = context.createDoubleBuffer(CLMem.Usage.Input,
(DoubleBuffer) clBufferSaida2.read(queue, prodEvt), true);
//TODO this passes more neurons than the layer has. Verify later,
//since it worked in testing. Applies to both the arguments and the kernel.
kernelProdEscalar.setArgs(clBufferSaida2, clBufferPesos3, clBufferPesosBias3,
clBufferSaida3, qtdNeuronios_2, qtdNeuronios_2);
prodEvt = kernelProdEscalar.enqueueNDRange(queue, new int[]{qtdNeuronios_2});
//DoubleBuffer dBufferSaidaFinal = clBufferSaida3.read(queue,prodEvt);
// BACKPROPAGATION
dSaidaFinal = ((DoubleBuffer) clBufferSaida3.read(queue, prodEvt)).get(0);
erro = Param.target - dSaidaFinal;
percentualErro = Math.abs((erro / Param.target) * 100);
s3 = -2 * FuncoesCPU.derivativeSigmoid(dSaidaFinal) * erro;
//kernel = program.createKernel("s2");
kernelS2.setArgs(clBufferPesos3, dSaidaFinal, erro,
clBufferSaida2, clBufferS2);
prodEvt = kernelS2.enqueueNDRange(queue, new int[]{qtdNeuronios_2});
//DoubleBuffer dBufferResS2 = clBufferS2.read(queue,prodEvt);
clBufferS2 = context.createDoubleBuffer(CLMem.Usage.InputOutput,
(DoubleBuffer) clBufferS2.read(queue, prodEvt), true);
//kernel = program.createKernel("s1");
kernelS1.setArgs(clBufferPesos2,
clBufferS2,
clBufferSaida1,
clBufferS1,
qtdNeuronios_2);
prodEvt = kernelS1.enqueueNDRange(queue, new int[]{qtdNeuronios_1});
//DoubleBuffer dBufferResS1 = clBufferS1.read(queue,prodEvt);
clBufferS1 = context.createDoubleBuffer(CLMem.Usage.InputOutput,
(DoubleBuffer) clBufferS1.read(queue, prodEvt), true);
//ATUALIZANDO OS PESOS
//CAMADA 3
//kernel = program.createKernel("atualiza_pesos_3");
kernelAtualizaPesos3.setArgs(clBufferPesos3, clBufferPesos2, dSaidaFinal,
erro, Param.taxaAprendizado);
prodEvt = kernelAtualizaPesos3.enqueueNDRange(queue, new int[]{qtdNeuronios_2});
//DoubleBuffer dBufferResPesos3 = clBufferPesos3.read(queue,prodEvt);
clBufferPesos3 = context.createDoubleBuffer(CLMem.Usage.InputOutput,
(DoubleBuffer) clBufferPesos3.read(queue, prodEvt), true);
dBufferPesosBias3.put(0, dBufferPesosBias3.get(0) - (Param.taxaAprendizado * s3 * 1));
clBufferPesosBias3 = context.createDoubleBuffer(CLMem.Usage.InputOutput, dBufferPesosBias3, true);
//CAMADA 2
//kernel = program.createKernel("atualiza_pesos_2");
kernelAtualizaPesos2.setArgs(clBufferPesos2, clBufferS2, clBufferSaida1, qtdNeuronios_1,