package rnaopencl;
import com.nativelibs4java.opencl.CLBuffer;
import com.nativelibs4java.opencl.CLContext;
import com.nativelibs4java.opencl.CLEvent;
import com.nativelibs4java.opencl.CLKernel;
import com.nativelibs4java.opencl.CLMem;
import com.nativelibs4java.opencl.CLProgram;
import com.nativelibs4java.opencl.CLQueue;
import com.nativelibs4java.opencl.JavaCL;
import com.nativelibs4java.util.IOUtils;
import com.nativelibs4java.util.NIOUtils;
import java.io.File;
import java.io.IOException;
import java.nio.ByteOrder;
import java.nio.DoubleBuffer;
import java.util.ArrayList;
/**
*
* @author matheuscas
*/
public class RNAOpencl2 {
public static void main(String[] args) throws IOException {
//read the input data
double[][] entradas = FuncoesCPU.lerArquivoEntradas(Param.nomeArquivo);
//OpenCL setup via JavaCL
CLContext context = JavaCL.createBestContext();
CLQueue queue = context.createDefaultQueue();
ByteOrder byteOrder = context.getByteOrder();
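//Pre-create one OpenCL input buffer per input row, so the training and test loops
//do not have to allocate and fill a new buffer on every iteration.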
ArrayList<CLBuffer<Double>> clBufferEntradas = new ArrayList<CLBuffer<Double>>(entradas.length);
for (int i = 0; i < entradas.length; i++) {
DoubleBuffer dBufferEntrada = NIOUtils.directDoubles(entradas[i].length, byteOrder);
FuncoesGPU.preencheBuffer(dBufferEntrada, entradas[i]);
clBufferEntradas.add(context.createDoubleBuffer(CLMem.Usage.Input, dBufferEntrada, true));
}
CLBuffer<Double> clBufferEntrada = null;
//read the .cl source file and build the program
String src = IOUtils.readText(new File("matvec.cl"));
CLProgram program = context.createProgram(src);
CLKernel kernelProdEscalar = program.createKernel("prod_escalar");
CLKernel kernelS2 = program.createKernel("s2");
CLKernel kernelS1 = program.createKernel("s1");
CLKernel kernelAtualizaPesos3 = program.createKernel("atualiza_pesos_3");
CLKernel kernelAtualizaPesos2 = program.createKernel("atualiza_pesos_2");
CLKernel kernelAtualizaPesos1 = program.createKernel("atualiza_pesos_1");
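//Network topology: 12 inputs -> 12 neurons (layer 1) -> 6 neurons (layer 2) -> 1 output neuron.
//prod_escalar runs one layer's forward pass; s2 and s1 compute the backpropagated deltas
//for layers 2 and 1; atualiza_pesos_3/2/1 update the weights of each layer.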
//----------------------------LAYER 1 VARIABLES
int qtdNeuronios_1 = 12;
//weights generated as a flat vector to simplify their use in the kernel
double[] pesos_1 = FuncoesCPU.gerarVetorAleatorio(qtdNeuronios_1 * qtdNeuronios_1, Param.min, Param.max);
double[] pesos_1_bias = FuncoesCPU.gerarVetorAleatorio(qtdNeuronios_1, Param.min, Param.max);
DoubleBuffer dBufferPesos1 = NIOUtils.directDoubles(pesos_1.length, byteOrder);
DoubleBuffer dBufferPesosBias1 = NIOUtils.directDoubles(pesos_1_bias.length, byteOrder);
FuncoesGPU.preencheBuffer(dBufferPesos1, pesos_1);
FuncoesGPU.preencheBuffer(dBufferPesosBias1, pesos_1_bias);
CLBuffer<Double> clBufferPesos1 = context.createDoubleBuffer(CLMem.Usage.InputOutput, dBufferPesos1, true);
CLBuffer<Double> clBufferPesosBias1 = context.createDoubleBuffer(CLMem.Usage.InputOutput, dBufferPesosBias1, true);
CLBuffer<Double> clBufferSaida1 = context.createDoubleBuffer(CLMem.Usage.InputOutput, qtdNeuronios_1);
//----------------------------LAYER 2 VARIABLES
int qtdNeuronios_2 = 6;
double[] pesos_2 = FuncoesCPU.gerarVetorAleatorio(qtdNeuronios_2 * qtdNeuronios_1, Param.min, Param.max);
double[] pesos_2_bias = FuncoesCPU.gerarVetorAleatorio(qtdNeuronios_2, Param.min, Param.max);
DoubleBuffer dBufferPesos2 = NIOUtils.directDoubles(qtdNeuronios_2 * qtdNeuronios_1, byteOrder);
DoubleBuffer dBufferPesosBias2 = NIOUtils.directDoubles(pesos_2_bias.length, byteOrder);
FuncoesGPU.preencheBuffer(dBufferPesos2, pesos_2);
FuncoesGPU.preencheBuffer(dBufferPesosBias2, pesos_2_bias);
CLBuffer<Double> clBufferPesos2 = context.createDoubleBuffer(CLMem.Usage.InputOutput, dBufferPesos2, true);
CLBuffer<Double> clBufferPesosBias2 = context.createDoubleBuffer(CLMem.Usage.InputOutput, dBufferPesosBias2, true);
CLBuffer<Double> clBufferSaida2 = context.createDoubleBuffer(CLMem.Usage.InputOutput, qtdNeuronios_2);
//----------------------------LAYER 3 VARIABLES
int qtdNeuronios_3 = 1;
double[] dvPesos3 = FuncoesCPU.gerarVetorAleatorio(qtdNeuronios_3 * qtdNeuronios_2, Param.min, Param.max);
double[] dvPesosBias3 = FuncoesCPU.gerarVetorAleatorio(qtdNeuronios_3, Param.min, Param.max);
DoubleBuffer dBufferPesos3 = NIOUtils.directDoubles(qtdNeuronios_3 * qtdNeuronios_2, byteOrder);
DoubleBuffer dBufferPesosBias3 = NIOUtils.directDoubles(dvPesosBias3.length, byteOrder);
FuncoesGPU.preencheBuffer(dBufferPesos3, dvPesos3);
FuncoesGPU.preencheBuffer(dBufferPesosBias3, dvPesosBias3);
CLBuffer<Double> clBufferPesos3 = context.createDoubleBuffer(CLMem.Usage.InputOutput, dBufferPesos3, true);
CLBuffer<Double> clBufferPesosBias3 = context.createDoubleBuffer(CLMem.Usage.InputOutput, dBufferPesosBias3, true);
CLBuffer<Double> clBufferSaida3 = context.createDoubleBuffer(CLMem.Usage.Output, qtdNeuronios_3);
// BACKPROPAGATION VARIABLES
CLBuffer<Double> clBufferS2 = context.createDoubleBuffer(CLMem.Usage.InputOutput, qtdNeuronios_2);
CLBuffer<Double> clBufferS1 = context.createDoubleBuffer(CLMem.Usage.InputOutput, qtdNeuronios_1);
double dSaidaFinal = 0.0;
double erro = 0.0;
double s3 = 0.0;
int epocas = 1000;
int tamanhoTreinamento = (int) (entradas.length * 0.85);
int indiceTeste = tamanhoTreinamento;
long init = System.currentTimeMillis();
double percentualErro = 0.0;
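//TRAINING: for each epoch, run the forward pass through the three layers on the GPU,
//then backpropagate the error and update the weights, also on the GPU.
//The first 85% of the inputs are used for training; the rest are left for testing.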
for (int epoca = 0; epoca < epocas; epoca++) {
for (int e = 0; e < tamanhoTreinamento; e++) {
//TODO possible latency point: each input requires an extra loop just to fill its buffer
//reuse the pre-created input buffer for this training example
clBufferEntrada = clBufferEntradas.get(e);
/* prod_escalar arguments:
* input values
* weights
* bias weights
* result
* number of neurons
* number of weights per neuron
*/
// FIRST LAYER
kernelProdEscalar.setArgs(clBufferEntrada, clBufferPesos1, clBufferPesosBias1,
clBufferSaida1, qtdNeuronios_1, qtdNeuronios_1);
//the global work size sets how many work-items execute the kernel
CLEvent prodEvt = kernelProdEscalar.enqueueNDRange(queue, new int[]{qtdNeuronios_1});
// SECOND LAYER: read the layer 1 output back and re-upload it as the next layer's input
clBufferSaida1 = context.createDoubleBuffer(CLMem.Usage.Input,
(DoubleBuffer) clBufferSaida1.read(queue, prodEvt), true);
//TODO this passes more neurons than the layer actually has (both in the arguments
//and in the kernel); check later, since it worked in the tests
kernelProdEscalar.setArgs(clBufferSaida1, clBufferPesos2, clBufferPesosBias2,
clBufferSaida2, qtdNeuronios_1, qtdNeuronios_1);
prodEvt = kernelProdEscalar.enqueueNDRange(queue, new int[]{qtdNeuronios_1});
//THIRD LAYER: read the layer 2 output back and re-upload it as input
clBufferSaida2 = context.createDoubleBuffer(CLMem.Usage.Input,
(DoubleBuffer) clBufferSaida2.read(queue, prodEvt), true);
//TODO this passes more neurons than the layer actually has (both in the arguments
//and in the kernel); check later, since it worked in the tests
kernelProdEscalar.setArgs(clBufferSaida2, clBufferPesos3, clBufferPesosBias3,
clBufferSaida3, qtdNeuronios_2, qtdNeuronios_2);
prodEvt = kernelProdEscalar.enqueueNDRange(queue, new int[]{qtdNeuronios_2});
// BACKPROPAGATION
dSaidaFinal = ((DoubleBuffer) clBufferSaida3.read(queue, prodEvt)).get(0);
erro = Param.target - dSaidaFinal;
percentualErro = Math.abs((erro / Param.target) * 100);
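//output-layer error term (delta), computed on the host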
s3 = -2 * FuncoesCPU.derivativeSigmoid(dSaidaFinal) * erro;
kernelS2.setArgs(clBufferPesos3, dSaidaFinal, erro,
clBufferSaida2, clBufferS2);
prodEvt = kernelS2.enqueueNDRange(queue, new int[]{qtdNeuronios_2});
clBufferS2 = context.createDoubleBuffer(CLMem.Usage.InputOutput,
(DoubleBuffer) clBufferS2.read(queue, prodEvt), true);
kernelS1.setArgs(clBufferPesos2,
clBufferS2,
clBufferSaida1,
clBufferS1,
qtdNeuronios_2);
prodEvt = kernelS1.enqueueNDRange(queue, new int[]{qtdNeuronios_1});
clBufferS1 = context.createDoubleBuffer(CLMem.Usage.InputOutput,
(DoubleBuffer) clBufferS1.read(queue, prodEvt), true);
//UPDATING THE WEIGHTS
//LAYER 3
kernelAtualizaPesos3.setArgs(clBufferPesos3, clBufferPesos2, dSaidaFinal,
erro, Param.taxaAprendizado);
prodEvt = kernelAtualizaPesos3.enqueueNDRange(queue, new int[]{qtdNeuronios_2});
clBufferPesos3 = context.createDoubleBuffer(CLMem.Usage.InputOutput,
(DoubleBuffer) clBufferPesos3.read(queue, prodEvt), true);
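//the output layer has a single bias weight, so it is updated on the host and re-uploaded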
dBufferPesosBias3.put(0, dBufferPesosBias3.get(0) - (Param.taxaAprendizado * s3 * 1));
clBufferPesosBias3 = context.createDoubleBuffer(CLMem.Usage.InputOutput, dBufferPesosBias3, true);
//LAYER 2
kernelAtualizaPesos2.setArgs(clBufferPesos2, clBufferS2, clBufferSaida1, qtdNeuronios_1,
Param.taxaAprendizado, clBufferPesosBias2);
prodEvt = kernelAtualizaPesos2.enqueueNDRange(queue, new int[]{qtdNeuronios_2});
clBufferPesos2 = context.createDoubleBuffer(CLMem.Usage.InputOutput,
(DoubleBuffer) clBufferPesos2.read(queue, prodEvt), true);
clBufferPesosBias2 = context.createDoubleBuffer(CLMem.Usage.InputOutput,
(DoubleBuffer) clBufferPesosBias2.read(queue, prodEvt), true);
//LAYER 1
kernelAtualizaPesos1.setArgs(clBufferPesos1, clBufferS1, clBufferEntrada,
qtdNeuronios_1, Param.taxaAprendizado, clBufferPesosBias1);
prodEvt = kernelAtualizaPesos1.enqueueNDRange(queue, new int[]{qtdNeuronios_1});
clBufferPesos1 = context.createDoubleBuffer(CLMem.Usage.InputOutput,
(DoubleBuffer) clBufferPesos1.read(queue, prodEvt), true);
clBufferPesosBias1 = context.createDoubleBuffer(CLMem.Usage.InputOutput,
(DoubleBuffer) clBufferPesosBias1.read(queue, prodEvt), true);
}
}
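//TEST: forward pass only, on the remaining ~15% of the inputs; print the network output for each one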
System.out.println("TESTE");
for (int e = indiceTeste; e < entradas.length; e++) {
//select the pre-created buffer for this test example
clBufferEntrada = clBufferEntradas.get(e);
// FIRST LAYER
kernelProdEscalar.setArgs(clBufferEntrada, clBufferPesos1, clBufferPesosBias1,
clBufferSaida1, qtdNeuronios_1, qtdNeuronios_1);
//the global work size sets how many work-items execute the kernel
CLEvent prodEvt = kernelProdEscalar.enqueueNDRange(queue, new int[]{qtdNeuronios_1});
// SECOND LAYER: read the layer 1 output back and re-upload it as the next layer's input
clBufferSaida1 = context.createDoubleBuffer(CLMem.Usage.Input,
(DoubleBuffer) clBufferSaida1.read(queue, prodEvt), true);
//TODO this passes more neurons than the layer actually has (both in the arguments
//and in the kernel); check later, since it worked in the tests
kernelProdEscalar.setArgs(clBufferSaida1, clBufferPesos2, clBufferPesosBias2,
clBufferSaida2, qtdNeuronios_1, qtdNeuronios_1);
prodEvt = kernelProdEscalar.enqueueNDRange(queue, new int[]{qtdNeuronios_1});
//THIRD LAYER: read the layer 2 output back and re-upload it as input
clBufferSaida2 = context.createDoubleBuffer(CLMem.Usage.Input,
(DoubleBuffer) clBufferSaida2.read(queue, prodEvt), true);
//TODO this passes more neurons than the layer actually has (both in the arguments
//and in the kernel); check later, since it worked in the tests
kernelProdEscalar.setArgs(clBufferSaida2, clBufferPesos3, clBufferPesosBias3,
clBufferSaida3, qtdNeuronios_2, qtdNeuronios_2);
prodEvt = kernelProdEscalar.enqueueNDRange(queue, new int[]{qtdNeuronios_2});
// read the final network output for this test example
dSaidaFinal = ((DoubleBuffer) clBufferSaida3.read(queue, prodEvt)).get(0);
System.out.println(dSaidaFinal);
}
System.out.println("");
long elapsed = System.currentTimeMillis() - init;
System.out.println("Elapsed time in millis: " + elapsed);
System.out.println("Percentual de erro final: " + percentualErro);
}
}