/*
* DiabloMiner - OpenCL miner for Bitcoin
* Copyright (C) 2010, 2011, 2012 Patrick McFarland <diablod3@gmail.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package com.diablominer.DiabloMiner.DeviceState;
import java.nio.ByteBuffer;
import java.nio.IntBuffer;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.concurrent.atomic.AtomicLong;
import org.lwjgl.BufferUtils;
import org.lwjgl.LWJGLUtil;
import org.lwjgl.PointerBuffer;
import org.lwjgl.opencl.CL10;
import org.lwjgl.opencl.CL12;
import org.lwjgl.opencl.CLCommandQueue;
import org.lwjgl.opencl.CLContext;
import org.lwjgl.opencl.CLContextCallback;
import org.lwjgl.opencl.CLDevice;
import org.lwjgl.opencl.CLKernel;
import org.lwjgl.opencl.CLMem;
import org.lwjgl.opencl.CLPlatform;
import org.lwjgl.opencl.CLProgram;
import com.diablominer.DiabloMiner.DiabloMiner;
import com.diablominer.DiabloMiner.DiabloMinerFatalException;
import com.diablominer.DiabloMiner.NetworkState.WorkState;
public class GPUDeviceState extends DeviceState {
final static int OUTPUTS = 16;
final PlatformVersion platform_version;
final CLDevice device;
final CLContext context;
final CLKernel kernel;
AtomicLong workSize = new AtomicLong(0);
long workSizeBase;
boolean hwcheck;
final PointerBuffer localWorkSize = BufferUtils.createPointerBuffer(1);
final ExecutionState executions[];
AtomicLong runs = new AtomicLong(0);
long lastRuns = 0;
long startTime = 0;
long lastTime = 0;
GPUHardwareType hardwareType;
GPUDeviceState(GPUHardwareType hardwareType, String deviceName, CLPlatform platform, PlatformVersion platform_version, CLDevice device) throws DiabloMinerFatalException {
this.platform_version = platform_version;
this.hardwareType = hardwareType;
this.diabloMiner = hardwareType.getDiabloMiner();
this.deviceName = deviceName;
this.resetNetworkState = DiabloMiner.now();
this.executions = new ExecutionState[GPUHardwareType.EXECUTION_TOTAL];
boolean hasBitAlign;
boolean hasBFI_INT = false;
CLProgram program;
this.device = device;
PointerBuffer properties = BufferUtils.createPointerBuffer(3);
properties.put(CL10.CL_CONTEXT_PLATFORM).put(platform.getPointer()).put(0).flip();
int err = 0;
int deviceCU = device.getInfoInt(CL10.CL_DEVICE_MAX_COMPUTE_UNITS);
long deviceWorkSize;
int forceWorkSize = diabloMiner.getGPUForceWorkSize();
if(forceWorkSize > 0)
deviceWorkSize = forceWorkSize;
else if(LWJGLUtil.getPlatform() != LWJGLUtil.PLATFORM_MACOSX)
deviceWorkSize = device.getInfoSize(CL10.CL_DEVICE_MAX_WORK_GROUP_SIZE);
else
deviceWorkSize = 64;
context = CL10.clCreateContext(properties, device, new CLContextCallback() {
protected void handleMessage(String errinfo, ByteBuffer private_info) {
diabloMiner.error(errinfo);
}
}, null);
ByteBuffer extb = BufferUtils.createByteBuffer(1024);
CL10.clGetDeviceInfo(device, CL10.CL_DEVICE_EXTENSIONS, extb, null);
byte[] exta = new byte[1024];
extb.get(exta);
if(new String(exta).contains("cl_amd_media_ops"))
hasBitAlign = true;
else
hasBitAlign = false;
if(hasBitAlign) {
if(deviceName.contains("Cedar") || deviceName.contains("Redwood") || deviceName.contains("Juniper") || deviceName.contains("Cypress") || deviceName.contains("Hemlock") || deviceName.contains("Caicos") || deviceName.contains("Turks") || deviceName.contains("Barts") || deviceName.contains("Cayman") || deviceName.contains("Antilles") || deviceName.contains("Palm") || deviceName.contains("Sumo") || deviceName.contains("Wrestler") || deviceName.contains("WinterPark") || deviceName.contains("BeaverCreek"))
hasBFI_INT = true;
}
// String compileOptions =
// "-save-temps="+(device.getInfoString(CL10.CL_DEVICE_NAME).trim());
String compileOptions = "";
compileOptions += " -D WORKSIZE=" + deviceWorkSize;
if(hasBitAlign)
compileOptions += " -D BITALIGN";
if(hasBFI_INT)
compileOptions += " -D BFIINT";
program = CL10.clCreateProgramWithSource(context, hardwareType.getSource(), null);
err = CL10.clBuildProgram(program, device, compileOptions, null);
if(err != CL10.CL_SUCCESS) {
ByteBuffer logBuffer = BufferUtils.createByteBuffer(1024);
byte[] log = new byte[1024];
CL10.clGetProgramBuildInfo(program, device, CL10.CL_PROGRAM_BUILD_LOG, logBuffer, null);
logBuffer.get(log);
System.out.println(new String(log));
throw new DiabloMinerFatalException(diabloMiner, "Failed to build program on " + deviceName);
}
if(hasBFI_INT) {
diabloMiner.info("BFI_INT patching enabled, disabling hardware check errors");
hwcheck = false;
int binarySize = (int) program.getInfoSizeArray(CL10.CL_PROGRAM_BINARY_SIZES)[0];
ByteBuffer binary = BufferUtils.createByteBuffer(binarySize);
program.getInfoBinaries(binary);
for(int pos = 0; pos < binarySize - 4; pos++) {
if((long) (0xFFFFFFFF & binary.getInt(pos)) == 0x464C457FL && (long) (0xFFFFFFFF & binary.getInt(pos + 4)) == 0x64010101L) {
boolean firstText = true;
int offset = binary.getInt(pos + 32);
short entrySize = binary.getShort(pos + 46);
short entryCount = binary.getShort(pos + 48);
short index = binary.getShort(pos + 50);
int header = pos + offset;
int nameTableOffset = binary.getInt(header + index * entrySize + 16);
int size = binary.getInt(header + index * entrySize + 20);
int entry = header;
for(int section = 0; section < entryCount; section++) {
int nameIndex = binary.getInt(entry);
offset = binary.getInt(entry + 16);
size = binary.getInt(entry + 20);
int name = pos + nameTableOffset + nameIndex;
if((long) (0xFFFFFFFF & binary.getInt(name)) == 0x7865742E) {
if(firstText) {
firstText = false;
} else {
int sectionStart = pos + offset;
for(int i = 0; i < size / 8; i++) {
long instruction1 = (long) (0xFFFFFFFF & binary.getInt(sectionStart + i * 8));
long instruction2 = (long) (0xFFFFFFFF & binary.getInt(sectionStart + i * 8 + 4));
if((instruction1 & 0x02001000L) == 0x00000000L && (instruction2 & 0x9003F000L) == 0x0001A000L) {
instruction2 ^= (0x0001A000L ^ 0x0000C000L);
binary.putInt(sectionStart + i * 8 + 4, (int) instruction2);
}
}
}
}
entry += entrySize;
}
break;
}
}
IntBuffer binaryErr = BufferUtils.createIntBuffer(1);
CL10.clReleaseProgram(program);
program = CL10.clCreateProgramWithBinary(context, device, binary, binaryErr, null);
err = CL10.clBuildProgram(program, device, compileOptions, null);
if(err != CL10.CL_SUCCESS) {
throw new DiabloMinerFatalException(diabloMiner, "Failed to BFI_INT patch kernel on " + deviceName);
}
}
kernel = CL10.clCreateKernel(program, "search", null);
if(kernel == null) {
throw new DiabloMinerFatalException(diabloMiner, "Failed to create kernel on " + deviceName);
}
if(forceWorkSize == 0) {
ByteBuffer rkwgs = BufferUtils.createByteBuffer(8);
err = CL10.clGetKernelWorkGroupInfo(kernel, device, CL10.CL_KERNEL_WORK_GROUP_SIZE, rkwgs, null);
localWorkSize.put(0, rkwgs.getLong(0));
if(!(err == CL10.CL_SUCCESS) || localWorkSize.get(0) == 0)
localWorkSize.put(0, deviceWorkSize);
} else {
localWorkSize.put(0, forceWorkSize);
}
diabloMiner.info("Added " + deviceName + " (" + deviceCU + " CU, local work size of " + localWorkSize.get(0) + ")");
workSizeBase = 64 * 512;
workSize.set(workSizeBase * 16);
for(int i = 0; i < GPUHardwareType.EXECUTION_TOTAL; i++) {
String executorName = deviceName + "/" + i;
executions[i] = this.new GPUExecutionState(executorName);
Thread thread = new Thread(executions[i], "DiabloMiner Executor (" + executorName + ")");
thread.start();
diabloMiner.addThread(thread);
}
}
public void checkDevice() {
long now = DiabloMiner.now();
long elapsed = now - lastTime;
long currentRuns = runs.get();
double targetFPSBasis = hardwareType.getTargetFPSBasis();
int totalVectors = hardwareType.getTotalVectors();
long ws = workSize.get();
if(now > startTime + DiabloMiner.TIME_OFFSET * 2 && currentRuns > lastRuns + diabloMiner.getGPUTargetFPS()) {
basis = (double) elapsed / (double) (currentRuns - lastRuns);
if(basis < targetFPSBasis / 4)
ws += workSizeBase * 16;
else if(basis < targetFPSBasis / 2)
ws += workSizeBase * 4;
else if(basis < targetFPSBasis)
ws += workSizeBase;
else if(basis > targetFPSBasis * 4)
ws -= workSizeBase * 16;
else if(basis > targetFPSBasis * 2)
ws -= workSizeBase * 4;
else if(basis > targetFPSBasis)
ws -= workSizeBase;
if(ws < workSizeBase)
ws = workSizeBase;
else if(ws > DiabloMiner.TWO32 / totalVectors - 1)
ws = DiabloMiner.TWO32 / totalVectors - 1;
lastRuns = currentRuns;
lastTime = now;
workSize.set(ws);
}
}
public class GPUExecutionState extends ExecutionState {
final CLCommandQueue queue;
final CLMem output[] = new CLMem[2];
final CLMem blank;
ByteBuffer outputBuffer;
int outputIndex = 0;
final PointerBuffer workBaseBuffer = BufferUtils.createPointerBuffer(1);
final PointerBuffer workSizeBuffer = BufferUtils.createPointerBuffer(1);
final IntBuffer errBuffer = BufferUtils.createIntBuffer(1);
int err;
WorkState workState;
boolean requestedNewWork;
final int[] midstate2 = new int[16];
final MessageDigest digestInside;
final MessageDigest digestOutside;
final ByteBuffer digestInput = ByteBuffer.allocate(80);
byte[] digestOutput;
public GPUExecutionState(String executionName) throws DiabloMinerFatalException {
super(executionName);
try {
digestInside = MessageDigest.getInstance("SHA-256");
digestOutside = MessageDigest.getInstance("SHA-256");
} catch(NoSuchAlgorithmException e) {
throw new DiabloMinerFatalException(diabloMiner, "Your Java implementation does not have a MessageDigest for SHA-256");
}
queue = CL10.clCreateCommandQueue(context, device, 0, errBuffer);
if(queue == null || errBuffer.get(0) != CL10.CL_SUCCESS) {
throw new DiabloMinerFatalException(diabloMiner, "Failed to allocate queue");
}
IntBuffer blankinit = BufferUtils.createIntBuffer(OUTPUTS * 4);
for(int i = 0; i < OUTPUTS; i++)
blankinit.put(0);
blankinit.rewind();
if(platform_version == PlatformVersion.V1_1)
blank = CL10.clCreateBuffer(context, CL10.CL_MEM_COPY_HOST_PTR | CL10.CL_MEM_READ_ONLY, blankinit, errBuffer);
else
blank = CL10.clCreateBuffer(context, CL10.CL_MEM_COPY_HOST_PTR | CL10.CL_MEM_READ_ONLY | CL12.CL_MEM_HOST_NO_ACCESS, blankinit, errBuffer);
if(blank == null || errBuffer.get(0) != CL10.CL_SUCCESS)
throw new DiabloMinerFatalException(diabloMiner, "Failed to allocate blank buffer");
blankinit.rewind();
for(int i = 0; i < 2; i++) {
if(platform_version == PlatformVersion.V1_1)
output[i] = CL10.clCreateBuffer(context, CL10.CL_MEM_COPY_HOST_PTR | CL10.CL_MEM_WRITE_ONLY, blankinit, errBuffer);
else
output[i] = CL10.clCreateBuffer(context, CL10.CL_MEM_COPY_HOST_PTR | CL10.CL_MEM_WRITE_ONLY | CL12.CL_MEM_HOST_READ_ONLY, blankinit, errBuffer);
blankinit.rewind();
if(output[i] == null || errBuffer.get(0) != CL10.CL_SUCCESS) {
throw new DiabloMinerFatalException(diabloMiner, "Failed to allocate output buffer");
}
}
outputBuffer = CL10.clEnqueueMapBuffer(queue, output[outputIndex], 1, CL10.CL_MAP_READ, 0, OUTPUTS * 4, null, null, null);
diabloMiner.getNetworkStateHead().addGetQueue(this);
requestedNewWork = true;
}
public void run() {
boolean submittedBlock;
boolean resetBuffer;
boolean hwError;
boolean skipProcessing;
boolean skipUnmap = false;
while(diabloMiner.getRunning()) {
submittedBlock = false;
resetBuffer = false;
hwError = false;
skipProcessing = false;
WorkState workIncoming = null;
if(requestedNewWork) {
try {
workIncoming = incomingQueue.take();
} catch(InterruptedException f) {
continue;
}
} else {
workIncoming = incomingQueue.poll();
}
if(workIncoming != null) {
workState = workIncoming;
requestedNewWork = false;
resetBuffer = true;
skipProcessing = true;
}
if(!skipProcessing | !skipUnmap) {
for(int z = 0; z < OUTPUTS; z++) {
int nonce = outputBuffer.getInt(z * 4);
if(nonce != 0) {
for(int j = 0; j < 19; j++)
digestInput.putInt(j * 4, workState.getData(j));
digestInput.putInt(19 * 4, nonce);
digestOutput = digestOutside.digest(digestInside.digest(digestInput.array()));
long G = ((long) (0xFF & digestOutput[27]) << 24) | ((long) (0xFF & digestOutput[26]) << 16) | ((long) (0xFF & digestOutput[25]) << 8) | ((long) (0xFF & digestOutput[24]));
long H = ((long) (0xFF & digestOutput[31]) << 24) | ((long) (0xFF & digestOutput[30]) << 16) | ((long) (0xFF & digestOutput[29]) << 8) | ((long) (0xFF & digestOutput[28]));
if(H == 0) {
diabloMiner.debug("Attempt " + diabloMiner.incrementAttempts() + " from " + executionName);
if(workState.getTarget(7) != 0 || G <= workState.getTarget(6)) {
workState.submitNonce(nonce);
submittedBlock = true;
}
} else {
hwError = true;
}
resetBuffer = true;
}
}
if(hwError && submittedBlock == false) {
if(hwcheck && !diabloMiner.getDebug())
diabloMiner.error("Invalid solution " + diabloMiner.incrementHWErrors() + " from " + deviceName + ", possible driver or hardware issue");
else
diabloMiner.debug("Invalid solution " + diabloMiner.incrementHWErrors() + " from " + executionName + ", possible driver or hardware issue");
}
}
if(resetBuffer)
CL10.clEnqueueCopyBuffer(queue, blank, output[outputIndex], 0, 0, OUTPUTS * 4, null, null);
if(!skipUnmap) {
CL10.clEnqueueUnmapMemObject(queue, output[outputIndex], outputBuffer, null, null);
outputIndex = (outputIndex == 0) ? 1 : 0;
}
long workBase = workState.getBase();
long increment = workSize.get();
if(DiabloMiner.now() - 3600000 > resetNetworkState) {
resetNetworkState = DiabloMiner.now();
diabloMiner.getNetworkStateHead().addGetQueue(this);
requestedNewWork = skipUnmap = true;
} else {
requestedNewWork = skipUnmap = workState.update(increment);
}
if(!requestedNewWork) {
diabloMiner.addAndGetHashCount(increment);
deviceHashCount.addAndGet(increment);
runs.incrementAndGet();
workSizeBuffer.put(0, increment);
workBaseBuffer.put(0, workBase);
System.arraycopy(workState.getMidstate(), 0, midstate2, 0, 8);
DiabloMiner.sharound(midstate2, 0, 1, 2, 3, 4, 5, 6, 7, workState.getData(16), 0x428A2F98);
DiabloMiner.sharound(midstate2, 7, 0, 1, 2, 3, 4, 5, 6, workState.getData(17), 0x71374491);
DiabloMiner.sharound(midstate2, 6, 7, 0, 1, 2, 3, 4, 5, workState.getData(18), 0xB5C0FBCF);
int W16 = workState.getData(16) + (DiabloMiner.rot(workState.getData(17), 7) ^ DiabloMiner.rot(workState.getData(17), 18) ^ (workState.getData(17) >>> 3));
int W17 = workState.getData(17) + (DiabloMiner.rot(workState.getData(18), 7) ^ DiabloMiner.rot(workState.getData(18), 18) ^ (workState.getData(18) >>> 3)) + 0x01100000;
int W18 = workState.getData(18) + (DiabloMiner.rot(W16, 17) ^ DiabloMiner.rot(W16, 19) ^ (W16 >>> 10));
int W19 = 0x11002000 + (DiabloMiner.rot(W17, 17) ^ DiabloMiner.rot(W17, 19) ^ (W17 >>> 10));
int W31 = 0x00000280 + (DiabloMiner.rot(W16, 7) ^ DiabloMiner.rot(W16, 18) ^ (W16 >>> 3));
int W32 = W16 + (DiabloMiner.rot(W17, 7) ^ DiabloMiner.rot(W17, 18) ^ (W17 >>> 3));
int PreVal4 = workState.getMidstate(4) + (DiabloMiner.rot(midstate2[1], 6) ^ DiabloMiner.rot(midstate2[1], 11) ^ DiabloMiner.rot(midstate2[1], 25)) + (midstate2[3] ^ (midstate2[1] & (midstate2[2] ^ midstate2[3]))) + 0xe9b5dba5;
int T1 = (DiabloMiner.rot(midstate2[5], 2) ^ DiabloMiner.rot(midstate2[5], 13) ^ DiabloMiner.rot(midstate2[5], 22)) + ((midstate2[5] & midstate2[6]) | (midstate2[7] & (midstate2[5] | midstate2[6])));
int PreVal4_state0 = PreVal4 + workState.getMidstate(0);
int PreVal4_state0_k7 = (int) (PreVal4_state0 + 0xAB1C5ED5L);
int PreVal4_T1 = PreVal4 + T1;
int B1_plus_K6 = (int) (midstate2[1] + 0x923f82a4L);
int C1_plus_K5 = (int) (midstate2[2] + 0x59f111f1L);
int W16_plus_K16 = (int) (W16 + 0xe49b69c1L);
int W17_plus_K17 = (int) (W17 + 0xefbe4786L);
kernel.setArg(0, PreVal4_state0).setArg(1, PreVal4_state0_k7).setArg(2, PreVal4_T1).setArg(3, W18).setArg(4, W19).setArg(5, W16).setArg(6, W17).setArg(7, W16_plus_K16).setArg(8, W17_plus_K17).setArg(9, W31).setArg(10, W32).setArg(11, (int) (midstate2[3] + 0xB956c25bL)).setArg(12, midstate2[1]).setArg(13, midstate2[2]).setArg(14, midstate2[7]).setArg(15, midstate2[5]).setArg(16, midstate2[6]).setArg(17, C1_plus_K5).setArg(18, B1_plus_K6).setArg(19, workState.getMidstate(0)).setArg(20, workState.getMidstate(1)).setArg(21, workState.getMidstate(2)).setArg(22, workState.getMidstate(3)).setArg(23, workState.getMidstate(4)).setArg(24, workState.getMidstate(5)).setArg(25, workState.getMidstate(6)).setArg(26, workState.getMidstate(7)).setArg(27, output[outputIndex]);
err = CL10.clEnqueueNDRangeKernel(queue, kernel, 1, workBaseBuffer, workSizeBuffer, localWorkSize, null, null);
if(err != CL10.CL_SUCCESS && err != CL10.CL_INVALID_KERNEL_ARGS && err != CL10.CL_INVALID_GLOBAL_OFFSET) {
try {
throw new DiabloMinerFatalException(diabloMiner, "Failed to queue kernel, error " + err);
} catch(DiabloMinerFatalException e) {
}
} else {
if(err == CL10.CL_INVALID_KERNEL_ARGS) {
diabloMiner.debug("Spurious CL_INVALID_KERNEL_ARGS error, ignoring");
skipUnmap = true;
} else if(err == CL10.CL_INVALID_GLOBAL_OFFSET) {
diabloMiner.error("Spurious CL_INVALID_GLOBAL_OFFSET error, offset: " + workBase + ", work size: " + increment);
skipUnmap = true;
} else {
outputBuffer = CL10.clEnqueueMapBuffer(queue, output[outputIndex], 1, CL10.CL_MAP_READ, 0, OUTPUTS * 4, null, null, null);
}
}
}
}
}
}
}