Package com.diablominer.DiabloMiner.DeviceState

Source Code of com.diablominer.DiabloMiner.DeviceState.GPUDeviceState

/*
* DiabloMiner - OpenCL miner for Bitcoin
* Copyright (C) 2010, 2011, 2012 Patrick McFarland <diablod3@gmail.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

package com.diablominer.DiabloMiner.DeviceState;

import java.nio.ByteBuffer;
import java.nio.IntBuffer;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.concurrent.atomic.AtomicLong;

import org.lwjgl.BufferUtils;
import org.lwjgl.LWJGLUtil;
import org.lwjgl.PointerBuffer;
import org.lwjgl.opencl.CL10;
import org.lwjgl.opencl.CL12;
import org.lwjgl.opencl.CLCommandQueue;
import org.lwjgl.opencl.CLContext;
import org.lwjgl.opencl.CLContextCallback;
import org.lwjgl.opencl.CLDevice;
import org.lwjgl.opencl.CLKernel;
import org.lwjgl.opencl.CLMem;
import org.lwjgl.opencl.CLPlatform;
import org.lwjgl.opencl.CLProgram;

import com.diablominer.DiabloMiner.DiabloMiner;
import com.diablominer.DiabloMiner.DiabloMinerFatalException;
import com.diablominer.DiabloMiner.NetworkState.WorkState;

public class GPUDeviceState extends DeviceState {
  final static int OUTPUTS = 16;

  final PlatformVersion platform_version;
  final CLDevice device;
  final CLContext context;
  final CLKernel kernel;

  AtomicLong workSize = new AtomicLong(0);
  long workSizeBase;
  boolean hwcheck;

  final PointerBuffer localWorkSize = BufferUtils.createPointerBuffer(1);

  final ExecutionState executions[];

  AtomicLong runs = new AtomicLong(0);
  long lastRuns = 0;
  long startTime = 0;
  long lastTime = 0;

  GPUHardwareType hardwareType;

  GPUDeviceState(GPUHardwareType hardwareType, String deviceName, CLPlatform platform, PlatformVersion platform_version, CLDevice device) throws DiabloMinerFatalException {
    this.platform_version = platform_version;
    this.hardwareType = hardwareType;
    this.diabloMiner = hardwareType.getDiabloMiner();
    this.deviceName = deviceName;

    this.resetNetworkState = DiabloMiner.now();

    this.executions = new ExecutionState[GPUHardwareType.EXECUTION_TOTAL];

    boolean hasBitAlign;
    boolean hasBFI_INT = false;
    CLProgram program;

    this.device = device;

    PointerBuffer properties = BufferUtils.createPointerBuffer(3);
    properties.put(CL10.CL_CONTEXT_PLATFORM).put(platform.getPointer()).put(0).flip();
    int err = 0;

    int deviceCU = device.getInfoInt(CL10.CL_DEVICE_MAX_COMPUTE_UNITS);
    long deviceWorkSize;

    int forceWorkSize = diabloMiner.getGPUForceWorkSize();

    if(forceWorkSize > 0)
      deviceWorkSize = forceWorkSize;
    else if(LWJGLUtil.getPlatform() != LWJGLUtil.PLATFORM_MACOSX)
      deviceWorkSize = device.getInfoSize(CL10.CL_DEVICE_MAX_WORK_GROUP_SIZE);
    else
      deviceWorkSize = 64;

    context = CL10.clCreateContext(properties, device, new CLContextCallback() {
      protected void handleMessage(String errinfo, ByteBuffer private_info) {
        diabloMiner.error(errinfo);
      }
    }, null);

    ByteBuffer extb = BufferUtils.createByteBuffer(1024);
    CL10.clGetDeviceInfo(device, CL10.CL_DEVICE_EXTENSIONS, extb, null);
    byte[] exta = new byte[1024];
    extb.get(exta);

    if(new String(exta).contains("cl_amd_media_ops"))
      hasBitAlign = true;
    else
      hasBitAlign = false;

    if(hasBitAlign) {
      if(deviceName.contains("Cedar") || deviceName.contains("Redwood") || deviceName.contains("Juniper") || deviceName.contains("Cypress") || deviceName.contains("Hemlock") || deviceName.contains("Caicos") || deviceName.contains("Turks") || deviceName.contains("Barts") || deviceName.contains("Cayman") || deviceName.contains("Antilles") || deviceName.contains("Palm") || deviceName.contains("Sumo") || deviceName.contains("Wrestler") || deviceName.contains("WinterPark") || deviceName.contains("BeaverCreek"))
        hasBFI_INT = true;
    }

    // String compileOptions =
    // "-save-temps="+(device.getInfoString(CL10.CL_DEVICE_NAME).trim());
    String compileOptions = "";

    compileOptions += " -D WORKSIZE=" + deviceWorkSize;

    if(hasBitAlign)
      compileOptions += " -D BITALIGN";

    if(hasBFI_INT)
      compileOptions += " -D BFIINT";

    program = CL10.clCreateProgramWithSource(context, hardwareType.getSource(), null);

    err = CL10.clBuildProgram(program, device, compileOptions, null);
    if(err != CL10.CL_SUCCESS) {
      ByteBuffer logBuffer = BufferUtils.createByteBuffer(1024);
      byte[] log = new byte[1024];

      CL10.clGetProgramBuildInfo(program, device, CL10.CL_PROGRAM_BUILD_LOG, logBuffer, null);

      logBuffer.get(log);

      System.out.println(new String(log));

      throw new DiabloMinerFatalException(diabloMiner, "Failed to build program on " + deviceName);
    }

    if(hasBFI_INT) {
      diabloMiner.info("BFI_INT patching enabled, disabling hardware check errors");
      hwcheck = false;

      int binarySize = (int) program.getInfoSizeArray(CL10.CL_PROGRAM_BINARY_SIZES)[0];

      ByteBuffer binary = BufferUtils.createByteBuffer(binarySize);
      program.getInfoBinaries(binary);

      for(int pos = 0; pos < binarySize - 4; pos++) {
        if((long) (0xFFFFFFFF & binary.getInt(pos)) == 0x464C457FL && (long) (0xFFFFFFFF & binary.getInt(pos + 4)) == 0x64010101L) {
          boolean firstText = true;

          int offset = binary.getInt(pos + 32);
          short entrySize = binary.getShort(pos + 46);
          short entryCount = binary.getShort(pos + 48);
          short index = binary.getShort(pos + 50);

          int header = pos + offset;

          int nameTableOffset = binary.getInt(header + index * entrySize + 16);
          int size = binary.getInt(header + index * entrySize + 20);

          int entry = header;

          for(int section = 0; section < entryCount; section++) {
            int nameIndex = binary.getInt(entry);
            offset = binary.getInt(entry + 16);
            size = binary.getInt(entry + 20);

            int name = pos + nameTableOffset + nameIndex;

            if((long) (0xFFFFFFFF & binary.getInt(name)) == 0x7865742E) {
              if(firstText) {
                firstText = false;
              } else {
                int sectionStart = pos + offset;
                for(int i = 0; i < size / 8; i++) {
                  long instruction1 = (long) (0xFFFFFFFF & binary.getInt(sectionStart + i * 8));
                  long instruction2 = (long) (0xFFFFFFFF & binary.getInt(sectionStart + i * 8 + 4));

                  if((instruction1 & 0x02001000L) == 0x00000000L && (instruction2 & 0x9003F000L) == 0x0001A000L) {
                    instruction2 ^= (0x0001A000L ^ 0x0000C000L);

                    binary.putInt(sectionStart + i * 8 + 4, (int) instruction2);
                  }
                }
              }
            }

            entry += entrySize;
          }

          break;
        }
      }

      IntBuffer binaryErr = BufferUtils.createIntBuffer(1);

      CL10.clReleaseProgram(program);
      program = CL10.clCreateProgramWithBinary(context, device, binary, binaryErr, null);

      err = CL10.clBuildProgram(program, device, compileOptions, null);

      if(err != CL10.CL_SUCCESS) {
        throw new DiabloMinerFatalException(diabloMiner, "Failed to BFI_INT patch kernel on " + deviceName);
      }
    }

    kernel = CL10.clCreateKernel(program, "search", null);
    if(kernel == null) {
      throw new DiabloMinerFatalException(diabloMiner, "Failed to create kernel on " + deviceName);

    }

    if(forceWorkSize == 0) {
      ByteBuffer rkwgs = BufferUtils.createByteBuffer(8);

      err = CL10.clGetKernelWorkGroupInfo(kernel, device, CL10.CL_KERNEL_WORK_GROUP_SIZE, rkwgs, null);

      localWorkSize.put(0, rkwgs.getLong(0));

      if(!(err == CL10.CL_SUCCESS) || localWorkSize.get(0) == 0)
        localWorkSize.put(0, deviceWorkSize);
    } else {
      localWorkSize.put(0, forceWorkSize);
    }

    diabloMiner.info("Added " + deviceName + " (" + deviceCU + " CU, local work size of " + localWorkSize.get(0) + ")");

    workSizeBase = 64 * 512;

    workSize.set(workSizeBase * 16);

    for(int i = 0; i < GPUHardwareType.EXECUTION_TOTAL; i++) {
      String executorName = deviceName + "/" + i;
      executions[i] = this.new GPUExecutionState(executorName);
      Thread thread = new Thread(executions[i], "DiabloMiner Executor (" + executorName + ")");
      thread.start();
      diabloMiner.addThread(thread);
    }
  }

  public void checkDevice() {
    long now = DiabloMiner.now();
    long elapsed = now - lastTime;
    long currentRuns = runs.get();
    double targetFPSBasis = hardwareType.getTargetFPSBasis();
    int totalVectors = hardwareType.getTotalVectors();
    long ws = workSize.get();

    if(now > startTime + DiabloMiner.TIME_OFFSET * 2 && currentRuns > lastRuns + diabloMiner.getGPUTargetFPS()) {
      basis = (double) elapsed / (double) (currentRuns - lastRuns);

      if(basis < targetFPSBasis / 4)
        ws += workSizeBase * 16;
      else if(basis < targetFPSBasis / 2)
        ws += workSizeBase * 4;
      else if(basis < targetFPSBasis)
        ws += workSizeBase;
      else if(basis > targetFPSBasis * 4)
        ws -= workSizeBase * 16;
      else if(basis > targetFPSBasis * 2)
        ws -= workSizeBase * 4;
      else if(basis > targetFPSBasis)
        ws -= workSizeBase;

      if(ws < workSizeBase)
        ws = workSizeBase;
      else if(ws > DiabloMiner.TWO32 / totalVectors - 1)
        ws = DiabloMiner.TWO32 / totalVectors - 1;

      lastRuns = currentRuns;
      lastTime = now;

      workSize.set(ws);
    }
  }

  public class GPUExecutionState extends ExecutionState {
    final CLCommandQueue queue;

    final CLMem output[] = new CLMem[2];
    final CLMem blank;
    ByteBuffer outputBuffer;
    int outputIndex = 0;

    final PointerBuffer workBaseBuffer = BufferUtils.createPointerBuffer(1);
    final PointerBuffer workSizeBuffer = BufferUtils.createPointerBuffer(1);

    final IntBuffer errBuffer = BufferUtils.createIntBuffer(1);
    int err;

    WorkState workState;
    boolean requestedNewWork;

    final int[] midstate2 = new int[16];
    final MessageDigest digestInside;
    final MessageDigest digestOutside;
    final ByteBuffer digestInput = ByteBuffer.allocate(80);
    byte[] digestOutput;

    public GPUExecutionState(String executionName) throws DiabloMinerFatalException {
      super(executionName);

      try {
        digestInside = MessageDigest.getInstance("SHA-256");
        digestOutside = MessageDigest.getInstance("SHA-256");
      } catch(NoSuchAlgorithmException e) {
        throw new DiabloMinerFatalException(diabloMiner, "Your Java implementation does not have a MessageDigest for SHA-256");
      }

      queue = CL10.clCreateCommandQueue(context, device, 0, errBuffer);

      if(queue == null || errBuffer.get(0) != CL10.CL_SUCCESS) {
        throw new DiabloMinerFatalException(diabloMiner, "Failed to allocate queue");
      }

      IntBuffer blankinit = BufferUtils.createIntBuffer(OUTPUTS * 4);

      for(int i = 0; i < OUTPUTS; i++)
        blankinit.put(0);

      blankinit.rewind();

      if(platform_version == PlatformVersion.V1_1)
        blank = CL10.clCreateBuffer(context, CL10.CL_MEM_COPY_HOST_PTR | CL10.CL_MEM_READ_ONLY, blankinit, errBuffer);
      else
        blank = CL10.clCreateBuffer(context, CL10.CL_MEM_COPY_HOST_PTR | CL10.CL_MEM_READ_ONLY | CL12.CL_MEM_HOST_NO_ACCESS, blankinit, errBuffer);

      if(blank == null || errBuffer.get(0) != CL10.CL_SUCCESS)
        throw new DiabloMinerFatalException(diabloMiner, "Failed to allocate blank buffer");

      blankinit.rewind();

      for(int i = 0; i < 2; i++) {
        if(platform_version == PlatformVersion.V1_1)
          output[i] = CL10.clCreateBuffer(context, CL10.CL_MEM_COPY_HOST_PTR | CL10.CL_MEM_WRITE_ONLY, blankinit, errBuffer);
        else
          output[i] = CL10.clCreateBuffer(context, CL10.CL_MEM_COPY_HOST_PTR | CL10.CL_MEM_WRITE_ONLY | CL12.CL_MEM_HOST_READ_ONLY, blankinit, errBuffer);

        blankinit.rewind();

        if(output[i] == null || errBuffer.get(0) != CL10.CL_SUCCESS) {
          throw new DiabloMinerFatalException(diabloMiner, "Failed to allocate output buffer");
        }
      }

      outputBuffer = CL10.clEnqueueMapBuffer(queue, output[outputIndex], 1, CL10.CL_MAP_READ, 0, OUTPUTS * 4, null, null, null);

      diabloMiner.getNetworkStateHead().addGetQueue(this);
      requestedNewWork = true;
    }

    public void run() {
      boolean submittedBlock;
      boolean resetBuffer;
      boolean hwError;
      boolean skipProcessing;
      boolean skipUnmap = false;

      while(diabloMiner.getRunning()) {
        submittedBlock = false;
        resetBuffer = false;
        hwError = false;
        skipProcessing = false;

        WorkState workIncoming = null;

        if(requestedNewWork) {
          try {
            workIncoming = incomingQueue.take();
          } catch(InterruptedException f) {
            continue;
          }
        } else {
          workIncoming = incomingQueue.poll();
        }

        if(workIncoming != null) {
          workState = workIncoming;
          requestedNewWork = false;
          resetBuffer = true;
          skipProcessing = true;
        }

        if(!skipProcessing | !skipUnmap) {
          for(int z = 0; z < OUTPUTS; z++) {
            int nonce = outputBuffer.getInt(z * 4);

            if(nonce != 0) {
              for(int j = 0; j < 19; j++)
                digestInput.putInt(j * 4, workState.getData(j));

              digestInput.putInt(19 * 4, nonce);

              digestOutput = digestOutside.digest(digestInside.digest(digestInput.array()));

              long G = ((long) (0xFF & digestOutput[27]) << 24) | ((long) (0xFF & digestOutput[26]) << 16) | ((long) (0xFF & digestOutput[25]) << 8) | ((long) (0xFF & digestOutput[24]));

              long H = ((long) (0xFF & digestOutput[31]) << 24) | ((long) (0xFF & digestOutput[30]) << 16) | ((long) (0xFF & digestOutput[29]) << 8) | ((long) (0xFF & digestOutput[28]));

              if(H == 0) {
                diabloMiner.debug("Attempt " + diabloMiner.incrementAttempts() + " from " + executionName);

                if(workState.getTarget(7) != 0 || G <= workState.getTarget(6)) {
                  workState.submitNonce(nonce);
                  submittedBlock = true;
                }
              } else {
                hwError = true;
              }

              resetBuffer = true;
            }
          }

          if(hwError && submittedBlock == false) {
            if(hwcheck && !diabloMiner.getDebug())
              diabloMiner.error("Invalid solution " + diabloMiner.incrementHWErrors() + " from " + deviceName + ", possible driver or hardware issue");
            else
              diabloMiner.debug("Invalid solution " + diabloMiner.incrementHWErrors() + " from " + executionName + ", possible driver or hardware issue");
          }
        }


        if(resetBuffer)
          CL10.clEnqueueCopyBuffer(queue, blank, output[outputIndex], 0, 0, OUTPUTS * 4, null, null);

        if(!skipUnmap) {
          CL10.clEnqueueUnmapMemObject(queue, output[outputIndex], outputBuffer, null, null);

          outputIndex = (outputIndex == 0) ? 1 : 0;
        }



        long workBase = workState.getBase();
        long increment = workSize.get();

        if(DiabloMiner.now() - 3600000 > resetNetworkState) {
          resetNetworkState = DiabloMiner.now();

          diabloMiner.getNetworkStateHead().addGetQueue(this);
          requestedNewWork = skipUnmap = true;
        } else {
          requestedNewWork = skipUnmap = workState.update(increment);
        }

        if(!requestedNewWork) {
          diabloMiner.addAndGetHashCount(increment);
          deviceHashCount.addAndGet(increment);
          runs.incrementAndGet();

          workSizeBuffer.put(0, increment);
          workBaseBuffer.put(0, workBase);

          System.arraycopy(workState.getMidstate(), 0, midstate2, 0, 8);

          DiabloMiner.sharound(midstate2, 0, 1, 2, 3, 4, 5, 6, 7, workState.getData(16), 0x428A2F98);
          DiabloMiner.sharound(midstate2, 7, 0, 1, 2, 3, 4, 5, 6, workState.getData(17), 0x71374491);
          DiabloMiner.sharound(midstate2, 6, 7, 0, 1, 2, 3, 4, 5, workState.getData(18), 0xB5C0FBCF);

          int W16 = workState.getData(16) + (DiabloMiner.rot(workState.getData(17), 7) ^ DiabloMiner.rot(workState.getData(17), 18) ^ (workState.getData(17) >>> 3));
          int W17 = workState.getData(17) + (DiabloMiner.rot(workState.getData(18), 7) ^ DiabloMiner.rot(workState.getData(18), 18) ^ (workState.getData(18) >>> 3)) + 0x01100000;
          int W18 = workState.getData(18) + (DiabloMiner.rot(W16, 17) ^ DiabloMiner.rot(W16, 19) ^ (W16 >>> 10));
          int W19 = 0x11002000 + (DiabloMiner.rot(W17, 17) ^ DiabloMiner.rot(W17, 19) ^ (W17 >>> 10));
          int W31 = 0x00000280 + (DiabloMiner.rot(W16, 7) ^ DiabloMiner.rot(W16, 18) ^ (W16 >>> 3));
          int W32 = W16 + (DiabloMiner.rot(W17, 7) ^ DiabloMiner.rot(W17, 18) ^ (W17 >>> 3));

          int PreVal4 = workState.getMidstate(4) + (DiabloMiner.rot(midstate2[1], 6) ^ DiabloMiner.rot(midstate2[1], 11) ^ DiabloMiner.rot(midstate2[1], 25)) + (midstate2[3] ^ (midstate2[1] & (midstate2[2] ^ midstate2[3]))) + 0xe9b5dba5;
          int T1 = (DiabloMiner.rot(midstate2[5], 2) ^ DiabloMiner.rot(midstate2[5], 13) ^ DiabloMiner.rot(midstate2[5], 22)) + ((midstate2[5] & midstate2[6]) | (midstate2[7] & (midstate2[5] | midstate2[6])));

          int PreVal4_state0 = PreVal4 + workState.getMidstate(0);
          int PreVal4_state0_k7 = (int) (PreVal4_state0 + 0xAB1C5ED5L);
          int PreVal4_T1 = PreVal4 + T1;
          int B1_plus_K6 = (int) (midstate2[1] + 0x923f82a4L);
          int C1_plus_K5 = (int) (midstate2[2] + 0x59f111f1L);
          int W16_plus_K16 = (int) (W16 + 0xe49b69c1L);
          int W17_plus_K17 = (int) (W17 + 0xefbe4786L);

          kernel.setArg(0, PreVal4_state0).setArg(1, PreVal4_state0_k7).setArg(2, PreVal4_T1).setArg(3, W18).setArg(4, W19).setArg(5, W16).setArg(6, W17).setArg(7, W16_plus_K16).setArg(8, W17_plus_K17).setArg(9, W31).setArg(10, W32).setArg(11, (int) (midstate2[3] + 0xB956c25bL)).setArg(12, midstate2[1]).setArg(13, midstate2[2]).setArg(14, midstate2[7]).setArg(15, midstate2[5]).setArg(16, midstate2[6]).setArg(17, C1_plus_K5).setArg(18, B1_plus_K6).setArg(19, workState.getMidstate(0)).setArg(20, workState.getMidstate(1)).setArg(21, workState.getMidstate(2)).setArg(22, workState.getMidstate(3)).setArg(23, workState.getMidstate(4)).setArg(24, workState.getMidstate(5)).setArg(25, workState.getMidstate(6)).setArg(26, workState.getMidstate(7)).setArg(27, output[outputIndex]);

          err = CL10.clEnqueueNDRangeKernel(queue, kernel, 1, workBaseBuffer, workSizeBuffer, localWorkSize, null, null);

          if(err != CL10.CL_SUCCESS && err != CL10.CL_INVALID_KERNEL_ARGS && err != CL10.CL_INVALID_GLOBAL_OFFSET) {
            try {
              throw new DiabloMinerFatalException(diabloMiner, "Failed to queue kernel, error " + err);
            } catch(DiabloMinerFatalException e) {
            }
          } else {
            if(err == CL10.CL_INVALID_KERNEL_ARGS) {
              diabloMiner.debug("Spurious CL_INVALID_KERNEL_ARGS error, ignoring");
              skipUnmap = true;
            } else if(err == CL10.CL_INVALID_GLOBAL_OFFSET) {
              diabloMiner.error("Spurious CL_INVALID_GLOBAL_OFFSET error, offset: " + workBase + ", work size: " + increment);
              skipUnmap = true;
            } else {
              outputBuffer = CL10.clEnqueueMapBuffer(queue, output[outputIndex], 1, CL10.CL_MAP_READ, 0, OUTPUTS * 4, null, null, null);
            }
          }
        }
      }
    }
  }
}
TOP

Related Classes of com.diablominer.DiabloMiner.DeviceState.GPUDeviceState

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.