package net.hearthstats.game.ocr;
import net.sourceforge.tess4j.Tesseract;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.awt.*;
import java.awt.image.BufferedImage;
import java.awt.image.BufferedImageOp;
import java.awt.image.ColorConvertOp;
import java.awt.image.RescaleOp;
/**
 * Base class for performing OCR. Subclasses can override methods to customise the handling of OCR on different
 * types of text.
 *
 * <p>The template method {@link #process(BufferedImage)} drives the pipeline: crop, filter, save a debug copy,
 * run Tesseract, parse the result, and repeat for as long as {@link #tryProcessingAgain(String, int)} asks for
 * another attempt.
 */
public abstract class OcrBase {

    protected final static Logger debugLog = LoggerFactory.getLogger(OcrBase.class);

    /** Factor by which the cropped image is enlarged (in both dimensions) before OCR. */
    private static final int UPSCALE_FACTOR = 3;

    /** Multiplier applied to every colour sample when increasing the contrast. */
    private static final float CONTRAST_SCALE = 1.8f;

    /** Offset added to every colour sample (after scaling) when increasing the contrast. */
    private static final float CONTRAST_OFFSET = -30f;

    /** Mask covering the red, green and blue bits of a packed ARGB pixel. */
    private static final int RGB_MASK = 0x00FFFFFF;

    /**
     * Crops the image to the expected location of the text to OCR. Different iterations may crop the image
     * differently if necessary.
     *
     * @param image A full screenshot image that needs to be cropped
     * @param iteration The iteration number, zero-based
     * @return The cropped image containing the text to OCR
     */
    protected abstract BufferedImage crop(BufferedImage image, int iteration);

    /**
     * The filename of the image written to disk for debugging.
     *
     * @return The filename to save the debug image under, or null if no debug image should be saved
     */
    protected abstract String getFilename();

    /**
     * Defines which page segmentation mode to use in Tesseract. This defines what type of OCR is performed on the image,
     * which if set inappropriately means nothing will be detected. Each subclass should pick the most appropriate mode
     * for the type of text undergoing OCR.
     *
     * @see net.sourceforge.tess4j.TessAPI.TessPageSegMode
     * @param iteration The iteration number, zero-based
     * @return a value from TessAPI.TessPageSegMode
     */
    protected abstract int getTesseractPageSegMode(int iteration);

    /**
     * Parse an OCR string to fix up any obvious errors, such as 'I' instead of '1' in a number.
     *
     * @param ocrResult A string generated by OCR
     * @param iteration The iteration number, zero-based
     * @return The OCR string with errors fixed, if possible
     */
    protected abstract String parseString(String ocrResult, int iteration);

    /**
     * Some OCR might require multiple iterations to find the right spot. Return false when the given result is
     * acceptable (or no further attempts should be made), or true to make another OCR attempt.
     *
     * @param ocrResult The parsed string produced by the most recent OCR attempt
     * @param iteration The iteration number - one-based, unlike other methods where it is zero-based! That is,
     *                  the number of attempts made so far.
     * @return true if OCR should be processed again, or false if it's OK to continue with this OCR result
     */
    protected abstract boolean tryProcessingAgain(String ocrResult, int iteration);

    /**
     * Runs the full OCR pipeline on a screenshot: crop, filter, save a debug copy, OCR and parse. The pipeline is
     * repeated with an increasing iteration number for as long as {@link #tryProcessingAgain(String, int)}
     * returns true.
     *
     * @param image A full screenshot image containing the text to recognise
     * @return The parsed OCR result of the last iteration
     * @throws OcrException if filtering the image or performing the OCR fails
     */
    public String process(BufferedImage image) throws OcrException {
        String result;
        int iteration = 0;

        do {
            // Pass the current iteration so that subclasses can try a different crop on each attempt.
            BufferedImage croppedImage = crop(image, iteration);
            BufferedImage filteredImage = filter(croppedImage, iteration);
            croppedImage.flush();

            saveCopy(filteredImage, iteration);

            String rawResult = performOcr(filteredImage, iteration);
            filteredImage.flush();

            result = parseString(rawResult, iteration);

            // Incremented before the check: tryProcessingAgain receives a one-based attempt count.
            iteration++;
        } while (tryProcessingAgain(result, iteration));

        debugLog.debug("OCR recognised \"{}\"", result);

        return result;
    }

    /**
     * Filters the image to make it easier to OCR: converts it into an RGB working copy, enlarges it, inverts the
     * colours and increases the contrast.
     *
     * @param image A cropped image
     * @param iteration The iteration number, zero-based. Not used by this base implementation; available to
     *                  subclasses that override this method.
     * @return A new, filtered image that is larger than the input by a fixed factor in each dimension
     * @throws OcrException if the contrast adjustment fails
     */
    protected BufferedImage filter(BufferedImage image, int iteration) throws OcrException {
        int width = image.getWidth();
        int height = image.getHeight();
        int bigWidth = width * UPSCALE_FACTOR;
        int bigHeight = height * UPSCALE_FACTOR;

        // to gray scale
        // NOTE(review): the destination is a TYPE_INT_RGB image, so this converts between the source and
        // destination colour spaces rather than obviously desaturating - confirm this is the intended effect.
        BufferedImage grayscale = new BufferedImage(width, height, BufferedImage.TYPE_INT_RGB);
        BufferedImageOp grayscaleConv =
                new ColorConvertOp(image.getColorModel().getColorSpace(),
                        grayscale.getColorModel().getColorSpace(), null);
        grayscaleConv.filter(image, grayscale);

        // blow it up for ocr
        BufferedImage newImage = new BufferedImage(bigWidth, bigHeight, BufferedImage.TYPE_INT_RGB);
        Graphics g = newImage.createGraphics();
        try {
            g.drawImage(grayscale, 0, 0, bigWidth, bigHeight, null);
        } finally {
            g.dispose();
        }

        // invert image: flipping the bits of each 8-bit channel is the same as (255 - channel), and avoids
        // allocating Color objects for every pixel. The image has no alpha channel, so the alpha bits are left alone.
        for (int x = 0; x < bigWidth; x++) {
            for (int y = 0; y < bigHeight; y++) {
                newImage.setRGB(x, y, newImage.getRGB(x, y) ^ RGB_MASK);
            }
        }

        // increase contrast
        try {
            RescaleOp rescaleOp = new RescaleOp(CONTRAST_SCALE, CONTRAST_OFFSET, null);
            rescaleOp.filter(newImage, newImage);          // Source and destination are the same.
        } catch (Exception e) {
            throw new OcrException("Error rescaling OCR image", e);
        }

        return newImage;
    }

    /**
     * Save a copy of the image to disk for use when debugging inaccurate OCR. Nothing is saved if
     * {@link #getFilename()} returns null.
     *
     * @param image An image to be processed by OCR. Should already be cropped and filtered.
     * @param iteration The iteration number, zero-based. Not used by this base implementation.
     */
    protected void saveCopy(BufferedImage image, int iteration) {
        String filename = getFilename();
        if (filename != null) {
            // Reuse the value that was null-checked instead of asking the subclass a second time.
            BackgroundImageSave.savePngImage(image, filename);
        }
    }

    /**
     * Perform the actual OCR using Tesseract.
     *
     * @param image An image to be processed by OCR. Should be cropped and filtered to ensure the contrast is sufficient.
     * @param iteration The iteration number, zero-based; used to select the page segmentation mode
     * @return The text that was recognised in the image, with leading and trailing whitespace removed
     * @throws OcrException if Tesseract fails for any reason
     */
    protected String performOcr(BufferedImage image, int iteration) throws OcrException {
        try {
            Tesseract instance = Tesseract.getInstance();
            instance.setPageSegMode(getTesseractPageSegMode(iteration));

            String output = instance.doOCR(image);

            return output.trim();
        } catch (Exception e) {
            throw new OcrException("Error performing OCR", e);
        }
    }
}