/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package pdfrobot.engine.parser;
import java.io.File;
import java.io.IOException;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.util.PDFTextStripper;
import pdfrobot.engine.robot.Filetracker;
/**
 * Checks PDF files against a given rule.
 * <p>
 * A candidate file is obtained from the {@link Filetracker}, its text is
 * extracted, and one specific line of that text is tested for a pattern.
 *
 * @author hedsttor
 */
public class PdfFolderParser {

    /** Shared logger; looked up once instead of on every log call. */
    private static final Logger LOGGER = Logger.getLogger(PdfFolderParser.class.getName());

    /** Index of the thread that created this parser; passed to the tracker and used as log prefix. */
    int owningThread;

    /**
     * Constructor
     * @param owningThread The index of the thread creating this parser
     */
    public PdfFolderParser(int owningThread) {
        this.owningThread = owningThread;
    }

    /**
     * Parses a file according to a rule.
     * <p>
     * Asks the {@link Filetracker} for a file for this rule and thread, extracts
     * the text of that PDF and checks whether the line at index
     * {@code rule.getLine()} contains {@code rule.getPattern()}.
     *
     * @param rule The rule which forms the base of the comparison.
     * @return The tracked file if its text matches the rule; {@code null} if the
     *         tracker returned no file, if the text has no line at the requested
     *         index, or if that line does not contain the pattern.
     * @throws IOException if the PDF cannot be loaded, read or closed
     */
    public File parseFile(PdfFileRule rule) throws IOException {
        File f = Filetracker.getInstance().track(rule, owningThread);
        if (f == null) {
            return null;
        }
        String text = extractText(f);
        if (!lineMatches(text, rule)) {
            return null;
        }
        LOGGER.log(Level.FINEST, owningThread+" - Found pattern "+rule.getPattern());
        return f;
    }

    /**
     * Loads the given PDF and returns its text as produced by {@link PDFTextStripper}.
     * The document is always closed, even when text extraction fails.
     */
    private String extractText(File pdfFile) throws IOException {
        LOGGER.log(Level.FINEST, owningThread+" - Loading pdf document "+pdfFile.getName());
        PDDocument pdfDoc = PDDocument.load(pdfFile);
        try {
            LOGGER.log(Level.FINEST, owningThread+" - Ripping pdf");
            PDFTextStripper pdfTextStripper = new PDFTextStripper();
            return pdfTextStripper.getText(pdfDoc);
        } finally {
            // Release the document on every path; previously a failure in
            // the stripper skipped close() and leaked the loaded document.
            pdfDoc.close();
        }
    }

    /**
     * Tells whether the line of {@code text} selected by the rule contains the rule's pattern.
     * A line index outside the extracted text counts as "no match" rather than an error.
     */
    private boolean lineMatches(String text, PdfFileRule rule) {
        String[] lines = text.split("\n");
        int lineIndex = rule.getLine();
        if (lineIndex < 0 || lineIndex >= lines.length) {
            // Short documents (or a bad rule) must not crash the caller with
            // an ArrayIndexOutOfBoundsException.
            LOGGER.log(Level.FINEST, owningThread+" - Line "+lineIndex+" not present in document ("+lines.length+" lines) exiting");
            return false;
        }
        if (!lines[lineIndex].contains(rule.getPattern())) {
            LOGGER.log(Level.FINEST, owningThread+" - Pattern "+rule.getPattern()+" not found in line "+rule.getLine()+" exiting");
            return false;
        }
        return true;
    }
}