Source Code of pdfrobot.engine.parser.PdfFolderParser

/*
 * To change this template, choose Tools | Templates
 * and open the template in the editor.
 */
package pdfrobot.engine.parser;


import java.io.File;
import java.io.IOException;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.util.PDFTextStripper;
import pdfrobot.engine.robot.Filetracker;


/**
 * Checks PDF-files according to a given rule.
 * @author hedsttor
 */
public class PdfFolderParser {
    int owningThread;


    /**
     * Constructor
     * @param owningThread The index of the thread creating this parser
     */
    public PdfFolderParser(int owningThread) {
        this.owningThread = owningThread;
    }


    /**
     * Parses a file according to a rule.
     * @param rule The rule which form the base of the comparison.
     * @return The first file that matches the rule.
     * @throws IOException
     */
    public File parseFile(PdfFileRule rule) throws IOException {
        File f = Filetracker.getInstance().track(rule, owningThread);


        if(f!=null) {
            Logger.getLogger(PdfFolderParser.class.getName()).log(Level.FINEST, owningThread+" - Loading pdf document "+f.getName());
            PDDocument pdfDoc = PDDocument.load(f);
            Logger.getLogger(PdfFolderParser.class.getName()).log(Level.FINEST, owningThread+" - Ripping pdf");
            PDFTextStripper pdfTextStripper = new PDFTextStripper();
            String text = pdfTextStripper.getText(pdfDoc);
            pdfDoc.close();
            String[] lines = text.split("\n");
            String line = lines[rule.getLine()];
            if (!line.contains(rule.getPattern())) {
                Logger.getLogger(PdfFolderParser.class.getName()).log(Level.FINEST, owningThread+" - Pattern "+rule.getPattern()+" not found in line "+rule.getLine()+" exiting");
                f=null;
            }    
        }
        if(f!=null)
            Logger.getLogger(PdfFolderParser.class.getName()).log(Level.FINEST, owningThread+" - Found pattern "+rule.getPattern());
        return f;
    }
}
Source Code of pdfrobot.engine.parser.PdfFolderParser

Related Classes of pdfrobot.engine.parser.PdfFolderParser