Package pdfrobot.engine.parser

Source Code of pdfrobot.engine.parser.PdfFolderParser

/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package pdfrobot.engine.parser;

import java.io.File;
import java.io.IOException;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.util.PDFTextStripper;
import pdfrobot.engine.robot.Filetracker;

/**
* Checks PDF-files according to a given rule.
* @author hedsttor
*/
public class PdfFolderParser {
    int owningThread;

    /**
     * Constructor
     * @param owningThread The index of the thread creating this parser
     */
    public PdfFolderParser(int owningThread) {
        this.owningThread = owningThread;
    }

    /**
     * Parses a file according to a rule.
     * @param rule The rule which form the base of the comparison.
     * @return The first file that matches the rule.
     * @throws IOException
     */
    public File parseFile(PdfFileRule rule) throws IOException {
        File f = Filetracker.getInstance().track(rule, owningThread);

        if(f!=null) {
            Logger.getLogger(PdfFolderParser.class.getName()).log(Level.FINEST, owningThread+" - Loading pdf document "+f.getName());
            PDDocument pdfDoc = PDDocument.load(f);
            Logger.getLogger(PdfFolderParser.class.getName()).log(Level.FINEST, owningThread+" - Ripping pdf");
            PDFTextStripper pdfTextStripper = new PDFTextStripper();
            String text = pdfTextStripper.getText(pdfDoc);
            pdfDoc.close();
            String[] lines = text.split("\n");
            String line = lines[rule.getLine()];
            if (!line.contains(rule.getPattern())) {
                Logger.getLogger(PdfFolderParser.class.getName()).log(Level.FINEST, owningThread+" - Pattern "+rule.getPattern()+" not found in line "+rule.getLine()+" exiting");
                f=null;
            }   
        }
        if(f!=null)
            Logger.getLogger(PdfFolderParser.class.getName()).log(Level.FINEST, owningThread+" - Found pattern "+rule.getPattern());
        return f;
    }
}
TOP

Related Classes of pdfrobot.engine.parser.PdfFolderParser

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.