package com.cardence.lawshelf.pdf;
import java.io.File;
import java.io.IOException;
import java.net.URL;
import org.apache.commons.logging.Log;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.util.PDFTextStripper;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
/**
 * {@link PdfParser} implementation backed by Apache PDFBox.
 *
 * <p>A document is loaded through one of the {@code parse*} methods and kept
 * in a field; {@link #processStructure()} then extracts its text and writes
 * it to the injected log at INFO level.
 *
 * <p>Loading a new document closes the previously held one. Access to the
 * held document is not synchronized.
 */
@Component
public class PdfBoxParserImpl implements PdfParser {

    /** Most recently loaded document, or {@code null} if none has been loaded yet. */
    private PDDocument doc = null;

    @Autowired
    private Log log;

    /**
     * Loads the PDF at the given filesystem path and makes it the current document.
     *
     * @param filepath path of the PDF file to load
     * @throws IOException if PDFBox fails to load the file; the previously
     *         held document (if any) is left in place in that case
     */
    public void parseFile(String filepath) throws IOException {
        replaceDocument(PDDocument.load(filepath));
    }

    /**
     * Loads the given PDF file and makes it the current document.
     *
     * @param file the PDF file to load
     * @throws IOException if PDFBox fails to load the file; the previously
     *         held document (if any) is left in place in that case
     */
    public void parseFile(File file) throws IOException {
        replaceDocument(PDDocument.load(file));
    }

    /**
     * Loads the PDF found at the given URL and makes it the current document.
     *
     * @param url location of the PDF to load
     * @throws IOException if PDFBox fails to load the document; the previously
     *         held document (if any) is left in place in that case
     */
    public void parseUrl(URL url) throws IOException {
        replaceDocument(PDDocument.load(url));
    }

    /**
     * Extracts the text of the current document and logs it at INFO level.
     *
     * <p>This method never propagates an exception: if no document has been
     * loaded a warning is logged, and any extraction failure is logged as an
     * error together with its cause.
     */
    public void processStructure() {
        if (doc == null) {
            log.warn("processStructure() called before a PDF document was loaded; nothing to process");
            return;
        }
        try {
            logDocumentText();
        } catch (Exception e) {
            // Broad catch is deliberate: callers of this method have never had
            // to handle extraction failures, so report through the logger
            // instead of letting anything escape.
            log.error("Failed to extract text from PDF document", e);
        }
    }

    /**
     * Strips the text from the current document and writes it to the log.
     *
     * @throws IOException if PDFBox fails while extracting the text
     */
    private void logDocumentText() throws IOException {
        PDFTextStripper stripper = new PDFTextStripper();
        log.info(stripper.getText(doc));
    }

    /**
     * Installs a freshly loaded document and releases the one it replaces.
     *
     * <p>The new document is installed before the old one is closed, and a
     * failure to close the old one is only logged, so cleanup problems never
     * cause a successful load to be reported as failed.
     *
     * @param loaded the newly loaded document
     */
    private void replaceDocument(PDDocument loaded) {
        PDDocument previous = doc;
        doc = loaded;
        if (previous != null) {
            try {
                previous.close();
            } catch (IOException e) {
                log.warn("Failed to close previously loaded PDF document", e);
            }
        }
    }
}