Package com.cardence.lawshelf.pdf

Source Code of com.cardence.lawshelf.pdf.PdfBoxParserImpl

package com.cardence.lawshelf.pdf;

import java.io.File;
import java.io.IOException;
import java.net.URL;

import org.apache.commons.logging.Log;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.util.PDFTextStripper;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;

@Component
public class PdfBoxParserImpl implements PdfParser {

  private PDDocument doc = null;

  @Autowired
  private Log log;

  public void parseFile(String filepath) throws IOException {
    // TODO Auto-generated method stub

    doc = PDDocument.load(filepath);
  }

  public void parseFile(File file) throws IOException {
    // TODO Auto-generated method stub

    doc = PDDocument.load(file);

  }

  public void parseUrl(URL url) throws IOException {
    // TODO Auto-generated method stub

    doc = PDDocument.load(url);

  }

  public void processStructure() {
    // TODO Auto-generated method stub
    try {
      temp();
    } catch (Exception e) {
      e.printStackTrace();
    }
  }

  private void temp() throws IOException {
    PDFTextStripper stripper = new PDFTextStripper();
    String text = stripper.getText(doc);
    log.info(text);
  }

}
TOP

Related Classes of com.cardence.lawshelf.pdf.PdfBoxParserImpl

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle, Inc. Contact coftware#gmail.com.