Examples of PDFParser

com.dotcms.repackage.org.apache.pdfbox.pdfparser.PDFParser
com.flaptor.util.parser.PdfParser
com.sun.pdfview.PDFParser
PDFParser is the class that parses a PDF content stream and produces PDFCmds for a PDFPage. You should never ever see it run: it gets created by a PDFPage only if needed, and may even run in its own thread. @author Mike Wessler
org.apache.padaf.preflight.javacc.PDFParser
org.apache.pdfbox.pdfparser.PDFParser
This class will handle the parsing of the PDF document. @author Ben Litchfield
org.apache.tika.parser.pdf.PDFParser
PDF parser.
This parser can also process encrypted PDF documents if the required password is given as part of the input metadata associated with a document. If no password is given, then this parser will try decrypting the document using the empty password that's often used with PDFs.
org.pdfbox.pdfparser.PDFParser
This class will handle the parsing of the PDF document. @author Ben Litchfield @version $Revision: 1.53 $

Examples of org.pdfbox.pdfparser.PDFParser

   * @return Reader a reader that is fed to an indexer.
   */
  protected Reader getReader(InputStream docStream)
  {
    
    PDFParser parser = null; PDDocument document = null; PDFTextStripper stripper = null;
    CharArrayWriter writer = null;
    try{
      parser = new PDFParser(docStream);
      parser.parse();
      document = parser.getPDDocument();
      writer = new CharArrayWriter();
      stripper = new PDFTextStripper();
      stripper.setLineSeparator("\n");
      stripper.writeText(document, writer);
      document.close();
      writer.close();
      parser.getDocument().close();
      return new CharArrayReader(writer.toCharArray());
    }catch (Exception e){
        //logger.warn("WARNING: Problem converting PDF: ",e);
      try{
        document.close();        
      }catch(Exception e1){
        //logger.warn("WARNING: Problem converting PDF: ",e1);
      }
      try{
        writer.close();
      }catch(Exception e2){
        //logger.warn("WARNING: Problem converting PDF: ",e2);
      }
      try{
        parser.getDocument().close();
      }catch(Exception e3){
        //logger.warn("WARNING: Problem converting PDF: ",e3);  
      }
      parser = null; document = null; writer = null; stripper = null;
      EOD=true;

View Full Code Here

Examples of org.pdfbox.pdfparser.PDFParser

      // FileOutputStream fout = new FileOutputStream("/home/nutchwax/lixo/"+System.currentTimeMillis()+".pdf"); 
      // fout.write(raw);
      // fout.close();
      // TODO MC


      PDFParser parser = new PDFParser(new ByteArrayInputStream(raw));
      parser.parse();


      pdf = parser.getPDDocument();


      if (pdf.isEncrypted()) {
        DocumentEncryption decryptor = new DocumentEncryption(pdf);
        //Just try using the default password and move on
        decryptor.decryptDocument("");

View Full Code Here

Examples of org.pdfbox.pdfparser.PDFParser

        InternalValue[] values = data.getValues();
        if (values.length > 0) {
            BLOBFileValue blob = (BLOBFileValue) values[0].internalValue();
                
            try {
                PDFParser parser = new PDFParser(blob.getStream());
                parser.parse();
    
                PDDocument document = parser.getPDDocument();
    
                CharArrayWriter writer = new CharArrayWriter();
    
                PDFTextStripper stripper = new PDFTextStripper();
                stripper.setLineSeparator("\n");

View Full Code Here

Examples of org.pdfbox.pdfparser.PDFParser

          return new ParseStatus(ParseStatus.FAILED, ParseStatus.FAILED_TRUNCATED,
                  "Content truncated at "+raw.length
            +" bytes. Parser can't handle incomplete pdf file.").getEmptyParse(getConf());
      }


      PDFParser parser = new PDFParser(new ByteArrayInputStream(raw));
      parser.parse();


      pdf = parser.getPDDocument();


      if (pdf.isEncrypted()) {
        DocumentEncryption decryptor = new DocumentEncryption(pdf);
        //Just try using the default password and move on
        decryptor.decryptDocument("");

View Full Code Here

Examples of org.pdfbox.pdfparser.PDFParser

  
  public static final Log log = LogFactory.getLog(PDFIndexer.class); 


  public IndexDocument getIndexedDocument(File2Index fileData) throws SolrException {
    try {
      PDFParser parser = new PDFParser(new ByteArrayInputStream(fileData.data));
      parser.parse();
      COSDocument cosDoc = parser.getDocument();


      PDFTextStripper stripper = new PDFTextStripper();
      String docText = stripper.getText(new PDDocument(cosDoc));
      cosDoc.close();

View Full Code Here

Examples of org.pdfbox.pdfparser.PDFParser

                validateForLocalUrl(url);
                resourceURL = new URL(url);
                is = resourceURL.openStream();
            }


            PDFParser parser = new PDFParser(is);
            parser.parse();
            COSDocument cosDoc = parser.getDocument();


            PDFTextStripper stripper = new PDFTextStripper();
            String docText = stripper.getText(new PDDocument(cosDoc));
            cosDoc.close();
            Document document = new Document();

View Full Code Here

Examples of org.pdfbox.pdfparser.PDFParser

        try {
            // get file as stream
            input = new FileInputStream(filename);


            // init PDFParser with stream
            PDFParser parser = new PDFParser(input);
            parser.setTempDirectory(new File(System.getProperty("java.io.tmpdir")));


            // parse
            parser.parse();


            // return Document
            return parser.getPDDocument();
        }
        finally {
            IOUtils.closeQuietly(input);
        }
    }

View Full Code Here

Examples of org.pdfbox.pdfparser.PDFParser

          return new ParseStatus(ParseStatus.FAILED, ParseStatus.FAILED_TRUNCATED,
                  "Content truncated at "+raw.length
            +" bytes. Parser can't handle incomplete pdf file.").getEmptyParse();
      }


      PDFParser parser = new PDFParser(
        new ByteArrayInputStream(raw));
      parser.parse();


      pdf = parser.getPDDocument();


      if (pdf.isEncrypted()) {
        DocumentEncryption decryptor = new DocumentEncryption(pdf);
        //Just try using the default password and move on
        decryptor.decryptDocument("");

View Full Code Here

Examples of org.pdfbox.pdfparser.PDFParser

            && raw.length != Integer.parseInt(contentLength)) {
          throw new ParseException("Content truncated at "+raw.length
            +" bytes. Parser can't handle incomplete pdf file.");
      }


      PDFParser parser = new PDFParser(
        new ByteArrayInputStream(raw));
      parser.parse();


      pdf = parser.getPDDocument();


      if (pdf.isEncrypted()) {
        DocumentEncryption decryptor = new DocumentEncryption(pdf);
        //Just try using the default password and move on
        decryptor.decryptDocument("");

View Full Code Here

Examples of org.pdfbox.pdfparser.PDFParser


    public Reader extract(InputStream content)  throws ExtractorException
    {
        try
        {
            PDFParser parser = new PDFParser( content );
            parser.parse();


            PDDocument document = parser.getPDDocument();


            CharArrayWriter writer = new CharArrayWriter();


            PDFTextStripper stripper = new PDFTextStripper();
            stripper.setLineSeparator("\n");

View Full Code Here

0 1 2 3 4 5

TOP

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., and is owned by ORACLE Inc. Contact coftware#gmail.com.