Examples of PdfDecoder

org.jpedal.PdfDecoder
Provides an object that decodes PDF files and, if required, rasterizes them. Normal usage is to create an instance of PdfDecoder and access it via its public methods. Examples showing usage are in org.jpedal.examples.
We recommend that you access JPedal using only the public methods listed in the API.

Examples of org.jpedal.PdfDecoder

        case PDF:
          break;
        case PNG:
        case JPG:
          /** instance of PdfDecoder to convert PDF into image */
          PdfDecoder decodePdf = new PdfDecoder(true);


          /** set mappings for non-embedded fonts to use */
          PdfDecoder.setFontReplacements(decodePdf);
          decodePdf.useHiResScreenDisplay(true);
          decodePdf.getDPIFactory().setDpi(2 * 72);
          decodePdf.setPageParameters(1, 1);
          try {
            decodePdf.openPdfArray(baos.toByteArray());
            /** get page 1 as an image */
            BufferedImage img = decodePdf.getPageAsImage(1);


            /** close the pdf file */
            decodePdf.closePdfFile();
            baos.reset();
            ImageIO.write(img, format.getExtension(), baos);
          } catch (PdfException e) {
            throw new PrintingException(e, PrintingException.DOCUMENT_RENDER_PROBLEM);
          }

View Full Code Here

Examples of org.jpedal.PdfDecoder

   */
  private PdfDecoder openNewMultiplePage(String fileName){


    JDesktopPane desktopPane = currentGUI.getMultiViewerFrames();


    /**
     * setup PDF object
     */
    final PdfDecoder localPdf=new PdfDecoder(true);
//    System.out.println("new pdf = "+localPdf.getClass().getName() + "@" + Integer.toHexString(localPdf.hashCode()));
    
    decode_pdf=localPdf;


    currentGUI.setPdfDecoder(decode_pdf);

View Full Code Here

Examples of org.jpedal.PdfDecoder


        this.viewerTitle = title;


        setTitle(title);


        pdfDecoder = new PdfDecoder(true);


        //ensure non-embedded font map to sensible replacements
        FontMappings.setFontReplacements();


        currentFile = name;//store file name for use in page changer

View Full Code Here

Examples of org.jpedal.PdfDecoder

  /**
   * routine to write out clipped PDFs
   */
  private void decodeHires(int start,int end,String imageType,String output_dir){
    
    PdfDecoder decode_pdf=null;
    
    String target="";
    
    //PdfDecoder returns a PdfException if there is a problem
    try{
      
      decode_pdf = new PdfDecoder( false );
      decode_pdf.setExtractionMode(PdfDecoder.FINALIMAGES+PdfDecoder.CLIPPEDIMAGES,72,1);
      
      /** open the file (and read metadata including pages in  file)*/
      decode_pdf.openPdfFile( selectedFile );
      
    }catch( Exception e ){
      e.printStackTrace();
    }
    
    /**
     * extract data from pdf (if allowed). 
     */
    if ((decode_pdf.isEncrypted()&&(!decode_pdf.isPasswordSupplied()))&&(!decode_pdf.isExtractionAllowed()))
      return; 
    
    ProgressMonitor status = new ProgressMonitor(currentGUI.getFrame(),
        Messages.getMessage("PdfViewerMessage.ExtractImages"),"",start,end);
    
    try{
      int count=0;
      boolean yesToAll = false;
      for( int page = start;page < end + 1;page++ ){ //read pages
        if(status.isCanceled()){
          currentGUI.showMessageDialog(Messages.getMessage("PdfViewerError.UserStoppedExport") +
              count+ ' ' +Messages.getMessage("PdfViewerError.ReportNumberOfImagesExported"));
          return;
        }
        //decode the page
        decode_pdf.decodePage( page );
        
        //get the PdfImages object which now holds the images.
        //binary data is stored in a temp directory and we hold the
        //image name and other info in this object
        PdfImageData pdf_images = decode_pdf.getPdfImageData();
        
        //image count (note image 1 is item 0, so any loop runs 0 to count-1)
        int image_count = pdf_images.getImageCount();
        
        if(image_count>0){
          target=output_dir+page+separator;
          File targetExists=new File(target);
          if(!targetExists.exists())
            targetExists.mkdir();
        }
        
        //work through and save each image
        for( int i = 0;i < image_count;i++ ){
          
          String image_name =pdf_images.getImageName( i );
          BufferedImage image_to_save;
          
          float x1=pdf_images.getImageXCoord(i);
          float y1=pdf_images.getImageYCoord(i);
          float w=pdf_images.getImageWidth(i);
          float h=pdf_images.getImageHeight(i);
          
          try{
            
            image_to_save =decode_pdf.getObjectStore().loadStoredImage(  "CLIP_"+image_name );
            
            //save image


            if(image_to_save!=null){
              
              //remove transparency on jpeg
              if(imageType.toLowerCase().startsWith("jp"))
                image_to_save=ColorSpaceConvertor.convertToRGB(image_to_save);


              File fileToSave = new File(target+image_name+ '.' +imageType);
              if(fileToSave.exists() && !yesToAll){
                int n = currentGUI.showOverwriteDialog(fileToSave.getAbsolutePath(),true);
                        
                        if(n==0){
                          // clicked yes so just carry on for this once
                        }else if(n==1){
                          // clicked yes to all, so set flag
                          yesToAll = true;
                        }else if(n==2){
                          // clicked no, so loop round again
                          status.setProgress(page);
                          continue;
                        }else{
                          
                          currentGUI.showMessageDialog(Messages.getMessage("PdfViewerError.UserStoppedExport") +
                              count+ ' ' +Messages.getMessage("PdfViewerError.ReportNumberOfImagesExported"));
                          
                          status.close();
                          return;
                        }
              }
              
              saveImage(image_to_save,target+image_name+ '.' +imageType,imageType);
              count++;
            }
            
            //save an xml file with details
            /**
             * output the data
             */
            //LogWriter.writeLog( "Writing out "+(outputName + ".xml"));
            OutputStreamWriter output_stream =
              new OutputStreamWriter(
                  new FileOutputStream(target+image_name + ".xml"),
              "UTF-8");
            
            output_stream.write(
            "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
            output_stream.write(
            "<!-- Pixel Location of image x1,y1,x2,y2\n");
            output_stream.write("(x1,y1 is top left corner)\n");
            output_stream.write(
            "(origin is bottom left corner)  -->\n");
            output_stream.write("\n\n<META>\n");
            output_stream.write(
                "<PAGELOCATION x1=\""+ x1+ "\" "
                + "y1=\""+ (y1+h)+ "\" "
                + "x2=\""+ (x1+w)+ "\" "
                + "y2=\""+ (y1)+ "\" />\n");
            output_stream.write("<FILE>"+this.fileName+"</FILE>\n"); 
            output_stream.write("</META>\n");
            output_stream.close();
          }catch( Exception ee ){
            ee.printStackTrace();
            LogWriter.writeLog( "Exception " + ee + " in extracting images" );
          }
        }
        
        
        //flush images in case we do more than 1 page so only contains
        //images from current page
        decode_pdf.flushObjectValues(true);
        
        status.setProgress(page+1);
        
      }
      status.close();
      
      currentGUI.showMessageDialog(Messages.getMessage("PdfViewerMessage.ImagesSavedTo")+ ' ' +output_dir);
      
      
    }catch( Exception e ){
      decode_pdf.closePdfFile();
      LogWriter.writeLog( "Exception " + e.getMessage() );
    }
    
    
    
    /**close the pdf file*/
    decode_pdf.closePdfFile();
    
  }

View Full Code Here

Examples of org.jpedal.PdfDecoder

  /**
   * routine to write out images in PDFs
   */
  private void decodeImages(int start,int end,String prefix,String output_dir,boolean downsampled){
    
    PdfDecoder decode_pdf=null;
    
    //PdfDecoder returns a PdfException if there is a problem
    try{
      
      decode_pdf = new PdfDecoder( false );
      
      decode_pdf.setExtractionMode(PdfDecoder.RAWIMAGES+PdfDecoder.FINALIMAGES,72,1);
      /** open the file (and read metadata including pages in  file)*/
      decode_pdf.openPdfFile( selectedFile );
      
    }catch( Exception e ){
      e.printStackTrace();
    }
    
    /**
     * extract data from pdf (if allowed). 
     */
    if ((decode_pdf.isEncrypted()&&(!decode_pdf.isPasswordSupplied()))&&(!decode_pdf.isExtractionAllowed()))
      return; 
    
    ProgressMonitor status = new ProgressMonitor(currentGUI.getFrame(),
        Messages.getMessage("PdfViewerMessage.ExtractImages"),"",start,end);
    
    try{
      int count=0;
      boolean yesToAll = false;
      for( int page = start;page < end + 1;page++ ){ //read pages
        if(status.isCanceled()){
          currentGUI.showMessageDialog(Messages.getMessage("PdfViewerError.UserStoppedExport") +
                            count + Messages.getMessage("PdfViewerError.ReportNumberOfImagesExported"));
          return;
        }
        //decode the page
        decode_pdf.decodePage( page );
        
        //get the PdfImages object which now holds the images.
        //binary data is stored in a temp directory and we hold the
        //image name and other info in this object
        PdfImageData pdf_images = decode_pdf.getPdfImageData();
        
        //image count (note image 1 is item 0, so any loop runs 0 to count-1)
        int image_count = pdf_images.getImageCount();
        
        String target=output_dir+separator;
        if(downsampled)
          target=target+"downsampled"+separator+page+separator;
        else
          target=target+"normal"+separator+page+separator;
        
        //tell user
        if( image_count > 0 ){
          
          
          //create a directory for page
          File page_path = new File( target );
          if( page_path.exists() == false )
            page_path.mkdirs();
          
          
          //do it again as some OS struggle with creating nested dirs
          page_path = new File( target );
          if( page_path.exists() == false )
            page_path.mkdirs();
          
        }
        
        //work through and save each image
        for( int i = 0;i < image_count;i++ )
        {
          String image_name = pdf_images.getImageName( i );
          BufferedImage image_to_save;
          
          try
          {
            if(downsampled){
              //load processed version of image (converted to rgb)
              image_to_save = decode_pdf.getObjectStore().loadStoredImage( image_name );
              if(prefix.toLowerCase().startsWith("jp")){
                image_to_save=ColorSpaceConvertor.convertToRGB(image_to_save);
                
              }
            }else{
              //get raw version of image (R prefix for raw image)
              image_to_save = decode_pdf.getObjectStore().loadStoredImage( image_name );
              if(prefix.toLowerCase().startsWith("jp")){
                image_to_save=ColorSpaceConvertor.convertToRGB(image_to_save);
              }      
            }
            
            File fileToSave = new File(target+ image_name+ '.' +prefix);
            if(fileToSave.exists() && !yesToAll){
              int n = currentGUI.showOverwriteDialog(fileToSave.getAbsolutePath(),true);
                      
                      if(n==0){
                        // clicked yes so just carry on for this once
                      }else if(n==1){
                        // clicked yes to all, so set flag
                        yesToAll = true;
                      }else if(n==2){
                        // clicked no, so loop round again
                        status.setProgress(page);
                        continue;
                      }else{
                        
                        currentGUI.showMessageDialog(Messages.getMessage("PdfViewerError.UserStoppedExport") +
                            count+ ' ' +Messages.getMessage("PdfViewerError.ReportNumberOfImagesExported"));
                        
                        status.close();
                        return;
                      }
            }
            
            //save image
            saveImage(image_to_save,target+ image_name+ '.' +prefix,prefix);
            count++;
          }
          
          
          catch( Exception ee )
          {
            System.err.println( "Exception " + ee + " in extracting images" );
          }
        }
        
        //flush images in case we do more than 1 page so only contains
        //images from current page
        decode_pdf.flushObjectValues(true);
        
        
        status.setProgress(page+1);
      }


      currentGUI.showMessageDialog(Messages.getMessage("PdfViewerMessage.ImagesSavedTo")+ ' ' +output_dir);
      
      status.close();
    }catch( Exception e ){
      decode_pdf.closePdfFile();
      LogWriter.writeLog( "Exception " + e.getMessage() );
    }
    
    /**close the pdf file*/
    decode_pdf.closePdfFile();
    
  }

View Full Code Here

Examples of org.jpedal.PdfDecoder

  
  
  
  private void decodeTextTable(int startPage, int endPage, String output_dir, boolean useXMLExtraction) {
    
    PdfDecoder decode_pdf=null;
    
    try {
      decode_pdf = new PdfDecoder(false);
      decode_pdf.setExtractionMode(PdfDecoder.TEXT); //extract just text
      
      decode_pdf.init(true);
      
      /**
       * open the file (and read metadata including pages in  file)
       */
      
      decode_pdf.openPdfFile(selectedFile);
              
    } catch (Exception e) {
      System.err.println("Exception " + e + " in pdf code");
    }
    
    /**
     * extract data from pdf (if allowed). 
     */
    if ((decode_pdf.isEncrypted()&&(!decode_pdf.isPasswordSupplied()))&& (!decode_pdf.isExtractionAllowed())) {
      System.out.println("Encrypted settings");
      System.out.println("Please look at SimpleViewer for code sample to handle such files");
    } else {
      
      ProgressMonitor status = new ProgressMonitor(currentGUI.getFrame(),
          Messages.getMessage("PdfViewerMessage.ExtractText"),"",startPage,endPage);
      /**
       * extract data from pdf
       */
      try {
        int count=0;
        boolean yesToAll = false;
        for (int page = startPage; page < endPage + 1; page++) { //read pages
          if(status.isCanceled()){
            currentGUI.showMessageDialog(Messages.getMessage("PdfViewerError.UserStoppedExport") +count
                + ' ' +Messages.getMessage("PdfViewerError.ReportNumberOfPagesExported"));
            return;
          }
          //decode the page
          decode_pdf.decodePage(page);
          
          /** create a grouping object to apply grouping to data*/
          PdfGroupingAlgorithms currentGrouping =decode_pdf.getGroupingObject();
          
          /**use whole page size for  demo - get data from PageData object*/
          PdfPageData currentPageData = decode_pdf.getPdfPageData();
          
          int x1,y1,x2,y2;
          
          x1 = currentPageData.getMediaBoxX(page);
          x2 = currentPageData.getMediaBoxWidth(page)+x1;
          
          y2 = currentPageData.getMediaBoxY(page);
          y1 = currentPageData.getMediaBoxHeight(page)+y2;
          
          //default for xml 
          String ending="_text.csv";
          
          if(useXMLExtraction)
            ending="_xml.txt";
          
          /**Co-ordinates are x1,y1 (top left hand corner), x2,y2(bottom right) */
          
          /**The call to extract the table*/
          Map tableContent =null;
          String tableText=null;
          
          try{
            //the source code for this grouping is in the customer area
            //in class pdfGroupingAlgorithms
            //all these settings are defined in the Java
            
            tableContent =currentGrouping.extractTextAsTable(
                x1,
                y1,
                x2,
                y2,
                page,
                !useXMLExtraction,
                false,
                false,false,0);
            
            //get the text from the Map object
            tableText=(String)tableContent.get("content");
            
          } catch (PdfException e) {
            decode_pdf.closePdfFile();
            System.err.println("Exception " + e.getMessage()+" with table extraction");
          }catch (Error e) {
            e.printStackTrace();
          }
          
          if (tableText == null) {
            System.out.println("No text found");
          } else {
            
            
            String target=output_dir+separator+"table"+separator;
            
            //create a directory if it doesn't exist
            File output_path = new File(target);
            if (output_path.exists() == false)
              output_path.mkdirs();
            
            File fileToSave = new File(target + fileName+ '_' +page+ ending);
            if(fileToSave.exists() && !yesToAll){
              if((endPage - startPage) > 1){
                        int n = currentGUI.showOverwriteDialog(fileToSave.getAbsolutePath(),true);
                        
                        if(n==0){
                          // clicked yes so just carry on for this once
                        }else if(n==1){
                          // clicked yes to all, so set flag
                          yesToAll = true;
                        }else if(n==2){
                          // clicked no, so loop round again
                          status.setProgress(page);
                          continue;
                        }else{
                          
                          currentGUI.showMessageDialog(Messages.getMessage("PdfViewerError.UserStoppedExport") +
                              count+ ' ' +Messages.getMessage("PdfViewerError.ReportNumberOfPagesExported"));
                          
                          status.close();
                          return;
                        }
                      }else{
                        int n = currentGUI.showOverwriteDialog(fileToSave.getAbsolutePath(),false);
                        
                        if(n==0){
                          // clicked yes so just carry on
                        }else{
                          // clicked no, so exit
                          return;
                        }
                      }
            }
            
            /**
             * output the data - you may wish to alter the encoding to suit
             */
            OutputStreamWriter output_stream =
              new OutputStreamWriter(
                  new FileOutputStream(target + fileName+ '_' +page+ ending),
              "UTF-8");
            
//            xml header
            if(useXMLExtraction)
              output_stream.write("<xml><BODY>\n\n");
            
            output_stream.write(tableText); //write actual data
            
//            xml footer
            if(useXMLExtraction)
              output_stream.write("\n</body></xml>");
            
            output_stream.close();
            
          }
          count++;
          status.setProgress(page+1);
          //remove data once written out
          decode_pdf.flushObjectValues(false);
        }
        status.close();
        currentGUI.showMessageDialog(Messages.getMessage("PdfViewerMessage.TextSavedTo")+ ' ' +output_dir);
      } catch (Exception e) {
        decode_pdf.closePdfFile();
        System.err.println("Exception " + e.getMessage());
        e.printStackTrace();
      }catch(Error e){
        System.out.println("h34343");
        e.printStackTrace();
      }
      
      decode_pdf.flushObjectValues(true); //flush any text data read
      
    }
    
    /**close the pdf file*/
    decode_pdf.closePdfFile();
    
  }

View Full Code Here

Examples of org.jpedal.PdfDecoder

    
  }
  
  private void decodeTextWordlist(int startPage, int endPage, String output_dir,boolean useXMLExtraction) {
    
    PdfDecoder decode_pdf=null;
    
    //PdfDecoder returns a PdfException if there is a problem
    try {
      decode_pdf = new PdfDecoder(false);
      
      decode_pdf.setExtractionMode(PdfDecoder.TEXT); //extract just text
      decode_pdf.init(true);
      
      
      //always reset to use unaltered co-ords - allow use of rotated or unrotated
      // co-ordinates on pages with rotation (used to be in PdfDecoder)
      PdfGroupingAlgorithms.useUnrotatedCoords=false;
      
      /**
       * open the file (and read metadata including pages in  file)
       */
      decode_pdf.openPdfFile(selectedFile);
      
    } catch (PdfSecurityException e) {
      System.err.println("Exception " + e+" in pdf code for wordlist"+selectedFile);
    } catch (PdfException e) {
      System.err.println("Exception " + e+" in pdf code for wordlist"+selectedFile);
      
    } catch (Exception e) {
      System.err.println("Exception " + e+" in pdf code for wordlist"+selectedFile);
      e.printStackTrace();
    }
    
    /**
     * extract data from pdf (if allowed). 
     */
    if ((decode_pdf.isEncrypted()&&(!decode_pdf.isPasswordSupplied()))&& (!decode_pdf.isExtractionAllowed())) {
      System.out.println("Encrypted settings");
      System.out.println("Please look at SimpleViewer for code sample to handle such files");
      
    } else{
      //page range
      int start = startPage, end = endPage;
      int wordsExtracted=0;
      
      ProgressMonitor status = new ProgressMonitor(currentGUI.getFrame(),
          Messages.getMessage("PdfViewerMessage.ExtractText"),"",startPage,endPage);
      
      /**
       * extract data from pdf
       */
      try {
        int count=0;
        boolean yesToAll = false;
        for (int page = start; page < end + 1; page++) { //read pages
          if(status.isCanceled()){
            currentGUI.showMessageDialog(Messages.getMessage("PdfViewerError.UserStoppedExport") +
                count+ ' ' +Messages.getMessage("PdfViewerError.ReportNumberOfPagesExported"));
            return;
          }
          //decode the page
          decode_pdf.decodePage(page);
          
          /** create a grouping object to apply grouping to data*/
          PdfGroupingAlgorithms currentGrouping =decode_pdf.getGroupingObject();
          
          /**use whole page size for  demo - get data from PageData object*/
          PdfPageData currentPageData = decode_pdf.getPdfPageData();
          
          int x1 = currentPageData.getMediaBoxX(page);
          int x2 = currentPageData.getMediaBoxWidth(page)+x1;
          
          int y2 = currentPageData.getMediaBoxX(page);
          int y1 = currentPageData.getMediaBoxHeight(page)-y2;
          
          /**Co-ordinates are x1,y1 (top left hand corner), x2,y2(bottom right) */
          
          /**The call to extract the list*/
          List words =null;
          
          try{
            words =currentGrouping.extractTextAsWordlist(
                x1,
                y1,
                x2,
                y2,
                page,
                true,"&:=()!;.,\\/\"\"\'\'");
          } catch (PdfException e) {
            decode_pdf.closePdfFile();
            System.err.println("Exception= "+ e+" in "+selectedFile);
            e.printStackTrace();
          }catch(Error e){
            e.printStackTrace();
          }
          
          if (words == null) {
            
            System.out.println("No text found");
            
          } else {
            
            String target=output_dir+separator+"wordlist"+separator;
            
            //create a directory if it doesn't exist
            File output_path = new File(target);
            if (output_path.exists() == false)
              output_path.mkdirs();
            
            /**
             * choose correct prefix
             */
            String prefix="_text.txt";
            String encoding=System.getProperty("file.encoding");
            
            if(useXMLExtraction){
              prefix="_xml.txt";
              encoding="UTF-8";
            }
            
            /**each word is stored as 5 consecutive values (word,x1,y1,x2,y2)*/
            int wordCount=words.size()/5;
            
            //update our count
            wordsExtracted=wordsExtracted+wordCount;
            
            
            File fileToSave = new File(target + fileName+ '_' +page + prefix);
            if(fileToSave.exists() && !yesToAll){
              if((endPage - startPage) > 1){
                        int n = currentGUI.showOverwriteDialog(fileToSave.getAbsolutePath(),true);
                        
                        if(n==0){
                          // clicked yes so just carry on for this once
                        }else if(n==1){
                          // clicked yes to all, so set flag
                          yesToAll = true;
                        }else if(n==2){
                          // clicked no, so loop round again
                          status.setProgress(page);
                          continue;
                        }else{
                          
                          currentGUI.showMessageDialog(Messages.getMessage("PdfViewerError.UserStoppedExport") +
                              count+ ' ' +Messages.getMessage("PdfViewerError.ReportNumberOfPagesExported"));
                          
                          status.close();
                          return;
                        }
                      }else{
                        int n = currentGUI.showOverwriteDialog(fileToSave.getAbsolutePath(),false);
                        
                        if(n==0){
                          // clicked yes so just carry on
                        }else{
                          // clicked no, so exit
                          return;
                        }
                      }
            }
            
            
            /**
             * output the data
             */
            OutputStreamWriter output_stream =
              new OutputStreamWriter(
                  new FileOutputStream(target + fileName+ '_' +page + prefix),
                  encoding);
            
            Iterator wordIterator=words.iterator();
            while(wordIterator.hasNext()){
              
              String currentWord=(String) wordIterator.next();
              
              /**remove the XML formatting if present - not needed for pure text*/
              if(!useXMLExtraction)
                currentWord=Strip.convertToText(currentWord,true);
              
              int wx1=(int)Float.parseFloat((String) wordIterator.next());
              int wy1=(int)Float.parseFloat((String) wordIterator.next());
              int wx2=(int)Float.parseFloat((String) wordIterator.next());
              int wy2=(int)Float.parseFloat((String) wordIterator.next());
              
              /**this could be inserting into a database instead*/
              output_stream.write(currentWord+ ',' +wx1+ ',' +wy1+ ',' +wx2+ ',' +wy2+ '\n');
              
            }
            output_stream.close();
            
          }
          
          count++;
          status.setProgress(page+1);
          
          //remove data once written out
          decode_pdf.flushObjectValues(false);
          
        }
        status.close();
        currentGUI.showMessageDialog(Messages.getMessage("PdfViewerMessage.TextSavedTo")+ ' ' +output_dir);
      } catch (Exception e) {
        decode_pdf.closePdfFile();
        System.err.println("Exception "+ e+" in "+selectedFile);
        e.printStackTrace();
      }catch(Error e){
        e.printStackTrace();
      }  
    }
    
    /**close the pdf file*/
    decode_pdf.closePdfFile();
    
    decode_pdf=null;
    
    
  }

View Full Code Here

Examples of org.jpedal.PdfDecoder

    
  }
  
  private void decodeTextRectangle(int startPage, int endPage, String output_dir,boolean useXMLExtraction) {
    
    PdfDecoder decode_pdf=null;
    
    //PdfDecoder returns a PdfException if there is a problem
    try {
      decode_pdf = new PdfDecoder( false );
      
      if(!useXMLExtraction)
                decode_pdf.useTextExtraction();
      
      decode_pdf.setExtractionMode(PdfDecoder.TEXT); //extract just text
      decode_pdf.init(true);
      
      /**
       * open the file (and read metadata including pages in  file)
       */
      decode_pdf.openPdfFile(selectedFile);
      
    } catch (PdfSecurityException se) {
      System.err.println("Security Exception " + se + " in pdf code for text extraction on file ");
      //e.printStackTrace();
    } catch (PdfException se) {
      System.err.println("Pdf Exception " + se + " in pdf code for text extraction on file ");
      //e.printStackTrace();
    } catch (Exception e) {
      System.err.println("Exception " + e + " in pdf code for text extraction on file ");
      e.printStackTrace();
    }
    
    /**
     * extract data from pdf (if allowed). 
     */
    if ((decode_pdf.isEncrypted()&&(!decode_pdf.isPasswordSupplied()))&& (!decode_pdf.isExtractionAllowed())) {
      System.out.println("Encrypted settings");
      System.out.println("Please look at SimpleViewer for code sample to handle such files");
      
    } else {
      
      ProgressMonitor status = new ProgressMonitor(currentGUI.getFrame(),
          Messages.getMessage("PdfViewerMessage.ExtractText"),"",startPage,endPage);
      
      /**
       * extract data from pdf
       */
      try {
        int count=0;
        boolean yesToAll = false;
        for (int page = startPage; page < endPage + 1; page++) { //read pages
          if(status.isCanceled()){
            currentGUI.showMessageDialog(Messages.getMessage("PdfViewerError.UserStoppedExport") 
                +count+ ' ' +Messages.getMessage("PdfViewerError.ReportNumberOfPagesExported"));
            return;
          }
          //decode the page
          decode_pdf.decodePage(page);
          
          /** create a grouping object to apply grouping to data*/
          PdfGroupingAlgorithms currentGrouping =decode_pdf.getGroupingObject();
          
          /**use whole page size for  demo - get data from PageData object*/
          PdfPageData currentPageData = decode_pdf.getPdfPageData();
          
          int x1 = currentPageData.getMediaBoxX(page);
          int x2 = currentPageData.getMediaBoxWidth(page)+x1;
          
          int y2 = currentPageData.getMediaBoxY(page);
          int y1 = currentPageData.getMediaBoxHeight(page)+y2;
          
          /**Co-ordinates are x1,y1 (top left hand corner), x2,y2(bottom right) */
          
          /**The call to extract the text*/
          String text =null;
          
          try{
            text =currentGrouping.extractTextInRectangle(
                x1,
                y1,
                x2,
                y2,
                page,
                false,
                true);
          } catch (PdfException e) {
            decode_pdf.closePdfFile();
            System.err.println("Exception " + e.getMessage()+" in file "+decode_pdf.getObjectStore().fullFileName);
            e.printStackTrace();
          }
          
          //allow for no text
          if(text==null)
            continue;
          
          String target=output_dir+separator+"rectangle"+separator;          
          
          //ensure a directory for data
          File page_path = new File(target);
          if (page_path.exists() == false)
            page_path.mkdirs();
          
          /**
           * choose correct prefix
           */
          String prefix="_text.txt";
          String encoding=System.getProperty("file.encoding");
          
          if(useXMLExtraction){
            prefix="_xml.txt";
            encoding="UTF-8";
          }


          File fileToSave = new File(target + fileName+ '_' +page + prefix);
          if(fileToSave.exists() && !yesToAll){
            if((endPage - startPage) > 1){
                      int n = currentGUI.showOverwriteDialog(fileToSave.getAbsolutePath(),true);
                      
                      if(n==0){
                        // clicked yes so just carry on for this once
                      }else if(n==1){
                        // clicked yes to all, so set flag
                        yesToAll = true;
                      }else if(n==2){
                        // clicked no, so loop round again
                        status.setProgress(page);
                        continue;
                      }else{
                        
                        currentGUI.showMessageDialog(Messages.getMessage("PdfViewerError.UserStoppedExport") +
                            count+ ' ' +Messages.getMessage("PdfViewerError.ReportNumberOfPagesExported"));
                        
                        status.close();
                        return;
                      }
                    }else{
                      int n = currentGUI.showOverwriteDialog(fileToSave.getAbsolutePath(),false);
                      
                      if(n==0){
                        // clicked yes so just carry on
                      }else{
                        // clicked no, so exit
                        return;
                      }
                    }
          }


          /**
           * output the data
           */
          OutputStreamWriter output_stream =
            new OutputStreamWriter(
                new FileOutputStream(target + fileName+ '_' +page + prefix),
                encoding);
          
          if((useXMLExtraction)){
            output_stream.write(
            "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n\n");
            output_stream.write(
            "<!-- Pixel Location of text x1,y1,x2,y2\n");
            output_stream.write("(x1,y1 is top left corner)\n");
            output_stream.write("(x1,y1 is bottom right corner)\n");
            output_stream.write(
            "(origin is bottom left corner)  -->\n");
            output_stream.write("\n\n<ARTICLE>\n");
            output_stream.write(
                "<LOCATION x1=\""
                + x1
                + "\" "
                + "y1=\""
                + y1
                + "\" "
                + "x2=\""
                + x2
                + "\" "
                + "y2=\""
                + y2
                + "\" />\n");
            output_stream.write("\n\n<TEXT>\n");
            //NOTE DATA IS TECHNICALLY UNICODE
            output_stream.write(text); //write actual data
            output_stream.write("\n\n</TEXT>\n");
            output_stream.write("\n\n</ARTICLE>\n");
          }else
            output_stream.write(text); //write actual data
          
          count++;
          output_stream.close();
          
          status.setProgress(page+1);
          
          //remove data once written out
          decode_pdf.flushObjectValues(true);
        }
        status.close();
        currentGUI.showMessageDialog(Messages.getMessage("PdfViewerMessage.TextSavedTo")+ ' ' +output_dir);
        
      } catch (Exception e) {
        decode_pdf.closePdfFile();
        System.err.println("Exception " + e.getMessage());
        e.printStackTrace();
        System.out.println(decode_pdf.getObjectStore().getCurrentFilename());
      }
    }


    /**close the pdf file*/
    decode_pdf.closePdfFile();
    
    decode_pdf=null;
  }

View Full Code Here

Examples of org.jpedal.PdfDecoder

    /**example method to open a file and return the links*/
    public ShowLinks(String file_name) {


        BufferedImage img = null;


        PdfDecoder decodePdf;


        if (includeImages) {
            decodePdf = new PdfDecoder(true);
        } else {
            decodePdf = new PdfDecoder(false);
        }




        try {
            decodePdf.openPdfFile(file_name);




            /**
             * form code here
             */
            //new list we can parse
            for (int ii = 1; ii < decodePdf.getPageCount() + 1; ii++) {


                //the list of Annots from the file
                PdfArrayIterator annotListForPage = decodePdf.getFormRenderer().getAnnotsOnPage(ii);




                if (annotListForPage != null && annotListForPage.getTokenCount() > 0) { //can have empty lists




                    //get image if needed and save
                    if (includeImages) {
                        img = decodePdf.getPageAsImage(ii);


                    }


                    while (annotListForPage.hasMoreTokens()) {


                        //get ID of annot which has already been decoded and get actual object
                        String annotKey = annotListForPage.getNextValueAsString(true);


                        Object[] rawObj = decodePdf.getFormRenderer().getCompData().getRawForm(annotKey);
                        for (Object aRawObj : rawObj) {
                            if (aRawObj != null) {
                                //each PDF annot object - extract data from it
                                FormObject annotObj = (FormObject) aRawObj;


                                int subtype = annotObj.getParameterConstant(PdfDictionary.Subtype);


                                if (subtype == PdfDictionary.Link) {


                                    //PDF co-ords
                                    System.out.println("link object");
                                    float[] coords = annotObj.getFloatArray(PdfDictionary.Rect);
                                    System.out.println("PDF Rect= " + coords[0] + ' ' + coords[1] + ' ' + coords[2] + ' ' + coords[3]);


                                    //convert to Javaspace rectangle by subtracting page Crop Height
                                    int pageH = decodePdf.getPdfPageData().getCropBoxHeight(ii);
                                    float x = coords[0];


                                    float w = coords[2] - coords[0];
                                    float h = coords[3] - coords[1];
                                    float y = pageH - coords[1] - h; //note we remove h from y
                                    System.out.println("Javaspace Rect x=" + x + " y=" + y + " w=" + w + " h=" + h);


                                    //draw on image as example
                                    //get image if needed and save
                                    if (includeImages) {


                                        //as an example draw onto page
                                        Graphics2D g2 = (Graphics2D) img.getGraphics();
                                        g2.setPaint(Color.RED);
                                        g2.drawRect((int) x, (int) y, (int) w, (int) h);


                                    }


                                    //text in A subobject
                                    PdfObject aData = annotObj.getDictionary(PdfDictionary.A);
                                    if (aData != null && aData.getNameAsConstant(PdfDictionary.S) == PdfDictionary.URI) {
                                        String text = aData.getTextStreamValue(PdfDictionary.URI); //+"ZZ"; deliberately broken first to test checking
                                        System.out.println("text=" + text);
                                    }
                                }
                            }
                        }
                    }
                }


                //get image if needed and save
                if (includeImages) {


                    ImageIO.write(img, "PNG", new File("image-" + ii + ".png"));
                }
            }


            /**close the pdf file*/
            decodePdf.closePdfFile();


        } catch (Exception e) {
            e.printStackTrace();


        }

View Full Code Here

Examples of org.jpedal.PdfDecoder


public class ShowPageSize {


    public ShowPageSize(String file_name){


        PdfDecoder decode_pdf = new PdfDecoder( false ); //false as no display


        try{
        decode_pdf.openPdfFile( file_name );


            /**get page count*/
      int pageCount= decode_pdf.getPageCount();
      System.out.println( "Page count=" + pageCount );




            //get PageData object
            PdfPageData pageData = decode_pdf.getPdfPageData();
            //show all page sizes
            for(int ii=0;ii<pageCount;ii++){


                //pixels
                System.out.print("page (size in pixels) "+ii+
                        " mediaBox="+pageData.getMediaBoxX(ii)+ ' ' +pageData.getMediaBoxY(ii)+ ' ' +pageData.getMediaBoxWidth(ii)+ ' ' +pageData.getMediaBoxHeight(ii)+
                        " CropBox="+pageData.getCropBoxX(ii)+ ' ' +pageData.getCropBoxY(ii)+ ' ' +pageData.getCropBoxWidth(ii)+ ' ' +pageData.getCropBoxHeight(ii));


                //inches
                float factor=72f; //72 is the usual screen dpi
                System.out.print(" (size in inches) "+ii+
                        " mediaBox="+pageData.getMediaBoxX(ii)/factor+ ' ' +pageData.getMediaBoxY(ii)/factor+ ' ' +pageData.getMediaBoxWidth(ii)/factor+ ' ' +pageData.getMediaBoxHeight(ii)/factor+
                        " CropBox="+pageData.getCropBoxX(ii)/factor+ ' ' +pageData.getCropBoxY(ii)/factor+pageData.getCropBoxWidth(ii)/factor+ ' ' +pageData.getCropBoxHeight(ii)/factor);


                //cm
                factor=72f/2.54f;
                System.out.print(" (size in cm) "+ii+
                        " mediaBox="+pageData.getMediaBoxX(ii)/factor+ ' ' +pageData.getMediaBoxY(ii)/factor+ ' ' +pageData.getMediaBoxWidth(ii)/factor+ ' ' +pageData.getMediaBoxHeight(ii)/factor+
                        " CropBox="+pageData.getCropBoxX(ii)/factor+ ' ' +pageData.getCropBoxY(ii)/factor+pageData.getCropBoxWidth(ii)/factor+ ' ' +pageData.getCropBoxHeight(ii)/factor+ '\n');


            }


      /**close the pdf file*/
      decode_pdf.closePdfFile();
        }catch(Exception e){
            e.printStackTrace();
        }
    }

View Full Code Here

0 1 2

TOP

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.