Package org.sf.mustru.filters

Source Code of org.sf.mustru.filters.PptHandler

package org.sf.mustru.filters;

//import java.io.FileInputStream;
//import org.apache.poi.hslf.extractor.PowerPointExtractor;

import org.apache.log4j.Logger;
import org.apache.poi.hslf.extractor.QuickButCruddyTextExtractor;

import org.sf.mustru.docs.IndexableDoc;
import org.sf.mustru.utils.*;

/**
* Extract text from a PowerPoint file using the POI classes.
*/
public class PptHandler implements HandlerInterface
{
  static Logger logger = Logger.getLogger(PptHandler.class.getName());
 
  /**
   * empty constructor
   *
   */
  public PptHandler() { super(); }
 
  /**
   *- Extract the text from a Power Point file and return the plain text contents 
   */
  public void getDocument(String ifile, IndexableDoc doc)
  {
    String bodyText = "";
    try
     {
       logger.info("Extracting text from PPT file " + ifile);
      
       //*-- use the cruddy extractor instead of powerpointextractor since it seems to
       //*-- handle all kinds of PPT files
       //PowerPointExtractor ppe = null;
       //ppe = new PowerPointExtractor(ifile);
       //bodyText = ppe.getText() + ppe.getNotes();
       //ppe.close();    
       QuickButCruddyTextExtractor qbt = new QuickButCruddyTextExtractor(ifile);
       bodyText = qbt.getTextAsString()
     }
    catch (OutOfMemoryError e)
      { logger.error("Cannot allocate memory, file may be corrupt " + ifile + " " +  e.getMessage()); }
    catch (Exception e)
      { logger.error("Cannot extract text from a PowerPoint document " + ifile + " " + e.getMessage() ); }
   
    //SlideDoc doc = new SlideDoc(ifile);  
    if (bodyText != null) { bodyText = StringTools.filterChars(bodyText); }
    doc.setContents ( new StringBuffer(bodyText) );
    doc.setFileType("slide"); doc.setFileName(ifile);
    return;
  }

}
TOP

Related Classes of org.sf.mustru.filters.PptHandler

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.