Package org.sf.mustru.filters

Source Code of org.sf.mustru.filters.XlsHandler

package org.sf.mustru.filters;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;

import org.apache.log4j.Logger;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;

import org.sf.mustru.docs.IndexableDoc;

/**
* Extract the text from a Microsoft spreadsheet using the POI classes.
*/
public class XlsHandler implements HandlerInterface
{
  static Logger logger = Logger.getLogger(XlsHandler.class.getName());
 
  /**
   *  Extract text from a .xls spreadsheet by scanning all possible
   *  cells in the spreadsheet.
   */
  public void getDocument(String ifile, IndexableDoc doc)
  {
   StringBuffer sheetText = new StringBuffer();
   try
   {
    logger.info("Extracting text from spreadsheet " + ifile);
    POIFSFileSystem poiFS = new POIFSFileSystem(new FileInputStream(new File(ifile)));
    HSSFWorkbook cworkBook = new HSSFWorkbook(poiFS);

    //*-- loop through the individual sheets and parse each one
    for (int i = 0; i < cworkBook.getNumberOfSheets(); i++)
    { HSSFSheet cSheet = cworkBook.getSheetAt(i);
    if (cSheet != null) { sheetText.append(" "); sheetText.append(extractText(cSheet)); }
    }
   } //*-- end of try block
   catch (OutOfMemoryError exc)
   { logger.error("Ran out of memory for " + ifile + " or could be corrupt file " + exc.getMessage()); }
   catch (IOException exc)
   { logger.error("Could not read text from spreadsheet " + ifile + " " + exc.getMessage()); }
   catch (Exception e)
   { logger.error("Could not extract text from spreadsheet " + ifile + " " + e.getMessage()); }

   doc.setContents ( new StringBuffer( sheetText.toString() ) );
   doc.setFileType("text"); doc.setFileName(ifile);
   return;
  }

  /**
   * Loop through the individual cells of the spreadsheet and extract text
   * strings wherever possible.
   */
  private String extractText(HSSFSheet cSheet)
  {
   StringBuffer sb = new StringBuffer();

   //*-- scan the rows from top to bottom
   for (int i = cSheet.getFirstRowNum(); i <= cSheet.getLastRowNum(); i++)
   { HSSFRow cRow = cSheet.getRow(i);
     if (cRow == null) continue;      //*-- skip empty rows

     //*-- for every row, scan each cell from left to right
     for (short j = cRow.getFirstCellNum(); j <= cRow.getLastCellNum(); j++)
     { HSSFCell cCell = cRow.getCell(j);

     if (cCell == null) continue//*-- skip empty cells
     if (cCell.getCellType() == HSSFCell.CELL_TYPE_STRING)
      { String cellValue = cCell.getStringCellValue();
        if (cellValue != null) { sb.append(" "); sb.append(cellValue); }
      }  
     } //*-- end of inner for
   } //*-- end of outer for
   return (sb.toString());
  }

}
TOP

Related Classes of org.sf.mustru.filters.XlsHandler

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.