package org.sf.mustru.filters;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import org.apache.log4j.Logger;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.sf.mustru.docs.IndexableDoc;
/**
 * Extract the text from a Microsoft Excel (.xls) spreadsheet using the
 * Apache POI HSSF classes. Only cells of type string contribute to the
 * extracted text; all other cell types are ignored.
 */
public class XlsHandler implements HandlerInterface
{
    static Logger logger = Logger.getLogger(XlsHandler.class.getName());

    /**
     * Extract text from a .xls spreadsheet by scanning every sheet of the
     * workbook and store the result in the passed document.
     *
     * Failures while reading or parsing the file are logged and not
     * re-thrown; the document then receives whatever text was collected
     * before the failure (possibly none). The file type is always set to
     * "text" and the file name to <code>ifile</code>.
     *
     * @param ifile path of the spreadsheet file to read
     * @param doc   document that receives the extracted contents, file type
     *              and file name
     */
    public void getDocument(String ifile, IndexableDoc doc)
    {
        StringBuffer sheetText = new StringBuffer();
        FileInputStream xlsStream = null;
        try
        {
            logger.info("Extracting text from spreadsheet " + ifile);
            xlsStream = new FileInputStream(new File(ifile));
            POIFSFileSystem poiFS = new POIFSFileSystem(xlsStream);
            HSSFWorkbook cworkBook = new HSSFWorkbook(poiFS);

            //*-- loop through the individual sheets and parse each one
            for (int i = 0; i < cworkBook.getNumberOfSheets(); i++)
            {
                HSSFSheet cSheet = cworkBook.getSheetAt(i);
                if (cSheet != null)
                {
                    sheetText.append(" ");
                    sheetText.append(extractText(cSheet));
                }
            }
        } //*-- end of try block
        catch (OutOfMemoryError exc)
        {
            //*-- deliberate best effort: log the failure and keep the text gathered so far
            logger.error("Ran out of memory for " + ifile + " or could be corrupt file " + exc.getMessage(), exc);
        }
        catch (IOException exc)
        {
            logger.error("Could not read text from spreadsheet " + ifile + " " + exc.getMessage(), exc);
        }
        catch (Exception e)
        {
            logger.error("Could not extract text from spreadsheet " + ifile + " " + e.getMessage(), e);
        }
        finally
        {
            //*-- always release the file handle, including when parsing failed;
            //*-- closing a FileInputStream that is already closed has no effect
            if (xlsStream != null)
            {
                try
                {
                    xlsStream.close();
                }
                catch (IOException exc)
                {
                    logger.warn("Could not close spreadsheet " + ifile + " " + exc.getMessage(), exc);
                }
            }
        }

        doc.setContents(sheetText);
        doc.setFileType("text");
        doc.setFileName(ifile);
    }

    /**
     * Loop through the individual cells of a sheet and collect the values
     * of all string cells, each preceded by a single space.
     *
     * @param cSheet sheet to scan
     * @return the concatenated string cell values, or an empty string if the
     *         sheet contains no string cells
     */
    private String extractText(HSSFSheet cSheet)
    {
        StringBuffer sb = new StringBuffer();

        //*-- scan the rows from top to bottom
        for (int i = cSheet.getFirstRowNum(); i <= cSheet.getLastRowNum(); i++)
        {
            HSSFRow cRow = cSheet.getRow(i);
            if (cRow == null) continue; //*-- skip empty rows

            //*-- for every row, scan each cell from left to right.
            //*-- NOTE(review): recent POI releases document getLastCellNum() as
            //*-- "last index PLUS ONE"; the inclusive bound is kept on purpose so
            //*-- that no cell is missed whichever convention the POI version in
            //*-- use follows. An index without a cell is skipped by the null
            //*-- check below.
            for (short j = cRow.getFirstCellNum(); j <= cRow.getLastCellNum(); j++)
            {
                HSSFCell cCell = cRow.getCell(j);
                if (cCell == null) continue; //*-- skip empty cells

                //*-- only string cells carry text; other cell types are ignored
                if (cCell.getCellType() == HSSFCell.CELL_TYPE_STRING)
                {
                    String cellValue = cCell.getStringCellValue();
                    if (cellValue != null)
                    {
                        sb.append(" ");
                        sb.append(cellValue);
                    }
                }
            } //*-- end of inner for
        } //*-- end of outer for

        return sb.toString();
    }
}