String docLongTitle = null;
String docAuthor = null;
String docLanguage = null;
// opening the file as zip file
final ZipFile zipFile= new ZipFile(dest);
final Enumeration<? extends ZipEntry> zipEnum = zipFile.entries();
final SAXParserFactory saxParserFactory = SAXParserFactory.newInstance();
// loop over all entries contained in the zip file
while (zipEnum.hasMoreElements()) {
// get next zip file entry
final ZipEntry zipEntry= zipEnum.nextElement();
final String entryName = zipEntry.getName();
// word/document.xml, ppt/slides/slide* and xl/worksheets/sheet* contain the document content in xml format
if (entryName.equals("word/document.xml")
|| entryName.startsWith("ppt/slides/slide")
|| entryName.startsWith("xl/worksheets/sheet")) {
// create a writer for output
writer = new CharBuffer();
// extract data
final InputStream zipFileEntryStream = zipFile.getInputStream(zipEntry);
final SAXParser saxParser = saxParserFactory.newSAXParser();
saxParser.parse(zipFileEntryStream, new ODContentHandler(writer));
// close readers and writers
zipFileEntryStream.close();
writer.close();
} else if (entryName.equals("docProps/core.xml")) {
// docProps/core.xml contains metadata about the document
final InputStream zipFileEntryStream = zipFile.getInputStream(zipEntry);
final SAXParser saxParser = saxParserFactory.newSAXParser();
final ODMetaHandler metaData = new ODMetaHandler();
saxParser.parse(zipFileEntryStream, metaData);
docDescription = metaData.getDescription();
docKeywordStr = metaData.getKeyword();