{
parser.parse(is, handler, metadata, context);
}
catch (SAXException e)
{
throw new DocumentReadException(e.getMessage(), e);
}
catch (TikaException e)
{
throw new DocumentReadException(e.getMessage(), e);
}
// construct Properties set
Properties props = new Properties();
convertProperty(metadata, props, DCMetaData.CONTRIBUTOR, new String[]{DublinCore.CONTRIBUTOR,
MSOffice.LAST_AUTHOR});
convertProperty(metadata, props, DCMetaData.COVERAGE, DublinCore.COVERAGE);
convertProperty(metadata, props, DCMetaData.CREATOR,
new String[]{MSOffice.AUTHOR, DublinCore.CREATOR});
// different parsers return date in different formats, so keep it as String
convertProperty(metadata, props, DCMetaData.DATE, new String[]{DublinCore.DATE, MSOffice.LAST_SAVED,
MSOffice.CREATION_DATE});
convertProperty(metadata, props, DCMetaData.DESCRIPTION, new String[]{DublinCore.DESCRIPTION,
MSOffice.COMMENTS});
convertProperty(metadata, props, DCMetaData.FORMAT, DublinCore.FORMAT);
convertProperty(metadata, props, DCMetaData.IDENTIFIER, DublinCore.IDENTIFIER);
convertProperty(metadata, props, DCMetaData.LANGUAGE, DublinCore.LANGUAGE);
//convertProperty(metadata, props, DCMetaData.?, DublinCore.MODIFIED);
convertProperty(metadata, props, DCMetaData.PUBLISHER, DublinCore.PUBLISHER);
convertProperty(metadata, props, DCMetaData.RELATION, DublinCore.RELATION);
convertProperty(metadata, props, DCMetaData.RESOURCE, DublinCore.SOURCE);
convertProperty(metadata, props, DCMetaData.RIGHTS, DublinCore.RIGHTS);
convertProperty(metadata, props, DCMetaData.SUBJECT, new String[]{DublinCore.SUBJECT,
MSOffice.KEYWORDS});
convertProperty(metadata, props, DCMetaData.TITLE, DublinCore.TITLE);
convertProperty(metadata, props, DCMetaData.TYPE, DublinCore.TYPE);
return props;
}
finally
{
try
{
is.close();
}
catch (IOException e)
{
if (LOG.isTraceEnabled())
{
LOG.trace("An exception occurred: " + e.getMessage());
}
}
}
}
});
}
catch (PrivilegedActionException pae)
{
Throwable cause = pae.getCause();
if (cause instanceof IOException)
{
throw (IOException)cause;
}
else
{
throw new DocumentReadException("Can not get properties: " + cause.getMessage(), cause);
}
}
}