{
parser.parse(is, handler, metadata, context);
}
catch (SAXException e)
{
throw new DocumentReadException(e.getMessage(), e);
}
catch (TikaException e)
{
throw new DocumentReadException(e.getMessage(), e);
}
// Build the Properties set from the metadata extracted by the parser
Properties props = new Properties();
convertProperty(metadata, props, DCMetaData.CONTRIBUTOR, new String[]{DublinCore.CONTRIBUTOR.getName(),
MSOffice.LAST_AUTHOR});
convertProperty(metadata, props, DCMetaData.COVERAGE, DublinCore.COVERAGE);
convertProperty(metadata, props, DCMetaData.CREATOR,
new String[]{MSOffice.AUTHOR, DublinCore.CREATOR.getName()});
// Different parsers return dates in different formats, so the value is kept as a String
convertProperty(metadata, props, DCMetaData.DATE, new Property[]{DublinCore.DATE,
MSOffice.LAST_SAVED, MSOffice.CREATION_DATE});
convertProperty(metadata, props, DCMetaData.DESCRIPTION, new String[]{DublinCore.DESCRIPTION.getName(),
MSOffice.COMMENTS});
convertProperty(metadata, props, DCMetaData.FORMAT, DublinCore.FORMAT);
convertProperty(metadata, props, DCMetaData.IDENTIFIER, DublinCore.IDENTIFIER);
convertProperty(metadata, props, DCMetaData.LANGUAGE, DublinCore.LANGUAGE);
//convertProperty(metadata, props, DCMetaData.?, DublinCore.MODIFIED);
convertProperty(metadata, props, DCMetaData.PUBLISHER, DublinCore.PUBLISHER);
convertProperty(metadata, props, DCMetaData.RELATION, DublinCore.RELATION);
convertProperty(metadata, props, DCMetaData.RESOURCE, DublinCore.SOURCE);
convertProperty(metadata, props, DCMetaData.RIGHTS, DublinCore.RIGHTS);
convertProperty(metadata, props, DCMetaData.SUBJECT, new String[]{Metadata.SUBJECT,
OfficeOpenXMLCore.SUBJECT.getName(), DublinCore.SUBJECT.getName(), MSOffice.KEYWORDS});
convertProperty(metadata, props, DCMetaData.TITLE, DublinCore.TITLE);
convertProperty(metadata, props, DCMetaData.TYPE, DublinCore.TYPE);
return props;
}
finally
{
try
{
is.close();
}
catch (IOException e)
{
if (LOG.isTraceEnabled())
{
LOG.trace("An exception occurred: " + e.getMessage());
}
}
}
}
});
}
catch (PrivilegedActionException pae)
{
Throwable cause = pae.getCause();
if (cause instanceof IOException)
{
throw (IOException)cause;
}
else
{
throw new DocumentReadException("Can not get properties: " + cause.getMessage(), cause);
}
}
}