@Override
public void processBean(final CRResolvableBean bean) throws CRException {
if (this.contentAttributeField != null) {
Object obj = bean.get(this.contentAttributeField);
if (obj != null) {
TikaInputStream inputStream = null;
if (obj instanceof byte[]) {
inputStream = TikaInputStream.get((byte[]) obj);
} else {
throw new IllegalArgumentException("Parameter must be instance of byte[]");
}
ContentHandler textHandler = new BodyContentHandler(fileLengthLimit);
Metadata metadata = new Metadata();
ParseContext context = new ParseContext();
try {
metadata.set(Metadata.CONTENT_TYPE, tika.detect(inputStream));
parser.parse(inputStream, textHandler, metadata, context);
bean.set(headingField, metadata.get(TikaCoreProperties.TITLE));
if (bean.get(createTimestampField) == null) {
bean.set(createTimestampField, metadata.get(TikaCoreProperties.CREATED));
}
if (bean.get(editTimestampField) == null) {
bean.set(editTimestampField, metadata.get(TikaCoreProperties.MODIFIED));
}
if (bean.get(keywordsField) == null) {
bean.set(keywordsField, metadata.get(TikaCoreProperties.KEYWORDS));
}
if (bean.get(publishTimestampField) == null) {
bean.set(publishTimestampField, metadata.get(TikaCoreProperties.PRINT_DATE));
}
if (bean.get(mimetypeField) == null) {
//HttpHeaders.CONTENT_TYPE
bean.set(mimetypeField, metadata.get(Metadata.CONTENT_TYPE));
}
String content = prepareContent(bean, textHandler);
bean.set(this.targetAttributeField, content);
} catch (IOException e) {
LOGGER.error("Error reading inputstream from bean: " + bean.getContentid(), e);
} catch (SAXException e) {
LOGGER.error("Sax Parser Exception while reading inputstream from bean: " + bean.getContentid(), e);
} catch (TikaException e) {
LOGGER.error("Tika Parser Exception while reading inputstream from bean: " + bean.getContentid(), e);
} catch (Exception e) {
LOGGER.error("Exception occured while indexing file at bean: " + bean.getContentid(), e);
} finally {
try {
if (inputStream != null) {
inputStream.close();
}
} catch (IOException e) {
LOGGER.error("Could not close inputstream of bean: " + bean.getContentid(), e);
}
}