return at != null && at.getTokens().hasNext() ? ENHANCE_ASYNC : CANNOT_ENHANCE;
}
/**
 * Tags the analysed text of the given content item against the FST corpora
 * of a {@link TaggingSession} and writes the resulting enhancements.
 * <p>
 * Processing steps, in order:
 * <ol>
 * <li>look up the {@link AnalysedText} and the language of the content item,
 * <li>create a tagging session for that language,
 * <li>tag the text with the corpus of the document language (if present)
 *     and afterwards with the default corpus (if present),
 * <li>run the matching over the collected tags,
 * <li>write the enhancements while holding the write lock of the content item.
 * </ol>
 * The session is closed in any case once tagging and matching are done.
 *
 * @param ci the content item to process
 * @throws EngineException if the tagging session can not be created, if
 *         neither a language nor a default corpus is available, or if an
 *         {@link IOException} occurs during tagging/matching
 */
@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
    final AnalysedText analysedText = getAnalysedText(this, ci, true);
    log.debug(" > AnalysedText {}", analysedText);
    final String lang = getLanguage(this, ci, true);
    log.debug(" > Language {}", lang);
    if (log.isDebugEnabled()) {
        // only build the abbreviated text excerpt when it is actually logged
        final String uri = ci.getUri().getUnicodeString();
        final String excerpt = StringUtils.abbreviate(analysedText.getSpan(), 100);
        log.debug("computeEnhancements for ContentItem {} language {} text={}",
            new Object[] {uri, lang, excerpt});
    }

    // TODO: we need to do the same for the default matching language
    final TaggingSession session;
    try {
        session = TaggingSession.createSession(indexConfig, lang);
    } catch (CorpusException corpusError) {
        throw new EngineException(this, ci, corpusError);
    }

    final long taggingStart = System.currentTimeMillis();
    final NavigableMap<int[],Tag> spanTags = new TreeMap<int[],Tag>(Tag.SPAN_COMPARATOR);
    try {
        // first pass: corpus for the language of the document
        final Corpus languageCorpus = session.getLanguageCorpus();
        if (languageCorpus != null) {
            final long fstStart = System.currentTimeMillis();
            final int callbackTime = tag(analysedText, session, languageCorpus, spanTags);
            log.debug(" - {}: fst: {}ms (callback: {}ms)", new Object[] {
                languageCorpus.getIndexedField(),
                System.currentTimeMillis() - fstStart,
                callbackTime});
        }

        // second pass: default corpus; results go into the same tag map
        final Corpus defaultCorpus = session.getDefaultCorpus();
        if (defaultCorpus != null) {
            final long fstStart = System.currentTimeMillis();
            final int callbackTime = tag(analysedText, session, defaultCorpus, spanTags);
            log.debug(" - {}: fst: {}ms (callback: {}ms)", new Object[] {
                defaultCorpus.getIndexedField(),
                System.currentTimeMillis() - fstStart,
                callbackTime});
        }
        final long taggingEnd = System.currentTimeMillis();

        // without any corpus nothing could be tagged -> report as engine error
        if (languageCorpus == null && defaultCorpus == null) {
            throw new EngineException(this, ci,
                "No FST corpus found to process contentItem language '"
                    + session.getLanguage() + "'!", null);
        }
        // the summed up time is only of interest if both corpora were used
        if (languageCorpus != null && defaultCorpus != null) {
            log.debug(" - sum fst: {} ms", taggingEnd - taggingStart);
        }

        final int matchCount = match(analysedText, spanTags.values());
        log.debug(" - loaded {} ({} loaded, {} cached, {} appended) Matches in {} ms",
            new Object[] {
                matchCount,
                session.getSessionDocLoaded(),
                session.getSessionDocCached(),
                session.getSessionDocAppended(),
                System.currentTimeMillis() - taggingEnd});
        if (log.isDebugEnabled() && session.getDocumentCache() != null) {
            log.debug("EntityCache Statistics: {}",
                session.getDocumentCache().printStatistics());
        }
    } catch (IOException ioError) {
        throw new EngineException(this, ci, ioError);
    } finally {
        session.close();
    }

    if (log.isTraceEnabled()) {
        log.trace("Tagged Entities:");
        for (Tag tagged : spanTags.values()) {
            log.trace("[{},{}]: {}", new Object[] {
                tagged.getStart(), tagged.getEnd(), tagged.getMatches()});
        }
    }

    // enhancements are written while holding the write lock of the content item
    ci.getLock().writeLock().lock();
    try {
        writeEnhancements(ci, analysedText.getSpan(), spanTags.values(), lang);
    } finally {
        ci.getLock().writeLock().unlock();
    }
    spanTags.clear(); // help the GC
}