* if the underlying process failed to work as
* expected
*/
@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
final AnalysedText at = initAnalysedText(this,analysedTextFactory,ci);
String language = getLanguage(this,ci,false);
if(!("zh".equals(language) || (language != null && language.startsWith("zh-")))) {
throw new IllegalStateException("The detected language is NOT 'zh'! "
+ "As this is also checked within the #canEnhance(..) method this "
+ "indicates an Bug in the used EnhancementJobManager implementation. "
+ "Please report this on the dev@apache.stanbol.org or create an "
+ "JIRA issue about this.");
}
if(!at.getSentences().hasNext()) { //no sentences ... use this engine to detect
//first the sentences
TokenStream sentences = new SentenceTokenizer(new CharSequenceReader(at.getText()));
try {
while(sentences.incrementToken()){
OffsetAttribute offset = sentences.addAttribute(OffsetAttribute.class);
Sentence s = at.addSentence(offset.startOffset(), offset.endOffset());
if(log.isTraceEnabled()) {
log.trace("detected {}:{}",s,s.getSpan());
}
}
} catch (IOException e) {
String message = String.format("IOException while reading from "
+"CharSequenceReader of AnalyzedText for ContentItem %s",ci.getUri());
log.error(message,e);
throw new EngineException(this, ci, message, e);
}
}
//now the tokens
TokenStream tokens = new WordTokenFilter(new AnalyzedTextSentenceTokenizer(at));
try {
while(tokens.incrementToken()){
OffsetAttribute offset = tokens.addAttribute(OffsetAttribute.class);
Token t = at.addToken(offset.startOffset(), offset.endOffset());
log.trace("detected {}",t);
}
} catch (IOException e) {
String message = String.format("IOException while reading from "
+"CharSequenceReader of AnalyzedText for ContentItem %s",ci.getUri());