public void computeEnhancements(ContentItem ci) throws EngineException {
AnalysedText at = getAnalysedText(this, ci, true);
String language = getLanguage(this, ci, true);
isLangaugeConfigured(this, languageConfiguration, language, true);
ChunkerME chunker = initChunker(language);
if(chunker == null){
//init the Phrase TagSet
TagSet<PhraseTag> tagSet = tagSetRegistry.getTagSet(language);
if(tagSet == null){
if(tagSet == null){
log.warn("No Phrase TagSet registered for Language '{}'. Will build an "
+ "adhoc set based on encountered Tags!",language);
//for now only created to avoid checks for tagSet == null
//TODO: in the future we might want to automatically create posModels based
//on tagged texts. However, this makes no sense as long as we cannot
//persist TagSets.
tagSet = new TagSet<PhraseTag>("dummy", language);
//holds PosTags created for POS tags that were not part of the posModel
//(will hold all PosTags in case tagSet is NULL)
Map<String,PhraseTag> adhocTags = languageAdhocTags.get(language);
if(adhocTags == null){
adhocTags = new HashMap<String,PhraseTag>();
languageAdhocTags.put(language, adhocTags);
try {
Iterator<? extends Section> sentences = at.getSentences();
if(!sentences.hasNext()){ //no sentences ... iterate over the whole text
sentences = Collections.singleton(at).iterator();
List<String> tokenTextList = new ArrayList<String>(64);
List<String> posList = new ArrayList<String>(64);
List<Token> tokenList = new ArrayList<Token>(64);
//process each sentence separately
// (1) get Tokens and POS information for the sentence
Section sentence = sentences.next();
Iterator<Token> tokens = sentence.getTokens();
Token token = tokens.next();
Value<PosTag> posValue = token.getAnnotation(POS_ANNOTATION);
if(posValue == null){
throw new EngineException("Missing POS value for Token '"
+ token.getSpan()+"' of ContentItem "+ci.getUri()
+ "(Sentence: '"+sentence.getSpan()+"'). This may "
+ "indicate that a POS tagging Engine is missing in "
+ "the EnhancementChain or that the used POS tagging "
+ "does not provide POS tags for each token!");
} else {
String[] tokenStrings = tokenTextList.toArray(new String[tokenTextList.size()]);
String[] tokenPos = posList.toArray(new String[tokenTextList.size()]);
log.trace("Tokens: {}"+Arrays.toString(tokenStrings));
tokenTextList.clear(); //free memory
posList.clear(); //free memory
// (2) Chunk the sentence
String[] chunkTags = chunker.chunk(tokenStrings, tokenPos);
double[] chunkProb = chunker.probs();
log.trace("Chunks: {}"+Arrays.toString(chunkTags));
tokenStrings = null; //free memory
tokenPos = null; //free memory