// search regexp
for(Concept c: searchRegExp(text)){
if(!isFilteredOut(c)){
c.setScore(1.0);
if(result.containsKey(c)){
Concept oc = result.get(c);
for(String ot: c.getMatchedTerms())
oc.addMatchedTerm(ot);
for(Annotation a: c.getAnnotations())
oc.addAnnotation(a);
}else
result.put(c,c);
}
}
// for each word
Set<String> usedWords = new HashSet<String>();
int count = 0;
for(String word : swords){
count ++;
// filter out junk
if(ignoreSmallWords && word.length() <= 1)
continue;
// filter out common words
if(ignoreCommonWords && TextTools.isCommonWord(word))
continue;
// if word is already in list of used words
// save time and go on this time, but re-added for
// later use in case the word is repeated later on
if(ignoreUsedWords && usedWords.contains(word)){
continue;
}
//Arrays.asList(TextTools.getWords(text))
List<String> textWords = getTextWords(words,count);
for(String term: getBestTerms(textWords,usedWords,word)){
resultTerms.add(term);
if(ignoreUsedWords)
usedWords.addAll(getUsedWords(textWords,term));
}
}
// create result list
//time = System.currentTimeMillis();
for(String term: resultTerms){
Set<String> codes = storage.getTermMap().get(term);
// Derive original looking term
String oterm = getOriginalTerm(text, term, normWords);
// create
List<Concept> termConcepts = new ArrayList<Concept>();
double score = getDefaultScore(term,oterm,resultTerms);
for(String code: codes){
Concept c = convertConcept(code);
if(c != null){
c.setInitialized(true);
}else{
c = new Concept(code,term);
}
// clone
c = c.clone();
c.setTerminology(this);
c.addMatchedTerm(oterm);
c.setSearchString(text);
if(ignoreAcronyms && isAcronym(c))
continue;
scoreConcept(c,term,score);
// filter out really bad ones
if(!scoreConcepts || c.getScore() >= 0.5)
termConcepts.add(c);
}
// add to results
for(Concept c: getBestCandidates(termConcepts)){
if(!isFilteredOut(c)){
// if we have multiple annotations, deal with it better
if(result.containsKey(c)){
Concept oc = result.get(c);
for(String ot: c.getMatchedTerms())
oc.addMatchedTerm(ot);
// this actually fails to identify multiple mentions
//for(Annotation a: c.getAnnotations())
// oc.addAnnotation(a);
}else
result.put(c,c);