//Tracks whether a wildcard ('*' or '?') was found at the end of a
//previously processed token and still needs to be written as a query term.
boolean foundWildcard = false;
//Lucene tokenizer are really low level ...
//NOTE: we drive the TokenStream manually and rebuild query terms from the
//      character offsets of the tokens within 'value' (the original string).
try {
while(tokenizer.incrementToken()){
//only interested in the start/end indexes of tokens
OffsetAttribute offset = tokenizer.addAttribute(OffsetAttribute.class);
if(lastAdded < 0){ //reset with this token
lastAdded = offset.startOffset();
}
if(foundWildcard){ //wildcard present in the current token
//two cases: "wildcar? at the end", "wild?ard within the word"
// (1) [wildcar,at,the,end] : In this case this is called with
// 'at' as active Token and we need write "wildcar?" as
// query term
// (2) [wild,ard,within,the,word]: In this case this is called with
// 'ard' as active Token and we need write "wild?ard" as
// query term.
if(offset.startOffset() > lastOffset+1) {//(1)
//gap after the previous token: emit the previous token plus one
//extra char (presumably the wildcard itself) -- TODO confirm offset math
String queryElement = value.substring(lastAdded,lastOffset+1);
if(loewercaseWildcardTokens){ //NOTE(review): "loewer" typo is in the field declared elsewhere
//NOTE(review): default-locale toLowerCase(); consider toLowerCase(Locale.ROOT)
queryElement = queryElement.toLowerCase();
}
queryElements.add(queryElement);
lastAdded = offset.startOffset(); //previous token consumed
//set to the start of the current token
foundWildcard = false;
} else if(next != offset.endOffset()){ //(2)
//wildcard inside a word: merge the previous part, the wildcard and
//the current token into a single query term
String queryElement = value.substring(lastAdded,offset.endOffset());
if(loewercaseWildcardTokens){
queryElement = queryElement.toLowerCase();
}
queryElements.add(queryElement);
lastAdded = -1; //consume the current token
foundWildcard = false;
}
}
if(next == offset.endOffset()){ //end of current token is '*' or '?'
//'m' is presumably a Matcher scanning 'value' for '*'/'?' -- TODO confirm;
//'next' encodes the position after the wildcard, or -1 if none left
next = m.find()?m.start()+1:-1; //search next '*', '?' in value
//we need to write all tokens previous to the current (if any)
//NOTE: ignore if foundWildcard is TRUE (multiple wildcards in
// a single word
if(!foundWildcard && lastAdded<lastOffset){
//emit the pending plain tokens as an exact (double-quoted) phrase
String queryElement = value.substring(lastAdded,lastOffset);
queryElements.add('"'+queryElement+'"');
lastAdded = offset.startOffset();
}//else multiple wildcards in a single token
foundWildcard = true;
}
lastOffset = offset.endOffset(); //remember the (exclusive) end of this token
}
} catch (IOException e) {
//StringReader can not throw IOExceptions, so this is never expected here
throw new IllegalStateException(e);
}