int firstProcessableFoundIndex = -1;
int lastFoundIndex = -1;
int lastProcessableFoundIndex = -1;
int firstFoundLabelIndex = -1;
int lastfoundLabelIndex = -1;
TokenData currentToken;
String currentTokenText;
int currentTokenLength;
int notFound = 0;
int matchedTokensNotWithinProcessableTokenSpan = 0;
int foundTokensWithinCoveredProcessableTokens = 0;
float minTokenMatchFactor = linkerConfig.getMinTokenMatchFactor();
//search for matches within the correct order
for(int currentIndex = state.getToken().index;
currentIndex < state.getTokens().size()
&& search ;currentIndex++){
currentToken = state.getTokens().get(currentIndex);
if(currentToken.hasAlphaNumeric){
currentTokenText = currentToken.getTokenText();
if(!linkerConfig.isCaseSensitiveMatching()){
currentTokenText = currentTokenText.toLowerCase();
}
currentTokenLength = currentTokenText.length();
boolean found = false;
float matchFactor = 0f;
//iteration starts at the next token after the last matched one
//so it is OK to skip tokens in the label, but not within the text
for(int i = lastfoundLabelIndex+1;!found && i < labelTokens.length;i ++){
String labelTokenText = labelTokens[i];
int labelTokenLength = labelTokenText.length();
float maxLength = currentTokenLength > labelTokenLength ? currentTokenLength : labelTokenLength;
float lengthDif = Math.abs(currentTokenLength - labelTokenLength);
if((lengthDif/maxLength)<=(1-minTokenMatchFactor)){ //this prevents unnecessary string comparison
int matchCount = compareTokens(currentTokenText, labelTokenText);
if(matchCount/maxLength >= minTokenMatchFactor){
lastfoundLabelIndex = i; //set the last found index to the current position
found = true; //set found to true -> stops iteration
matchFactor = matchCount/maxLength; //how good is the match
//remove matched label tokens from the set so that they are no longer
//available for the later out-of-order (random order) search
labelTokenSet.remove(labelTokenText);
}
}
}
if(!found){
//search for a match in the wrong order
//currently only exact matches (for testing)
if(found = labelTokenSet.remove(currentTokenText)){
matchFactor = 0.7f;
}
}
//int found = text.indexOf(currentToken.getText().toLowerCase());
if(found){ //found
if(currentToken.isMatchable){
foundProcessableTokens++; //only count processable Tokens
if(firstProcessableFoundIndex < 0){
firstProcessableFoundIndex = currentIndex;
}
lastProcessableFoundIndex = currentIndex;
foundTokensWithinCoveredProcessableTokens++;
if(matchedTokensNotWithinProcessableTokenSpan > 0){
foundTokensWithinCoveredProcessableTokens = foundTokensWithinCoveredProcessableTokens +
matchedTokensNotWithinProcessableTokenSpan;
matchedTokensNotWithinProcessableTokenSpan = 0;
}
} else {
matchedTokensNotWithinProcessableTokenSpan++;
}
foundTokens++;
foundTokenMatch = foundTokenMatch + matchFactor; //sum up the matches
if(firstFoundIndex < 0){
firstFoundIndex = currentIndex;
firstFoundLabelIndex = lastfoundLabelIndex;
}
lastFoundIndex = currentIndex;
} else { //not found
notFound++;
if(currentToken.isMatchable || notFound > linkerConfig.getMaxNotFound()){
//stop as soon as a matchable token is not found in the label,
//or as soon as the number of tokens that were not found exceeds
//the configured maximum (linkerConfig.getMaxNotFound())
search = false;
}
}
} // else token without alpha or numeric characters are not processed
}
//search backwards for label tokens located before firstFoundLabelIndex,
//as long as there are unconsumed tokens in the sentence before
//state.getToken().index (i.e. after state.getConsumedIndex())
int currentIndex = state.getToken().index-1;
int labelIndex = firstFoundLabelIndex-1;
notFound = 0;
matchedTokensNotWithinProcessableTokenSpan = 0;
search = true;
while(search && labelIndex >= 0 && currentIndex > state.getConsumedIndex()){
String labelTokenText = labelTokens[labelIndex];
if(labelTokenSet.contains(labelTokenText)){ //still not matched
currentToken = state.getTokens().get(currentIndex);
currentTokenText = currentToken.getTokenText();
if(!linkerConfig.isCaseSensitiveMatching()){
currentTokenText = currentTokenText.toLowerCase();
}
currentTokenLength = currentTokenText.length();
boolean found = false;