        return;
    }
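
    // What follows is a single pass over all words sorted by their stem
    // images (stemImagesOrder): each run of words sharing a stem is merged
    // into one entry of context.allStems. The sorting and the local array
    // aliases (wordTfArray, wordStemImages, ...) are assumed to be set up
    // earlier in this method.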

    // Lists to accommodate the results
    final ArrayList<char []> stemImages = new ArrayList<char []>(allWordsCount);
    final IntArrayList stemTf = new IntArrayList(allWordsCount);
    final IntArrayList stemMostFrequentWordIndexes = new IntArrayList(allWordsCount);
    final ArrayList<int []> stemTfByDocumentList = new ArrayList<int []>(allWordsCount);
    final ByteArrayList fieldIndexList = new ByteArrayList();
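
    // The five lists above are parallel: entry i of each list describes the
    // i-th distinct stem, in the order defined by stemImagesOrder.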

    // Counters
    int totalTf = wordTfArray[stemImagesOrder[0]];
    int mostFrequentWordFrequency = wordTfArray[stemImagesOrder[0]];
    int mostFrequentWordIndex = stemImagesOrder[0];
    int stemIndex = 0;
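
    // The counters above describe the run of words currently being merged:
    // totalTf accumulates the run's aggregate term frequency, while the
    // mostFrequentWord* pair tracks its most frequent surface form, later
    // exposed as allStems.mostFrequentOriginalWordIndex.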

    // A list of document-term-frequency pairs, by document, for all words
    // with identical stems.
    final ArrayList<int []> stemTfsByDocument = Lists.newArrayList();
    stemTfsByDocument.add(wordTfByDocumentArray[stemImagesOrder[0]]);

    byte fieldIndices = 0;
    fieldIndices |= wordsFieldIndices[stemImagesOrder[0]];
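
    // fieldIndices is a bit mask over document fields: ORing the per-word
    // masks marks every field in which any surface form of the current stem
    // occurs.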

    // For locating query words
    final MutableCharArray buffer = new MutableCharArray(
        wordStemImages[stemImagesOrder[0]]);
    boolean inQuery = queryStems.contains(buffer);
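
    // The single MutableCharArray wrapper is reused (see buffer.reset()
    // below), presumably so queryStems can be probed without allocating a
    // new object for every stem image.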

    // Go through all words in the order of stem images
    for (int i = 0; i < stemImagesOrder.length - 1; i++)
    {
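        // Each iteration compares the current word with the next one in stem
        // order; per-run statistics are flushed only when the stem image
        // changes, so a run of equal stems collapses into a single entry.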
        final int orderIndex = stemImagesOrder[i];
        final char [] stem = wordStemImages[orderIndex];
        final int nextInOrderIndex = stemImagesOrder[i + 1];
        final char [] nextStem = wordStemImages[nextInOrderIndex];
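
        // Every surface form in the current run is assigned the same stem
        // index; stemIndexesArray is the word -> stem mapping.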
        stemIndexesArray[orderIndex] = stemIndex;

        if (inQuery)
        {
            wordsType[orderIndex] |= ITokenizer.TF_QUERY_WORD;
        }

        // Now check if the stem image is changing
        final boolean sameStem = CharArrayComparators.FAST_CHAR_ARRAY_COMPARATOR
            .compare(stem, nextStem) == 0;

        if (sameStem)
        {
            totalTf += wordTfArray[nextInOrderIndex];
            stemTfsByDocument.add(wordTfByDocumentArray[nextInOrderIndex]);
            fieldIndices |= wordsFieldIndices[nextInOrderIndex];
            if (mostFrequentWordFrequency < wordTfArray[nextInOrderIndex])
            {
                mostFrequentWordFrequency = wordTfArray[nextInOrderIndex];
                mostFrequentWordIndex = nextInOrderIndex;
            }
        }
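        // The next word starts a new stem: flush the statistics gathered for
        // the completed run and reset the per-run state. storeTfByDocument()
        // is not shown in this excerpt; presumably it merges the collected
        // per-word (document, tf) arrays into one per-stem array appended to
        // stemTfByDocumentList, summing frequencies for the same document.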
        else
        {
            stemImages.add(stem);
            stemTf.add(totalTf);
            stemMostFrequentWordIndexes.add(mostFrequentWordIndex);
            storeTfByDocument(stemTfByDocumentList, stemTfsByDocument);
            fieldIndexList.add(fieldIndices);

            stemIndex++;
            totalTf = wordTfArray[nextInOrderIndex];
            mostFrequentWordFrequency = wordTfArray[nextInOrderIndex];
            mostFrequentWordIndex = nextInOrderIndex;

            fieldIndices = 0;
            fieldIndices |= wordsFieldIndices[nextInOrderIndex];
            stemTfsByDocument.clear();
            stemTfsByDocument.add(wordTfByDocumentArray[nextInOrderIndex]);

            buffer.reset(wordStemImages[nextInOrderIndex]);
            inQuery = queryStems.contains(buffer);
        }
    }
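
    // The loop looks one word ahead, so the final run never sees a differing
    // next stem; its statistics are flushed here.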

    // Store tf for the last stem in the array
    stemImages.add(wordStemImages[stemImagesOrder[stemImagesOrder.length - 1]]);
    stemTf.add(totalTf);
    stemMostFrequentWordIndexes.add(mostFrequentWordIndex);
    stemIndexesArray[stemImagesOrder[stemImagesOrder.length - 1]] = stemIndex;
    storeTfByDocument(stemTfByDocumentList, stemTfsByDocument);
    fieldIndexList.add(fieldIndices);

    if (inQuery)
    {
        wordsType[stemImagesOrder[stemImagesOrder.length - 1]] |= ITokenizer.TF_QUERY_WORD;
    }

    // Convert lists to arrays and store them in allStems
    context.allStems.image = stemImages.toArray(new char [stemImages.size()] []);
    context.allStems.mostFrequentOriginalWordIndex = stemMostFrequentWordIndexes.toArray();
    context.allStems.tf = stemTf.toArray();
    context.allStems.tfByDocument = stemTfByDocumentList
        .toArray(new int [stemTfByDocumentList.size()] []);
    context.allStems.fieldIndices = fieldIndexList.toArray();
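
    // A hypothetical worked example: if the surface forms "fish" (tf = 3),
    // "fishing" (tf = 2) and "fisher" (tf = 4) all stem to "fish", the pass
    // above produces a single stem entry s with:
    //   context.allStems.image[s]                         == "fish"
    //   context.allStems.tf[s]                            == 9
    //   context.allStems.mostFrequentOriginalWordIndex[s] == index of "fisher"
    // and stemIndexesArray maps all three word indexes to s.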