/**
 * Emits the next buffered {@code Token}, copying its term text and offsets
 * into this stream's attributes.
 *
 * @return {@code true} if a token was produced, {@code false} once the
 *         buffered array is exhausted
 * @throws IOException declared by the TokenStream contract (not thrown here)
 */
public boolean incrementToken() throws IOException {
boolean hasMore = currentToken < tokens.length;
if (hasMore) {
// Reset attribute state before populating it from the buffered token.
clearAttributes();
Token next = tokens[currentToken];
currentToken++;
termAtt.setTermBuffer(next.term());
offsetAtt.setOffset(next.startOffset(), next.endOffset());
}
return hasMore;
}
}
//code to reconstruct the original sequence of Tokens
// NOTE(review): this span is the interior of a larger method; `tpv` (a term
// position vector) and `tokenPositionsGuaranteedContiguous` come from the
// enclosing scope not visible here.
String[] terms=tpv.getTerms();
int[] freq=tpv.getTermFrequencies();
// Total number of token occurrences = sum of each term's frequency; this
// sizes the position-indexed output array.
int totalTokens=0;
for (int t = 0; t < freq.length; t++)
{
totalTokens+=freq[t];
}
Token tokensInOriginalOrder[]=new Token[totalTokens];
// Lazily created only when position info is unavailable and we must sort later.
ArrayList<Token> unsortedTokens = null;
for (int t = 0; t < freq.length; t++)
{
TermVectorOffsetInfo[] offsets=tpv.getOffsets(t);
// Offsets are mandatory for reconstruction; bail out entirely if any term
// was indexed without them. NOTE(review): presumably the enclosing method
// returns a nullable TokenStream — confirm callers handle null.
if(offsets==null)
{
return null;
}
int[] pos=null;
if(tokenPositionsGuaranteedContiguous)
{
//try get the token position info to speed up assembly of tokens into sorted sequence
pos=tpv.getTermPositions(t);
}
if(pos==null)
{
//tokens NOT stored with positions or not guaranteed contiguous - must add to list and sort later
if(unsortedTokens==null)
{
unsortedTokens=new ArrayList<Token>();
}
// One Token per occurrence of this term, carrying its start/end offsets.
for (int tp = 0; tp < offsets.length; tp++)
{
Token token = new Token(offsets[tp].getStartOffset(), offsets[tp].getEndOffset());
token.setTermBuffer(terms[t]);
unsortedTokens.add(token);
}
}
else
{
//We have positions stored and a guarantee that the token position information is contiguous
// This may be fast BUT wont work if Tokenizers used which create >1 token in same position or
// creates jumps in position numbers - this code would fail under those circumstances
//tokens stored with positions - can use this to index straight into sorted array
// NOTE(review): this loop assumes offsets.length == pos.length and that every
// pos[tp] < totalTokens; a tokenizer emitting position increments != 1 would
// break both assumptions — verify against the contiguity guarantee above.
for (int tp = 0; tp < pos.length; tp++)
{
Token token = new Token(terms[t], offsets[tp].getStartOffset(), offsets[tp].getEndOffset());
tokensInOriginalOrder[pos[tp]] = token;
}
}
}
//If the field has been stored without position data we must perform a sort