/**
 * Emits the next token from the {@code tokens} array through this stream's
 * term, offset and position-increment attributes.
 *
 * @return {@code false} once {@code currentToken} has reached
 *         {@code tokens.length}; {@code true} after a token has been emitted
 */
@Override
public boolean incrementToken() throws IOException {
  final int index = currentToken;
  if (index >= tokens.length) {
    // nothing left to emit
    return false;
  }

  // advance the cursor before touching the attributes
  currentToken = index + 1;
  final Token current = tokens[index];

  clearAttributes();
  termAtt.setEmpty().append(current);
  offsetAtt.setOffset(current.startOffset(), current.endOffset());

  // The first token always gets an increment of 1. A later token gets 1 only
  // when its start offset is strictly greater than that of the token emitted
  // just before it; otherwise it gets 0.
  int increment = 1;
  if (index > 0
      && current.startOffset() <= tokens[index - 1].startOffset()) {
    increment = 0;
  }
  posincAtt.setPositionIncrement(increment);

  return true;
}
}
// code to reconstruct the original sequence of Tokens
String[] terms = tpv.getTerms();
int[] freq = tpv.getTermFrequencies();
int totalTokens = 0;
for (int t = 0; t < freq.length; t++) {
totalTokens += freq[t];
}
Token tokensInOriginalOrder[] = new Token[totalTokens];
ArrayList<Token> unsortedTokens = null;
for (int t = 0; t < freq.length; t++) {
TermVectorOffsetInfo[] offsets = tpv.getOffsets(t);
if (offsets == null) {
throw new IllegalArgumentException(
"Required TermVector Offset information was not found");
}
int[] pos = null;
if (tokenPositionsGuaranteedContiguous) {
// try get the token position info to speed up assembly of tokens into
// sorted sequence
pos = tpv.getTermPositions(t);
}
if (pos == null) {
// tokens NOT stored with positions or not guaranteed contiguous - must
// add to list and sort later
if (unsortedTokens == null) {
unsortedTokens = new ArrayList<Token>();
}
for (int tp = 0; tp < offsets.length; tp++) {
Token token = new Token(terms[t], offsets[tp].getStartOffset(), offsets[tp]
.getEndOffset());
unsortedTokens.add(token);
}
} else {
// We have positions stored and a guarantee that the token position
// information is contiguous.
// This may be fast, BUT it won't work if the Tokenizers used create more
// than one token in the same position or create jumps in position
// numbers - this code would fail under those circumstances.
// Tokens are stored with positions, so each position can be used to index
// straight into the sorted array.
for (int tp = 0; tp < pos.length; tp++) {
Token token = new Token(terms[t], offsets[tp].getStartOffset(),
offsets[tp].getEndOffset());
tokensInOriginalOrder[pos[tp]] = token;
}
}
}