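* Utility class that simplifies handling of term vectors: it maps a
* {@link Terms} instance to a list of {@link IntPair} entries, one per term,
* each holding the term text, its frequency and, when available, its positions.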
*/
public class TermVectorMapper {
public static List<IntPair> map(Terms terms, TermsEnum reuse, boolean acceptTermsOnly, boolean convertOffsets) throws IOException {
TermsEnum te = terms.iterator(reuse);
DocsAndPositionsEnum dpe = null;
List<IntPair> res = new ArrayList<IntPair>();
while (te.next() != null) {
DocsAndPositionsEnum newDpe = te.docsAndPositions(null, dpe, DocsAndPositionsEnum.FLAG_OFFSETS);
if (newDpe == null) { // no positions and no offsets - just add terms if allowed
if (!acceptTermsOnly) {
return null;
}
int freq = (int)te.totalTermFreq();
if (freq == -1) freq = 0;
res.add(new IntPair(freq, te.term().utf8ToString()));
continue;
}
dpe = newDpe;
// term vectors have only one document, number 0
if (dpe.nextDoc() == DocsEnum.NO_MORE_DOCS) { // oops
// treat this as no positions nor offsets
int freq = (int)te.totalTermFreq();
if (freq == -1) freq = 0;
res.add(new IntPair(freq, te.term().utf8ToString()));
continue;
}
IntPair ip = new IntPair(dpe.freq(), te.term().utf8ToString());
for (int i = 0; i < dpe.freq(); i++) {
int pos = dpe.nextPosition();
if (pos != -1) {
if (ip.positions == null) {
ip.positions = new int[dpe.freq()];