  // TODO: really TermsHashPerField should take over most
  // of this loop, including merge sort of terms from
  // multiple threads and interacting with the
  // TermsConsumer, only calling out to us (passing us the
  // DocsConsumer) to handle delivery of docs/positions
  final PostingsConsumer postingsConsumer = termsConsumer.startTerm(text);
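
  // segDeletes (if present) maps each term deleted while this segment
  // was buffered to the docID upto which that delete applies; any doc
  // for this term with an ID below that limit must be marked deleted
  // when we flush it below.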
  final int delDocLimit;
  if (segDeletes != null) {
    protoTerm.bytes = text;
    final Integer docIDUpto = segDeletes.get(protoTerm);
    if (docIDUpto != null) {
      delDocLimit = docIDUpto;
    } else {
      delDocLimit = 0;
    }
  } else {
    delDocLimit = 0;
  }
  // Replay the buffered postings for this term: decode the docID
  // stream and hand each doc (and its positions) to the consumer.
  int docFreq = 0;
  long totalTermFreq = 0;
  int docID = 0;
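
  // The postings for this term live in two in-memory byte streams:
  // "freq" (docID deltas, plus term freqs when tracked) and "prox"
  // (position deltas, payloads and offsets). Both are read back
  // sequentially here and re-encoded through the codec's consumers.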
  while (true) {
    //System.out.println(" cycle");
    final int termFreq;
    if (freq.eof()) {
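      // The writer holds each term's most recent doc back in
      // postings.lastDocIDs/termFreqs (its pending delta code sits in
      // lastDocCodes), so the freq stream never contains the final
      // doc; pull it from those arrays and mark it consumed with -1.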
      if (postings.lastDocCodes[termID] != -1) {
        // Return last doc
        docID = postings.lastDocIDs[termID];
        if (readTermFreq) {
          termFreq = postings.termFreqs[termID];
        } else {
          termFreq = -1;
        }
        postings.lastDocCodes[termID] = -1;
      } else {
        // EOF
        break;
      }
    } else {
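      // Doc deltas share their low bit with the freq when freqs are
      // tracked: delta<<1 with bit 0 set means freq == 1, otherwise
      // the freq follows as a separate vInt. E.g. delta 3 with freq 1
      // is written as the single vInt 7; delta 3 with freq 5 is the
      // vInt 6 followed by the vInt 5.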
      final int code = freq.readVInt();
      if (!readTermFreq) {
        docID += code;
        termFreq = -1;
      } else {
        docID += code >>> 1;
        if ((code & 1) != 0) {
          termFreq = 1;
        } else {
          termFreq = freq.readVInt();
        }
      }
      assert docID != postings.lastDocIDs[termID];
    }
    docFreq++;
    assert docID < state.segmentInfo.getDocCount() : "doc=" + docID + " maxDoc=" + state.segmentInfo.getDocCount();
    // NOTE: we could check here if the docID was
    // deleted, and skip it. However, this is somewhat
    // dangerous because it can yield non-deterministic
    // behavior since we may see the docID before we see
    // the term that caused it to be deleted. This
    // would mean some (but not all) of its postings may
    // make it into the index, which'd alter the docFreq
    // for those terms. We could fix this by doing two
    // passes, ie first sweep marks all del docs, and
    // 2nd sweep does the real flush, but I suspect
    // that'd add too much time to flush.
    visitedDocs.set(docID);
    postingsConsumer.startDoc(docID, writeTermFreq ? termFreq : -1);
    if (docID < delDocLimit) {
      // Mark it deleted. TODO: we could also skip
      // writing its postings; this would be
      // deterministic (just for this Term's docs).
      // TODO: can we do this reach-around in a cleaner way????
      if (state.liveDocs == null) {
        state.liveDocs = docState.docWriter.codec.liveDocsFormat().newLiveDocs(state.segmentInfo.getDocCount());
      }
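      // Only count the delete if the doc is still live, so a doc hit
      // by several deleted terms is counted once in delCountOnFlush.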
      if (state.liveDocs.get(docID)) {
        state.delCountOnFlush++;
        state.liveDocs.clear(docID);
      }
    }
    totalTermFreq += termFreq;
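    // NOTE: when freqs aren't tracked, termFreq is -1 here, so this
    // sum is meaningless; finishTerm below reports -1 in that case.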
    // Carefully copy over the prox + payload info,
    // changing the format to match Lucene's segment
    // format.
    if (readPositions || readOffsets) {
      // we did record positions (& maybe payload) and/or offsets
      int position = 0;
      int offset = 0;
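      // Positions and offsets are delta-encoded within each doc, so
      // both accumulators restart from 0 for every document.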
      for (int j = 0; j < termFreq; j++) {
        final BytesRef thisPayload;
        if (readPositions) {
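          // Position deltas use the same low-bit trick as doc deltas:
          // bit 0 of the vInt flags an inline payload (length, then
          // bytes) immediately following the position.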
          final int code = prox.readVInt();
          position += code >>> 1;
          if ((code & 1) != 0) {
            // This position has a payload
            final int payloadLength = prox.readVInt();
            if (payload == null) {
              payload = new BytesRef();
              payload.bytes = new byte[payloadLength];
            } else if (payload.bytes.length < payloadLength) {
              payload.grow(payloadLength);
            }
            prox.readBytes(payload.bytes, 0, payloadLength);
            payload.length = payloadLength;
            thisPayload = payload;
          } else {
            thisPayload = null;
          }
          if (readOffsets) {
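            // Offsets are also deltas: startOffset from the previous
            // position's startOffset (tracked in "offset" and updated
            // below), endOffset from this startOffset.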
            final int startOffset = offset + prox.readVInt();
            final int endOffset = startOffset + prox.readVInt();
            if (writePositions) {
              if (writeOffsets) {
                assert startOffset >= 0 && endOffset >= startOffset : "startOffset=" + startOffset + ",endOffset=" + endOffset + ",offset=" + offset;
                postingsConsumer.addPosition(position, thisPayload, startOffset, endOffset);
              } else {
                postingsConsumer.addPosition(position, thisPayload, -1, -1);
              }
            }
            offset = startOffset;
          } else if (writePositions) {
            postingsConsumer.addPosition(position, thisPayload, -1, -1);
          }
        }
      }
    }
    postingsConsumer.finishDoc();
  }
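
  // Per-term stats go to the consumer; the field-level sums below
  // feed this field's aggregate statistics.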
  termsConsumer.finishTerm(text, new TermStats(docFreq, writeTermFreq ? totalTermFreq : -1));
  sumTotalTermFreq += totalTermFreq;
  sumDocFreq += docFreq;
}