long maxSize = 0;
long minSize = Long.MAX_VALUE;
long totalSize = 0;
for (int end = start; end < n; end++) {
FileRange range = file.getRanges(end);
long size = range.getEnd() - range.getStart();
maxSize = Math.max(maxSize, size);
minSize = Math.min(minSize, size);
totalSize += size;
int len = end - start;
if (len >= minLength) {
// Scoring is based on Lucene's tiered merge policy: skew is the
// largest range's size as a fraction of the candidate's total size.
// final float skew = ((float) maxSize) / ((float) minSize);
final float skew = ((float) maxSize) / ((float) totalSize);
// Strongly favor merges with less skew (smaller
// mergeScore is better):
float mergeScore = skew;
// Gently favor smaller merges over bigger ones. The exponent
// must stay small; otherwise we can end up doing poor merges
// of small segments just to avoid the large merges:
mergeScore *= Math.pow(totalSize, 0.05);
if (mergeScore < bestScore) {
bestScore = mergeScore;
bestStart = start;
bestEnd = end;
}
// log.debug("{} - {} => {}", new Object[] { start, end,
// mergeScore });
}
}
}
if (bestEnd == -1) {
log.warn("Unable to find any merges!");
return false;
}
for (int i = 0; i < n; i++) {
FileRange range = file.getRanges(i);
long len = range.getEnd() - range.getStart();
log.info("{} {}", i, len);
}
log.info("Chose merge {}-{}", bestStart, bestEnd);
BlobStore blobStore = fs.getBlobStore(project);
List<FileRange> newRanges = Lists.newArrayList();
for (int i = 0; i < bestStart; i++) {
newRanges.add(file.getRanges(i));
}
try (TempFile tempFile = TempFile.create()) {
FileRange.Builder c = FileRange.newBuilder();
Hasher md5 = Hashing.md5().newHasher();
try (OutputStream fos = new HashingOutputStream(new FileOutputStream(tempFile.getFile()), md5)) {
for (int i = bestStart; i < bestEnd; i++) {
FileRange range = file.getRanges(i);
if (i == bestStart) {
c.setStart(range.getStart());
}
if (i == (bestEnd - 1)) {
c.setEnd(range.getEnd());
}
final BlobData blob = blobStore.find(range.getContentKey());
if (blob == null) {
throw new IOException("Unable to open storage for range: " + range);
}
blob.copyTo(fos);