//we need to over-estimate using ceil, to ensure that the last split is not /too/ big
final int numberOfFilesPerSplit = (int)Math.ceil((double)paths.length / (double)numSplits);
int pathsUsed = 0;
int splitnum = 0;
CombineFileSplit mfs;
// build every split, including the last one (which may be smaller than numberOfFilesPerSplit)
while(pathsUsed < numPaths)
{
/* calculate the split size for this task - usually numberOfFilesPerSplit, but
* fewer for the last split when the remaining paths do not fill it */
final int splitSizeForThisSplit = numberOfFilesPerSplit + pathsUsed > numPaths
? numPaths - pathsUsed
: numberOfFilesPerSplit;
//arrays of information for split
Path[] splitPaths = new Path[splitSizeForThisSplit];
long[] splitLengths = new long[splitSizeForThisSplit];
long[] splitStarts = new long[splitSizeForThisSplit];
final TObjectLongHashMap<String> allLocationsForSplit = new TObjectLongHashMap<String>();
String[] splitLocations = null; //final recommended locations for this split.
for(int i=0;i<splitSizeForThisSplit;i++)
{
locations[pathsUsed+i].forEachEntry(new TObjectLongProcedure<String>() {
public boolean execute(String a, long b)
{
allLocationsForSplit.adjustOrPutValue(a, b, b); return true;
}
});
if ( allLocationsForSplit.size() <=3 )
{
splitLocations = allLocationsForSplit.keys(new String[allLocationsForSplit.size()]);
}
else
{
String[] hosts = allLocationsForSplit.keys(new String[allLocationsForSplit.size()]);
Arrays.sort(hosts, new Comparator<String>() {
public int compare(String o1, String o2) {
long diffamount = allLocationsForSplit.get(o1) - allLocationsForSplit.get(o2);
if (diffamount > 0)
{
return -1;
}
else if (diffamount < 0)
{
return 1;
}
return 0;
}
});
splitLocations = new String[3];
System.arraycopy(hosts, 0, splitLocations, 0, 3);
}
}
//copy information for this split
System.arraycopy(lengths, pathsUsed, splitLengths, 0, splitSizeForThisSplit);
System.arraycopy(paths, pathsUsed, splitPaths, 0, splitSizeForThisSplit);
//count the number of paths consumed
pathsUsed += splitSizeForThisSplit;
//make the actual split object
////logger.info("New split of size " + splitSizeForThisSplit);
mfs = new CombineFileSplit(job, splitPaths, splitStarts, splitLengths, splitLocations);
splits.add(new PositionAwareSplit<CombineFileSplit>(mfs, splitnum));
splitnum++;
}
if (!(pathsUsed==paths.length)) {