if (useMappers) {
// not implemented
throw new IOException("Defining S3InputFormat with number of mappers is not implemented");
} else {
S3ObjectSummary startKey = null;
S3ObjectSummary endKey = null;
int batchSize = 0;
int numOfSplits = 0;
int numOfCalls = 0;
int maxKeyIDX = 0;
int currentKeyIDX = 0;
int nextKeyIDX = 0;
ObjectListing listing = null;
boolean isLastCall = true;
// split all keys starting with "keyPrefix" into splits of
// "numOfKeysPerMapper" keys
do {
// On the first iteration build a new list request; on later iterations
// pass the previous listing back to fetch the next batch
if (listing == null) {
listing = s3Reader.listObjects(bucketName, keyPrefix, maxKeys);
} else {
listing = s3Reader.listObjects(listing);
}
// True when this is the last call to WS (the listing is not truncated, so this is the last batch of objects)
isLastCall = !listing.isTruncated();
// Size of the batch from last WS call
batchSize = listing.getObjectSummaries().size();
// Absolute index of last key from batch
maxKeyIDX = numOfCalls * maxKeys + batchSize;
// Absolute indexes of current and next keys
currentKeyIDX = numOfSplits * numOfKeysPerMapper;
// On the last call, if a full split would extend past the last key, use the index of the last key instead
nextKeyIDX = (numOfSplits + 1) * numOfKeysPerMapper > maxKeyIDX && isLastCall ? maxKeyIDX : (numOfSplits + 1) * numOfKeysPerMapper;
// Create one input split for each range of keys whose end index falls within the current batch
while (nextKeyIDX <= maxKeyIDX) {
startKey = endKey;
endKey = listing.getObjectSummaries().get((nextKeyIDX - 1) % maxKeys);
// Create new input split
S3InputSplit split = new S3InputSplit();
split.setBucketName(bucketName);
split.setKeyPrefix(keyPrefix);
split.setMarker(startKey != null ? startKey.getKey() : null);
split.setLastKey(endKey.getKey());
split.setSize(nextKeyIDX - currentKeyIDX);
splits.add(split);
numOfSplits++;