return calculateSplits_phase2(conf, confQuery, false, shardingPolicyNew, null);
}//TESTED
else if (conf.getLimit() > 0) { // debug mode (a max doc limit is set)
//Case 3: Ensure we have small sets of sources to search over
BasicDBList collectionOfSplits = splitPrecalculations_oldShardSchemeOrDebug(confQuery, srcTagsQuery, conf.getMaxDocsPerSplit());
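// (each element of the list is expected to carry a per-source doc count and sourceKey - see how it's unpacked below)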
final List<InputSplit> splits = new ArrayList<InputSplit>();
boolean queryNonTrivial = isQueryNonTrivial(confQuery);
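// (a trivial query means the pre-calculated per-source doc counts can be used as-is, no re-counting needed)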
if (!queryNonTrivial) {
//Case 3a: query is trivial, so can just create splits directly from the split pre-calcs
int toProcess = conf.getLimit();
Iterator<Object> itSplit = collectionOfSplits.iterator();
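// Walk the pre-calculated per-source splits until the doc limit is used up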
while ((toProcess > 0) && (itSplit.hasNext())) {
BasicDBObject split = (BasicDBObject) itSplit.next();
int docCount = (int)split.getLong(SourceHarvestStatusPojo.doccount_, 0L);
int toGet = Math.min(docCount, toProcess);
BasicDBObject modQuery = convertQuery(confQuery, split.get(DocumentPojo.sourceKey_));
if (null != modQuery) {
splits.add(new InfiniteMongoInputSplit(conf.getInputURI(), conf.getInputKey(), modQuery, conf.getFields(), conf.getSort(), toGet, 0, conf.isNoTimeout()));
toProcess -= docCount;
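// (subtracting docCount rather than toGet is safe: if this source covered the remainder then toProcess drops <= 0 and the loop ends either way)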
}
}//TESTED
}
else {
// Case 3b: annoying - some extra query terms, so we need to do it the hard way and count matches per source...
int toProcess = conf.getLimit();
Iterator<Object> itSplit = collectionOfSplits.iterator();
DBCollection coll = InfiniteMongoConfigUtil.getCollection(conf.getInputURI());
while ((toProcess > 0) && (itSplit.hasNext())) {
BasicDBObject split = (BasicDBObject) itSplit.next();
BasicDBObject modQuery = convertQuery(confQuery, split.get(DocumentPojo.sourceKey_));
if (null != modQuery) {
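// Count this source's actual matches, capped at toProcess - no point counting more than we still need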
int docsCounted = (int) coll.getCount(modQuery, null, toProcess, 0);
int toGet = Math.min(docsCounted, toProcess);
if (docsCounted > 0) {
splits.add(new InfiniteMongoInputSplit(conf.getInputURI(), conf.getInputKey(), modQuery, conf.getFields(), conf.getSort(), toGet, 0, conf.isNoTimeout()));
toProcess -= docsCounted;
}
}//TESTED
}
}//TESTED
return splits;
}
else { // More complex cases:
if (shardingPolicyNew) {
// Case 4a: NEW SHARDING SCHEME
// Always fetch the new sources, e.g. convert communityId to sourceKeys
try {
splitPrecalculations_newShardScheme(confQuery, srcTagsQuery); // (modifies confQuery if returns true)
boolean queryNonTrivial = isQueryNonTrivial(confQuery);
return calculateSplits_phase2(conf, confQuery, !queryNonTrivial, shardingPolicyNew, null);
// (i.e. trivial query => always use chunks, bypass the skip/limit test)
}//TESTED (trivial + non-trivial)
catch (Exception e) { // Didn't match any sources, no problem
return new ArrayList<InputSplit>();
}//TESTED
}//TESTED
else {
BasicDBList collectionOfSplits = splitPrecalculations_oldShardSchemeOrDebug(confQuery, srcTagsQuery, conf.getMaxDocsPerSplit());
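// (null means no source key partition could be built - handled below)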
if (null == collectionOfSplits) {
// Case 4b: OLD SHARDING SCHEME can't get a partition by source keys, just back off to old code
return calculateSplits_phase2(conf, confQuery, false, shardingPolicyNew, null);
}//TESTED (old code)
else {
conf.setMaxDocsPerSplit(2*conf.getMaxDocsPerSplit());
// (because we stop creating splits when they exceed the size)
// Case 4c: OLD SHARDING SCHEME, have a source key partition
int nMaxCount = 1 + conf.getMaxDocsPerSplit()*conf.getMaxSplits();
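// (sources with up to maxDocsPerSplit*maxSplits docs count as "small"; the +1 keeps the boundary case in the skip/limit path)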
boolean queryNonTrivial = isQueryNonTrivial(confQuery);
final List<InputSplit> splits = new ArrayList<InputSplit>();
BasicDBObject savedQuery = confQuery;
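// (just a reference, not a copy - fine because confQuery itself isn't modified until after this loop)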
Iterator<Object> itSplit = collectionOfSplits.iterator();
BasicDBList bigSplit = null;
while (itSplit.hasNext()) {
BasicDBObject split = (BasicDBObject) itSplit.next();
int docCount = (int)split.getLong(SourceHarvestStatusPojo.doccount_, 0L);
if (docCount < nMaxCount) { // small split, will use skip/limit
BasicDBObject modQuery = convertQuery(savedQuery, split.get(DocumentPojo.sourceKey_));
if (null != modQuery) {
final int SPLIT_THRESHOLD = 3;
// A few cases:
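// (when the source is small, or the query is trivial so the count is exact, pass docCount down - presumably so phase2 can size the skip/limit splits without re-counting)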
if ((docCount < (SPLIT_THRESHOLD*conf.getMaxDocsPerSplit())) || !queryNonTrivial) {
splits.addAll(calculateSplits_phase2(conf, modQuery, false, shardingPolicyNew, (Integer)docCount));
}//TESTED (based on limit, based on query)
else {
// My guess at the point at which you might as well do the full query in the hope you're going
// to save some (empty) splits
splits.addAll(calculateSplits_phase2(conf, modQuery, false, shardingPolicyNew, null));
}//TESTED
}//TESTED
}
else { // large split, combine all these guys into an array of source keys
if (null == bigSplit) {
bigSplit = new BasicDBList();
}
bigSplit.add(split.get(DocumentPojo.sourceKey_));
// (guaranteed to be a single element)
}
}//(end loop over collections)
if (null != bigSplit) {
// If we have a big set of left-over sources then create splits for those - always chunks if the query is trivial
if (1 == bigSplit.size()) {
confQuery.put(DocumentPojo.sourceKey_, bigSplit.iterator().next());
}
else {
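// (multiple large sources - match any of them via an $in over their keys)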
confQuery.put(DocumentPojo.sourceKey_, new BasicDBObject(DbManager.in_, bigSplit));
}
splits.addAll(calculateSplits_phase2(conf, confQuery, !queryNonTrivial, shardingPolicyNew, null));