}
}//TESTED
else if (0 == nPage) { //returned no links, log an error if this is page 1 and one has been saved
Object[] onError = searchDoc.getMetaData().get("_ONERROR_");
if ((null != onError) && (onError.length > 0) && (onError[0] instanceof String) && !(((String)(onError[0]))).isEmpty()) {
throw new ExtractorSourceLevelTransientException("generateFeedFromSearch: _ONERROR_: " + onError[0]);
}
}//TESTED
if (context.isStandalone()) { // debug mode, will display some additional logging
Object[] onDebug = searchDoc.getMetaData().get("_ONDEBUG_");
if ((null != onDebug) && (onDebug.length > 0)) {
for (Object debug: onDebug) {
if (debug instanceof String) {
context.getHarvestStatus().logMessage("_ONDEBUG_: " + (String)debug, true);
}
else {
context.getHarvestStatus().logMessage("_ONDEBUG_: " + new com.google.gson.Gson().toJson(debug), true);
}
}
}
}//TESTED
// PAGINATION BREAK LOGIC:
// 1: All the links are duplicates of links already in the DB
// 2: No new links from last page
// LOGIC CASE 1: (All the links are duplicates of links already in the DB)
//(already handled above)
// LOGIC CASE 2: (No new links from last page)
//DEBUG
//System.out.println("LINKS_SIZE=" + feedConfig.getExtraUrls().size());
//System.out.println("LINKS=\n"+new com.google.gson.GsonBuilder().setPrettyPrinting().create().toJson(feedConfig.getExtraUrls()));
if (dedupSet.size() == nCurrDedupSetSize) { // All links were duplicate
//DEBUG
//System.out.println("FOUND " + nLinksFound + " vs " + nMinLinksToExitLoop + " duplicate URLs (" + nCurrDedupSetSize + ")");
if (nLinksFound >= nMinLinksToExitLoop) { // (at least 10 found so insta-quit)
break;
}
else { // (fewer than 10 found - includ
nMinLinksToExitLoop = 0; // (also handles the no links found case)
}
}//TESTED
else {
nMinLinksToExitLoop = 10; // (reset)
}//TESTED
}// end loop over pages
}
catch (Exception e) {
//DEBUG
//e.printStackTrace();
if ((null == dedupSet) || dedupSet.isEmpty()) {
throw new ExtractorSourceLevelTransientException("generateFeedFromSearch: " + e.getMessage());
}
else {
throw new ExtractorDocumentLevelException("generateFeedFromSearch: " + e.getMessage());
}
// (don't log since these errors will appear in the log under the source, ie more usefully)