// (and also derived URLs)
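// Seed the dedup set with the pre-configured extra URLs (titled entries are harvested as documents, so mustn't be re-created)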
HashSet<String> dedupSet = new HashSet<String>();
if (null != src.getRssConfig().getExtraUrls()) {
	Iterator<ExtraUrlPojo> itDedupUrls = src.getRssConfig().getExtraUrls().iterator();
	while (itDedupUrls.hasNext()) {
		ExtraUrlPojo itUrl = itDedupUrls.next();
		if (null != itUrl.title) { // (only titled URLs get harvested as documents, so only they need dedup - see the matching check in the loop below)
			String dedupUrl = itUrl.url;
			dedupSet.add(dedupUrl);
			if (maxDocsPerCycle != Integer.MAX_VALUE) {
				maxDocsPerCycle++; // (ensure we get as far as adding these)
			}
		}
	}
}//TESTED
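// Iterator over the configured URLs when they are being used as search/spider seeds (stays null otherwise)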
Iterator<ExtraUrlPojo> itUrls = null;
// Spider parameters, used in conjunction with itUrls:
List<ExtraUrlPojo> iteratingList = null; // the URLs at the current spider depth
List<ExtraUrlPojo> waitingList = null; // URLs collected for the next spider depth (used further down, beyond this excerpt)
int nIteratingDepth = 0; // current spider depth - 0 means the configured URLs themselves
// (ie no URL specified, so using extra URLs as search URLs - and optionally as real URLs also)
if ((null == savedUrl) && (null != src.getRssConfig().getExtraUrls()) && !src.getRssConfig().getExtraUrls().isEmpty()) {
	// Spider logic:
	iteratingList = src.getRssConfig().getExtraUrls();
	// (end spidering logic)
	itUrls = iteratingList.iterator();
	src.getRssConfig().setExtraUrls(new LinkedList<ExtraUrlPojo>());
	// (ie overwrite the original list)
}//TESTED
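// (the list is rebuilt inside the loop below: titled URLs are re-added as they are visited, so they get harvested as documents)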
for (;;) { // The logic for this loop can vary...
	if (dedupSet.size() >= maxDocsPerCycle) {
		break;
	}
	String currTitle = null;
	String currFullText = null;
	String currDesc = null;
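	// (currTitle/currFullText/currDesc are populated further down the loop, beyond this excerpt, depending on the URL type)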
	if (null != itUrls) {
		// (no hasNext() check: assumes the rest of the loop, beyond this excerpt, detects exhaustion and swaps in waitingList)
		ExtraUrlPojo urlPojo = itUrls.next();
		savedUrl = urlPojo.url;
		if (0 == nIteratingDepth) {
			if (null != urlPojo.title) { // Also harvest this
				src.getRssConfig().getExtraUrls().add(urlPojo);
				if (maxDocsPerCycle != Integer.MAX_VALUE) {
					maxDocsPerCycle++; // (as above - ensure we get as far as adding these)
				}