final Pattern compiledPattern = Pattern.compile(pattern);
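// depending on the selected option, either remove whole crawl profiles or single queue entries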
if (option == PROFILE) {
// search and delete the crawl profile (_much_ faster, independent of queue size)
// XXX: what to do about the annoying LOST PROFILE messages in the log?
CrawlProfile entry;
for (final byte[] handle: sb.crawler.getActive()) {
entry = sb.crawler.getActive(handle);
final String name = entry.name();
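// skip the built-in system profiles (proxy, remote, snippet and surrogate crawls); only user-defined profiles may be deleted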
if (name.equals(CrawlSwitchboard.CRAWL_PROFILE_PROXY) ||
    name.equals(CrawlSwitchboard.CRAWL_PROFILE_REMOTE) ||
    name.equals(CrawlSwitchboard.CRAWL_PROFILE_SNIPPET_LOCAL_TEXT) ||
    name.equals(CrawlSwitchboard.CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT) ||
    name.equals(CrawlSwitchboard.CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA) ||
    name.equals(CrawlSwitchboard.CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA) ||
    name.equals(CrawlSwitchboard.CRAWL_PROFILE_SURROGATE)) {
    continue;
}
if (compiledPattern.matcher(name).find()) {
    sb.crawler.removeActive(entry.handle().getBytes());
}
}
} else {
// iterate over the local crawl queue and collect the hashes of all entries whose selected field matches the pattern
final Iterator<Request> iter = sb.crawlQueues.noticeURL.iterator(NoticedURL.StackType.CORE);
Request entry;
final List<byte[]> removehashes = new ArrayList<byte[]>();
while (iter.hasNext()) {
entry = iter.next();
if (entry == null) continue;
String value = null;
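// select the field of the queue entry that the pattern is matched against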
switch (option) {
    case URL: value = (entry.url() == null) ? null : entry.url().toString(); break;
    case ANCHOR: value = entry.name(); break;
    case DEPTH: value = Integer.toString(entry.depth()); break;
    case INITIATOR:
        value = (entry.initiator() == null || entry.initiator().length == 0) ? "proxy" : ASCII.String(entry.initiator());
        break;
    case MODIFIED: value = daydate(entry.appdate()); break;
    default: value = null; break;
}
// guard against entries without a URL: the matched field may be non-null even when url() is null
if (value != null && entry.url() != null && compiledPattern.matcher(value).matches()) {
    removehashes.add(entry.url().hash());
}
}
Log.logInfo("IndexCreateWWWLocalQueue", "created a remove list with " + removehashes.size() + " entries for pattern '" + pattern + "'");
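// remove the collected entries only after the iteration has finished, so the queue is not modified while being iterated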
for (final byte[] b: removehashes) {
sb.crawlQueues.noticeURL.removeByURLHash(b);
}
}
} catch (final PatternSyntaxException e) {
Log.logException(e);
}
}
prop.put("info", "3");//crawling queue cleared
prop.putNum("info_numEntries", c);
} else if (post.containsKey("deleteEntry")) {
final String urlHash = post.get("deleteEntry");
sb.crawlQueues.noticeURL.removeByURLHash(urlHash.getBytes());
prop.put("LOCATION","");
return prop;
}
}
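// render the current state of the local crawler queue for the servlet template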
int showNum = 0;
final int stackSize = sb.crawlQueues.noticeURL.stackSize(NoticedURL.StackType.CORE);
if (stackSize == 0) {
prop.put("crawler-queue", "0");
} else {
prop.put("crawler-queue", "1");
final List<Request> crawlerList = sb.crawlQueues.noticeURL.top(NoticedURL.StackType.CORE, (int) (showLimit * 1.20));
Request urle;
boolean dark = true;
Seed initiator;
String profileHandle;
CrawlProfile profileEntry;
int i;
for (i = 0; i < crawlerList.size() && showNum < showLimit; i++) {
    urle = crawlerList.get(i);
    if (urle != null && urle.url() != null) {
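// a missing initiator means the entry was generated by the local proxy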
initiator = sb.peers.getConnected(urle.initiator() == null ? "" : ASCII.String(urle.initiator()));
profileHandle = urle.profileHandle();
profileEntry = profileHandle == null ? null : sb.crawler.getActive(profileHandle.getBytes());
prop.put("crawler-queue_list_"+showNum+"_dark", dark ? "1" : "0");
prop.putHTML("crawler-queue_list_"+showNum+"_initiator", ((initiator == null) ? "proxy" : initiator.getName()) );
prop.put("crawler-queue_list_"+showNum+"_profile", ((profileEntry == null) ? "unknown" : profileEntry.name()));
prop.put("crawler-queue_list_"+showNum+"_depth", urle.depth());
prop.put("crawler-queue_list_"+showNum+"_modified", daydate(urle.appdate()) );
prop.putHTML("crawler-queue_list_"+showNum+"_anchor", urle.name());
prop.putHTML("crawler-queue_list_"+showNum+"_url", urle.url().toNormalform(false, true));
prop.put("crawler-queue_list_"+showNum+"_hash", urle.url().hash());