Examples of profileHandle()


Examples of de.anomic.crawler.retrieval.Request.profileHandle()

                    // get one entry that will not be loaded, just indexed
                    urlEntry = this.noticeURL.pop(NoticedURL.StackType.NOLOAD, true, this.sb.crawler);
                    if (urlEntry == null) continue;
                    final String profileHandle = urlEntry.profileHandle();
                    if (profileHandle == null) {
                        this.log.logSevere(stats + ": NULL PROFILE HANDLE '" + urlEntry.profileHandle() + "' for URL " + urlEntry.url());
                        return true;
                    }
                    final CrawlProfile profile = this.sb.crawler.getActive(ASCII.getBytes(profileHandle));
                    if (profile == null) {
                        this.log.logSevere(stats + ": NULL PROFILE HANDLE '" + urlEntry.profileHandle() + "' for URL " + urlEntry.url());
View Full Code Here

Examples of de.anomic.crawler.retrieval.Request.profileHandle()

                        this.log.logSevere(stats + ": NULL PROFILE HANDLE '" + urlEntry.profileHandle() + "' for URL " + urlEntry.url());
                        return true;
                    }
                    final CrawlProfile profile = this.sb.crawler.getActive(ASCII.getBytes(profileHandle));
                    if (profile == null) {
                        this.log.logSevere(stats + ": NULL PROFILE HANDLE '" + urlEntry.profileHandle() + "' for URL " + urlEntry.url());
                        return true;
                    }
                    try {
                        this.sb.indexingDocumentProcessor.enQueue(new indexingQueueEntry(PROCESS, new Response(urlEntry, profile), null, null));
                        Log.logInfo("CrawlQueues", "placed NOLOAD URL on indexing queue: " + urlEntry.url().toNormalform(true, false));
View Full Code Here

Examples of de.anomic.crawler.retrieval.Request.profileHandle()

                    return true;
                }

                urlEntry = this.noticeURL.pop(NoticedURL.StackType.CORE, true, this.sb.crawler);
                if (urlEntry == null) continue;
                final String profileHandle = urlEntry.profileHandle();
                // System.out.println("DEBUG plasmaSwitchboard.processCrawling:
                // profileHandle = " + profileHandle + ", urlEntry.url = " + urlEntry.url());
                if (profileHandle == null) {
                    this.log.logSevere(stats + ": NULL PROFILE HANDLE '" + urlEntry.profileHandle() + "' for URL " + urlEntry.url());
                    return true;
View Full Code Here

Examples of de.anomic.crawler.retrieval.Request.profileHandle()

                if (urlEntry == null) continue;
                final String profileHandle = urlEntry.profileHandle();
                // System.out.println("DEBUG plasmaSwitchboard.processCrawling:
                // profileHandle = " + profileHandle + ", urlEntry.url = " + urlEntry.url());
                if (profileHandle == null) {
                    this.log.logSevere(stats + ": NULL PROFILE HANDLE '" + urlEntry.profileHandle() + "' for URL " + urlEntry.url());
                    return true;
                }
                load(urlEntry, stats, profileHandle);
                return true;
            } catch (final IOException e) {
View Full Code Here

Examples of de.anomic.crawler.retrieval.Request.profileHandle()

        // we don't want to crawl a global URL globally, since WE are the global part. (from this point of view)
        final String stats = "REMOTETRIGGEREDCRAWL[" + this.noticeURL.stackSize(NoticedURL.StackType.CORE) + ", " + this.noticeURL.stackSize(NoticedURL.StackType.LIMIT) + ", " + this.noticeURL.stackSize(NoticedURL.StackType.OVERHANG) + ", "
                        + this.noticeURL.stackSize(NoticedURL.StackType.REMOTE) + "]";
        try {
            final Request urlEntry = this.noticeURL.pop(NoticedURL.StackType.REMOTE, true, this.sb.crawler);
            final String profileHandle = urlEntry.profileHandle();
            // System.out.println("DEBUG plasmaSwitchboard.processCrawling:
            // profileHandle = " + profileHandle + ", urlEntry.url = " +
            // urlEntry.url());
            load(urlEntry, stats, profileHandle);
            return true;
View Full Code Here

Examples of de.anomic.crawler.retrieval.Request.profileHandle()

        if (searchEvent != null) {
            searchEvent.addHeuristic(url.hash(), heuristicName, true);
        }
        if (this.indexSegments.segment(process).exists(url.hash())) return; // don't do double-work
        final Request request = this.loader.request(url, true, true);
        final CrawlProfile profile = sb.crawler.getActive(ASCII.getBytes(request.profileHandle()));
        final String acceptedError = this.crawlStacker.checkAcceptance(url, profile, 0);
        if (acceptedError != null) {
            this.log.logWarning("addToIndex: cannot load " + url.toNormalform(false, false) + ": " + acceptedError);
            return;
        }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.