Package de.anomic.crawler

Examples of de.anomic.crawler.CrawlProfile.handle()


                                        name.equals(CrawlSwitchboard.CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT) ||
                                        name.equals(CrawlSwitchboard.CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA) ||
                                        name.equals(CrawlSwitchboard.CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA) ||
                                        name.equals(CrawlSwitchboard.CRAWL_PROFILE_SURROGATE))
                                    continue;
                                if (compiledPattern.matcher(name).find()) sb.crawler.removeActive(entry.handle().getBytes());
                            }
                        } else {
                            // iterating through the list of URLs
                            final Iterator<Request> iter = sb.crawlQueues.noticeURL.iterator(NoticedURL.StackType.CORE);
                            Request entry;
View Full Code Here


        CrawlProfile selentry;
        final Map<String, String> orderdHandles = new TreeMap<String, String>();
        for (final byte[] h : sb.crawler.getActive()) {
            selentry = sb.crawler.getActive(h);
            if (selentry != null && !ignoreNames.contains(selentry.name())) {
                orderdHandles.put(selentry.name(), selentry.handle());
            }
        }
       
        // then write into pop-up menu list
        int count = 0;
View Full Code Here

            }
            count++;
        }
        prop.put("profiles", count);
        selentry = sb.crawler.getActive(handle.getBytes());
        assert selentry == null || selentry.handle() != null;
        // read post for change submit
        if ((post != null) && (selentry != null)) {
            if (post.containsKey("submit")) {
                try {
                  Pattern.compile(post.get(CrawlProfile.FILTER_MUSTMATCH, CrawlProfile.MATCH_ALL));
View Full Code Here

                        tee = lit.next();
                        final String cval = selentry.get(tee.name);
                        final String val = (tee.type == eentry.BOOLEAN) ? Boolean.toString(post.containsKey(tee.name)) : post.get(tee.name, cval);
                        if (!cval.equals(val)) {
                            selentry.put(tee.name, val);
                            sb.crawler.putActive(selentry.handle().getBytes(), selentry);
                        }
                    }
                } catch (final Exception ex) {
                    Log.logException(ex);
                    prop.put("error", "1");
View Full Code Here

                        remoteIndexing,
                        xsstopw,
                        xdstopw,
                        xpstopw,
                        CacheStrategy.IFFRESH);
                sb.crawler.putActive(pe.handle().getBytes(), pe);
            } catch (final Exception e) {
                // mist
                prop.put("mode_status", "2");//Error with url
                prop.put("mode_code", "2");
                prop.putHTML("mode_status_error", e.getMessage());
View Full Code Here

                    sb.peers.mySeed().hash.getBytes(),
                    crawlingStartURL,
                    null,
                    (title==null)?"CRAWLING-ROOT":title,
                    new Date(),
                    pe.handle(),
                    0,
                    0,
                    0,
                    0
                ));
View Full Code Here

            // refresh recrawl dates
            try{
                CrawlProfile selentry;
                for (final byte[] handle: this.crawler.getActive()) {
                    selentry = this.crawler.getActive(handle);
                    assert selentry.handle() != null : "profile.name = " + selentry.name();
                    if (selentry.handle() == null) {
                        this.crawler.removeActive(handle);
                        continue;
                    }
                    boolean insert = false;
View Full Code Here

            try{
                CrawlProfile selentry;
                for (final byte[] handle: this.crawler.getActive()) {
                    selentry = this.crawler.getActive(handle);
                    assert selentry.handle() != null : "profile.name = " + selentry.name();
                    if (selentry.handle() == null) {
                        this.crawler.removeActive(handle);
                        continue;
                    }
                    boolean insert = false;
                    if (selentry.name().equals(CrawlSwitchboard.CRAWL_PROFILE_PROXY)) {
View Full Code Here

                    if (selentry.name().equals(CrawlSwitchboard.CRAWL_PROFILE_SURROGATE)) {
                        selentry.put(CrawlProfile.RECRAWL_IF_OLDER,
                                Long.toString(CrawlProfile.getRecrawlDate(CrawlSwitchboard.CRAWL_PROFILE_SURROGATE_RECRAWL_CYCLE)));
                        insert = true;
                    }
                    if (insert) this.crawler.putActive(UTF8.getBytes(selentry.handle()), selentry);
                }
            } catch (final Exception e) {
                Log.logException(e);
            }
View Full Code Here

                CrawlProfile.MATCH_NEVER_STRING,
                "", depth, medialink,
                CrawlProfile.getRecrawlDate(CrawlSwitchboard.CRAWL_PROFILE_PROXY_RECRAWL_CYCLE), -1, crawlingQ,
                true, true, true, false, true, true, true,
                CacheStrategy.IFFRESH);
        sb.crawler.putActive(pe.handle().getBytes(), pe);
        return sb.crawlStacker.stackCrawl(new Request(
                sb.peers.mySeed().hash.getBytes(),
                startURL,
                null,
                "CRAWLING-ROOT",
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.