Package de.anomic.yacy

Examples of de.anomic.yacy.yacySeed


            return this.targets.size();
        }
       
        public boolean transmit() {
            if (this.targets.isEmpty()) return false;
            yacySeed target = this.targets.remove(0);
            // transferring selected words to remote peer
            if (target == seeds.mySeed() || target.hash.equals(seeds.mySeed().hash)) {
              // target is my own peer. This is easy. Just restore the indexContainer
              restore();
              this.hit++;
              log.logInfo("Transfer of chunk to myself-target");
              return true;
            }
            log.logInfo("starting new index transmission request to " + ASCII.String(this.primaryTarget));
            long start = System.currentTimeMillis();
            final String error = yacyClient.transferIndex(target, this.containers, this.references, gzipBody4Transfer, timeout4Transfer);
            if (error == null) {
                // words successfully transfered
                long transferTime = System.currentTimeMillis() - start;
                Iterator<ReferenceContainer<WordReference>> i = this.containers.iterator();
                ReferenceContainer<WordReference> firstContainer = (i == null) ? null : i.next();
                log.logInfo("Index transfer of " + this.containers.size() +
                                 " words [" + ((firstContainer == null) ? null : ASCII.String(firstContainer.getTermHash())) + " .. " + ASCII.String(this.primaryTarget) + "]" +
                                 " and " + this.references.size() + " URLs" +
                                 " to peer " + target.getName() + ":" + target.hash +
                                 " in " + (transferTime / 1000) +
                                 " seconds successful ("  + (1000 * this.containers.size() / (transferTime + 1)) +
                                 " words/s)");
                seeds.mySeed().incSI(this.containers.size());
                seeds.mySeed().incSU(this.references.size());
                // if the peer has set a pause time and we are in flush mode (index transfer)
                // then we pause for a while now
                log.logInfo("Transfer finished of chunk to target " + target.hash + "/" + target.getName());
                this.hit++;
                return true;
            }
            this.miss++;
            // write information that peer does not receive index transmissions
            log.logInfo("Transfer failed of chunk to target " + target.hash + "/" + target.getName() + ": " + error);
            // get possibly newer target Info
            yacySeed newTarget = seeds.get(target.hash);
            if (newTarget != null) {
                String oldAddress = target.getPublicAddress();
                if ((oldAddress != null) && (oldAddress.equals(newTarget.getPublicAddress()))) {
                    newTarget.setFlagAcceptRemoteIndex(false);
                    seeds.update(newTarget.hash, newTarget);
                } else {
                    // we tried an old Address. Don't change anything
                }
            } else {
View Full Code Here


    public static yacySeed[] selectClusterPeers(final yacySeedDB seedDB, final SortedMap<byte[], String> peerhashes) {
        final Iterator<Map.Entry<byte[], String>> i = peerhashes.entrySet().iterator();
        final List<yacySeed> l = new ArrayList<yacySeed>();
        Map.Entry<byte[], String> entry;
        yacySeed s;
        while (i.hasNext()) {
            entry = i.next();
            s = seedDB.get(ASCII.String(entry.getKey())); // should be getConnected; get only during testing time
            if (s != null) {
                s.setAlternativeAddress(entry.getValue());
                l.add(s);
            }
        }
        return l.toArray(new yacySeed[l.size()]);
    }
View Full Code Here

        // the seedcount is the maximum number of wanted results
        if (seedDB == null) { return null; }
       
        // put in seeds according to dht
        final Map<String, yacySeed> regularSeeds = new HashMap<String, yacySeed>(); // dht position seeds
        yacySeed seed;
        Iterator<yacySeed> dhtEnum;        
        Iterator<byte[]> iter = wordhashes.iterator();
        while (iter.hasNext()) {
            selectDHTPositions(seedDB, iter.next(), redundancy, regularSeeds);
        }
        //int minimumseeds = Math.min(seedDB.scheme.verticalPartitions(), regularSeeds.size()); // that should be the minimum number of seeds that are returned
        //int maximumseeds = seedDB.scheme.verticalPartitions() * redundancy; // this is the maximum number of seeds according to dht and heuristics. It can be more using burst mode.
       
        // put in some seeds according to size of peer.
        // But not all, that would produce too much load on the largest peers
        dhtEnum = seedDB.seedsSortedConnected(false, yacySeed.ICOUNT);
        int c = Math.max(Math.min(5, seedDB.sizeConnected()), wordhashes.size() > 1 ? seedDB.sizeConnected() * burstMultiwordPercent / 100 : 0);
        while (dhtEnum.hasNext() && c-- > 0) {
            seed = dhtEnum.next();
            if (seed == null) continue;
            if (seed.isLastSeenTimeout(3600000)) continue;
            if (seed.getAge() < 1) { // the 'workshop feature'
                Log.logInfo("DHT", "selectPeers/Age: " + seed.hash + ":" + seed.getName() + ", is newbie, age = " + seed.getAge());
                regularSeeds.put(seed.hash, seed);
                continue;
            }
            if (Math.random() * 100 + (wordhashes.size() > 1 ? burstMultiwordPercent : 25) >= 50) {
                if (Log.isFine("DHT")) Log.logFine("DHT", "selectPeers/CountBurst: " + seed.hash + ":" + seed.getName() + ", RWIcount=" + seed.getWordCount());
                regularSeeds.put(seed.hash, seed);
                continue;
            }
        }

        // create a set that contains only robinson peers because these get a special handling
        dhtEnum = seedDB.seedsConnected(true, false, null, 0.50f);
        Set<yacySeed> robinson = new HashSet<yacySeed>();
        while (dhtEnum.hasNext()) {
            seed = dhtEnum.next();
            if (seed == null) continue;
            if (seed.getFlagAcceptRemoteIndex()) continue;
            if (seed.isLastSeenTimeout(3600000)) continue;
            robinson.add(seed);
        }

        // add robinson peers according to robinson burst rate
        dhtEnum = robinson.iterator();
        c = robinson.size() * burstRobinsonPercent / 100;
        while (dhtEnum.hasNext() && c-- > 0) {
            seed = dhtEnum.next();
            if (seed == null) continue;
            if (seed.isLastSeenTimeout(3600000)) continue;
            if (Math.random() * 100 + burstRobinsonPercent >= 100) {
                if (Log.isFine("DHT")) Log.logFine("DHT", "selectPeers/RobinsonBurst: " + seed.hash + ":" + seed.getName());
                regularSeeds.put(seed.hash, seed);
                continue;
            }
        }

        // put in seeds that are public robinson peers and where the peer tags match with query
        // or seeds that are newbies to ensure that private demonstrations always work
        dhtEnum = robinson.iterator();
        while (dhtEnum.hasNext()) {
            seed = dhtEnum.next();
            if (seed == null) continue;
            if (seed.isLastSeenTimeout(3600000)) continue;
            if (seed.matchPeerTags(wordhashes)) {
                // peer tags match
                String specialized = seed.getPeerTags().toString();
                if (specialized.equals("[*]")) {
                    Log.logInfo("DHT", "selectPeers/RobinsonTag: " + seed.hash + ":" + seed.getName() + " grants search for all");
                } else {
                    Log.logInfo("DHT", "selectPeers/RobinsonTag " + seed.hash + ":" + seed.getName() + " is specialized peer for " + specialized);
                }
                regularSeeds.put(seed.hash, seed);
            }
        }
       
View Full Code Here

        final String host = urlcomps.url().getHost();
        if (host != null && host.endsWith(".yacyh")) {
            // translate host into current IP
            int p = host.indexOf('.');
            final String hash = yacySeed.hexHash2b64Hash(host.substring(p + 1, host.length() - 6));
            final yacySeed seed = peers.getConnected(hash);
            final String filename = urlcomps.url().getFile();
            String address = null;
            if ((seed == null) || ((address = seed.getPublicAddress()) == null)) {
                // seed is not known from here
                try {
                    indexSegment.termIndex().remove(
                        Word.words2hashesHandles(Condenser.getWords(
                            ("yacyshare " +
                             filename.replace('?', ' ') +
                             " " +
                             urlcomps.dc_title()), null).keySet()),
                             urlentry.hash());
                } catch (IOException e) {
                    Log.logException(e);
                }
                indexSegment.urlMetadata().remove(urlentry.hash()); // clean up
                throw new RuntimeException("index void");
            }
            alternative_urlstring = "http://" + address + "/" + host.substring(0, p) + filename;
            alternative_urlname = "http://share." + seed.getName() + ".yacy" + filename;
            if ((p = alternative_urlname.indexOf('?')) > 0) alternative_urlname = alternative_urlname.substring(0, p);
        }
    }
View Full Code Here

            // clean up seed-dbs
            if (getConfigBool("routing.deleteOldSeeds.permission",true)) {
                final long deleteOldSeedsTime = getConfigLong("routing.deleteOldSeeds.time",7)*24*3600000;
                Iterator<yacySeed> e = this.peers.seedsSortedDisconnected(true,yacySeed.LASTSEEN);
                yacySeed seed = null;
                final List<String> deleteQueue = new ArrayList<String>();
                checkInterruption();
                // clean passive seeds
                while (e.hasNext()) {
                    seed = e.next();
                    if (seed != null) {
                        //list is sorted -> break when peers are too young to delete
                        if (!seed.isLastSeenTimeout(deleteOldSeedsTime)) break;
                        deleteQueue.add(seed.hash);
                    }
                }
                for (int i = 0; i < deleteQueue.size(); ++i) this.peers.removeDisconnected(deleteQueue.get(i));
                deleteQueue.clear();
                e = this.peers.seedsSortedPotential(true,yacySeed.LASTSEEN);
                checkInterruption();
                // clean potential seeds
                while (e.hasNext()) {
                    seed = e.next();
                    if (seed != null) {
                        //list is sorted -> break when peers are too young to delete
                        if (!seed.isLastSeenTimeout(deleteOldSeedsTime)) break;
                        deleteQueue.add(seed.hash);
                    }
                }
                for (int i = 0; i < deleteQueue.size(); ++i) this.peers.removePotential(deleteQueue.get(i));
            }
View Full Code Here

        }
        EventTracker.update(EventTracker.EClass.INDEX, queueEntry.url().toNormalform(true, false), false);

        // if this was performed for a remote crawl request, notify requester
        if ((processCase == EventOrigin.GLOBAL_CRAWLING) && (queueEntry.initiator() != null)) {
            final yacySeed initiatorPeer = this.peers.get(ASCII.String(queueEntry.initiator()));
            if (initiatorPeer != null) {
                if (this.clusterhashes != null) {
                    initiatorPeer.setAlternativeAddress(this.clusterhashes.get(queueEntry.initiator()));
                }
                // start a thread for receipt sending to avoid a blocking here
                new Thread(new receiptSending(initiatorPeer, newEntry), "sending receipt to " + ASCII.String(queueEntry.initiator())).start();
            }
        }
View Full Code Here

    }

    public void loadSeedLists() {
        // uses the superseed to initialize the database with known seeds

        yacySeed           ys;
        String             seedListFileURL;
        DigestURI          url;
        Iterator<String>   enu;
        int                lc;
        final int          sc = this.peers.sizeConnected();
        ResponseHeader header;

        final RequestHeader reqHeader = new RequestHeader();
        reqHeader.put(HeaderFramework.PRAGMA, "no-cache");
        reqHeader.put(HeaderFramework.CACHE_CONTROL, "no-cache");
        reqHeader.put(HeaderFramework.USER_AGENT, ClientIdentification.getUserAgent());
        final HTTPClient client = new HTTPClient();
        client.setHeader(reqHeader.entrySet());
        client.setTimout((int) getConfigLong("bootstrapLoadTimeout", 20000));

        yacyCore.log.logInfo("BOOTSTRAP: " + sc + " seeds known from previous run");

        // - use the superseed to further fill up the seedDB
        int ssc = 0, c = 0;
        while (true) {
            if (Thread.currentThread().isInterrupted()) {
                break;
            }
            seedListFileURL = sb.getConfig("network.unit.bootstrap.seedlist" + c, "");
            if (seedListFileURL.length() == 0) {
                break;
            }
            c++;
            if (
                    seedListFileURL.startsWith("http://") ||
                    seedListFileURL.startsWith("https://")
            ) {
                // load the seed list
                try {

                    url = new DigestURI(seedListFileURL);
                    //final long start = System.currentTimeMillis();
                    client.HEADResponse(url.toString());
                    header = new ResponseHeader(client.getHttpResponse().getAllHeaders());
                    //final long loadtime = System.currentTimeMillis() - start;
                    /*if (header == null) {
                        if (loadtime > getConfigLong("bootstrapLoadTimeout", 6000)) {
                            yacyCore.log.logWarning("BOOTSTRAP: seed-list URL " + seedListFileURL + " not available, time-out after " + loadtime + " milliseconds");
                        } else {
                            yacyCore.log.logWarning("BOOTSTRAP: seed-list URL " + seedListFileURL + " not available, no content");
                        }
                    } else*/ if (header.lastModified() == null) {
                        yacyCore.log.logWarning("BOOTSTRAP: seed-list URL " + seedListFileURL + " not usable, last-modified is missing");
                    } else if ((header.age() > 86400000) && (ssc > 0)) {
                        yacyCore.log.logInfo("BOOTSTRAP: seed-list URL " + seedListFileURL + " too old (" + (header.age() / 86400000) + " days)");
                    } else {
                        ssc++;
                        final byte[] content = client.GETbytes(url);
                        enu = FileUtils.strings(content);
                        lc = 0;
                        while (enu.hasNext()) {
                            try {
                                ys = yacySeed.genRemoteSeed(enu.next(), null, false, null);
                                if ((ys != null) &&
                                    (!this.peers.mySeedIsDefined() || !this.peers.mySeed().hash.equals(ys.hash))) {
                                        final long lastseen = Math.abs((System.currentTimeMillis() - ys.getLastSeenUTC()) / 1000 / 60);
                                        if (lastseen < 240) {
                                            if (this.peers.peerActions.connectPeer(ys, false)) {
                                                lc++;
                                            }
                                        }
View Full Code Here

            if (this.log.isFine()) this.log.logFine("remoteCrawlLoaderJob: a local crawl is running, omitting processing");
            return false;
        }

        // check if we have an entry in the provider list, otherwise fill the list
        yacySeed seed;
        if (this.remoteCrawlProviderHashes.isEmpty()) {
            if (this.sb.peers != null && this.sb.peers.sizeConnected() > 0) {
                final Iterator<yacySeed> e = PeerSelection.getProvidesRemoteCrawlURLs(this.sb.peers);
                while (e.hasNext()) {
                    seed = e.next();
View Full Code Here

            prop.put("crawler-queue", "1");
            final List<Request> crawlerList = sb.crawlQueues.noticeURL.top(NoticedURL.StackType.REMOTE, showLimit);
           
            Request urle;
            boolean dark = true;
            yacySeed initiator;
            String profileHandle;
            CrawlProfile profileEntry;
            int i, showNum = 0;
            for (i = 0; (i < crawlerList.size()) && (showNum < showLimit); i++) {
                urle = crawlerList.get(i);
                if (urle != null && urle.url() != null) {
                    initiator = sb.peers.getConnected((urle.initiator() == null) ? "" : ASCII.String(urle.initiator()));
                    profileHandle = urle.profileHandle();
                    profileEntry = profileHandle == null ? null : sb.crawler.getActive(profileHandle.getBytes());
                    prop.put("crawler-queue_list_" + showNum + "_dark", dark ? "1" : "0");
                    prop.putHTML("crawler-queue_list_" + showNum + "_initiator", ((initiator == null) ? "proxy" : initiator.getName()));
                    prop.put("crawler-queue_list_" + showNum + "_profile", ((profileEntry == null) ? "unknown" : profileEntry.name()));
                    prop.put("crawler-queue_list_" + showNum + "_depth", urle.depth());
                    prop.put("crawler-queue_list_" + showNum + "_modified", daydate(urle.appdate()) );
                    prop.putHTML("crawler-queue_list_" + showNum + "_anchor", urle.name());
                    prop.putHTML("crawler-queue_list_" + showNum + "_url", urle.url().toString());
View Full Code Here

        }
        if (segment == null) segment = sb.indexSegments.segment(Segments.Process.PUBLIC);
        prop.put("rejected", "0");
        //int showRejectedCount = 10;
       
        yacySeed initiator;
       
        // index size
        prop.putNum("urlpublictextSize", segment.urlMetadata().size());
        prop.putNum("rwipublictextSize", segment.termIndex().sizesMax());

        // loader queue
        prop.putNum("loaderSize", sb.crawlQueues.workerSize());       
        prop.putNum("loaderMax", sb.getConfigLong(SwitchboardConstants.CRAWLER_THREADS_ACTIVE_MAX, 10));
        if (sb.crawlQueues.workerSize() == 0) {
            prop.put("list-loader", "0");
        } else {
            final Request[] w = sb.crawlQueues.activeWorkerEntries();
            int count = 0;
            for (final Request r : w)  {
                if (r == null) continue;
                prop.put("list-loader_"+count+"_profile", r.profileHandle());
                initiator = sb.peers.getConnected((r.initiator() == null) ? "" : ASCII.String(r.initiator()));
                prop.putHTML("list-loader_"+count+"_initiator", ((initiator == null) ? "proxy" : initiator.getName()));
                prop.put("list-loader_"+count+"_depth", r.depth());
                prop.putXML("list-loader_"+count+"_url", r.url().toString());
                count++;
            }
            prop.put("list-loader", count);
View Full Code Here

TOP

Related Classes of de.anomic.yacy.yacySeed

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.