Examples of HandleSet


Examples of net.yacy.kelondro.index.HandleSet

            String estring;
            int p;
            String wordHash;
            byte[] urlHash;
            WordReferenceRow iEntry;
            final HandleSet unknownURL = new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0);
            final HandleSet knownURL = new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0);
            final ArrayList<String> wordhashes = new ArrayList<String>();
            int received = 0;
            int blocked = 0;
            int receivedURL = 0;
            while (it.hasNext()) {
                serverCore.checkInterruption();
                estring = it.next();
               
                // check if RWI entry is well-formed
                p = estring.indexOf('{');
                if ((p < 0) || (estring.indexOf("x=") < 0) || !(estring.indexOf("[B@") < 0)) {
                    blocked++;
                    continue;
                }
                wordHash = estring.substring(0, p);
                wordhashes.add(wordHash);
                iEntry = new WordReferenceRow(estring.substring(p));
                urlHash = iEntry.urlhash();
               
                // block blacklisted entries
                if ((blockBlacklist) && (Switchboard.urlBlacklist.hashInBlacklistedCache(Blacklist.BLACKLIST_DHT, urlHash))) {
                    if (yacyCore.log.isFine()) yacyCore.log.logFine("transferRWI: blocked blacklisted URLHash '" + ASCII.String(urlHash) + "' from peer " + otherPeerName);
                    blocked++;
                    continue;
                }
               
                // check if the entry is in our network domain
                final String urlRejectReason = sb.crawlStacker.urlInAcceptedDomainHash(urlHash);
                if (urlRejectReason != null) {
                    yacyCore.log.logWarning("transferRWI: blocked URL hash '" + ASCII.String(urlHash) + "' (" + urlRejectReason + ") from peer " + otherPeerName + "; peer is suspected to be a spam-peer (or something is wrong)");
                    //if (yacyCore.log.isFine()) yacyCore.log.logFine("transferRWI: blocked URL hash '" + urlHash + "' (" + urlRejectReason + ") from peer " + otherPeerName);
                    blocked++;
                    continue;
                }
               
                // learn entry
                try {
                    sb.indexSegments.termIndex(Segments.Process.DHTIN).add(wordHash.getBytes(), iEntry);
                } catch (Exception e) {
                    Log.logException(e);
                }
                serverCore.checkInterruption();

                // check if we need to ask for the corresponding URL
                if (!(knownURL.has(urlHash) || unknownURL.has(urlHash)))  try {
                    if (sb.indexSegments.urlMetadata(Segments.Process.DHTIN).exists(urlHash)) {
                        knownURL.put(urlHash);
                    } else {
                        unknownURL.put(urlHash);
                    }
                    receivedURL++;
                } catch (final Exception ex) {
View Full Code Here

Examples of net.yacy.kelondro.index.HandleSet

        // tell all threads to do nothing for a specific time
        sb.intermissionAllThreads(100);

        EventTracker.delete(EventTracker.EClass.SEARCH);
        final HandleSet abstractSet = (abstracts.length() == 0 || abstracts.equals("auto")) ? null : QueryParams.hashes2Set(abstracts);

        // store accessing peer
        yacySeed remoteSeed;
        try {
            remoteSeed = yacySeed.genRemoteSeed(oseed, key, false, client);
        } catch (final IOException e) {
            yacyCore.log.logInfo("yacy.search: access with bad seed: " + e.getMessage());
            remoteSeed = null;
        }
        if (sb.peers == null) {
            yacyCore.log.logSevere("yacy.search: seed cache not initialized");
        } else {
            sb.peers.peerActions.peerArrival(remoteSeed, true);
        }

        // prepare search
        final HandleSet queryhashes = QueryParams.hashes2Set(query);
        final HandleSet excludehashes = (exclude.length() == 0) ? new HandleSet(WordReferenceRow.urlEntryRow.primaryKeyLength, WordReferenceRow.urlEntryRow.objectOrder, 0) : QueryParams.hashes2Set(exclude);
        final long timestamp = System.currentTimeMillis();

      // prepare a search profile
        final RankingProfile rankingProfile = (profile.length() == 0) ? new RankingProfile(ContentDomain.contentdomParser(contentdom)) : new RankingProfile("", profile);

        // prepare an abstract result
        final StringBuilder indexabstract = new StringBuilder(6000);
        int indexabstractContainercount = 0;
        int joincount = 0;
        QueryParams theQuery = null;
        SearchEvent theSearch = null;
        ArrayList<WeakPriorityBlockingQueue.Element<ResultEntry>> accu = null;
        if (query.length() == 0 && abstractSet != null) {
            // this is _not_ a normal search, only a request for index abstracts
            final Segment indexSegment = sb.indexSegments.segment(Segments.Process.PUBLIC);
            theQuery = new QueryParams(
                    null,
                    abstractSet,
                    new HandleSet(WordReferenceRow.urlEntryRow.primaryKeyLength, WordReferenceRow.urlEntryRow.objectOrder, 0),
                    null,
                    snippetPattern,
                    null,
                    maxdist,
                    prefer,
View Full Code Here

Examples of net.yacy.kelondro.index.HandleSet

            if (agent == null) agent = System.getProperty("user.language");
            language = (agent == null) ? "en" : ISO639.userAgentLanguageDetection(agent);
            if (language == null) language = "en";
        }
        final TreeSet<String>[] query = QueryParams.cleanQuery(querystring); // converts also umlaute
        HandleSet q = Word.words2hashesHandles(query[0]);
       
        // tell all threads to do nothing for a specific time
        sb.intermissionAllThreads(3000);

        // prepare search
View Full Code Here

Examples of net.yacy.kelondro.index.HandleSet

            // prepare search properties
            final boolean globalsearch = (global) && indexReceiveGranted;

            // do the search
            final HandleSet queryHashes = Word.words2hashesHandles(query[0]);
            final Pattern snippetPattern = QueryParams.stringSearchPattern(originalquerystring);

            // check filters
            try {
                Pattern.compile(urlmask);
View Full Code Here

Examples of net.yacy.kelondro.index.HandleSet

            prop.putHTML("keystring", keystring);
            prop.putHTML("keyhash", ASCII.String(keyhash));

            // read values from checkboxes
            final String[] urls = post.getAll("urlhx.*");
            HandleSet urlb = new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, urls.length);
            if (urls != null) for (final String s: urls) try { urlb.put(s.getBytes()); } catch (final RowSpaceExceededException e) { Log.logException(e); }
            final boolean delurl    = post.containsKey("delurl");
            final boolean delurlref = post.containsKey("delurlref");

            if (post.containsKey("keystringsearch")) {
                keyhash = Word.word2hash(keystring);
                prop.put("keyhash", keyhash);
                final RankingProcess ranking = genSearchresult(prop, sb, segment, keyhash, null);
                if (ranking.filteredCount() == 0) {
                    prop.put("searchresult", 1);
                    prop.putHTML("searchresult_word", keystring);
                }
            }

            if (post.containsKey("keyhashsearch")) {
                if (keystring.length() == 0 || !ByteBuffer.equals(Word.word2hash(keystring), keyhash)) {
                    prop.put("keystring", "&lt;not possible to compute word from hash&gt;");
                }
                final RankingProcess ranking = genSearchresult(prop, sb, segment, keyhash, null);
                if (ranking.filteredCount() == 0) {
                    prop.put("searchresult", 2);
                    prop.putHTML("searchresult_wordhash", ASCII.String(keyhash));
                }
            }

            // delete everything
            if (post.containsKey("deletecomplete")) {
                if (post.get("deleteIndex", "").equals("on")) {
                    segment.clear();
                }
                if (post.get("deleteSolr", "").equals("on") && sb.getConfigBool("federated.service.solr.indexing.enabled", false)) try {
                    sb.solrConnector.clear();
                } catch (final Exception e) {
                    Log.logException(e);
                }
                if (post.get("deleteCrawlQueues", "").equals("on")) {
                    sb.crawlQueues.clear();
                    sb.crawlStacker.clear();
                    ResultURLs.clearStacks();
                }
                if (post.get("deleteCache", "").equals("on")) {
                    Cache.clear();
                }
                if (post.get("deleteRobots", "").equals("on")) {
                    sb.robots.clear();
                }
                if (post.get("deleteSearchFl", "").equals("on")) {
                  sb.tables.clear(WorkTables.TABLE_SEARCH_FAILURE_NAME);
                }
                post.remove("deletecomplete");
            }

            // delete word
            if (post.containsKey("keyhashdeleteall")) try {
                if (delurl || delurlref) {
                    // generate urlx: an array of url hashes to be deleted
                    ReferenceContainer<WordReference> index = null;
                    index = segment.termIndex().get(keyhash, null);
                    final Iterator<WordReference> en = index.entries();
                    urlb = new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, index.size());
                    while (en.hasNext()) try { urlb.put(en.next().urlhash()); } catch (final RowSpaceExceededException e) { Log.logException(e); }
                    index = null;
                }
                if (delurlref) {
                    segment.removeAllUrlReferences(urlb, sb.loader, CacheStrategy.IFEXIST);
                }
                // delete the word first because that is much faster than the deletion of the urls from the url database
                segment.termIndex().delete(keyhash);
                // now delete all urls if demanded
                if (delurl || delurlref) {
                    for (final byte[] b: urlb) sb.urlRemove(segment, b);
                }
                post.remove("keyhashdeleteall");
                post.put("urllist", "generated");
            } catch (final IOException e) {
                Log.logException(e);
            }

            // delete selected URLs
            if (post.containsKey("keyhashdelete")) try {
                if (delurlref) {
                    segment.removeAllUrlReferences(urlb, sb.loader, CacheStrategy.IFEXIST);
                }
                if (delurl || delurlref) {
                    for (final byte[] b: urlb) sb.urlRemove(segment, b);
                }
                final HandleSet urlHashes = new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0);
                for (final byte[] b: urlb) try { urlHashes.put(b); } catch (final RowSpaceExceededException e) { Log.logException(e); }
                segment.termIndex().remove(keyhash, urlHashes);
                // this shall lead to a presentation of the list; so handle that the remaining program
                // thinks that it was called for a list presentation
                post.remove("keyhashdelete");
                post.put("urllist", "generated");
            } catch (final IOException e) {
                Log.logException(e);
            }

            if (post.containsKey("urllist")) {
                if (keystring.length() == 0 || !ByteBuffer.equals(Word.word2hash(keystring), keyhash)) {
                    prop.put("keystring", "&lt;not possible to compute word from hash&gt;");
                }
                final Bitfield flags = compileFlags(post);
                final int count = (post.get("lines", "all").equals("all")) ? -1 : post.getInt("lines", -1);
                final RankingProcess ranking = genSearchresult(prop, sb, segment, keyhash, flags);
                genURLList(prop, keyhash, keystring, ranking, flags, count);
            }

            // transfer to other peer
            if (post.containsKey("keyhashtransfer")) try {
                if (keystring.length() == 0 || !ByteBuffer.equals(Word.word2hash(keystring), keyhash)) {
                    prop.put("keystring", "&lt;not possible to compute word from hash&gt;");
                }

                // find host & peer
                String host = post.get("host", ""); // get host from input field
                yacySeed seed = null;
                if (host.length() != 0) {
                    if (host.length() == 12) {
                        // the host string is a peer hash
                        seed = sb.peers.getConnected(host);
                    } else {
                        // the host string can be a host name
                        seed = sb.peers.lookupByName(host);
                    }
                } else {
                    host = post.get("hostHash", ""); // if input field is empty, get from select box
                    seed = sb.peers.getConnected(host);
                }

                // prepare index
                ReferenceContainer<WordReference> index;
                final long starttime = System.currentTimeMillis();
                index = segment.termIndex().get(keyhash, null);
                // built urlCache
                final Iterator<WordReference> urlIter = index.entries();
                final TreeMap<byte[], URIMetadataRow> knownURLs = new TreeMap<byte[], URIMetadataRow>(Base64Order.enhancedCoder);
                final HandleSet unknownURLEntries = new HandleSet(WordReferenceRow.urlEntryRow.primaryKeyLength, WordReferenceRow.urlEntryRow.objectOrder, index.size());
                Reference iEntry;
                URIMetadataRow lurl;
                while (urlIter.hasNext()) {
                    iEntry = urlIter.next();
                    lurl = segment.urlMetadata().load(iEntry.urlhash());
                    if (lurl == null) {
                        try {
                            unknownURLEntries.put(iEntry.urlhash());
                        } catch (final RowSpaceExceededException e) {
                            Log.logException(e);
                        }
                        urlIter.remove();
                    } else {
                        knownURLs.put(iEntry.urlhash(), lurl);
                    }
                }

                // make an indexContainerCache
                final ReferenceContainerCache<WordReference> icc = new ReferenceContainerCache<WordReference>(Segment.wordReferenceFactory, Segment.wordOrder, Word.commonHashLength);
                try {
                    icc.add(index);
                } catch (final RowSpaceExceededException e) {
                    Log.logException(e);
                }

                // transport to other peer
                final boolean gzipBody = sb.getConfigBool("indexControl.gzipBody", false);
                final int timeout = (int) sb.getConfigLong("indexControl.timeout", 60000);
                final String error = yacyClient.transferIndex(
                             seed,
                             icc,
                             knownURLs,
                             gzipBody,
                             timeout);
                prop.put("result", (error == null) ? ("Successfully transferred " + knownURLs.size() + " words in " + ((System.currentTimeMillis() - starttime) / 1000) + " seconds, " + unknownURLEntries.size() + " URL not found") : "error: " + error);
                index = null;
            } catch (final IOException e) {
                Log.logException(e);
            }

            // generate list
            if (post.containsKey("keyhashsimilar")) try {
                final Iterator<ReferenceContainer<WordReference>> containerIt = segment.termIndex().references(keyhash, true, 256, false).iterator();
                    ReferenceContainer<WordReference> container;
                    i = 0;
                    int rows = 0, cols = 0;
                    prop.put("keyhashsimilar", "1");
                    while (containerIt.hasNext() && i < 256) {
                        container = containerIt.next();
                        prop.put("keyhashsimilar_rows_"+rows+"_cols_"+cols+"_wordHash", container.getTermHash());
                        cols++;
                        if (cols==8) {
                            prop.put("keyhashsimilar_rows_"+rows+"_cols", cols);
                            cols = 0;
                            rows++;
                        }
                        i++;
                    }
                    prop.put("keyhashsimilar_rows_"+rows+"_cols", cols);
                    prop.put("keyhashsimilar_rows", rows + 1);
                    prop.put("result", "");
            } catch (final IOException e) {
                Log.logException(e);
            }

            if (post.containsKey("blacklist")) {
                final String blacklist = post.get("blacklist", "");
                final HandleSet urlHashes = new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, urlb.size());
                if (post.containsKey("blacklisturls")) {
                    PrintWriter pw;
                    try {
                        final String[] supportedBlacklistTypes = env.getConfig("BlackLists.types", "").split(",");
                        pw = new PrintWriter(new FileWriter(new File(ListManager.listsPath, blacklist), true));
                        DigestURI url;
                        for (final byte[] b: urlb) {
                            try {
                                urlHashes.put(b);
                            } catch (final RowSpaceExceededException e) {
                                Log.logException(e);
                            }
                            final URIMetadataRow e = segment.urlMetadata().load(b);
                            segment.urlMetadata().remove(b);
                            if (e != null) {
                                url = e.metadata().url();
                                pw.println(url.getHost() + "/" + url.getFile());
                                for (final String supportedBlacklistType : supportedBlacklistTypes) {
                                    if (ListManager.listSetContains(supportedBlacklistType + ".BlackLists", blacklist)) {
                                        Switchboard.urlBlacklist.add(
                                                supportedBlacklistType,
                                                url.getHost(),
                                                url.getFile());
                                    }
                                }
                                SearchEventCache.cleanupEvents(true);
                            }
                        }
                        pw.close();
                    } catch (final IOException e) {
                    }
                }

                if (post.containsKey("blacklistdomains")) {
                    PrintWriter pw;
                    try {
                        final String[] supportedBlacklistTypes = Blacklist.BLACKLIST_TYPES_STRING.split(",");
                        pw = new PrintWriter(new FileWriter(new File(ListManager.listsPath, blacklist), true));
                        DigestURI url;
                        for (final byte[] b: urlb) {
                            try {
                                urlHashes.put(b);
                            } catch (final RowSpaceExceededException e) {
                                Log.logException(e);
                            }
                            final URIMetadataRow e = segment.urlMetadata().load(b);
                            segment.urlMetadata().remove(b);
View Full Code Here

Examples of net.yacy.kelondro.index.HandleSet

        }

        int pos = -1;
        String addressStr = null;
        InetAddress seedIPAddress = null;       
        final HandleSet badPeerHashes = new HandleSet(12, Base64Order.enhancedCoder, 0);
       
        if (lookupConnected) {
            // enumerate the cache and simultanous insert values
            final Iterator<yacySeed> e = seedsConnected(true, false, null, (float) 0.0);
            while (e.hasNext()) {
                seed = e.next();
                if (seed != null) {
                    addressStr = seed.getPublicAddress();
                    if (addressStr == null) {
                      Log.logWarning("YACY","lookupByIP/Connected: address of seed " + seed.getName() + "/" + seed.hash + " is null.");
                      try {
                            badPeerHashes.put(ASCII.getBytes(seed.hash));
                        } catch (RowSpaceExceededException e1) {
                            Log.logException(e1);
                            break;
                        }
                      continue;
                    }
                    if ((pos = addressStr.indexOf(':'))!= -1) {
                        addressStr = addressStr.substring(0,pos);
                    }
                    seedIPAddress = Domains.dnsResolve(addressStr);
                    if (seedIPAddress == null) continue;
                    if (seed.isProper(false) == null) ipLookupCache.put(seedIPAddress, new SoftReference<yacySeed>(seed));
                    if (seedIPAddress.equals(peerIP)) return seed;
                }
            }
            // delete bad peers
            final Iterator<byte[]> i = badPeerHashes.iterator();
            while (i.hasNext()) try {seedActiveDB.delete(i.next());} catch (final IOException e1) {Log.logException(e1);}
            badPeerHashes.clear();
        }
       
        if (lookupDisconnected) {
            // enumerate the cache and simultanous insert values
            final Iterator<yacySeed>e = seedsDisconnected(true, false, null, (float) 0.0);

            while (e.hasNext()) {
                seed = e.next();
                if (seed != null) {
                    addressStr = seed.getPublicAddress();
                    if (addressStr == null) {
                        Log.logWarning("YACY","lookupByIPDisconnected: address of seed " + seed.getName() + "/" + seed.hash + " is null.");
                        try {
                            badPeerHashes.put(UTF8.getBytes(seed.hash));
                        } catch (RowSpaceExceededException e1) {
                            Log.logException(e1);
                            break;
                        }
                        continue;
                    }
                    if ((pos = addressStr.indexOf(':'))!= -1) {
                        addressStr = addressStr.substring(0,pos);
                    }
                    seedIPAddress = Domains.dnsResolve(addressStr);
                    if (seedIPAddress == null) continue;
                    if (seed.isProper(false) == null) ipLookupCache.put(seedIPAddress, new SoftReference<yacySeed>(seed));
                    if (seedIPAddress.equals(peerIP)) return seed;
                }
            }
            // delete bad peers
            final Iterator<byte[]> i = badPeerHashes.iterator();
            while (i.hasNext()) try {seedActiveDB.delete(i.next());} catch (final IOException e1) {Log.logException(e1);}
            badPeerHashes.clear();
        }
       
        if (lookupPotential) {
            // enumerate the cache and simultanous insert values
            final Iterator<yacySeed> e = seedsPotential(true, false, null, (float) 0.0);
View Full Code Here

Examples of net.yacy.kelondro.index.HandleSet

       
        private acceptRemoteIndexSeedEnum(yacySeedDB seedDB, final byte[] starthash, int max, boolean alsoMyOwn) {
            this.seedDB = seedDB;
            this.se = getDHTSeeds(seedDB, starthash, yacyVersion.YACY_HANDLES_COLLECTION_INDEX);
            this.remaining = max;
            this.doublecheck = new HandleSet(12, Base64Order.enhancedCoder, 0);
            this.nextSeed = nextInternal();
            this.alsoMyOwn = alsoMyOwn && nextSeed != null && (Base64Order.enhancedCoder.compare(ASCII.getBytes(seedDB.mySeed().hash), ASCII.getBytes(nextSeed.hash)) > 0);
        }
View Full Code Here

Examples of net.yacy.kelondro.index.HandleSet

        public Chunk(byte[] primaryTarget, final List<yacySeed> targets) {
            super();
            this.primaryTarget = primaryTarget;
            this.containers = new ReferenceContainerCache<WordReference>(Segment.wordReferenceFactory, Segment.wordOrder, Word.commonHashLength);
            this.references = new TreeMap<byte[], URIMetadataRow>(Base64Order.enhancedCoder);
            this.badReferences = new HandleSet(WordReferenceRow.urlEntryRow.primaryKeyLength, WordReferenceRow.urlEntryRow.objectOrder, 0);
            this.targets    = targets;
            this.hit = 0;
            this.miss = 0;
        }
View Full Code Here

Examples of net.yacy.kelondro.index.HandleSet

        }
        // then remove the container from the backend
        final ArrayList<ReferenceContainer<WordReference>> rc;
        if (ram) {
            // selection was only from ram, so we have to carefully remove only the selected entries
            final HandleSet urlHashes = new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0);
            Iterator<WordReference> it;
            for (final ReferenceContainer<WordReference> c: containers) {
                urlHashes.clear();
                it = c.entries();
                while (it.hasNext()) try { urlHashes.put(it.next().urlhash()); } catch (final RowSpaceExceededException e) { Log.logException(e); }
                if (this.log.isFine()) this.log.logFine("selected " + urlHashes.size() + " urls for word '" + ASCII.String(c.getTermHash()) + "'");
                if (!urlHashes.isEmpty()) this.segment.termIndex().remove(c.getTermHash(), urlHashes);
            }
            rc = containers;
        } else {
            // selection was from whole index, so we can just delete the whole container
            // but to avoid race conditions return the results from the deletes
View Full Code Here

Examples of net.yacy.kelondro.index.HandleSet

        if (sentence == null) return queryhashes;
        final SortedMap<byte[], Integer> hs = WordTokenizer.hashSentence(sentence, null);
        final Iterator<byte[]> j = queryhashes.iterator();
        byte[] hash;
        Integer pos;
        final HandleSet remaininghashes = new HandleSet(queryhashes.row().primaryKeyLength, queryhashes.comparator(), queryhashes.size());
        while (j.hasNext()) {
            hash = j.next();
            pos = hs.get(hash);
            if (pos == null) {
                try {
                    remaininghashes.put(hash);
                } catch (final RowSpaceExceededException e) {
                    Log.logException(e);
                }
            }
        }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.