Package net.yacy.kelondro.index

Examples of net.yacy.kelondro.index.HandleSet


        this.result = new WeakPriorityBlockingQueue<ResultEntry>(-1); // this is the result, enriched with snippets, ranked and ordered by ranking
        this.images = new WeakPriorityBlockingQueue<MediaSnippet>(-1);

        // snippets do not need to match with the complete query hashes,
        // only with the query minus the stopwords which had not been used for the search
        HandleSet filtered;
        try {
            filtered = HandleSet.joinConstructive(query.queryHashes, Switchboard.stopwordHashes);
        } catch (final RowSpaceExceededException e) {
            Log.logException(e);
            filtered = new HandleSet(query.queryHashes.row().primaryKeyLength, query.queryHashes.comparator(), 0);
        }
        this.snippetFetchWordHashes = query.queryHashes.clone();
        if (filtered != null && !filtered.isEmpty()) {
            this.snippetFetchWordHashes.excludeDestructive(Switchboard.stopwordHashes);
        }

        // start worker threads to fetch urls and snippets
        this.workerThreads = null;
View Full Code Here


            tc = topwords.get(descrcomp);
            if (tc > 0) r += Math.max(1, tc) << this.query.ranking.coeff_descrcompintoplist;
        }

        // apply query-in-result matching
        final HandleSet urlcomph = Word.words2hashesHandles(urlcomps);
        final HandleSet descrcomph = Word.words2hashesHandles(descrcomps);
        final Iterator<byte[]> shi = this.query.queryHashes.iterator();
        byte[] queryhash;
        while (shi.hasNext()) {
            queryhash = shi.next();
            if (urlcomph.has(queryhash)) r += 256 << this.query.ranking.coeff_appurl;
            if (descrcomph.has(queryhash)) r += 256 << this.query.ranking.coeff_app_dc_title;
        }

        return r;
    }
View Full Code Here

        }

        int pos = -1;
        String addressStr = null;
        InetAddress seedIPAddress = null;
        final HandleSet badPeerHashes = new HandleSet(12, Base64Order.enhancedCoder, 0);

        if (lookupConnected) {
            // enumerate the cache and simultaneously insert values
            final Iterator<Seed> e = seedsConnected(true, false, null, (float) 0.0);
            while (e.hasNext()) {
                seed = e.next();
                if (seed != null) {
                    addressStr = seed.getPublicAddress();
                    if (addressStr == null) {
                      Log.logWarning("YACY","lookupByIP/Connected: address of seed " + seed.getName() + "/" + seed.hash + " is null.");
                      try {
                            badPeerHashes.put(ASCII.getBytes(seed.hash));
                        } catch (final RowSpaceExceededException e1) {
                            Log.logException(e1);
                            break;
                        }
                      continue;
                    }
                    if ((pos = addressStr.indexOf(':'))!= -1) {
                        addressStr = addressStr.substring(0,pos);
                    }
                    seedIPAddress = Domains.dnsResolve(addressStr);
                    if (seedIPAddress == null) continue;
                    if (seed.isProper(false) == null) this.ipLookupCache.put(seedIPAddress, new SoftReference<Seed>(seed));
                    if (seedIPAddress.equals(peerIP)) return seed;
                }
            }
            // delete bad peers
            final Iterator<byte[]> i = badPeerHashes.iterator();
            while (i.hasNext()) try {this.seedActiveDB.delete(i.next());} catch (final IOException e1) {Log.logException(e1);}
            badPeerHashes.clear();
        }

        if (lookupDisconnected) {
            // enumerate the cache and simultaneously insert values
            final Iterator<Seed>e = seedsDisconnected(true, false, null, (float) 0.0);

            while (e.hasNext()) {
                seed = e.next();
                if (seed != null) {
                    addressStr = seed.getPublicAddress();
                    if (addressStr == null) {
                        Log.logWarning("YACY","lookupByIPDisconnected: address of seed " + seed.getName() + "/" + seed.hash + " is null.");
                        try {
                            badPeerHashes.put(UTF8.getBytes(seed.hash));
                        } catch (final RowSpaceExceededException e1) {
                            Log.logException(e1);
                            break;
                        }
                        continue;
                    }
                    if ((pos = addressStr.indexOf(':'))!= -1) {
                        addressStr = addressStr.substring(0,pos);
                    }
                    seedIPAddress = Domains.dnsResolve(addressStr);
                    if (seedIPAddress == null) continue;
                    if (seed.isProper(false) == null) this.ipLookupCache.put(seedIPAddress, new SoftReference<Seed>(seed));
                    if (seedIPAddress.equals(peerIP)) return seed;
                }
            }
            // delete bad peers
            final Iterator<byte[]> i = badPeerHashes.iterator();
            while (i.hasNext()) try {this.seedActiveDB.delete(i.next());} catch (final IOException e1) {Log.logException(e1);}
            badPeerHashes.clear();
        }

        if (lookupPotential) {
            // enumerate the cache and simultaneously insert values
            final Iterator<Seed> e = seedsPotential(true, false, null, (float) 0.0);
View Full Code Here

            prop.putHTML("keystring", keystring);
            prop.putHTML("keyhash", ASCII.String(keyhash));

            // read values from checkboxes
            final String[] urls = post.getAll("urlhx.*");
            HandleSet urlb = new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, urls.length);
            if (urls != null) for (final String s: urls) try { urlb.put(s.getBytes()); } catch (final RowSpaceExceededException e) { Log.logException(e); }
            final boolean delurl    = post.containsKey("delurl");
            final boolean delurlref = post.containsKey("delurlref");

            if (post.containsKey("keystringsearch")) {
                keyhash = Word.word2hash(keystring);
                prop.put("keyhash", keyhash);
                final RWIProcess ranking = genSearchresult(prop, sb, segment, keyhash, null);
                if (ranking.filteredCount() == 0) {
                    prop.put("searchresult", 1);
                    prop.putHTML("searchresult_word", keystring);
                }
            }

            if (post.containsKey("keyhashsearch")) {
                if (keystring.length() == 0 || !ByteBuffer.equals(Word.word2hash(keystring), keyhash)) {
                    prop.put("keystring", "&lt;not possible to compute word from hash&gt;");
                }
                final RWIProcess ranking = genSearchresult(prop, sb, segment, keyhash, null);
                if (ranking.filteredCount() == 0) {
                    prop.put("searchresult", 2);
                    prop.putHTML("searchresult_wordhash", ASCII.String(keyhash));
                }
            }

         // delete everything
            if (post.containsKey("deletecomplete")) {
                if (post.get("deleteIndex", "").equals("on")) {
                    segment.clear();
                }
                if (post.get("deleteSolr", "").equals("on") && sb.getConfigBool("federated.service.solr.indexing.enabled", false)) try {
                    sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).getSolr().clear();
                } catch (final Exception e) {
                    Log.logException(e);
                }
                if (post.get("deleteCrawlQueues", "").equals("on")) {
                    sb.crawlQueues.clear();
                    sb.crawlStacker.clear();
                    ResultURLs.clearStacks();
                }
                if (post.get("deleteCache", "").equals("on")) {
                    Cache.clear();
                }
                if (post.get("deleteRobots", "").equals("on")) {
                    sb.robots.clear();
                }
                if (post.get("deleteSearchFl", "").equals("on")) {
                    sb.tables.clear(WorkTables.TABLE_SEARCH_FAILURE_NAME);
                }
                post.remove("deletecomplete");
            }

            // set reference limitation
            if (post.containsKey("maxReferencesLimit")) {
                if (post.get("maxReferencesRadio", "").equals("on")) {
                    ReferenceContainer.maxReferences = post.getInt("maxReferences", 0);
                } else {
                    ReferenceContainer.maxReferences = 0;
                }
                sb.setConfig("index.maxReferences", ReferenceContainer.maxReferences);
            }

            // delete word
            if (post.containsKey("keyhashdeleteall")) try {
                if (delurl || delurlref) {
                    // generate urlx: an array of url hashes to be deleted
                    ReferenceContainer<WordReference> index = null;
                    index = segment.termIndex().get(keyhash, null);
                    final Iterator<WordReference> en = index.entries();
                    urlb = new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, index.size());
                    while (en.hasNext()) try { urlb.put(en.next().urlhash()); } catch (final RowSpaceExceededException e) { Log.logException(e); }
                    index = null;
                }
                if (delurlref) {
                    segment.removeAllUrlReferences(urlb, sb.loader, CacheStrategy.IFEXIST);
                }
                // delete the word first because that is much faster than the deletion of the urls from the url database
                segment.termIndex().delete(keyhash);
                // now delete all urls if demanded
                if (delurl || delurlref) {
                    for (final byte[] b: urlb) sb.urlRemove(segment, b);
                }
                post.remove("keyhashdeleteall");
                post.put("urllist", "generated");
            } catch (final IOException e) {
                Log.logException(e);
            }

            // delete selected URLs
            if (post.containsKey("keyhashdelete")) try {
                if (delurlref) {
                    segment.removeAllUrlReferences(urlb, sb.loader, CacheStrategy.IFEXIST);
                }
                if (delurl || delurlref) {
                    for (final byte[] b: urlb) sb.urlRemove(segment, b);
                }
                final HandleSet urlHashes = new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0);
                for (final byte[] b: urlb) try { urlHashes.put(b); } catch (final RowSpaceExceededException e) { Log.logException(e); }
                segment.termIndex().remove(keyhash, urlHashes);
                // this shall lead to a presentation of the list; so handle that the remaining program
                // thinks that it was called for a list presentation
                post.remove("keyhashdelete");
                post.put("urllist", "generated");
            } catch (final IOException e) {
                Log.logException(e);
            }

            if (post.containsKey("urllist")) {
                if (keystring.length() == 0 || !ByteBuffer.equals(Word.word2hash(keystring), keyhash)) {
                    prop.put("keystring", "&lt;not possible to compute word from hash&gt;");
                }
                final Bitfield flags = compileFlags(post);
                final int count = (post.get("lines", "all").equals("all")) ? -1 : post.getInt("lines", -1);
                final RWIProcess ranking = genSearchresult(prop, sb, segment, keyhash, flags);
                genURLList(prop, keyhash, keystring, ranking, flags, count);
            }

            // transfer to other peer
            if (post.containsKey("keyhashtransfer")) try {
                if (keystring.length() == 0 || !ByteBuffer.equals(Word.word2hash(keystring), keyhash)) {
                    prop.put("keystring", "&lt;not possible to compute word from hash&gt;");
                }

                // find host & peer
                String host = post.get("host", ""); // get host from input field
                Seed seed = null;
                if (host.length() != 0) {
                    if (host.length() == 12) {
                        // the host string is a peer hash
                        seed = sb.peers.getConnected(host);
                    } else {
                        // the host string can be a host name
                        seed = sb.peers.lookupByName(host);
                    }
                } else {
                    host = post.get("hostHash", ""); // if input field is empty, get from select box
                    seed = sb.peers.getConnected(host);
                }

                // prepare index
                ReferenceContainer<WordReference> index;
                final long starttime = System.currentTimeMillis();
                index = segment.termIndex().get(keyhash, null);
                // build urlCache
                final Iterator<WordReference> urlIter = index.entries();
                final TreeMap<byte[], URIMetadataRow> knownURLs = new TreeMap<byte[], URIMetadataRow>(Base64Order.enhancedCoder);
                final HandleSet unknownURLEntries = new HandleSet(WordReferenceRow.urlEntryRow.primaryKeyLength, WordReferenceRow.urlEntryRow.objectOrder, index.size());
                Reference iEntry;
                URIMetadataRow lurl;
                while (urlIter.hasNext()) {
                    iEntry = urlIter.next();
                    lurl = segment.urlMetadata().load(iEntry.urlhash());
                    if (lurl == null) {
                        try {
                            unknownURLEntries.put(iEntry.urlhash());
                        } catch (final RowSpaceExceededException e) {
                            Log.logException(e);
                        }
                        urlIter.remove();
                    } else {
                        knownURLs.put(iEntry.urlhash(), lurl);
                    }
                }

                // make an indexContainerCache
                final ReferenceContainerCache<WordReference> icc = new ReferenceContainerCache<WordReference>(Segment.wordReferenceFactory, Segment.wordOrder, Word.commonHashLength);
                try {
                    icc.add(index);
                } catch (final RowSpaceExceededException e) {
                    Log.logException(e);
                }

                // transport to other peer
                final boolean gzipBody = sb.getConfigBool("indexControl.gzipBody", false);
                final int timeout = (int) sb.getConfigLong("indexControl.timeout", 60000);
                final String error = Protocol.transferIndex(
                             seed,
                             icc,
                             knownURLs,
                             gzipBody,
                             timeout);
                prop.put("result", (error == null) ? ("Successfully transferred " + knownURLs.size() + " words in " + ((System.currentTimeMillis() - starttime) / 1000) + " seconds, " + unknownURLEntries.size() + " URL not found") : "error: " + error);
                index = null;
            } catch (final IOException e) {
                Log.logException(e);
            }

            // generate list
            if (post.containsKey("keyhashsimilar")) try {
                final Iterator<ReferenceContainer<WordReference>> containerIt = segment.termIndex().referenceContainer(keyhash, true, 256, false).iterator();
                    ReferenceContainer<WordReference> container;
                    i = 0;
                    int rows = 0, cols = 0;
                    prop.put("keyhashsimilar", "1");
                    while (containerIt.hasNext() && i < 256) {
                        container = containerIt.next();
                        prop.put("keyhashsimilar_rows_"+rows+"_cols_"+cols+"_wordHash", container.getTermHash());
                        cols++;
                        if (cols==8) {
                            prop.put("keyhashsimilar_rows_"+rows+"_cols", cols);
                            cols = 0;
                            rows++;
                        }
                        i++;
                    }
                    prop.put("keyhashsimilar_rows_"+rows+"_cols", cols);
                    prop.put("keyhashsimilar_rows", rows + 1);
                    prop.put("result", "");
            } catch (final IOException e) {
                Log.logException(e);
            }

            if (post.containsKey("blacklist")) {
                final String blacklist = post.get("blacklist", "");
                final HandleSet urlHashes = new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, urlb.size());
                if (post.containsKey("blacklisturls")) {
                    PrintWriter pw;
                    try {
                        final String[] supportedBlacklistTypes = env.getConfig("BlackLists.types", "").split(",");
                        pw = new PrintWriter(new FileWriter(new File(ListManager.listsPath, blacklist), true));
                        DigestURI url;
                        for (final byte[] b: urlb) {
                            try {
                                urlHashes.put(b);
                            } catch (final RowSpaceExceededException e) {
                                Log.logException(e);
                            }
                            final URIMetadataRow e = segment.urlMetadata().load(b);
                            segment.urlMetadata().remove(b);
                            if (e != null) {
                                url = e.metadata().url();
                                pw.println(url.getHost() + "/" + url.getFile());
                                for (final String supportedBlacklistType : supportedBlacklistTypes) {
                                    if (ListManager.listSetContains(supportedBlacklistType + ".BlackLists", blacklist)) {
                                        Switchboard.urlBlacklist.add(
                                                supportedBlacklistType,
                                                url.getHost(),
                                                url.getFile());
                                    }
                                }
                                SearchEventCache.cleanupEvents(true);
                            }
                        }
                        pw.close();
                    } catch (final IOException e) {
                    }
                }

                if (post.containsKey("blacklistdomains")) {
                    PrintWriter pw;
                    try {
                        final String[] supportedBlacklistTypes = Blacklist.BLACKLIST_TYPES_STRING.split(",");
                        pw = new PrintWriter(new FileWriter(new File(ListManager.listsPath, blacklist), true));
                        DigestURI url;
                        for (final byte[] b: urlb) {
                            try {
                                urlHashes.put(b);
                            } catch (final RowSpaceExceededException e) {
                                Log.logException(e);
                            }
                            final URIMetadataRow e = segment.urlMetadata().load(b);
                            segment.urlMetadata().remove(b);
View Full Code Here

        public void run() {
            Log.logInfo("INDEXCLEANER", "IndexCleaner-Thread started");
            ReferenceContainer<WordReference> container = null;
            WordReferenceVars entry = null;
            DigestURI url = null;
            final HandleSet urlHashs = new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0);
            try {
                Iterator<ReferenceContainer<WordReference>> indexContainerIterator = Segment.this.termIndex.referenceContainer(this.startHash, false, 100, false).iterator();
                while (indexContainerIterator.hasNext() && this.run) {
                    waiter();
                    container = indexContainerIterator.next();
                    final Iterator<WordReference> containerIterator = container.entries();
                    this.wordHashNow = container.getTermHash();
                    while (containerIterator.hasNext() && this.run) {
                        waiter();
                        entry = new WordReferenceVars(containerIterator.next());
                        // System.out.println("Wordhash: "+wordHash+" UrlHash:
                        // "+entry.getUrlHash());
                        final URIMetadataRow ue = Segment.this.urlMetadata.load(entry.urlhash());
                        if (ue == null) {
                            urlHashs.put(entry.urlhash());
                        } else {
                            url = ue.metadata().url();
                            if (url == null || Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_CRAWLER, url)) {
                                urlHashs.put(entry.urlhash());
                            }
                        }
                    }
                    if (!urlHashs.isEmpty()) try {
                        final int removed = Segment.this.termIndex.remove(container.getTermHash(), urlHashs);
                        Log.logFine("INDEXCLEANER", ASCII.String(container.getTermHash()) + ": " + removed + " of " + container.size() + " URL-entries deleted");
                        this.lastWordHash = container.getTermHash();
                        this.lastDeletionCounter = urlHashs.size();
                        urlHashs.clear();
                    } catch (final IOException e) {
                        Log.logException(e);
                    }

                    if (!containerIterator.hasNext()) {
View Full Code Here

        if (sentence == null) return queryhashes;
        final SortedMap<byte[], Integer> hs = WordTokenizer.hashSentence(sentence, null);
        final Iterator<byte[]> j = queryhashes.iterator();
        byte[] hash;
        Integer pos;
        final HandleSet remaininghashes = new HandleSet(queryhashes.row().primaryKeyLength, queryhashes.comparator(), queryhashes.size());
        while (j.hasNext()) {
            hash = j.next();
            pos = hs.get(hash);
            if (pos == null) {
                try {
                    remaininghashes.put(hash);
                } catch (final RowSpaceExceededException e) {
                    Log.logException(e);
                }
            }
        }
View Full Code Here

        final ReferenceContainer<ReferenceType> countRam = this.ram.get(termHash, null);
        assert countRam == null || countRam.size() >= 0;
        int c = countRam == null ? countFile : countFile + countRam.size();
        // exclude entries from delayed remove
        synchronized (this.removeDelayedURLs) {
            final HandleSet s = this.removeDelayedURLs.get(termHash);
            if (s != null) c -= s.size();
            if (c < 0) c = 0;
        }
        // put count result into cache
        if (MemoryControl.shortStatus()) this.countCache.clear();
        this.countCache.insert(termHash, c);
View Full Code Here

            result = c1;
        }
        if (result == null) return null;
        // remove the failed urls
        synchronized (this.removeDelayedURLs) {
            final HandleSet s = this.removeDelayedURLs.get(termHash);
            if (s != null) result.removeEntries(s);
        }
        return result;
    }
View Full Code Here

            }
        }
    }

    public void removeDelayed(final byte[] termHash, final HandleSet urlHashes) {
        HandleSet r;
        synchronized (this.removeDelayedURLs) {
            r = this.removeDelayedURLs.get(termHash);
        }
        if (r == null) {
            r = new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0);
        }
        try {
            r.putAll(urlHashes);
        } catch (final RowSpaceExceededException e) {
            try {remove(termHash, urlHashes);} catch (final IOException e1) {}
            return;
        }
        synchronized (this.removeDelayedURLs) {
View Full Code Here

            this.removeDelayedURLs.put(termHash, r);
        }
    }

    public void removeDelayed(final byte[] termHash, final byte[] urlHashBytes) {
        HandleSet r;
        synchronized (this.removeDelayedURLs) {
            r = this.removeDelayedURLs.get(termHash);
        }
        if (r == null) {
            r = new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0);
        }
        try {
            r.put(urlHashBytes);
        } catch (final RowSpaceExceededException e) {
            try {remove(termHash, urlHashBytes);} catch (final IOException e1) {}
            return;
        }
        synchronized (this.removeDelayedURLs) {
View Full Code Here

TOP

Related Classes of net.yacy.kelondro.index.HandleSet

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.