Package net.yacy.kelondro.data.word

Examples of net.yacy.kelondro.data.word.WordReferenceRow$ExternalParser


            // now parse the Strings in the value-vector and write index entries
            String estring;
            int p;
            String wordHash;
            byte[] urlHash;
            WordReferenceRow iEntry;
            final HandleSet unknownURL = new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0);
            final HandleSet knownURL = new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0);
            final ArrayList<String> wordhashes = new ArrayList<String>();
            int received = 0;
            int blocked = 0;
            int receivedURL = 0;
            while (it.hasNext()) {
                serverCore.checkInterruption();
                estring = it.next();
               
                // check if RWI entry is well-formed
                p = estring.indexOf('{');
                if ((p < 0) || (estring.indexOf("x=") < 0) || !(estring.indexOf("[B@") < 0)) {
                    blocked++;
                    continue;
                }
                wordHash = estring.substring(0, p);
                wordhashes.add(wordHash);
                iEntry = new WordReferenceRow(estring.substring(p));
                urlHash = iEntry.urlhash();
               
                // block blacklisted entries
                if ((blockBlacklist) && (Switchboard.urlBlacklist.hashInBlacklistedCache(Blacklist.BLACKLIST_DHT, urlHash))) {
                    if (yacyCore.log.isFine()) yacyCore.log.logFine("transferRWI: blocked blacklisted URLHash '" + ASCII.String(urlHash) + "' from peer " + otherPeerName);
                    blocked++;
View Full Code Here


        this.entry.setCol(col_lapp, Integer.parseInt(prop.getProperty("lapp", "0")));
        this.snippet = crypt.simpleDecode(prop.getProperty("snippet", ""), null);
        this.word = null;
        if (prop.containsKey("word")) throw new kelondroException("old database structure is not supported");
        if (prop.containsKey("wi")) {
            this.word = new WordReferenceVars(new WordReferenceRow(Base64Order.enhancedCoder.decodeString(prop.getProperty("wi", ""))));
        }
        this.ranking = 0;
        this.comp = null;
    }
View Full Code Here

        // iterate over all words of context text
        final Iterator<Map.Entry<String, Word>> i = condenser.words().entrySet().iterator();
        Map.Entry<String, Word> wentry;
        String word;
        final int len = (document == null) ? urlLength : document.dc_title().length();
        final WordReferenceRow ientry = new WordReferenceRow(url.hash(),
                                urlLength, urlComps, len,
                                condenser.RESULT_NUMB_WORDS,
                                condenser.RESULT_NUMB_SENTENCES,
                                urlModified.getTime(),
                                System.currentTimeMillis(),
                                UTF8.getBytes(language),
                                doctype,
                                outlinksSame, outlinksOther);
        Word wprop;
        byte[] wordhash;
        while (i.hasNext()) {
            wentry = i.next();
            word = wentry.getKey();
            wprop = wentry.getValue();
            assert (wprop.flags != null);
            ientry.setWord(wprop);
            wordhash = Word.word2hash(word);
            try {
                this.termIndex.add(wordhash, ientry);
            } catch (final Exception e) {
                Log.logException(e);
View Full Code Here

            // now parse the Strings in the value-vector and write index entries
            String estring;
            int p;
            String wordHash;
            byte[] urlHash;
            WordReferenceRow iEntry;
            final HandleSet unknownURL = new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0);
            final HandleSet knownURL = new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0);
            final ArrayList<String> wordhashes = new ArrayList<String>();
            int received = 0;
            int blocked = 0;
            int receivedURL = 0;
            final IndexCell<WordReference> cell = sb.indexSegments.termIndex(Segments.Process.DHTIN);
            int count = 0;
            while (it.hasNext()) {
                serverCore.checkInterruption();
                estring = it.next();
                count++;
                if (count > 1000) break; // protection against flooding

                // check if RWI entry is well-formed
                p = estring.indexOf('{',0);
                if (p < 0 || estring.indexOf("x=",0) < 0 || !(estring.indexOf("[B@",0) < 0)) {
                    blocked++;
                    continue;
                }
                wordHash = estring.substring(0, p);
                wordhashes.add(wordHash);
                iEntry = new WordReferenceRow(estring.substring(p));
                urlHash = iEntry.urlhash();

                // block blacklisted entries
                if ((blockBlacklist) && (Switchboard.urlBlacklist.hashInBlacklistedCache(Blacklist.BLACKLIST_DHT, urlHash))) {
                    if (Network.log.isFine()) Network.log.logFine("transferRWI: blocked blacklisted URLHash '" + ASCII.String(urlHash) + "' from peer " + otherPeerName);
                    blocked++;
View Full Code Here

        this.entry.setCol(col_lapp, Integer.parseInt(prop.getProperty("lapp", "0")));
        this.snippet = crypt.simpleDecode(prop.getProperty("snippet", ""), null);
        this.word = null;
        if (prop.containsKey("word")) throw new kelondroException("old database structure is not supported");
        if (prop.containsKey("wi")) {
            this.word = new WordReferenceVars(new WordReferenceRow(Base64Order.enhancedCoder.decodeString(prop.getProperty("wi", ""))));
        }
        this.ranking = 0;
        this.comp = null;
    }
View Full Code Here

        // iterate over all words of context text
        final Iterator<Map.Entry<String, Word>> i = condenser.words().entrySet().iterator();
        Map.Entry<String, Word> wentry;
        String word;
        final int len = (document == null) ? urlLength : document.dc_title().length();
        final WordReferenceRow ientry = new WordReferenceRow(url.hash(),
                                urlLength, urlComps, len,
                                condenser.RESULT_NUMB_WORDS,
                                condenser.RESULT_NUMB_SENTENCES,
                                urlModified.getTime(),
                                System.currentTimeMillis(),
                                UTF8.getBytes(language),
                                doctype,
                                outlinksSame, outlinksOther);
        Word wprop;
        byte[] wordhash;
        while (i.hasNext()) {
            wentry = i.next();
            word = wentry.getKey();
            wprop = wentry.getValue();
            assert (wprop.flags != null);
            ientry.setWord(wprop);
            wordhash = Word.word2hash(word);
            try {
                this.termIndex.add(wordhash, ientry);
            } catch (final Exception e) {
                Log.logException(e);
View Full Code Here

TOP

Related Classes of net.yacy.kelondro.data.word.WordReferenceRow$ExternalParser

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.