Package net.yacy.kelondro.data.meta

Examples of net.yacy.kelondro.data.meta.DigestURI


    }

    public SolrInputDocument yacy2solr(final String id, final ResponseHeader header, final Document yacydoc) {
        // we use the SolrCell design as the index scheme
        final SolrInputDocument solrdoc = new SolrInputDocument();
        final DigestURI digestURI = new DigestURI(yacydoc.dc_source());
        addSolr(solrdoc, "failreason_t", ""); // overwrite a possible fail reason (in case that there was a fail reason before)
        addSolr(solrdoc, "id", id);
        addSolr(solrdoc, "sku", digestURI.toNormalform(true, false), 3.0f);
        final InetAddress address = Domains.dnsResolve(digestURI.getHost());
        if (address != null) addSolr(solrdoc, "ip_s", address.getHostAddress());
        if (digestURI.getHost() != null) addSolr(solrdoc, "host_s", digestURI.getHost());
        addSolr(solrdoc, "title", yacydoc.dc_title());
        addSolr(solrdoc, "author", yacydoc.dc_creator());
        addSolr(solrdoc, "description", yacydoc.dc_description());
        addSolr(solrdoc, "content_type", yacydoc.dc_format());
        addSolr(solrdoc, "last_modified", header.lastModified());
        addSolr(solrdoc, "keywords", yacydoc.dc_subject(' '));
        final String content = UTF8.String(yacydoc.getTextBytes());
        addSolr(solrdoc, "text_t", content);
        if (isEmpty() || contains("wordcount_i")) {
            final int contentwc = content.split(" ").length;
            addSolr(solrdoc, "wordcount_i", contentwc);
        }

        // path elements of link
        final String path = digestURI.getPath();
        if (path != null && (isEmpty() || contains("attr_paths"))) {
            final String[] paths = path.split("/");
            if (paths.length > 0) addSolr(solrdoc, "attr_paths", paths);
        }
View Full Code Here


        if (record.category() == null) return;
        if (!(categories.contains(record.category()))) return;
        if (record.created().getTime() == 0) return;
        final Map<String, String> attributes = record.attributes();
        if (attributes.containsKey("url")){
            if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_NEWS, new DigestURI(attributes.get("url")))){
                System.out.println("DEBUG: ignored news-entry url blacklisted: " + attributes.get("url"));
                return;
            }
        }
        if (attributes.containsKey("startURL")){
            if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_NEWS, new DigestURI(attributes.get("startURL")))){
                System.out.println("DEBUG: ignored news-entry url blacklisted: " + attributes.get("startURL"));
                return;
            }
        }
       
View Full Code Here

                log.logWarning("SaveSeedList: " + errorMsg);
                return errorMsg;
            }

            // ensure that the seed file url is configured properly
            DigestURI seedURL;
            try {
                final String seedURLStr = sb.peers.mySeed().get(yacySeed.SEEDLISTURL, "");
                if (seedURLStr.length() == 0) { throw new MalformedURLException("The seed-file url must not be empty."); }
                if (!(
                        seedURLStr.toLowerCase().startsWith("http://") ||
                        seedURLStr.toLowerCase().startsWith("https://")
                )) {
                    throw new MalformedURLException("Unsupported protocol.");
                }
                seedURL = new DigestURI(seedURLStr);
            } catch (final MalformedURLException e) {
                final String errorMsg = "Malformed seed file URL '" + sb.peers.mySeed().get(yacySeed.SEEDLISTURL, "") + "'. " + e.getMessage();
                log.logWarning("SaveSeedList: " + errorMsg);
                return errorMsg;
            }
View Full Code Here

                this.queue.put(start);
            } catch (InterruptedException e) {}
            return;
        }
        String[] s = start.list();
        DigestURI w;
        for (String t: s) {
            try {
                w = new DigestURI(start, t);
                if (w.canRead() && !w.isHidden()) {
                    if (w.isDirectory()) {
                        addConcurrent(w);
                    } else {
                        try {
                            this.queue.put(w);
                        } catch (InterruptedException e) {}
View Full Code Here

                System.out.println("not indexed " + f.toString() + ": " + failReason);
            }
        };
        try {
            if (args[1].equals("add")) {
                DigestURI f = new DigestURI(args[2]);
                DocumentIndex di = new DocumentIndex(segmentPath, callback, 100000);
                di.addConcurrent(f);
                di.close();
            } else {
                String query = "";
                for (int i = 2; i < args.length; i++) query += args[i];
                query.trim();
                DocumentIndex di = new DocumentIndex(segmentPath, callback, 100000);
                ArrayList<DigestURI> results = di.find(query, 100);
                for (DigestURI f: results) {
                    if (f != null) System.out.println(f.toString());
                }
                di.close();
            }
        } catch (IOException e) {
            Log.logException(e);
View Full Code Here

            super(workerThreadGroup, "query-" + count);
        }
       
        @Override
        public void run() {
            DigestURI f;
            URIMetadataRow resultRow;
            try {
                while ((f = queue.take()) != poison) try {
                    resultRow = add(f);
                    if (callback != null) {
View Full Code Here

     * @return a byte[] hash for the input URL string
     * @throws MalformedURLException
     * @see net.yacy.kelondro.data.meta.DigestURI.DigestURI(String url, byte[] hash).hash()
     */
    public final static byte[] getBookmarkId(String url) throws MalformedURLException {
    return (new DigestURI(url, null)).hash();
    }
View Full Code Here

        if (urls.length > 1) {
            // select one that fits
            u = bestU(urls);
        }
        try {
            return new DigestURI(u);
        } catch (MalformedURLException e) {
            if (useRelationAsAlternative) {
                DigestURI relation = this.getRelation();
                if (relation != null) return relation;
                Log.logWarning("DCEntry", "getIdentifier: url is bad, relation also: " + e.getMessage());
            }
            Log.logWarning("DCEntry", "getIdentifier: url is bad: " + e.getMessage());
            return null;
View Full Code Here

        if (urls.length > 1) {
            // select one that fits
            u = bestU(urls);
        }
        try {
            return new DigestURI(u);
        } catch (MalformedURLException e) {
            Log.logWarning("DCEntry", "getRelation: url is bad: " + e.getMessage());
            return null;
        }
    }
View Full Code Here

        else if (mediatype == ContentDomain.APP) media = document.getApplinks();
        if (media == null) return null;

        final Iterator<Map.Entry<MultiProtocolURI, String>> i = media.entrySet().iterator();
        Map.Entry<MultiProtocolURI, String> entry;
        DigestURI url;
        String desc;
        final List<MediaSnippet> result = new ArrayList<MediaSnippet>();
        while (i.hasNext()) {
            entry = i.next();
            url = new DigestURI(entry.getKey());
            desc = entry.getValue();
            final int ranking = removeAppearanceHashes(url.toNormalform(false, false), queryhashes).size() +
                           removeAppearanceHashes(desc, queryhashes).size();
            if (ranking < 2 * queryhashes.size()) {
                result.add(new MediaSnippet(mediatype, url, MimeTable.url2mime(url), desc, document.getTextLength(), null, ranking, source));
            }
        }
View Full Code Here

TOP

Related Classes of net.yacy.kelondro.data.meta.DigestURI

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.