Package net.yacy.kelondro.data.meta

Examples of net.yacy.kelondro.data.meta.DigestURI
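
All of the excerpts below follow the same basic pattern: a DigestURI is constructed from a URL string (which can throw MalformedURLException), and its hash() is then used as a compact key for caches, maps and the index. The following is only a minimal sketch of that pattern, assuming the YaCy jars are on the classpath; the import paths and the example URL are placeholders that match the YaCy version these excerpts were taken from and may differ in other releases.

    import java.net.MalformedURLException;

    import net.yacy.cora.document.ASCII;
    import net.yacy.kelondro.data.meta.DigestURI;

    public class DigestURIDemo {
        public static void main(final String[] args) {
            try {
                // the constructor parses the URL string and throws
                // MalformedURLException on invalid input
                final DigestURI url = new DigestURI("http://example.org/index.html");

                // hash() yields the binary YaCy URL hash; ASCII.String() turns it into
                // the printable form used as cache and map key in the examples below
                final String urlHash = ASCII.String(url.hash());
                System.out.println(urlHash);

                // host and port accessors, as used in the host:port example below
                System.out.println(url.getHost() + ":" + url.getPort());
            } catch (final MalformedURLException e) {
                // invalid URL syntax
                e.printStackTrace();
            }
        }
    }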


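Walking a sitemap index: every <sitemap> element is turned into a DigestURI and the referenced sub-sitemap is parsed in turn; entries whose URL cannot be loaded are skipped, before the plain <url> entries are processed.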
            NodeList sitemapNodes = doc.getElementsByTagName("sitemap");
            for (int i = 0; i < sitemapNodes.getLength(); i++) {
                String url = new SitemapEntry((Element) sitemapNodes.item(i)).url();
                if (url != null && url.length() > 0) {
                    try {
                        final SitemapReader r = parse(new DigestURI(url));
                        for (final URLEntry ue: r) this.add(ue);
                    } catch (IOException e) {/* skip sitemaps that cannot be loaded or parsed */}
                }
            }
            final NodeList urlEntryNodes = doc.getElementsByTagName("url");


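Starting an OAI-PMH import job: the source URL is truncated and normalized, then the initial ListRecords request (with the oai_dc metadata prefix) is assembled as a DigestURI. The MalformedURLException branch is only a safety net, because the URL is derived from an already parsed source.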
        this.message = "import initialized";
        // fix start url
        String url = ResumptionToken.truncatedURL(source);
        if (!url.endsWith("?")) url = url + "?";
        try {
            this.source = new DigestURI(url + "verb=ListRecords&metadataPrefix=oai_dc");
        } catch (MalformedURLException e) {
            // this should never happen
            Log.logException(e);
        }
        startedJobs.put(this, N);

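Building the follow-up request from an OAI-PMH resumption token: a token that carries an encoded state ("from=") is appended directly to the ListRecords URL; otherwise the token refers to a cached result set, its expiration date is checked, and the escaped token is appended instead.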
        if (token.length() == 0) throw new IOException("end of resumption reached - token.length() == 0");
        String url = truncatedURL(this.source);
       
        // encoded state
        if (token.indexOf("from=") >= 0) {
            return new DigestURI(url + "verb=ListRecords&" + token);
        }
       
        // cached result set
        // can be detected with given expiration date
        Date expiration = getExpirationDate();
        if (expiration != null) {
            if (expiration.before(new Date())) throw new IOException("the resumption is expired at " + ISO8601Formatter.FORMATTER.format(expiration) + " (now: " + ISO8601Formatter.FORMATTER.format() + ")");
            // the resumption token is still fresh
        }
        String u = url + "verb=ListRecords&resumptionToken=" + escape(token);
        return new DigestURI(u);
    }

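Computing a text snippet for a search result: the snippet is first looked up in the snippet cache, then reconstructed from URL metadata (title, creator, subject, or the URL itself), then the resource is loaded from the cache or the network, parsed into a Document, and its sentences are reduced to a snippet line that is finally stored back into the cache.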
            final boolean pre,
            final int snippetMaxLength,
            final int maxDocLen,
            final boolean reindexing) {
        // heise = "0OQUNU3JSs05"
        final DigestURI url = comp.url();
        if (queryhashes.isEmpty()) {
            //System.out.println("found no queryhashes for URL retrieve " + url);
            init(url.hash(), null, ResultClass.ERROR_NO_HASH_GIVEN, "no query hashes given");
            return;
        }

        // try to get snippet from snippetCache
        ResultClass source = ResultClass.SOURCE_CACHE;
        final String wordhashes = yacySearch.set2string(queryhashes);
        final String urls = ASCII.String(url.hash());
        String snippetLine = snippetsCache.get(wordhashes, urls);
        if (snippetLine != null) {
            // found the snippet
            init(url.hash(), snippetLine, source, null);
            return;
        }


        /* ===========================================================================
         * LOAD RESOURCE DATA
         * =========================================================================== */
        // if the snippet is not in the cache, we can try to get it from the htcache
        final Response response;
        try {
            // first try to get the snippet from metadata
            String loc;
            final boolean noCacheUsage = url.isFile() || url.isSMB() || cacheStrategy == null;
            if (containsAllHashes(loc = comp.dc_title(), queryhashes)) {
                // try to create the snippet from information given in the url itself
                init(url.hash(), loc, ResultClass.SOURCE_METADATA, null);
                return;
            } else if (containsAllHashes(loc = comp.dc_creator(), queryhashes)) {
                // try to create the snippet from information given in the creator metadata
                init(url.hash(), loc, ResultClass.SOURCE_METADATA, null);
                return;
            } else if (containsAllHashes(loc = comp.dc_subject(), queryhashes)) {
                // try to create the snippet from information given in the subject metadata
                init(url.hash(), loc, ResultClass.SOURCE_METADATA, null);
                return;
            } else if (containsAllHashes(loc = comp.url().toNormalform(true, true).replace('-', ' '), queryhashes)) {
                // try to create the snippet from information given in the url
                init(url.hash(), loc, ResultClass.SOURCE_METADATA, null);
                return;
            } else {
                // try to load the resource from the cache
                response = loader == null ? null : loader.load(loader.request(url, true, reindexing), noCacheUsage ? CacheStrategy.NOCACHE : cacheStrategy, Long.MAX_VALUE, true);
                if (response == null) {
                    // in case that we did not get any result we can still return a success when we are not allowed to go online
                    if (cacheStrategy == null || cacheStrategy.mustBeOffline()) {
                        init(url.hash(), null, ResultClass.ERROR_SOURCE_LOADING, "omitted network load (not allowed), no cache entry");
                        return;
                    }

                    // if it is still not available, report an error
                    init(url.hash(), null, ResultClass.ERROR_RESOURCE_LOADING, "error loading resource from net, no cache entry");
                    return;
                } else {
                    // place entry on indexing queue
                    Switchboard.getSwitchboard().toIndexer(response);
                    source = ResultClass.SOURCE_WEB;
                }
            }
        } catch (final Exception e) {
            //Log.logException(e);
            init(url.hash(), null, ResultClass.ERROR_SOURCE_LOADING, "error loading resource: " + e.getMessage());
            return;
        }

        /* ===========================================================================
         * PARSE RESOURCE
         * =========================================================================== */
        Document document = null;
        try {
            document = Document.mergeDocuments(response.url(), response.getMimeType(), response.parse());
        } catch (final Parser.Failure e) {
            init(url.hash(), null, ResultClass.ERROR_PARSER_FAILED, e.getMessage()); // cannot be parsed
            return;
        }
        if (document == null) {
            init(url.hash(), null, ResultClass.ERROR_PARSER_FAILED, "parser error/failed"); // cannot be parsed
            return;
        }

        /* ===========================================================================
         * COMPUTE SNIPPET
         * =========================================================================== */
        // we have found a parseable non-empty file: use the lines

        // compute snippet from text
        final Collection<StringBuilder> sentences = document.getSentences(pre);
        if (sentences == null) {
            init(url.hash(), null, ResultClass.ERROR_PARSER_NO_LINES, "parser returned no sentences");
            return;
        }
        final SnippetExtractor tsr;
        String textline = null;
        HandleSet remainingHashes = queryhashes;
        try {
            tsr = new SnippetExtractor(sentences, queryhashes, snippetMaxLength);
            textline = tsr.getSnippet();
            remainingHashes =  tsr.getRemainingWords();
        } catch (final UnsupportedOperationException e) {
            init(url.hash(), null, ResultClass.ERROR_NO_MATCH, "no matching snippet found");
            return;
        }

        // compute snippet from media
        //String audioline = computeMediaSnippet(document.getAudiolinks(), queryhashes);
        //String videoline = computeMediaSnippet(document.getVideolinks(), queryhashes);
        //String appline = computeMediaSnippet(document.getApplinks(), queryhashes);
        //String hrefline = computeMediaSnippet(document.getAnchors(), queryhashes);
        //String imageline = computeMediaSnippet(document.getAudiolinks(), queryhashes);

        snippetLine = "";
        //if (audioline != null) line += (line.length() == 0) ? audioline : "<br />" + audioline;
        //if (videoline != null) line += (line.length() == 0) ? videoline : "<br />" + videoline;
        //if (appline   != null) line += (line.length() == 0) ? appline   : "<br />" + appline;
        //if (hrefline  != null) line += (line.length() == 0) ? hrefline  : "<br />" + hrefline;
        if (textline  != null) snippetLine += (snippetLine.length() == 0) ? textline  : "<br />" + textline;

        if (snippetLine == null || !remainingHashes.isEmpty()) {
            init(url.hash(), null, ResultClass.ERROR_NO_MATCH, "no matching snippet found");
            return;
        }
        if (snippetLine.length() > snippetMaxLength) snippetLine = snippetLine.substring(0, snippetMaxLength);

        // finally store this snippet in our own cache
        snippetsCache.put(wordhashes, urls, snippetLine);

        document.close();
        init(url.hash(), snippetLine, source, null);
    }

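Releasing a download license: the license key is removed from the permission map and the DigestURI that was registered under it is returned.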
        // return the license key
        return license;
    }
   
    public DigestURI releaseLicense(final String license) {
        // remove the license key from the permission map and return the URL it protected
        return permissions.remove(license);
    }

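Extracting host and port from a URL stub found while scanning a text dump: the stub is cut back to its last path separator, parsed as a DigestURI, and the port is appended to the host only when it differs from the default HTTP port 80.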
                    urlStub = t.substring(p + 6, q);
                    if (!urlStub.endsWith("/")) {
                        q = urlStub.lastIndexOf('/');
                        if (q > 0) urlStub = urlStub.substring(0, q + 1);
                    }
                    DigestURI uri = new DigestURI(urlStub);
                    hostport = uri.getHost();
                    if (uri.getPort() != 80) hostport += ":" + uri.getPort();
                    continue;
                }
                if (t.indexOf(pagestart) >= 0) {
                    page = true;
                    continue;

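Generating a Document from an imported wiki page: the page URL is composed from the URL stub and the page title, the HTML is run through the TextParser, and the title is set explicitly because the wiki parser cannot recover it from the source text.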
                throw new IOException(e.getMessage());
            }
        }
        public void genDocument() throws Parser.Failure {
            try {
                url = new DigestURI(urlStub + title);
                Document[] parsed = TextParser.parseSource(url, "text/html", "UTF-8", UTF8.getBytes(html));
                document = Document.mergeDocuments(url, "text/html", parsed);
                // the wiki parser is not able to find the proper title in the source text, so it must be set here
                document.setTitle(title);
            } catch (MalformedURLException e1) {

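Creating a bookmark entry: a missing protocol prefix is prepended to the URL, the URL hash is computed via DigestURI (or left null if the URL is malformed), and the URL together with a timestamp is stored in the entry map.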
            entry = new HashMap<String, String>();
            if (!url.toLowerCase().startsWith("http://") && !url.toLowerCase().startsWith("https://")) {
                url = "http://" + url;
            }
            try {
                this.urlHash = ASCII.String((new DigestURI(url)).hash());
            } catch (final MalformedURLException e) {
                this.urlHash = null;
            }
            entry.put(BOOKMARK_URL, url);
            this.timestamp = System.currentTimeMillis();

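Constructing a Bookmark from a stored property map: the URL hash is recomputed from the bookmark URL kept in the map.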
           
            removeBookmark(this.urlHash); //prevent empty tags
        }
       
        public Bookmark(final Map<String, String> map) throws MalformedURLException {
            this(ASCII.String((new DigestURI(map.get(BOOKMARK_URL))).hash()), map);
        }

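Collecting the list of OAI friends: for every known friend whose local copy is missing, the list is downloaded through the LoaderDispatcher (bypassing the cache) and written to a file before being read back.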
    public static Map<String, String> getListFriends(final LoaderDispatcher loader) {
        final Map<String, String> map = new TreeMap<String, String>();
        Map<String, String> m;
        for (final Map.Entry<String, File> oaiFriend: listFriends.entrySet()) try {
            if (!oaiFriend.getValue().exists()) {
                final Response response = loader == null ? null : loader.load(loader.request(new DigestURI(oaiFriend.getKey()), false, true), CacheStrategy.NOCACHE, Long.MAX_VALUE, true);
                if (response != null) FileUtils.copy(response.getContent(), oaiFriend.getValue());
            }

            if (oaiFriend.getValue().exists()) {
                final byte[] b = FileUtils.read(oaiFriend.getValue());
