Package net.yacy.kelondro.data.meta

Examples of net.yacy.kelondro.data.meta.DigestURI


                        continue;
                    } catch (final RowSpaceExceededException e) {
                        Log.logException(e);
                        continue;
                    }
                    DigestURI url = null;
                    try {
                        url = new DigestURI(row.get("url", ""));
                    } catch (final MalformedURLException e) {
                        Log.logWarning("Load_RSS", "malformed url '" + row.get("url", "") + "': " + e.getMessage());
                        continue;
                    }
                    // load feeds concurrently to get better responsibility in web interface
                    new RSSLoader(sb, url).start();
                }
            }
        }

        if (post == null || (post != null && (
                post.containsKey("addSelectedFeedScheduler") ||
                post.containsKey("removeSelectedFeedsNewList") ||
                post.containsKey("removeAllFeedsNewList") ||
                post.containsKey("removeSelectedFeedsScheduler") ||
                post.containsKey("removeAllFeedsScheduler")
            ))) {
            try {
                // get list of primary keys from the api table with scheduled feed loading requests
                Tables.Row row;
                String messageurl;

                // check feeds
                int newc = 0, apic = 0;
                final Iterator<Row> plainIterator = sb.tables.iterator("rss");
                while (plainIterator.hasNext()) {
                    row = plainIterator.next();
                    if (row == null) continue;
                    messageurl = row.get("url", "");
                    if (messageurl.length() == 0) continue;
                    // get referrer
                    final DigestURI referrer = sb.getURL(Segments.Process.LOCALCRAWLING, row.get("referrer", "").getBytes());
                    // check if feed is registered in scheduler
                    final byte[] api_pk = row.get("api_pk");
                    final Row r = api_pk == null ? null : sb.tables.select("api", api_pk);
                    if (r != null && r.get("comment", "").matches(".*\\Q" + messageurl + "\\E.*")) {
                        // this is a recorded entry
                        final Date date_next_exec = r.get(WorkTables.TABLE_API_COL_DATE_NEXT_EXEC, (Date) null);
                        prop.put("showscheduledfeeds_list_" + apic + "_pk", UTF8.String(row.getPK()));
                        prop.put("showscheduledfeeds_list_" + apic + "_count", apic);
                        prop.putXML("showscheduledfeeds_list_" + apic + "_rss", messageurl);
                        prop.putXML("showscheduledfeeds_list_" + apic + "_title", row.get("title", ""));
                        prop.putXML("showscheduledfeeds_list_" + apic + "_referrer", referrer == null ? "#" : referrer.toNormalform(true, false));
                        prop.put("showscheduledfeeds_list_" + apic + "_recording", DateFormat.getDateTimeInstance().format(row.get("recording_date", new Date())));
                        prop.put("showscheduledfeeds_list_" + apic + "_lastload", DateFormat.getDateTimeInstance().format(row.get("last_load_date", new Date())));
                        prop.put("showscheduledfeeds_list_" + apic + "_nextload", date_next_exec == null ? "" : DateFormat.getDateTimeInstance().format(date_next_exec));
                        prop.put("showscheduledfeeds_list_" + apic + "_lastcount", row.get("last_load_count", 0));
                        prop.put("showscheduledfeeds_list_" + apic + "_allcount", row.get("all_load_count", 0));
                        prop.put("showscheduledfeeds_list_" + apic + "_updperday", row.get("avg_upd_per_day", 0));
                        apic++;
                    } else {
                        // this is a new entry
                        prop.put("shownewfeeds_list_" + newc + "_pk", UTF8.String(row.getPK()));
                        prop.put("shownewfeeds_list_" + newc + "_count", newc);
                        prop.putXML("shownewfeeds_list_" + newc + "_rss", messageurl);
                        prop.putXML("shownewfeeds_list_" + newc + "_title", row.get("title", ""));
                        prop.putXML("shownewfeeds_list_" + newc + "_referrer", referrer == null ? "" : referrer.toNormalform(true, false));
                        prop.put("shownewfeeds_list_" + newc + "_recording", DateFormat.getDateTimeInstance().format(row.get("recording_date", new Date())));
                        newc++;
                    }
                    if (apic > 1000 || newc > 1000) break;
                }
                prop.put("showscheduledfeeds_list" , apic);
                prop.put("showscheduledfeeds_num", apic);
                prop.put("showscheduledfeeds", apic > 0 ? apic : 0);
                prop.put("shownewfeeds_list" , newc);
                prop.put("shownewfeeds_num", newc);
                prop.put("shownewfeeds", newc > 0 ? 1 : 0);
            } catch (final IOException e) {
                Log.logException(e);
            } catch (final RowSpaceExceededException e) {
                Log.logException(e);
            }

            return prop;
        }

        prop.put("url", post.get("url", ""));

        int repeat_time = post.getInt("repeat_time", -1);
        final String repeat_unit = post.get("repeat_unit", "seldays"); // selminutes, selhours, seldays
        if (!"on".equals(post.get("repeat", "off")) && repeat_time > 0) repeat_time = -1;

        boolean record_api = false;

        DigestURI url = null;
        try {
            url = post.containsKey("url") ? new DigestURI(post.get("url", "")) : null;
        } catch (final MalformedURLException e) {
            Log.logWarning("Load_RSS_p", "url not well-formed: '" + post.get("url", "") + "'");
        }

        // if we have an url then try to load the rss
        RSSReader rss = null;
        if (url != null) try {
            prop.put("url", url.toNormalform(true, false));
            final Response response = sb.loader.load(sb.loader.request(url, true, false), CacheStrategy.NOCACHE, Long.MAX_VALUE, true);
            final byte[] resource = response == null ? null : response.getContent();
            rss = resource == null ? null : RSSReader.parse(RSSFeed.DEFAULT_MAXSIZE, resource);
        } catch (final IOException e) {
            Log.logException(e);
        }

        // index all selected items: description only
        if (rss != null && post.containsKey("indexSelectedItemContent")) {
            final RSSFeed feed = rss.getFeed();
            loop: for (final Map.Entry<String, String> entry: post.entrySet()) {
                if (entry.getValue().startsWith("mark_")) try {
                    final RSSMessage message = feed.getMessage(entry.getValue().substring(5));
                    final DigestURI messageurl = new DigestURI(message.getLink());
                    if (RSSLoader.indexTriggered.containsKey(messageurl.hash())) continue loop;
                    if (sb.urlExists(Segments.Process.LOCALCRAWLING, messageurl.hash()) != null) continue loop;
                    sb.addToIndex(messageurl, null, null);
                    RSSLoader.indexTriggered.insertIfAbsent(messageurl.hash(), new Date());
                } catch (final IOException e) {
                    Log.logException(e);
                } catch (final Failure e) {
                    Log.logException(e);
                }
            }
        }

        if (rss != null && post.containsKey("indexAllItemContent")) {
            record_api = true;
            final RSSFeed feed = rss.getFeed();
            RSSLoader.indexAllRssFeed(sb, url, feed);
        }

        if (record_api && rss != null && rss.getFeed() != null && rss.getFeed().getChannel() != null) {
            // record API action
            RSSLoader.recordAPI(sb, post.get(WorkTables.TABLE_API_COL_APICALL_PK, null), url, rss.getFeed(), repeat_time, repeat_unit);
        }

        // show items from rss
        if (rss != null) {
            prop.put("showitems", 1);
            final RSSFeed feed = rss.getFeed();
            final RSSMessage channel = feed.getChannel();
            prop.putHTML("showitems_title", channel == null ? "" : channel.getTitle());
            String author = channel == null ? "" : channel.getAuthor();
            if (author == null || author.length() == 0) author = channel == null ? "" : channel.getCopyright();
            Date pubDate = channel == null ? null : channel.getPubDate();
            prop.putHTML("showitems_author", author == null ? "" : author);
            prop.putHTML("showitems_description", channel == null ? "" : channel.getDescription());
            prop.putHTML("showitems_language", channel == null ? "" : channel.getLanguage());
            prop.putHTML("showitems_date", (pubDate == null) ? "" : DateFormat.getDateTimeInstance().format(pubDate));
            prop.putHTML("showitems_ttl", channel == null ? "" : channel.getTTL());
            prop.putHTML("showitems_docs", channel == null ? "" : channel.getDocs());

            int i = 0;
            for (final Hit item: feed) {
                try {
                    final DigestURI messageurl = new DigestURI(item.getLink());
                    author = item.getAuthor();
                    if (author == null) author = item.getCopyright();
                    pubDate = item.getPubDate();
                    prop.put("showitems_item_" + i + "_state", sb.urlExists(Segments.Process.LOCALCRAWLING, messageurl.hash()) != null ? 2 : RSSLoader.indexTriggered.containsKey(messageurl.hash()) ? 1 : 0);
                    prop.put("showitems_item_" + i + "_state_count", i);
                    prop.putHTML("showitems_item_" + i + "_state_guid", item.getGuid());
                    prop.putHTML("showitems_item_" + i + "_author", author == null ? "" : author);
                    prop.putHTML("showitems_item_" + i + "_title", item.getTitle());
                    prop.putHTML("showitems_item_" + i + "_link", messageurl.toNormalform(false, false));
                    prop.putHTML("showitems_item_" + i + "_description", item.getDescription());
                    prop.putHTML("showitems_item_" + i + "_language", item.getLanguage());
                    prop.putHTML("showitems_item_" + i + "_date", (pubDate == null) ? "" : DateFormat.getDateTimeInstance().format(pubDate));
                    i++;
                } catch (final MalformedURLException e) {
View Full Code Here


            int maxCount = Math.min(100, post.getInt("count", 10));
            long maxTime = Math.min(20000, Math.max(1000, post.getInt("time", 10000)));
            long timeout = System.currentTimeMillis() + maxTime;
            int c = 0;
            Request entry;
            DigestURI referrer;
            while ((maxCount > 0) &&
                   (System.currentTimeMillis() < timeout) &&
                   (sb.crawlQueues.noticeURL.stackSize(stackType) > 0)) {
                try {
                    entry = sb.crawlQueues.noticeURL.pop(stackType, false, sb.crawler);
                } catch (final IOException e) {
                    break;
                }
                if (entry == null) break;
               
                // find referrer, if there is one
                referrer = sb.getURL(Segments.Process.PUBLIC, entry.referrerhash());
               
                // place url to notice-url db
                sb.crawlQueues.delegatedURL.push(
                                entry,
                                sb.peers.mySeed().hash.getBytes(),
                                new Date(),
                                0,
                                FailCategory.FINAL_PROCESS_CONTEXT,
                                "client=____________",
                                -1);
               
                // create RSS entry
                prop.put("item_" + c + "_title", "");
                prop.putXML("item_" + c + "_link", entry.url().toNormalform(true, false));
                prop.putXML("item_" + c + "_referrer", (referrer == null) ? "" : referrer.toNormalform(true, false));
                prop.putXML("item_" + c + "_description", entry.name());
                prop.put("item_" + c + "_author", "");
                prop.put("item_" + c + "_pubDate", GenericFormatter.SHORT_SECOND_FORMATTER.format(entry.appdate()));
                prop.put("item_" + c + "_guid", entry.url().hash());
                c++;
                maxCount--;
            }
            prop.put("item", c);
            prop.putXML("response", "ok");
        }
       
        if (post.get("call", "").equals("urlhashlist")) {
            // retrieve a list of urls from the LURL-db by a given list of url hashes
            final String urlhashes = post.get("hashes", "");
            if (urlhashes.length() % 12 != 0) return prop;
            final int count = urlhashes.length() / 12;
          int c = 0;
          URIMetadataRow entry;
          URIMetadataRow.Components metadata;
            DigestURI referrer;
            for (int i = 0; i < count; i++) {
                entry = sb.indexSegments.urlMetadata(Segments.Process.PUBLIC).load(ASCII.getBytes(urlhashes.substring(12 * i, 12 * (i + 1))));
                if (entry == null) continue;
                // find referrer, if there is one
                referrer = sb.getURL(Segments.Process.PUBLIC, entry.referrerHash());
                // create RSS entry
                metadata = entry.metadata();
                prop.put("item_" + c + "_title", metadata.dc_title());
                prop.putXML("item_" + c + "_link", metadata.url().toNormalform(true, false));
                prop.putXML("item_" + c + "_referrer", (referrer == null) ? "" : referrer.toNormalform(true, false));
                prop.putXML("item_" + c + "_description", metadata.dc_title());
                prop.put("item_" + c + "_author", metadata.dc_creator());
                prop.put("item_" + c + "_pubDate", GenericFormatter.SHORT_SECOND_FORMATTER.format(entry.moddate()));
                prop.put("item_" + c + "_guid", ASCII.String(entry.hash()));
                c++;
View Full Code Here

            if (!urlstring.startsWith("http://") &&
                    !urlstring.startsWith("https://") &&
                    !urlstring.startsWith("ftp://") &&
                    !urlstring.startsWith("smb://") &&
                    !urlstring.startsWith("file://")) urlstring = "http://" + urlstring;
            DigestURI testurl = null;
            try {
                testurl = new DigestURI(urlstring);
            } catch (final MalformedURLException e) { testurl = null; }
            if(testurl != null) {
                prop.putHTML("url",testurl.toString());
                prop.putHTML("testlist_url",testurl.toString());
                if(Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_CRAWLER, testurl))
                        prop.put("testlist_listedincrawler", "1");
                if(Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_DHT, testurl))
                        prop.put("testlist_listedindht", "1");
                if(Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_NEWS, testurl))
View Full Code Here

            PrintTool.print(graphPicture, width / 2, height / 2 + 16, 0, "START A WEB CRAWL TO OBTAIN STRUCTURE DATA.", 0);
        } else {
            // find start hash
            String hash = null;
            if (host != null && host.length() > 0) try {
                hash = ASCII.String((new DigestURI("http://" + host)).hash(), 6, 6);
            } catch (final MalformedURLException e) {Log.logException(e);}
            //assert (sb.webStructure.outgoingReferences(hash) != null);

            // recursively find domains, up to a specific depth
            final GraphPlotter graph = new GraphPlotter();
View Full Code Here

                //load language file from URL
            } else if (post.containsKey("url")){
                final String url = post.get("url");
                Iterator<String> it;
                try {
                    final DigestURI u = new DigestURI(url);
                    it = FileUtils.strings(u.get(ClientIdentification.getUserAgent(), 10000));
                } catch(final IOException e) {
                    prop.put("status", "1");//unable to get url
                    prop.put("status_url", url);
                    return prop;
                }
View Full Code Here

            if (post.containsKey("downloadRelease")) {
                // download a release
                final String release = post.get("releasedownload", "");
                if (!release.isEmpty()) {
                    try {
                  yacyRelease versionToDownload = new yacyRelease(new DigestURI(release));
                 
                  // replace this version with version which contains public key
                  yacyRelease.DevAndMainVersions allReleases = yacyRelease.allReleases(false, false);
                  final Set<yacyRelease> mostReleases = versionToDownload.isMainRelease() ? allReleases.main : allReleases.dev;
                  for (final yacyRelease rel : mostReleases) {
View Full Code Here

            prop.put("reload", 0);
        }

        if (post.containsKey("urldelete")) {
            try {
                urlhash = ASCII.String((new DigestURI(urlstring)).hash());
            } catch (final MalformedURLException e) {
                urlhash = null;
            }
            if ((urlhash == null) || (urlstring == null)) {
                prop.put("result", "No input given; nothing deleted.");
            } else {
                sb.urlRemove(segment, urlhash.getBytes());
                prop.putHTML("result", "Removed URL " + urlstring);
            }
            prop.put("lurlexport", 0);
            prop.put("reload", 0);
        }

        if (post.containsKey("urlstringsearch")) {
            try {
                final DigestURI url = new DigestURI(urlstring);
                urlhash = ASCII.String(url.hash());
                prop.put("urlhash", urlhash);
                final URIMetadataRow entry = segment.urlMetadata().load(ASCII.getBytes(urlhash));
                if (entry == null) {
                    prop.putHTML("result", "No Entry for URL " + url.toNormalform(true, true));
                    prop.putHTML("urlstring", urlstring);
                    prop.put("urlhash", "");
                } else {
                    prop.putAll(genUrlProfile(segment, entry, urlhash));
                    prop.put("statistics", 0);
View Full Code Here

        String urlhash = post.get("urlhash", "").trim();
        if (urlstring.length() == 0 && urlhash.length() == 0) return prop;

        if (urlstring.length() > 0 && urlhash.length() == 0) {
            try {
                DigestURI url = new DigestURI(urlstring);
                urlhash = ASCII.String(url.hash());
            } catch (MalformedURLException e) {
                Log.logException(e);
            }
        }
        if (urlhash == null || urlhash.length() == 0) return prop;
View Full Code Here

        final Switchboard sb = (Switchboard) env;
        String about = post == null ? null : post.get("about", null);
        prop.put("out", 0);
        prop.put("in", 0);
        if (about != null) {
            DigestURI url = null;
            if (about.length() > 6) {
                try {
                    url = new DigestURI(about);
                    about = ASCII.String(url.hash(), 6, 6);
                } catch (MalformedURLException e) {
                    about = null;
                }
            }
            if (url != null && about != null) {
View Full Code Here

               
                if (downloadURLOld != null) {
                    // download the blacklist
                    try {
                        // get List
                        DigestURI u = new DigestURI(downloadURLOld);

                        otherBlacklist = FileUtils.strings(u.get(ClientIdentification.getUserAgent(), 10000));
                    } catch (final Exception e) {
                        prop.put("status", STATUS_PEER_UNKNOWN);
                        prop.putHTML("status_name", hash);
                        prop.put("page", "1");
                    }
                }
            } else if (post.containsKey("url")) {
                /* ======================================================
                 * Download the blacklist from URL
                 * ====================================================== */
               
                final String downloadURL = post.get("url");
                prop.putHTML("page_source", downloadURL);

                try {
                    final DigestURI u = new DigestURI(downloadURL);
                    otherBlacklist = FileUtils.strings(u.get(ClientIdentification.getUserAgent(), 10000));
                } catch (final Exception e) {
                    prop.put("status", STATUS_URL_PROBLEM);
                    prop.putHTML("status_address",downloadURL);
                    prop.put("page", "1");
                }
View Full Code Here

TOP

Related Classes of net.yacy.kelondro.data.meta.DigestURI

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.