Package net.yacy.cora.document

Examples of net.yacy.cora.document.RSSFeed$messageIterator


        if (post != null) {
            if (post.containsKey("retrieve")) {
                final String peerhash = post.get("peer", null);
                final yacySeed seed = (peerhash == null) ? null : sb.peers.getConnected(peerhash);
                final RSSFeed feed = (seed == null) ? null : yacyClient.queryRemoteCrawlURLs(sb.peers, seed, 20, 60000);
                if (feed != null) {
                    for (final Hit item: feed) {
                        //System.out.println("URL=" + item.getLink() + ", desc=" + item.getDescription() + ", pubDate=" + item.getPubDate());
                       
                        // put url on remote crawl stack
View Full Code Here


            Log.logException(e);
        }

        // index all selected items: description only
        if (rss != null && post.containsKey("indexSelectedItemContent")) {
            final RSSFeed feed = rss.getFeed();
            loop: for (final Map.Entry<String, String> entry: post.entrySet()) {
                if (entry.getValue().startsWith("mark_")) try {
                    final RSSMessage message = feed.getMessage(entry.getValue().substring(5));
                    final DigestURI messageurl = new DigestURI(message.getLink());
                    if (RSSLoader.indexTriggered.containsKey(messageurl.hash())) continue loop;
                    if (sb.urlExists(Segments.Process.LOCALCRAWLING, messageurl.hash()) != null) continue loop;
                    sb.addToIndex(messageurl, null, null);
                    RSSLoader.indexTriggered.insertIfAbsent(messageurl.hash(), new Date());
                } catch (final IOException e) {
                    Log.logException(e);
                } catch (final Failure e) {
                    Log.logException(e);
                }
            }
        }

        if (rss != null && post.containsKey("indexAllItemContent")) {
            record_api = true;
            final RSSFeed feed = rss.getFeed();
            RSSLoader.indexAllRssFeed(sb, url, feed);
        }

        if (record_api && rss != null && rss.getFeed() != null && rss.getFeed().getChannel() != null) {
            // record API action
            RSSLoader.recordAPI(sb, post.get(WorkTables.TABLE_API_COL_APICALL_PK, null), url, rss.getFeed(), repeat_time, repeat_unit);
        }

        // show items from rss
        if (rss != null) {
            prop.put("showitems", 1);
            final RSSFeed feed = rss.getFeed();
            final RSSMessage channel = feed.getChannel();
            prop.putHTML("showitems_title", channel == null ? "" : channel.getTitle());
            String author = channel == null ? "" : channel.getAuthor();
            if (author == null || author.length() == 0) author = channel == null ? "" : channel.getCopyright();
            Date pubDate = channel == null ? null : channel.getPubDate();
            prop.putHTML("showitems_author", author == null ? "" : author);
View Full Code Here

                messageMaxCount--;
                continue channelIteration;
            }

            // read the channel
            final RSSFeed feed = yacyChannel.channels(channel);
            if (feed == null || feed.isEmpty()) continue channelIteration;

            RSSMessage message = feed.getChannel();
            if (message != null) {
                prop.putXML("channel_title", message.getTitle());
                prop.putXML("channel_description", message.getDescription());
                prop.put("channel_pubDate", message.getPubDate());
            }
            while (messageMaxCount > 0 && !feed.isEmpty()) {
                message = feed.pollMessage();
                if (message == null) continue;

                // create RSS entry
                prop.putXML("item_" + messageCount + "_title", channelName + ": " + message.getTitle());
                prop.putXML("item_" + messageCount + "_description", message.getDescription());
View Full Code Here

        this.sb = sb;
        sb.setConfig("yacyStatus", "");
       
        // create a peer news channel
        final RSSFeed peernews = yacyChannel.channels(yacyChannel.PEERNEWS);
        peernews.addMessage(new RSSMessage("YaCy started", "", ""));
       
        // ensure that correct IP is used
        final String staticIP = sb.getConfig("staticIP", "");
        if (staticIP.length() != 0 && yacySeed.isProperIP(staticIP) == null) {
            serverCore.useStaticIP = true;
View Full Code Here

                target.put(yacySeed.RCOUNT, "0");
                seedDB.update(target.hash, target); // overwrite number of remote-available number to avoid that this peer is called again (until update is done by peer ping)
                //Log.logException(e);
                return null;
            }
            final RSSFeed feed = reader.getFeed();
            if (feed == null) {
                // case where the rss reader does not understand the content
                yacyCore.log.logWarning("yacyClient.queryRemoteCrawlURLs failed asking peer '" + target.getName() + "': probably bad response from remote peer (2)");
                //System.out.println("***DEBUG*** rss input = " + UTF8.String(result));
                target.put(yacySeed.RCOUNT, "0");
                seedDB.update(target.hash, target); // overwrite number of remote-available number to avoid that this peer is called again (until update is done by peer ping)
                //Log.logException(e);
                return null;
            }
            // update number of remotely available links in seed
            target.put(yacySeed.RCOUNT, Integer.toString(Math.max(0, targetCount - feed.size())));
            seedDB.update(target.hash, target);
            return feed;
        } catch (final IOException e) {
            yacyCore.log.logWarning("yacyClient.queryRemoteCrawlURLs error asking peer '" + target.getName() + "':" + e.toString());
            return null;
View Full Code Here

                    queueEntry.size(),
                    document,
                    condenser,
                    searchEvent,
                    sourceName);
            final RSSFeed feed = yacyChannel.channels(queueEntry.initiator() == null ? yacyChannel.PROXY : Base64Order.enhancedCoder.equal(queueEntry.initiator(), ASCII.getBytes(this.peers.mySeed().hash)) ? yacyChannel.LOCALINDEXING : yacyChannel.REMOTEINDEXING);
            feed.addMessage(new RSSMessage("Indexed web page", dc_title, queueEntry.url().toNormalform(true, false)));
        } catch (final IOException e) {
            //if (this.log.isFine()) log.logFine("Not Indexed Resource '" + queueEntry.url().toNormalform(false, true) + "': process case=" + processCase);
            addURLtoErrorDB(queueEntry.url(), (referrerURL == null) ? null : referrerURL.hash(), queueEntry.initiator(), dc_title, FailCategory.FINAL_LOAD_CONTEXT, "error storing url: " + queueEntry.url().toNormalform(false, true) + "': process case=" + processCase + ", error = " + e.getMessage());
            return;
        }
View Full Code Here

            rssReader = new RSSReader(RSSFeed.DEFAULT_MAXSIZE, source, RSSReader.Type.none);
        } catch (IOException e) {
            throw new Parser.Failure("Load error:" + e.getMessage(), url, e);
        }
       
        final RSSFeed feed = rssReader.getFeed();
        //RSSMessage channel = feed.getChannel();
        final List<Document> docs = new ArrayList<Document>();
        MultiProtocolURI uri;
        Set<String> languages;
        Map<MultiProtocolURI, Properties> anchors;
View Full Code Here

            }
        }
        if (seed == null) return false;

        // we know a peer which should provide remote crawl entries. load them now.
        final RSSFeed feed = yacyClient.queryRemoteCrawlURLs(this.sb.peers, seed, 60, 8000);
        if (feed == null || feed.isEmpty()) {
            // something is wrong with this provider. To prevent that we get not stuck with this peer
            // we remove it from the peer list
            this.sb.peers.peerActions.peerDeparture(seed, "no results from provided remote crawls");
            // try again and ask another peer
            return remoteCrawlLoaderJob();
View Full Code Here

        }
        if (rss == null) {
            Log.logWarning("Load_RSS", "no rss for url " + this.urlf.toNormalform(true, false));
            return;
        }
        final RSSFeed feed = rss.getFeed();
        indexAllRssFeed(this.sb, this.urlf, feed);

        // add the feed also to the scheduler
        recordAPI(this.sb, null, this.urlf, feed, 7, "seldays");
    }
View Full Code Here

                RSSMessage message;
                int maximumRecords = maximumRecordsInit;
                long timeout = timeoutInit;
                mainloop: while (timeout > 0 && maximumRecords > 0) {
                    final long st = System.currentTimeMillis();
                    RSSFeed feed;
                    try {
                        feed = loadSRURSS(urlBase, query, timeout, startRecord, recordsPerSession, verify, global, userAgent);
                    } catch (final IOException e1) {
                        //e1.printStackTrace();
                        break mainloop;
                    }
                    if (feed == null || feed.isEmpty()) break mainloop;
                    maximumRecords -= feed.size();
                    innerloop: while (!feed.isEmpty()) {
                        message = feed.pollMessage();
                        if (message == null) break innerloop;
                        try {
                            queue.put(message);
                        } catch (final InterruptedException e) {
                            e.printStackTrace();
View Full Code Here

TOP

Related Classes of net.yacy.cora.document.RSSFeed$messageIterator

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.