Package org.apache.nutch.parse.rss.structs

Examples of org.apache.nutch.parse.rss.structs.RSSChannel


        // also get the outlinks

        if (theRSSChannels != null) {
            for (int i = 0; i < theRSSChannels.size(); i++) {
                RSSChannel r = (RSSChannel) theRSSChannels.get(i);
                contentTitle.append(r.getTitle());
                contentTitle.append(" ");

                // concat the description to the index text
                indexText.append(r.getDescription());
                indexText.append(" ");

                if (r.getLink() != null) {
                    try {
                        // get the outlink
      if (r.getDescription()!= null ) {
          theOutlinks.add(new Outlink(r.getLink(), r.getDescription(), getConf()));
      } else {
          theOutlinks.add(new Outlink(r.getLink(), "", getConf()));
      }
                    } catch (MalformedURLException e) {
                        if (LOG.isWarnEnabled()) {
                          LOG.warn("MalformedURL: " + r.getLink());
                          LOG.warn("Attempting to continue processing outlinks");
                          e.printStackTrace(LogUtil.getWarnStream(LOG));
                        }
                        continue;
                    }
                }

                // now get the descriptions of all the underlying RSS Items and
                // then index them too
                for (int j = 0; j < r.getItems().size(); j++) {
                    RSSItem theRSSItem = (RSSItem) r.getItems().get(j);
                    indexText.append(theRSSItem.getDescription());
                    indexText.append(" ");

                    String whichLink = null;
View Full Code Here


            fRssChannels.add(fCurrentChannel);
        }

        //System.out.println("Found a new channel: " + title);

        fCurrentChannel = new RSSChannel(title, link, description);

    }
View Full Code Here

        // also get the outlinks

        if (theRSSChannels != null) {
            for (int i = 0; i < theRSSChannels.size(); i++) {
                RSSChannel r = (RSSChannel) theRSSChannels.get(i);
                contentTitle.append(r.getTitle());
                contentTitle.append(" ");

                // concat the description to the index text
                indexText.append(r.getDescription());
                indexText.append(" ");

                if (r.getLink() != null) {
                    try {
                        // get the outlink
      if (r.getDescription()!= null ) {
          theOutlinks.add(new Outlink(r.getLink(), r.getDescription()));
      } else {
          theOutlinks.add(new Outlink(r.getLink(), ""));
      }
                    } catch (MalformedURLException e) {
                        if (LOG.isWarnEnabled()) {
                          LOG.warn("MalformedURL: " + r.getLink());
                          LOG.warn("Attempting to continue processing outlinks");
                          e.printStackTrace(LogUtil.getWarnStream(LOG));
                        }
                        continue;
                    }
                }

                // now get the descriptions of all the underlying RSS Items and
                // then index them too
                for (int j = 0; j < r.getItems().size(); j++) {
                    RSSItem theRSSItem = (RSSItem) r.getItems().get(j);
                    indexText.append(theRSSItem.getDescription());
                    indexText.append(" ");

                    String whichLink = null;
View Full Code Here

        // also get the outlinks

        if (theRSSChannels != null) {
            for (int i = 0; i < theRSSChannels.size(); i++) {
                RSSChannel r = (RSSChannel) theRSSChannels.get(i);
                contentTitle.append(r.getTitle());
                contentTitle.append(" ");

                // concat the description to the index text
                indexText.append(r.getDescription());
                indexText.append(" ");

                if (r.getLink() != null) {
                    try {
                        // get the outlink
      if (r.getDescription()!= null ) {
          theOutlinks.add(new Outlink(r.getLink(), r.getDescription()));
      } else {
          theOutlinks.add(new Outlink(r.getLink(), ""));
      }
                    } catch (MalformedURLException e) {
                        LOG.info("nutch:parse-rss:RSSParser Exception: MalformedURL: "
                                        + r.getLink()
                                        + ": Attempting to continue processing outlinks");
                        e.printStackTrace();
                        continue;
                    }
                }

                // now get the descriptions of all the underlying RSS Items and
                // then index them too
                for (int j = 0; j < r.getItems().size(); j++) {
                    RSSItem theRSSItem = (RSSItem) r.getItems().get(j);
                    indexText.append(theRSSItem.getDescription());
                    indexText.append(" ");

                    String whichLink = null;
View Full Code Here

            fRssChannels.add(fCurrentChannel);
        }

        //System.out.println("Found a new channel: " + title);

        fCurrentChannel = new RSSChannel(title, link, description);

    }
View Full Code Here

        // also get the outlinks

        if (theRSSChannels != null) {
            for (int i = 0; i < theRSSChannels.size(); i++) {
                RSSChannel r = (RSSChannel) theRSSChannels.get(i);
                contentTitle.append(r.getTitle());
                contentTitle.append(" ");

                // concat the description to the index text
                indexText.append(r.getDescription());
                indexText.append(" ");

                if (r.getLink() != null) {
                    try {
                        // get the outlink
                        theOutlinks.add(new Outlink(r.getLink(), r
                                .getDescription()));
                    } catch (MalformedURLException e) {
                        LOG
                                .info("nutch:parse-rss:RSSParser Exception: MalformedURL: "
                                        + r.getLink()
                                        + ": Attempting to continue processing outlinks");
                        e.printStackTrace();
                        continue;
                    }
                }

                // now get the descriptions of all the underlying RSS Items and
                // then index them too
                for (int j = 0; j < r.getItems().size(); j++) {
                    RSSItem theRSSItem = (RSSItem) r.getItems().get(j);
                    indexText.append(theRSSItem.getDescription());
                    indexText.append(" ");

                    String whichLink = null;
View Full Code Here

        // also get the outlinks

        if (theRSSChannels != null) {
            for (int i = 0; i < theRSSChannels.size(); i++) {
                RSSChannel r = (RSSChannel) theRSSChannels.get(i);
                contentTitle.append(r.getTitle());
                contentTitle.append(" ");

                // concat the description to the index text
                indexText.append(r.getDescription());
                indexText.append(" ");

                if (r.getLink() != null) {
                    try {
                        // get the outlink
      if (r.getDescription()!= null ) {
          theOutlinks.add(new Outlink(r.getLink(), r.getDescription(), getConf()));
      } else {
          theOutlinks.add(new Outlink(r.getLink(), "", getConf()));
      }
                    } catch (MalformedURLException e) {
                        if (LOG.isWarnEnabled()) {
                          LOG.warn("MalformedURL: " + r.getLink());
                          LOG.warn("Attempting to continue processing outlinks");
                          e.printStackTrace(LogUtil.getWarnStream(LOG));
                        }
                        continue;
                    }
                }

                // now get the descriptions of all the underlying RSS Items and
                // then index them too
                for (int j = 0; j < r.getItems().size(); j++) {
                    RSSItem theRSSItem = (RSSItem) r.getItems().get(j);
                    indexText.append(theRSSItem.getDescription());
                    indexText.append(" ");

                    String whichLink = null;
View Full Code Here

            fRssChannels.add(fCurrentChannel);
        }

        //System.out.println("Found a new channel: " + title);

        fCurrentChannel = new RSSChannel(title, link, description);

    }
View Full Code Here

TOP

Related Classes of org.apache.nutch.parse.rss.structs.RSSChannel

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.