Package com.digital

Source Code of com.digital.DigitalUrlGetter

package com.digital;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.htmlparser.Parser;
import org.htmlparser.Tag;
import org.htmlparser.tags.Div;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.util.ParserException;
import org.htmlparser.visitors.NodeVisitor;

import com.commons.IUrlGetter;
import com.commons.UrlData;
import com.utils.LoggerUtils;

public class DigitalUrlGetter implements IUrlGetter {
  private String channelUrl = "http://channel.jd.com/digital.html";

  @Override
  public List<UrlData> getUrlData() {
    final List<UrlData> dataList = new ArrayList<UrlData>();
    try {
      Parser parser = new Parser();
      parser.setURL(channelUrl);
      parser.setEncoding("Gb2312");
      NodeVisitor visitor = new NodeVisitor() {

        public void visitTag(Tag tag) {
          if (Div.class.equals(tag.getClass())
              && "m".equals(tag.getAttribute("class"))
              && "sortlist".equals(tag.getAttribute("id"))) {
            tag.accept(new NodeVisitor() {
              public void visitTag(Tag tag) {
                if (Div.class.equals(tag.getClass())
                    && "con".equals(tag
                        .getAttribute("class"))) {
                  tag.accept(new NodeVisitor() {
                    public void visitTag(Tag tag) {
                      UrlData data = new UrlData();
                      if (LinkTag.class.equals(tag
                          .getClass())) {
                        data.setUrlName(tag
                            .getAttribute("title"));
                        data.setUrl(tag
                            .getAttribute("href"));
                        try {
                          LoggerUtils
                              .log(DigitalUrlGetter.class
                                  .getName(),
                                  data.toString());
                        } catch (IOException e) {
                          // TODO Auto-generated catch
                          e.printStackTrace();
                        }
                        System.out.println(data);
                        dataList.add(data);
                      }
                    };
                  });
                }
              };
            });
          }
        }
      };
      parser.visitAllNodesWith(visitor);
    } catch (ParserException e) {
      e.printStackTrace();
    }
    return dataList;
  }

  public void setChannelUrl(String channelUrl) {
    this.channelUrl = channelUrl;
  }

  public static void main(String[] args) {
    new DigitalUrlGetter().getUrlData();
  }
}
TOP

Related Classes of com.digital.DigitalUrlGetter

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.