codeProcess();
}
/**
 * Handles a single URL request obtained from {@code ClientMessageCenter}.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>Fetch the next request; return if there is none or its URL is empty.</li>
 *   <li>Look up the analyser templates for the request; return if none are found,
 *       so that no download is attempted for a URL nobody can analyse.</li>
 *   <li>Download the page, trying at most three times with a one second pause
 *       between attempts.</li>
 *   <li>Run every matching template over the page, collecting saveable
 *       conversations and follow-up URL requests.</li>
 *   <li>Send follow-up requests, conversations and finally a {@code UrlState}
 *       summary back through {@code ClientMessageCenter}.</li>
 * </ol>
 *
 * <p>The state summary is sent even when the download failed; in that case the
 * reported page length is {@code 0}.
 */
public void codeProcess() {
    // Fetch the request to work on.
    UrlRequest urlReq = ClientMessageCenter.getInstance().urlGet();
    if (urlReq == null || StringUtil.isEmpty(urlReq.getUrl())) {
        return;
    }

    // Resolve the templates first: without one there is no point in spending
    // time and traffic on the download, so the link is dropped.
    List<AnalyserTemplate> templates = null;
    try {
        templates = this.findAnalyserTemplate(urlReq);
    } catch (Exception e) {
        // Pass the throwable separately so the stack trace is kept.
        logger.error("find analyser template error! url : " + urlReq.getUrl(), e);
    }
    if (templates == null || templates.isEmpty()) {
        return;
    }
    logger.info("running url : " + urlReq.getUrl());

    // Send the request, at most maxAttempts times.
    final int maxAttempts = 3;
    String html = null;
    for (int attempt = 1; attempt <= maxAttempts; attempt++) {
        try {
            html = sendRequest(urlReq);
            if (!StringUtil.isEmpty(html)) {
                break;
            }
        } catch (Exception e) {
            logger.error("send request url : " + urlReq.getUrl()
                    + " error! ", e);
        }
        if (attempt == maxAttempts) {
            // No further attempt follows, so waiting would only delay the result.
            break;
        }
        try {
            Thread.sleep(1000);
        } catch (InterruptedException e) {
            logger.error("thread sleep error! ", e);
            // Restore the interrupt flag for the caller and stop retrying.
            Thread.currentThread().interrupt();
            break;
        }
    }

    List<Conversation> convsResult = new ArrayList<Conversation>();
    List<UrlRequest> urlReqResult = new ArrayList<UrlRequest>();

    // An empty page cannot be analysed.
    if (!StringUtil.isEmpty(html)) {
        // Apply the templates one by one; a single page may yield several
        // groups of data.
        for (AnalyserTemplate template : templates) {
            if (!template.isMatchTemplate(urlReq, html)) {
                continue;
            }
            // docHtml stays null when parsing fails; the template is still
            // invoked with the raw html.
            Document docHtml = null;
            try {
                docHtml = XmlUtil.formatToDoc(html);
            } catch (Exception e) {
                logger.error("html fromat to doc error! " + html, e);
            }

            List<Conversation> convsTemp = template.findConversations(
                    urlReq, html, docHtml);
            if (convsTemp != null) {
                for (Conversation conv : convsTemp) {
                    // Constant-first equals: a missing flag counts as "not set"
                    // instead of throwing.
                    if ("1".equals(conv.getSaveable())) {
                        convsResult.add(conv);
                    }
                    if ("1".equals(conv.getRunable())) {
                        urlReqResult.add(new UrlRequest(
                                formatUrl(conv.getSelfLink(), urlReq),
                                urlReq.getTask()));
                    }
                }
            }

            List<UrlRequest> links = template.findLinks(urlReq, html, docHtml);
            if (links != null) {
                urlReqResult.addAll(links);
            }
        }

        if (!urlReqResult.isEmpty()) {
            ClientMessageCenter.getInstance().urlSend(urlReqResult);
            logger.info("url : " + urlReq.getUrl() + " get next url : "
                    + urlReqResult.size());
        }
        if (!convsResult.isEmpty()) {
            ClientMessageCenter.getInstance().resultSend(convsResult);
            logger.info("url : " + urlReq.getUrl() + " get convsResult : "
                    + convsResult.size());
        }
    }

    if (urlReqResult.isEmpty() && convsResult.isEmpty()) {
        logger.info("url : " + urlReq.getUrl() + " get no reuslt! ");
    }

    // html is null when every attempt failed; report length 0 in that case
    // rather than dereferencing null.
    int htmlLength = (html == null) ? 0 : html.length();
    ClientMessageCenter.getInstance().stateSend(
            new UrlState(htmlLength, urlReqResult.size(),
                    convsResult.size(), urlReq));
}