package net.sf.jpluck.jxl;
import java.io.IOException;
import java.net.URI;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.TimeZone;
import java.util.regex.Pattern;
import javax.servlet.jsp.el.ExpressionEvaluator;
import net.sf.jpluck.spider.LinkFilter;
import net.sf.jpluck.spider.RuleFilter;
import net.sf.jpluck.util.el.DefaultVariables;
import net.sf.jpluck.util.el.StaticFunctionMapper;
import net.sf.jpluck.xml.ContextAdapter;
import net.sf.jpluck.xml.DOMUtil;
import org.apache.commons.el.ExpressionEvaluatorImpl;
import org.apache.commons.jxpath.JXPathContext;
import org.apache.commons.jxpath.Pointer;
import org.apache.commons.jxpath.ri.model.dom.DOMNodePointer;
import org.quartz.CronTrigger;
import org.quartz.impl.calendar.BaseCalendar;
import org.w3c.dom.Element;
public abstract class Document extends Template implements Comparable,Cloneable {
/** Lower-case weekday names; getWeekDay returns the matching index plus one ("sunday" is 1). */
private static final String[] WEEKDAYS = {
"sunday", "monday", "tuesday", "wednesday", "thursday", "friday",
"saturday"
};
/** Compiled {@link Pattern}s built by init() from the "uriPatterns/include" entries. */
protected List inclusionPatternList = new ArrayList();
/** Restriction pattern assigned by setStartingURI; applied to external links in shouldInclude. */
protected Pattern uriPattern;
/** Starting URI of this document; assigned by setStartingURI. */
protected URI startingURI;
/** Expression evaluator applied to the "name", "filename" and "uri" values. */
private ExpressionEvaluator eval = new ExpressionEvaluatorImpl();
/**
 * Creates a document for the given element.
 *
 * <p>Reads the "@template" value (defaulting to "_default") and rejects it when a
 * {@code jxl} is supplied that does not contain that template, unless the value is
 * "_default". Afterwards the parent template is configured and the starting URI is
 * reset.
 *
 * @param element element backing this document
 * @param jxl owning JXL instance; may be {@code null}, in which case the template check is skipped
 * @throws RuntimeException if the referenced template is not known to {@code jxl}
 */
protected Document(Element element, JXL jxl) {
    super(element, jxl);
    final String templateName = (String) resolveValue("@template", "_default");
    final boolean unknownTemplate =
        jxl != null && !jxl.containsTemplate(templateName) && !templateName.equals("_default");
    if (unknownTemplate) {
        throw new RuntimeException("Invalid template reference \"" + templateName + "\".");
    }
    configureParentTemplate();
    resetStartingURI();
}
/**
 * Restores the starting URI to the one produced by {@link #getUri()}.
 */
public void resetStartingURI() {
    final URI configured = URI.create(getUri());
    setStartingURI(configured);
}
/**
 * Rebuilds the include/exclude pattern lists, the transformation pipelines and the
 * bookmark processor from the configuration.
 *
 * <p>NOTE(review): include patterns are read with {@code iteratePointers} while exclude
 * patterns and auto-bookmarks use {@code resolveIteratePointers}; confirm the difference
 * is intentional.
 *
 * @throws Exception declared by the original signature; regular expression compilation
 *         errors also propagate from here
 */
public void init() throws Exception {
    // Inclusion patterns.
    inclusionPatternList.clear();
    Iterator includes = iteratePointers("uriPatterns/include");
    while (includes.hasNext()) {
        Pointer includePointer = (Pointer) includes.next();
        inclusionPatternList.add(Pattern.compile((String) includePointer.getValue()));
    }

    // Exclusion patterns.
    exclusionPatternList.clear();
    Iterator excludes = resolveIteratePointers("uriPatterns/exclude");
    while (excludes.hasNext()) {
        Pointer excludePointer = (Pointer) excludes.next();
        exclusionPatternList.add(Pattern.compile((String) excludePointer.getValue()));
    }

    initTransformationPipelines();

    // One bookmark rule per "autoBookmark" element: its "uriPattern" attribute plus its text.
    bookmarkProcessor = new BookmarkProcessor();
    Iterator bookmarks = resolveIteratePointers("autoBookmark");
    while (bookmarks.hasNext()) {
        DOMNodePointer nodePointer = (DOMNodePointer) bookmarks.next();
        Element bookmarkElement = (Element) nodePointer.getBaseValue();
        bookmarkProcessor.addBookmark(bookmarkElement.getAttribute("uriPattern"),
            DOMUtil.getText(bookmarkElement));
    }
}
/**
 * Tells whether the "@cache" value resolves to "yes"; "yes" is also the default.
 *
 * @return {@code true} when the resolved "@cache" value equals "yes"
 */
public boolean isUseHTTPCache() {
    final Object cacheSetting = resolveValue("@cache", "yes");
    return "yes".equals(cacheSetting);
}
/**
 * Tells whether the document is due.
 *
 * @return {@code true} when the due date lies before the current time; when no due
 *         date can be determined, the result of {@link #isScheduled()}
 */
public boolean isDue() {
    final Date dueDate = getDueDate();
    if (dueDate != null) {
        return dueDate.before(new Date());
    }
    return isScheduled();
}
/**
 * Computes the next fire time of the configured schedule.
 *
 * <p>The trigger start time is the last-converted timestamp, or the "schedule/@start"
 * value when the document has no last-converted timestamp.
 *
 * @return the trigger's next fire time, or {@code null} when there is no "schedule"
 *         entry or when neither a last-converted timestamp nor a start value exists
 * @throws RuntimeException wrapping a {@link ParseException} raised while parsing the start value
 */
public Date getDueDate() {
    try {
        if (resolvePointer("schedule") == null) {
            return null;
        }
        final CronTrigger cronTrigger = createTrigger();
        Date startTime = getLastConverted();
        if (startTime == null) {
            // Never converted: fall back to the configured start, if there is one.
            final Pointer startPointer = resolvePointer("schedule/@start");
            if (startPointer == null) {
                return null;
            }
            startTime = JXL.stringToDate((String) startPointer.getValue());
        }
        cronTrigger.setStartTime(startTime);
        cronTrigger.computeFirstFireTime(new BaseCalendar());
        return cronTrigger.getNextFireTime();
    } catch (ParseException parseFailure) {
        throw new RuntimeException(parseFailure);
    }
}
/**
 * Reads the "@lastConverted" value as a date.
 *
 * @return the parsed date, or {@code null} when the value is absent or cannot be parsed
 */
public Date getLastConverted() {
    final String stamp = (String) resolveValue("@lastConverted");
    if (stamp == null) {
        return null;
    }
    try {
        return JXL.stringToDate(stamp);
    } catch (ParseException ignored) {
        // An unparsable timestamp is reported the same way as a missing one.
        return null;
    }
}
/**
 * Returns {@link #getLastConverted()} as milliseconds.
 *
 * @return the timestamp's {@code getTime()} value, or -1 when there is no timestamp
 */
public long getLastConvertedAsLong() {
    final Date lastConverted = getLastConverted();
    return (lastConverted == null) ? -1 : lastConverted.getTime();
}
/**
 * Returns {@link #getDueDate()} as milliseconds.
 *
 * @return the due date's {@code getTime()} value, or -1 when there is no due date
 */
public long getDueDateAsLong() {
    final Date dueDate = getDueDate();
    return (dueDate == null) ? -1 : dueDate.getTime();
}
/**
 * Returns the "uri/@maxDepth" value as an integer; "0" is used when it is not set.
 *
 * @return the parsed depth
 * @throws NumberFormatException if the value is not a valid integer
 */
public int getLinkDepth() {
    final String depthText = (String) resolveValue("uri/@maxDepth", "0");
    return Integer.parseInt(depthText);
}
/**
 * Returns the "name" value after expression evaluation, trimmed.
 *
 * @return the evaluated and trimmed name
 * @throws RuntimeException wrapping any exception raised while reading or evaluating the name
 */
public String getName() {
    try {
        final String rawName = (String) getValue("name");
        final Object evaluated = eval.evaluate(rawName, String.class, new DefaultVariables(),
            StaticFunctionMapper.getDefault());
        return ((String) evaluated).trim();
    } catch (Exception failure) {
        throw new RuntimeException(failure);
    }
}
/**
 * Returns the "filename" value exactly as stored, without expression evaluation.
 *
 * @return the unevaluated filename value
 */
public String getRawFilename() {
    final Object rawFilename = getValue("filename");
    return (String) rawFilename;
}
/**
 * Returns the "filename" value after expression evaluation, trimmed.
 *
 * <p>Unlike {@link #getName()}, any failure here yields {@code null} instead of an exception.
 *
 * @return the evaluated and trimmed filename, or {@code null} when reading or evaluating it fails
 */
public String getFilename() {
    try {
        final String rawFilename = (String) getValue("filename");
        final Object evaluated = eval.evaluate(rawFilename, String.class, new DefaultVariables(),
            StaticFunctionMapper.getDefault());
        return ((String) evaluated).trim();
    } catch (Exception ignored) {
        // Any failure is reported to the caller as "no filename".
        return null;
    }
}
/**
 * Tells whether a "schedule" entry can be resolved for this document.
 *
 * @return {@code true} when {@code resolvePointer("schedule")} yields a pointer
 */
public boolean isScheduled() {
    final Pointer schedulePointer = resolvePointer("schedule");
    return schedulePointer != null;
}
/**
 * Sets the starting URI and derives {@code uriPattern} from the "uri/@restrict" mode.
 *
 * <ul>
 * <li>"domain": for http URIs with a host, links must be on the detected domain or one
 *     of its sub-domains.</li>
 * <li>"host": for http URIs with a host, links must be on exactly that host.</li>
 * <li>"directory": links must start with the directory part of {@link #getUri()}.</li>
 * </ul>
 *
 * <p>All literal text (domain, host, directory) is quoted before it is placed in the
 * regular expression, so characters such as '.', '+' or '?' only match themselves. The
 * domain and host patterns are anchored to the authority part of the URI, so the name
 * can no longer match inside a path or query, or as a fragment of another host name.
 * A URI without scheme or host leaves {@code uriPattern} untouched instead of failing
 * with a NullPointerException.
 *
 * @param startingURI the new starting URI
 */
public void setStartingURI(URI startingURI) {
    this.startingURI = startingURI;
    if (isStayWithinDomain()) {
        // TODO: Check this. This code may not properly detect every domain yet.
        final String host = httpHostOf(startingURI);
        if (host != null) {
            // Optional prefix covers user-info and sub-domain labels; the suffix only
            // allows a port, path, query or fragment to follow the domain.
            uriPattern = Pattern.compile("http://(?:[^/?#]*[.@])?" + Pattern.quote(extractDomain(host))
                + "(?:[:/?#].*)?");
        }
    }
    if (isStayOnHost()) {
        final String host = httpHostOf(startingURI);
        if (host != null) {
            uriPattern = Pattern.compile("http://(?:[^/?#@]*@)?" + Pattern.quote(host)
                + "(?:[:/?#].*)?");
        }
    }
    if (isStayBelowDirectory()) {
        final String uri = getUri();
        // Everything up to and including the last '/'; empty when there is no '/'.
        final String directory = uri.substring(0, uri.lastIndexOf('/') + 1);
        uriPattern = Pattern.compile(Pattern.quote(directory) + ".*");
    }
}

/** Returns the host of an http URI, or null when the scheme is not "http" or no host is present. */
private static String httpHostOf(URI uri) {
    if (!"http".equals(uri.getScheme())) {
        return null;
    }
    final String host = uri.getHost();
    return (host == null || host.length() == 0) ? null : host;
}

/**
 * Derives the domain from a host name: the last two labels, or for hosts ending in a
 * two-letter label everything from the right-most label longer than two characters
 * (the whole host when there is none). Hosts with fewer than two labels are returned
 * as a whole. NOTE(review): numeric IPv4 hosts are not special-cased.
 */
private static String extractDomain(String host) {
    final String[] labels = host.split("\\.");
    if (labels.length == 0) {
        return host;
    }
    int start = 0;
    if (labels[labels.length - 1].length() == 2) {
        for (int i = labels.length - 1; i > 0; i--) {
            if (labels[i].length() > 2) {
                start = i;
                break;
            }
        }
    } else {
        // Clamp so that single-label hosts such as "localhost" do not yield index -1.
        start = Math.max(0, labels.length - 2);
    }
    final StringBuilder domain = new StringBuilder();
    for (int i = start; i < labels.length; i++) {
        if (i > start) {
            domain.append('.');
        }
        domain.append(labels[i]);
    }
    return domain.toString();
}
/**
 * Returns the starting URI last assigned through {@code setStartingURI}.
 *
 * @return the current starting URI
 */
public URI getStartingURI() {
    return this.startingURI;
}
/**
 * Returns the "uri/@restrict" value as read through {@code getValue}; "none" when not set.
 *
 * @return the restriction mode
 */
public String getRestrict() {
    final Object restriction = getValue("uri/@restrict", "none");
    return (String) restriction;
}
/**
 * Tells whether the resolved "uri/@restrict" value is "directory".
 *
 * @return {@code true} for the "directory" restriction mode
 */
public boolean isStayBelowDirectory() {
    final Object mode = resolveValue("uri/@restrict", "none");
    return "directory".equals(mode);
}
/**
 * Tells whether the resolved "uri/@restrict" value is "host".
 *
 * @return {@code true} for the "host" restriction mode
 */
public boolean isStayOnHost() {
    final Object mode = resolveValue("uri/@restrict", "none");
    return "host".equals(mode);
}
/**
 * Tells whether the resolved "uri/@restrict" value is "domain".
 *
 * @return {@code true} for the "domain" restriction mode
 */
public boolean isStayWithinDomain() {
    final Object mode = resolveValue("uri/@restrict", "none");
    return "domain".equals(mode);
}
/**
 * Returns the "name" value exactly as stored, without expression evaluation.
 *
 * @return the unevaluated name value
 */
public String getRawName() {
    final Object rawName = getValue("name");
    return (String) rawName;
}
/**
 * Returns the "uri" value exactly as stored, without expression evaluation.
 *
 * @return the unevaluated uri value
 */
public String getRawUri() {
    final Object rawUri = getValue("uri");
    return (String) rawUri;
}
/**
 * Returns the "uri" value after expression evaluation and trimming.
 *
 * <p>The last-converted timestamp is handed to the expression variables. When a
 * {@code jxl} is present the result is additionally passed through {@code jxl.resolve}.
 *
 * @return the evaluated URI string
 * @throws RuntimeException wrapping any exception raised during evaluation or resolution
 */
public String getUri() {
    try {
        final String rawUri = (String) getValue("uri");
        final DefaultVariables variables = new DefaultVariables(null, getLastConverted(), null);
        final Object evaluated = eval.evaluate(rawUri, String.class, variables,
            StaticFunctionMapper.getDefault());
        final String trimmed = ((String) evaluated).trim();
        if (jxl == null) {
            return trimmed;
        }
        return jxl.resolve(trimmed);
    } catch (Exception failure) {
        throw new RuntimeException(failure);
    }
}
/**
 * Removes the "lastConverted" attribute from the element and saves through {@code jxl}
 * when one is present.
 *
 * @throws RuntimeException wrapping an {@link IOException} raised while saving
 */
public void clearLastConverted() {
    element.removeAttribute("lastConverted");
    if (jxl == null) {
        return;
    }
    try {
        jxl.save();
    } catch (IOException saveFailure) {
        throw new RuntimeException(saveFailure);
    }
}
/**
 * Orders documents by name, ignoring case.
 *
 * @param obj the other {@code Document}
 * @return the result of comparing both names with {@code compareToIgnoreCase}
 * @throws ClassCastException if {@code obj} is not a {@code Document}
 */
public int compareTo(Object obj) {
    final String ownName = getName();
    final String otherName = ((Document) obj).getName();
    return ownName.compareToIgnoreCase(otherName);
}
/**
 * Records a finished conversion: stores the current time as "lastConverted" and the
 * given size as "lastSize" on the element, then notifies and saves {@code jxl} when
 * one is present.
 *
 * @param size value written to the "lastSize" attribute
 * @throws RuntimeException wrapping an {@link IOException} raised in the process
 */
public void converted(long size) {
    try {
        final String timestamp = JXL.dateToString(new Date());
        element.setAttribute("lastConverted", timestamp);
        element.setAttribute("lastSize", Long.toString(size));
        if (jxl == null) {
            return;
        }
        jxl.updated(this);
        jxl.save();
    } catch (IOException ioFailure) {
        throw new RuntimeException(ioFailure);
    }
}
/**
 * Decides whether the document should be converted now.
 *
 * @return {@code true} when the document is not scheduled or is due; {@code false}
 *         when it is scheduled, not due and has a due date; otherwise {@code true}
 *         only if no "schedule/@start" value resolves
 */
public boolean shouldConvert() {
    if (!isScheduled()) {
        return true;
    }
    if (isDue()) {
        return true;
    }
    if (getDueDate() != null) {
        return false;
    }
    return resolveValue("schedule/@start") == null;
}
/**
 * Never excludes the starting URI; every other URI is delegated to the superclass.
 *
 * @param uri the URI to test
 * @return {@code false} for the starting URI, otherwise the superclass decision
 */
public boolean shouldExclude(String uri) {
    final boolean isStartingUri = startingURI.toString().equals(uri);
    return !isStartingUri && super.shouldExclude(uri);
}
/**
 * Decides whether a link should be followed.
 *
 * <p>Checks, in order: the default {@code RuleFilter} on the URI's path, equality with
 * the starting URI (always included), the restriction pattern for external links, and
 * finally the inclusion patterns. NOTE(review): the rule filter is always called with
 * {@code RuleFilter.EXTERNAL_LINK}, independent of {@code linkType} - confirm.
 *
 * @param uri the link target
 * @param linkType link type constant; only compared against {@code LinkFilter.EXTERNAL_LINK}
 * @return {@code true} when the link passes all checks
 * @throws IllegalArgumentException if {@code uri} cannot be parsed by {@link URI#create(String)}
 */
public boolean shouldInclude(String uri, int linkType) {
    final String path = URI.create(uri).getPath();
    if (!RuleFilter.getDefault().include(path, RuleFilter.EXTERNAL_LINK)) {
        // Skip file types that we know we can't handle.
        return false;
    }
    if (startingURI.toString().equals(uri)) {
        return true;
    }
    final boolean restricted = (uriPattern != null) && (linkType == LinkFilter.EXTERNAL_LINK);
    if (restricted && !uriPattern.matcher(uri).matches()) {
        return false;
    }
    // With inclusion patterns configured, at least one has to match.
    for (Iterator patterns = inclusionPatternList.iterator(); patterns.hasNext();) {
        final Pattern candidate = (Pattern) patterns.next();
        if (candidate.matcher(uri).matches()) {
            return true;
        }
    }
    return inclusionPatternList.isEmpty();
}
/**
 * Maps a lower-case weekday name to its one-based position in {@code WEEKDAYS}
 * ("sunday" gives 1, "saturday" gives 7).
 *
 * @param day weekday name; matched exactly
 * @return the one-based day number
 * @throws RuntimeException if the name is not listed in {@code WEEKDAYS}
 */
private static int getWeekDay(String day) {
    final int position = java.util.Arrays.asList(WEEKDAYS).indexOf(day);
    if (position < 0) {
        throw new RuntimeException("Cannot parse date " + day); // Should not occur;
    }
    return position + 1;
}
/**
 * Builds a cron trigger in the default time zone from the "schedule" node.
 *
 * <p>The first child found among "hourly", "daily", "weekly", "monthly" and "cron"
 * determines the cron expression; when none is present the trigger is returned
 * without an expression.
 *
 * @return the configured trigger
 * @throws RuntimeException wrapping any failure; a warning is logged first
 */
private CronTrigger createTrigger() {
    try {
        final CronTrigger cronTrigger = new CronTrigger();
        cronTrigger.setTimeZone(TimeZone.getDefault());
        final ContextAdapter schedule =
            new ContextAdapter(JXPathContext.newContext(resolvePointer("schedule").getNode()));
        if (schedule.getPointer("hourly") != null) {
            final String minute = (String) schedule.getValue("hourly/@minute");
            cronTrigger.setCronExpression("0 " + minute + " * ? * *");
        } else if (schedule.getPointer("daily") != null) {
            final String hour = (String) schedule.getValue("daily/@hour");
            final String minute = (String) schedule.getValue("daily/@minute");
            cronTrigger.setCronExpression("0 " + minute + " " + hour + " ? * *");
        } else if (schedule.getPointer("weekly") != null) {
            final String day = (String) schedule.getValue("weekly/@day");
            final String hour = (String) schedule.getValue("weekly/@hour");
            final String minute = (String) schedule.getValue("weekly/@minute");
            cronTrigger.setCronExpression("0 " + minute + " " + hour + " ? * " + getWeekDay(day));
        } else if (schedule.getPointer("monthly") != null) {
            final String configuredDay = (String) schedule.getValue("monthly/@day");
            // "last" is written as "L" in the cron expression.
            final String dayOfMonth = configuredDay.equalsIgnoreCase("last") ? "L" : configuredDay;
            final String hour = (String) schedule.getValue("monthly/@hour");
            final String minute = (String) schedule.getValue("monthly/@minute");
            cronTrigger.setCronExpression("0 " + minute + " " + hour + " " + dayOfMonth + " * ?");
        } else {
            final Pointer cronPointer = schedule.getPointer("cron");
            if (cronPointer != null) {
                cronTrigger.setCronExpression((String) cronPointer.getValue());
            }
        }
        return cronTrigger;
    } catch (Exception e) {
        logger.warning(getName() + ": Error parsing schedule. " + e.getClass().getName() + ": " +
            e.getMessage());
        throw new RuntimeException(e);
    }
}
/**
 * Returns the element backing this document.
 *
 * @return the {@code element} reference held by this instance
 */
public Element getElement() {
    return this.element;
}
/**
 * Returns the "@lastSize" value as a number.
 *
 * @return the parsed size, or -1 when the value is absent
 * @throws NumberFormatException if the stored value is not a valid long
 */
public long getLastSize() {
    final String stored = (String) getValue("@lastSize");
    return (stored == null) ? -1 : Long.parseLong(stored);
}
/**
 * Returns the resolved "uri/@referrer" value.
 *
 * @return the referrer value as returned by {@code resolveValue}
 */
public String getReferrer() {
    final Object referrer = resolveValue("uri/@referrer");
    return (String) referrer;
}
/** Bookmark processor created by init(); not assigned before init() has run. */
private BookmarkProcessor bookmarkProcessor;

/**
 * Returns the bookmark processor built by the most recent {@code init()} call.
 *
 * @return the bookmark processor, or {@code null} when {@code init()} has not run yet
 */
public BookmarkProcessor getBookmarkProcessor() {
    return this.bookmarkProcessor;
}
}