Package net.sf.jpluck.jxl

Source Code of net.sf.jpluck.jxl.Document

package net.sf.jpluck.jxl;

import java.io.IOException;
import java.net.URI;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.TimeZone;
import java.util.regex.Pattern;

import javax.servlet.jsp.el.ExpressionEvaluator;

import net.sf.jpluck.spider.LinkFilter;
import net.sf.jpluck.spider.RuleFilter;
import net.sf.jpluck.util.el.DefaultVariables;
import net.sf.jpluck.util.el.StaticFunctionMapper;
import net.sf.jpluck.xml.ContextAdapter;
import net.sf.jpluck.xml.DOMUtil;

import org.apache.commons.el.ExpressionEvaluatorImpl;
import org.apache.commons.jxpath.JXPathContext;
import org.apache.commons.jxpath.Pointer;
import org.apache.commons.jxpath.ri.model.dom.DOMNodePointer;
import org.quartz.CronTrigger;
import org.quartz.impl.calendar.BaseCalendar;
import org.w3c.dom.Element;


public abstract class Document extends Template implements Comparable,Cloneable {
  /**
   * Lower-case English weekday names, Sunday first. {@code getWeekDay} returns
   * the matching index plus one, i.e. "sunday" maps to 1 and "saturday" to 7.
   */
  private static final String[] WEEKDAYS = {
                         "sunday", "monday", "tuesday", "wednesday", "thursday", "friday",
                         "saturday"
                       };

  /** Compiled {@link Pattern}s; cleared and refilled by {@code init()} from "uriPatterns/include". */
  protected List inclusionPatternList = new ArrayList();
  /** Optional restriction pattern derived in {@code setStartingURI}; {@code null} until one is set. */
  protected Pattern uriPattern;
  /** URI the document starts from; assigned by {@code setStartingURI}. */
  protected URI startingURI;
  /** Expression evaluator applied to the raw "name", "filename" and "uri" values. */
  private ExpressionEvaluator eval = new ExpressionEvaluatorImpl();

  protected Document(Element element, JXL jxl) {
    super(element, jxl);

    String template = (String) resolveValue("@template", "_default");
    if (jxl!=null && !jxl.containsTemplate(template) && !template.equals("_default")) {
      throw new RuntimeException("Invalid template reference \"" + template + "\".");
    }
    configureParentTemplate();
    resetStartingURI();
  }
 
  public void resetStartingURI() {
    setStartingURI(URI.create(getUri()));
  }
 
  public void init() throws Exception {
    inclusionPatternList.clear();
    for (Iterator it = iteratePointers("uriPatterns/include"); it.hasNext();) {
      Pointer pointer = (Pointer) it.next();
      Pattern pattern = Pattern.compile((String) pointer.getValue());
      inclusionPatternList.add(pattern);
    }
    exclusionPatternList.clear();
    for (Iterator it = resolveIteratePointers("uriPatterns/exclude"); it.hasNext();) {
      Pointer pointer = (Pointer) it.next();
      Pattern pattern = Pattern.compile((String) pointer.getValue());
      exclusionPatternList.add(pattern);
    }
    initTransformationPipelines();
    bookmarkProcessor = new BookmarkProcessor();   
    for (Iterator it = resolveIteratePointers("autoBookmark"); it.hasNext();) {
      DOMNodePointer pointer = (DOMNodePointer) it.next();
      Element elem = (Element) pointer.getBaseValue();
      String regexp = elem.getAttribute("uriPattern");
      String value = DOMUtil.getText(elem);
      bookmarkProcessor.addBookmark(regexp, value);
    }
  }

  public boolean isUseHTTPCache() {
    return "yes".equals(resolveValue("@cache", "yes"));
  }

  public boolean isDue() {
    Date due = getDueDate();
    if (due == null) {
      return isScheduled();
    } else {
      Date now = new Date();
      return (due.before(now));
    }
  }

  public Date getDueDate() {
    try {
      if (resolvePointer("schedule") == null) {
        return null;
      }

      CronTrigger trigger = createTrigger();
      Date date = getLastConverted();
      if (date == null) {
        Pointer p = resolvePointer("schedule/@start");
        if (p != null) {
          date = JXL.stringToDate((String) p.getValue());
        } else {
          return null;
        }
      }
      trigger.setStartTime(date);
      trigger.computeFirstFireTime(new BaseCalendar());

      return trigger.getNextFireTime();
    } catch (ParseException e) {
      throw new RuntimeException(e);
    }
  }

  public Date getLastConverted() {
    String value = (String) resolveValue("@lastConverted");
    if (value != null) {
      try {
        return JXL.stringToDate(value);
      } catch (ParseException e) {
      }
    }

    return null;
  }

  public long getLastConvertedAsLong() {
    Date date = getLastConverted();
    if (date != null) {
      return date.getTime();
    } else {
      return -1;
    }
  }
 
  public long getDueDateAsLong() {
    Date date = getDueDate();
    if (date != null) {
      return date.getTime();
    } else {
      return -1;
    }
  }

  public int getLinkDepth() {
    return Integer.parseInt((String) resolveValue("uri/@maxDepth", "0"));
  }

  public String getName() {
    try {
      String name = (String) getValue("name");
      name = (String) eval.evaluate(name, String.class, new DefaultVariables(),
                      StaticFunctionMapper.getDefault());
      return name.trim();
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
  }
 
  public String getRawFilename() {   
    return (String) getValue("filename");
  }
 
  public String getFilename() {
    try {
      String filename = (String) getValue("filename");
      filename = (String) eval.evaluate(filename, String.class, new DefaultVariables(),
                      StaticFunctionMapper.getDefault());
      return filename.trim();
    } catch (Exception e) {
      return null;
    }
  }

  public boolean isScheduled() {
    return (resolvePointer("schedule") != null);
  }

  public void setStartingURI(URI startingURI) {
    this.startingURI = startingURI;
    if (isStayWithinDomain()) {
      // TODO: Check this. This code may not properly detect every domain yet.
      if (startingURI.getScheme().equals("http")) {
        String host = startingURI.getHost();
        String[] hostParts = host.split("\\.");
        int domainStart = 0;
        if (hostParts[hostParts.length - 1].length() == 2) {
          for (int i = hostParts.length - 1; i > 0; i--) {
            if (hostParts[i].length() > 2) {
              domainStart = i;
              break;
            }
          }
        } else {
          domainStart = hostParts.length - 2;
        }

        String domain = "";
        for (int i = domainStart; i < hostParts.length; i++) {
          domain += (hostParts[i] + ((i < (hostParts.length - 1)) ? "." : ""));
        }
        uriPattern = Pattern.compile("http://.*" + domain + ".*");
      }
    }

    if (isStayOnHost()) {
      if (startingURI.getScheme().equals("http")) {
        uriPattern = Pattern.compile("http://" + startingURI.getHost() + ".*");
      }
    }
    if (isStayBelowDirectory()) {
      String s = getUri();
      uriPattern = Pattern.compile(s.substring(0, s.lastIndexOf('/') + 1) + ".*");
    }
  }

  public URI getStartingURI() {
    return startingURI;
  }
 
  public String getRestrict() {
    return (String)getValue("uri/@restrict", "none");
  }

  public boolean isStayBelowDirectory() {
    return "directory".equals(resolveValue("uri/@restrict", "none"));
  }

  public boolean isStayOnHost() {
    return "host".equals(resolveValue("uri/@restrict", "none"));
  }

  public boolean isStayWithinDomain() {
    return "domain".equals(resolveValue("uri/@restrict", "none"));
  }
 
  public String getRawName() {
    return (String)getValue("name");
  }
 
  public String getRawUri() {
    return (String)getValue("uri");
  }

  public String getUri() {
    try {
      String uri = (String) eval.evaluate((String) getValue("uri"), String.class,
                        new DefaultVariables(null, getLastConverted(), null),
                        StaticFunctionMapper.getDefault());
      uri = uri.trim();
      if (jxl != null) {
        return jxl.resolve(uri);
      } else {
        return uri;
      }
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
  }

  public void clearLastConverted() {
    try {
      element.removeAttribute("lastConverted");
      if (jxl != null) {
        jxl.save();
      }
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  }

  public int compareTo(Object obj) {
    return getName().compareToIgnoreCase(((Document) obj).getName());
  }

  public void converted(long size) {
    try {
      element.setAttribute("lastConverted", JXL.dateToString(new Date()));
      element.setAttribute("lastSize", String.valueOf(size));
      if (jxl != null) {
        jxl.updated(this);
        jxl.save();
      }
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  }

  public boolean shouldConvert() {
    if (isScheduled()) {
      if (isDue()) {
        return true;
      } else {
        if (getDueDate() != null) {
          return false;
        } else {
          return (resolveValue("schedule/@start") == null);
        }
      }
    } else {
      return true;
    }
  }

  public boolean shouldExclude(String uri) {
    if (startingURI.toString().equals(uri)) {
      return false;
    }
    return super.shouldExclude(uri);
  }

  public boolean shouldInclude(String uri, int linkType) {
    if (!RuleFilter.getDefault().include(URI.create(uri).getPath(), RuleFilter.EXTERNAL_LINK)) {
      // Skip file types that we know we can't handle.
      return false;
    }
    if (startingURI.toString().equals(uri)) {
      return true;
    }
    if ((uriPattern != null) && (linkType == LinkFilter.EXTERNAL_LINK)) {
      if (!uriPattern.matcher(uri).matches()) {
        return false;
      }
    }
    if (inclusionPatternList.size() > 0) {
      for (Iterator iterator = inclusionPatternList.iterator(); iterator.hasNext();) {
        Pattern pattern = (Pattern) iterator.next();
        if (pattern.matcher(uri).matches()) {
          return true;
        }
      }

      return false;
    } else {
      return true;
    }
  }

  private static int getWeekDay(String day) {
    for (int i = 0; i < WEEKDAYS.length; i++) {
      if (WEEKDAYS[i].equals(day)) {
        return i + 1;
      }
    }
    throw new RuntimeException("Cannot parse date " + day); // Should not occur;
  }

  private CronTrigger createTrigger() {
    try {
      CronTrigger trigger = new CronTrigger();
      trigger.setTimeZone(TimeZone.getDefault());

      ContextAdapter context = new ContextAdapter(JXPathContext.newContext(resolvePointer("schedule").getNode()));
      Pointer p = context.getPointer("hourly");
      if (p != null) {
        String minute = (String) context.getValue("hourly/@minute");
        trigger.setCronExpression("0 " + minute + " * ? * *");
      } else {
        p = context.getPointer("daily");
        if (p != null) {
          String hour = (String) context.getValue("daily/@hour");
          String minute = (String) context.getValue("daily/@minute");
          trigger.setCronExpression("0 " + minute + " " + hour + " ? * *");
        } else {
          p = context.getPointer("weekly");
          if (p != null) {
            String day = (String) context.getValue("weekly/@day");
            String hour = (String) context.getValue("weekly/@hour");
            String minute = (String) context.getValue("weekly/@minute");
            trigger.setCronExpression("0 " + minute + " " + hour + " ? * " + getWeekDay(day));
          } else {
            p = context.getPointer("monthly");
            if (p != null) {
              String day = (String) context.getValue("monthly/@day");
              if (day.equalsIgnoreCase("last")) {
                day = "L";
              }

              String hour = (String) context.getValue("monthly/@hour");
              String minute = (String) context.getValue("monthly/@minute");
              trigger.setCronExpression("0 " + minute + " " + hour + " " + day + " * ?");
            } else {
              p = context.getPointer("cron");
              if (p != null) {
                trigger.setCronExpression((String) p.getValue());
              }
            }
          }
        }
      }

      return trigger;
    } catch (Exception e) {
      logger.warning(getName() + ": Error parsing schedule. " + e.getClass().getName() + ": " +
               e.getMessage());
      throw new RuntimeException(e);
    }
  }
 
  public Element getElement()  {
    return element;
  }

  public long getLastSize() {
    String s = (String)getValue("@lastSize");
    if (s!=null) {
      return Long.parseLong(s);
    } else {
      return -1;
    }
  }

  public String getReferrer() {
    return (String)resolveValue("uri/@referrer");
  }
 
  private BookmarkProcessor bookmarkProcessor;
 
  public BookmarkProcessor getBookmarkProcessor() {
    return bookmarkProcessor;
  }
}
TOP

Related Classes of net.sf.jpluck.jxl.Document

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.