Package org.archive.wayback.core

Examples of org.archive.wayback.core.WaybackRequest


  //private final Pattern WB_REQUEST_REGEX = Pattern.compile("^id(\\d+)index(\\d+)$");
  private final Pattern WB_REQUEST_REGEX = Pattern.compile("^id(\\d+)index(\\d+)(\\?(.*))*$");
 

  public WaybackRequest parse(String requestPath) {
    WaybackRequest wbRequest = null;
    Matcher matcher = WB_REQUEST_REGEX.matcher(requestPath)
    if (matcher != null && matcher.matches()) {
      wbRequest = new WaybackRequest();
      String docId = matcher.group(1);     
      wbRequest.put(WaybackConstants.REQUEST_DOC_ID,docId);
      String indexId = matcher.group(2);     
      wbRequest.put(WaybackConstants.REQUEST_INDEX_ID,indexId);
     
      wbRequest.put(WaybackConstants.REQUEST_TYPE, WaybackConstants.REQUEST_REPLAY_QUERY);
    }
    return wbRequest;
  }
View Full Code Here


  /**
   * Matches a URL query path: 0 to 13 digits (group 1), then an asterisk
   * and a slash, then a URL (group 2) whose last character is not an
   * asterisk.
   */
  private final static Pattern WB_QUERY_REGEX = Pattern
      .compile("^(\\d{0,13})\\*/(.*[^*])$");

  /**
   * Parses a path matching {@code WB_QUERY_REGEX} into a request of type
   * {@code REQUEST_URL_QUERY} with a start date, an end date and the
   * request URL set.
   *
   * @param requestPath the path to parse
   * @return the populated request, or {@code null} when the path does not
   *         match or the URL is rejected with a {@code URIException}
   */
  public WaybackRequest parse(String requestPath) {
   
    WaybackRequest wbRequest = null;
    Matcher matcher = WB_QUERY_REGEX.matcher(requestPath);
    if (matcher != null && matcher.matches()) {

      wbRequest = new WaybackRequest();
      String dateStr = matcher.group(1);
      String urlStr = matcher.group(2);

      String startDate;
      String endDate;     
      // No digits before the asterisk: use the earliest timestamp as the
      // start and the padded empty date string as the end.
      if(dateStr.length() == 0) {
        startDate = getEarliestTimestamp();
        //endDate = getLatestTimestamp(); // BUG MC 120608 - nutchwax 0000051
        endDate = Timestamp.padEndDateStr(""); // BUG MC 120608 - nutchwax 0000051       
      } else {
        // Both bounds are derived from the same partial date string.
        // NOTE(review): presumably parseBefore/parseAfter expand it to the
        // first and last instant it covers -- confirm against Timestamp.
        startDate = Timestamp.parseBefore(dateStr).getDateStr();
        endDate = Timestamp.parseAfter(dateStr).getDateStr();
      }
      wbRequest.put(WaybackConstants.REQUEST_START_DATE,startDate);
      wbRequest.put(WaybackConstants.REQUEST_END_DATE,endDate);
      wbRequest.put(WaybackConstants.REQUEST_TYPE,
          WaybackConstants.REQUEST_URL_QUERY);
      try {
                wbRequest.setRequestUrl(urlStr);
      } catch (URIException e) {
        // An unusable URL makes the whole path unparseable: report null.
        wbRequest = null;
      }
    }
    return wbRequest;
View Full Code Here

  /**
   * Matches a date-range URL query path: 1 to 14 digits (group 1), a
   * hyphen, 1 to 14 digits (group 2), then an asterisk and a slash, then a
   * URL (group 3) whose last character is not an asterisk.
   */
  private final static Pattern WB_QUERY2_REGEX = Pattern
      .compile("^(\\d{1,14})-(\\d{1,14})\\*/(.*[^*])$");
 

  /**
   * Parses a path matching {@code WB_QUERY2_REGEX} into a request of type
   * {@code REQUEST_URL_QUERY} whose start and end dates come from the two
   * digit groups.
   *
   * @param requestPath the path to parse
   * @return the populated request, or {@code null} when the path does not
   *         match or the URL is rejected with a {@code URIException}
   */
  public WaybackRequest parse(String requestPath) {
    WaybackRequest wbRequest = null;
    Matcher matcher = WB_QUERY2_REGEX.matcher(requestPath);
    if (matcher != null && matcher.matches()) {

      wbRequest = new WaybackRequest();
      String startDateStr = matcher.group(1);
      String endDateStr = matcher.group(2);
      String urlStr = matcher.group(3);

      // The start bound comes from the first digit group, the end bound
      // from the second.
      String startDate = Timestamp.parseBefore(startDateStr).getDateStr();
      String endDate = Timestamp.parseAfter(endDateStr).getDateStr();
      wbRequest.put(WaybackConstants.REQUEST_START_DATE,startDate);
      wbRequest.put(WaybackConstants.REQUEST_END_DATE,endDate);
      wbRequest.put(WaybackConstants.REQUEST_TYPE,
          WaybackConstants.REQUEST_URL_QUERY);
      try {
                wbRequest.setRequestUrl(urlStr);
      } catch (URIException e) {
        // An unusable URL makes the whole path unparseable: report null.
        wbRequest = null;
      }
    }
    return wbRequest;
View Full Code Here

    return (nodes.getLength() <= 0) ? null : nodes;
  }

  protected String getRequestUrl(WaybackRequest wbRequest)
      throws BadQueryException {
    WaybackRequest tmp = wbRequest.clone();
    String type = tmp.get(WaybackConstants.REQUEST_TYPE);
    if(type.equals(WaybackConstants.REQUEST_REPLAY_QUERY)) {
      tmp.put(WaybackConstants.REQUEST_TYPE, WaybackConstants.REQUEST_URL_QUERY);
    }
    return this.searchUrlBase + "?" + tmp.getQueryArguments();
  }
View Full Code Here

    arcCacheDir.shutdown();
  }
 
  private WaybackRequest makeCacheWBRequest(URL url, long maxCacheMS,
      boolean bUseOlder) throws URIException {
    WaybackRequest req = new WaybackRequest();
    req.setRequestUrl(url.toString());
    req.put(WaybackConstants.REQUEST_TYPE,
        WaybackConstants.REQUEST_CLOSEST_QUERY);
    req.put(WaybackConstants.REQUEST_EXACT_DATE,
        Timestamp.currentTimestamp().getDateStr());
    Timestamp earliest = null;
    if(bUseOlder) {
      earliest = Timestamp.earliestTimestamp();
    } else {
      Date d = new Date(System.currentTimeMillis() - maxCacheMS);
      earliest = new Timestamp(d);
    }
    req.put(WaybackConstants.REQUEST_START_DATE,earliest.getDateStr());
    // for now, assume all live web requests are only satisfiable by the
    // exact host -- no massaging.
    req.put(WaybackConstants.REQUEST_EXACT_HOST_ONLY,
        WaybackConstants.REQUEST_YES);
    return req;
  }
View Full Code Here

  private Resource getLocalCachedResource(URL url, long maxCacheMS,
      boolean bUseOlder) throws ResourceNotInArchiveException,
      IOException, LiveDocumentNotAvailableException {
   
    Resource resource = null;
    WaybackRequest wbRequest = makeCacheWBRequest(url,maxCacheMS,bUseOlder);
   
    CaptureSearchResults results = null;
    try {
      SearchResults gresults = index.query(wbRequest);
      if(!(gresults instanceof CaptureSearchResults)) {
View Full Code Here

   */
  private final static Pattern WB_PATH_QUERY_REGEX = Pattern
      .compile("^(\\d{0,13})\\*/(.*)\\*$");

  /**
   * Parses a path made of 0 to 13 digits, an asterisk and a slash, then a
   * URL followed by a trailing asterisk, into a request of type
   * {@code REQUEST_URL_PREFIX_QUERY}. The trailing asterisk is not part of
   * the URL stored on the request.
   *
   * @param requestPath the path to parse
   * @return the populated request, or {@code null} when the path does not
   *         match or the URL is rejected with a {@code URIException}
   */
  public WaybackRequest parse(String requestPath) {
    WaybackRequest wbRequest = null;
    Matcher matcher = WB_PATH_QUERY_REGEX.matcher(requestPath);
    if (matcher != null && matcher.matches()) {

      wbRequest = new WaybackRequest();
      String dateStr = matcher.group(1);
      String urlStr = matcher.group(2);

      String startDate;
      String endDate;
      // No digits before the asterisk: search the full earliest-to-latest
      // range.
      if(dateStr.length() == 0) {
        startDate = getEarliestTimestamp();
        endDate = getLatestTimestamp();
      } else {
        // Both bounds are derived from the same partial date string.
        // NOTE(review): presumably parseBefore/parseAfter expand it to the
        // first and last instant it covers -- confirm against Timestamp.
        startDate = Timestamp.parseBefore(dateStr).getDateStr();
        endDate = Timestamp.parseAfter(dateStr).getDateStr();
      }

      wbRequest.put(WaybackConstants.REQUEST_START_DATE,
          startDate);
      wbRequest.put(WaybackConstants.REQUEST_END_DATE,endDate);

      wbRequest.put(WaybackConstants.REQUEST_TYPE,
          WaybackConstants.REQUEST_URL_PREFIX_QUERY);
      try {
                wbRequest.setRequestUrl(urlStr);
      } catch (URIException e) {
        // An unusable URL makes the whole path unparseable: report null.
        wbRequest = null;
      }
    }
    return wbRequest;
View Full Code Here

   */
  private final static Pattern WB_PATH_QUERY2_REGEX = Pattern
      .compile("^(\\d{1,14})-(\\d{1,14})\\*/(.*)\\*$");

  /**
   * Parses a path made of two 1-to-14-digit groups separated by a hyphen,
   * an asterisk and a slash, then a URL followed by a trailing asterisk,
   * into a request of type {@code REQUEST_URL_PREFIX_QUERY}. The trailing
   * asterisk is not part of the URL stored on the request.
   *
   * @param requestPath the path to parse
   * @return the populated request, or {@code null} when the path does not
   *         match or the URL is rejected with a {@code URIException}
   */
  public WaybackRequest parse(String requestPath) {
    WaybackRequest wbRequest = null;
    Matcher matcher = WB_PATH_QUERY2_REGEX.matcher(requestPath);
    if (matcher != null && matcher.matches()) {

      wbRequest = new WaybackRequest();
      String startDateStr = matcher.group(1);
      String endDateStr = matcher.group(2);
      String urlStr = matcher.group(3);
      // The start bound comes from the first digit group, the end bound
      // from the second.
      String startDate = Timestamp.parseBefore(startDateStr).getDateStr();
      String endDate = Timestamp.parseAfter(endDateStr).getDateStr();
      wbRequest.put(WaybackConstants.REQUEST_START_DATE,
          startDate);
      wbRequest.put(WaybackConstants.REQUEST_END_DATE,endDate);

      wbRequest.put(WaybackConstants.REQUEST_TYPE,
          WaybackConstants.REQUEST_URL_PREFIX_QUERY);
      try {
                wbRequest.setRequestUrl(urlStr);
      } catch (URIException e) {
        // An unusable URL makes the whole path unparseable: report null.
        wbRequest = null;
      }
    }
    return wbRequest;
View Full Code Here

   */
  private final Pattern WB_REQUEST_REGEX = Pattern
      .compile("^(\\d{1,14})/(.*)$");

  public WaybackRequest parse(String requestPath) {
    WaybackRequest wbRequest = null;
    Matcher matcher = WB_REQUEST_REGEX.matcher(requestPath);
    String urlStr = null;
    if (matcher != null && matcher.matches()) {
      wbRequest = new WaybackRequest();
      String dateStr = matcher.group(1);
      urlStr = matcher.group(2);
      if (!urlStr.startsWith("http://")) {
        urlStr = "http://" + urlStr;
      }             

      // The logic of the classic WM wrt timestamp bounding:
      // if 14-digits are specified, assume min-max range boundaries
      // if less than 14 are specified, assume min-max range boundaries
      // based upon amount given (2001 => 20010101... - 20011231...)
      // AND assume the user asked for the LATEST possible date
      // within that range...
      //
      // ...don't ask me, I just work here.

      String startDate = null;
      String endDate = null;
      if (dateStr.length() == 14) {
        startDate = getEarliestTimestamp();
        endDate = getLatestTimestamp();
      } else {

        // classic behavior:
        // startDate = Timestamp.parseBefore(dateStr).getDateStr();
        // endDate = Timestamp.parseAfter(dateStr).getDateStr();
        // dateStr = endDate;

        // "better" behavior:
        startDate = getEarliestTimestamp();
        endDate = getLatestTimestamp();
        dateStr = Timestamp.parseAfter(dateStr).getDateStr();

      }
      wbRequest.put(WaybackConstants.REQUEST_EXACT_DATE, dateStr);
      //wbRequest.put(WaybackConstants.REQUEST_START_DATE, startDate); BUG MC 120608
      //wbRequest.put(WaybackConstants.REQUEST_END_DATE, endDate); BUG MC 120608

      wbRequest.put(WaybackConstants.REQUEST_TYPE,
          WaybackConstants.REQUEST_REPLAY_QUERY);

      try {
//        String wbPrefix = wbRequest.getDefaultWaybackPrefix();
//        if (urlStr.startsWith(wbPrefix)) {
//          wbRequest.setBetterRequestURI(urlStr);
//        }
        wbRequest.setRequestUrl(urlStr);
      } catch (URIException e) {
        if(urlStr != null) {
          LOGGER.severe("Failed parse of url(" + urlStr + ")");
        }
        e.printStackTrace();
View Full Code Here

      return null;
    }
    String requestPath = origRequestPath.substring(contextPath.length());   
    requestPath=EscapeDecoder.urlUnescape(requestPath); // BUG 0000069 (nutchwax) and BUG 0000075 (nutchwax)
   
    WaybackRequest wbRequest = parse(requestPath);   
    if(wbRequest != null) {
      //wbRequest.fixup(httpRequest); BUG 120608
      wbRequest.setResultsPerPage(maxRecords);
    }   

    return wbRequest;
  }
View Full Code Here

TOP

Related Classes of org.archive.wayback.core.WaybackRequest

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.