Package org.archive.wayback.core

Examples of org.archive.wayback.core.WaybackRequest


  }

  private void testFindRange(final AlphaPartitionedIndex apIndex,
      final String url, final String wantGroup) throws URIException,
      BadQueryException, ResourceIndexNotAvailableException {
    WaybackRequest r = new WaybackRequest();
    r.put(WaybackConstants.REQUEST_URL,apIndex.canonicalize(url));
    RangeGroup g = apIndex.getRangeGroupForRequest(r);
    assertEquals(g.getName(),wantGroup);   
  }
View Full Code Here


      }     
     
      LOGGER.fine(String.format("Parsed format(%s) URL(%s)", format,
          urlStr));

      WaybackRequest wbRequest = new WaybackRequest();
      // ludab changes nov 30 2012 to add timemap paging

      Matcher matcher = WB_REQUEST_REGEX.matcher(urlStr);
      String startDate = getEarliestTimestamp();

      if (matcher != null && matcher.matches()) {

        String dateStr = matcher.group(1);
        urlStr = matcher.group(4);
        wbRequest.put(PAGE_STARTS, dateStr);

        if (dateStr.length() == 0) {
          startDate = getEarliestTimestamp();

        } else {
          startDate = Timestamp.parseAfter(dateStr).getDateStr();

        }

      }

      if (wbRequest.getStartTimestamp() == null) {
        // ludab nov30 timemap paging
        wbRequest.setStartTimestamp(startDate);

        // wbRequest.setStartTimestamp(getEarliestTimestamp());
      }
      wbRequest.setAnchorTimestamp(getLatestTimestamp());
      if (wbRequest.getEndTimestamp() == null) {
        wbRequest.setEndTimestamp(getLatestTimestamp());
      }
      wbRequest.setCaptureQueryRequest();
      wbRequest.setMementoTimemapFormat(format);
      wbRequest.setRequestUrl(urlStr);
      int pagemax = MementoUtils.getPageMaxRecord(accessPoint);
      if (pagemax == 0) {
        wbRequest.setResultsPerPage(getMaxRecords());
      } else {
        wbRequest.setResultsPerPage(pagemax);
      }
      return wbRequest;
    }
    return null;
  }
View Full Code Here

     * <p>%-encoded timestamp.</p>
     * @throws Exception
     */
    public void testPathPrefixDatePrefixEncoded() throws Exception {
    {
      WaybackRequest wbr = parse("/web/2010%2A/http://www.yahoo.com/*");
      assertNotNull(wbr);
      assertTrue(wbr.isUrlQueryRequest());
      assertEquals("20100101000000", wbr.getStartTimestamp());
      assertEquals("20101231235959", wbr.getEndTimestamp());
      assertEquals("http://www.yahoo.com/", wbr.getRequestUrl());
    }
    // negative case - %2A doesn't make it path-prefix.
    {
      WaybackRequest wbr = parse("/web/2010%2A/http://www.yahoo.com/%2A");
      assertNotNull(wbr);
      assertTrue(wbr.isCaptureQueryRequest());
      assertEquals("20100101000000", wbr.getStartTimestamp());
      assertEquals("20101231235959", wbr.getEndTimestamp());
      assertEquals("http://www.yahoo.com/%2A", wbr.getRequestUrl());
    }
    }
View Full Code Here

     * start and end timestamp.</p>
     * <p>timerange without "*" is not recognized. it could be.</p>
     */
  public void testPathPrefixDateRange() throws Exception {
    {
      WaybackRequest wbr1 = parse("/web/20100101-20100531*/http://www.yahoo.com/*");
      assertNotNull(wbr1);
      assertTrue(wbr1.isUrlQueryRequest());
      assertEquals("20100101000000", wbr1.getStartTimestamp());
      assertEquals("20100531235959", wbr1.getEndTimestamp());
      assertEquals("http://www.yahoo.com/", wbr1.getRequestUrl());
    }

    // TODO: date range without "*"
  }
 
View Full Code Here

   * %-encoded version.
   * @throws Exception
   */
  public void testPathPrefixdateRangeEncoded() throws Exception {
    {
      WaybackRequest wbr = parse("/web/20100101%2D20100531%2A/http://www.yahoo.com/*");
      assertNotNull(wbr);
      assertTrue(wbr.isUrlQueryRequest());
      assertEquals("20100101000000", wbr.getStartTimestamp());
      assertEquals("20100531235959", wbr.getEndTimestamp());
      assertEquals("http://www.yahoo.com/", wbr.getRequestUrl());
    }
  }
View Full Code Here

   * for non-URL paths.</p>
   * @throws Exception
   */
  public void testPathDateless() throws Exception {
    {
      WaybackRequest wbr = parse("/web/http://www.yahoo.com/");
      checkPathDateless(wbr, "http://www.yahoo.com/");
    }
    {
      WaybackRequest wbr = parse("/web/https://www.yahoo.com/");
      checkPathDateless(wbr, "https://www.yahoo.com/");
    }
    {
      WaybackRequest wbr = parse("/web/http://www.yahoo.com:8080/");
      checkPathDateless(wbr, "http://www.yahoo.com:8080/");
    }
    // some client canonicalizes "//" in path into "/".
    // (why this isn't done for other patterns of requests?)
    {
      WaybackRequest wbr = parse("/web/http:/www.yahoo.com/");
      checkPathDateless(wbr, "http://www.yahoo.com/");
    }
    // doesn't repair "https:/"
    {
      WaybackRequest wbr = parse("/web/https:/www.yahoo.com/");
      //checkPathDateless(wbr, "https://www.yahoo.com/");
      assertNull(wbr);
    }
    // doesn't repair "ftp:/" either.
    {
      WaybackRequest wbr = parse("/web/ftp:/www.yahoo.com/afile");
      assertNull(wbr);
    }
    // scheme-relative - results in NullPointerException FIXME
    {
      try {
        WaybackRequest wbr = parse("/web///www.yahoo.com/");
        assertNull(wbr);
      } catch (NullPointerException ex) {
        // current behavior - FIXME.
      }
    }
    // regular case.
    {
      WaybackRequest wbr = parse("/web/www.yahoo.com/");
      checkPathDateless(wbr, "http://www.yahoo.com/");
    }
    // scheme-less URL with user info is rejected
    // TODO: why is this rejected?
    {
      try {
        @SuppressWarnings("unused")
        WaybackRequest wbr = parse("/web/user@www.yahoo.com/");
        fail("BadQueryException was not thrown");
      } catch (BadQueryException ex) {
        // expected;
      }
    }
    // but it's accepted with scheme.
    // TODO: should this be rejected as well?
    {
      WaybackRequest wbr = parse("/web/http://user@www.yahoo.com/");
      checkPathDateless(wbr, "http://user@www.yahoo.com/");
    }
    // just make sure path and query parts in requestUrl are preserved.
    {
      WaybackRequest wbr = parse("/web/www.yahoo.com/apis?v=2");
      checkPathDateless(wbr, "http://www.yahoo.com/apis?v=2");
    }
    // doesn't look like an URL.
    {
      WaybackRequest wbr = parse("/web/images/foo.gif");
      assertNull(wbr);
    }
    {
      WaybackRequest wbr = parse("/web/handler.php?url=http://www.yahoo.com/");
      assertNull(wbr);
    }
    // TODO: shouldn't this be parsed as dateless URL-Query?
    {
      WaybackRequest wbr = parse("/web/http://www.yahoo.com/*");
      checkPathDateless(wbr, "http://www.yahoo.com/*");
    }
    // ditto
    {
      WaybackRequest wbr = parse("/web/www.yahoo.com/*");
      checkPathDateless(wbr, "http://www.yahoo.com/*");
    }
  }
 
View Full Code Here

  public void testPathDatelessWithDateHeader() throws Exception {
    final String dateHeader = "Thu, 24 Apr 2014 21:15:51 UTC+00:00";
    final Date date = (new SimpleDateFormat("EEE, dd MMM yyyy hh:mm:ss z", Locale.ENGLISH)).parse(dateHeader);
    {
      acceptDatetimeHeader = dateHeader;
      WaybackRequest wbr = parse("/web/http://www.yahoo.com/");
      assertNotNull(wbr);
      assertTrue(wbr.isReplayRequest());
      assertFalse(wbr.isBestLatestReplayRequest());
      assertEquals(EXPECTED_START_TIMESTAMP, wbr.getStartTimestamp());
      assertEquals(EXPECTED_END_TIMESTAMP, wbr.getEndTimestamp());
      assertEquals(date, wbr.getReplayDate());
      assertEquals(date, wbr.getAnchorDate());
    }
    // invalid Accept-Datetime header
    {
      acceptDatetimeHeader = "invalid date";
      try {
        @SuppressWarnings("unused")
        WaybackRequest wbr = parse("/web/http://www.yahoo.com/");
        fail("did not throw exception");
      } catch (TimeGateBadQueryException ex) {
        // expected
      }
    }
    // alternate Accept-Timestamp header. as long as it is valid,
    // invalid value in Accept-Datetime header doesn't cause exception.
    {
      acceptDatetimeHeader = "invalid date";
      acceptTimestampHeader = "20140424211551";
      WaybackRequest wbr = parse("/web/http://www.yahoo.com/");
      assertNotNull(wbr);
      assertTrue(wbr.isReplayRequest());
      assertFalse(wbr.isBestLatestReplayRequest());
      assertEquals(EXPECTED_START_TIMESTAMP, wbr.getStartTimestamp());
      assertEquals(EXPECTED_END_TIMESTAMP, wbr.getEndTimestamp());
      assertEquals(date, wbr.getReplayDate());
      assertEquals(date, wbr.getAnchorDate());
    }
    // invalid value in Accept-Timestamp header is silently ignored,
    // unless Accept-Datetime also has an invalid value.
    {
      acceptDatetimeHeader = null;
      acceptTimestampHeader = "*INVALID*";
      WaybackRequest wbr = parse("/web/http://www.yahoo.com/");
      assertNotNull(wbr);
      assertTrue(wbr.isReplayRequest());
      assertTrue(wbr.isBestLatestReplayRequest());
      assertEquals(EXPECTED_START_TIMESTAMP, wbr.getStartTimestamp());
      assertEquals(EXPECTED_END_TIMESTAMP, wbr.getEndTimestamp());
    }
  }
View Full Code Here

   * some pathological cases.
   * @throws Exception
   */
    public void testPathological() throws Exception {
    {
      WaybackRequest wbr = parse("/web/20100101*30/http://www.yahoo.com/?p=*");
      assertNull(wbr);
    }
    {
      WaybackRequest wbr = parse("/web/*20100101*/http://www.yahoo.com/");
      assertNull(wbr);
    }
    {
      WaybackRequest wbr = parse("/web/20100101*im_/http://www.yahoo.com/a.png");
      assertNull(wbr);
    }
    // TODO: should we accept this?
    {
      WaybackRequest wbr = parse("/web//20100101*/http://www.yahoo.com/");
      assertNull(wbr);
    }
    }
View Full Code Here

        uriConverter = EasyMock.createMock(ResultURIConverter.class);
       
        response = EasyMock.createMock(HttpServletResponse.class);
        EasyMock.expect(response.getOutputStream()).andReturn(servletOutput);
       
        wbRequest = new WaybackRequest();
        wbRequest.setFrameWrapperContext(false);
       
        result = new CaptureSearchResult();
        result.setOriginalUrl("http://www.example.com/");
        result.setCaptureTimestamp("20100101123456");
View Full Code Here

   * @throws BetterRequestException
   */
  public void testParseString() throws Exception {
    BaseRequestParser wrapped = new ArchivalUrlRequestParser();
    ReplayRequestParser p = new ReplayRequestParser(wrapped);
    WaybackRequest r;
    AccessPoint ap = null;
    r = p.parse("",ap);
    assertNull("Should not parse empty string", r);
    r = p.parse("20070101000000/foo.com",ap);
    assertNotNull("Should parse legit request sans scheme", r);
    assertEquals("parsed request Url",r.getRequestUrl(),"http://foo.com");
    assertEquals("Parsed timestamp","20070101000000",r.getReplayTimestamp());

    r = p.parse("20070101000000/foo.com/",ap);
    assertEquals("parsed request Url, maintaining trailing slash",
        "http://foo.com/",r.getRequestUrl());

    r = p.parse("200701010000/foo.com",ap);
    assertEquals("parsed partial date",
        "http://foo.com",r.getRequestUrl());
    assertEquals("Parsed partial timestamp to earliest",
        "20070101000000",r.getReplayTimestamp());

    r = p.parse("20070101000000/http://foo.com",ap);
    assertEquals("parsed request Url with scheme",
        "http://foo.com",r.getRequestUrl());

    r = p.parse("20070101000000/http://foo.com/",ap);
    assertEquals("parsed request Url with scheme and trailing slash",
        "http://foo.com/",r.getRequestUrl());

    r = p.parse("20070101000000/ftp://foo.com/",ap);
    assertEquals("parsed request Url with ftp scheme",
        "ftp://foo.com/",r.getRequestUrl());
   
    r = p.parse("20070101000000/https://foo.com/",ap);
    assertEquals("parsed request Url with https scheme",
        "https://foo.com/",r.getRequestUrl());

    r = p.parse("20070101000000js_/http://foo.com/",ap);
    assertEquals("parsed request Url with js_ flag",
        "http://foo.com/",r.getRequestUrl());
    assertTrue("parsed js_ flag",r.isJSContext());
    assertFalse("css not set",r.isCSSContext());

    r = p.parse("20070101000000cs_/http://foo.com/",ap);
    assertEquals("parsed request Url with cs_ flag",
        "http://foo.com/",r.getRequestUrl());
    assertTrue("parsed cs_ flag",r.isCSSContext());
    assertFalse("js not set",r.isJSContext());

    r = p.parse("20070101000000cs_js_/http://foo.com/",ap);
    assertEquals("parsed request Url with cs_ and js_ flags",
        "http://foo.com/",r.getRequestUrl());
    assertTrue("parsed cs_ flag",r.isCSSContext());
    assertTrue("parsed js_ flag",r.isJSContext());

    r = p.parse("20070101000000js_cs_/http://foo.com/",ap);
    assertEquals("parsed request Url with cs_ and js_ flags, backvards",
        "http://foo.com/",r.getRequestUrl());
    assertTrue("parsed cs_ flag",r.isCSSContext());
    assertTrue("parsed js_ flag",r.isJSContext());

    r = p.parse("20070101000000un_/http://foo.com/",ap);
    assertEquals("parsed request Url with unknown flag",
        "http://foo.com/",r.getRequestUrl());
    assertFalse("no cs_ flag",r.isCSSContext());
    assertFalse("no js_ flag",r.isJSContext());

    r = p.parse("20070101000000un_js_cs_/http://foo.com/",ap);
    assertEquals("parsed request Url with falgs and unknown flag",
        "http://foo.com/",r.getRequestUrl());
    assertTrue("parsed cs_ flag",r.isCSSContext());
    assertTrue("parsed js_ flag",r.isJSContext());

    r = p.parse("20070101000000js_cs_un_/http://foo.com/",ap);
    assertEquals("parsed request Url with falgs and unknown flag at end",
        "http://foo.com/",r.getRequestUrl());
    assertTrue("parsed cs_ flag",r.isCSSContext());
    assertTrue("parsed js_ flag",r.isJSContext());

    r = p.parse("20070101000000un_js_cs_un_/http://foo.com/",ap);
    assertEquals("parsed request Url with falgs and unknown flags",
        "http://foo.com/",r.getRequestUrl());
    assertTrue("parsed cs_ flag",r.isCSSContext());
    assertTrue("parsed js_ flag",r.isJSContext());

  }
View Full Code Here

TOP

Related Classes of org.archive.wayback.core.WaybackRequest

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.