Package org.archive.wayback.core

Examples of org.archive.wayback.core.CaptureSearchResults


    HostMatchFilter hostMatchFilter = getExactHostFilter(wbRequest);

    if (searchType.equals(WaybackConstants.REQUEST_REPLAY_QUERY)
        || searchType.equals(WaybackConstants.REQUEST_CLOSEST_QUERY)) {

      results = new CaptureSearchResults();

      ObjectFilterChain<SearchResult> forwardFilters =
        new ObjectFilterChain<SearchResult>();

//      ObjectFilterChain<SearchResult> reverseFilters =
//        new ObjectFilterChain<SearchResult>();

      // use the same guardrail for both:
      forwardFilters.addFilter(guardrail);
//      reverseFilters.addFilter(guardrail);
     
      forwardFilters.addFilter(new DuplicateRecordFilter());
     
      // match URL key:
      forwardFilters.addFilter(new UrlMatchFilter(keyUrl));
//      reverseFilters.addFilter(new UrlMatchFilter(keyUrl));

      if(hostMatchFilter != null) {
        forwardFilters.addFilter(hostMatchFilter);
//        reverseFilters.addFilter(hostMatchFilter);
      }
     
      // be sure to only include records within the date range we want:
      // The binary search may start the forward filters at a record older
      // than we want. Since the forwardFilters only include an abort
      // endDateFilter, we might otherwise include a record before the
      // requested range.
      DateRangeFilter drFilter = new DateRangeFilter(startDate,endDate);
      forwardFilters.addFilter(drFilter);
//      reverseFilters.addFilter(drFilter);
     
      // abort processing if we hit a date outside the search range:
      forwardFilters.addFilter(new EndDateFilter(endDate));
//      reverseFilters.addFilter(new StartDateFilter(startDate));

      // for replay, do not include records that redirect to
      // themselves. We'll leave this for both closest and replay,
      // because the only application of closest at the moment is the
      // timeline, in which case we don't want to show captures that
      // redirect to themselves in the timeline if they are not viewable.
      SelfRedirectFilter selfRedirectFilter = new SelfRedirectFilter();
      selfRedirectFilter.setCanonicalizer(canonicalizer);
      forwardFilters.addFilter(selfRedirectFilter);
//      reverseFilters.addFilter(selfRedirectFilter);
     
      // possibly filter via exclusions:
      if(exclusion != null) {
        forwardFilters.addFilter(preExCounter);
        forwardFilters.addFilter(exclusion);

//        reverseFilters.addFilter(preExCounter);
//        reverseFilters.addFilter(exclusion);
      }
      forwardFilters.addFilter(finalCounter);
//      reverseFilters.addFilter(finalCounter);

      forwardFilters.addFilter(new WindowEndFilter(resultsPerPage));
//      int resultsPerDirection = (int) Math.floor(resultsPerPage / 2);
//      reverseFilters.addFilter(new WindowEndFilter(resultsPerDirection));

      startKey = keyUrl;

      try {
//        CloseableIterator<SearchResult> reverse =
//          new AdaptedObjectFilterIterator<SearchResult>(
//          source.getPrefixReverseIterator(startKey),
//          reverseFilters);

//        // reverse the reverseResults:
//        ArrayList<SearchResult> reverseResults =
//          new ArrayList<SearchResult>();
//        while(reverse.hasNext()) {
//          reverseResults.add(0, reverse.next());
//        }
       
        // the reverse/forward composite is disabled (see commented-out
        // code); only the forward iterator is used now:
       
        CloseableIterator<SearchResult> forward =
          source.getPrefixIterator(startKey);
//       
//        CompositeIterator<SearchResult> resultsItr =
//          new CompositeIterator<SearchResult>();
//        resultsItr.addComponent(reverseResults.iterator());
//        resultsItr.addComponent(forward);
       
        // and filter:
//        filterRecords(resultsItr, forwardFilters, results, true);
        filterRecords(forward, forwardFilters, results, true);

      } catch (IOException e) {
        throw new ResourceIndexNotAvailableException(
            e.getLocalizedMessage());
      }

    } else if (searchType.equals(WaybackConstants.REQUEST_URL_QUERY)) {

      results = new CaptureSearchResults();
      // build up the FilterChain(s):
      ObjectFilterChain<SearchResult> filters =
        new ObjectFilterChain<SearchResult>();
      filters.addFilter(guardrail);
      filters.addFilter(new DuplicateRecordFilter());
View Full Code Here


      IOException, LiveDocumentNotAvailableException {
   
    Resource resource = null;
    WaybackRequest wbRequest = makeCacheWBRequest(url,maxCacheMS,bUseOlder);
   
    CaptureSearchResults results = null;
    try {
      SearchResults gresults = index.query(wbRequest);
      if(!(gresults instanceof CaptureSearchResults)) {
        throw new IOException("bad result type...");
      }
      results = (CaptureSearchResults) gresults;
    } catch (ResourceNotInArchiveException e) {
//      e.printStackTrace();
      throw e;
    } catch (WaybackException e) {
      e.printStackTrace();
      throw new IOException(e.getMessage());
    }
    SearchResult result = results.getClosest(wbRequest);
    if(result != null) {
      if(isForgedFailedSearchResult(result)) {
        if(isForgedFailRecentEnough(result)) {
          LOGGER.info(url.toString() + " has failed recently");
          throw new LiveDocumentNotAvailableException("failed prev");
View Full Code Here

      ObjectFilter<SearchResult> filter) {
    SearchResults results = null;
    NodeList filters = getRequestFilters(document);
    String resultsType = getResultsType(document);
    if(resultsType.equals(WaybackConstants.RESULTS_TYPE_CAPTURE)) {
      results = new CaptureSearchResults();
    } else {
      results = new UrlSearchResults();
    }
    for(int i = 0; i < filters.getLength(); i++) {
      String key = filters.item(i).getNodeName();
View Full Code Here

    try {
      SearchResults results = collection.getResourceIndex().query(wbRequest);
      if(!(results instanceof CaptureSearchResults)) {
        throw new ResourceNotAvailableException("Bad results...");
      }
      CaptureSearchResults captureResults = (CaptureSearchResults) results;
 
      // TODO: check which versions are actually accessible right now?
      SearchResult closest = captureResults.getClosest(wbRequest);
      resource = collection.getResourceStore().retrieveResource(closest);
 
      replay.renderResource(httpRequest, httpResponse, wbRequest,
          closest, resource, uriConverter, captureResults);
    } catch(WaybackException e) {
View Full Code Here

    try {
      SearchResults results = collection.getResourceIndex().query(wbRequest);
      if(results.getResultsType().equals(
          WaybackConstants.RESULTS_TYPE_CAPTURE)) {
        CaptureSearchResults cResults = (CaptureSearchResults) results;
        SearchResult closest = cResults.getClosest(wbRequest);
        closest.put(WaybackConstants.RESULT_CLOSEST_INDICATOR,
            WaybackConstants.RESULT_CLOSEST_VALUE);
        query.renderUrlResults(httpRequest,httpResponse,wbRequest,
            results,uriConverter);
View Full Code Here

    SearchResults results;
    String type = wbRequest.get(WaybackConstants.REQUEST_TYPE);
    if(type.equals(WaybackConstants.REQUEST_REPLAY_QUERY) ||
        type.equals(WaybackConstants.REQUEST_URL_QUERY)) {
      results = new CaptureSearchResults();     
    } else {
      // TODO: this is wrong, but needs exploration into what NutchWax can actually do.
      throw new BadQueryException("Unable to perform path prefix requests with this index type");
    }
    NodeList channel = getSearchChannel(document);
View Full Code Here

      e.printStackTrace();
      throw new ResourceIndexNotAvailableException("Unexpected SAX: " +
          e.getMessage());
    }

    CaptureSearchResults results;
    if(wbRequest.isReplayRequest() || wbRequest.isCaptureQueryRequest()) {
      results = new CaptureSearchResults();     
    } else {
      // TODO: this is wrong, but needs exploration into what NutchWax
      //       can actually do.
      throw new BadQueryException("Unable to perform path " +
          "prefix requests with this index type");
    }
    NodeList channel = getSearchChannel(document);
    NodeList nodes = getSearchItems(document);

    if (channel == null || channel.getLength() != 1) {
      // TODO: better error for user:
         throw new ResourceNotInArchiveException("No results for " +
             requestUrl);
       }

       if (nodes == null) {
      // TODO: better error for user:
         throw new ResourceNotInArchiveException("No results for " +
             requestUrl);
       }

       for (int i = 0; i < nodes.getLength(); i++) {
        
           Element e = (Element) nodes.item(i);

           List<CaptureSearchResult> resultsList = itemToSearchResults(e);
           if(resultsList != null) {
             for(CaptureSearchResult result : resultsList) {
               results.addSearchResult(result);
             }
           }
       }
       Element channelElement = (Element) channel.item(0);
      
       results.putFilter(SearchResults.RESULTS_FIRST_RETURNED,
           getNodeContent(channelElement,NUTCH_FIRST_RESULT));
      
       results.putFilter(SearchResults.RESULTS_NUM_RESULTS,
           getNodeContent(channelElement,NUTCH_NUM_RESULTS));
      
       results.putFilter(SearchResults.RESULTS_NUM_RETURNED,
           getNodeContent(channelElement,NUTCH_NUM_RETURNED));
      
       results.putFilter(SearchResults.RESULTS_REQUESTED,
           String.valueOf(wbRequest.getResultsPerPage()));
      
    results.putFilter(WaybackRequest.REQUEST_START_DATE,
        Timestamp.earliestTimestamp().getDateStr());
   
       results.putFilter(WaybackRequest.REQUEST_END_DATE,
           Timestamp.latestTimestamp().getDateStr());
    return results;
  }
View Full Code Here

        urlKey += " " + replayTimestamp;
      }
    }

    // the CaptureSearchResults we are about to return:
    CaptureSearchResults results = new CaptureSearchResults();
    // the various filters to apply to the results:
    ObjectFilterChain<CaptureSearchResult> filters =
      new ObjectFilterChain<CaptureSearchResult>();

    // Groupings of filters for... sanity and summary annotation of results:
    // Windows:
    WindowFilterGroup<CaptureSearchResult> window =
      new WindowFilterGroup<CaptureSearchResult>(wbRequest,this);
    List<CaptureFilterGroup> groups = getRequestFilterGroups(wbRequest);
    if(filter != null) {
      filters.addFilter(filter);
    }

    for(CaptureFilterGroup cfg : groups) {
      filters.addFilters(cfg.getFilters());
    }
    filters.addFilters(window.getFilters());
   
    CloseableIterator<CaptureSearchResult> itr = null;
   
    try {
      PerfStats.timeStart(PerfStat.IndexLoad);
     
      itr = new ObjectFilterIterator<CaptureSearchResult>(source.getPrefixIterator(urlKey),filters);
     
      while(itr.hasNext()) {
        results.addSearchResult(itr.next());
      }
    } catch(RuntimeIOException e) {
      throw new ResourceIndexNotAvailableException(e.getLocalizedMessage());
    } finally {
      if (itr != null) {
View Full Code Here

    return results;
  }
  private CaptureSearchResults documentToCaptureSearchResults(
      Document document, ObjectFilter<CaptureSearchResult> filter)
  throws ResourceNotInArchiveException {
    CaptureSearchResults results = new CaptureSearchResults();
    NodeList xresults = getSearchResults(document);
    int numAdded = 0;
    for(int i = 0; i < xresults.getLength(); i++) {
      Node xresult = xresults.item(i);
      CaptureSearchResult result = searchElementToCaptureSearchResult(xresult);
     
      int ruling = ObjectFilter.FILTER_INCLUDE;
      if (filter != null) {
        ruling = filter.filterObject(result);
      }
     
      if (ruling == ObjectFilter.FILTER_ABORT) {
        break;
      } else if (ruling == ObjectFilter.FILTER_INCLUDE) {
        numAdded++;
        results.addSearchResult(result, true);
      }
    }
    if(numAdded == 0) {
      throw new ResourceNotInArchiveException("No documents matching" +
          " filter");
View Full Code Here

     * @param closestIndex 0-based index of resource to be marked as <i>closest</i>
     * @param resources sequence of WarcResources
     * @return CaptureSearchResults built
     */
    protected CaptureSearchResults setupCaptures(int closestIndex, Resource... resources) throws Exception {
        CaptureSearchResults results = new CaptureSearchResults();
        for (Resource res : resources) {
            CaptureSearchResult result = new CaptureSearchResult();
            // TODO: Resource should have methods for accessing URI and date
            if (res instanceof WarcResource) {
                // TODO: want to use WARCRecordToSearchResultAdapter? WarcResource
                // has no method to retrieve the underlying WARCRecord.
                ArchiveRecordHeader h = ((WarcResource)res).getWarcHeaders();
                String originalUrl = h.getUrl();
                String ts = (String)h.getHeaderValue("WARC-Date");
                // WARC-Date is in ISOZ format.
                ts = transformWARCDate(ts);
                result.setOriginalUrl(originalUrl);
                result.setCaptureTimestamp(ts);
                result.setOffset(0);
                // this is (W)ARC file name in real practice. here we use
                // DT14 timestamp as pseudo filename (.warc.gz suffix is not
                // essential).
                result.setFile(ts + ".warc.gz");
            } else if (res instanceof ArcResource) {
                // TODO: should use ARCRecordToSearchResultAdapter? ArcResource has
                // getArcRecord() methods whose result may be cast to ARCRecord.
                // NB: ArcResource#getARCMetadata() creates a new Map object.
                Map<String, String> meta = ((ArcResource)res).getARCMetadata();
                String originalUrl = meta.get(ArchiveFileConstants.URL_FIELD_KEY);
                String ts = meta.get(ArchiveFileConstants.DATE_FIELD_KEY);
                result.setOriginalUrl(originalUrl);
                result.setCaptureTimestamp(ts);
            } else {
                throw new AssertionError("unexpected Resource type: " + res.getClass());
            }
            result.setHttpCode(Integer.toString(res.getStatusCode()));
            // CaptureSearchResultMatcher fails without this, but the actual value does not
            // matter, so set it to 0.
            result.setOffset(0);
            assertTrue("invalid timestamp " + result.getCaptureTimestamp(),
                    validTimestamp(result.getCaptureTimestamp()));
            if (closestIndex == 0) {
                result.setClosest(true);
                results.setClosest(result);
                EasyMock.expect(replay.getClosest(wbRequest, results)).andReturn(result);
            }
           
            // Note: AccessPoint passes a copy of CaptureSearchResult in some cases (e.g. the Replay_Revisit() test),
            // so we need to use a custom argument matcher.
            EasyMock.expect(resourceStore.retrieveResource(eqCaptureSearchResult(result))).andReturn(res).anyTimes();
           
            results.addSearchResult(result);
            --closestIndex;
        }
        EasyMock.expect(resourceIndex.query(wbRequest)).andReturn(results);
       
        return results;
View Full Code Here

TOP

Related Classes of org.archive.wayback.core.CaptureSearchResults

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.