Examples of DuplicateRecordFilter


Examples of org.archive.wayback.resourceindex.filters.DuplicateRecordFilter

      // use the same guardrail for both:
      forwardFilters.addFilter(guardrail);
//      reverseFilters.addFilter(guardrail);
     
      forwardFilters.addFilter(new DuplicateRecordFilter());
     
      // match URL key:
      forwardFilters.addFilter(new UrlMatchFilter(keyUrl));
//      reverseFilters.addFilter(new UrlMatchFilter(keyUrl));

      if(hostMatchFilter != null) {
        forwardFilters.addFilter(hostMatchFilter);
//        reverseFilters.addFilter(hostMatchFilter);
      }
     
      // be sure to only include records within the date range we want:
      // The bin search may start the forward filters at a record older
      // than we want. Since the forwardFilters only include an abort
      // endDateFilter, we might otherwise include a record before the
      // requested range.
      DateRangeFilter drFilter = new DateRangeFilter(startDate,endDate);
      forwardFilters.addFilter(drFilter);
//      reverseFilters.addFilter(drFilter);
     
      // abort processing if we hit a date outside the search range:
      forwardFilters.addFilter(new EndDateFilter(endDate));
//      reverseFilters.addFilter(new StartDateFilter(startDate));

      // for replay, do not include records that redirect to
      // themselves. We'll leave this for both closest and replays,
      // because the only application of closest at the moment is
      // timeline in which case, we don't want to show captures that
      // redirect to themselves in the timeline if they are not viewable.
      SelfRedirectFilter selfRedirectFilter = new SelfRedirectFilter();
      selfRedirectFilter.setCanonicalizer(canonicalizer);
      forwardFilters.addFilter(selfRedirectFilter);
//      reverseFilters.addFilter(selfRedirectFilter);
     
      // possibly filter via exclusions:
      if(exclusion != null) {
        forwardFilters.addFilter(preExCounter);
        forwardFilters.addFilter(exclusion);

//        reverseFilters.addFilter(preExCounter);
//        reverseFilters.addFilter(exclusion);
      }
      forwardFilters.addFilter(finalCounter);
//      reverseFilters.addFilter(finalCounter);

      forwardFilters.addFilter(new WindowEndFilter(resultsPerPage));
//      int resultsPerDirection = (int) Math.floor(resultsPerPage / 2);
//      reverseFilters.addFilter(new WindowEndFilter(resultsPerDirection));

      startKey = keyUrl;

      try {
//        CloseableIterator<SearchResult> reverse =
//          new AdaptedObjectFilterIterator<SearchResult>(
//          source.getPrefixReverseIterator(startKey),
//          reverseFilters);

//        // reverse the reverseResults:
//        ArrayList<SearchResult> reverseResults =
//          new ArrayList<SearchResult>();
//        while(reverse.hasNext()) {
//          reverseResults.add(0, reverse.next());
//        }
       
        // now make a composite of the reverse and forwards:
       
        CloseableIterator<SearchResult> forward =
          source.getPrefixIterator(startKey);
//       
//        CompositeIterator<SearchResult> resultsItr =
//          new CompositeIterator<SearchResult>();
//        resultsItr.addComponent(reverseResults.iterator());
//        resultsItr.addComponent(forward);
       
        // and filter:
//        filterRecords(resultsItr, forwardFilters, results, true);
        filterRecords(forward, forwardFilters, results, true);

      } catch (IOException e) {
        throw new ResourceIndexNotAvailableException(
            e.getLocalizedMessage());
      }

    } else if (searchType.equals(WaybackConstants.REQUEST_URL_QUERY)) {

      results = new CaptureSearchResults();
      // build up the FilterChain(s):
      ObjectFilterChain<SearchResult> filters =
        new ObjectFilterChain<SearchResult>();
      filters.addFilter(guardrail);
      filters.addFilter(new DuplicateRecordFilter());

      filters.addFilter(new UrlMatchFilter(keyUrl));
      if(hostMatchFilter != null) {
        filters.addFilter(hostMatchFilter);
      }
      filters.addFilter(new EndDateFilter(endDate));
      // possibly filter via exclusions:
      if (exclusion != null) {
        filters.addFilter(preExCounter);
        filters.addFilter(exclusion);
      }
      filters.addFilter(finalCounter);
      // OPTIMIZ: beginning the search at the startDate causes problems
      // with deduplicated results. We need to be smarter about rolling
      // backwards a ways if we start on a deduped record.
//      startKey = keyUrl + " " + startDate;
      startKey = keyUrl + " ";

      // add the start and end windowing filters:
      filters.addFilter(new WindowStartFilter(startResult));
      filters.addFilter(new WindowEndFilter(resultsPerPage));
      try {
        filterRecords(source.getPrefixIterator(startKey), filters, results,
            true);
      } catch (IOException e) {
        throw new ResourceIndexNotAvailableException(
            e.getLocalizedMessage());
      }
     

    } else if (searchType.equals(WaybackConstants.REQUEST_URL_PREFIX_QUERY)) {

      results = new UrlSearchResults();
      // build up the FilterChain(s):
      ObjectFilterChain<SearchResult> filters =
        new ObjectFilterChain<SearchResult>();
      filters.addFilter(guardrail);
      filters.addFilter(new DuplicateRecordFilter());

      filters.addFilter(new UrlPrefixMatchFilter(keyUrl));
      if(hostMatchFilter != null) {
        filters.addFilter(hostMatchFilter);
      }
View Full Code Here

Examples of org.archive.wayback.resourceindex.filters.DuplicateRecordFilter

  private static String ALEXA_DAT_MIME = "alexa/dat";

  public CoreCaptureFilterGroup(LocalResourceIndex index) {
    chain = new ObjectFilterChain<CaptureSearchResult>();
    chain.addFilter(new GuardRailFilter(index.getMaxRecords()));
    chain.addFilter(new DuplicateRecordFilter());

    MimeTypeFilter mimeExcludeFilter = new MimeTypeFilter();
    mimeExcludeFilter.addMime(ALEXA_DAT_MIME);
    mimeExcludeFilter.setIncludeIfContains(false);
    chain.addFilter(new UserInfoInAuthorityFilter());
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.