Package org.archive.wayback.exception

Examples of org.archive.wayback.exception.BadQueryException


      String defaultValue) throws BadQueryException {

    String value = wbRequest.get(field);
    if (value == null) {
      if (defaultValue == null) {
        throw new BadQueryException("No " + field + " specified");
      } else {
        value = defaultValue;
      }
    }
    return value;
View Full Code Here


    int resultsPerPage = wbRequest.getResultsPerPage();
    int pageNum = wbRequest.getPageNum();
    startResult = (pageNum - 1) * resultsPerPage;

    if (resultsPerPage < 1) {
      throw new BadQueryException("resultsPerPage cannot be < 1");
    }
    if (resultsPerPage > maxRecords) {
      throw new BadQueryException("resultsPerPage cannot be > "
          + maxRecords);
    }
    if (pageNum < 1) {
      throw new BadQueryException("pageNum must be > 0");
    }

    String searchUrl = getRequired(wbRequest, WaybackConstants.REQUEST_URL);
    String searchType = getRequired(wbRequest,
        WaybackConstants.REQUEST_TYPE);
    String startDate = getRequired(wbRequest,
        WaybackConstants.REQUEST_START_DATE, Timestamp
            .earliestTimestamp().getDateStr());
    String endDate = getRequired(wbRequest,
        WaybackConstants.REQUEST_END_DATE, Timestamp.latestTimestamp()
            .getDateStr());
    String exactDate = getRequired(wbRequest,
        WaybackConstants.REQUEST_EXACT_DATE, Timestamp
            .latestTimestamp().getDateStr());

    try {
      keyUrl = canonicalizer.urlStringToKey(searchUrl);
    } catch (URIException e) {
      throw new BadQueryException("invalid "
          + WaybackConstants.REQUEST_URL + " " + searchUrl);
    }

    // set up the common Filters:

    // makes sure we don't inspect too many records: prevents DOS
    GuardRailFilter guardrail = new GuardRailFilter(maxRecords);

    // checks an exclusion service for every matching record
    ObjectFilter<SearchResult> exclusion = wbRequest.getExclusionFilter();

    // count how many results got to the ExclusionFilter:
    CounterFilter preExCounter = new CounterFilter();
    // count how many results got past the ExclusionFilter, or how
    // many total matched, if there was no ExclusionFilter:
    CounterFilter finalCounter = new CounterFilter();
   
    // has the user asked for only results on the exact host specified?
    HostMatchFilter hostMatchFilter = getExactHostFilter(wbRequest);

    if (searchType.equals(WaybackConstants.REQUEST_REPLAY_QUERY)
        || searchType.equals(WaybackConstants.REQUEST_CLOSEST_QUERY)) {

      results = new CaptureSearchResults();

      ObjectFilterChain<SearchResult> forwardFilters =
        new ObjectFilterChain<SearchResult>();

//      ObjectFilterChain<SearchResult> reverseFilters =
//        new ObjectFilterChain<SearchResult>();

      // use the same guardrail for both:
      forwardFilters.addFilter(guardrail);
//      reverseFilters.addFilter(guardrail);
     
      forwardFilters.addFilter(new DuplicateRecordFilter());
     
      // match URL key:
      forwardFilters.addFilter(new UrlMatchFilter(keyUrl));
//      reverseFilters.addFilter(new UrlMatchFilter(keyUrl));

      if(hostMatchFilter != null) {
        forwardFilters.addFilter(hostMatchFilter);
//        reverseFilters.addFilter(hostMatchFilter);
      }
     
      // be sure to only include records within the date range we want:
      // The bin search may start the forward filters at a record older
      // than we want. Since the forwardFilters only include an abort
      // endDateFilter, we might otherwise include a record before the
      // requested range.
      DateRangeFilter drFilter = new DateRangeFilter(startDate,endDate);
      forwardFilters.addFilter(drFilter);
//      reverseFilters.addFilter(drFilter);
     
      // abort processing if we hit a date outside the search range:
      forwardFilters.addFilter(new EndDateFilter(endDate));
//      reverseFilters.addFilter(new StartDateFilter(startDate));

      // for replay, do not include records that redirect to
      // themselves.. We'll leave this for both closest and replays,
      // because the only application of closest at the moment is
      // timeline in which case, we don't want to show captures that
      // redirect to themselves in the timeline if they are not viewable.
      SelfRedirectFilter selfRedirectFilter = new SelfRedirectFilter();
      selfRedirectFilter.setCanonicalizer(canonicalizer);
      forwardFilters.addFilter(selfRedirectFilter);
//      reverseFilters.addFilter(selfRedirectFilter);
     
      // possibly filter via exclusions:
      if(exclusion != null) {
        forwardFilters.addFilter(preExCounter);
        forwardFilters.addFilter(exclusion);

//        reverseFilters.addFilter(preExCounter);
//        reverseFilters.addFilter(exclusion);
      }
      forwardFilters.addFilter(finalCounter);
//      reverseFilters.addFilter(finalCounter);

      forwardFilters.addFilter(new WindowEndFilter(resultsPerPage));
//      int resultsPerDirection = (int) Math.floor(resultsPerPage / 2);
//      reverseFilters.addFilter(new WindowEndFilter(resultsPerDirection));

      startKey = keyUrl;

      try {
//        CloseableIterator<SearchResult> reverse =
//          new AdaptedObjectFilterIterator<SearchResult>(
//          source.getPrefixReverseIterator(startKey),
//          reverseFilters);

//        // reverse the reverseResults:
//        ArrayList<SearchResult> reverseResults =
//          new ArrayList<SearchResult>();
//        while(reverse.hasNext()) {
//          reverseResults.add(0, reverse.next());
//        }
       
        // now make a composite of the reverse and forwards:
       
        CloseableIterator<SearchResult> forward =
          source.getPrefixIterator(startKey);
//       
//        CompositeIterator<SearchResult> resultsItr =
//          new CompositeIterator<SearchResult>();
//        resultsItr.addComponent(reverseResults.iterator());
//        resultsItr.addComponent(forward);
       
        // and filter:
//        filterRecords(resultsItr, forwardFilters, results, true);
        filterRecords(forward, forwardFilters, results, true);

      } catch (IOException e) {
        throw new ResourceIndexNotAvailableException(
            e.getLocalizedMessage());
      }

    } else if (searchType.equals(WaybackConstants.REQUEST_URL_QUERY)) {

      results = new CaptureSearchResults();
      // build up the FilterChain(s):
      ObjectFilterChain<SearchResult> filters =
        new ObjectFilterChain<SearchResult>();
      filters.addFilter(guardrail);
      filters.addFilter(new DuplicateRecordFilter());

      filters.addFilter(new UrlMatchFilter(keyUrl));
      if(hostMatchFilter != null) {
        filters.addFilter(hostMatchFilter);
      }
      filters.addFilter(new EndDateFilter(endDate));
      // possibly filter via exclusions:
      if (exclusion != null) {
        filters.addFilter(preExCounter);
        filters.addFilter(exclusion);
      }
      filters.addFilter(finalCounter);
      // OPTIMIZ: beginning the search at the startDate causes problems
      // with deduplicated results. We need to be smarter about rolling
      // backwards a ways if we start on a deduped record.
//      startKey = keyUrl + " " + startDate;
      startKey = keyUrl + " ";

      // add the start and end windowing filters:
      filters.addFilter(new WindowStartFilter(startResult));
      filters.addFilter(new WindowEndFilter(resultsPerPage));
      try {
        filterRecords(source.getPrefixIterator(startKey), filters, results,
            true);
      } catch (IOException e) {
        throw new ResourceIndexNotAvailableException(
            e.getLocalizedMessage());
      }
     

    } else if (searchType.equals(WaybackConstants.REQUEST_URL_PREFIX_QUERY)) {

      results = new UrlSearchResults();
      // build up the FilterChain(s):
      ObjectFilterChain<SearchResult> filters =
        new ObjectFilterChain<SearchResult>();
      filters.addFilter(guardrail);
      filters.addFilter(new DuplicateRecordFilter());

      filters.addFilter(new UrlPrefixMatchFilter(keyUrl));
      if(hostMatchFilter != null) {
        filters.addFilter(hostMatchFilter);
      }
      filters.addFilter(new DateRangeFilter(startDate, endDate));
      // possibly filter via exclusions:
      if (exclusion != null) {
        filters.addFilter(preExCounter);
        filters.addFilter(exclusion);
      }
      filters.addFilter(new CaptureToUrlResultFilter());
      filters.addFilter(finalCounter);
      startKey = keyUrl;

      // add the start and end windowing filters:
      filters.addFilter(new WindowStartFilter(startResult));
      filters.addFilter(new WindowEndFilter(resultsPerPage));
      try {
        filterRecords(source.getPrefixIterator(startKey), filters, results,
            true);
      } catch (IOException e) {
        throw new ResourceIndexNotAvailableException(
            e.getLocalizedMessage());
      }

    } else {
      throw new BadQueryException("Unknown query type(" + searchType
          + "), must be " + WaybackConstants.REQUEST_REPLAY_QUERY
          + ", " + WaybackConstants.REQUEST_CLOSEST_QUERY + ", "
          + WaybackConstants.REQUEST_URL_QUERY + ", or "
          + WaybackConstants.REQUEST_URL_PREFIX_QUERY);
    }
View Full Code Here

      if(errTitle == null) {
        throw new ResourceIndexNotAvailableException("Unknown error!");
      } else if(errTitle.equals("Resource Not In Archive")) {
        throw new ResourceNotInArchiveException(errMessage);
      } else if(errTitle.equals("Bad Query Exception")) {
        throw new BadQueryException(errMessage);
      } else if(errTitle.equals("Resource Index Not Available Exception")) {
        throw new ResourceIndexNotAvailableException(errMessage);
      } else if(errTitle.equals("Access Control Exception")) {
        throw new AccessControlException(errMessage);
      } else {
View Full Code Here

    }

 
    String searchUrl = wbRequest.get(WaybackConstants.REQUEST_URL);
    if (searchUrl == null) {
      throw new BadQueryException("No " + WaybackConstants.REQUEST_URL
          + " specified");
    }

    try {
      keyUrl = canonicalizer.urlStringToKey(searchUrl);
    } catch (URIException e) {
      throw new BadQueryException("invalid "
          + WaybackConstants.REQUEST_URL + " " + searchUrl);
    }
    RangeGroup dummy = new RangeGroup("",keyUrl,"");
    int loc = Arrays.binarySearch(groups,dummy,comparator);
    if(loc < 0) {
View Full Code Here

  protected static String getRequiredMapParam(Map<String,String[]> queryMap,
      String field)
  throws BadQueryException {
    String value = getMapParam(queryMap,field);
    if(value == null) {
      throw new BadQueryException("missing field " + field);
    }
    if(value.length() == 0) {
      throw new BadQueryException("empty field " + field);     
    }
    return value;
  }
View Full Code Here

    String[] parts = WHITESPACE_PATTERN.split(query);
    for (int i = 0; i < parts.length; i++) {
      String token = parts[i];
      int colonIndex = token.indexOf(':');
      if (colonIndex == -1) {
        throw new BadQueryException("Bad search token(" + token + ")");
      }
      String key = token.substring(0, colonIndex);
      String value = token.substring(colonIndex + 1);
      // TODO: make sure key is in singleTokens?
      // let's just let em all thru for now:
View Full Code Here

    if(type.equals(WaybackConstants.REQUEST_REPLAY_QUERY) ||
        type.equals(WaybackConstants.REQUEST_URL_QUERY)) {
      results = new CaptureSearchResults();     
    } else {
      // TODO: this is wrong, but needs exploration into what NutchWax can actually do.
      throw new BadQueryException("Unable to perform path prefix requests with this index type");
    }
    NodeList channel = getSearchChannel(document);
    NodeList nodes = getSearchItems(document);

    if (channel == null || channel.getLength() != 1) {
View Full Code Here

         existStartDate=false; // BUG 120608
         startDateStr = Timestamp.earliestTimestamp().getDateStr();
       }
       int hitsPerPage = wbRequest.getResultsPerPage();
       if(hitsPerPage < 1) {
         throw new BadQueryException("Hits per page must be positive");
       }
       if(hitsPerPage > maxRecords) {
         throw new BadQueryException("Hits per page must be less than " +
             maxRecords);
       }
       int start = (wbRequest.getPageNum()-1) * hitsPerPage;
      
     /* BUG 0000155 */
        String multDet = wbRequest.get(WaybackConstants.REQUEST_MULT_DETAILS);
     String docId = wbRequest.get(WaybackConstants.REQUEST_DOC_ID);
     String indexId = wbRequest.get(WaybackConstants.REQUEST_INDEX_ID);
     /* BUG 0000155 */                          
    
       if ((urlStr==null || urlStr.length()<=0) && (docId==null || indexId==null)) {
           throw new BadQueryException("Url is empty.");
       }
       // Construct the search url.
       MutableString ms = new MutableString(this.searchUrlBase)
           .append("?query=");
       // Add 'date:...+' to query string.   
       if (existStartDate || existEndDate) { // BUG wayback 0000051;  if exist startDate OR endDate
         ms.append("date%3A").append(startDateStr).append('-').append(endDateStr);  
       }    
       else if (exactDateStr!=null) { // BUG wayback 0000153
         ms.append("closestdate%3A").append(exactDateStr);
       }
      
       ms.append('+');
       // Add 'url:URL'.
       if(wbRequest.get(WaybackConstants.REQUEST_TYPE).equals(
                  WaybackConstants.REQUEST_URL_PREFIX_QUERY)) {
           ms.append("url%3A").append(urlStr);
       } else {
           try {       
           if (docId!=null && indexId!=null) {
             // do nothing
           }
           else if (wbRequest.get(WaybackConstants.REQUEST_ALIASES)!=null && wbRequest.get(WaybackConstants.REQUEST_ALIASES).equals("true")) {
                ms.append("exacturlexpand%3A").append(java.net.URLEncoder.encode(urlStr, "UTF-8"));
           }
           else {                        
            URL url=null;
            boolean error=false;
            try {
              url=new URL(urlStr);
            }
            catch (MalformedURLException e) {
              error=true;
            }
            
            if (!error && !urlStr.endsWith("/") && url.getQuery()==null && url.getPath().indexOf('.')==-1) { // BUG nutchwax 0000357 - add also a "/" if the url's query is null and is not a file
              ms.append("exacturlexpandmin%3A").append(java.net.URLEncoder.encode(urlStr, "UTF-8"));
            }
            else {            
              ms.append("exacturl%3A").append(java.net.URLEncoder.encode(urlStr, "UTF-8"));
            }
           }
           }
           catch (UnsupportedEncodingException e) {
             throw new BadQueryException(e.toString());
           }
           catch (NullPointerException e) {
               throw new BadQueryException(e.toString());
           }
       }
       ms.append("&hitsPerPage=").append(hitsPerPage);
       ms.append("&start=").append(start);
       ms.append("&dedupField=site");       
View Full Code Here

    if(wbRequest.isReplayRequest() || wbRequest.isCaptureQueryRequest()) {
      results = new CaptureSearchResults();     
    } else {
      // TODO: this is wrong, but needs exploration into what NutchWax
      //       can actually do.
      throw new BadQueryException("Unable to perform path " +
          "prefix requests with this index type");
    }
    NodeList channel = getSearchChannel(document);
    NodeList nodes = getSearchItems(document);
View Full Code Here

       if (startDateStr == null || startDateStr.length() == 0) {
         startDateStr = Timestamp.earliestTimestamp().getDateStr();
       }
       int hitsPerPage = wbRequest.getResultsPerPage();
       if(hitsPerPage < 1) {
         throw new BadQueryException("Hits per page must be positive");
       }
       if(hitsPerPage > maxRecords) {
         throw new BadQueryException("Hits per page must be less than " +
             maxRecords);
       }
       int start = (wbRequest.getPageNum()-1) * hitsPerPage;
       if (urlStr == null || urlStr.length() <= 0) {
           throw new BadQueryException("Url is empty.");
       }
       // Construct the search url.
       MutableString ms = new MutableString(this.searchUrlBase)
           .append("?query=");
       // Add 'date:...+' to query string.
       ms.append("date%3A").append(startDateStr).append('-').append(endDateStr);
       ms.append('+');
       // Add 'url:URL'.
       if(wbRequest.isUrlQueryRequest()) {
           ms.append("url%3A");
       } else {
           ms.append("exacturl%3A");
       }
       try {
            ms.append(java.net.URLEncoder.encode("\""+urlStr+"\"", "UTF-8"));
       } catch (UnsupportedEncodingException e) {
         throw new BadQueryException(e.toString());
       }
       ms.append("&hitsPerPage=").append(hitsPerPage);
       ms.append("&start=").append(start);
       ms.append("&dedupField=site");
       // As we are always searching against a url, a
View Full Code Here

TOP

Related Classes of org.archive.wayback.exception.BadQueryException

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.