Package org.archive.wayback.exception

Examples of org.archive.wayback.exception.ResourceNotAvailableException


      throws ResourceNotAvailableException {
   
    String filename = result.getFile();

    if (filename == null || filename.isEmpty()) {
      throw new ResourceNotAvailableException("No ARC/WARC name in search result...", filename);
    }
   
    Resource resource = null;
    boolean breakOnErr = false;
   
    StringBuilder excMsg = new StringBuilder();
    IOException lastExc = null;
       
    for (SourceResolver resolver : sources) {
     
      String[] paths = null;
     
      try {
        paths = resolver.lookupPath(filename);
      } catch (IOException io) {
        if (excMsg.length() > 0) {
          excMsg.append(" ");
        }
        excMsg.append(io.getMessage());
        lastExc = io;
       
        if (failOnFirstUnavailable) {
          breakOnErr = true;
          break;
        }
      }
     
      if (paths.length == 0) {
        continue;
      }
     
      for (String path : paths) {
        try {
          resource = getResource(path, result);
         
          if (resource != null) {
            return resource;
          }
         
        } catch (IOException io) {
          if (excMsg.length() > 0) {
            excMsg.append(" ");
          }
          excMsg.append(io.getMessage());
          lastExc = io;
         
          if (failOnFirstUnavailable) {
            breakOnErr = true;
            break;
          }
        }
      }
     
      if (breakOnErr) {
        break;
      }
    }
   
    if (lastExc == null) {
      lastExc = new FileNotFoundException(filename);
      excMsg.append("File not Found: " + filename);
    }

    ResourceNotAvailableException rnae = new ResourceNotAvailableException(excMsg.toString(), filename, lastExc);
    throw rnae;
  }
View Full Code Here


        r = WARCArchiveRecordToResource(reader.get(), reader);
      }
    else
      {
      is.close();
        throw new ResourceNotAvailableException("Unknown extension");
      }
   
    return r;
  }
View Full Code Here

      r = WARCArchiveRecordToResource(reader.get(), reader);

    } else {
      is.close();
      raf.close();
      throw new ResourceNotAvailableException("Unknown extension");
    }

    return r;
  }
View Full Code Here

    } else if(reader instanceof WARCReader) {
      WARCReader wreader = (WARCReader) reader;
      r = WARCArchiveRecordToResource(wreader.get(),wreader);
     
    } else {
      throw new ResourceNotAvailableException("Unknown ArchiveReader");
    }
    long elapsed = System.currentTimeMillis() - start;
    PerformanceLogger.noteElapsed("Http11Resource", elapsed, url.toExternalForm());
    return r;
  }
View Full Code Here

 
  public static Resource ARCArchiveRecordToResource(ArchiveRecord rec,
      ArchiveReader reader) throws ResourceNotAvailableException, IOException {

    if (!(rec instanceof ARCRecord)) {
      throw new ResourceNotAvailableException("Bad ARCRecord format");
    }
    ArcResource ar = new ArcResource((ARCRecord) rec, reader);
    ar.parseHeaders();
    return ar;
  }
View Full Code Here

  public static Resource WARCArchiveRecordToResource(ArchiveRecord rec,
      ArchiveReader reader) throws ResourceNotAvailableException, IOException {

    if (!(rec instanceof WARCRecord)) {
      throw new ResourceNotAvailableException("Bad WARCRecord format");
    }
    WarcResource wr = new WarcResource((WARCRecord) rec, reader);
    wr.parseHeaders();
    return wr;
  }
View Full Code Here

    if (GzipReader.isGzipped(r.pbin)) {
      r.gzipReader = new GzipReader(r.pbin);
      if ( (r.gzipEntry = r.gzipReader.getNextEntry()) != null ) {
        in = new ByteCountingPushBackInputStream(new BufferedInputStream( r.gzipEntry.getInputStream(), 8192), 32);
      } else {
        throw new ResourceNotAvailableException("GZip entry is invalid");
      }
    }
    else {
      in = r.pbin;
    }
    Payload payload = null;
    HttpHeader httpHeader = null;
    if (ArcReaderFactory.isArcRecord(in)) {
      r.arcReader = ArcReaderFactory.getReaderUncompressed();
      r.arcReader.setUriProfile(UriProfile.RFC3986_ABS_16BIT_LAX);
      r.arcReader.setBlockDigestEnabled(false);
      r.arcReader.setPayloadDigestEnabled(false);
      r.arcRecord = r.arcReader.getNextRecordFrom(in, offset);
      if (r.arcRecord != null) {
        payload = r.arcRecord.getPayload();
        if (payload != null) {
          httpHeader = r.arcRecord.getHttpHeader();
        }
        if (httpHeader != null) {
          r.payloadStream = httpHeader.getPayloadInputStream();
          r.length = httpHeader.payloadLength;
          r.status = httpHeader.statusCode;
        } else if (payload != null) {
          r.payloadStream = payload.getInputStreamComplete();
          r.length = payload.getTotalLength();
          r.status = 200;
        } else {
          r.payloadStream = new ByteArrayInputStream(new byte[0]);
          r.length = 0;
          r.status = 200;
        }
      }
    }
    else if ( WarcReaderFactory.isWarcRecord(in) ) {
      r.warcReader = WarcReaderFactory.getReaderUncompressed();
      r.warcReader.setWarcTargetUriProfile(UriProfile.RFC3986_ABS_16BIT_LAX);
      r.warcReader.setBlockDigestEnabled(false);
      r.warcReader.setPayloadDigestEnabled(false);
      r.warcRecord = r.warcReader.getNextRecordFrom(in, offset);
      if (r.warcRecord != null) {
        payload = r.warcRecord.getPayload();
        if (payload != null) {
          httpHeader = r.warcRecord.getHttpHeader();
        }
        if (httpHeader != null) {
          r.payloadStream = httpHeader.getPayloadInputStream();
          r.length = httpHeader.payloadLength;
          r.status = httpHeader.statusCode;
        } else if (payload != null) {
          r.payloadStream = payload.getInputStreamComplete();
          r.length = payload.getTotalLength();
          r.status = 200;
        } else {
          r.payloadStream = new ByteArrayInputStream(new byte[0]);
          r.length = 0;
          r.status = 200;
        }
      }
    }
    else {
      throw new ResourceNotAvailableException("Unknown archive record");
    }
    if (r.payloadStream == null) {
      r.close();
      r = null;
    } else {
View Full Code Here

  {   
    try {
      PerfStats.timeStart(PerfStat.WArcResource);
     
      if ((skipFiles != null) && skipFiles.contains(closest.getFile())) {
        throw new ResourceNotAvailableException("Revisit: Skipping already failed " + closest.getFile());
      }
     
      return getCollection().getResourceStore().retrieveResource(closest);
    } finally {
      PerfStats.timeEnd(PerfStat.WArcResource);
View Full Code Here

   
    SearchResults results = queryIndex(wbRequest);
    p.queried();
   
    if(!(results instanceof CaptureSearchResults)) {
      throw new ResourceNotAvailableException("Bad results...");
    }
    CaptureSearchResults captureResults =
      (CaptureSearchResults) results;

   
    CaptureSearchResult closest = null;
   
    closest =
      getReplay().getClosest(wbRequest, captureResults);
   
    //CaptureSearchResult originalClosest = closest;
   
    int counter = 0;
   
    //TODO: parameterize
    //int maxTimeouts = 2;
    //int maxMissingRevisits = 2;
   
    Set<String> skipFiles = null;
    //boolean isRevisit = false;
   
    while (true) {   
      // Support for redirect from the CDX redirectUrl field
      // This was the intended use of the redirect field, but has not actually been tested
      // To enable this functionality, uncomment the lines below
      // This is an optimization that allows for redirects to be handled without loading the original content
      //
      //String redir = closest.getRedirectUrl();
      //if ((redir != null) && !redir.equals("-")) {
      //  String fullRedirect = getUriConverter().makeReplayURI(closest.getCaptureTimestamp(), redir);
      //  throw new BetterRequestException(fullRedirect, Integer.valueOf(closest.getHttpCode()));
      //}
     
      Resource httpHeadersResource = null;
      Resource payloadResource = null;
      boolean isRevisit = false;
     
      try {
        counter++;
       
        if (closest == null) {
          throw new ResourceNotAvailableException("Self-Redirect: No Closest Match Found", 404);
        }
       
        closest.setClosest(true);
        checkAnchorWindow(wbRequest,closest);
       
       
        // Attempt to resolve any not-found embedded content with next-best
        // For "best last" capture, skip not-founds and redirects, hoping to find the best 200 response.
        if ((wbRequest.isAnyEmbeddedContext() && closest.isHttpError()) ||
          (wbRequest.isBestLatestReplayRequest() && !closest.isHttpSuccess())) {
          CaptureSearchResult nextClosest = closest;
         
          while ((nextClosest = findNextClosest(nextClosest, captureResults, requestMS)) != null) {
            // If redirect, save but keep looking -- if no better match, will use the redirect
            if (nextClosest.isHttpRedirect()) {
              closest = nextClosest;
            // If success, pick that one!
            } else if (nextClosest.isHttpSuccess()) {
              closest = nextClosest;
              break;
            }
          }
        }
       
        // Redirect to url for the actual closest capture, if not a retry
        if (counter == 1) {
          handleReplayRedirect(wbRequest, httpResponse, captureResults, closest);
        }     
       
        // If revisit, may load two resources separately
        if (closest.isDuplicateDigest()) {
          isRevisit = true;
         
          // If the payload record is known and it failed before with this payload, don't try
          // loading the header resource even.. outcome will likely be same
          if ((closest.getDuplicatePayloadFile() != null) &&
            (skipFiles != null) && skipFiles.contains(closest.getDuplicatePayloadFile())) {
            counter--; //don't really count this as we're not even checking the file anymore
            throw new ResourceNotAvailableException("Revisit: Skipping already failed " + closest.getDuplicatePayloadFile());
         
          } else if ((closest.getDuplicatePayloadFile() == null) && wbRequest.isTimestampSearchKey()) {
            // If a missing revisit and loaded optimized, try loading the entire timeline again
           
            wbRequest.setTimestampSearchKey(false);
           
            results = queryIndex(wbRequest);
           
            captureResults = (CaptureSearchResults)results;
           
            closest = getReplay().getClosest(wbRequest, captureResults);
            //originalClosest = closest;
            //maxTimeouts *= 2;
            //maxMissingRevisits *= 2;
           
            continue;
          }
         
          // If old-style arc revisit (no mimetype, filename is '-'), then don't load
          // headersResource = payloadResource
          if (EMPTY_VALUE.equals(closest.getFile())) {
            closest.setFile(closest.getDuplicatePayloadFile());
            closest.setOffset(closest.getDuplicatePayloadOffset());
           
            // See that this is successful
            httpHeadersResource = getResource(closest, skipFiles);
           
            // Hmm, since this is a revisit it should not redirect -- was: if both headers and payload are from a different timestamp, redirect to that timestamp
//            if (!closest.getCaptureTimestamp().equals(closest.getDuplicateDigestStoredTimestamp())) {
//              throwRedirect(wbRequest, httpResponse, captureResults, closest.getDuplicateDigestStoredTimestamp(), closest.getOriginalUrl(), closest.getHttpCode());
//            }
           
            payloadResource = httpHeadersResource;
           
          } else {
            httpHeadersResource = getResource(closest, skipFiles);
           
            CaptureSearchResult payloadLocation = retrievePayloadForIdenticalContentRevisit(wbRequest, httpHeadersResource, closest);
           
            if (payloadLocation == null) {
              throw new ResourceNotAvailableException("Revisit: Missing original for revisit record " + closest.toString(), 404);
            }
           
            payloadResource = getResource(payloadLocation, skipFiles);
           
            // If zero length old-style revisit with no headers, then must use payloadResource as headersResource
View Full Code Here

      wbr.setRequestUrl(payloadUri);

      SearchResults results = queryIndex(wbr);
     
      if(!(results instanceof CaptureSearchResults)) {
        throw new ResourceNotAvailableException("Bad results looking up " + payloadTimestamp + " " + payloadUri);
      }
      CaptureSearchResults payloadCaptureResults = (CaptureSearchResults) results;
      payloadLocation = getReplay().getClosest(wbr, payloadCaptureResults);
    }
   
View Full Code Here

TOP

Related Classes of org.archive.wayback.exception.ResourceNotAvailableException

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.