Package org.archive.wayback.core

Examples of org.archive.wayback.core.Resource


    throw rnae;
  }
 
  public Resource getResource(String path, CaptureSearchResult result) throws IOException, ResourceNotAvailableException
  {   
    Resource r = null;
   
    long offset = result.getOffset();
    int length = (int)result.getCompressedLength();
   
    if (LOGGER.isLoggable(Level.INFO)) {
      LOGGER.info("Loading " + path + " - " + offset + ":" + length);
    }
   
    boolean success = false;
   
    SeekableLineReader slr = blockLoader.attemptLoadBlock(path, offset, length, false, false);
   
    if (slr == null) {
      return null;
    }
   
    try {
      InputStream is = slr.getInputStream();
     
      r = loadResource(path, is);
     
      r.parseHeaders();
     
      success = true;
     
    } finally {
      if (!success) {
View Full Code Here


  LiveWebTimeoutException, MalformedURLException, IOException {
 
    List<String> missing = aggregation.getMissingRobotUrls(host);
    for(String robotUrl : missing) {
      long start = System.currentTimeMillis();
      Resource resource;
      try {
        resource = webCache.getCachedResource(new URL(robotUrl),
            0,true);
        if(resource.getStatusCode() != 200) {
          LOGGER.info("ROBOT: Non200("+robotUrl+")");
          // consider it an allow:
          aggregation.addDirectives(robotUrl, ALLOW_ROBOT_DIRECTIVE);
        } else {
          InputStreamReader isr = new InputStreamReader(resource, cs);
View Full Code Here

  // Shared, class-wide FileSystem handle; declared null here.
  // NOTE(review): presumably an HDFS handle that is set up on first use by
  // getResource(URI, long) -- the initialization code is not visible in this
  // excerpt, so confirm where (and how thread-safely) it is assigned.
  protected static FileSystem hdfsSys = null;

  public static Resource getResource( URI uri, long offset)
    throws IOException, ResourceNotAvailableException, URISyntaxException {
   
    Resource r = null;
   
    // FIXME: Put this into static initialization?  or require
    //        explicit init during startup?  Or just create it each
    //        time?
    //
View Full Code Here

  }

  public static Resource getResource(File file, long offset)
      throws IOException, ResourceNotAvailableException {

    Resource r = null;
    String name = file.getName();
    if (name.endsWith(ArcWarcFilenameFilter.OPEN_SUFFIX)) {
      name = name.substring(0, name.length()
          - ArcWarcFilenameFilter.OPEN_SUFFIX.length());
    }
View Full Code Here

    return r;
  }
  public static Resource getResource(URL url, long offset)
  throws IOException, ResourceNotAvailableException {
   
    Resource r = null;
    long start = System.currentTimeMillis();
    TimeoutArchiveReaderFactory tarf = defaultTimeoutReader;
    ArchiveReader reader = tarf.getArchiveReader(url,offset);
    if(reader instanceof ARCReader) {
      ARCReader areader = (ARCReader) reader;
View Full Code Here

      CaptureSearchResult result, Resource httpHeadersResource,
      Resource payloadResource, ResultURIConverter uriConverter,
      CaptureSearchResults results) throws ServletException, IOException,
      WaybackException {

    Resource decodedResource = TextReplayRenderer.decodeResource(httpHeadersResource, payloadResource);

    // The URL of the page, for resolving in-page relative URLs:
    URL url = null;
    try {
      url = new URL(result.getOriginalUrl());
    } catch (MalformedURLException e1) {
      // TODO: this shouldn't happen...
      e1.printStackTrace();
      throw new IOException(e1.getMessage());
    }
    // determine the character set used to encode the document bytes:
    String charSet = charsetDetector.getCharset(httpHeadersResource, decodedResource, wbRequest);

    ContextResultURIConverterFactory fact = createConverterFactory(uriConverter, httpRequest, wbRequest);
   
    // set up the context:
    ReplayParseContext context =
        new ReplayParseContext(fact,url,result.getCaptureTimestamp());
   
    context.setRewriteHttpsOnly(rewriteHttpsOnly);

    if(!wbRequest.isFrameWrapperContext()) {
      // in case this is an HTML page with FRAMEs, peek ahead and look:
      // TODO: make ThreadLocal:
      byte buffer[] = new byte[FRAMESET_SCAN_BUFFER_SIZE];

      decodedResource.mark(FRAMESET_SCAN_BUFFER_SIZE);
      int amtRead = decodedResource.read(buffer);
      decodedResource.reset();

      if(amtRead > 0) {
        StringBuilder foo = new StringBuilder(new String(buffer,charSet));
        int frameIdx = TagMagix.getEndOfFirstTag(foo, "FRAMESET");
        if(frameIdx != -1) {
View Full Code Here

 
  protected RobotsResult loadExternal(URL urlURL, long maxCacheMS, boolean bUseOlder)
  {
    //RobotsContext context = new RobotsContext(url, current, true, true);
   
    Resource origResource = null;
    int status = 0;
    String contents = null;
   
    try {
      PerfStats.timeStart(PerfStat.RobotsLive);
     
      origResource = liveweb.getCachedResource(urlURL, maxCacheMS, bUseOlder);
     
      status = origResource.getStatusCode();
     
      if (status == STATUS_OK) { 
        if (origResource instanceof RobotsTxtResource) {
          contents = ((RobotsTxtResource)origResource).getContents();
        } else {
          contents = IOUtils.toString(ByteStreams.limit(origResource, MAX_ROBOTS_SIZE), "UTF-8");
        }
      }
    } catch (Exception e) {
      status = STATUS_ERROR;
    } finally {
      if (origResource != null) {
        try {
          origResource.close();
        } catch (IOException e) {
         
        }
      }
      PerfStats.timeEnd(PerfStat.RobotsLive);
View Full Code Here

        res.parseHeaders();
       
        assertEquals("statusCode", 200, res.getStatusCode());
        assertEquals("content-type", ctype, res.getHeader("Content-Type"));
       
        Resource zres = TextReplayRenderer.decodeResource(res);
        assertTrue("wrapped with GzipDecodingResource", (zres instanceof GzipDecodingResource));
       
        byte[] buf = new byte[payload.getBytes().length + 1];
        int n = zres.read(buf);
        assertEquals("content length", buf.length - 1, n);
       
        res.close();
    }
View Full Code Here

        res.parseHeaders();
       
        assertEquals("statusCode", 200, res.getStatusCode());
        assertEquals("content-type", ctype, res.getHeader("Content-Type"));
       
        Resource zres = TextReplayRenderer.decodeResource(res);
        assertTrue("wrapped with GzipDecodingResource", (zres instanceof GzipDecodingResource));
       
        byte[] buf = new byte[payload.getBytes().length + 1];
        int n = zres.read(buf);
        assertEquals("content length", buf.length - 1, n);
       
        res.close();
    }
View Full Code Here

            "200 OK", ct, payload.getBytes("UTF-8"), true);
        //System.out.println(new String(recordBytes, "UTF-8"));
        WARCRecordInfo recinfo = new TestWARCRecordInfo(recordBytes);
        TestWARCReader ar = new TestWARCReader(recinfo);
        WARCRecord rec = ar.get(0);
        Resource payloadResource = new WarcResource(rec, ar);
        payloadResource.parseHeaders();
        Resource headersResource = payloadResource;

        TestServletOutputStream servletOutput = new TestServletOutputStream();
        // expectations
        response.setStatus(200);
        EasyMock.expect(response.getOutputStream()).andReturn(servletOutput);
View Full Code Here

TOP

Related Classes of org.archive.wayback.core.Resource

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.