Examples of org.archive.wayback.core.Resource

org.archive.wayback.core.Resource
Abstraction on top of a document stored in a WaybackCollection. Currently implemented subclasses include ArcResource and WarcResource. This implementation needs some pretty drastic refactoring. It may have to wait for 2.0. This should be a byte-oriented record, and allow wrapping the interior byte-stream in one of the more full-featured HTTP libraries (jetty/apache-http-client/w3c-http-reference). For now, it is a system-wide assumption that all resources are HTTP based. @author Brad Tofel @version $Date: 2010-09-28 23:28:38 +0100 (Ter, 28 Set 2010) $, $Revision: 3262 $

    throw rnae;
  }
  
  public Resource getResource(String path, CaptureSearchResult result) throws IOException, ResourceNotAvailableException
  {    
    Resource r = null;
    
    long offset = result.getOffset();
    int length = (int)result.getCompressedLength();
    
    if (LOGGER.isLoggable(Level.INFO)) {
      LOGGER.info("Loading " + path + " - " + offset + ":" + length);
    }
    
    boolean success = false;
    
    SeekableLineReader slr = blockLoader.attemptLoadBlock(path, offset, length, false, false);
    
    if (slr == null) {
      return null;
    }
    
    try {
      InputStream is = slr.getInputStream();
      
      r = loadResource(path, is);
      
      r.parseHeaders();
      
      success = true;
      
    } finally {
      if (!success) {

View Full Code Here

  LiveWebTimeoutException, MalformedURLException, IOException {
  
    List<String> missing = aggregation.getMissingRobotUrls(host);
    for(String robotUrl : missing) {
      long start = System.currentTimeMillis();
      Resource resource;
      try {
        resource = webCache.getCachedResource(new URL(robotUrl),
            0,true);
        if(resource.getStatusCode() != 200) {
          LOGGER.info("ROBOT: Non200("+robotUrl+")");
          // consider it an allow:
          aggregation.addDirectives(robotUrl, ALLOW_ROBOT_DIRECTIVE);
        } else {
          InputStreamReader isr = new InputStreamReader(resource, cs);

View Full Code Here

  protected static FileSystem hdfsSys = null;


  public static Resource getResource( URI uri, long offset)
    throws IOException, ResourceNotAvailableException, URISyntaxException {
    
    Resource r = null;
    
    // FIXME: Put this into static initialization?  or require
    //        explicit init during startup?  Or just create it each
    //        time?
    //

View Full Code Here

  }


  public static Resource getResource(File file, long offset)
      throws IOException, ResourceNotAvailableException {


    Resource r = null;
    String name = file.getName();
    if (name.endsWith(ArcWarcFilenameFilter.OPEN_SUFFIX)) {
      name = name.substring(0, name.length()
          - ArcWarcFilenameFilter.OPEN_SUFFIX.length());
    }

View Full Code Here

    return r;
  }
  public static Resource getResource(URL url, long offset)
  throws IOException, ResourceNotAvailableException {
    
    Resource r = null;
    long start = System.currentTimeMillis();
    TimeoutArchiveReaderFactory tarf = defaultTimeoutReader;
    ArchiveReader reader = tarf.getArchiveReader(url,offset);
    if(reader instanceof ARCReader) {
      ARCReader areader = (ARCReader) reader;

View Full Code Here

      CaptureSearchResult result, Resource httpHeadersResource,
      Resource payloadResource, ResultURIConverter uriConverter,
      CaptureSearchResults results) throws ServletException, IOException,
      WaybackException {


    Resource decodedResource = TextReplayRenderer.decodeResource(httpHeadersResource, payloadResource);


    // The URL of the page, for resolving in-page relative URLs: 
    URL url = null;
    try {
      url = new URL(result.getOriginalUrl());
    } catch (MalformedURLException e1) {
      // TODO: this shouldn't happen...
      e1.printStackTrace();
      throw new IOException(e1.getMessage());
    }
    // determine the character set used to encode the document bytes:
    String charSet = charsetDetector.getCharset(httpHeadersResource, decodedResource, wbRequest);


    ContextResultURIConverterFactory fact = createConverterFactory(uriConverter, httpRequest, wbRequest);
    
    // set up the context:
    ReplayParseContext context = 
        new ReplayParseContext(fact,url,result.getCaptureTimestamp());
    
    context.setRewriteHttpsOnly(rewriteHttpsOnly);


    if(!wbRequest.isFrameWrapperContext()) {
      // in case this is an HTML page with FRAMEs, peek ahead an look:
      // TODO: make ThreadLocal:
      byte buffer[] = new byte[FRAMESET_SCAN_BUFFER_SIZE];


      decodedResource.mark(FRAMESET_SCAN_BUFFER_SIZE);
      int amtRead = decodedResource.read(buffer);
      decodedResource.reset();


      if(amtRead > 0) {
        StringBuilder foo = new StringBuilder(new String(buffer,charSet));
        int frameIdx = TagMagix.getEndOfFirstTag(foo, "FRAMESET");
        if(frameIdx != -1) {

View Full Code Here

  
  protected RobotsResult loadExternal(URL urlURL, long maxCacheMS, boolean bUseOlder)
  {
    //RobotsContext context = new RobotsContext(url, current, true, true);
    
    Resource origResource = null;
    int status = 0;
    String contents = null;
    
    try {
      PerfStats.timeStart(PerfStat.RobotsLive);
      
      origResource = liveweb.getCachedResource(urlURL, maxCacheMS, bUseOlder);
      
      status = origResource.getStatusCode();
      
      if (status == STATUS_OK) {  
        if (origResource instanceof RobotsTxtResource) {
          contents = ((RobotsTxtResource)origResource).getContents();
        } else {
          contents = IOUtils.toString(ByteStreams.limit(origResource, MAX_ROBOTS_SIZE), "UTF-8");
        }
      }
    } catch (Exception e) {
      status = STATUS_ERROR;
    } finally {
      if (origResource != null) {
        try {
          origResource.close();
        } catch (IOException e) {
          
        }
      }
      PerfStats.timeEnd(PerfStat.RobotsLive);

View Full Code Here

        res.parseHeaders();
        
        assertEquals("statusCode", 200, res.getStatusCode());
        assertEquals("content-type", ctype, res.getHeader("Content-Type"));
        
        Resource zres = TextReplayRenderer.decodeResource(res);
        assertTrue("wrapped with GzipDecodingResource", (zres instanceof GzipDecodingResource));
        
        byte[] buf = new byte[payload.getBytes().length + 1];
        int n = zres.read(buf);
        assertEquals("content length", buf.length - 1, n);
        
        res.close();
    }

View Full Code Here

        res.parseHeaders();
        
        assertEquals("statusCode", 200, res.getStatusCode());
        assertEquals("content-type", ctype, res.getHeader("Content-Type"));
        
        Resource zres = TextReplayRenderer.decodeResource(res);
        assertTrue("wrapped with GzipDecodingResource", (zres instanceof GzipDecodingResource));
        
        byte[] buf = new byte[payload.getBytes().length + 1];
        int n = zres.read(buf);
        assertEquals("content length", buf.length - 1, n);
        
        res.close();
    }

View Full Code Here

            "200 OK", ct, payload.getBytes("UTF-8"), true);
        //System.out.println(new String(recordBytes, "UTF-8"));
        WARCRecordInfo recinfo = new TestWARCRecordInfo(recordBytes);
        TestWARCReader ar = new TestWARCReader(recinfo);
        WARCRecord rec = ar.get(0);
        Resource payloadResource = new WarcResource(rec, ar);
        payloadResource.parseHeaders();
        Resource headersResource = payloadResource;


        TestServletOutputStream servletOutput = new TestServletOutputStream();
        // expectations
        response.setStatus(200);
        EasyMock.expect(response.getOutputStream()).andReturn(servletOutput);

View Full Code Here

0 1 2 3 4

TOP

Related Classes of org.archive.wayback.core.Resource

org.apache.commons.httpclient.ChunkedInputStream

org.archive.wayback.accesscontrol.robotstxt.HRobotExclusionFilter

org.archive.wayback.accesscontrol.robotstxt.redis.SimpleRedisRobotsCache

org.archive.wayback.accesscontrol.robotstxt.RobotExclusionFilter

org.archive.wayback.archivalurl.ArchivalUrlCSSReplayRendererTest

org.archive.wayback.archivalurl.ArchivalURLJSStringTransformerReplayRendererTest

org.archive.wayback.archivalurl.ArchivalUrlSAXRewriteReplayRenderer

org.archive.wayback.archivalurl.ArchivalUrlSAXRewriteReplayRendererTest

org.archive.wayback.liveweb.ARCUnwrappingProxy

org.archive.wayback.liveweb.LiveWebCache

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.