Package org.archive.io.warc

Examples of org.archive.io.warc.WARCReader


      ARCReader reader = ARCReaderFactory.get(file,offset);
      r = ARCArchiveRecordToResource(reader.get(),reader);

    } else if (isWarc(name)) {

      WARCReader reader = WARCReaderFactory.get(file,offset);
      r = WARCArchiveRecordToResource(reader.get(),reader);

    } else {
      throw new ResourceNotAvailableException("Unknown extension");
    }
View Full Code Here


      ARCReader reader = ARCReaderFactory.get(url, offset);
      r = ARCArchiveRecordToResource(reader.get(),reader);

    } else if (isWarc(name)) {

      WARCReader reader = WARCReaderFactory.get(url, offset);
      r = WARCArchiveRecordToResource(reader.get(),reader);

    } else {
      throw new ResourceNotAvailableException("Unknown extension");
    }
    return r;
View Full Code Here

    WARCRecordToSearchResultAdapter adapter2 =
      new WARCRecordToSearchResultAdapter();
    adapter2.setCanonicalizer(canonicalizer);
   
    WARCReader reader = WARCReaderFactory.get(warc);
   
    ArchiveReaderCloseableIterator itr1 =
      new ArchiveReaderCloseableIterator(reader,reader.iterator());

    CloseableIterator<WARCRecord> itr2 =
      new AdaptedIterator<ArchiveRecord, WARCRecord>(itr1, adapter1);

    return new AdaptedIterator<WARCRecord, SearchResult>(itr2, adapter2);
View Full Code Here

   public void transform(final File warc, final File dir, final String prefix,
           final String suffix, final boolean force)
   throws IOException, java.text.ParseException {
       FileUtils.assertReadable(warc);
       FileUtils.assertReadable(dir);
       WARCReader reader = WARCReaderFactory.get(warc);
       List<String> metadata =  new ArrayList<String>();
       metadata.add("Made from " + reader.getReaderIdentifier() + " by " +
           this.getClass().getName() + "/" + getRevision());
       ARCWriter writer =
           new ARCWriter(
                   new AtomicInteger(),
                   new WriterPoolSettingsData(
                           prefix,
                           suffix,
                           -12,
                           reader.isCompressed(),
                           Arrays.asList(new File [] {dir}),
                           metadata));
       transform(reader, writer);
   }
View Full Code Here

            // check the warc records
            List<File> warcDirs = warcWriter.calcOutputDirs();
            assertEquals(1, warcDirs.size());
            String[] warcs = warcDirs.get(0).list();
            assertEquals(1, warcs.length);
            WARCReader warcReader = WARCReaderFactory.get(new File(warcDirs.get(0), warcs[0]));
            Iterator<ArchiveRecord> recordIterator = warcReader.iterator();
           
            ArchiveRecord record = recordIterator.next();
            assertEquals(WARCRecordType.warcinfo.toString(), record.getHeader().getHeaderValue(HEADER_KEY_TYPE));
           
            assertTrue(recordIterator.hasNext());
View Full Code Here

    if(reader instanceof ARCReader) {
      ARCReader areader = (ARCReader) reader;
      r = ARCArchiveRecordToResource(areader.get(),areader);
   
    } else if(reader instanceof WARCReader) {
      WARCReader wreader = (WARCReader) reader;
      r = WARCArchiveRecordToResource(wreader.get(),wreader);
     
    } else {
      throw new ResourceNotAvailableException("Unknown ArchiveReader");
    }
    long elapsed = System.currentTimeMillis() - start;
View Full Code Here

TOP

Related Classes of org.archive.io.warc.WARCReader

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.