Examples of WARCReader


Examples of org.archive.io.warc.WARCReader

      ARCReader reader = ARCReaderFactory.get(file,offset);
      r = ARCArchiveRecordToResource(reader.get(),reader);

    } else if (isWarc(name)) {

      WARCReader reader = WARCReaderFactory.get(file,offset);
      r = WARCArchiveRecordToResource(reader.get(),reader);

    } else {
      throw new ResourceNotAvailableException("Unknown extension");
    }
View Full Code Here

Examples of org.archive.io.warc.WARCReader

      ARCReader reader = ARCReaderFactory.get(url, offset);
      r = ARCArchiveRecordToResource(reader.get(),reader);

    } else if (isWarc(name)) {

      WARCReader reader = WARCReaderFactory.get(url, offset);
      r = WARCArchiveRecordToResource(reader.get(),reader);

    } else {
      throw new ResourceNotAvailableException("Unknown extension");
    }
    return r;
View Full Code Here

Examples of org.archive.io.warc.WARCReader

    WARCRecordToSearchResultAdapter adapter2 =
      new WARCRecordToSearchResultAdapter();
    adapter2.setCanonicalizer(canonicalizer);
   
    WARCReader reader = WARCReaderFactory.get(warc);
   
    ArchiveReaderCloseableIterator itr1 =
      new ArchiveReaderCloseableIterator(reader,reader.iterator());

    CloseableIterator<WARCRecord> itr2 =
      new AdaptedIterator<ArchiveRecord, WARCRecord>(itr1, adapter1);

    return new AdaptedIterator<WARCRecord, SearchResult>(itr2, adapter2);
View Full Code Here

Examples of org.archive.io.warc.WARCReader

   public void transform(final File warc, final File dir, final String prefix,
           final String suffix, final boolean force)
   throws IOException, java.text.ParseException {
       FileUtils.assertReadable(warc);
       FileUtils.assertReadable(dir);
       WARCReader reader = WARCReaderFactory.get(warc);
       List<String> metadata =  new ArrayList<String>();
       metadata.add("Made from " + reader.getReaderIdentifier() + " by " +
           this.getClass().getName() + "/" + getRevision());
       ARCWriter writer =
           new ARCWriter(
                   new AtomicInteger(),
                   new WriterPoolSettingsData(
                           prefix,
                           suffix,
                           -12,
                           reader.isCompressed(),
                           Arrays.asList(new File [] {dir}),
                           metadata));
       transform(reader, writer);
   }
View Full Code Here

Examples of org.archive.io.warc.WARCReader

            // check the warc records
            List<File> warcDirs = warcWriter.calcOutputDirs();
            assertEquals(1, warcDirs.size());
            String[] warcs = warcDirs.get(0).list();
            assertEquals(1, warcs.length);
            WARCReader warcReader = WARCReaderFactory.get(new File(warcDirs.get(0), warcs[0]));
            Iterator<ArchiveRecord> recordIterator = warcReader.iterator();
           
            ArchiveRecord record = recordIterator.next();
            assertEquals(WARCRecordType.warcinfo.toString(), record.getHeader().getHeaderValue(HEADER_KEY_TYPE));
           
            assertTrue(recordIterator.hasNext());
View Full Code Here

Examples of org.archive.io.warc.WARCReader

    if(reader instanceof ARCReader) {
      ARCReader areader = (ARCReader) reader;
      r = ARCArchiveRecordToResource(areader.get(),areader);
   
    } else if(reader instanceof WARCReader) {
      WARCReader wreader = (WARCReader) reader;
      r = WARCArchiveRecordToResource(wreader.get(),wreader);
     
    } else {
      throw new ResourceNotAvailableException("Unknown ArchiveReader");
    }
    long elapsed = System.currentTimeMillis() - start;
View Full Code Here

Examples of org.jwat.warc.WarcReader

        int warnings = 0;

        try {
            in = this.getClass().getClassLoader().getResourceAsStream(warcFile);

            WarcReader reader = WarcReaderFactory.getReader(in);
            WarcRecord record;

            while ((record = reader.getNextRecord()) != null) {
                if (bDebugOutput) {
                    TestBaseUtils.printRecord(record);
                    TestBaseUtils.printRecordErrors(record);
                }

                record.close();

                errors = 0;
                warnings = 0;
                if (record.diagnostics.hasErrors()) {
                    errors += record.diagnostics.getErrors().size();
                }
                if (record.diagnostics.hasWarnings()) {
                    warnings += record.diagnostics.getWarnings().size();
                }

                Assert.assertEquals(expected_errors[records], errors);
                Assert.assertEquals(expected_warnings[records], warnings);

                ++records;
            }

            reader.close();
            in.close();

            if (bDebugOutput) {
                TestBaseUtils.printStatus(records, errors, warnings);
            }
View Full Code Here

Examples of org.jwat.warc.WarcReader

        int warnings = 0;

        try {
            in = this.getClass().getClassLoader().getResourceAsStream(warcFile);

            WarcReader reader = WarcReaderFactory.getReader(in);
            WarcRecord record;

            while ((record = reader.getNextRecord()) != null) {
                if (bDebugOutput) {
                    TestBaseUtils.printRecord(record);
                    TestBaseUtils.printRecordErrors(record);
                }

                record.close();

                ++records;

                if (record.diagnostics.hasErrors()) {
                    errors += record.diagnostics.getErrors().size();
                }
                if (record.diagnostics.hasWarnings()) {
                    warnings += record.diagnostics.getWarnings().size();
                }
            }

            reader.close();
            in.close();

            if (bDebugOutput) {
                TestBaseUtils.printStatus(records, errors, warnings);
            }
View Full Code Here

Examples of org.jwat.warc.WarcReader

        int warnings = 0;

        try {
            in = this.getClass().getClassLoader().getResourceAsStream(warcFile);

            WarcReader reader = WarcReaderFactory.getReader(in);
            WarcRecord record;

            while ((record = reader.getNextRecord()) != null) {
                if (bDebugOutput) {
                    TestBaseUtils.printRecord(record);
                    TestBaseUtils.printRecordErrors(record);
                }

                record.close();

                errors = 0;
                warnings = 0;
                if (record.diagnostics.hasErrors()) {
                    errors += record.diagnostics.getErrors().size();
                }
                if (record.diagnostics.hasWarnings()) {
                    warnings += record.diagnostics.getWarnings().size();
                }

                Assert.assertEquals(expected_errors[records], errors);
                Assert.assertEquals(expected_warnings[records], warnings);

                ++records;
            }

            reader.close();
            in.close();

            if (bDebugOutput) {
                TestBaseUtils.printStatus(records, errors, warnings);
            }
View Full Code Here

Examples of org.jwat.warc.WarcReader

        int warnings = 0;

        try {
            in = this.getClass().getClassLoader().getResourceAsStream(warcFile);

            WarcReader reader = WarcReaderFactory.getReader(in);
            WarcRecord record;

            while ((record = reader.getNextRecord()) != null) {
                if (bDebugOutput) {
                    TestBaseUtils.printRecord(record);
                    TestBaseUtils.printRecordErrors(record);
                }

                record.close();

                if (record.diagnostics.hasErrors()) {
                    errors += record.diagnostics.getErrors().size();
                }
                if (record.diagnostics.hasWarnings()) {
                    warnings += record.diagnostics.getWarnings().size();
                }

                ++records;
            }

            reader.close();
            in.close();

            if (bDebugOutput) {
                TestBaseUtils.printStatus(records, errors, warnings);
            }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.