finally {
os.close();
}
FileSplit split = new FileSplit(path, 0, fs.getFileStatus(path).getLen(), new String[0]);
ARCFileRecordReader reader = new ARCFileRecordReader();
reader.initialize(conf,split);
int index = 0;
// iterate and validate stuff ...
Text key = reader.createKey();
BytesWritable value = reader.createValue();
while (reader.next(key,value)) {
TestRecord testRecord = records.get(index++);
// get test key bytes as utf-8 bytes ...
byte[] testKeyBytes = testRecord.url.getBytes(Charset.forName("UTF-8"));
// compare against the raw key bytes to validate that the key is the same (Text's utf-8 mapping code replaces invalid
// characters with ?, which causes our test case, which does use invalid characters to form the key, to break).
Assert.assertTrue(ArcFileReaderTests.compareTo(testKeyBytes,0,testKeyBytes.length,key.getBytes(),0,key.getLength()) == 0);
// returned bytes represent the header (encoded in utf-8), terminated by a \r\n\r\n. The content follows this terminator.
// we search for this specific byte pattern to locate the start of the content, then compare it against the source ...
int indexofHeaderTerminator = ByteArrayUtils.indexOf(value.getBytes(), 0, value.getLength(), "\r\n\r\n".getBytes());
indexofHeaderTerminator += 4;
Assert.assertTrue(ArcFileReaderTests.compareTo(testRecord.data,0,testRecord.data.length,value.getBytes(),indexofHeaderTerminator,testRecord.data.length) == 0);
}
reader.close();
Assert.assertEquals(index,ArcFileReaderTests.BASIC_TEST_RECORD_COUNT);
fs.delete(path, false);
}