* Test {@link DataStatistics}.
*/
@Test
public void testDataStatistics() throws Exception {
  // test data-statistics getters with compression enabled
  DataStatistics stats = new DataStatistics(10, 2, true);
  assertEquals("Data size mismatch", 10, stats.getDataSize());
  assertEquals("Num files mismatch", 2, stats.getNumFiles());
  assertTrue("Compression configuration mismatch", stats.isDataCompressed());

  // test data-statistics getters with compression disabled
  stats = new DataStatistics(100, 5, false);
  assertEquals("Data size mismatch", 100, stats.getDataSize());
  assertEquals("Num files mismatch", 5, stats.getNumFiles());
  assertFalse("Compression configuration mismatch", stats.isDataCompressed());

  // test publish data stats: start from a clean, empty input directory
  Configuration conf = new Configuration();
  Path rootTempDir = new Path(System.getProperty("test.build.data", "/tmp"));
  Path testDir = new Path(rootTempDir, "testDataStatistics");
  FileSystem fs = testDir.getFileSystem(conf);
  fs.delete(testDir, true);
  Path testInputDir = new Path(testDir, "test");
  fs.mkdirs(testInputDir);

  // test empty folder (compression = true): publishing is expected to be
  // rejected with a RuntimeException
  CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
  assertTrue("Compression data publishing error",
      publishDataStatisticsFails(testInputDir, 1024L, conf));

  // test with empty folder (compression = off)
  CompressionEmulationUtil.setCompressionEmulationEnabled(conf, false);
  stats = GenerateData.publishDataStatistics(testInputDir, 1024L, conf);
  assertEquals("Data size mismatch", 0, stats.getDataSize());
  assertEquals("Num files mismatch", 0, stats.getNumFiles());
  assertFalse("Compression configuration mismatch", stats.isDataCompressed());

  // test with some plain input data (compression = off)
  CompressionEmulationUtil.setCompressionEmulationEnabled(conf, false);
  Path inputDataFile = new Path(testInputDir, "test");
  // 0777 is an octal literal (rwxrwxrwx); a decimal 777 would be the
  // unrelated bit pattern 01411.
  long size =
      UtilsForTests.createTmpFileDFS(fs, inputDataFile,
          FsPermission.createImmutable((short) 0777), "hi hello bye").size();
  stats = GenerateData.publishDataStatistics(testInputDir, -1, conf);
  assertEquals("Data size mismatch", size, stats.getDataSize());
  assertEquals("Num files mismatch", 1, stats.getNumFiles());
  assertFalse("Compression configuration mismatch", stats.isDataCompressed());

  // test with some plain input data (compression = on): the input file has
  // no compression extension, so publishing is expected to be rejected
  CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
  assertTrue("Compression data publishing error",
      publishDataStatisticsFails(testInputDir, 1234L, conf));

  // test with some compressed input data (compression = off)
  CompressionEmulationUtil.setCompressionEmulationEnabled(conf, false);
  fs.delete(inputDataFile, false);
  inputDataFile = new Path(testInputDir, "test.gz");
  size =
      UtilsForTests.createTmpFileDFS(fs, inputDataFile,
          FsPermission.createImmutable((short) 0777), "hi hello").size();
  stats = GenerateData.publishDataStatistics(testInputDir, 1234L, conf);
  assertEquals("Data size mismatch", size, stats.getDataSize());
  assertEquals("Num files mismatch", 1, stats.getNumFiles());
  assertFalse("Compression configuration mismatch", stats.isDataCompressed());

  // test with some compressed input data (compression = on)
  CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
  stats = GenerateData.publishDataStatistics(testInputDir, 1234L, conf);
  assertEquals("Data size mismatch", size, stats.getDataSize());
  assertEquals("Num files mismatch", 1, stats.getNumFiles());
  assertTrue("Compression configuration mismatch", stats.isDataCompressed());
}

/**
 * Attempts to publish data statistics and reports whether the attempt was
 * rejected with a {@link RuntimeException}.
 *
 * @param inputDir directory passed through to
 *                 {@code GenerateData.publishDataStatistics}
 * @param genBytes byte count passed through to
 *                 {@code GenerateData.publishDataStatistics}
 * @param conf configuration passed through to
 *             {@code GenerateData.publishDataStatistics}
 * @return {@code true} if a {@link RuntimeException} was thrown,
 *         {@code false} if publishing completed normally
 * @throws Exception any non-runtime failure, propagated unchanged so that it
 *                   fails the calling test
 */
private static boolean publishDataStatisticsFails(Path inputDir,
    long genBytes, Configuration conf) throws Exception {
  try {
    GenerateData.publishDataStatistics(inputDir, genBytes, conf);
    return false;
  } catch (RuntimeException expected) {
    // this is the outcome the caller is probing for
    return true;
  }
}