// Checks that reading the same mock input twice -- once with the split as first
// configured and once after setTraining(false) -- never yields the same line
// index twice, and that the two passes together yield datasetSize distinct indices.

// Draw a seed and a random threshold from a fresh RNG; both parameterize the split.
MersenneTwisterRNG rng = new MersenneTwisterRNG();
byte[] seed = rng.getSeed();
double threshold = rng.nextDouble();
JobConf conf = new JobConf();
RndLineRecordReader rndReader;
// Collects every key returned by both passes; Set.add() returning false flags a repeat.
Set<Long> dataset = new HashSet<Long>();
// key/value are reused as the output holders for every rndReader.next() call.
LongWritable key = new LongWritable();
Text value = new Text();
DatasetSplit split = new DatasetSplit(seed, threshold);
// read the training set
// NOTE(review): presumably a new DatasetSplit starts in training mode -- confirm in DatasetSplit.
// The split is written into conf before the reader is constructed from that conf.
split.storeJobParameters(conf);
rndReader = new RndLineRecordReader(new MockReader(datasetSize), conf);
while (rndReader.next(key, value)) {
// Fails if this pass returns the same key more than once.
assertTrue("duplicate line index", dataset.add(key.get()));
}
// read the testing set
// Flip the split and store it again so the new reader is built from the updated conf.
split.setTraining(false);
split.storeJobParameters(conf);
rndReader = new RndLineRecordReader(new MockReader(datasetSize), conf);
while (rndReader.next(key, value)) {
// The set still holds the keys of the first pass, so this also fails when a key
// was already returned there, i.e. when the two passes overlap.
assertTrue("duplicate line index", dataset.add(key.get()));
}
// Both passes combined must have produced exactly datasetSize distinct keys.
// NOTE(review): assumes MockReader(datasetSize) exposes datasetSize lines -- confirm.
assertEquals("missing datas", datasetSize, dataset.size());
}