// Collect the keys (URLs) of all fetched records whose content contains the
// test-page marker text, then compare them against the expected urls list.
ArrayList<String> handledurls=new ArrayList<String>();
try {
  // A fresh key/value pair is allocated for every record; the loop stops as
  // soon as next() reports that there is nothing more to read.
  for (;;) {
    Text key=new Text();
    Content value=new Content();
    if(!reader.next(key, value)) {
      break;
    }
    String contentString=new String(value.getContent());
    if(contentString.contains("Nutch fetcher test page")) {
      handledurls.add(key.toString());
    }
  }
} finally {
  // Close the reader even if reading a record throws, so it is never leaked.
  reader.close();
}
Collections.sort(urls);
Collections.sort(handledurls);
//verify that enough pages were handled
Assert.assertEquals(urls.size(), handledurls.size());
//verify that correct pages were handled (both lists must contain each other)
Assert.assertTrue(handledurls.containsAll(urls));
Assert.assertTrue(urls.containsAll(handledurls));
// reset the list so it can be reused by the checks that follow
handledurls.clear();
// Verify parse data: re-point the reader at the ParseData part file of the
// first generated segment.
Path parseData = new Path(
    new Path(generatedSegment[0], ParseData.DIR_NAME),
    "part-00000/data");
reader = new SequenceFile.Reader(fs, parseData, conf);
for (;;) {
  Text url = new Text();
  ParseData parsed = new ParseData();
  if (!reader.next(url, parsed)) {
    break;
  }
  // Only records whose content metadata carries both the
  // "nutch.segment.name" and "nutch.content.digest" keys count as handled.
  Metadata meta = parsed.getContentMeta();
  if (meta.get(Nutch.SEGMENT_NAME_KEY) == null
      || meta.get(Nutch.SIGNATURE_KEY) == null) {
    continue;
  }
  handledurls.add(url.toString());
}