*/
@Test
public void testIt() throws ProtocolException, ParseException {
String urlString;
Protocol protocol;
Content content;
Parse parse;
Configuration conf = NutchConfiguration.create();
for (int i = 0; i < sampleFiles.length; i++) {
urlString = "file:" + sampleDir + fileSeparator + sampleFiles[i];
protocol = new ProtocolFactory(conf).getProtocol(urlString);
content = protocol.getProtocolOutput(new Text(urlString),
new CrawlDatum()).getContent();
parse = new ParseUtil(conf).parseByExtensionId("parse-tika",
content).get(content.getUrl());
// Check that there are 2 outlinks:
// unlike the original parse-rss,
// Tika ignores the URL and description of the channel.