public void testIt() throws ProtocolException, ParseException, IOException {
String urlString;
Parse parse;
Configuration conf = NutchConfiguration.create();
MimeUtil mimeutil = new MimeUtil(conf);
System.out.println("Expected : " + expectedText);
for (int i = 0; i < sampleFiles.length; i++) {
urlString = "file:" + sampleDir + fileSeparator + sampleFiles[i];
if (sampleFiles[i].startsWith("ootest") == false)
continue;
File file = new File(sampleDir + fileSeparator + sampleFiles[i]);
byte[] bytes = new byte[(int) file.length()];
DataInputStream in = new DataInputStream(new FileInputStream(file));
in.readFully(bytes);
in.close();
WebPage page = new WebPage();
page.setBaseUrl(new Utf8(urlString));
page.setContent(ByteBuffer.wrap(bytes));
String mtype = mimeutil.getMimeType(file);
page.setContentType(new Utf8(mtype));
parse = new ParseUtil(conf).parse(urlString, page);
String text = parse.getText().replaceAll("[ \t\r\n]+", " ").trim();