Configuration conf = NutchConfiguration.create();
conf.setInt("indexer.max.title.length", 10);
BasicIndexingFilter filter = new BasicIndexingFilter();
filter.setConf(conf);
assertNotNull(filter);
NutchDocument doc = new NutchDocument();
WebPage page = new WebPage();
page.putToInlinks(new Utf8("http://exceedmaximumtitleurl.org/"), new Utf8("exceeding title site"));
page.setTitle(new Utf8("This title exceeds maximum characters"));
try {
filter.filter(doc, "http://www.apache.org/", page);
} catch (Exception e) {
e.printStackTrace();
fail(e.getMessage());
}
assertNotNull(doc);
assertEquals("assert title field only has 10 characters", 10, doc.getFieldValue("title").length());
}