try {
PreparatorSettings[] prepConf = config.getPreparatorSettingsList();
mPreparatorArr = PreparatorFactory.getInstance().createPreparatorArr(prepConf);
}
catch (RegainException exc) {
throw new RegainException("Creating the document preparators failed", exc);
}
// One profiler per preparator, stored at the same index as its preparator.
// Each profiler is labelled "Preparator <class name of the preparator>".
mPreparatorProfilerArr = new Profiler[mPreparatorArr.length];
for (int profilerIdx = 0; profilerIdx < mPreparatorArr.length; profilerIdx++) {
  mPreparatorProfilerArr[profilerIdx] = new Profiler(
    "Preparator " + mPreparatorArr[profilerIdx].getClass().getName(), "docs");
}
// Set up the crawler access controller, but only when the config names a
// class for it. Without a class name this whole step is skipped.
String accessClass = config.getCrawlerAccessControllerClass();
if (accessClass != null) {
  String controllerJar = config.getCrawlerAccessControllerJar();
  Object controllerInstance = RegainToolkit.createClassInstance(
    accessClass, CrawlerAccessController.class, controllerJar);
  mCrawlerAccessController = (CrawlerAccessController) controllerInstance;

  // init() is always handed a Properties object: an empty one is used when
  // the config supplies none.
  Properties controllerProps = config.getCrawlerAccessControllerConfig();
  mCrawlerAccessController.init(
    (controllerProps != null) ? controllerProps : new Properties());

  mLog.info("Using crawler access controller: " + accessClass);
}
// Compile the use-link-text-as-title patterns from the config into RE objects.
// A missing (null) pattern list yields an empty RE array.
String[] useLinkTextAsTitleRegexArr = config.getUseLinkTextAsTitleRegexList();
if (useLinkTextAsTitleRegexArr != null) {
  mUseLinkTextAsTitleReArr = new RE[useLinkTextAsTitleRegexArr.length];
  for (int regexIdx = 0; regexIdx < useLinkTextAsTitleRegexArr.length; regexIdx++) {
    String regexSource = useLinkTextAsTitleRegexArr[regexIdx];
    try {
      mUseLinkTextAsTitleReArr[regexIdx] = new RE(regexSource);
    }
    catch (RESyntaxException cause) {
      // Report which pattern (by index and text) could not be compiled
      throw new RegainException("Regular expression of "
        + "use-link-text-as-title-pattern #" + regexIdx + " has wrong syntax '"
        + regexSource + "'", cause);
    }
  }
} else {
  mUseLinkTextAsTitleReArr = new RE[0];
}