}
/**
 * Creates and configures the {@link ProductCrawler} described by the PGE
 * metadata.
 *
 * <p>An {@link AutoDetectProductCrawler} is created when the
 * {@code MIME_EXTRACTOR_REPO} metadata value is present and non-empty;
 * otherwise a {@link StdProductCrawler} is created. The crawler is then
 * given the client transfer service factory, the file manager URL, the
 * required metadata, the crawl-for-dirs and recursion flags, and the dynamic
 * PGE metadata as its global metadata. When {@code CRAWLER_CONFIG_FILE} is
 * set, a Spring application context is loaded from that file and any
 * configured {@code ACTION_IDS} are applied.
 *
 * @return the configured crawler; never {@code null}
 * @throws Exception
 *            if any crawler setter or the application context creation
 *            fails
 */
protected ProductCrawler createProductCrawler() throws Exception {
   logger.info("Configuring ProductCrawler...");

   // Pick the crawler type: auto-detect only makes sense when a mime
   // extractor repo has actually been specified (non-null AND non-empty).
   String mimeExtractorRepo = pgeMetadata.getMetadata(MIME_EXTRACTOR_REPO);
   ProductCrawler crawler;
   if (!Strings.isNullOrEmpty(mimeExtractorRepo)) {
      AutoDetectProductCrawler autoDetectCrawler = new AutoDetectProductCrawler();
      autoDetectCrawler.setMimeExtractorRepo(mimeExtractorRepo);
      crawler = autoDetectCrawler;
   } else {
      crawler = new StdProductCrawler();
   }

   crawler.setClientTransferer(pgeMetadata
         .getMetadata(INGEST_CLIENT_TRANSFER_SERVICE_FACTORY));
   crawler.setFilemgrUrl(pgeMetadata.getMetadata(INGEST_FILE_MANAGER_URL));

   // Action ids are only applied when a crawler config file supplies the
   // application context.
   String crawlerConfigFile = pgeMetadata.getMetadata(CRAWLER_CONFIG_FILE);
   if (!Strings.isNullOrEmpty(crawlerConfigFile)) {
      crawler.setApplicationContext(
            new FileSystemXmlApplicationContext(crawlerConfigFile));
      List<String> actionIds = pgeMetadata.getAllMetadata(ACTION_IDS);
      if (actionIds != null) {
         crawler.setActionIds(actionIds);
      }
   }

   crawler.setRequiredMetadata(pgeMetadata.getAllMetadata(REQUIRED_METADATA));
   crawler.setCrawlForDirs(Boolean.parseBoolean(pgeMetadata
         .getMetadata(CRAWLER_CRAWL_FOR_DIRS)));
   // The metadata key expresses "recur"; the crawler wants the negation.
   crawler.setNoRecur(!Boolean.parseBoolean(
         pgeMetadata.getMetadata(CRAWLER_RECUR)));

   logger.fine(
         "Passing Workflow Metadata to CAS-Crawler as global metadata . . .");
   crawler.setGlobalMetadata(pgeMetadata.asMetadata(PgeMetadata.Type.DYNAMIC));

   logger.fine("Created ProductCrawler ["
         + crawler.getClass().getCanonicalName() + "]");
   return crawler;
}