Package org.apache.oodt.cas.crawl

Examples of org.apache.oodt.cas.crawl.ProductCrawler


      FileSystemXmlApplicationContext appContext = new FileSystemXmlApplicationContext(
            this.beanRepo);

      try {
         ProductCrawler pc = (ProductCrawler) appContext
               .getBean(crawlerId != null ? crawlerId : getName());
         pc.setApplicationContext(appContext);
         if (pc.getDaemonPort() != -1 && pc.getDaemonWait() != -1) {
            new CrawlDaemon(pc.getDaemonWait(), pc, pc.getDaemonPort())
                  .startCrawling();
         } else {
            pc.crawl();
         }
      } catch (Exception e) {
         throw new CmdLineActionException("Failed to launch crawler : "
               + e.getMessage(), e);
      }
View Full Code Here


  }

  protected ProductCrawler createProductCrawler() throws Exception {
     /* create a ProductCrawler based on whether or not the output dir specifies a MIME_EXTRACTOR_REPO */
      logger.info("Configuring ProductCrawler...");
      ProductCrawler crawler = null;
      if (pgeMetadata.getMetadata(MIME_EXTRACTOR_REPO) != null &&
          pgeMetadata.getMetadata(MIME_EXTRACTOR_REPO).equals("")){
          crawler = new AutoDetectProductCrawler();
          ((AutoDetectProductCrawler)crawler).
            setMimeExtractorRepo(pgeMetadata.getMetadata(MIME_EXTRACTOR_REPO));       
      }
      else{
        crawler = new StdProductCrawler();
      }

      crawler.setClientTransferer(pgeMetadata
            .getMetadata(INGEST_CLIENT_TRANSFER_SERVICE_FACTORY));
      crawler.setFilemgrUrl(pgeMetadata.getMetadata(INGEST_FILE_MANAGER_URL));
      String crawlerConfigFile = pgeMetadata.getMetadata(CRAWLER_CONFIG_FILE);
      if (!Strings.isNullOrEmpty(crawlerConfigFile)) {
         crawler.setApplicationContext(
               new FileSystemXmlApplicationContext(crawlerConfigFile));
         List<String> actionIds = pgeMetadata.getAllMetadata(ACTION_IDS);
         if (actionIds != null) {
            crawler.setActionIds(actionIds);
         }
      }
      crawler.setRequiredMetadata(pgeMetadata.getAllMetadata(REQUIRED_METADATA));
      crawler.setCrawlForDirs(Boolean.parseBoolean(pgeMetadata
            .getMetadata(CRAWLER_CRAWL_FOR_DIRS)));
      crawler.setNoRecur(!Boolean.parseBoolean(
            pgeMetadata.getMetadata(CRAWLER_RECUR)));
      logger.fine(
            "Passing Workflow Metadata to CAS-Crawler as global metadata . . .");
      crawler.setGlobalMetadata(pgeMetadata.asMetadata(PgeMetadata.Type.DYNAMIC));
      logger.fine("Created ProductCrawler ["
            + crawler.getClass().getCanonicalName() + "]");
      return crawler;
   }
View Full Code Here

      pgeTask.pgeMetadata.replaceMetadata(CRAWLER_CRAWL_FOR_DIRS,
            Boolean.toString(false));
      pgeTask.pgeMetadata.replaceMetadata(CRAWLER_RECUR,
            Boolean.toString(true));

      ProductCrawler pc = pgeTask.createProductCrawler();
      assertEquals(
            "org.apache.oodt.cas.filemgr.datatransfer.LocalDataTransferFactory",
            pc.getClientTransferer());
      assertEquals("http://localhost:9000", pc.getFilemgrUrl());
      assertEquals(
            Sets.newHashSet("DeleteDataFile", "MoveMetadataFileToFailureDir"),
            Sets.newHashSet(pc.getActionIds()));
      CrawlerAction action = (CrawlerAction) pc.getApplicationContext().getBean("DeleteDataFile");
      assertNotNull(action);
      MoveFile moveFileAction = (MoveFile) pc.getApplicationContext().getBean("MoveMetadataFileToFailureDir");
      Properties properties = new Properties();
      properties.load(new FileInputStream(new File(
            "src/main/resources/examples/Crawler/action-beans.properties")));
      assertEquals(properties.get("crawler.failure.dir"),
            moveFileAction.getToDir());
      assertTrue(pc.getRequiredMetadata().contains("Owners"));
      assertFalse(pc.isCrawlForDirs());
      assertFalse(pc.isNoRecur());
   }
View Full Code Here

        String[] crawlerIds = this.getApplicationContext().getBeanNamesForType(
                ProductCrawler.class);
        PrintStream ps = new PrintStream(this.getOutStream());
        ps.println("ProductCrawlers:");
        for (String crawlerId : crawlerIds) {
            ProductCrawler pc = (ProductCrawler) this.getApplicationContext()
                    .getBean(crawlerId);
            ps.println("  Id: " + pc.getId());
        }
        ps.println();
        ps.close();
    }
View Full Code Here

      pgeTask.pgeMetadata.replaceMetadata(CRAWLER_CRAWL_FOR_DIRS,
            Boolean.toString(false));
      pgeTask.pgeMetadata.replaceMetadata(CRAWLER_RECUR,
            Boolean.toString(true));

      ProductCrawler pc = pgeTask.createProductCrawler();
      assertEquals(
            "org.apache.oodt.cas.filemgr.datatransfer.LocalDataTransferFactory",
            pc.getClientTransferer());
      assertEquals("http://localhost:9000", pc.getFilemgrUrl());
      assertEquals(
            Sets.newHashSet("DeleteDataFile", "MoveMetadataFileToFailureDir"),
            Sets.newHashSet(pc.getActionIds()));
      CrawlerAction action = (CrawlerAction) pc.getApplicationContext().getBean("DeleteDataFile");
      assertNotNull(action);
      MoveFile moveFileAction = (MoveFile) pc.getApplicationContext().getBean("MoveMetadataFileToFailureDir");
      Properties properties = new Properties();
      properties.load(new FileInputStream(new File(
            "src/main/resources/examples/Crawler/action-beans.properties")));
      assertEquals(properties.get("crawler.failure.dir"),
            moveFileAction.getToDir());
      assertTrue(pc.getRequiredMetadata().contains("Owners"));
      assertFalse(pc.isCrawlForDirs());
      assertFalse(pc.isNoRecur());
   }
View Full Code Here

TOP

Related Classes of org.apache.oodt.cas.crawl.ProductCrawler

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.