Package com.ikanow.infinit.e.harvest

Examples of com.ikanow.infinit.e.harvest.HarvestController


   
    // "-1"] Test initialization
    // Test-driver fragment: initializes the generic processing controller, harvests one
    // known feed source into local lists in standalone mode, prints how many documents
    // came back, then puts the store/index manager into diagnostic mode.
    new GenericProcessingController().Initialize();
   
    // 0] Preparation: use the harvest library to get various sets of files to test out...
    HarvestController hc = new HarvestController();
    // NOTE(review): the argument is presumably a cap on the number of documents harvested - confirm
    hc.setStandaloneMode(5);
    // Output lists handed to harvestSource() below (documents to add / update / delete)
    LinkedList<DocumentPojo> toAdd_feed = new LinkedList<DocumentPojo>();
    LinkedList<DocumentPojo> toUpdate_feed = new LinkedList<DocumentPojo>();
    LinkedList<DocumentPojo> toDelete_feed = new LinkedList<DocumentPojo>();
    // File type:
    // (selects a source whose "extractType" field is "Feed")
    BasicDBObject query = new BasicDBObject("extractType", "Feed");
    // A useful source known to work during V0S1 testing:
    query.put("key", "arstechnica.com.tech-policy.2012.10.last-android-vendor-f.147.4.");
   
    // Load the matching source record from the ingest DB and convert it to a SourcePojo.
    // NOTE(review): there is no null check on the findOne() result; if the key is absent,
    // feedSource.getUrl() below would presumably fail - confirm how fromDb() treats null.
    SourcePojo feedSource = SourcePojo.fromDb(DbManager.getIngest().getSource().findOne(query), SourcePojo.class);
    hc.harvestSource(feedSource, toAdd_feed, toUpdate_feed, toDelete_feed);
    System.out.println("############# Retrieved sample feed documents: " + toAdd_feed.size() + " from " + feedSource.getUrl());
   
    // 1] Test the store and index manager by itself:
    // (diagnostic mode is switched on before the manager is exercised)
    StoreAndIndexManager.setDiagnosticMode(true);
   
View Full Code Here


          src = SourcePojo.fromDb(srcDbo, SourcePojo.class);
         
          if (null != src.getProcessingPipeline()) {
            try {
              // Set the index settings
              HarvestController hc = new HarvestController();
              HarvestControllerPipeline hcPipe = new HarvestControllerPipeline();
              hcPipe.extractSource_preProcessingPipeline(src, hc);
            }
            catch (Exception e) {
              //DEBUG
View Full Code Here

      // Source-test fragment: configures a standalone HarvestController, then either runs a
      // federated test query (for "Federated" sources) or a normal harvest of the source,
      // collecting the resulting documents in toAdd/toUpdate/toRemove.
      if (bRealDedup) { // Want to test update code, so ignore update cycle
        if (null != source.getRssConfig()) {
          source.getRssConfig().setUpdateCycle_secs(1); // always update
        }
      }
      // NOTE(review): the meaning of the boolean constructor argument is not visible here - confirm
      HarvestController harvester = new HarvestController(true);
      // Cap the number of documents requested at 100
      if (nNumDocsToReturn > 100) { // (seems reasonable)
        nNumDocsToReturn = 100;
      }
      harvester.setStandaloneMode(nNumDocsToReturn, bRealDedup);
      List<DocumentPojo> toAdd = new LinkedList<DocumentPojo>();
      List<DocumentPojo> toUpdate = new LinkedList<DocumentPojo>();
      List<DocumentPojo> toRemove = new LinkedList<DocumentPojo>();
      // Make sure a harvest status object exists so the message can be read/written below
      if (null == source.getHarvestStatus()) {
        source.setHarvestStatus(new SourceHarvestStatusPojo());
      }
      // Remember the pre-existing message (may be null) so it can be compared after the harvest
      String oldMessage = source.getHarvestStatus().getHarvest_message();
      // SPECIAL CASE: FOR FEDERATED QUERIES
      if ((null != source.getExtractType()) && source.getExtractType().equals("Federated")) {
        // Running total of entities across all documents returned by the test query
        int federatedQueryEnts = 0;
        SourceFederatedQueryConfigPojo endpoint = null;
        // Best-effort read of the first pipeline element's federated query config: any
        // exception (e.g. missing or empty pipeline) is deliberately ignored and leaves
        // endpoint null, which is reported as an error just below
        try {
          endpoint = source.getProcessingPipeline().get(0).federatedQuery;
        }
        catch (Exception e) {}
        if (null == endpoint) {
          rp.setResponse(new ResponseObject("Test Source",false,"source error: no federated query specified"));     
          return rp;
        }
        AdvancedQueryPojo testQuery = null;
        // Default text, used if fromApi() yields null without throwing
        String errMessage = "no query specified";
        try {
          testQuery = AdvancedQueryPojo.fromApi(endpoint.testQueryJson, AdvancedQueryPojo.class);
        }
        catch (Exception e) {
          // Keep the parse failure text so it can be included in the error response
          errMessage = e.getMessage();
        }
        if (null == testQuery) {
          rp.setResponse(new ResponseObject("Test Source",false,"source error: need to specifiy a valid IKANOW query to test federated queries, error: " + errMessage));     
          return rp;         
        }
        // OK if we're here then we can test the query
        SimpleFederatedQueryEngine testFederatedQuery = new SimpleFederatedQueryEngine();
        endpoint.parentSource = source;
        testFederatedQuery.addEndpoint(endpoint);
        ObjectId queryId = new ObjectId();
        // Convert the source's community ids to their string form for preQueryActivities()
        String[] communityIdStrs = new String[source.getCommunityIds().size()];
        int i = 0;
        for (ObjectId commId: source.getCommunityIds()) {
          communityIdStrs[i] = commId.toString();
          i++;
        }
        testFederatedQuery.setTestMode(true);
        testFederatedQuery.preQueryActivities(queryId, testQuery, communityIdStrs);
        // Attach a stats object to the response before running the post-query step
        StatisticsPojo stats = new StatisticsPojo();
        stats.setSavedScores(0, 0);
        rp.setStats(stats);
        // Temporary list passed to postQueryActivities(); its contents are converted below
        ArrayList<BasicDBObject> toAddTemp = new ArrayList<BasicDBObject>(1);
        testFederatedQuery.postQueryActivities(queryId, toAddTemp, rp);
        // Convert each raw DB object to a DocumentPojo, counting entities as we go
        for (BasicDBObject docObj: toAddTemp) {
          DocumentPojo doc = DocumentPojo.fromDb(docObj, DocumentPojo.class);
          if (null != doc.getEntities()) {
            federatedQueryEnts += doc.getEntities().size();
          }
         
          //Metadata workaround:
          // (copies the raw "json" metadata array from the DB object onto the converted doc)
          @SuppressWarnings("unchecked")
          LinkedHashMap<String, Object[]> meta = (LinkedHashMap<String, Object[]>) docObj.get(DocumentPojo.metadata_);
          if (null != meta) {
            Object metaJson = meta.get("json");
            if (metaJson instanceof Object[]) { // (in this case ... non-cached, need to recopy in, I forget why)
              doc.addToMetadata("json", (Object[])metaJson);
            }
          }         
          toAdd.add(doc);
        }
        // (currently can't run harvest source federated query)
        // Report the outcome through the harvest message: a warning if nothing was extracted,
        // otherwise the entity count
        if (0 == federatedQueryEnts) { // (more fed query exceptions)
          source.getHarvestStatus().setHarvest_message("Warning: no entities extracted, probably docConversionMap is wrong?");
        }
        else {
          source.getHarvestStatus().setHarvest_message(federatedQueryEnts + " entities extracted");
        }
       
      }//TESTED (END FEDERATED QUERY TEST MODE, WHICH IS A BIT DIFFERENT)
      else {
        // Normal (non-federated) path: let the harvester populate the three lists
        harvester.harvestSource(source, toAdd, toUpdate, toRemove);
      }     
     
      // (don't parrot the old message back - v confusing)
      if (oldMessage == source.getHarvestStatus().getHarvest_message()) { // (ptr ==)
        source.getHarvestStatus().setHarvest_message("(no documents extracted - likely a source or configuration error)");       
View Full Code Here

  }
 
  private void harvesterSetup() throws IOException
  {
   
    harvester = new HarvestController();
    harvester.setStandaloneMode(1, false)
  }
View Full Code Here

    // Test-driver fragment: exercises the proxy lookup once, then prepares a standalone
    // HarvestController and the three result lists for a test harvest.
    // Check proxy:
    // (the return value is discarded - NOTE(review): presumably called only to verify the
    // lookup succeeds; confirm)
    ProxyManager.getProxy(new URL("http://www.ikanow.com"), null);
   
    // TESTING
   
    HarvestController harvester = new HarvestController();
    //harvester.setStandaloneMode(0);
    // NOTE(review): the argument is presumably a cap on the number of documents harvested - confirm
    harvester.setStandaloneMode(5);
   
    // Result lists for the harvest (documents to add / update / remove)
    List<DocumentPojo> toAdd = new LinkedList<DocumentPojo>();
    List<DocumentPojo> toUpdate = new LinkedList<DocumentPojo>();
    List<DocumentPojo> toRemove = new LinkedList<DocumentPojo>();
   
View Full Code Here

    }
    private SourcePojo _sourceToProcess = null;
   
    public void run() {
   
      HarvestController hc;
      GenericProcessingController gpc;
     
      try {       
        if (null == _harvesterController.get()) { // Some sort of internal bug? No idea...
          _harvesterController.set(new HarvestController());
        }
        if (null == _genericController.get()) { // (ditto, not seen this but better safe than sorry)
          _genericController.set(new GenericProcessingController());
        }
       
        List<DocumentPojo> toAdd = new LinkedList<DocumentPojo>();
        List<DocumentPojo> toUpdate = new LinkedList<DocumentPojo>();
        List<DocumentPojo> toRemove = new LinkedList<DocumentPojo>();       
       
        hc = _harvesterController.get();
        hc.harvestSource(_sourceToProcess, toAdd, toUpdate, toRemove);
          // (toAdd includes toUpdate)
       
        if (HarvestEnum.error != _sourceToProcess.getHarvestStatus().getHarvest_status()) {
          gpc = _genericController.get();
          gpc.processDocuments(SourceUtils.getHarvestType(_sourceToProcess), toAdd, toUpdate, toRemove, _sourceToProcess);
View Full Code Here

TOP

Related Classes of com.ikanow.infinit.e.harvest.HarvestController

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.