Package: com.ikanow.infinit.e.data_model.store.config.source

Usage examples of com.ikanow.infinit.e.data_model.store.config.source.SourcePojo


        // Restrict the query to sources in communities this user belongs to (security filter)
        query.put(SourcePojo.communityIds_, new BasicDBObject(MongoDbManager.in_, SocialUtils.getUserCommunities(userIdStr)));
      // Projection: fetch only the fields needed to locate and validate the file
      BasicDBObject fields = new BasicDBObject(SourcePojo.url_, 1);
      fields.put(SourcePojo.extractType_, 1);
      fields.put(SourcePojo.file_, 1);
      fields.put(SourcePojo.isApproved_, 1);
      SourcePojo source = SourcePojo.fromDb(DbManager.getIngest().getSource().findOne(query, fields), SourcePojo.class);

      // TEST for security shenanigans
      // Canonicalize both paths and require the requested path to stay under the
      // base directory - guards against "../" traversal in relativePath.
      // NOTE(review): startsWith without a trailing separator check means a
      // sibling like "/baseEvil" would pass a "/base" prefix test - confirm the
      // base path always ends in a separator or harden the comparison.
      String baseRelativePath = new File(".").getCanonicalPath();
      String actualRelativePath = new File(relativePath).getCanonicalPath();
      if (!actualRelativePath.startsWith(baseRelativePath)) {
        throw new RuntimeException("Access denied: " + relativePath);
      }     
      //(end security shenanigans)
     
      if (null == source) {
        throw new RuntimeException("Document source not found: " + sourceKey);
      }
      // Only "File" extractor sources have harvestable files to serve
      if ((null != source.getExtractType()) && !source.getExtractType().equals("File")) {
        throw new RuntimeException("Document source not a file: " + sourceKey + ", " + source.getExtractType());       
      }
      if (!source.isApproved()) {
        throw new RuntimeException("Document source not approved, access denied: " + sourceKey);       
      }
      // Resolve the file's URL relative to the source's configured root and fetch it
      String fileURL = source.getUrl() + relativePath;
      byte[] bytes = FileHarvester.getFile(fileURL, source);
      if ( bytes == null )
      {
        //fail
        rp.setResponse(new ResponseObject("Doc Info",false,"Could not find document: " + relativePath));
View Full Code Here


    return rp;
  }//TESTED
 
  /**
   * Looks up a single source by its unique key in the ingest sources collection.
   * @param sourceKey the SourcePojo.key_ value to match
   * @return the matching source, or null (the initial value) if the lookup
   *         does not succeed - remainder of the method is truncated in this excerpt
   */
  private SourcePojo getSourceFromKey(String sourceKey)
  {
    SourcePojo source = null;
    try
    {
      // Exact-match query on the source key, then map the DBObject to the POJO
      BasicDBObject query = new BasicDBObject();
      query.put(SourcePojo.key_, sourceKey);
      source = SourcePojo.fromDb(DbManager.getIngest().getSource().findOne(query), SourcePojo.class);
View Full Code Here

    // 2.2] Not in progress and have either never been harvested or synced, or in age of how long ago

    for (int nNumSourcesGot = 0; (nNumSourcesGot < nBatchSize) && (!uncheckedSources.isEmpty()); ) {
     
      BasicDBObject query = generateNotInProgressClause(now);
      SourcePojo candidate = null;
      synchronized (SourceUtils.class) { // (can be called across multiple threads)
        candidate = uncheckedSources.pop();
      }   

      //DEBUG
      //System.out.println(" CANDIDATE=" + candidate.getKey() + " ..." + candidate.getId());     
     
      if ((null != sSourceType) && !candidate.getExtractType().equalsIgnoreCase(sSourceType)) {
        continue;
      }
      HarvestEnum candidateStatus = null;
      if (null != candidate.getHarvestStatus()) {
        candidateStatus = candidate.getHarvestStatus().getHarvest_status();
      }
      if (bSync && (null == candidateStatus)) { // Don't sync unharvested sources, obviously!
        continue;
      }
      //(DISTRIBUTON LOGIC)
     
      // Checking whether to respect the searchCycle_secs for distributed sources is a bit more complex
      // A source is "distributed" iff it declares a distribution factor
      boolean isDistributed = (null != candidate.getDistributionFactor());     
      // NOTE(review): '&&' binds tighter than '||', so this evaluates as
      //   (isDistributed && reachedMaxDocs()) || (harvest-status clause)
      // i.e. the second disjunct is NOT gated on isDistributed - the
      // indentation suggests that may not be intended; confirm.
      // NOTE(review): getDistributionFactor() and getDistributionTokensFree()
      // are boxed Integers (both are null-checked nearby), so '!=' compares
      // object identity, not value - numerically equal values outside the
      // Integer cache (-128..127) compare unequal; '!equals()' is likely meant.
      boolean distributedInProcess = isDistributed && 
        candidate.reachedMaxDocs() ||  // (<- only set inside a process)
          ((null != candidate.getHarvestStatus()) && // (robustness)
              (null != candidate.getHarvestStatus().getDistributionTokensFree()) && // (else starting out)
                (candidate.getDistributionFactor() != candidate.getHarvestStatus().getDistributionTokensFree()));
                  // (else this is the start)
      //(TESTED - local and distributed)
      //(END DISTRIBUTON LOGIC)
     
      if (((HarvestEnum.success_iteration != candidateStatus) && !distributedInProcess)
          ||
          ((null != candidate.getSearchCycle_secs()) && (candidate.getSearchCycle_secs() < 0)))
      {
        // (ie EITHER we're not iteration OR we're disabled)
        //(^^^ don't respect iteration status if source manually disabled)
       
        if ((null != candidate.getSearchCycle_secs()) || (null != defaultSearchCycle_ms)) {
          if (null == candidate.getSearchCycle_secs()) {
            candidate.setSearchCycle_secs((int)(defaultSearchCycle_ms/1000));
          }
          if (candidate.getSearchCycle_secs() < 0) {
            continue; // negative search cycle => disabled
          }
          if ((null != candidate.getHarvestStatus()) && (null != candidate.getHarvestStatus().getHarvested())) {
            //(ie the source has been harvested, and there is a non-default search cycle setting)
           
            if ((candidate.getHarvestStatus().getHarvested().getTime() + 1000L*candidate.getSearchCycle_secs())
                > now.getTime())
            {
              if ((HarvestEnum.in_progress != candidateStatus) && (null != candidateStatus) && (null == candidate.getOwnerId()))
              {
                //(^^ last test, if it's in_progress then it died recently (or hasn't started) so go ahead and harvest anyway)
                // (also hacky use of getOwnerId just to see if this is a source override source or not)
                continue; // (too soon since the last harvest...)
              }//TESTED (including hacky use of ownerId)
            }
          }
        }//TESTED
      }
      //TESTED: manually disabled (ignore), not success_iteration (ignore if outside cycle), success_iteration (always process)
     
      query.put(SourcePojo._id_, candidate.getId());
      BasicDBObject modifyClause = new BasicDBObject();
      modifyClause.put(SourceHarvestStatusPojo.sourceQuery_harvest_status_, HarvestEnum.in_progress.toString());
      if (bSync) {
        modifyClause.put(SourceHarvestStatusPojo.sourceQuery_synced_, now);       
      }
      else {
        modifyClause.put(SourceHarvestStatusPojo.sourceQuery_harvested_, now);
      }
      modifyClause.put(SourceHarvestStatusPojo.sourceQuery_lastHarvestedBy_, getHostname());
      BasicDBObject modify = new BasicDBObject(MongoDbManager.set_, modifyClause);
     
      try {
        BasicDBObject fields = new BasicDBObject(SourcePojo.templateProcessingFlow_, 0);
        BasicDBObject dbo = (BasicDBObject) DbManager.getIngest().getSource().findAndModify(query, fields, null, false, modify, false, false);
        if (null != dbo) {
          SourcePojo fullSource = SourcePojo.fromDb(dbo, SourcePojo.class, new SourcePojoSubstitutionDbMap());
          nextSetToProcess.add(fullSource);
          nNumSourcesGot++;
         
          ////////////////////////////////////////////////////////////////////////
          //
          // DISTRIBUTION LOGIC:
          // If distributionFactor set then grab one token and set state back to
          // success_iteration, to allow other threads/processes to grab me
          if ((null != fullSource.getDistributionFactor()) && !bSync)
          {
            // Get the current distribution token
            int distributionToken = 0;           
            boolean bReset = false;
            if ((null == fullSource.getHarvestStatus()) || (null == fullSource.getHarvestStatus().getDistributionTokensFree())) {
              distributionToken = fullSource.getDistributionFactor();
              // (also set up some parameters so don't need to worry about null checks later)
              if (null == fullSource.getHarvestStatus()) {
                fullSource.setHarvestStatus(new SourceHarvestStatusPojo());
              }
              fullSource.getHarvestStatus().setDistributionTokensFree(distributionToken);
              fullSource.getHarvestStatus().setDistributionTokensComplete(0);
            }
            else {
              distributionToken = fullSource.getHarvestStatus().getDistributionTokensFree();
             
              //Check last harvested time to ensure this isn't an old state (reset if so)
              if ((distributionToken != fullSource.getDistributionFactor()) ||
                  (0 != fullSource.getHarvestStatus().getDistributionTokensComplete()))
              {
                if (null != fullSource.getHarvestStatus().getRealHarvested()) { // harvested is useless here because it's already been updated
                  if ((new Date().getTime() - fullSource.getHarvestStatus().getRealHarvested().getTime()) >
                      _ONEDAY) // (ie older than a day)
                  {
                    distributionToken = fullSource.getDistributionFactor(); // ie start again
                  }
                }
              }//TESTED
            }//(end check for any existing state)         

            if (distributionToken == fullSource.getDistributionFactor()) {
              bReset = true; // (first time through, might as well go ahead and reset to ensure all the vars are present)
            }

            // If in error then just want to grab all remaining tokens and reset the status
            if (HarvestEnum.error == fullSource.getHarvestStatus().getHarvest_status()) { // currently an error
              if (distributionToken != fullSource.getDistributionFactor()) { // In the middle, ie just errored
                fullSource.setDistributionTokens(new HashSet<Integer>());
                while (distributionToken > 0) {
                  distributionToken--;
                  fullSource.getDistributionTokens().add(distributionToken);                 
                }
                BasicDBObject dummy = new BasicDBObject();
                bReset = updateHarvestDistributionState_tokenComplete(fullSource, HarvestEnum.error, dummy, dummy);
                  // (then finish off completion down below)               
              }
            }//TESTED (error mode, 2 cases: complete and incomplete)
           
            //DEBUG
            //System.out.println(" DIST_SOURCE=" + fullSource.getKey() + "/" + fullSource.getDistributionFactor() + ": " + distributionToken + ", " + bReset);
           
            //(note we'll see this even if searchCycle is set because the "source" var (which still has the old
            // state) is stuck back at the start of uncheckedList, so each harvester will see the source >1 time)
           
            if (0 != distributionToken) { // (else no available tokens for this cycle)
              distributionToken--;
             
              fullSource.setDistributionTokens(new HashSet<Integer>());
              fullSource.getDistributionTokens().add(distributionToken);
             
              // Remove one of the available tokens (they don't get reset until the source is complete)
              updateHarvestDistributionState_newToken(fullSource.getId(), distributionToken, HarvestEnum.success_iteration, bReset);

              // After this loop is complete, put back at the start of the unchecked list
              // so another thread can pick up more tokens:
              if (null == putMeBackAtTheStart_distributed) {
                putMeBackAtTheStart_distributed = new LinkedList<SourcePojo>();
              }
              putMeBackAtTheStart_distributed.add(candidate);
             
              // Before adding back to list, set a transient field to ensure it bypasses any search cycle checks
              // (for in process logic where we won't see the update status from the DB)
              candidate.setReachedMaxDocs();
             
              // Reset full source's status so we know if we started in success/error/success_iteration
              if (null == candidateStatus) {
                candidateStatus = HarvestEnum.success;
              }
              fullSource.getHarvestStatus().setHarvest_status(candidateStatus);             
             
            } // (end if available tokens)
            else { // (don't process, just set back to original status)
              HarvestEnum harvestStatus = HarvestEnum.success;
              if (null != fullSource.getHarvestStatus()) {
                if (null != fullSource.getHarvestStatus().getHarvest_status()) {
                  harvestStatus = fullSource.getHarvestStatus().getHarvest_status();
                }
              }
              if (bReset) { // resetting back to 10
                distributionToken = fullSource.getDistributionFactor();
              }
              updateHarvestDistributionState_newToken(fullSource.getId(), distributionToken, harvestStatus, bReset);
                // (bReset can be true in the error case handled above)

              nextSetToProcess.removeLast();
              nNumSourcesGot--;             
            }//TESTED           
           
          }//TESTED
          else if (bSync) {
            // Not allowed to sync "distributed in progress"
            // NOTE(review): this condition uses '||' - when getHarvestStatus()
            // is null the first operand is false and the second operand then
            // dereferences the null status (NPE). '&&' was almost certainly
            // intended here.
            // NOTE(review): the inner '!=' below compares boxed Integers by
            // reference, not value - '!equals()' is likely meant.
            if ((null != fullSource.getHarvestStatus()) || (null != fullSource.getHarvestStatus().getDistributionTokensFree())) {
              if (null == fullSource.getHarvestStatus().getHarvest_status()) { // (shouldn't ever happen)
                fullSource.getHarvestStatus().setHarvest_status(HarvestEnum.success_iteration);
              }
              // Tokens still outstanding => source is mid-distribution: restore
              // its token state and drop it from this sync batch
              if (fullSource.getHarvestStatus().getDistributionTokensFree() != fullSource.getDistributionFactor()) {
                updateHarvestDistributionState_newToken(fullSource.getId(), fullSource.getHarvestStatus().getDistributionTokensFree(), fullSource.getHarvestStatus().getHarvest_status(), false);
                nextSetToProcess.removeLast();
                nNumSourcesGot--;             
              }
            }
          }//TESTED
View Full Code Here

      else
      {
        communityIdSet = SocialUtils.getUserCommunities(userIdStr);
        query.put(SourcePojo.communityIds_, new BasicDBObject(MongoDbManager.in_, communityIdSet)); // (security)
      }
      SourcePojo source = SourcePojo.fromDb(DbManager.getIngest().getSource().findOne(query), SourcePojo.class);
      if (null == source) {
        rp.setResponse(new ResponseObject("Source Info",false,"error retrieving source info (or permissions error)"));       
      }
      else {
        ObjectId userId = null;
        if (bAdmin) {
          communityIdSet = source.getCommunityIds();
        }
        else if (!source.isPublic()) { // (otherwise can bypass this)
          userId = new ObjectId(userIdStr);
          if (userId.equals(source.getOwnerId())) {
            userId = null; // (no need to mess about with sets)
          }
          else {
            for (ObjectId communityId: communityIdSet) {
              if (isOwnerOrModerator(communityId.toString(), userIdStr)) {
View Full Code Here

      _allowedCommunityIds = allowedCommunityIds;
    }
    @Override
    public SourcePojo deserialize(JsonElement json, Type typeOfT, JsonDeserializationContext context) throws JsonParseException
    {
      SourcePojo source = new SourceFederatedQueryConfigPojo().extendBuilder(BaseApiPojo.getDefaultBuilder()).create().fromJson(json, SourcePojo.class);
      if (null != _allowedCommunityIds) {
        source.setCommunityIds(null);
        for (ObjectId communityId: _allowedCommunityIds) {
          source.addToCommunityIds(communityId);
        }
      }
      return source;
    }
View Full Code Here

    {
      communityIdStr = allowCommunityRegex(userIdStr, communityIdStr);
      boolean isApproved = isOwnerModeratorOrContentPublisherOrSysAdmin(communityIdStr, userIdStr);
     
      //create source object
      SourcePojo newSource = new SourcePojo();
      newSource.setId(new ObjectId());
      newSource.setTitle(sourcetitle);
      newSource.setDescription(sourcedesc);
      newSource.setUrl(sourceurl); // (key derived below)
      newSource.setExtractType(extracttype);
      newSource.setOwnerId(new ObjectId(userIdStr));
      newSource.setTags(new HashSet<String>(Arrays.asList(sourcetags.split(","))));
      newSource.setMediaType(mediatype);
      newSource.addToCommunityIds(new ObjectId(communityIdStr));
      newSource.setApproved(isApproved);
      newSource.setCreated(new Date());
      newSource.setModified(new Date());
      newSource.generateShah256Hash();
     
      newSource.setKey(validateSourceKey(newSource.getId(), newSource.generateSourceKey()));
     
      ///////////////////////////////////////////////////////////////////////
      // Add the new source to the harvester.sources collection
      try
      {
        // Need to double check that the community has an index (for legacy reasons):
        if (null == DbManager.getIngest().getSource().findOne(new BasicDBObject(SourcePojo.communityIds_,
            new BasicDBObject(MongoDbManager.in_, newSource.getCommunityIds()))))
        {
          for (ObjectId id: newSource.getCommunityIds()) {
            GenericProcessingController.recreateCommunityDocIndex_unknownFields(id, false);
          }
        }
        //TESTED (cut and paste from saveSource)
       
        DbManager.getIngest().getSource().save(newSource.toDb());
        String awaitingApproval = (isApproved) ? "" : " Awaiting approval by the community owner or moderators.";
       
        if (isUniqueSource(newSource, Arrays.asList(new ObjectId(communityIdStr))))
        {       
          rp.setResponse(new ResponseObject("Source", true, "New source added successfully." +
View Full Code Here

      communityIdStr = allowCommunityRegex(userIdStr, communityIdStr);
      boolean isApproved = isOwnerModeratorOrContentPublisherOrSysAdmin(communityIdStr, userIdStr);
     
      ///////////////////////////////////////////////////////////////////////
      // Try parsing the json into a SourcePojo object
      SourcePojo source = null;
      Set<ObjectId> communityIdSet = new TreeSet<ObjectId>();
      try
      {
        ///////////////////////////////////////////////////////////////////////
        // Note: Remove any communityids already in the source and set groupdID to
        // the communityid param (supports multiple IDs in a comma separated list)
        communityIdSet.add(new ObjectId(communityIdStr));
       
        source = ApiManager.mapFromApi(sourceString, SourcePojo.class, new SourcePojoApiMap(communityIdSet));
        if (null == source.getCommunityIds()) {
          source.setCommunityIds(new HashSet<ObjectId>());
        }
        for (ObjectId sid: communityIdSet) {
          source.getCommunityIds().add(sid);
        }
        source.setFederatedQueryCommunityIds(null); // (can be filled in by fillInSourcePipelineFields() below)
        source.fillInSourcePipelineFields(); // (needs to be after the community ids)
       
        // RSS search harvest types tend to be computationally expensive and therefore
        // should be done less frequently (by default once/4-hours seems good):
        if (sourceSearchesWeb(source)) {
          // If the search cycle has not been specified, use a default:
          if (null == source.getSearchCycle_secs()) {
            source.setSearchCycle_secs(4*3600); // (ie 4 hours)
          }
        }
        //TESTED
      }
      catch (Exception e)
      {
        rp.setResponse(new ResponseObject("Source", false, "Unable to serialize Source JSON. Error: " + e.getMessage()));
        return rp;
      }
     
      BasicDBObject query = null;
 
      //SPECIAL CASE: IF AN ACTIVE LOGSTASH HARVEST THEN CHECK BEFORE WE'LL PUBLISH:
      if (source.getExtractType().equalsIgnoreCase("logstash") &&
          ((null == source.getSearchCycle_secs()) || (source.getSearchCycle_secs() > 0)))
      {
        ResponsePojo rpTest = this.testSource(sourceString, 0, false, false, userIdStr);
        if (!rpTest.getResponse().isSuccess()) {
          rp.setResponse(new ResponseObject("Source", false, "Logstash not publishable. Error: " + rpTest.getResponse().getMessage()));
          return rp;
        }
       
      }//TESTED
     
      ///////////////////////////////////////////////////////////////////////
      // If source._id == null this should be a new source
      if ((source.getId() == null) && (source.getKey() == null))
      {
        ///////////////////////////////////////////////////////////////////////
        // Note: Overwrite the following fields regardless of what was sent in
        source.setId(new ObjectId());
        source.setOwnerId(new ObjectId(userIdStr));
        source.setApproved(isApproved);
        source.setCreated(new Date());
        source.setModified(new Date());
        source.setUrl(source.getUrl());
          // (key generated below from representative URL - don't worry about the fact this field is sometimes not present)
 
        source.setKey(validateSourceKey(source.getId(), source.generateSourceKey()));
 
        source.generateShah256Hash();
          // Note: Create/update the source's Shah-256 hash
     
        ///////////////////////////////////////////////////////////////////////
        // Note: Check the SourcePojo to make sure the required fields are there
        // return an error message to the user if any are missing
        String missingFields = hasRequiredSourceFields(source);
        if (missingFields != null && missingFields.length() > 0)
        {
          rp.setResponse(new ResponseObject("Source", false, missingFields));
          return rp;
        }
       
        ///////////////////////////////////////////////////////////////////////
        // Add the new source to the harvester.sources collection
        try
        {
          // Need to double check that the community has an index (for legacy reasons):
          if (null == DbManager.getIngest().getSource().findOne(new BasicDBObject(SourcePojo.communityIds_,
              new BasicDBObject(MongoDbManager.in_, source.getCommunityIds()))))
          {
            for (ObjectId id: source.getCommunityIds()) {
              GenericProcessingController.recreateCommunityDocIndex_unknownFields(id, false);
            }
          }
          //TESTED
         
          DbManager.getIngest().getSource().save(source.toDb());
          if (isUniqueSource(source, communityIdSet))
          {
            rp.setResponse(new ResponseObject("Source", true, "New source added successfully."));
          }
          else { // Still allow people to add identical sources, but warn them so they can delete it if they way
            rp.setResponse(new ResponseObject("Source", true, "New source added successfully. Note functionally identical sources are also present within your communities, which may waste system resources."));         
          }
          rp.setData(source, new SourcePojoApiMap(null, communityIdSet, null));
        }
        catch (Exception e)
        {
          rp.setResponse(new ResponseObject("Source", false, "Unable to add new source. Error: " + e.getMessage()));
        }
       
        ///////////////////////////////////////////////////////////////////////
        // If the user is not the owner or moderator we need to send the owner
        // and email asking them to approve or reject the source
        try {
          if (!isApproved)
          {
            emailSourceApprovalRequest(source);
          }
        }
        catch (Exception e) { // Unable to ask for permission, remove sources and error out
          logger.error("Exception Message: " + e.getMessage(), e);
          DbManager.getIngest().getSource().remove(new BasicDBObject(SourcePojo._id_, source.getId()));
          rp.setData((String)null, (BasePojoApiMap<String>)null); // (unset)
          rp.setResponse(new ResponseObject("Source", false, "Unable to email authority for permission, maybe email infrastructure isn't added? Error: " + e.getMessage()));
        }
       
      }//TESTED (behavior when an identical source is added)
 
      ///////////////////////////////////////////////////////////////////////
      // Existing source, update if possible
      else
      {
        if ((null != source.getPartiallyPublished()) && source.getPartiallyPublished()) {
          rp.setResponse(new ResponseObject("Source", false, "Unable to update source - the source is currently in 'partially published' mode, because it is private and you originally accessed it without sufficient credentials. Make a note of your changes, revert, and try again."));
          return rp;         
        }//TESTED
       
        ///////////////////////////////////////////////////////////////////////
        // Attempt to retrieve existing share from harvester.sources collection
        query = new BasicDBObject();
        if (null != source.getId()) {
          query.put(SourcePojo._id_, source.getId());
        }
        else if (null != source.getKey()) {
          query.put(SourcePojo.key_, source.getKey());         
        }
        try
        {
          BasicDBObject dbo = (BasicDBObject)DbManager.getIngest().getSource().findOne(query);
          // Source doesn't exist so it can't be updated
          if (dbo == null)
          {
            rp.setResponse(new ResponseObject("Source", false, "Unable to update source. The source ID is invalid."));
            return rp;
          }
         
          SourcePojo oldSource = SourcePojo.fromDb(dbo,SourcePojo.class);
          ///////////////////////////////////////////////////////////////////////
          // Note: Only an Infinit.e administrator, source owner, community owner
          // or moderator can update/edit a source
          if (null == oldSource.getOwnerId()) { // (internal error, just correct)
            oldSource.setOwnerId(new ObjectId(userIdStr));
          }
          boolean isSourceOwner = oldSource.getOwnerId().toString().equalsIgnoreCase(userIdStr);
         
          if (!isSourceOwner) {
            boolean ownerModOrApprovedSysAdmin = isApproved &&
                    (SocialUtils.isOwnerOrModerator(communityIdStr, userIdStr) || RESTTools.adminLookup(userIdStr));
           
            if (!ownerModOrApprovedSysAdmin)
            {
              rp.setResponse(new ResponseObject("Source", false, "User does not have permissions to edit this source"));
              return rp;
            }
          }//TESTED - works if owner or moderator, or admin (enabled), not if not admin-enabled

          // For now, don't allow you to change communities
          if ((null == source.getCommunityIds()) || (null == oldSource.getCommunityIds()) // (robustness)
              ||
              !source.getCommunityIds().equals(oldSource.getCommunityIds()))
          {
            rp.setResponse(new ResponseObject("Source", false, "It is not currently possible to change the community of a published source. You must duplicate/scrub the source and re-publish it as a new source (and potentially suspend/delete this one)"));
            return rp;
          }//TOTEST
         
          //isOwnerOrModerator
         
          String oldHash = source.getShah256Hash();
         
          ///////////////////////////////////////////////////////////////////////
          // Note: The following fields in an existing source cannot be changed: Key
          // Make sure new source url and key match existing source values
          // (we allow URL to be changed, though obv the key won't be changed to reflect that)
          source.setKey(oldSource.getKey());
          // Overwrite/set other values in the new source from old source as appropriate
          source.setCreated(oldSource.getCreated());
          source.setModified(new Date());
          source.setOwnerId(oldSource.getOwnerId());
         
          if (null == source.getIsPublic()) {
            source.setIsPublic(oldSource.getIsPublic());
          }//TESTED

          // Harvest status specification logic (we need normally need to keep these fields intact):
          // - If harvest completely unspecified, delete everything but num records
          // - If harvest specified, and there exists an existing harvest block then ignore
          // - If harvest specified, and the harvest has previously been deleted, then copy (except num records)
          // - Extra ... if new status object has harvested unset, then unset that
          if ((null == source.getHarvestStatus()) && (null != oldSource.getHarvestStatus())) {
            // request to unset the harvest status altogether
            source.setHarvestStatus(new SourceHarvestStatusPojo()); // new harvest status
            source.getHarvestStatus().setDoccount(oldSource.getHarvestStatus().getDoccount());
              // but keep number of records
          }
          else if ((null != oldSource.getHarvestStatus()) && (null == oldSource.getHarvestStatus().getHarvest_status())) {
            // Has previously been unset with the logic from the above clause
            source.getHarvestStatus().setDoccount(oldSource.getHarvestStatus().getDoccount());
              // (null != source.getHarvestStatus()) else would be in the clause above
          }
          else if (null != oldSource.getHarvestStatus()) {
            // Unset the harvested time to queue a new harvest cycle
            if ((null != source.getHarvestStatus()) && (null == source.getHarvestStatus().getHarvested())) {
              oldSource.getHarvestStatus().setHarvested(null);
            }
            source.setHarvestStatus(oldSource.getHarvestStatus());
          }
          //(else oldSource.getHarvestStatus is null, just retain the updated version)
         
          //TESTED: no original harvest status, failing to edit existing harvest status, delete status (except doc count), update deleted status (except doc count)
         
          // If we're changing the distribution factor, need to keep things a little bit consistent:
          if ((null == source.getDistributionFactor()) && (null != oldSource.getDistributionFactor())) {
            // Removing it:
            if (null != source.getHarvestStatus()) {
              source.getHarvestStatus().setDistributionReachedLimit(null);
              source.getHarvestStatus().setDistributionTokensComplete(null);
              source.getHarvestStatus().setDistributionTokensFree(null);             
            }
          }//TESTED
          // Distribution factor changed on an existing source: adjust the free
          // token count by the delta so the harvest state stays consistent.
          // NOTE(review): 'source.getDistributionFactor() != oldSource.getDistributionFactor()'
          // compares boxed Integers by reference; numerically-equal factors
          // outside the Integer cache can enter this branch (harmless here,
          // since the delta is then 0, but '!equals()' is the safe form).
          else if ((null != source.getDistributionFactor()) && (null != oldSource.getDistributionFactor())
              && (source.getDistributionFactor() != oldSource.getDistributionFactor()))
          {
            // Update the number of available tokens:
            if ((null != source.getHarvestStatus()) && (null != source.getHarvestStatus().getDistributionTokensFree()))
            {
              // Unboxes to int, so the arithmetic below is value-based
              int n = source.getHarvestStatus().getDistributionTokensFree() +
                    (source.getDistributionFactor() - oldSource.getDistributionFactor());
              if (n < 0) n = 0; // clamp - can't have negative free tokens
             
              source.getHarvestStatus().setDistributionTokensFree(n);
            }
          }//TESTED
         
          ///////////////////////////////////////////////////////////////////////
          // Check for missing fields:
          String missingFields = hasRequiredSourceFields(source);
          if (missingFields != null && missingFields.length() > 0)
          {
            rp.setResponse(new ResponseObject("Source", false, missingFields));
            return rp;
          }
         
          ///////////////////////////////////////////////////////////////////////
          // Note: Create/update the source's Shah-256 hash
          source.generateShah256Hash();
         
          ///////////////////////////////////////////////////////////////////////
          // Handle approval:
          if (isApproved || oldHash.equalsIgnoreCase(source.getShah256Hash())) {
            //(either i have permissions, or the source hasn't change)
           
            if (oldSource.isApproved()) { // Always approve - annoyingly no way of unsetting this
              source.setApproved(true);
            }
            else if (source.isApproved()) { // Want to re-approve
              if (!isApproved) // Don't have permission, so reset
              {           
                source.setApproved(oldSource.isApproved());
              }
            }         
          }
          else { // Need to re-approve           
            try {
View Full Code Here

   
// API testing:
   
    // API: Variable community source pojo...
    // Build a minimal source with two community memberships to exercise the API mapper
    ResponsePojo rp1 = new ResponsePojo();
    SourcePojo sp = new SourcePojo();
    sp.setUrl("http://test");
    sp.setKey(sp.generateSourceKey());
    sp.addToCommunityIds(new ObjectId("a0000000000000000000000a"));
    sp.addToCommunityIds(new ObjectId("c0000000000000000000000c"));
    //CHECK THIS DOESN'T COMPILE
    //rp1.setData(sp); // (Not allowed SourcePojo isn't a BaseApiPojo)

    ////////////////////////////////////////////////
    //CANONICAL EXAMPLE:
    // Only community "a…a" is visible, so fields tied to other communities are filtered out
    Set<ObjectId> communities = new HashSet<ObjectId>();
    communities.add(new ObjectId("a0000000000000000000000a"));
    rp1.setData(sp, new SourcePojoApiMap(null, communities, communities));
    String sRPSingleObject = rp1.toApi();
    System.out.println("RPa=" + sRPSingleObject); // ("chris" removed, toApi handles ResponsePojo specially)
    ////////////////////////////////////////////////
    System.out.println("RPb=" + ResponsePojo.toApi(rp1, rp1.getMapper())); // ("chris" removed because of mapper)
    System.out.println("RPc=" + ResponsePojo.toApi(rp1)); // ("chris" removed, toApi handles ResponsePojo specially)
   
    //API: Get a non-API object (deserialize JSON into a SourcePojo via the API map)
    String sJson = "{ 'url':'http://test2', 'isApproved': false, 'harvestBadSource': true, 'created': 'Feb 14, 2013 9:24:34 PM' } ";
    //sp = BaseApiPojo.mapFromApi(sJson, SourcePojo.class, null);
    // Equivalent to:
    SourcePojo sp2 = ApiManager.mapFromApi(sJson, SourcePojo.class, new SourcePojoApiMap(null, new HashSet<ObjectId>(), new HashSet<ObjectId>()));
    System.out.println("RPd="+new Gson().toJson(sp2)); // "alex" and "chris" both removed
   
    //API: add a list to the response Pojo
    List<SourcePojo> list = Arrays.asList(sp, sp2);
    //CHECK THIS DOESN'T COMPILE
    //rp1.setData(list); // (Not allowed SourcePojo isn't a BaseApiPojo)

    sp2.addToCommunityIds(new ObjectId("a0000000000000000000000a")); // (alex will be allowed again)
    rp1.setData(list, new SourcePojoApiMap(null, communities, communities));
    String sRPList = rp1.toApi();
    sp2.setCommunityIds(null);
   
    //API:  And get as a list
    // Serialize the raw data with the default builder: no community mapping is applied here
    String listJson =  BaseApiPojo.getDefaultBuilder().create().toJson(rp1.getData());
    System.out.println("RP=" + listJson); // include "alex" and "chris" - no mapping applied
   
   
View Full Code Here

  // Implement the regex filter
 
  static public void setInfiniteInputPathFilter(Job job, Configuration config) {
   
    String sourceStr = config.get("mongo.input.query");
    SourcePojo source = ApiManager.mapFromApi(sourceStr, SourcePojo.class, null);
    SourceFileConfigPojo fileConfig = source.getFileConfig();
    if ((null != fileConfig) &&
        ((null != fileConfig.pathInclude) || (null != fileConfig.pathExclude)))
    {
      Pattern includeRegex = null;
      Pattern excludeRegex = null;
      if (null != source.getFileConfig().pathInclude) {
        includeRegex = Pattern.compile(source.getFileConfig().pathInclude, Pattern.CASE_INSENSITIVE);
      }
      if (null != source.getFileConfig().pathExclude) {
        excludeRegex = Pattern.compile(source.getFileConfig().pathExclude, Pattern.CASE_INSENSITIVE);
      }
      InfiniteFilePathFilter.initialize(includeRegex, excludeRegex);
      setInputPathFilter(job, InfiniteFilePathFilter.class);
    }     
  }//TESTED
View Full Code Here

    // Log the shape of the incoming split for diagnostics
    String jobName = _config.get("mapred.job.name", "unknown");
    _logger.info(jobName + ": new split, contains " + _numFiles + " files, total size: " + _fileSplit.getLength());   
   
    // Recover the source definition that was serialized into the job configuration
    String sourceStr = _config.get("mongo.input.query");
    SourcePojo source = ApiManager.mapFromApi(sourceStr, SourcePojo.class, null);
    _fileConfig = source.getFileConfig();
   
    // "mongo.input.fields" carries a JSON field projection; only parse when there is
    // more than an empty object ("{}" is length 2, hence the > 2 check)
    String fields = _config.get("mongo.input.fields", "");
    if (fields.length() > 2) {
      try {
        _fieldsToDelete = (BasicDBObject) com.mongodb.util.JSON.parse(fields);
View Full Code Here

TOP

Related Classes of com.ikanow.infinit.e.data_model.store.config.source.SourcePojo

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.