Examples of com.ikanow.infinit.e.data_model.store.config.source.SourcePojo$SourcePojoDeserializer

Package com.ikanow.infinit.e.data_model.store.config.source

Examples of com.ikanow.infinit.e.data_model.store.config.source.SourcePojo$SourcePojoDeserializer

com.ikanow.infinit.e.data_model.store.config.source.SourcePojo
Class used to establish the source information for a feed; it defines the data necessary to create a feed in the system. @author cmorgan

    // File type:
    BasicDBObject query = new BasicDBObject("extractType", "Feed");
    // A useful source known to work during V0S1 testing:
    query.put("key", "arstechnica.com.tech-policy.2012.10.last-android-vendor-f.147.4.");
    
    SourcePojo feedSource = SourcePojo.fromDb(DbManager.getIngest().getSource().findOne(query), SourcePojo.class);
    hc.harvestSource(feedSource, toAdd_feed, toUpdate_feed, toDelete_feed);
    System.out.println("############# Retrieved sample feed documents: " + toAdd_feed.size() + " from " + feedSource.getUrl());
    
    // 1] Test the store and index manager by itself:
    StoreAndIndexManager.setDiagnosticMode(true);
    
//    // Test all public calls

View Full Code Here

      // (else document has full text already)
      
      // Get tags, if necessary:
      // Always overwrite tags - one of the reasons we might choose to migrate
      // Also may need source in order to support source index filtering
      SourcePojo src = _sourceCache.get(doc.getSourceKey());
      if (null == src) {
        //TODO (INF-2265): handle search index settings in pipeline mode... (also didn't seem to work?)
        BasicDBObject srcDbo = (BasicDBObject) sourcesDB.findOne(new BasicDBObject(SourcePojo.key_, doc.getSourceKey()));
        if (null != srcDbo) {
          src = SourcePojo.fromDb(srcDbo, SourcePojo.class);
          
          if (null != src.getProcessingPipeline()) {
            try {
              // Set the index settings
              HarvestController hc = new HarvestController();
              HarvestControllerPipeline hcPipe = new HarvestControllerPipeline();
              hcPipe.extractSource_preProcessingPipeline(src, hc);
            }
            catch (Exception e) {
              //DEBUG
              e.printStackTrace();
            }
          }//TESTED (by hand)
          
          _sourceCache.put(doc.getSourceKey(), src);
        }
      }
      doc.setTempSource(src); // (needed for source index filtering)
      if (null != src) {
        if (null != src.getTags()) {
          Set<String> tagsTidied = new TreeSet<String>();
          for (String s: src.getTags()) {
            String ss = s.trim().toLowerCase();
            tagsTidied.add(ss);
          }
          
          // May also want to write this back to the DB:
          //TODO (INF-2223): Handle append tags or not in the pipeline...
          if ((null == src.getAppendTagsToDocs()) || src.getAppendTagsToDocs()) {          
            if ((null == doc.getTags()) || (doc.getTags().size() < tagsTidied.size())) {
              BasicDBObject updateQuery = new BasicDBObject(DocumentPojo.sourceKey_, doc.getSourceKey());
              updateQuery.put(DocumentPojo._id_, doc.getId());
              docsDB.update(updateQuery, new BasicDBObject(DbManager.addToSet_, new BasicDBObject(
                          DocumentPojo.tags_, new BasicDBObject(DbManager.each_, tagsTidied))));

View Full Code Here

          int nRead = gzip.read(storageArray, 0, 200000);
          String s = new String(storageArray, 0, nRead, "UTF-8");
          doc.setFullText(s);
        }        
        // Get tag:
        SourcePojo src = _sourceCache.get(doc.getSourceKey());
        if (null == src) {
          BasicDBObject srcDbo = (BasicDBObject) sourcesDB.findOne(new BasicDBObject("key", doc.getSourceKey()));
          if (null != srcDbo) {
            src = new Gson().fromJson(srcDbo.toString(), SourcePojo.class);
            
            _sourceCache.put(doc.getSourceKey(), src);
          }
        }
        if (null != src) {
          Set<String> tagsTidied = new TreeSet<String>();
          for (String s: src.getTags()) {
            String ss = s.trim().toLowerCase();
            tagsTidied.add(ss);
          }
          doc.setTags(tagsTidied);
        }

View Full Code Here

      if (null == srcDbo) {
        rp.setResponse(new ResponseObject("Delete Source", false, "Error finding source or permissions error."));      
        return rp;
      }
      // OK if we've got to here we're approved and the source exists, start deleting stuff
      SourcePojo source = SourcePojo.fromDb(srcDbo, SourcePojo.class);
      
      //double check this source isn't running INF-2195
      if ( null != source.getHarvestStatus() )
      {
        //a source is in progress if its status is in progress or
        //its distribution factor does not equals its free dist tokens
        if ( (source.getHarvestStatus().getHarvest_status() == HarvestEnum.in_progress ) || 
          (null != source.getDistributionFactor() && source.getDistributionFactor() != source.getHarvestStatus().getDistributionTokensFree() ) )
        {
          rp.setResponse(new ResponseObject("Delete Source", false, "Source was still in progress, turned off"));  
          return rp;
        }
      }
      
      
      long nDocsDeleted = 0;
      if (null != source.getKey()) { // or may delete everything!
        StoreAndIndexManager dataStore = new StoreAndIndexManager();
        nDocsDeleted = dataStore.removeFromDatastoreAndIndex_bySourceKey(source.getKey(), null, false, communityId.toString());
        
        DbManager.getDocument().getCounts().update(new BasicDBObject(DocCountPojo._id_, new ObjectId(communityIdStr)), 
            new BasicDBObject(DbManager.inc_, new BasicDBObject(DocCountPojo.doccount_, -nDocsDeleted)));
        
        if (bDocsOnly) { // Update the source harvest status (easy: no documents left!)
          try {
            DbManager.getIngest().getSource().update(queryDbo,
                new BasicDBObject(MongoDbManager.set_, 
                    new BasicDBObject(SourceHarvestStatusPojo.sourceQuery_doccount_, 0L))
                );
          }
          catch (Exception e) {} // Just carry on, shouldn't ever happen and it's too late to do anything about it
          //TESTED          
        }
        else { // !bDocsOnly, ie delete source also
          DbManager.getIngest().getSource().remove(queryDbo);          
        }
        if ((null != source.getExtractType()) && source.getExtractType().equalsIgnoreCase("logstash")) {
          BasicDBObject logStashMessage = new BasicDBObject();
          logStashMessage.put("_id", source.getId());
          logStashMessage.put("deleteOnlyCommunityId", communityId);
          logStashMessage.put("sourceKey", source.getKey());
          logStashMessage.put("deleteDocsOnly", bDocsOnly);
          
          if ((null != source.getProcessingPipeline()) && !source.getProcessingPipeline().isEmpty()) {
            SourcePipelinePojo px = source.getProcessingPipeline().iterator().next();
            if ((null != px.logstash) && (null != px.logstash.distributed) && px.logstash.distributed) {
              logStashMessage.put("distributed", true);
            }
          }//TESTED (by hand)
          DbManager.getIngest().getLogHarvesterQ().save(logStashMessage);

View Full Code Here

        query.put(SourcePojo.key_, sourceIdStr);          
      }
      query.put(SourcePojo.communityIds_, communityId);


      DBObject dbo = (BasicDBObject)DbManager.getIngest().getSource().findOne(query);
      SourcePojo sp = SourcePojo.fromDb(dbo,SourcePojo.class);
      sp.setApproved(true);


      DbManager.getIngest().getSource().update(query, (DBObject) sp.toDb());
      rp.setData(sp, new SourcePojoApiMap(null, communityIdSet, null));
      rp.setResponse(new ResponseObject("Approve Source",true,"Source approved successfully"));
      
      // Send email notification to the person who submitted the source
      emailSourceApproval(sp, submitterId, "Approved");

View Full Code Here

      }
      query.put(SourcePojo.communityIds_, communityId);


      // Get the source - what we do with it depends on whether it's ever been active or not
      DBObject dbo = (BasicDBObject)DbManager.getIngest().getSource().findOne(query);
      SourcePojo sp = SourcePojo.fromDb(dbo,SourcePojo.class);
      
      // Case 1: is currently active, set to inactive
      
      if (sp.isApproved()) {
        sp.setApproved(false);
        DbManager.getIngest().getSource().update(query, (DBObject) sp.toDb());
        rp.setResponse(new ResponseObject("Decline Source",true,"Source set to unapproved, use config/source/delete to remove it"));
      }
      
      // Case 2: is currently inactive, has been active
      
      else if (null != sp.getHarvestStatus()) {
        rp.setResponse(new ResponseObject("Decline Source",false,"Source has been active, use config/source/delete to remove it"));
      }
      
      // Case 3:

View Full Code Here

  public ResponsePojo testSource(String sourceJson, int nNumDocsToReturn, boolean bReturnFullText, boolean bRealDedup, String userIdStr) 
  {
    ResponsePojo rp = new ResponsePojo();    
    try 
    {
      SourcePojo source = null;
      SourcePojoSubstitutionApiMap apiMap = new SourcePojoSubstitutionApiMap(new ObjectId(userIdStr));
      try {
        source = ApiManager.mapFromApi(sourceJson, SourcePojo.class, apiMap);
        source.fillInSourcePipelineFields();
      }
      catch (Exception e) {
        rp.setResponse(new ResponseObject("Test Source",false,"Error deserializing source (JSON is valid but does not match schema): " + e.getMessage()));            
        return rp;        
      }
      if (null == source.getKey()) {
        source.setKey(source.generateSourceKey()); // (a dummy value, not guaranteed to be unique)
      }
      if ((null == source.getExtractType()) || !source.getExtractType().equals("Federated")) {
        String testUrl = source.getRepresentativeUrl();
        if (null == testUrl) {
          rp.setResponse(new ResponseObject("Test Source",false,"Error, source contains no URL to harvest"));            
          return rp;                
        }
      }
      
      // This is the only field that you don't normally need to specify in save but will cause 
      // problems if it's not populated in test.
      ObjectId userId = new ObjectId(userIdStr);
      // Set owner (overwrite, for security reasons)
      source.setOwnerId(userId);
      if (null == source.getCommunityIds()) {
        source.setCommunityIds(new TreeSet<ObjectId>());
      }
      if (!source.getCommunityIds().isEmpty()) { // need to check that I'm allowed the specified community...
        if ((1 == source.getCommunityIds().size()) && (userId.equals(source.getCommunityIds().iterator().next())))
        {
          // we're OK only community id is user community
        }//TESTED
        else {
          HashSet<ObjectId> communities = SocialUtils.getUserCommunities(userIdStr);
          Iterator<ObjectId> it = source.getCommunityIds().iterator();
          while (it.hasNext()) {
            ObjectId src = it.next();
            if (!communities.contains(src)) {
              rp.setResponse(new ResponseObject("Test Source",false,"Authentication error: you don't belong to this community: " + src));            
              return rp;
            }//TESTED
          }
        }//TESTED
      }
      // Always add the userId to the source community Id (so harvesters can tell if they're running in test mode or not...) 
      source.addToCommunityIds(userId); // (ie user's personal community, always has same _id - not that it matters)
      
      // Check the source's admin status
      source.setOwnedByAdmin(RESTTools.adminLookup(userId.toString(), false));      
      
      if (bRealDedup) { // Want to test update code, so ignore update cycle
        if (null != source.getRssConfig()) {
          source.getRssConfig().setUpdateCycle_secs(1); // always update
        }
      }
      HarvestController harvester = new HarvestController(true);
      if (nNumDocsToReturn > 100) { // (seems reasonable)
        nNumDocsToReturn = 100;
      }
      harvester.setStandaloneMode(nNumDocsToReturn, bRealDedup);
      List<DocumentPojo> toAdd = new LinkedList<DocumentPojo>();
      List<DocumentPojo> toUpdate = new LinkedList<DocumentPojo>();
      List<DocumentPojo> toRemove = new LinkedList<DocumentPojo>();
      if (null == source.getHarvestStatus()) {
        source.setHarvestStatus(new SourceHarvestStatusPojo());
      }
      String oldMessage = source.getHarvestStatus().getHarvest_message();
      // SPECIAL CASE: FOR FEDERATED QUERIES
      if ((null != source.getExtractType()) && source.getExtractType().equals("Federated")) {
        int federatedQueryEnts = 0;
        SourceFederatedQueryConfigPojo endpoint = null;
        try {
          endpoint = source.getProcessingPipeline().get(0).federatedQuery;
        }
        catch (Exception e) {}
        if (null == endpoint) {
          rp.setResponse(new ResponseObject("Test Source",false,"source error: no federated query specified"));      
          return rp;
        }
        AdvancedQueryPojo testQuery = null;
        String errMessage = "no query specified";
        try {
          testQuery = AdvancedQueryPojo.fromApi(endpoint.testQueryJson, AdvancedQueryPojo.class);
        }
        catch (Exception e) {
          errMessage = e.getMessage();
        }
        if (null == testQuery) {
          rp.setResponse(new ResponseObject("Test Source",false,"source error: need to specifiy a valid IKANOW query to test federated queries, error: " + errMessage));      
          return rp;          
        }
        // OK if we're here then we can test the query
        SimpleFederatedQueryEngine testFederatedQuery = new SimpleFederatedQueryEngine();
        endpoint.parentSource = source;
        testFederatedQuery.addEndpoint(endpoint);
        ObjectId queryId = new ObjectId();
        String[] communityIdStrs = new String[source.getCommunityIds().size()];
        int i = 0;
        for (ObjectId commId: source.getCommunityIds()) {
          communityIdStrs[i] = commId.toString();
          i++;
        }
        testFederatedQuery.setTestMode(true);
        testFederatedQuery.preQueryActivities(queryId, testQuery, communityIdStrs);
        StatisticsPojo stats = new StatisticsPojo();
        stats.setSavedScores(0, 0);
        rp.setStats(stats);
        ArrayList<BasicDBObject> toAddTemp = new ArrayList<BasicDBObject>(1);
        testFederatedQuery.postQueryActivities(queryId, toAddTemp, rp);
        for (BasicDBObject docObj: toAddTemp) {
          DocumentPojo doc = DocumentPojo.fromDb(docObj, DocumentPojo.class);
          if (null != doc.getEntities()) {
            federatedQueryEnts += doc.getEntities().size();
          }
          
          //Metadata workaround:
          @SuppressWarnings("unchecked")
          LinkedHashMap<String, Object[]> meta = (LinkedHashMap<String, Object[]>) docObj.get(DocumentPojo.metadata_);
          if (null != meta) {
            Object metaJson = meta.get("json");
            if (metaJson instanceof Object[]) { // (in this case ... non-cached, need to recopy in, I forget why)
              doc.addToMetadata("json", (Object[])metaJson);
            }
          }          
          toAdd.add(doc);
        }
        // (currently can't run harvest source federated query)
        if (0 == federatedQueryEnts) { // (more fed query exceptions)
          source.getHarvestStatus().setHarvest_message("Warning: no entities extracted, probably docConversionMap is wrong?");
        }
        else {
          source.getHarvestStatus().setHarvest_message(federatedQueryEnts + " entities extracted");
        }
        
      }//TESTED (END FEDERATED QUERY TEST MODE, WHICH IS A BIT DIFFERENT)
      else {
        harvester.harvestSource(source, toAdd, toUpdate, toRemove);
      }      
      
      // (don't parrot the old message back - v confusing)
      if (oldMessage == source.getHarvestStatus().getHarvest_message()) { // (ptr ==)
        source.getHarvestStatus().setHarvest_message("(no documents extracted - likely a source or configuration error)");        
      }//TESTED
      
      String message = null;
      if ((null != source.getHarvestStatus()) && (null != source.getHarvestStatus().getHarvest_message())) {
        message = source.getHarvestStatus().getHarvest_message();
      }
      else {
        message = "";
      }
      List<String> errMessagesFromSourceDeser = apiMap.getErrorMessages();
      if (null != errMessagesFromSourceDeser) {
        StringBuffer sbApiMapErr = new StringBuffer("Substitution errors:\n"); 
        for (String err: errMessagesFromSourceDeser) {
          sbApiMapErr.append(err).append("\n");
        }
        message = message + "\n" + sbApiMapErr.toString();
      }//TESTED (by hand)
      
      if ((null != source.getHarvestStatus()) && (HarvestEnum.error == source.getHarvestStatus().getHarvest_status())) {
        rp.setResponse(new ResponseObject("Test Source",false,"source error: " + message));      
        rp.setData(toAdd, new DocumentPojoApiMap());        
      }
      else {
        if ((null == message) || message.isEmpty()) {
          message = "no messages from harvester";
        }
        rp.setResponse(new ResponseObject("Test Source",true,"successfully returned " + toAdd.size() + " docs: " + message));      
        try {
          // If grabbing full text
          // Also some logstash specific logic - these aren't docs so just output the entire record
          boolean isLogstash = (null != source.getExtractType()) && source.getExtractType().equalsIgnoreCase("logstash");
          List<BasicDBObject> logstashRecords = null;
          if (bReturnFullText || isLogstash) {
            for (DocumentPojo doc: toAdd) {
              if (isLogstash) {
                if (null == logstashRecords) {

View Full Code Here

   * @param sourceIdStr
   * @return
   */
  private static SourcePojo getSource(String sourceIdStr)
  {
    SourcePojo source = null;
    try
    {
      BasicDBObject query = new BasicDBObject();
      query.put(SourcePojo._id_, new ObjectId(sourceIdStr));
      source = SourcePojo.fromDb(DbManager.getIngest().getSource().findOne(query), SourcePojo.class);

View Full Code Here

    if ( isApproved )
    {
      //get source
      BasicDBObject query = new BasicDBObject();
      query.put(SourcePojo._id_, new ObjectId(sourceIdStr));
      SourcePojo source = SourcePojo.fromDb(DbManager.getIngest().getSource().findOne(query), SourcePojo.class);
      if ( source != null )
      {
        //set the search cycle secs in order of user -> toplevel -> nonexist
        //if turning off: set to negative of user - toplevel or -1
        //if turning on: set to positive of user - toplevel or null
        BasicDBObject update = new BasicDBObject(SourcePojo.modified_, new Date());        
        int searchCycle_secs = Math.abs( getSearchCycleSecs(source) );
        if ( shouldSuspend )
        {
          //turn off the source
          if ( searchCycle_secs > 0 )
          {
            update.put(SourcePojo.searchCycle_secs_, -searchCycle_secs);
          }
          else
          {
            update.put(SourcePojo.searchCycle_secs_, -1);            
          }
        }
        else
        {
          //SPECIAL CASE: DON'T ALLOW THIS FOR LOGSTASH SOURCES BECAUSE NEED TO VALIDATE BEFORE RE-ACTIVATING
          if (source.getExtractType().equalsIgnoreCase("logstash")) {
            rp.setResponse(new ResponseObject("suspendSource", false, "Can't un-suspend logstash sources using this API call, unsuspend manually (eg from the Source Editor form)"));
            return rp;
          }//TESTED
          
          //turn on the source

View Full Code Here

    fakeEndpoint.requests.add(endpoint);
    
    HashSet<String> entityTypes = new HashSet<String>();
    entityTypes.add("testentityin");
    fakeEndpoint.entityTypes = entityTypes; // set<string>
    fakeEndpoint.parentSource = new SourcePojo();
    fakeEndpoint.parentSource.setKey("fakeendpoint.123");
    fakeEndpoint.parentSource.setOwnedByAdmin(true);
    fakeEndpoint.parentSource.setTitle("fakeendpoint");
    fakeEndpoint.parentSource.setMediaType("Report");
    fakeEndpoint.titlePrefix = "fake endpoint: ";

View Full Code Here

0 1 2 3

TOP

Related Classes of com.ikanow.infinit.e.data_model.store.config.source.SourcePojo$SourcePojoDeserializer

com.google.gson.Gson

com.google.gson.JsonElement

com.ikanow.infinit.e.api.config.source.SourceHandler

com.ikanow.infinit.e.api.knowledge.DocumentHandler

com.ikanow.infinit.e.api.test.FederatedQueryTestCode

com.ikanow.infinit.e.core.execute_harvest.HarvestThenProcessController$SourceTypeHarvesterRunnable

com.ikanow.infinit.e.core.utils.SourceUtils

com.ikanow.infinit.e.data_model.api.config.SourcePojoApiMap$SourcePojoDeserializer

com.ikanow.infinit.e.data_model.custom.InfiniteFileInputFormat

com.ikanow.infinit.e.data_model.custom.InfiniteFileInputReader

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.