// Package com.ikanow.infinit.e.processing.generic.aggregation

// Source code of com.ikanow.infinit.e.processing.generic.aggregation.AssociationAggregationUtils

/*******************************************************************************
* Copyright 2012, The Infinit.e Open Source Project.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License, version 3,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
******************************************************************************/
package com.ikanow.infinit.e.processing.generic.aggregation;

import java.nio.charset.Charset;
import java.security.MessageDigest;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Map;


import org.apache.commons.codec.binary.Hex;
//import org.apache.log4j.Logger;
import org.apache.log4j.Logger;
import org.bson.types.ObjectId;

import com.ikanow.infinit.e.data_model.index.ElasticSearchManager;
import com.ikanow.infinit.e.data_model.index.IndexManager;
import com.ikanow.infinit.e.data_model.index.feature.event.AssociationFeaturePojoIndexMap;
import com.ikanow.infinit.e.data_model.store.DbManager;
import com.ikanow.infinit.e.data_model.store.MongoDbManager;
import com.ikanow.infinit.e.data_model.store.document.AssociationPojo;
import com.ikanow.infinit.e.data_model.store.feature.association.AssociationFeaturePojo;
import com.ikanow.infinit.e.data_model.store.feature.entity.EntityFeaturePojo;
import com.mongodb.BasicDBObject;
import com.mongodb.DBCollection;
import com.mongodb.DBObject;

public class AssociationAggregationUtils {

  // Not currently needed (will be when add document-event frequency updates)
  private static final Logger logger = Logger.getLogger(AssociationAggregationUtils.class)
 
  private static boolean _diagnosticMode = false;
  public static void setDiagnosticMode(boolean bMode) { _diagnosticMode = bMode; }
 
  /////////////////////////////////////////////////////////////////////////////////////// 
  /////////////////////////////////////////////////////////////////////////////////////// 

  // PUBLIC UTILITIES
 
  /////////////////////////////////////////////////////////////////////////////////////// 
  /////////////////////////////////////////////////////////////////////////////////////// 

  public static String getEventFeatureIndex(AssociationPojo evt) {
    //calculate event_index
    StringBuffer preHashIndex = new StringBuffer();
    preHashIndex.append(evt.getEntity1_index()).append('/').append(evt.getVerb_category()).append('/').append(evt.getEntity2_index());
    if (null != evt.getGeo_index()) {
      preHashIndex.append(evt.getGeo_index());
    }
    return md5checksum(preHashIndex.toString());   
  }
 
  /////////////////////////////////////////////////////////////////////////////////////// 

  private static String md5checksum(String toHash)
  {
    try
    {
      MessageDigest m = MessageDigest.getInstance("MD5");
      m.reset();
      m.update(toHash.getBytes(Charset.forName("UTF8")));
      byte[] digest = m.digest();
      return new String(Hex.encodeHex(digest));     
    }
    catch (Exception ex)
    {
      return toHash;
    }   
  }//(TESTED - cut and paste from proven PasswordEncryption class)
 
 
  /////////////////////////////////////////////////////////////////////////////////////// 
  /////////////////////////////////////////////////////////////////////////////////////// 

  // CREATING/UPDATE FEATURES
 
  /////////////////////////////////////////////////////////////////////////////////////// 
  /////////////////////////////////////////////////////////////////////////////////////// 
 
  /**
   * Add events to the elastic search index for events
   * and the mongodb collection
   * so they are searchable for searchsuggest
   *
   * Step 1.a, try to just update alias's
   * Step 1.b, if fail, create new entry
   *
   * Step 2, Update totalfreq and doccount
   *
   * Step 3, After updating totalfreq and doccount, write to ES for every group
   *
   * @param events
   */
  public static void updateEventFeatures(Map<String, Map<ObjectId, AssociationFeaturePojo>> eventFeatures)
  {
    DBCollection col = DbManager.getFeature().getAssociation()
    String savedSyncTime = null;
    for (Map<ObjectId, AssociationFeaturePojo> evtCommunity: eventFeatures.values()) {
     
      Iterator<Map.Entry<ObjectId, AssociationFeaturePojo>> it = evtCommunity.entrySet().iterator();
      while (it.hasNext()) {
        Map.Entry<ObjectId, AssociationFeaturePojo> evtFeatureKV = it.next();       
        try {         
          AssociationFeaturePojo evtFeature = evtFeatureKV.getValue();
          long nSavedDocCount = evtFeature.getDoccount();

          ObjectId communityID = evtFeature.getCommunityId();
          //try to update
          BasicDBObject query = new BasicDBObject(AssociationFeaturePojo.index_, evtFeature.getIndex());
          query.put(AssociationFeaturePojo.communityId_, communityID);

          //Step1 try to update alias
          //update arrays
          BasicDBObject multiopAliasArrays = new BasicDBObject();                     
          // Entity1 Alias:
          if (null != evtFeature.getEntity1_index()) {
            evtFeature.addEntity1(evtFeature.getEntity1_index());
          }
          if (null != evtFeature.getEntity1())
          {
            BasicDBObject multiopE = new BasicDBObject(MongoDbManager.each_, evtFeature.getEntity1());
            multiopAliasArrays.put(AssociationFeaturePojo.entity1_, multiopE);
          }
          // Entity2 Alias:
          if (null != evtFeature.getEntity2_index()) {
            evtFeature.addEntity2(evtFeature.getEntity2_index());
          }
          if (null != evtFeature.getEntity2()) {
            BasicDBObject multiopE = new BasicDBObject(MongoDbManager.each_, evtFeature.getEntity2());
            multiopAliasArrays.put(AssociationFeaturePojo.entity2_, multiopE);
          }
          // verb/verb cat alias:
          if (null != evtFeature.getVerb_category()) {
            evtFeature.addVerb(evtFeature.getVerb_category());
          }
          if (null != evtFeature.getVerb()) {
            BasicDBObject multiopE = new BasicDBObject(MongoDbManager.each_, evtFeature.getVerb());
            multiopAliasArrays.put(AssociationFeaturePojo.verb_, multiopE);
          }
          BasicDBObject updateOp = new BasicDBObject()
          updateOp.put(MongoDbManager.addToSet_, multiopAliasArrays);
          // Document count for this event
          BasicDBObject updateFreqDocCount = new BasicDBObject(AssociationFeaturePojo.doccount_, nSavedDocCount);
          updateOp.put(MongoDbManager.inc_,updateFreqDocCount);

          BasicDBObject fields = new BasicDBObject(AssociationFeaturePojo.doccount_, 1);
          fields.put(AssociationFeaturePojo.entity1_, 1);
          fields.put(AssociationFeaturePojo.entity2_, 1);
          fields.put(AssociationFeaturePojo.verb_, 1);
          //(slightly annoying, since only want these if updating dc but won't know
          // until after i've got this object)

          fields.put(AssociationFeaturePojo.db_sync_time_, 1);
          fields.put(AssociationFeaturePojo.db_sync_doccount_, 1);

          DBObject dboUpdate = null;
          if (_diagnosticMode) {
            dboUpdate = col.findOne(query,fields);
          }
          else {
            dboUpdate = col.findAndModify(query,fields,new BasicDBObject(),false,updateOp,false,true)
              // (can use findAndModify because specify index, ie the shard key)
              // (returns event before the changes above, update the feature object below)
              // (also atomically creates the object if it doesn't exist so is "distributed-safe")
          }
          if ( ( dboUpdate != null ) && !dboUpdate.keySet().isEmpty() )
          {
            AssociationFeaturePojo egp = AssociationFeaturePojo.fromDb(dboUpdate, AssociationFeaturePojo.class);
            evtFeature.setDoccount(egp.getDoccount() + nSavedDocCount);
            evtFeature.setDb_sync_doccount(egp.getDb_sync_doccount());
            evtFeature.setDb_sync_time(egp.getDb_sync_time());
            if (null != egp.getEntity1()) {
              for (String ent: egp.getEntity1()) evtFeature.addEntity1(ent);
            }
            if (null != egp.getEntity2()) { 
              for (String ent: egp.getEntity2()) evtFeature.addEntity2(ent);
            }
            if (null != egp.getVerb()) {
              for (String verb: egp.getVerb()) evtFeature.addVerb(verb);
            }             
            if (_diagnosticMode) {
              System.out.println("EventAggregationUtils.updateEventFeatures, found: " + ((BasicDBObject)egp.toDb()).toString());
              System.out.println("EventAggregationUtils.updateEventFeatures, ^^^ found from query: " + query.toString() + " / " + updateOp.toString());
            }
            // (In background aggregation mode we update db_sync_prio when checking the -otherwise unused, unlike entities- document update schedule)
          }
          else // (the object in memory is now an accurate representation of the database, minus some fields we'll now add)
          {
            // Synchronization settings for the newly created object
            evtFeature.setDb_sync_doccount(nSavedDocCount);
            if (null == savedSyncTime) {
              savedSyncTime = Long.toString(System.currentTimeMillis());
            }
            evtFeature.setDb_sync_time(savedSyncTime);

            // This is all "distributed safe" (apart from the db_syc_xxx and it doesn't matter if that is
            // out of date, the update will just be slightly out-of-date at worst) since (otherwise) these fields are
            // only set here, and the findAndModify is atomic

            BasicDBObject baseFields = new BasicDBObject();
            if (null != evtFeature.getEntity1_index()) {
              baseFields.put(AssociationFeaturePojo.entity1_index_, evtFeature.getEntity1_index());
            }
            if (null != evtFeature.getEntity2_index()) {
              baseFields.put(AssociationFeaturePojo.entity2_index_, evtFeature.getEntity2_index());               
            }
            if (null != evtFeature.getVerb_category()) {
              baseFields.put(AssociationFeaturePojo.verb_category_, evtFeature.getVerb_category());                               
            }
            baseFields.put(AssociationFeaturePojo.assoc_type_, evtFeature.getAssociation_type());
            baseFields.put(AssociationFeaturePojo.db_sync_doccount_, evtFeature.getDb_sync_doccount());
            baseFields.put(AssociationFeaturePojo.db_sync_time_, evtFeature.getDb_sync_time());                           

            if (!_diagnosticMode) {
              // Store the object
              col.update(query, new BasicDBObject(MongoDbManager.set_, baseFields));
            }
            else {
              System.out.println("EventAggregationUtils.updateEventFeatures, not found: " + query.toString() + " / " + baseFields.toString() + "/ orig_update= " + updateOp.toString());
            }
            evtFeature.setDb_sync_time(null); // (ensures that index re-sync will occur)           
           
            // (Note even in background aggregation mode we still perform the feature synchronization
            //  for new entities - and it has to be right at the end because it "corrupts" the objects)
          }
        }
        catch (Exception e) {
          // Exception, remove from feature list
          it.remove();
         
          // If an exception occurs log the error
          logger.error("Exception Message: " + e.getMessage(), e);
        }       
       
      }// (end loop over all communities for the set of features sharing and index)               
    }// (end loop over indexes)

  }//TESTED (by eye, reasonably significant changes, but still based on proven Beta code)
 
  /////////////////////////////////////////////////////////////////////////////////////// 
  /////////////////////////////////////////////////////////////////////////////////////// 

  // DOCUMENT UPDATE
 
  /////////////////////////////////////////////////////////////////////////////////////// 
  /////////////////////////////////////////////////////////////////////////////////////// 

  public static void updateMatchingEvents(AssociationFeaturePojo evtFeature, ObjectId communityId)
  {
    //(Not needed until we start to calculate event significance)
 
 
  /////////////////////////////////////////////////////////////////////////////////////// 
  /////////////////////////////////////////////////////////////////////////////////////// 

  // INDEX SYNCHRONIZATION
 
  /////////////////////////////////////////////////////////////////////////////////////// 
  /////////////////////////////////////////////////////////////////////////////////////// 

  // Synchronization: sync vs index, update sync counts for each community
 
  public static void synchronizeEventFeature(AssociationFeaturePojo eventFeature, ObjectId communityId) {
    DBCollection eventFeatureDb = DbManager.getFeature().getAssociation();
   
    // NOTE: Important that feeds update occurs before synchronization, since the sync "corrupts" the event   

    if (_diagnosticMode || (null != eventFeature.getDb_sync_time()) || (null != eventFeature.getDb_sync_prio()))
    {
      // Else this is a new feature so don't need to update the feature DB, only the index (if db_sync_prio null then have to update to avoid b/g aggergation loop)
      // (note that db_sync_prio will in practice not be set when this is a new feature because it will have same sync_doccount as doc_count)
     
      long nCurrTime = System.currentTimeMillis();
      //(query from top of the function, basically lookup on gaz_index)
      BasicDBObject update2 = new BasicDBObject();
      update2.put(AssociationFeaturePojo.db_sync_time_, Long.toString(nCurrTime));
      update2.put(AssociationFeaturePojo.db_sync_doccount_, eventFeature.getDoccount());
      BasicDBObject update = new BasicDBObject(MongoDbManager.set_, update2);
        // (also can be added to below)
      BasicDBObject update3 = new BasicDBObject(EntityFeaturePojo.db_sync_prio_, 1);
      update.put(MongoDbManager.unset_, update3);
      BasicDBObject query = new BasicDBObject(AssociationFeaturePojo.index_, eventFeature.getIndex());
      query.put(AssociationFeaturePojo.communityId_, communityId);

      // Keep the number of entity1 and entity2 sets down to a reasonable number
      // (In the end would like to be able to do this based on date rather than (essentially) completely randomly)
      int nSize;
      BasicDBObject toPull = null;
      if (null != eventFeature.getEntity1()) {
        if ((nSize = eventFeature.getEntity1().size()) > AssociationFeaturePojo.entity_MAXFIELDS) {
          if (null == toPull) toPull = new BasicDBObject();
          ArrayList<String> ent1ToRemove = new ArrayList<String>(eventFeature.getEntity1().size() - AssociationFeaturePojo.entity_MAXFIELDS);
          Iterator<String> it = eventFeature.getEntity1().iterator();
          while (it.hasNext() && (nSize > AssociationFeaturePojo.entity_MAXFIELDS)) {
            String ent = it.next();
            if (-1 == ent.indexOf('/')) { // (ie don't remove the index)
              nSize--;
              it.remove(); // (this removes from the index)
              ent1ToRemove.add(ent);
            }
          }
          toPull.put(AssociationFeaturePojo.entity1_, ent1ToRemove);
            // (this removes from the database)
        }
      }
      if (null != eventFeature.getEntity2()) {
        if ((nSize = eventFeature.getEntity2().size()) > AssociationFeaturePojo.entity_MAXFIELDS) {
          if (null == toPull) toPull = new BasicDBObject();
          ArrayList<String> ent2ToRemove = new ArrayList<String>(eventFeature.getEntity2().size() - AssociationFeaturePojo.entity_MAXFIELDS);
          Iterator<String> it = eventFeature.getEntity2().iterator();
          while (it.hasNext() && (nSize > AssociationFeaturePojo.entity_MAXFIELDS)) {
            String ent = it.next();
            if (-1 == ent.indexOf('/')) { // (ie don't remove the index)
              nSize--;
              it.remove(); // (this removes from the index)
              ent2ToRemove.add(ent);
            }
          }
          toPull.put(AssociationFeaturePojo.entity2_, ent2ToRemove);
            // (this removes from the database)
        }
      }
      if (null != toPull) {
        update.put(MongoDbManager.pullAll_, toPull);
          // (this removes from the database)
      }
      //TESTED (2.1.4.3b, including no index removal clause)
     
      if (_diagnosticMode) {
        if ((null != eventFeature.getDb_sync_time()) || (null != eventFeature.getDb_sync_prio())) {
          System.out.println("EventAggregationUtils.synchronizeEventFeature, featureDB: " + query.toString() + " / " + update.toString());
        }
        else {
          System.out.println("(WOULD NOT RUN) EventAggregationUtils.synchronizeEventFeature, featureDB: " + query.toString() + " / " + update.toString());       
        }
      }
      else {
        eventFeatureDb.update(query, update, false, true);
      }
    }

    if (_diagnosticMode) {
      System.out.println("EventAggregationUtils.synchronizeEventFeature, synchronize: " + new StringBuffer(eventFeature.getIndex()).append(':').append(communityId).toString() + " = " +
          IndexManager.mapToIndex(eventFeature, new AssociationFeaturePojoIndexMap()));
    }
    else {
      ElasticSearchManager esm = IndexManager.getIndex(AssociationFeaturePojoIndexMap.indexName_);       
      esm.addDocument(eventFeature, new AssociationFeaturePojoIndexMap(), null, true);
    }
  }//TESTED
 
  ///////////////////////////////////////////////////////////////////////////////////////
 
  // Set flag to synchronize entity features
 
  public static void markAssociationFeatureForSync(AssociationFeaturePojo assocFeature, ObjectId communityId) {
    DBCollection assocFeatureDb = DbManager.getFeature().getAssociation();
    double dPrio = 100.0*(double)assocFeature.getDoccount()/(0.01 + (double)assocFeature.getDb_sync_doccount());
    assocFeature.setDb_sync_prio(dPrio);
    BasicDBObject query = new BasicDBObject(AssociationFeaturePojo.index_, assocFeature.getIndex());
    query.put(AssociationFeaturePojo.communityId_, communityId);
    BasicDBObject update = new BasicDBObject(MongoDbManager.set_, new BasicDBObject(AssociationFeaturePojo.db_sync_prio_, dPrio));
    if (_diagnosticMode) {
      System.out.println("EntityAggregationUtils.markAssociationFeatureForSync, featureDB: " + query.toString() + " / " + update.toString());       
    }
    else {
      assocFeatureDb.update(query, update, false, true);
    }
  }//TESTED
}
// TOP
//
// Related Classes of com.ikanow.infinit.e.processing.generic.aggregation.AssociationAggregationUtils
//
// TOP
// Copyright © 2018 www.massapi.com. All rights reserved.
// All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.