Package com.ikanow.infinit.e.data_model.store.feature.entity

Examples of com.ikanow.infinit.e.data_model.store.feature.entity.EntityFeaturePojo


      {
        if ( field.equals(AssociationFeaturePojo.entity1_) )
          term = ent1;
        else {
          bExtraQueryTerms = true;
          EntityFeaturePojo alias = null;
          if (null != aliasTable) {
            alias = aliasTable.getAliasMaster(ent1);
          }
          if (null != alias) { // Found!
            boolQuery.must(QueryBuilders.termsQuery(AssociationFeaturePojo.entity1_index_, alias.getAlias().toArray()));
          }
          else {
            boolQuery.must(QueryBuilders.termQuery(AssociationFeaturePojo.entity1_index_, ent1));
          }//TESTED
        }
      }
      if ( !verb.equals("null") )
      {
        if ( field.equals(AssociationFeaturePojo.verb_) )
          term = verb;
        else
        {
          bExtraQueryTerms = true;
          boolQuery.must(QueryBuilders.queryString(new StringBuffer("+").append(verb.replaceAll("\\s+", " +")).toString()).
              defaultField(AssociationFeaturePojo.verb_));
        }
      }
      if ( !ent2.equals("null") )
      {
        if ( field.equals(AssociationFeaturePojo.entity2_) )
          term = ent2;
        else {
          bExtraQueryTerms = true;
          EntityFeaturePojo alias = null;
          if (null != aliasTable) {
            alias = aliasTable.getAliasMaster(ent2);
          }
          if (null != alias) { // Found!
            boolQuery.must(QueryBuilders.termsQuery(AssociationFeaturePojo.entity2_index_, alias.getAlias().toArray()));
          }
          else {
            boolQuery.must(QueryBuilders.termQuery(AssociationFeaturePojo.entity2_index_, ent2));
          }
        }//TESTED (cut and paste from entity1)
     

      String escapedterm = null;
      StandardTokenizer st = new StandardTokenizer(Version.LUCENE_30, new StringReader(ContentUtils.stripDiacritics(term)));
      CharTermAttribute termAtt = st.addAttribute(CharTermAttribute.class);
      StringBuffer sb = new StringBuffer();
      try {
        try {
          st.reset();
          while (st.incrementToken()) {
            if (sb.length() > 0) {
              sb.append(" +");
            }
            else {
              sb.append('+');           
            }
            sb.append(luceneEncodeTerm(termAtt.toString()));
          }
        }
        finally {
          st.close();
        }
      } catch (IOException e) {
        e.printStackTrace();
      }     
      if (!term.endsWith(" ") || (0 == sb.length())) { // Could be in the middle of typing, stick a * on the end
        sb.append('*');
      }//TESTED     

      escapedterm = sb.toString();
     
      // Also create an internal Lucene index for aliases, in case any of them do not have actual entities representing them
      List<EntityFeaturePojo> extraEntries = null;
      BoolQueryBuilder extraQueryTerms = null;
      if (field.startsWith("entity")) {
        String indexField = field.startsWith("entity1") ? "entity1_index" : "entity2_index";
        if (null != aliasTable) {
          extraEntries = checkAliasMasters(aliasTable, escapedterm);
        }
        if (null != extraEntries) {
          extraQueryTerms = QueryBuilders.boolQuery();
          int nExtraTerms = 0;
          Iterator<EntityFeaturePojo> aliasIt = extraEntries.iterator();
          while (aliasIt.hasNext()) {
            EntityFeaturePojo alias = aliasIt.next();           
            nExtraTerms += alias.getAlias().size();
           
            if (!bExtraQueryTerms && (nExtraTerms > 20)) { // If not filtering on event type we'll be more aggressive
              break;
            }//TESTED
            if (bExtraQueryTerms && (nExtraTerms > 60)) { // If the number of terms gets too large bail anyway
              break;
            }//TESTED
           
            extraQueryTerms.should(QueryBuilders.termsQuery(indexField, alias.getAlias().toArray()));
            aliasIt.remove();
           
          }//end loop over entities
        }//if found new aliases
       
      }//(if this is an entity lookup) TESTED - including breaking out because of # of terms
     
      // (end initial alias handling)
     
      if (null == extraQueryTerms) {
        boolQuery.must(QueryBuilders.queryString(escapedterm).defaultField(field));
      }
      else {//(in this case combine the escaped term with the aliases
        extraQueryTerms.should(QueryBuilders.queryString(escapedterm).defaultField(field));
        boolQuery.must(extraQueryTerms);
      }//TESTED
      boolQuery.must(QueryBuilders.termsQuery(AssociationFeaturePojo.communityId_, communityIdStrs));

      searchOptions.addSort(AssociationFeaturePojo.doccount_, SortOrder.DESC);

      // Work out which fields to return:
      //TODO (INF-1234) need to work out what to do with quotations and similar here (ie entityX without entityX_index)
      String returnfield;
      boolean bReturningEntities = true;
      if ( field.equals(AssociationFeaturePojo.entity1_) ) {
        returnfield = AssociationFeaturePojo.entity1_index_;
        searchOptions.addFields( AssociationFeaturePojo.entity1_index_, AssociationFeaturePojo.doccount_);
      }
      else if ( field.equals(AssociationFeaturePojo.entity2_)) {
        returnfield = AssociationFeaturePojo.entity2_index_;
        searchOptions.addFields( AssociationFeaturePojo.entity2_index_, AssociationFeaturePojo.doccount_);
      }
      else {
        bReturningEntities = false;
        returnfield = AssociationFeaturePojo.verb_;
        searchOptions.addFields( AssociationFeaturePojo.verb_, AssociationFeaturePojo.verb_category_,  AssociationFeaturePojo.doccount_);
      }

      int nNumSuggestionsToReturn = 20;
      if (bReturningEntities && (null != aliasTable)) {
        searchOptions.setSize(3*nNumSuggestionsToReturn); // we're going to remove some duplicates so get more than we need
      }
      else { // normal case
        searchOptions.setSize(nNumSuggestionsToReturn);
      }

      SearchResponse rsp = esm.doQuery(boolQuery, searchOptions);
      SearchHit[] docs = rsp.getHits().getHits();

      //Currently this code takes the results and puts
      //them into a set so there are no duplicates.
      //Duplicates occur, for example, when you search for
      //obama and get obama/quotation/quote1 and obama/travel/spain.
      //May want to handle this differently, or at least sum up
      //the frequency.
      Set<String> suggestions = new HashSet<String>();

      for (SearchHit hit: docs)
      {
        SearchHitField retField = hit.field(returnfield); // (this can be null in theory/by mistake)
        if (null != retField) {
          String suggestion = (String) retField.value();
          if (bReturningEntities && (null != aliasTable))
          {
            // More alias handling
            EntityFeaturePojo alias = aliasTable.getAliasMaster(suggestion);
            if (null != alias) { // Found!
              if (alias.getIndex().equalsIgnoreCase("discard")) { // Discard this entity
                continue;
              }
              else {
                // (these need to be present)
                suggestion = alias.getIndex();
              }
            }//TESTED
          }
          else { // (old code, still valid for verbs or no aliases)
            if ( returnfield.equals(AssociationFeaturePojo.verb_) && hit.field(AssociationFeaturePojo.verb_category_) != null )
              //for some reason verb_cat can be null!?!?! i think this is broken (ent1 facebook inc/company verb *)
            {
              String verbcat = (String)hit.field(AssociationFeaturePojo.verb_category_).value();
              suggestion += " (" + verbcat + ")";
              suggestions.add(verbcat);
            }
          }
          suggestions.add(suggestion);

          if (suggestions.size() >= nNumSuggestionsToReturn) {
            break;
          }

        } // (end return string valid)
      }//end loop over suggestions

      // Add any aliases that I couldn't explicitly convert to query terms
      if ((null != extraEntries) && (suggestions.size() < nNumSuggestionsToReturn)) {
        for (EntityFeaturePojo alias: extraEntries) {
          suggestions.add(alias.getIndex());
          if (suggestions.size() >= nNumSuggestionsToReturn) {
            break;
          }         
        }
      }//(end add any remaining entries)
View Full Code Here


            String masterAlias = entry.getKey();
            EntityPojo masterAliasEntity = new EntityPojo();
            masterAliasEntity.setIndex(masterAlias);
           
            BasicDBObject entityFeatureObj = (BasicDBObject) entry.getValue();
            EntityFeaturePojo aliasInfo = null;
            try {
              aliasInfo = EntityFeaturePojo.fromDb(entityFeatureObj, EntityFeaturePojo.class);
            }
            catch (Exception e) {
              logger.debug("Failed to deserialize aliasInfo", e);
            }

            if ((null != aliasInfo) && (null != aliasInfo.getAlias()))
            {             
              aliasInfo.setIndex(masterAlias);
              if ((null == aliasInfo.getDimension()) && (null != aliasInfo.getType())) {
                aliasInfo.setDimension(DimensionUtility.getDimensionByType(aliasInfo.getType()));
              }//TESTED
             
              logger.debug("aliasTable entry: " + aliasInfo.getIndex() + " vs " + Arrays.toString(aliasInfo.getAlias().toArray()));
             
              // This is going to collide in an ugly fashion across multiple communities,
              // we just have to live with that
              for (String aliasIndex: aliasInfo.getAlias()) {
                _aliasTable.put(aliasIndex, aliasInfo);
              }
              _aliasTable.put(aliasInfo.getIndex(), aliasInfo);
              EntityFeaturePojo currAlias = _reverseAliasTable.get(aliasInfo.getIndex());
              if (null == currAlias) {
                _reverseAliasTable.put(aliasInfo.getIndex(),aliasInfo);                 
              }
              else { // Collision ... this we can handle a little-bit more elegantly
                currAlias.getAlias().addAll(aliasInfo.getAlias());
                _reverseAliasTable.put(aliasInfo.getIndex(),currAlias);                 
              }                 
            }
          }
        }
View Full Code Here

    int nSynced = 0;
   
    List<EntityFeaturePojo> entities = new ArrayList<EntityFeaturePojo>();
    while ( dbc.hasNext() )
    {
      EntityFeaturePojo feature = EntityFeaturePojo.fromDb(dbc.next(),EntityFeaturePojo.class);
       
      if (null != feature.getAlias()) { // (some corrupt gazateer entry)

        // Handle groups (system group is: "4c927585d591d31d7b37097a")
        // if there is no community id, add system group (something is wrong if this happens?)
        if (null == feature.getCommunityId())
        {
          feature.setCommunityId(new ObjectId("4c927585d591d31d7b37097a"));           
        }
      }
     
      entities.add(feature);
      nSynced++;
View Full Code Here

      int nArraySize = (cur.count() > 1000) ? 1000 : cur.count();
      ArrayList<EntityFeaturePojo> batchList = new ArrayList<EntityFeaturePojo>(nArraySize);     
     
      while (cur.hasNext())
      {
        EntityFeaturePojo gp = EntityFeaturePojo.fromDb(cur.next(),EntityFeaturePojo.class);
        batchList.add(gp);
        if (batchList.size() >= nArraySize) {
          internalDelete(batchList, elasticManager);
          batchList.clear();
        }
View Full Code Here

  // S2: calc pythag significance, store first/last values ready for S3
  // S3: first time through, do sqrt bit of pythag, last time through add to PQ
  // S4: overwrite the entity values with aliased entities where necessary
 
  private void stage1_initAlias(EntSigHolder shp) {
    EntityFeaturePojo alias = _s1_aliasLookup.getAliasMaster(shp.index);
    if (null != alias) { // overwrite index
      if (alias.getIndex().equalsIgnoreCase("document_discard")) {
        // (document discard... shouldn't have this document at this point, we'll just carry on if we do though)
        return;
      }
      if (alias.getIndex().equalsIgnoreCase("discard")) {
        shp.aliasInfo = alias;
        shp.masterAliasSH = null;
        return;
      }     
      EntSigHolder masterAliasSH = null;
      if (null == _s1_aliasSummary) {
        _s1_aliasSummary = new HashMap<String, EntSigHolder>();
      }
      else {
        masterAliasSH = _s1_aliasSummary.get(alias.getIndex());
      }
      if (null == masterAliasSH) {
        masterAliasSH = new EntSigHolder(null, 0, null); //(use ESH as handy collection of req'd vars)
        _s1_aliasSummary.put(alias.getIndex(), masterAliasSH);             
      }     
      shp.masterAliasSH = masterAliasSH;
      shp.aliasInfo = alias;
      shp.masterAliasSH.aliasInfo = alias; // (no harm storing this in 2 places)
    }
View Full Code Here

      outputFilter = outputFilter.mustNot(FilterBuilders.nestedFilter(DocumentPojo.entities_,
                        FilterBuilders.termsFilter(EntityPojo.docQuery_index_, docDiscardAlias.getAlias().toArray())));
     
      if (recursionLevel <= 1) { // (only go two deep for now)
        for (String aliasIndex: docDiscardAlias.getAlias()) {
          EntityFeaturePojo docDiscardSubAlias = _aliasLookup.getAliases(aliasIndex);
          if (null != docDiscardSubAlias) {
            outputFilter = addNegativeSelectorToFilter(docDiscardSubAlias, outputFilter, 1 + recursionLevel);
          }
        }//TESTED
      }
View Full Code Here

    BoolFilterBuilder outputFilter = null;
   
    // First off: document discard aliases:
   
    if (null != _aliasLookup) { // Check out the document discard table...     
      EntityFeaturePojo docDiscardAlias = _aliasLookup.getAliases("DOCUMENT_DISCARD");
      if (null != docDiscardAlias) {       
        outputFilter = addNegativeSelectorToFilter(docDiscardAlias, outputFilter, 0);
      }           
    }//TESTED (by hand, nothing repeatable)
   
View Full Code Here

        }//TESTED
      }
      //TESTED (entity+association) - entity options, add dname as exact text query
     
      if (null != _aliasLookup) {
        EntityFeaturePojo masterAlias = _aliasLookup.getAliases(qt.entity)
          // (need to do it this way round to get the semantic links)
        if (null != masterAlias) {
          if (null == termBoolQ) {
            termBoolQ = QueryBuilders.boolQuery();
          }
          sQueryTerm.append(" OR ").append(sFieldName).append(":$manual_aliases");
          termBoolQ = termBoolQ.should(QueryBuilders.termQuery(sFieldName, qt.entity));
          termQ = termBoolQ = termBoolQ.should(QueryBuilders.termsQuery(sFieldName, masterAlias.getAlias().toArray()));

          // If want to add manual aliases as full text also...
          if ((null != qt.entityOpt) && qt.entityOpt.rawText) {
            if (null == this._extraFullTextTerms) {
              _extraFullTextTerms = new LinkedList<AdvancedQueryPojo.QueryTermPojo>();
            }
            String nonIndexField = null;
            if (AssociationPojo.docQuery_geo_index_ != sFieldName) { // (geo has no non-indexed form)
              nonIndexField = (AssociationPojo.docQuery_entity1_index_ == sFieldName) ? AssociationPojo.docQuery_entity1_ : AssociationPojo.docQuery_entity2_;
            }
           
            // (slightly annoying because I have to derive the dnames for all of them)
            for (String alias: masterAlias.getAlias()) {
              int nIndex2 = alias.lastIndexOf('/');
              String dName = alias.substring(0, nIndex2);
             
              if (EntityPojo.docQuery_index_ == sFieldName) { // (note: can use pointers here)
                AdvancedQueryPojo.QueryTermPojo qtExtra = new AdvancedQueryPojo.QueryTermPojo();
                qtExtra.etext = dName;
                _extraFullTextTerms.add(qtExtra);
              }
              else if (null != nonIndexField) {
                termQ = termBoolQ = termBoolQ.should(CrossVersionQueryBuilders.matchPhraseQuery(nonIndexField, dName));
              }
            }
            if (EntityPojo.docQuery_index_ == sFieldName) { // (note: can use pointers here)
              sQueryTerm.append(" OR (($manual_aliases").append("))");
            }
            else if (null != nonIndexField) {
              sQueryTerm.append(" OR ").append(nonIndexField).append(":$manual_aliases");                       
            }
           
          }//TESTED (entity + association) - aliases #1
         
          // Recall: we're abusing linkdata to contain aliases:
          if ((null != masterAlias.getSemanticLinks()) && !masterAlias.getSemanticLinks().isEmpty()) {
            String nonIndexField = null;
            if (AssociationPojo.docQuery_geo_index_ != sFieldName) { // (geo has no non-indexed form)
              nonIndexField = (AssociationPojo.docQuery_entity1_index_ == sFieldName) ? AssociationPojo.docQuery_entity1_ : AssociationPojo.docQuery_entity2_;
            }
           
            if (null == this._extraFullTextTerms) {
              _extraFullTextTerms = new LinkedList<AdvancedQueryPojo.QueryTermPojo>();
            }
            for (String textAlias: masterAlias.getSemanticLinks()) {
              if (EntityPojo.docQuery_index_ == sFieldName) { // (note: can use pointers here)
                AdvancedQueryPojo.QueryTermPojo qtExtra = new AdvancedQueryPojo.QueryTermPojo();
                qtExtra.etext = textAlias;
                _extraFullTextTerms.add(qtExtra);
              }
View Full Code Here

      else {
        System.out.println("FAIL: " + response.getMessage());
      }
      // Change 1 of the aliases
      Iterator<EntityFeaturePojo> it = aliases.values().iterator();
      EntityFeaturePojo alias = it.next();
      alias.addToSemanticLinks(Arrays.asList("AlexTest"));
      EntityFeaturePojo alias2 = it.next();
      alias2.addAlias("XXXXX/person");
      Set<String> failures = infDriver.updateAliases(Arrays.asList(alias, alias2), aliasCommIdStr, false, response);
      if (!response.isSuccess()) {
        System.out.println("UPDATE TOTAL FAILURE: " + response.getMessage());
      }
      else if (!failures.isEmpty()) {
        System.out.println("UPDATE FAILURES: " + Arrays.toString(failures.toArray()));
      }
      else {
        System.out.println("UPDATED " + alias.getIndex() + " AND " + alias2.getIndex());
      }
      // Check upserting (failure and success) into an existing share
      // upsert fail:
      String savedAlias = alias.getIndex();
      alias.setIndex("testXXX/person");
      if (aliases.containsKey("testXXX/person")) {
        System.out.println("(skipping upsert fail, target index exists)");
      }
      else {
        failures = infDriver.updateAliases(Arrays.asList(alias, alias2), aliasCommIdStr, false, response);
        if (!response.isSuccess()) {
          System.out.println("UPDATE TOTAL FAILURE (CORRECT IF ONLY 1 ALIAS SPECIFIED): " + response.getMessage() + " " + Arrays.toString(failures.toArray()));
        }
        else if (!failures.isEmpty()) {
          System.out.println("UPSERT CORRECTLY FAILS: " + Arrays.toString(failures.toArray()));
        }
        else {
          System.out.println("INCORRECTLY UPDATED " + alias.getIndex());
        }
      }
      // upsert success:
      failures = infDriver.updateAliases(Arrays.asList(alias), aliasCommIdStr, true, response);
      if (!response.isSuccess()) {
        System.out.println("UPDATE TOTAL FAILURE: " + response.getMessage());
      }
      else if (!failures.isEmpty()) {
        System.out.println("UPDATE FAILURES: " + Arrays.toString(failures.toArray()));
      }
      else {
        System.out.println("UPDATED " + alias.getIndex());
      }
      // Check upserting into a new community     
      failures = infDriver.updateAliases(Arrays.asList(alias), emptyAliasCommIdStr, true, response);
      if (!response.isSuccess()) {
        System.out.println("UPDATE TOTAL FAILURE: " + response.getMessage());
      }
      else if (!failures.isEmpty()) {
        System.out.println("UPDATE FAILURES: " + Arrays.toString(failures.toArray()));
      }
      else {
        System.out.println("UPDATED " + alias.getIndex());
      }
      // Check removal
      boolean bRemoval = true;
      if (bRemoval) {
        failures = infDriver.removeAliases(Arrays.asList(alias.getIndex()), emptyAliasCommIdStr, response);
        if (!response.isSuccess()) {
          System.out.println("REMOVE TOTAL FAILURE: " + response.getMessage());
        }
        else if (!failures.isEmpty()) {
          System.out.println("REMOVE FAILURES: " + Arrays.toString(failures.toArray()));
        }
        else {
          System.out.println("REMOVE " + alias.getIndex());
        }
        failures = infDriver.removeAliases(Arrays.asList(alias.getIndex()), aliasCommIdStr, response);
        if (!response.isSuccess()) {
          System.out.println("REMOVE TOTAL FAILURE: " + response.getMessage());
        }
        else if (!failures.isEmpty()) {
          System.out.println("REMOVE FAILURES: " + Arrays.toString(failures.toArray()));
        }
        else {
          System.out.println("REMOVE " + alias.getIndex());
        }
        // Reset old shares back again
        alias.setIndex(savedAlias);
        if (null != alias.getSemanticLinks()) {
          alias.getSemanticLinks().remove("AlexTest");
        }
        alias.getAlias().remove("XXXXX/person");
        if (null != alias2.getSemanticLinks()) {
          alias2.getSemanticLinks().remove("AlexTest");
        }
        alias2.getAlias().remove("XXXXX/person");
        failures = infDriver.updateAliases(Arrays.asList(alias, alias2), aliasCommIdStr, false, response);
        System.out.println("RESET: " + response.isSuccess() + " " + (2 - failures.size()));
      }
    }//(end alias testing)
   
View Full Code Here

TOP

Related Classes of com.ikanow.infinit.e.data_model.store.feature.entity.EntityFeaturePojo

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.