Package com.ikanow.infinit.e.data_model.index

Examples of com.ikanow.infinit.e.data_model.index.ElasticSearchManager


        AssociationFeaturePojo event = AssociationFeaturePojo.fromDb(cur.next(),AssociationFeaturePojo.class)
        events.add(event);
        eventIds.add(new StringBuffer(event.getIndex()).append(":").append(event.getCommunityId()).toString());
        eventFeatureDB.remove(new BasicDBObject("index", event.getIndex()));
      }
      ElasticSearchManager elasticManager = ElasticSearchManager.getIndex("association_index");
      elasticManager.bulkDeleteDocuments(eventIds);
     
    } catch (NumberFormatException e) {
      e.printStackTrace();
    } catch (MongoException e) {
      e.printStackTrace();
View Full Code Here


    String[] communityIdStrs = SocialUtils.getCommunityIds(userIdStr, communityIdStrList);
   
    //(timing)
    long nQuerySetupTime = System.currentTimeMillis();
 
    ElasticSearchManager indexMgr = getIndexManager(communityIdStrs);
    SearchRequestBuilder searchSettings = indexMgr.getSearchOptions();

    StringBuffer querySummary = new StringBuffer();
    BaseQueryBuilder queryObj = null;
    InternalTempFilterInfo tempFilterInfo = null;
    try {
      queryObj = getBaseQuery(query, communityIdStrs, communityIdStrList, userIdStr, querySummary);
      if (null == queryObj) { // only occurs if has 1 element with ftext starting $cache:
        return getSavedQueryInstead(query.qt.get(0).ftext.substring(7), communityIdStrs, query); // (step over cache preamble)
      }
      tempFilterInfo = getBaseFilter(query, communityIdStrs);
    }
    catch (Exception e) {
      Globals.populateStackTrace(errorString, e);
      if (null != e.getCause()) {
        errorString.append("[CAUSE=").append(e.getCause().getMessage()).append("]");
        Globals.populateStackTrace(errorString, e.getCause());       
      }
      errorString.append(": " + e.getMessage());
      return null;
    }
   
    //DEBUG
    //querySummary.append(new Gson().toJson(query, AdvancedQueryPojo.class));
   
  // 0.4] Pre-Lucene Scoring
   
    // 0.4.1] General
   
    // Different options:
    //   a] Get the most recent N documents matching the query, score post-query
    //   b] Get the N highest (Lucene) scoring documents, incorporate significance post-query if desired
    // In both cases, N depends on whether significance calculation is taking place (and on the "skip" param)
   
    int nRecordsToOutput = query.output.docs.numReturn;
    int nRecordsToSkip = query.output.docs.skip;
    int nRecordsToGet = query.score.numAnalyze;

    final int nMAXRECORDSTOOUTPUT = 10000;
    final int nMAXRECORDSTOGET = 20000;
   
    // Some sanity checking on doc numbers:
    if (nRecordsToOutput > nMAXRECORDSTOOUTPUT) { // Upper limit...
      errorString.append(": Max # docs to return is 10000.");
      return null;
    }
    if (nRecordsToGet < nRecordsToOutput) {
      nRecordsToGet = nRecordsToOutput;
    }
    else if (nRecordsToGet > nMAXRECORDSTOGET) { // Upper limit...
      nRecordsToGet = nMAXRECORDSTOGET; // (we can do something sensible with this so carry on regardless)
    }
   
    boolean bUseSignificance = (query.score.sigWeight > 0.0);
    boolean bNeedExtraResultsForEnts =
      ((query.output.aggregation != null) && (query.output.aggregation.entsNumReturn != null) && (query.output.aggregation.entsNumReturn > 0))
        ||
      (query.output.docs.enable && (query.output.docs.numReturn > 0) && (query.output.docs.ents) && (query.score.scoreEnts));
   
    if (bUseSignificance || bNeedExtraResultsForEnts) {
     
      // Some logic taken from the original "knowledge/search"
      while ( (nRecordsToSkip + nRecordsToOutput > nRecordsToGet) && (nRecordsToGet <= nMAXRECORDSTOGET) )
      {
        nRecordsToGet += nRecordsToGet;
      }
      if (nRecordsToGet > nMAXRECORDSTOGET) {
        errorString.append(": Can only skip through to 20000 documents.");       
        return null;
      }
      searchSettings.setSize(nRecordsToGet);
     
      //TESTED
    }
    else if (query.output.docs.enable) { // In this case we just need the minimum number of records
      // (whether searching by date or by relevance)
      searchSettings.setFrom(nRecordsToSkip);
      nRecordsToSkip = 0; // (so it isn't double counted in the processing module)
      nRecordsToGet = nRecordsToOutput;
      searchSettings.setSize(nRecordsToGet);
      //TESTED
    }
    else { // In this case we're just outputting aggregations, and not even ones that come from the docs
      nRecordsToGet = 0; // (use this variable everywhere where we care about bring docs back either to output or for suitable aggregation)
      searchSettings.setSize(0);
    }
   
    // Sort on score if relevance is being used   
   
    if (nRecordsToGet > 0) {
      if (query.score.relWeight > 0.0) { // (b) above
        // Using score is default, nothing to do
      }
      else { // (a) above
        // Debug code, if rel weight negative then use date to check Lucene score is better...
        if (query.score.relWeight < 0.0) {
          query.score.relWeight = -query.score.relWeight;
        }
        // Set Lucene to order:
        searchSettings.addSort(DocumentPojo.publishedDate_, SortOrder.DESC);
      }//TOTEST
    }//(if docs aren't enabled, don't need to worry about sorting)
   
    // 0.4.2] Prox scoring (needs to happen after [0.3])

    // Add proximity scoring:
    boolean bLowAccuracyDecay = false;
    if ((nRecordsToGet > 0) || (null == _scoringParams.adjustAggregateSig) || _scoringParams.adjustAggregateSig) {
      // (ie if we're getting docs or applying scores to entities)
     
      if (!_aggregationAccuracy.equals("full")) {
        bLowAccuracyDecay = true;
      }
      queryObj = addProximityBasedScoring(queryObj, searchSettings, query.score, tempFilterInfo.parentFilterObj, bLowAccuracyDecay);
     
      if (null == _scoringParams.adjustAggregateSig) { // auto-decide .. if ftext is set and is non-trivial
        if ((null != query.score.timeProx) || (null != query.score.geoProx)) {
            // (These are set to null above if badly formed)
          _scoringParams.adjustAggregateSig = true;         
        }
      }
     
    }// (else not worth the effort) 
   
    // 0.4.3] Source weightings (if any)
   
    queryObj = applyManualWeights(queryObj, query.score);
   
  // 0.5] Pre-lucene output options
   
    // only return the id field and score
    // (Both _id and score come back as default options, SearchHit:: getId and getScore, don't need anything else)

    // Facets
   
    // (These are needed for the case where we need to perform aggregations manually)
    Integer manualEntsNumReturn = null;
    Integer manualEventsNumReturn = null;
    Integer manualFactsNumReturn = null;
    Integer manualGeoNumReturn = null;
   
    //DEBUG
    //System.out.println(new Gson().toJson(query.output.aggregation));
   
    if ((null != query.output.aggregation) && (null != query.output.aggregation.raw)) { // Like query, specify raw aggregation (Facets)
      // Gross raw handling for facets
      if ((null != query.raw) && (null != query.raw.query)) {
        // Don't currently support raw query and raw facets because I can't work out how to apply
        // the override on group/source!
        errorString.append(": Not currently allowed raw query and raw facets");
        return null;
      }
      else { // Normal code
        searchSettings.setFacets(query.output.aggregation.raw.getBytes());
      }
    }
    else { // Apply various aggregation (=="facet") outputs to searchSettings
      boolean bSpecialCase = (null != query.raw) && (null != query.raw.query);
     
      if (!_aggregationAccuracy.equals("full")) {
        if (null != query.output.aggregation) {
          if (_aggregationAccuracy.equals("low")) {
            manualEntsNumReturn = query.output.aggregation.entsNumReturn;
            manualEventsNumReturn = query.output.aggregation.eventsNumReturn;
            manualFactsNumReturn = query.output.aggregation.factsNumReturn;
            manualGeoNumReturn = query.output.aggregation.geoNumReturn;
          }                   
          query.output.aggregation.entsNumReturn = null;
          query.output.aggregation.eventsNumReturn = null;
          query.output.aggregation.factsNumReturn = null;
          query.output.aggregation.geoNumReturn = null;
          // (allow time aggregation)
          // (allow source aggregation)
        }
      }
      AggregationUtils.parseOutputAggregation(query.output.aggregation, _aliasLookup,
                            tempFilterInfo.entityTypeFilterStrings, tempFilterInfo.assocVerbFilterStrings,
                              searchSettings, bSpecialCase?tempFilterInfo.parentFilterObj:null);

      // In partial accuracy case, restore aggregation
      if (null != manualEntsNumReturn) {
        query.output.aggregation.entsNumReturn = manualEntsNumReturn;
      }
      if (null != manualEventsNumReturn) {
        query.output.aggregation.eventsNumReturn = manualEventsNumReturn;
      }
      if (null != manualFactsNumReturn) {
        query.output.aggregation.factsNumReturn = manualFactsNumReturn;
      }
      if (null != manualGeoNumReturn) {
        query.output.aggregation.geoNumReturn = manualGeoNumReturn;
      }
      //TESTED
    }
    //TESTED x2     
   
    //(timing)
    nQuerySetupTime = System.currentTimeMillis() - nQuerySetupTime;
   
  // 0.6] Perform Lucene query
   
    // 0.6.1: query extensions: pre-query hook
    ArrayList<IQueryExtension> queryExtensions = null;
    if (null != _queryExtensions) {
      queryId = new ObjectId();
      queryExtensions = new ArrayList<IQueryExtension>(_queryExtensions.size());
      for (Class<IQueryExtension> queryExtensionClass: _queryExtensions) {
        // Don't catch any exceptions thrown here - let it bubble upwards
        IQueryExtension queryExtension = queryExtensionClass.newInstance();
        queryExtension.preQueryActivities(queryId, query, communityIdStrs);
        queryExtensions.add(queryExtension);
      }
    }//TESTED (see test.QueryExtensionsTestCode)
   
    // Built-in federated query engine ...
    if (null != _federatedQueryCache) {
      // 2 modes:
      // 1) If srcInclude is true(default) then check each source vs the table
      // 2) If srcInclude is false, or no sources specified, then check each community vs the table
     
      // 1:
      if ((null != query.input) && (null != query.input.sources) && ((null == query.input.srcInclude) || query.input.srcInclude))
      {
        for (String srcKey: query.input.sources) {
          FederatedQueryInMemoryCache fedQueryCacheEl = _federatedQueryCache.get(srcKey);
          if (null != fedQueryCacheEl) {
            if (null == this._builtInFederatedQueryEngine) {
              _builtInFederatedQueryEngine = new SimpleFederatedQueryEngine();
            }
            _builtInFederatedQueryEngine.addEndpoint(fedQueryCacheEl.source);
          }
        }
      }//TESTED (//TESTED (http://localhost:8184/knowledge/document/query/53ab42a2e4b04bcfe2de4387?qt[0].entity=%22garyhart.com/externaldomain%22&output.docs.numReturn=10&input.sources=inf...federated.externaldomain.&input.srcInclude=true))
     
      // 2:
      else { //Get federated queries from communities
        HashSet<String> excludeSrcs = null;
        for (String commIdStr: communityIdStrs) {
          FederatedQueryInMemoryCache fedQueryCacheEl = _federatedQueryCache.get(commIdStr);
          if (null != fedQueryCacheEl) {
           
            if ((null != query.input) && (null != query.input.sources)) { // (there are exclude sources)
              if (null == excludeSrcs) {
                excludeSrcs = new HashSet<String>(query.input.sources);
              }
            }//TESTED (http://localhost:8184/knowledge/document/query/53ab42a2e4b04bcfe2de4387?qt[0].entity=%22garyhart.com/externaldomain%22&output.docs.numReturn=10&input.sources=inf...federated.externaldomain.&input.srcInclude=false)
           
            for (Map.Entry<String, SourceFederatedQueryConfigPojo> fedQueryKV: fedQueryCacheEl.sources.entrySet()) {
              if ((null == excludeSrcs) || !excludeSrcs.contains(fedQueryKV.getKey())) {
                if (null == this._builtInFederatedQueryEngine) {
                  _builtInFederatedQueryEngine = new SimpleFederatedQueryEngine();
                }
                _builtInFederatedQueryEngine.addEndpoint(fedQueryKV.getValue());
              }
            }
          }
        }//TESTED (by hand)
      }
      if (null != _builtInFederatedQueryEngine) {
        _builtInFederatedQueryEngine.preQueryActivities(queryId, query, communityIdStrs);
      }
    }   
   
    // 0.6.2: the main query
    if ((null != query.explain) && query.explain) { // (for diagnostic - will return lucene explanation)
      searchSettings.setExplain(true);
    }
   
    SearchResponse queryResults = null;

    // (_source can now be enabled, so this is necessary to avoid returning it)
    searchSettings.addFields();
    if ((null != query.raw) && (null != query.raw.query))
    {
      // (Can bypass all other settings)       
      searchSettings.setQuery(query.raw.query);
      queryResults = indexMgr.doQuery(null, tempFilterInfo.parentFilterObj, searchSettings);
    }//TESTED '{ "raw": { "match_all": {} } }'
    else
    {
      // Where I can, use the source filter as part of the query so that
      // facets will apply to query+filter, not just filter
      queryObj = QueryBuilders.boolQuery().must(queryObj).must(QueryBuilders.constantScoreQuery(tempFilterInfo.parentFilterObj).boost(0.0F));
     
      queryResults = indexMgr.doQuery(queryObj, null, searchSettings);
    }//TESTED '{}' etc
   
    long nLuceneTime = queryResults.getTookInMillis();

  // 0.7] Lucene scores 
View Full Code Here

    sb.append(',').append(DocumentPojoIndexMap.manyGeoDocumentIndexCollection_);
    for (String sCommunityId: communityIdStrs) {
      sb.append(',').append("docs_").append(sCommunityId);
    }
    sb.append('/').append(DocumentPojoIndexMap.documentType_);
    ElasticSearchManager indexMgr = ElasticSearchManager.getIndex(sb.toString());
    return indexMgr;
  }//TESTED (cut and paste from original code)
View Full Code Here

        }
      }
      else { // Check if the index instance is running
        // (this will also update the node replication for local-only indexes - handy side effect!)
        boolean bAllGood = true;
        ElasticSearchManager esm = null;
        esm = ElasticSearchManager.getIndex("association_index");
        bAllGood &= esm.pingIndex();
        esm = ElasticSearchManager.getIndex("entity_index");
        bAllGood &= esm.pingIndex();

        bAllGood &= (DbManager.getSocial().getCommunity().count() > 0);
        // (also test connection to the DB)

        if (!bAllGood)
View Full Code Here

    }//TESTED
   
    // 2] Get all the elasticsearch indexes that can time out:   
    // (https://github.com/elasticsearch/elasticsearch/blob/master/src/main/java/org/elasticsearch/rest/action/admin/indices/alias/get/RestGetIndicesAliasesAction.java)
   
    ElasticSearchManager indexMgr = ElasticSearchManager.getIndex(DUMMY_INDEX);
    ClusterStateResponse retVal = indexMgr.getRawClient().admin().cluster().prepareState()
        .setIndices("recs_t_*")
        .setRoutingTable(false).setNodes(false).setListenerThreaded(false).get();

    long now = new Date().getTime();
   
    for (IndexMetaData indexMetadata: retVal.getState().getMetaData()) {
      String index = indexMetadata.index();

      //DEBUG
      //System.out.println("INDEX = " + index);
     
      Matcher m = INDEX_TO_COMMUNITY_AND_DATE.matcher(index);
      if (!m.matches() || (2 != m.groupCount())) {
        continue; // (just looks like one of our indexes)
      }
     
      // 3] Go get the community ... the record is in format "recs_t_<community>_<date_format>"
     
      //TODO (INF-2533): check if the community has a default age-out period

      @SuppressWarnings("unused")
      String communityId = m.group(1);
      int ageOutTime = -1;
     
      //DEBUG
      //System.out.println("INDEX COMMUNITY = " + communityId);
     
      // 4] Now parse out the date, in one of the following formats:
      // - YYYY.MM.DD  ... (default age-out: 30 days)
      //TODO (INF-2533): add these
      // - YYYY.MM  ... (default age-out: 12 months)
      // - YYYY    ... (default age-out: 3 years)
     
      String dateStr = m.group(2);
      Date indexDate = null;
      long periodInMs = -1;
      ThreadSafeSimpleDateFormat dateFormatter = new ThreadSafeSimpleDateFormat("yyyy.MM.dd");
      try {
        indexDate = dateFormatter.parse(dateStr);
        if (-1 == ageOutTime) { // (ie not overridden)
          ageOutTime = AGE_OUT_TIME_DAYS; // (default)
        }
        periodInMs = 3600L*24L*1000L; // (24h)
      }//TESTED
      catch (Exception e) { // failed to parse date, just carry on
        indexDate = null;
      }
      //TODO (INF-2533): add other date formats here
     
      if (null == indexDate) {
        continue;
      }
      // If we're here we managed to parse one of the dates
     
      //DEBUG
      //System.out.println("INDEX DATE = " + indexDate);
     
      long then = indexDate.getTime();
      if ((now - then) > periodInMs*ageOutTime) {
        //DEBUG
        //System.out.println("DELETE INDEX: " + (now - then)/(periodInMs*ageOutTime));
       
        ElasticSearchManager recordsIndex = IndexManager.getIndex(index);
        recordsIndex.deleteMe();     
      }//TESTED
    }
  }//TESTED
View Full Code Here

    for (TestLogstashExtractorPojo testInfo: secondaryQueue) {
     
      String commIdStr = testInfo.deleteOnlyCommunityId.toString();
     
      // Get all the indexes that might need to be cleansed:
      ElasticSearchManager indexMgr = ElasticSearchManager.getIndex(DUMMY_INDEX);
     
      // Stashed index
     
      ArrayList<String> indices = new ArrayList<String>();
     
      String stashedIndex = "recs_" + commIdStr;
      ClusterStateResponse retVal = indexMgr.getRawClient().admin().cluster().prepareState()
          .setIndices(stashedIndex)
          .setRoutingTable(false).setNodes(false).setListenerThreaded(false).get();
     
      if (!retVal.getState().getMetaData().getIndices().isEmpty()) {
        indices.add(stashedIndex);
      } // (else doesn't exist...)
     
      // Live indexes:
     
      String indexPattern = new StringBuffer("recs_t_").append(commIdStr).append("*").toString();
      retVal = indexMgr.getRawClient().admin().cluster().prepareState()
          .setIndices(indexPattern)
          .setRoutingTable(false).setNodes(false).setListenerThreaded(false).get();

      for (IndexMetaData indexMetadata: retVal.getState().getMetaData()) {
        //DEBUG
View Full Code Here

    doc.addToMetadata("TestMeta", metadataObj);
    System.out.println("DOC_META=" + docMap.extendBuilder(BaseApiPojo.getDefaultBuilder()).setPrettyPrinting().create().toJson(doc));
   
// Changes to new ElasticSearch construct (particularly for bulk add)
   
    ElasticSearchManager indexManager = IndexManager.createIndex("test", null, false, null, null, ImmutableSettings.settingsBuilder());
   
    BulkResponse result = null;
    // All docs
    result = indexManager.bulkAddDocuments(IndexManager.mapListToIndex(docset, new TypeToken<Set<DocumentPojo>>(){},
                                  new DocumentPojoIndexMap()), "_id", null, true);
    if (result.hasFailures()) {
      System.out.print("****** FAILED: ");
      System.out.println(result.buildFailureMessage());
    }
   
    //Delete index (once testing complete)
    indexManager.deleteMe();
   
   
       
  }
View Full Code Here

      if (null != indexes) {
        // Convert this generic list into a list of indexes that actually exists
        // (ie duplicate the _alias call that is made in non-timestamp cases)
        // (https://github.com/elasticsearch/elasticsearch/blob/master/src/main/java/org/elasticsearch/rest/action/admin/indices/alias/get/RestGetIndicesAliasesAction.java)
       
        ElasticSearchManager indexMgr = ElasticSearchManager.getIndex(RECS_DUMMY_INDEX);
        ClusterStateResponse retVal = indexMgr.getRawClient().admin().cluster().prepareState()
            .setIndices(indexes)
            .setRoutingTable(false).setNodes(false).setListenerThreaded(false).get();

        for (IndexMetaData indexMetadata: retVal.getState().getMetaData()) {
          String index = indexMetadata.index();
View Full Code Here

TOP

Related Classes of com.ikanow.infinit.e.data_model.index.ElasticSearchManager

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.