/*******************************************************************************
* Copyright 2012, The Infinit.e Open Source Project.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License, version 3,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
******************************************************************************/
package com.ikanow.infinit.e.api.knowledge;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.log4j.Logger;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.Version;
import org.apache.lucene.queryParser.CrossVersionQueryParser;
import org.apache.lucene.search.CrossVersionIndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.CrossVersionIndexWriter;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.bson.types.ObjectId;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.action.search.SearchRequestBuilder;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.BaseQueryBuilder;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHitField;
import org.elasticsearch.search.sort.SortOrder;
import com.ikanow.infinit.e.api.knowledge.aliases.AliasLookupTable;
import com.ikanow.infinit.e.api.knowledge.aliases.AliasManager;
import com.ikanow.infinit.e.api.utils.SocialUtils;
import com.ikanow.infinit.e.data_model.api.BasePojoApiMap;
import com.ikanow.infinit.e.data_model.api.ResponsePojo;
import com.ikanow.infinit.e.data_model.api.ResponsePojo.ResponseObject;
import com.ikanow.infinit.e.data_model.api.knowledge.DimensionListPojo;
import com.ikanow.infinit.e.data_model.api.knowledge.SearchSuggestPojo;
import com.ikanow.infinit.e.data_model.api.knowledge.SearchSuggestPojoApiMap;
import com.ikanow.infinit.e.data_model.index.ElasticSearchManager;
import com.ikanow.infinit.e.data_model.index.feature.entity.EntityFeaturePojoIndexMap;
import com.ikanow.infinit.e.data_model.index.feature.event.AssociationFeaturePojoIndexMap;
import com.ikanow.infinit.e.data_model.store.DbManager;
import com.ikanow.infinit.e.data_model.store.MongoDbManager;
import com.ikanow.infinit.e.data_model.store.document.EntityPojo;
import com.ikanow.infinit.e.data_model.store.document.GeoPojo;
import com.ikanow.infinit.e.data_model.store.feature.association.AssociationFeaturePojo;
import com.ikanow.infinit.e.data_model.store.feature.entity.EntityFeaturePojo;
import com.ikanow.infinit.e.data_model.store.feature.geo.GeoFeaturePojo;
import com.ikanow.infinit.e.data_model.utils.ContentUtils;
import com.ikanow.infinit.e.data_model.utils.DimensionUtility;
import com.mongodb.BasicDBList;
import com.mongodb.BasicDBObject;
import com.mongodb.CommandResult;
import com.mongodb.DBCollection;
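/**
* This class handles the knowledge "suggest" API calls: entity search suggestions,
* geo (lat/lng) suggestions, association (event) suggestions and alias suggestions,
* together with the alias lookup and Lucene query escaping utilities they use
*
* @author cmorgan
*
*/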
//(remove this during active development - want to just suppress a deprecation warning but there is no way of doing this for both 0.19 and 1.0)
//@SuppressWarnings("deprecation")
@SuppressWarnings("all")
public class SearchHandler
{
// log4j logger for this handler
private static final Logger logger = Logger.getLogger(SearchHandler.class);
// Scratch buffer used to assemble the throttled log lines written by getSuggestions and getAliasSuggestions
// NOTE(review): this is mutable per-instance state - presumably a handler instance is never shared between concurrent requests; confirm
private final StringBuffer logMsg = new StringBuffer();
// Start time (ms) of the last request for which a searchSuggest / aliasSuggest log line was written
// (a new line is only logged when a request starts more than 5000ms after the stored value)
private static long lastSuggestLog = 0;
private static long lastAliasLog = 0;
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// SEARCH SUGGEST API call
//TODO (INF-1660): here and for assoc, should enforce doc_count>0? (or should i remove from entity feature when freq hits 0??)
// (or both?)
// "<index collection>/<index name>" locator passed to ElasticSearchManager.getIndex for the entity feature index
private static final String entityIndex_ = EntityFeaturePojoIndexMap.indexCollectionName_ + "/" + EntityFeaturePojoIndexMap.indexName_;

/**
 * Entity search suggest: tokenizes the (possibly partially typed) term, queries the entity
 * feature index for matching entities in the user's communities (sorted by descending doc count)
 * and returns them as a DimensionListPojo.
 *
 * If aliasing is enabled, matching alias masters are placed at the top of the list, index hits are
 * replaced by their alias master (or dropped if the master's index is "discard"), and hits that
 * map to the same index are merged. At most 20 index hits are added after the alias masters.
 *
 * @param userIdStr the calling user, used to resolve the community list
 * @param term the raw text typed by the user; a trailing "*" is added unless it ends with a space
 * @param communityIdStrList the community ids, passed to SocialUtils.getCommunityIds
 * @param bIncludeGeo if true the geotag and ontology type are requested and returned
 * @param bIncludeLinkdata if true the linkdata is requested and returned
 * @param bWantNoAlias if true no alias processing is performed
 * @return a response whose data is the suggestion list
 */
public ResponsePojo getSuggestions(String userIdStr, String term, String communityIdStrList, boolean bIncludeGeo, boolean bIncludeLinkdata, boolean bWantNoAlias)
{
    long nSysTime = System.currentTimeMillis();
    ResponsePojo rp = new ResponsePojo();

    ElasticSearchManager gazIndex = ElasticSearchManager.getIndex(entityIndex_);

    // Need to do a quick decomposition of the term to fit in with analyzed strings
    String escapedterm = null;
    StandardTokenizer st = new StandardTokenizer(Version.LUCENE_30, new StringReader(ContentUtils.stripDiacritics(term)));
    CharTermAttribute termAtt = st.addAttribute(CharTermAttribute.class);
    StringBuffer sb = new StringBuffer();
    try {
        try {
            st.reset();
            while (st.incrementToken()) {
                // Every token is mandatory, ie the query becomes "+tok1 +tok2 ..."
                if (sb.length() > 0) {
                    sb.append(" +");
                }
                else {
                    sb.append('+');
                }
                sb.append(luceneEncodeTerm(termAtt.toString()));
            }
        }
        finally {
            st.close();
        }
    } catch (IOException e) {
        // (carry on with whatever was tokenized so far, as before, but log via the logger)
        logger.error("Exception Message: " + e.getMessage(), e);
    }
    if (!term.endsWith(" ") || (0 == sb.length())) { // Could be in the middle of typing, stick a * on the end
        sb.append('*');
    }//TESTED
    escapedterm = sb.toString();

    // Create the search query

    SearchRequestBuilder searchOptions = gazIndex.getSearchOptions();
    BaseQueryBuilder queryObj1 = QueryBuilders.queryString(escapedterm).defaultField(EntityFeaturePojoIndexMap.Mapping.RootObject.RootProperties.alias_pri_);

    String[] communityIdStrs = SocialUtils.getCommunityIds(userIdStr, communityIdStrList);
    BaseQueryBuilder queryObj2 = QueryBuilders.boolQuery().should(QueryBuilders.termsQuery(EntityFeaturePojo.communityId_, communityIdStrs));

    BaseQueryBuilder queryObj = QueryBuilders.boolQuery().must(queryObj1).must(queryObj2);

    searchOptions.addSort(EntityFeaturePojo.doccount_, SortOrder.DESC);
    searchOptions.addFields(EntityFeaturePojo.disambiguated_name_, EntityFeaturePojo.doccount_,
            EntityFeaturePojo.type_, EntityFeaturePojo.dimension_);
    if (bIncludeGeo) {
        searchOptions.addFields(EntityFeaturePojo.geotag_);
        searchOptions.addFields(EntityFeaturePojo.ontology_type_);
    }
    if (bIncludeLinkdata) {
        searchOptions.addFields(EntityFeaturePojo.linkdata_);
    }

    // Initial alias handling:

    AliasLookupTable aliasTable = null;
    HashMap<String, SearchSuggestPojo> aliasResults = null;
    if (!bWantNoAlias) {
        AliasManager aliasManager = AliasManager.getAliasManager();
        if (null != aliasManager) {
            aliasTable = aliasManager.getAliasLookupTable(communityIdStrList, communityIdStrs, null, userIdStr);
        }
    }
    //TESTED

    // Also create an internal Lucene index for aliases, in case any of them do not have actual entities representing them
    List<EntityFeaturePojo> extraEntries = null;
    if (null != aliasTable) {
        extraEntries = checkAliasMasters(aliasTable, escapedterm);
    }
    // (end initial alias handling)

    int nDesiredSize = 20;
    if (null == aliasTable) {
        searchOptions.setSize(nDesiredSize); // will forward all 20
    }
    else {
        searchOptions.addFields(EntityFeaturePojo.index_);
        searchOptions.setSize(3*nDesiredSize); // will forward top 20 after de-aliasing

        aliasResults = new HashMap<String, SearchSuggestPojo>();
        // (We use this to ensure we only include each entity once after aliasing)
    }
    //TESTED

    // Perform the search

    SearchResponse rsp = gazIndex.doQuery(queryObj, searchOptions);

    // Format the return values

    SearchHit[] docs = rsp.getHits().getHits();
    DimensionListPojo dimlist = new DimensionListPojo();
    int nDocsAdded = 0;

    if (null != extraEntries) { // Put the alias masters at the top:
        //DEBUG
        //System.out.println(Arrays.toString(extraEntries.toArray()));
        for (EntityFeaturePojo alias: extraEntries) {
            SearchSuggestPojo sp = new SearchSuggestPojo();
            if (null != alias.getDimension()) {
                sp.setDimension(alias.getDimension().toString());
            }
            else {
                sp.setDimension("What");
            }
            sp.setValue(alias.getDisambiguatedName());
            sp.setType(alias.getType());
            if (bIncludeGeo) {
                sp.setGeotag(alias.getGeotag());
            }
            sp.setOntology_type(alias.getOntology_type());
            dimlist.addSearchSuggestPojo(sp);
        }
    }//TESTED (inc geo)

    if (null != docs)
    {
        for (SearchHit hit: docs)
        {
            SearchHitField shf = hit.field(EntityFeaturePojo.disambiguated_name_);
            if (null == shf) { // robustness check, sometimes if the harvester goes wrong this field might be missing
                continue;
            }
            String disname = (String) shf.value();
            // (type and dimension get the same robustness treatment: a missing field gives null rather than an exception)
            String type = getStringField(hit, EntityFeaturePojo.type_);
            String dimension = getStringField(hit, EntityFeaturePojo.dimension_);
            SearchSuggestPojo sp = new SearchSuggestPojo();

            sp.setValue(disname);
            sp.setDimension(dimension);
            sp.setType(type);
            if (bIncludeGeo)
            {
                SearchHitField loc = hit.field(EntityFeaturePojo.geotag_);
                if ( loc != null )
                    sp.setLocFromES((String) loc.value());
                SearchHitField ont = hit.field(EntityFeaturePojo.ontology_type_);
                if ( ont != null )
                    sp.setOntology_type((String)ont.value());
            }
            if (bIncludeLinkdata) {
                SearchHitField linkdata = hit.field(EntityFeaturePojo.linkdata_);
                if ( linkdata != null )
                    sp.setLinkdata(linkdata.values());
            }

            // More alias handling
            String index = null;
            if (null != aliasTable) {
                index = getStringField(hit, EntityFeaturePojo.index_);
                if (null == index) { // robustness check: cannot de-alias or de-duplicate a hit with no index
                    continue;
                }
                EntityFeaturePojo alias = aliasTable.getAliasMaster(index);
                if (null != alias) { // Found!
                    if ("discard".equalsIgnoreCase(alias.getIndex())) { // Discard this entity
                        continue;
                    }
                    else if ((null != alias.getDisambiguatedName()) && (null != alias.getType())) {
                        // (these need to be present)

                        //DEBUG (perf critical)
                        //logger.debug("Alias! Replace " + index + " with " + alias.getIndex());

                        index = alias.getIndex();
                        disname = alias.getDisambiguatedName();
                        type = alias.getType();
                        if (null != alias.getDimension()) {
                            dimension = alias.getDimension().toString();
                        }
                        else { // Guess from type
                            dimension = DimensionUtility.getDimensionByType(type).toString();
                        }
                        // Reset values:
                        sp.setValue(disname);
                        sp.setDimension(dimension);
                        sp.setType(type);
                    }
                }
                SearchSuggestPojo existing = aliasResults.get(index);
                if (null != existing) {

                    //DEBUG (perf critical)
                    //logger.debug("Alias! Remove duplicate " + index);

                    if ((null == existing.getGeotag()) && (null != sp.getGeotag())) {
                        // (if they're both set then sigh just ignore on a first-come-first-served basis)
                        existing.setGeotag(sp.getGeotag());
                        existing.setOntology_type(sp.getOntology_type());
                    }//TESTED
                    if (null != sp.getLinkdata()) { // (here we can just combine the linkdata)
                        if (null == existing.getLinkdata()) {
                            existing.setLinkdata(sp.getLinkdata());
                        }
                        else {
                            existing.getLinkdata().addAll(sp.getLinkdata());
                        }
                    }//TESTED
                    continue; // (ie don't add this guy)
                }
                else { // add it
                    aliasResults.put(index, sp);
                }
            }
            //TESTED
            // end more alias handling

            dimlist.addSearchSuggestPojo(sp);
            // (only adds unique entries, ie handles multiple communities "ok" (only ok
            // because it doesn't sum the doccounts across multiple communities, you'd probably
            // want to use facets for that, but it doesn't seem worth it, especially since we're
            // pretty short on field cache space)

            if (++nDocsAdded >= nDesiredSize) { // (can happen in the de-aliasing case)
                break;
            }//TESTED
        }
    }
    rp.setData(dimlist);
    rp.setResponse(new ResponseObject("Suggestions",true,term));

    // Throttled logging: at most one line per 5s
    if (nSysTime > (lastSuggestLog + 5000)) {
        lastSuggestLog = nSysTime;
        // (docs is allowed to be null by the loop above, so don't dereference it blindly here)
        int nFound = (null != docs) ? docs.length : 0;
        logMsg.setLength(0);
        logMsg.append("knowledge/searchSuggest query=").append(escapedterm);
        logMsg.append(" groups=").append(communityIdStrList);
        logMsg.append(" found=").append(nFound);
        logMsg.append(" time=").append(System.currentTimeMillis() - nSysTime).append(" ms");
        logger.info(logMsg.toString());
    }
    return rp;
}

/**
 * Returns the value of a field of a search hit as a string, or null if the hit does not contain that field
 */
private static String getStringField(SearchHit hit, String fieldName)
{
    SearchHitField hitField = hit.field(fieldName);
    if (null == hitField) {
        return null;
    }
    return (String) hitField.value();
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Geo suggestions code
// (Haven't yet converted geo feature to string literals)
/**
 * Geo suggest: if the term is of the form "lat,lng" returns the locations found near that point
 * (see reverseGeoLookup). Lookup by location name is not supported and returns a failure response.
 *
 * @param userIdStr not used by this call
 * @param term the search term, expected to be "latitude,longitude"
 * @param communityIdStrList not used by this call
 * @return a response containing the nearby locations, or a failure response if the term is
 *         missing or cannot be parsed as a lat/lng pair
 */
public ResponsePojo getSuggestionsGeo(String userIdStr, String term, String communityIdStrList)
{
    ResponsePojo rp = new ResponsePojo();

    //validate term object to be a lat,lng or location
    if ( term == null )
    {
        rp.setResponse(new ResponseObject("Suggestions Geo", false, "search term is required, was not provided"));
        return rp; // (without this the split below throws a NullPointerException)
    }

    boolean isLatLng = false;
    Double latitude = null;
    Double longitude = null;
    String[] terms = term.split(",");
    if ( terms.length == 2 )
    {
        try
        {
            latitude = Double.parseDouble(terms[0]);
            longitude = Double.parseDouble(terms[1]);
            isLatLng = true;
        }
        catch (NumberFormatException ignored)
        {
            //could not parse as double, treat as location
            //just fall through
        }
    }

    if ( !isLatLng )
    {
        //lookup lat/lngs via location name
        rp.setResponse(new ResponseObject("Suggestions Geo", false, "Search term provided could not be parsed as lat, lng... geotag lookup by name not yet supported."));
        return rp;
    }

    //lookup location name via lat/lng
    List<SearchSuggestPojo> locations = reverseGeoLookup(latitude, longitude);
    rp.setData(locations, new SearchSuggestPojoApiMap());
    rp.setResponse(new ResponseObject("Suggestions Geo", true, term));
    return rp;
}
// Value sent as "maxDistance" in the geoNear command (see runGeoNear)
// NOTE(review): the name says meters, but whether the database interprets the value in meters depends on the index/command options - confirm
private static Double MAXIMUM_DISTANCE_IN_METERS = 50000.0;

/**
 * Reverse geo lookup: runs a geoNear query around the given point and converts (at most)
 * the first 10 results into search suggestions.
 *
 * @param latitude latitude of the point to search around
 * @param longitude longitude of the point to search around
 * @return the nearby locations (possibly empty), or null if the geoNear command returned no result list
 */
private List<SearchSuggestPojo> reverseGeoLookup(Double latitude, Double longitude)
{
    final BasicDBList nearby = runGeoNear(latitude, longitude);
    if ( null == nearby )
    {
        return null;
    }

    final List<SearchSuggestPojo> suggestions = new ArrayList<SearchSuggestPojo>();
    final int nToConvert = Math.min(10, nearby.size());
    for ( int idx = 0; idx < nToConvert; idx++ )
    {
        BasicDBObject entry = (BasicDBObject) nearby.get(idx);
        // Each geoNear entry holds the distance ("dis") and the matched document ("obj")
        Double distance = entry.getDouble("dis");
        BasicDBObject matched = (BasicDBObject) entry.get("obj");
        suggestions.add( buildLocation(matched, distance) );
    }
    return suggestions;
}
/**
 * Sends a geoNear command for the "geo" collection of the "feature" database, centered on the
 * given point and limited by MAXIMUM_DISTANCE_IN_METERS.
 * (This method does not itself limit the number of results; the caller trims the list.)
 *
 * @param lat latitude of the center point
 * @param lon longitude of the center point
 * @return the "results" list of the command result, or null if the command failed or
 *         returned no "results" field
 */
private BasicDBList runGeoNear(Double lat, Double lon)
{
    BasicDBObject command = new BasicDBObject("geoNear", "geo");
    Double[] coordinates = {lat,lon};
    command.put("near", coordinates);
    command.put("maxDistance", MAXIMUM_DISTANCE_IN_METERS);
    CommandResult commandResult = MongoDbManager.getDB("feature").command(command);
    if ( commandResult.ok() && commandResult.containsField("results") )
    {
        return (BasicDBList)commandResult.get("results");
    }
    return null;
}
/**
 * Converts one matched document of a geoNear result into a search suggestion: the value is
 * the display name built from the feature, the score is the distance, and the geotag and
 * ontology type are copied from the feature.
 *
 * @param location the matched geo feature document
 * @param distance the distance reported for this match
 * @return the populated suggestion
 */
private SearchSuggestPojo buildLocation(BasicDBObject location, Double distance)
{
    final GeoFeaturePojo geoFeature = GeoFeaturePojo.fromDb(location, GeoFeaturePojo.class);

    final SearchSuggestPojo suggestion = new SearchSuggestPojo();
    suggestion.setValue(buildLocation(geoFeature));
    suggestion.setGeotag(new GeoPojo(geoFeature.getGeoindex().lat, geoFeature.getGeoindex().lon));
    suggestion.setOntology_type(geoFeature.getOntology_type());
    suggestion.setScore(distance);
    return suggestion;
}
/**
 * Builds the display name of a geo feature: the non-null values among city, region and
 * country (in that order) separated by ", ". If that gives an empty string the feature's
 * search_field is used instead.
 *
 * @param feature the geo feature to describe
 * @return the display name
 */
private String buildLocation(GeoFeaturePojo feature)
{
    final Object[] nameParts = { feature.getCity(), feature.getRegion(), feature.getCountry() };

    final StringBuilder label = new StringBuilder();
    boolean bFirstPart = true;
    for (Object part: nameParts)
    {
        if ( null == part )
        {
            continue;
        }
        if ( !bFirstPart )
        {
            label.append(", ");
        }
        label.append(part);
        bFirstPart = false;
    }

    // Nothing usable in city/region/country: fall back to the search field
    if ( 0 == label.length() )
    {
        label.append(feature.getSearch_field());
    }
    return label.toString();
}
// Event suggestions code
// "<index collection>/<index name>" locator passed to ElasticSearchManager.getIndex for the association feature index
private static final String assocIndex_ = AssociationFeaturePojoIndexMap.indexCollectionName_ + "/" + AssociationFeaturePojoIndexMap.indexName_;

/**
 * Association (event) suggest: suggests values for one slot of an entity1/verb/entity2
 * association, given what has been typed in that slot and (optionally) fixed values for
 * the other slots.
 *
 * The slot named by "field" is treated as the partially typed term; any other slot whose
 * value is not the literal string "null" is added to the query as a filter. Results are
 * de-duplicated and returned in the order they were found (ie by descending doc count,
 * followed by any alias masters that could not be converted into query terms).
 *
 * @param userIdStr the calling user, used to resolve the community list
 * @param ent1 the entity1 value/index, or "null" if not specified
 * @param verb the verb value, or "null" if not specified
 * @param ent2 the entity2 value/index, or "null" if not specified
 * @param field which slot is being completed (compared against the AssociationFeaturePojo field names)
 * @param communityIdStrList the community ids, passed to SocialUtils.getCommunityIds
 * @param bWantNoAlias if true no alias processing is performed
 * @return a response whose data is the list of suggestion strings; a failure response if anything throws
 */
public ResponsePojo getAssociationSuggestions(String userIdStr, String ent1, String verb, String ent2, String field, String communityIdStrList, boolean bWantNoAlias)
{
    ResponsePojo rp = new ResponsePojo();
    try
    {
        // Community ids, needed in a couple of places
        String[] communityIdStrs = SocialUtils.getCommunityIds(userIdStr, communityIdStrList);

        // Initial alias handling:
        AliasLookupTable aliasTable = null;
        if (!bWantNoAlias) {
            AliasManager aliasManager = AliasManager.getAliasManager();
            if (null != aliasManager) {
                aliasTable = aliasManager.getAliasLookupTable(communityIdStrList, communityIdStrs, null, userIdStr);
            }
        }//TESTED

        ElasticSearchManager esm = ElasticSearchManager.getIndex(assocIndex_);
        SearchRequestBuilder searchOptions = esm.getSearchOptions();
        BoolQueryBuilder boolQuery = QueryBuilders.boolQuery();
        boolean bExtraQueryTerms = false;
        String term = "";
        if ( !ent1.equals("null") )
        {
            if ( field.equals(AssociationFeaturePojo.entity1_) )
                term = ent1;
            else {
                bExtraQueryTerms = true;
                EntityFeaturePojo alias = null;
                if (null != aliasTable) {
                    alias = aliasTable.getAliasMaster(ent1);
                }
                if (null != alias) { // Found!
                    boolQuery.must(QueryBuilders.termsQuery(AssociationFeaturePojo.entity1_index_, alias.getAlias().toArray()));
                }
                else {
                    boolQuery.must(QueryBuilders.termQuery(AssociationFeaturePojo.entity1_index_, ent1));
                }//TESTED
            }
        }
        if ( !verb.equals("null") )
        {
            if ( field.equals(AssociationFeaturePojo.verb_) )
                term = verb;
            else
            {
                bExtraQueryTerms = true;
                // (every whitespace-separated word of the verb is mandatory)
                boolQuery.must(QueryBuilders.queryString(new StringBuffer("+").append(verb.replaceAll("\\s+", " +")).toString()).
                        defaultField(AssociationFeaturePojo.verb_));
            }
        }
        if ( !ent2.equals("null") )
        {
            if ( field.equals(AssociationFeaturePojo.entity2_) )
                term = ent2;
            else {
                bExtraQueryTerms = true;
                EntityFeaturePojo alias = null;
                if (null != aliasTable) {
                    alias = aliasTable.getAliasMaster(ent2);
                }
                if (null != alias) { // Found!
                    boolQuery.must(QueryBuilders.termsQuery(AssociationFeaturePojo.entity2_index_, alias.getAlias().toArray()));
                }
                else {
                    boolQuery.must(QueryBuilders.termQuery(AssociationFeaturePojo.entity2_index_, ent2));
                }
            }//TESTED (cut and paste from entity1)
        }

        // Decompose the term being completed so that it fits in with analyzed strings
        String escapedterm = null;
        StandardTokenizer st = new StandardTokenizer(Version.LUCENE_30, new StringReader(ContentUtils.stripDiacritics(term)));
        CharTermAttribute termAtt = st.addAttribute(CharTermAttribute.class);
        StringBuffer sb = new StringBuffer();
        try {
            try {
                st.reset();
                while (st.incrementToken()) {
                    if (sb.length() > 0) {
                        sb.append(" +");
                    }
                    else {
                        sb.append('+');
                    }
                    sb.append(luceneEncodeTerm(termAtt.toString()));
                }
            }
            finally {
                st.close();
            }
        } catch (IOException e) {
            // (carry on with whatever was tokenized so far, as before, but log via the logger)
            logger.error("Exception Message: " + e.getMessage(), e);
        }
        if (!term.endsWith(" ") || (0 == sb.length())) { // Could be in the middle of typing, stick a * on the end
            sb.append('*');
        }//TESTED
        escapedterm = sb.toString();

        // Also create an internal Lucene index for aliases, in case any of them do not have actual entities representing them
        List<EntityFeaturePojo> extraEntries = null;
        BoolQueryBuilder extraQueryTerms = null;
        if (field.startsWith("entity")) {
            String indexField = field.startsWith("entity1") ? "entity1_index" : "entity2_index";
            if (null != aliasTable) {
                extraEntries = checkAliasMasters(aliasTable, escapedterm);
            }
            if (null != extraEntries) {
                extraQueryTerms = QueryBuilders.boolQuery();
                int nExtraTerms = 0;
                Iterator<EntityFeaturePojo> aliasIt = extraEntries.iterator();
                while (aliasIt.hasNext()) {
                    EntityFeaturePojo alias = aliasIt.next();
                    nExtraTerms += alias.getAlias().size();

                    if (!bExtraQueryTerms && (nExtraTerms > 20)) { // If not filtering on event type we'll be more aggressive
                        break;
                    }//TESTED
                    if (bExtraQueryTerms && (nExtraTerms > 60)) { // If the number of terms gets too large bail anyway
                        break;
                    }//TESTED

                    extraQueryTerms.should(QueryBuilders.termsQuery(indexField, alias.getAlias().toArray()));
                    // (entries converted into query terms are removed, what is left gets appended to the results below)
                    aliasIt.remove();

                }//end loop over entities
            }//if found new aliases

        }//(if this is an entity lookup) TESTED - including breaking out because of # of terms

        // (end initial alias handling)

        if (null == extraQueryTerms) {
            boolQuery.must(QueryBuilders.queryString(escapedterm).defaultField(field));
        }
        else {//(in this case combine the escaped term with the aliases
            extraQueryTerms.should(QueryBuilders.queryString(escapedterm).defaultField(field));
            boolQuery.must(extraQueryTerms);
        }//TESTED
        boolQuery.must(QueryBuilders.termsQuery(AssociationFeaturePojo.communityId_, communityIdStrs));

        searchOptions.addSort(AssociationFeaturePojo.doccount_, SortOrder.DESC);

        // Work out which fields to return:
        //TODO (INF-1234) need to work out what to do with quotations and similar here (ie entityX without entityX_index)
        String returnfield;
        boolean bReturningEntities = true;
        if ( field.equals(AssociationFeaturePojo.entity1_) ) {
            returnfield = AssociationFeaturePojo.entity1_index_;
            searchOptions.addFields( AssociationFeaturePojo.entity1_index_, AssociationFeaturePojo.doccount_);
        }
        else if ( field.equals(AssociationFeaturePojo.entity2_)) {
            returnfield = AssociationFeaturePojo.entity2_index_;
            searchOptions.addFields( AssociationFeaturePojo.entity2_index_, AssociationFeaturePojo.doccount_);
        }
        else {
            bReturningEntities = false;
            returnfield = AssociationFeaturePojo.verb_;
            searchOptions.addFields( AssociationFeaturePojo.verb_, AssociationFeaturePojo.verb_category_, AssociationFeaturePojo.doccount_);
        }

        int nNumSuggestionsToReturn = 20;
        if (bReturningEntities && (null != aliasTable)) {
            searchOptions.setSize(3*nNumSuggestionsToReturn); // we're going to remove some duplicates so get more than we need
        }
        else { // normal case
            searchOptions.setSize(nNumSuggestionsToReturn);
        }

        SearchResponse rsp = esm.doQuery(boolQuery, searchOptions);
        SearchHit[] docs = rsp.getHits().getHits();

        //Currently this code takes the results and puts
        //them into a set so there are no duplicates
        //duplicates occur for example when you search for
        //obama you get obama/quotation/quote1 and obama/travel/spain
        //may want to work this differently, or at least sum up
        //frequency
        // (an insertion-ordered set is used so that the doc count ordering of the hits survives the de-duplication)
        Set<String> suggestions = new java.util.LinkedHashSet<String>();

        for (SearchHit hit: docs)
        {
            SearchHitField retField = hit.field(returnfield); // (this can be null in theory/by mistake)
            if (null != retField) {
                String suggestion = (String) retField.value();
                if (bReturningEntities && (null != aliasTable))
                {
                    // More alias handling
                    EntityFeaturePojo alias = aliasTable.getAliasMaster(suggestion);
                    if (null != alias) { // Found!
                        if ("discard".equalsIgnoreCase(alias.getIndex())) { // Discard this entity
                            continue;
                        }
                        else {
                            // (these need to be present)
                            suggestion = alias.getIndex();
                        }
                    }//TESTED
                }
                else { // (old code, still valid for verbs or no aliases)
                    if ( returnfield.equals(AssociationFeaturePojo.verb_) && hit.field(AssociationFeaturePojo.verb_category_) != null )
                        //for some reason verb_cat can be null!?!?! i think this is broken (ent1 facebook inc/company verb *)
                    {
                        String verbcat = (String)hit.field(AssociationFeaturePojo.verb_category_).value();
                        suggestion += " (" + verbcat + ")";
                        // (the verb category is also offered as a suggestion in its own right)
                        suggestions.add(verbcat);
                    }
                }
                suggestions.add(suggestion);

                if (suggestions.size() >= nNumSuggestionsToReturn) {
                    break;
                }

            } // (end return string valid)
        }//end loop over suggestions

        // Add any aliases that I couldn't explicitly convert to query terms
        if ((null != extraEntries) && (suggestions.size() < nNumSuggestionsToReturn)) {
            for (EntityFeaturePojo alias: extraEntries) {
                suggestions.add(alias.getIndex());
                if (suggestions.size() >= nNumSuggestionsToReturn) {
                    break;
                }
            }
        }//(end add any remaining entries)
        //TESTED

        String[] suggestionArray = new String[suggestions.size()];
        rp.setData(Arrays.asList(suggestions.toArray(suggestionArray)), (BasePojoApiMap<String>)null);

        // The response message echoes back the value of the slot that was being completed
        String searchTerm = "";
        if ( field.equals(AssociationFeaturePojo.entity1_))
            searchTerm = ent1;
        else if ( field.equals(AssociationFeaturePojo.verb_))
            searchTerm = verb;
        else
            searchTerm = ent2;

        rp.setResponse(new ResponseObject("Association Suggestions", true, searchTerm));
    }
    catch (Exception ex)
    {
        logger.error("Exception Message: " + ex.getMessage(), ex);
        rp.setResponse(new ResponseObject("Association Suggestions",false,"Response returned unsuccessfully: " + ex.getMessage()));
    }
    return rp;
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Unused Alias code - returns aliases for a term
// (The GUI code crashes or something, and anyway I'm not convinced we want to expose this to the user)
/**
 * Alias suggest: returns the union of the aliases of every entity feature matching the term
 * on the requested field (see findAliases).
 *
 * @param userIdStr the calling user, used to resolve the community list
 * @param term the value to look up
 * @param field which entity field the term refers to: the alias, disambiguated name or index
 *              field (legacy GUI names for these are also accepted)
 * @param communityIdStrList the community ids, passed through to findAliases
 * @return a response whose data is the set of aliases; a failure response if the field is not
 *         recognized or the lookup throws
 */
public ResponsePojo getAliasSuggestions(String userIdStr, String term, String field, String communityIdStrList)
{
    final long nStartTime = System.currentTimeMillis();
    final ResponsePojo response = new ResponsePojo();

    // Map the user facing field name onto the entity feature field name
    // (keep user facing data model consistent, ie index(ex gazateer_index), actual_name/alias, disambiguated_name (ex disambiguous_name))
    final boolean bIsAliasField =
            field.equalsIgnoreCase(EntityPojo.actual_name_) || field.equalsIgnoreCase(EntityFeaturePojo.alias_);
    final boolean bIsNameField =
            field.equalsIgnoreCase("disambiguous_name") //^^ (for bw compatibility from GUI)
            || field.equals(EntityPojo.disambiguated_name_)
            || field.equals(EntityFeaturePojo.disambiguated_name_);
    final boolean bIsLegacyIndexField =
            field.equalsIgnoreCase("gazateer_index") // (for bw compatibility from GUI)
            || field.equalsIgnoreCase(EntityPojo.index_);

    if (bIsAliasField) {
        field = EntityFeaturePojo.alias_;
    }
    else if (bIsNameField) {
        field = EntityFeaturePojo.disambiguated_name_;
    }
    else if (bIsLegacyIndexField) {
        field = EntityFeaturePojo.index_;
    }
    else if (!field.equalsIgnoreCase(EntityFeaturePojo.index_)) {
        // (anything other than the index field itself is unknown at this point)
        response.setResponse(new ResponseObject("aliasSuggest",false, "Field " + field + " not recognized"));
        return response;
    }

    try
    {
        Map<String, Set<String>> aliasesByTerm = findAliases(null, field, Arrays.asList(term), userIdStr, communityIdStrList);

        // Flatten the per-term alias sets into a single set
        Set<String> allAliases = new HashSet<String>();
        for (Set<String> termAliases : aliasesByTerm.values())
        {
            allAliases.addAll(termAliases);
        }
        response.setData(allAliases, (BasePojoApiMap<String>)null);
        response.setResponse(new ResponseObject("aliasSuggest",true,"Successfully returned aliases"));

        // Throttled logging: at most one line per 5s
        if (nStartTime > (lastAliasLog + 5000)) {
            lastAliasLog = nStartTime;
            logMsg.setLength(0);
            logMsg.append("knowledge/aliasSuggest query=").append(term)
                  .append(" found=").append(allAliases.size())
                  .append(" time=").append(System.currentTimeMillis() - nStartTime).append(" ms");
            logger.info(logMsg.toString());
        }
    }
    catch (Exception e)
    {
        // If an exception occurs log the error
        logger.error("Exception Message: " + e.getMessage(), e);
        response.setResponse(new ResponseObject("aliasSuggest",false,"Error returning aliases"));
    }
    return response;
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Alias utility code - used by (unused) alias suggestions code above and also for alias expansion
/**
 * Looks up the aliases of a set of terms in the entity feature collection.
 *
 * All entity features whose "field" value is one of the terms (restricted to the user's
 * communities) are fetched in a single query; then, for each term, the alias lists of the
 * features matching that term are merged into one set.
 *
 * @param entityFeatureDb the entity feature collection; if null DbManager.getFeature().getEntity() is used
 * @param field the entity feature field the terms refer to (index, disambiguated name or alias)
 * @param terms the values to look up
 * @param userIdStr the calling user, used to resolve the community list
 * @param communityIdStrList the community ids, passed to SocialUtils.getCommunityIds
 * @return a map of term to aliases (an empty set for terms with no match); if the lookup throws,
 *         the error is logged and whatever had been collected so far is returned
 */
public static Map<String, Set<String>> findAliases(DBCollection entityFeatureDb, String field, Collection<String> terms, String userIdStr, String communityIdStrList)
{
    Map<String, Set<String>> aliases = new HashMap<String, Set<String>>();
    String[] communityIdStrs = SocialUtils.getCommunityIds(userIdStr, communityIdStrList);
    try
    {
        if (null == entityFeatureDb) {
            entityFeatureDb = DbManager.getFeature().getEntity();
        }
        // Get all the aliases in one go, will sort them out later
        BasicDBObject query = new BasicDBObject();
        query.put(field, new BasicDBObject(MongoDbManager.in_, terms));
        ObjectId[] communityIds = new ObjectId[communityIdStrs.length];
        for (int i = 0; i < communityIdStrs.length; i++) {
            communityIds[i] = new ObjectId(communityIdStrs[i]);
        }
        query.put(EntityFeaturePojo.communityId_, new BasicDBObject(MongoDbManager.in_, communityIds));

        List<EntityFeaturePojo> gpl = EntityFeaturePojo.listFromDb(entityFeatureDb.find(query), EntityFeaturePojo.listType());

        // The field being matched is the same for every term/entity pair, so work it out once
        final boolean bMatchIndex = field.equals(EntityFeaturePojo.index_);
        final boolean bMatchDisambiguatedName = field.equals(EntityFeaturePojo.disambiguated_name_);
        final boolean bMatchAlias = field.equals(EntityFeaturePojo.alias_);

        for ( String s : terms )
        {
            Set<String> termAliases = new HashSet<String>();
            aliases.put(s, termAliases);
            for (EntityFeaturePojo gpit : gpl)
            {
                if (null == gpit.getAlias()) {
                    // (no aliases to contribute; also previously a single record like this aborted the whole lookup)
                    continue;
                }
                // (null checks: a record with a missing index/name simply doesn't match)
                boolean bMatches =
                        (bMatchIndex && (null != gpit.getIndex()) && gpit.getIndex().equals(s)) // gazname
                        ||
                        (bMatchDisambiguatedName && (null != gpit.getDisambiguatedName()) && gpit.getDisambiguatedName().equals(s)) // disambiguated name
                        ||
                        (bMatchAlias && gpit.getAlias().contains(s)); // alias
                if (bMatches)
                {
                    termAliases.addAll(gpit.getAlias());
                }
            }
        }
    }
    catch(Exception e)
    {
        logger.error("Exception Message: " + e.getMessage(), e);
    }
    return aliases;
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// INTERNAL SEARCHING OF ALIAS MASTERS (USES LUCENE)
// Searcher over an in-memory Lucene index of the disambiguated names of the alias masters (null if there are none)
private static CrossVersionIndexSearcher _aliasSearcherCache = null;
// The last-modified time of the alias table that the current cache was built from
private static Date _searcherCacheLastCreated = null;
// Lucene doc id -> alias master: documents are added in the same order as this array is filled
// (only the first "number of documents" slots are populated, the rest stay null)
private static EntityFeaturePojo[] indexToSearchCacheIndexes = null;

/**
 * (Re)builds the in-memory Lucene index of alias master names, unless the existing cache is
 * at least as recent as the alias table's last-modified time.
 *
 * The cache lives in static fields, so the rebuild is serialized on the class (not on this
 * instance), and the new lookup array/searcher are only published once the new index has been
 * built successfully: if the build fails the previous cache is left untouched.
 *
 * @param aliasTable the alias table whose masters are indexed
 */
private void createAliasSearchCache(AliasLookupTable aliasTable)
{
    synchronized (SearchHandler.class) {
        // Check if we need to update the Lucene store:

        if ((null != _searcherCacheLastCreated) && (null != aliasTable.getLastModified())) {
            if (_searcherCacheLastCreated.getTime() >= aliasTable.getLastModified().getTime()) {
                return;
            }
        }//TESTED

        RAMDirectory idx = new RAMDirectory();
        try {
            EntityFeaturePojo[] newIndexToMaster = new EntityFeaturePojo[aliasTable.masters().size()];
            int nAdded = 0;

            CrossVersionIndexWriter writer = new CrossVersionIndexWriter(idx, Version.LUCENE_30, new StandardAnalyzer(Version.LUCENE_30));
            try {
                for (EntityFeaturePojo alias: aliasTable.masters()) {
                    if ((null != alias.getIndex()) && (null != alias.getDisambiguatedName()) && (null != alias.getAlias())
                            && !alias.getIndex().equalsIgnoreCase("discard") && !alias.getAlias().contains(alias.getIndex()))
                    {
                        // (that last check just means there's no point in including the alias if it has itself as a sub-alias)

                        writer.addSingleAnalyzedUnstoredFieldDocument("name", alias.getDisambiguatedName());
                        newIndexToMaster[nAdded] = alias;
                        nAdded++;

                        //System.out.println("CACHE ADD: " + alias.getDisambiguatedName() + ": " + nAdded + " - " + alias.getIndex());
                    }
                }
            }
            finally {
                writer.close();
            }

            // (no documents means no searcher, checkAliasMasters treats a null searcher as "nothing to find")
            CrossVersionIndexSearcher newSearcher = null;
            if (nAdded > 0) {
                newSearcher = new CrossVersionIndexSearcher(idx);
            }

            // Publish the new cache in one go, then release the old reader
            CrossVersionIndexSearcher oldSearcher = _aliasSearcherCache;
            indexToSearchCacheIndexes = newIndexToMaster;
            _aliasSearcherCache = newSearcher;
            _searcherCacheLastCreated = aliasTable.getLastModified();

            if (null != oldSearcher) {
                try {
                    oldSearcher.getIndexReader().close();
                }
                catch (Exception ignored) {
                    // (best effort only: the old index is being discarded anyway)
                }
            }
        }//TESTED
        catch (Exception e) {
            //Probably should never happen once set up correctly
            logger.error("Exception Message: " + e.getMessage(), e);
        }
    }
}//TESTED
/**
 * Finds the alias masters whose disambiguated name matches a (Lucene syntax) query term,
 * using the in-memory index maintained by createAliasSearchCache.
 *
 * @param aliasTable the alias table (used to refresh the cache if needed, and to size the search)
 * @param term the escaped query string; a term starting with "*" returns every cached alias master
 * @return the matching alias masters, or null if there is no cache, nothing matched or the search failed
 */
private ArrayList<EntityFeaturePojo> checkAliasMasters(AliasLookupTable aliasTable, String term) {

    createAliasSearchCache(aliasTable); // (only does anything if needed)

    // Work on local snapshots: the static cache can be replaced (or set to null) by a rebuild
    // triggered from another request while this method is running
    final CrossVersionIndexSearcher searcher = _aliasSearcherCache;
    final EntityFeaturePojo[] cachedMasters = indexToSearchCacheIndexes;

    ArrayList<EntityFeaturePojo> retVal = null;
    if ((null != searcher) && (null != cachedMasters)) {
        try {
            if (term.startsWith("*")) { // match all
                retVal = new ArrayList<EntityFeaturePojo>(cachedMasters.length);
                for (EntityFeaturePojo ent: cachedMasters) {
                    if (null != ent) {
                        retVal.add(ent);
                    }
                    else { // (the populated entries are contiguous, so the first null is the end)
                        break;
                    }
                }
            }//TESTED (end special case, "*" wildcard)
            else {
                Query query = new CrossVersionQueryParser(Version.LUCENE_30, "name", new StandardAnalyzer(Version.LUCENE_30)).parse(term);
                TopDocs results = searcher.search(query, aliasTable.masters().size());
                ScoreDoc[] hits = results.scoreDocs;
                if (hits.length > 0) {
                    retVal = new ArrayList<EntityFeaturePojo>(hits.length);
                    for (ScoreDoc hit: hits) {
                        // (guard the doc id -> master lookup in case the index and the array are out of step)
                        if ((hit.doc >= 0) && (hit.doc < cachedMasters.length) && (null != cachedMasters[hit.doc])) {
                            retVal.add(cachedMasters[hit.doc]);
                        }
                    }
                }
            }//TESTED (normal case, Lucene lookup)
        }
        catch (Exception e) {
            //Probably should never happen once set up correctly
            logger.error("Exception Message: " + e.getMessage(), e);
        }
    }
    return retVal;
}//TESTED
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Some Lucene utilities:
// The characters with a special meaning in the Lucene query syntax:
// + - & | ! ( ) { } [ ] ^ " ~ * ? : \ /
// (compiled once, since luceneEncodeTerm is called for every token of every suggest request)
private static final java.util.regex.Pattern LUCENE_SPECIAL_CHARS =
        java.util.regex.Pattern.compile("([\"+~*?:/|&(){}\\[\\]\\^\\!\\-\\\\])");
// Regex replacement string: a literal backslash followed by the matched character
private static final String LUCENE_ESCAPED_CHAR = "\\\\$1";

/**
 * Escapes the Lucene special characters of a raw query and wraps the result in double
 * quotes, to make it an exact (phrase) match.
 *
 * @param rawQuery the text to encode (must not be null)
 * @return the quoted, escaped query
 */
public static String luceneEncode(String rawQuery)
{
    return '"' + luceneEncodeTerm(rawQuery) + '"';
}

/**
 * Escapes the Lucene special characters of a single query term by putting a backslash in
 * front of each of them (no quotes are added).
 *
 * @param rawQueryTerm the text to encode (must not be null)
 * @return the escaped term
 */
public static String luceneEncodeTerm(String rawQueryTerm)
{
    return LUCENE_SPECIAL_CHARS.matcher(rawQueryTerm).replaceAll(LUCENE_ESCAPED_CHAR);
}
}