/**
* Copyright (C) 2010 Peter Karich <jetwick_@_pannous_._info>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.jetwick.es;
import org.elasticsearch.index.query.FilterBuilder;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.BaseFilterBuilder;
import org.elasticsearch.index.query.FilterBuilders;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.RangeFilterBuilder;
import java.io.IOException;
import java.util.LinkedHashSet;
import java.util.Set;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import java.util.LinkedHashMap;
import java.util.Map;
import de.jetwick.data.JTweet;
import java.util.Collection;
import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.common.xcontent.json.JsonXContent;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.elasticsearch.search.facet.AbstractFacetBuilder;
import org.elasticsearch.search.facet.FacetBuilders;
import de.jetwick.util.Helper;
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.search.sort.SortOrder;
import de.jetwick.util.MapEntry;
import de.jetwick.util.MyDate;
import de.jetwick.util.StrEntry;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map.Entry;
import static de.jetwick.es.ElasticTweetSearch.*;
import org.elasticsearch.action.search.SearchRequestBuilder;
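/**
* Base class for search queries. It collects the query string, paging,
* sort fields, filter queries and facets and applies them to an
* ElasticSearch SearchRequestBuilder (see initRequestBuilder).
*
* @author Peter Karich, peat_hal 'at' users 'dot' sourceforge 'dot' net
*/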
public abstract class JetwickQuery implements Serializable {
private static final Logger logger = LoggerFactory.getLogger(JetwickQuery.class);
private static final long serialVersionUID = 1L;
// Not referenced inside this class; presumably a key for saved searches - verify against callers.
public final static String SAVED_SEARCHES = "ss";
// Offset of the first hit, passed to SearchRequestBuilder.setFrom in initRequestBuilder.
private int from;
// Number of hits to return, passed to SearchRequestBuilder.setSize; defaults to 10.
private int size = 10;
// Query text; getQuery() replaces null with the empty string.
protected String queryString;
// When true, getQuery() returns the query string run through smartEscapeQuery.
protected boolean escape = false;
// Only stored and returned by setExplain/isExplain in this class.
protected boolean explain = false;
// (field, order) pairs; initRequestBuilder only applies the orders "asc" and "desc".
private List<StrEntry> sortFields = new ArrayList<StrEntry>();
// (key, value) filters, converted to ElasticSearch filters via filterQuery2Builder.
private List<Entry<String, Object>> filterQueries = new ArrayList<Entry<String, Object>>();
// Facet field name -> maximum number of facet entries, kept in insertion order.
private Map<String, Integer> facetFields = new LinkedHashMap<String, Integer>();
// (key, value) pairs turned into filter facets named "key:value" by fromFacetQuery.
private List<StrEntry> facetQueries = new ArrayList<StrEntry>();
// Flag only stored, copied and serialized by this class; not interpreted here.
private boolean dateFacets = false;
/**
 * Creates a query with an empty query string and without facets.
 */
public JetwickQuery() {
this(null, false);
}
/**
 * Creates a query with an empty query string.
 *
 * @param facets if true, attachFacetibility() is invoked during initialization
 */
public JetwickQuery(boolean facets) {
this(null, facets);
}
/**
 * Creates a query for the given query string.
 * NOTE(review): init is public and non-final, so an overridable method
 * (and attachFacetibility through it) runs before subclass fields are initialized.
 *
 * @param queryStr the query string, may be null
 * @param facets if true, attachFacetibility() is invoked during initialization
 */
public JetwickQuery(String queryStr, boolean facets) {
init(queryStr, facets);
}
/**
 * Applies the state of this query to the given request builder: search
 * type, paging, sort order, the (possibly filtered) query and all facets.
 *
 * @param srb the request builder to configure
 * @return the passed builder
 */
public SearchRequestBuilder initRequestBuilder(SearchRequestBuilder srb) {
    int rows = getSize();
    int start = getFrom();
    // QUERY_AND_FETCH would return too many results
    srb.setSearchType(SearchType.QUERY_THEN_FETCH).setFrom(start).setSize(rows);

    // only the orders "asc" and "desc" are applied, anything else is skipped
    for (StrEntry sort : getSortFields()) {
        String order = sort.getValue();
        if ("asc".equals(order)) {
            srb.addSort(sort.getKey(), SortOrder.ASC);
        } else if ("desc".equals(order)) {
            srb.addSort(sort.getKey(), SortOrder.DESC);
        }
    }

    QueryBuilder query = processFilterQueries(createQuery(getQuery()));
    processFacetFields(srb);
    processFacetQueries(srb);
    srb.setQuery(query);
    return srb;
}
/**
 * Adds one terms facet per registered facet field to the request.
 *
 * @param srb the request builder receiving the facets
 */
public void processFacetFields(SearchRequestBuilder srb) {
    for (Entry<String, Integer> facetField : getFacetFields().entrySet()) {
        String fieldName = facetField.getKey();
        int limit = facetField.getValue();
        srb.addFacet(fromFacetField(fieldName, limit));
    }
}
/**
 * Adds one filter facet per registered facet query to the request.
 *
 * @param srb the request builder receiving the facets
 */
protected void processFacetQueries(SearchRequestBuilder srb) {
    for (StrEntry facetQuery : getFacetQueries()) {
        AbstractFacetBuilder facet = fromFacetQuery(facetQuery);
        srb.addFacet(facet);
    }
}
/**
 * Wraps the given query into a filtered query that AND-combines all
 * filter queries of this object.
 *
 * @param qb the query to restrict
 * @return a filtered query, or qb itself if no filter could be created
 */
public QueryBuilder processFilterQueries(QueryBuilder qb) {
    FilterBuilder combined = null;
    for (Entry<String, Object> filterQuery : getFilterQueries()) {
        FilterBuilder current = fromFilterQuery(filterQuery);
        if (current == null) {
            continue;
        }
        if (combined == null) {
            combined = current;
        } else {
            combined = FilterBuilders.andFilter(combined, current);
        }
    }

    if (combined == null) {
        return qb;
    }
    return QueryBuilders.filteredQuery(qb, combined);
}
/**
 * Converts a single filter query entry into a filter.
 *
 * @param entry the key and value of the filter query
 * @return the result of filterQuery2Builder for that key and value
 */
public FilterBuilder fromFilterQuery(Entry<String, Object> entry) {
    String key = entry.getKey();
    Object value = entry.getValue();
    return filterQuery2Builder(key, value);
}
/**
 * Creates a terms facet that is named like the field it is computed on.
 *
 * @param ff the field, also used as the facet name
 * @param limit the size handed to the terms facet
 * @return the facet builder
 */
public AbstractFacetBuilder fromFacetField(String ff, int limit) {
    return FacetBuilders.termsFacet(ff)
            .field(ff)
            .size(limit);
}
/**
 * Creates a filter facet named "key:value" from the given facet query.
 *
 * @param e the key and value of the facet query
 * @return the facet builder
 */
public AbstractFacetBuilder fromFacetQuery(StrEntry e) {
    FilterBuilder filter = filterQuery2Builder(e.getKey(), e.getValue());
    String facetName = e.getKey() + ":" + e.getValue();
    return FacetBuilders.filterFacet(facetName, filter);
}
/**
 * Guesses the type of a filter value.
 * <p>
 * A value enclosed in double quotes is returned as a String without the
 * quotes. Otherwise the value is returned as Long if it parses as a long,
 * as Double if it parses as a double, and unchanged as String if neither
 * applies.
 *
 * @param val the raw value, must not be null
 * @return a String, Long or Double
 */
public static Object getTermValue(String val) {
    int length = val.length();
    // length >= 2 keeps a lone quote character from hitting substring(1, 0)
    if (length >= 2 && val.startsWith("\"") && val.endsWith("\"")) {
        // a quoted value can never parse as a number, so it stays a String
        return val.substring(1, length - 1);
    }

    try {
        return Long.parseLong(val);
    } catch (NumberFormatException notALong) {
        // fall through and try the next type
    }

    try {
        return Double.parseDouble(val);
    } catch (NumberFormatException notADouble) {
        return val;
    }
}
/**
 * Sets the explain flag of this query.
 *
 * @param explain the new flag value
 * @return this query
 */
public JetwickQuery setExplain(boolean explain) {
    this.explain = explain;
    return this;
}
/**
 * @return the explain flag of this query
 */
public boolean isExplain() {
    return this.explain;
}
/**
 * @return the analyzer name "search_analyzer"
 */
public String getDefaultAnalyzer() {
    final String analyzerName = "search_analyzer";
    return analyzerName;
}
/**
 * Creates the query part of the request. This base implementation ignores
 * the query string and matches all documents.
 *
 * @param queryStr the query string as returned from getQuery()
 * @return a match-all query
 */
protected QueryBuilder createQuery(String queryStr) {
    return QueryBuilders.matchAllQuery();
}
/**
 * Converts a Solr-like filter query (key and value) into an ElasticSearch
 * filter. The following forms are handled, in this order:
 * <ul>
 * <li>a key containing ElasticTweetSearch._ID: ids filter, see idFilter</li>
 * <li>a value containing " OR ", optionally in parentheses: terms filter</li>
 * <li>a value starting with "[NOW" or "[DAY": not supported</li>
 * <li>a range "[from TO to]": range filter; "*", "-Infinity" and
 * "Infinity" mark an open bound. If both bounds are open an exists filter
 * on the field is returned</li>
 * <li>a key starting with "-": negated term filter</li>
 * <li>anything else: term filter</li>
 * </ul>
 *
 * @param key the field name, optionally prefixed with "-"
 * @param input the filter value, converted via toString(); must not be null
 * @return the filter
 * @throws UnsupportedOperationException for Solr date math values
 * @throws IllegalStateException if a range value contains no blank
 */
public static FilterBuilder filterQuery2Builder(String key, Object input) {
    String val = input.toString();
    if (key.contains(ElasticTweetSearch._ID))
        return idFilter(key, val);

    if (val.contains(" OR ")) {
        // handle field:(val OR val2 OR ...)
        if (val.startsWith("(") && val.endsWith(")"))
            val = val.substring(1, val.length() - 1);

        String[] res = val.split(" OR ");
        Object[] terms = new Object[res.length];
        for (int i = 0; i < res.length; i++) {
            terms[i] = getTermValue(res[i]);
        }
        return FilterBuilders.termsFilter(key, terms);
    }

    if (val.startsWith("[NOW") || val.startsWith("[DAY"))
        throw new UnsupportedOperationException("Solr's date math is not yet implemented with ES");

    if (val.startsWith("[")) {
        // strip the enclosing brackets
        val = val.substring(1, val.length() - 1);
        int index1 = val.indexOf(" ");
        if (index1 < 0)
            throw new IllegalStateException("couldn't handle filter " + key + ":" + val);

        RangeFilterBuilder rfb = FilterBuilders.rangeFilter(key);
        Object from = null;
        Object to = null;

        if (!val.startsWith("*") && !val.startsWith("-Infinity")) {
            from = parseRangeBound(val.substring(0, index1));
            rfb.from(from).includeLower(true);
        }

        if (!val.endsWith("*") && !val.endsWith("Infinity")) {
            to = parseRangeBound(val.substring(index1 + " TO ".length()));
            if (from != null)
                rfb.to(to).includeUpper(true);
            else
                rfb.lte(to);
        }

        // [* TO *] only requires the field to exist; the exists filter
        // needs the field name, not the range expression
        if (from == null && to == null)
            return FilterBuilders.existsFilter(key);

        return rfb;
    }

    if (key.startsWith("-"))
        return FilterBuilders.notFilter(FilterBuilders.termFilter(key.substring(1), getTermValue(val)));

    return FilterBuilders.termFilter(key, getTermValue(val));
}

/**
 * Parses one bound of a range filter: an int if possible, otherwise
 * whatever Helper.toDate creates from the text.
 */
private static Object parseRangeBound(String bound) {
    try {
        return Integer.parseInt(bound);
    } catch (NumberFormatException ex) {
        return Helper.toDate(bound);
    }
}
/**
 * Creates an ids filter. The type is the part of the key that follows
 * ElasticTweetSearch._ID; the value is either a single id or several ids
 * separated by " OR ". A key starting with "-" negates the filter.
 *
 * @param key the filter key containing ElasticTweetSearch._ID
 * @param val one id or an " OR " separated list of ids
 * @return the (possibly negated) ids filter
 */
public static BaseFilterBuilder idFilter(String key, String val) {
    int typeStart = key.indexOf(ElasticTweetSearch._ID) + ElasticTweetSearch._ID.length();
    String type = key.substring(typeStart);

    BaseFilterBuilder idsFilter;
    if (!val.contains(" OR ")) {
        idsFilter = FilterBuilders.idsFilter(type).ids(val);
    } else {
        idsFilter = FilterBuilders.idsFilter(type).ids(val.split(" OR "));
    }

    if (!key.startsWith("-")) {
        return idsFilter;
    }
    BaseFilterBuilder negated = FilterBuilders.notFilter(idsFilter);
    return negated;
}
/**
 * Creates an OR filter from filter strings of the form "key:value".
 * Only the first colon separates key and value, so the value itself may
 * contain colons.
 *
 * @param filterStrings the filters to combine
 * @return an or-filter over all filters
 * @throws UnsupportedOperationException if a string has no colon or an
 * empty value
 */
public static FilterBuilder filters2Builder(Collection<String> filterStrings) {
    List<FilterBuilder> filters = new ArrayList<FilterBuilder>();
    for (String tmpFq : filterStrings) {
        // limit 2: split at the first colon only, like parseQuery does for fq
        String[] strs = tmpFq.split("\\:", 2);
        if (strs.length != 2 || strs[1].isEmpty())
            // report the offending input; concatenating the array would only print its identity
            throw new UnsupportedOperationException("string split should result in 2 parts but didn't -> " + tmpFq);

        filters.add(filterQuery2Builder(strs[0], strs[1]));
    }
    return FilterBuilders.orFilter(filters.toArray(new FilterBuilder[filters.size()]));
}
/**
 * Controls whether getQuery() returns the escaped query string.
 *
 * @param b true to escape via smartEscapeQuery
 * @return this query
 */
public JetwickQuery setEscape(boolean b) {
    this.escape = b;
    return this;
}
/**
 * @return the date facets flag
 */
public boolean isDateFacets() {
    return this.dateFacets;
}
/**
 * Sets the date facets flag.
 *
 * @param dateFacet the new flag value
 * @return this query
 */
public JetwickQuery setDateFacets(boolean dateFacet) {
    dateFacets = dateFacet;
    return this;
}
/**
 * @return the offset of the first hit to return
 */
public int getFrom() {
    return this.from;
}
/**
 * Sets the offset of the first hit to return.
 *
 * @param from the offset
 * @return this query
 */
public JetwickQuery setFrom(int from) {
    this.from = from;
    return this;
}
/**
 * @return the number of hits to return
 */
public int getSize() {
    return this.size;
}
/**
 * Sets the number of hits to return.
 *
 * @param size the number of hits
 * @return this query
 */
public JetwickQuery setSize(int size) {
    this.size = size;
    return this;
}
/**
 * Returns the query string, escaped via smartEscapeQuery if escaping is
 * enabled. As a side effect a null query string is replaced by "".
 *
 * @return the query string, never null
 */
public String getQuery() {
    if (queryString == null || queryString.isEmpty()) {
        queryString = "";
    }
    return escape ? smartEscapeQuery(queryString) : queryString;
}
/**
 * Sets the query string as is, without the normalization done by init.
 *
 * @param queryString the new query string
 * @return this query
 */
public JetwickQuery setQuery(String queryString) {
    this.queryString = queryString;
    return this;
}
/**
 * Registers a facet field, replacing the limit of an already registered
 * field with the same name.
 *
 * @param field the field to facet on
 * @param limit the maximum number of facet entries
 * @return this query
 */
public JetwickQuery addFacetField(String field, Integer limit) {
    this.facetFields.put(field, limit);
    return this;
}
/**
 * Registers a facet field with a limit of 10 entries.
 *
 * @param field the field to facet on
 * @return the result of addFacetField(field, 10)
 */
public JetwickQuery addFacetField(String field) {
    final int defaultLimit = 10;
    return addFacetField(field, defaultLimit);
}
/**
 * Removes all facet fields and all facet queries.
 *
 * @return this query
 */
public JetwickQuery removeFacets() {
    this.facetFields.clear();
    this.facetQueries.clear();
    return this;
}
/**
 * Removes all sort fields. Both parameters are ignored.
 *
 * @param field unused
 * @param order unused
 * @return this query
 */
public JetwickQuery clearSort(String field, String order) {
    this.sortFields.clear();
    return this;
}
/**
 * Replaces all existing sort fields with the given one.
 *
 * @param sortKey the field to sort by
 * @param sortVal the order; initRequestBuilder understands "asc" and "desc"
 * @return this query
 */
public JetwickQuery setSort(String sortKey, String sortVal) {
    this.sortFields.clear();
    addSort(sortKey, sortVal);
    return this;
}
/**
 * Appends a sort field without touching the existing ones.
 *
 * @param field the field to sort by
 * @param order the sort order
 * @return this query
 */
private JetwickQuery addSort(String field, String order) {
    StrEntry sortEntry = new StrEntry(field, order);
    this.sortFields.add(sortEntry);
    return this;
}
/**
 * @return the internal, modifiable list of sort fields
 */
public List<StrEntry> getSortFields() {
    return this.sortFields;
}
/**
 * Normalizes and sets the query string: null becomes "", every "--" is
 * replaced by "-", the result is trimmed, and the match-all forms "*:*"
 * and "*" become "". Optionally attaches the facets afterwards.
 *
 * @param queryStr the raw query string, may be null
 * @param facets if true, attachFacetibility() is called
 * @return this query
 */
public JetwickQuery init(String queryStr, boolean facets) {
    String normalized = (queryStr == null) ? "" : queryStr;
    normalized = normalized.replaceAll("--", "-").trim();
    if (normalized.equals("*:*") || normalized.equals("*")) {
        normalized = "";
    }
    setQuery(normalized);

    if (facets) {
        attachFacetibility();
    }
    return this;
}
/**
 * Hook called from init when facets are requested. This base
 * implementation adds nothing.
 *
 * @return this query
 */
public JetwickQuery attachFacetibility() {
    return this;
}
/**
 * Currently a no-op: the user facet is disabled.
 *
 * @return this query
 */
public JetwickQuery attachUserFacets() {
    // disabled: addFacetField(USER, 10);
    return this;
}
/**
 * Adds a filter on the USER field for the given user name, normalized via
 * trimUserName. Null or blank names are ignored.
 *
 * @param userName the user name, optionally with a leading "@"
 * @return this query
 */
public JetwickQuery addUserFilter(String userName) {
    boolean blank = userName == null || userName.trim().isEmpty();
    if (blank) {
        return this;
    }
    addFilterQuery(USER, trimUserName(userName));
    return this;
}
/**
 * Finds the first filter query on the USER field, negated or not.
 *
 * @return the filter as "key:value", or null if there is none
 */
public String getUserFilter() {
    final String negatedUser = "-" + USER;
    for (Entry<String, Object> filterQuery : getFilterQueries()) {
        String key = filterQuery.getKey();
        boolean isUserKey = key.equals(USER) || key.equals(negatedUser);
        if (isUserKey) {
            return filterQuery.getKey() + ":" + filterQuery.getValue();
        }
    }
    return null;
}
/**
 * Extracts the user name from the first (non-negated) USER filter query.
 * Enclosing double quotes are removed and the name is normalized via
 * trimUserName.
 *
 * @return the user name, or "" if there is no USER filter or if it lists
 * several users combined with " OR "
 */
public String extractUserName() {
    for (Entry<String, Object> filterQuery : getFilterQueries()) {
        if (!USER.equals(filterQuery.getKey())) {
            continue;
        }

        String name = filterQuery.getValue().toString();
        if (name.contains(" OR ")) {
            return "";
        }

        boolean quoted = name.length() > 1 && name.startsWith("\"") && name.endsWith("\"");
        if (quoted) {
            name = name.substring(1, name.length() - 1);
        }
        return trimUserName(name);
    }
    return "";
}
/**
 * Normalizes a user name: lower-cases it and removes one leading "@".
 * Lower-casing is locale independent, so the result does not depend on
 * the default locale of the JVM (e.g. "I" stays "i" under a Turkish
 * locale instead of becoming a dotless i).
 *
 * @param userName the user name, must not be null
 * @return the normalized user name
 */
public static String trimUserName(String userName) {
    // fully qualified to avoid touching the import block of the file
    String normalized = userName.toLowerCase(java.util.Locale.ROOT);
    if (normalized.startsWith("@"))
        normalized = normalized.substring(1);
    return normalized;
}
/**
 * Checks whether a filter query with the given key (negated or not) and
 * an equal value exists.
 *
 * @param key the filter key without a leading "-"
 * @param val the value, compared via equals
 * @return true if such a filter query exists
 */
public boolean containsFilter(String key, Object val) {
    final String negateKey = "-" + key;
    for (Entry<String, Object> filterQuery : getFilterQueries()) {
        String currentKey = filterQuery.getKey();
        boolean keyMatches = currentKey.equals(key) || currentKey.equals(negateKey);
        if (keyMatches && filterQuery.getValue().equals(val)) {
            return true;
        }
    }
    return false;
}
/**
 * @param filterKey the exact key to look for
 * @return true if a filter query with exactly this key exists
 */
public boolean containsFilterKey(String filterKey) {
    String firstValue = getFirstFilterQuery(filterKey);
    return firstValue != null;
}
/**
 * Returns the value of the first filter query with exactly the given key.
 *
 * @param key the key to look for
 * @return the value converted via toString(), or null if there is no
 * filter query with that key
 */
public String getFirstFilterQuery(String key) {
    for (Entry<String, Object> filterQuery : getFilterQueries()) {
        if (!filterQuery.getKey().equals(key)) {
            continue;
        }
        return filterQuery.getValue().toString();
    }
    return null;
}
/**
 * Replaces all filter queries of a key (negated or not) with the given
 * filter.
 *
 * @param filter the new filter in the form "key:value"
 * @return false if the filter contains no colon, true otherwise
 */
public boolean replaceFilterQuery(String filter) {
    int colon = filter.indexOf(":");
    if (colon >= 0) {
        String filterKey = filter.substring(0, colon);
        String filterValue = filter.substring(colon + 1);
        removeFilterQueries(filterKey);
        addFilterQuery(filterKey, filterValue);
        return true;
    }
    return false;
}
/**
 * Removes a single value from the first filter query that is registered
 * for the key of the specified filter.
 * <p>
 * Example: if this query contains the filter query with key test and value
 * "hi OR me" then reduceFilterQuery("test:hi") leaves key test with value
 * "me". Terms in the legacy form "test:hi" are matched as well. If no term
 * remains the filter query is removed completely. A reduced filter query
 * is appended at the end of the filter query list.
 *
 * @param filter the filter to remove in the form "key:value"
 * @return true if the value was found and removed, false otherwise
 */
public boolean reduceFilterQuery(String filter) {
    int index = filter.indexOf(":");
    if (index < 0)
        return false;

    String filterKey = filter.substring(0, index);
    String valueToRemove = filter.substring(index + 1);
    String existing = getFirstFilterQuery(filterKey);
    if (existing == null)
        return false;

    // the terms may be wrapped like field:(val OR val2), see filterQuery2Builder
    boolean parenthesized = existing.length() > 1
            && existing.startsWith("(") && existing.endsWith(")");
    String termList = parenthesized ? existing.substring(1, existing.length() - 1) : existing;

    StringBuilder remaining = new StringBuilder();
    int kept = 0;
    boolean found = false;
    for (String term : termList.split(" OR ")) {
        if (term.equals(valueToRemove) || term.equals(filter)) {
            found = true;
            continue;
        }
        if (kept++ > 0)
            remaining.append(" OR ");
        remaining.append(term);
    }
    if (!found)
        return false;

    // remove exactly the entry getFirstFilterQuery returned; matching by key
    // also works if the stored value is not a String (e.g. a Long)
    Iterator<Entry<String, Object>> iter = getFilterQueries().iterator();
    while (iter.hasNext()) {
        if (iter.next().getKey().equals(filterKey)) {
            iter.remove();
            break;
        }
    }

    if (kept > 0) {
        // a single remaining term must not be wrapped: parentheses are
        // only stripped again for OR lists
        if (parenthesized && kept > 1)
            remaining.insert(0, '(').append(')');
        addFilterQuery(filterKey, remaining.toString());
    }
    return true;
}
/**
 * Appends a filter query. Existing filter queries with the same key are
 * kept.
 *
 * @param field the filter key, optionally prefixed with "-"
 * @param filter the filter value
 * @return this query
 */
public JetwickQuery addFilterQuery(String field, Object filter) {
    this.filterQueries.add(new MapEntry(field, filter));
    return this;
}
/**
 * Removes the first filter query with exactly the given key and an equal
 * value.
 *
 * @param field the filter key
 * @param filter the filter value, compared via equals
 * @return this query
 */
public JetwickQuery removeFilterQuery(String field, Object filter) {
    for (Iterator<Entry<String, Object>> it = filterQueries.iterator(); it.hasNext();) {
        Entry<String, Object> candidate = it.next();
        boolean sameKey = candidate.getKey().equals(field);
        if (sameKey && candidate.getValue().equals(filter)) {
            it.remove();
            return this;
        }
    }
    return this;
}
/**
 * Removes all filter queries with the given key, negated or not.
 *
 * @param filterKey the filter key without a leading "-"
 * @return this query
 */
public JetwickQuery removeFilterQueries(String filterKey) {
    final String negatedKey = "-" + filterKey;
    for (Iterator<Entry<String, Object>> it = filterQueries.iterator(); it.hasNext();) {
        String currentKey = it.next().getKey();
        if (currentKey.equals(filterKey) || currentKey.equals(negatedKey)) {
            it.remove();
        }
    }
    return this;
}
/**
 * @return the internal, modifiable list of filter queries
 */
public List<Entry<String, Object>> getFilterQueries() {
    return this.filterQueries;
}
/**
 * Creates a copy of this query as a TweetQuery (regardless of the concrete
 * class of this object). Query string, paging, sort fields, filter
 * queries, facet fields, facet queries, the date facets flag as well as
 * the escape and explain flags are copied, so that getQuery() of the copy
 * returns the same string as getQuery() of this query.
 *
 * @return the copy
 */
public JetwickQuery getCopy() {
    JetwickQuery q = new TweetQuery().setQuery(queryString);
    q.setFrom(from).setSize(size);
    // without these two flags the copy would silently lose escaping/explain
    q.setEscape(escape);
    q.setExplain(explain);

    for (Entry<String, String> e : getSortFields()) {
        q.addSort(e.getKey(), e.getValue());
    }
    for (Entry<String, Object> fq : getFilterQueries()) {
        q.addFilterQuery(fq.getKey(), fq.getValue());
    }
    for (Entry<String, Integer> ff : getFacetFields().entrySet()) {
        q.addFacetField(ff.getKey(), ff.getValue());
    }
    for (Entry<String, String> fq : getFacetQueries()) {
        q.addFacetQuery(fq.getKey(), fq.getValue());
    }
    q.setDateFacets(dateFacets);
    return q;
}
/**
 * Appends a facet query.
 *
 * @param name the key (field) of the facet query
 * @param query the value of the facet query
 * @return this query
 */
public JetwickQuery addFacetQuery(String name, String query) {
    StrEntry facetQuery = new StrEntry(name, query);
    this.facetQueries.add(facetQuery);
    return this;
}
/**
 * @return the internal, modifiable list of facet queries
 */
public List<StrEntry> getFacetQueries() {
    return this.facetQueries;
}
/**
 * @return the internal, modifiable map from facet field to its limit
 */
public Map<String, Integer> getFacetFields() {
    return this.facetFields;
}
/**
 * Configures paging: the offset becomes page * hitsPerPage.
 *
 * @param page the zero-based page index
 * @param hitsPerPage the number of hits per page
 */
public void attachPagability(int page, int hitsPerPage) {
    int offset = page * hitsPerPage;
    setFrom(offset).setSize(hitsPerPage);
}
/**
 * Implemented by subclasses.
 * NOTE(review): presumably restricts the results to the last given hours - confirm in the implementations.
 */
public abstract JetwickQuery addLatestDateFilter(int hours);
/**
 * Implemented by subclasses.
 * NOTE(review): presumably restricts the results to documents newer than the given date - confirm in the implementations.
 */
public abstract JetwickQuery addLatestDateFilter(MyDate date);
/**
 * Adds a range filter on QUALITY that starts at JTweet.QUAL_SPAM + 1 and
 * is open at the upper end.
 *
 * @return this query
 */
public JetwickQuery addNoSpamFilter() {
    String aboveSpam = "[" + (JTweet.QUAL_SPAM + 1) + " TO *]";
    addFilterQuery(QUALITY, aboveSpam);
    return this;
}
/**
 * Adds a filter requiring DUP_COUNT to be 0.
 *
 * @return this query
 */
public JetwickQuery addNoDupsFilter() {
    Integer noDuplicates = Integer.valueOf(0);
    addFilterQuery(DUP_COUNT, noDuplicates);
    return this;
}
/**
 * Adds a filter requiring IS_RT to be false.
 *
 * @return this query
 */
public JetwickQuery addIsOriginalTweetFilter() {
    addFilterQuery(IS_RT, Boolean.FALSE);
    return this;
}
/**
 * Adds a range filter requiring URL_COUNT to be at least 1.
 *
 * @return this query
 */
public JetwickQuery addOnlyWithLinks() {
    final String atLeastOne = "[1 TO *]";
    addFilterQuery(URL_COUNT, atLeastOne);
    return this;
}
/**
 * Serializes this query into a URL-parameter-like string, e.g.
 * q=algorithm&amp;start=0&amp;rows=15&amp;sort=retw_i+desc&amp;fq=...
 * Values are encoded via Helper.urlEncode. The result can be read again
 * with parseQuery.
 */
@Override
public String toString() {
    StringBuilder out = new StringBuilder();
    if (queryString != null) {
        out.append("q=").append(Helper.urlEncode(queryString)).append("&");
    }
    out.append("start=").append(from);
    out.append("&rows=").append(size);

    for (Entry<String, String> sort : getSortFields()) {
        out.append("&sort=").append(Helper.urlEncode(sort.getKey() + " " + sort.getValue()));
    }
    for (Entry<String, Object> filter : getFilterQueries()) {
        out.append("&fq=").append(Helper.urlEncode(filter.getKey() + ":" + filter.getValue().toString()));
    }
    for (Entry<String, Integer> facet : getFacetFields().entrySet()) {
        out.append("&facet=").append(facet.getValue()).append("|").append(Helper.urlEncode(facet.getKey()));
    }
    for (Entry<String, String> facetQuery : getFacetQueries()) {
        out.append("&facetQuery=").append(Helper.urlEncode(facetQuery.getKey() + ":" + facetQuery.getValue()));
    }

    out.append("&dateFacets=").append(dateFacets);
    return out.toString();
}
/**
 * Parses a string as created from toString() into a new TweetQuery that
 * was created without facets.
 *
 * @param qString the serialized query
 * @return the parsed query
 */
public static TweetQuery parseQuery(String qString) {
    TweetQuery target = new TweetQuery(false);
    return parseQuery(target, qString);
}
/**
 * Parses a string as created from toString() and applies the contained
 * parameters to the given query. Known parameters are dateFacets, start,
 * rows, q, fq, facetQuery, facet and sort; unknown ones are ignored.
 * A parameter that cannot be parsed is logged and skipped.
 * <p>
 * The string is split into parameters first and every key and value is
 * decoded afterwards, so that an encoded '&amp;' or '=' inside of a value
 * does not break the parameter boundaries.
 * NOTE(review): assumes Helper.urlDecode reverses Helper.urlEncode as
 * used in toString() - confirm.
 *
 * @param q the query to fill
 * @param qString the serialized query
 * @return the passed query
 */
public static <Q extends JetwickQuery> Q parseQuery(Q q, String qString) {
    for (String str : qString.split("&")) {
        int index = str.indexOf("=");
        if (str.trim().isEmpty() || index < 0)
            continue;

        // decode per component, never the whole string before splitting
        String key = Helper.urlDecode(str.substring(0, index));
        String val = Helper.urlDecode(str.substring(index + 1));
        try {
            if ("dateFacets".equals(key))
                q.setDateFacets(Boolean.parseBoolean(val));
            else if ("start".equals(key))
                q.setFrom(Integer.parseInt(val));
            else if ("rows".equals(key))
                q.setSize(Integer.parseInt(val));
            else if ("q".equals(key))
                q.setQuery(val);
            else if ("fq".equals(key)) {
                String strs[] = val.split("\\:", 2);
                q.addFilterQuery(strs[0], getTermValue(strs[1]));
            } else if ("facetQuery".equals(key)) {
                String strs[] = val.split("\\:", 2);
                q.addFacetQuery(strs[0], strs[1]);
            } else if ("facet".equals(key)) {
                // format is limit|field; a missing limit means the default
                int index2 = val.indexOf("|");
                if (index2 < 0)
                    q.addFacetField(val);
                else {
                    int limit = 10;
                    try {
                        limit = Integer.parseInt(val.substring(0, index2));
                    } catch (NumberFormatException ignored) {
                        // best effort: keep the default limit
                    }
                    q.addFacetField(val.substring(index2 + 1), limit);
                }
            } else if ("sort".equals(key)) {
                String strs[] = val.split(" ");
                q.setSort(strs[0], strs[1]);
            }
        } catch (Exception ex) {
            logger.error("Couldn't parse " + key + ":" + val + " in query:" + qString, ex);
        }
    }
    return q;
}
/**
 * Puts a backslash in front of each of the characters
 * ! ( ) : ^ [ ] { } ~ ? | &amp; ; and leaves all other characters as is.
 *
 * @param str the string to escape, must not be null
 * @return the escaped string
 */
public static String smartEscapeQuery(String str) {
    final String toEscape = "!():^[]{}~?|&;";
    StringBuilder escaped = new StringBuilder();
    for (char c : str.toCharArray()) {
        if (toEscape.indexOf(c) >= 0) {
            escaped.append('\\');
        }
        escaped.append(c);
    }
    return escaped.toString();
}
/**
 * Like smartEscapeQuery but without escaping of ( ) : and ^. A backslash
 * is put in front of each of the characters ! [ ] { } ~ ? | &amp; ;
 *
 * @param str the string to escape, must not be null
 * @return the escaped string
 */
public static String smartEscapeQuery2(String str) {
    final String toEscape = "![]{}~?|&;";
    StringBuilder escaped = new StringBuilder();
    for (char c : str.toCharArray()) {
        if (toEscape.indexOf(c) >= 0) {
            escaped.append('\\');
        }
        escaped.append(c);
    }
    return escaped.toString();
}
/**
 * @return true if string contains characters not good for twitter search.
 * Those characters are similar if not identical to the special
 * lucene-query characters: \ ! ( ) : ^ [ ] " { } ~ * ? | &amp; ;
 */
public static boolean containsForbiddenChars(String str) {
    final String forbidden = "\\!():^[]\"{}~*?|&;";
    for (char c : str.toCharArray()) {
        if (forbidden.indexOf(c) >= 0) {
            return true;
        }
    }
    return false;
}
/**
 * @param str the query string, must not be null
 * @return true if the string contains " OR ", a "-" or one of the
 * characters reported by containsForbiddenChars
 */
public static boolean isPowerQuery(String str) {
    if (str.contains(" OR ")) {
        return true;
    }
    if (str.contains("-")) {
        return true;
    }
    return containsForbiddenChars(str);
}
/**
 * Puts a backslash in front of every whitespace character and each of
 * \ + - ! ( ) : ^ [ ] " { } ~ * ? | &amp; ;
 * (copied from solrs' ClientUtils.escapeQueryChars).
 *
 * @param str the string to escape, must not be null
 * @return the escaped string
 */
public static String escapeQuery(String str) {
    final String special = "\\+-!():^[]\"{}~*?|&;";
    StringBuilder escaped = new StringBuilder();
    for (char c : str.toCharArray()) {
        boolean needsEscape = special.indexOf(c) >= 0 || Character.isWhitespace(c);
        if (needsEscape) {
            escaped.append('\\');
        }
        escaped.append(c);
    }
    return escaped.toString();
}
/**
 * Two queries are equal if they are of the same class and agree in
 * offset, size, query string, sort fields, filter queries, facet fields,
 * facet queries and the date facets flag. The escape and explain flags
 * are not compared.
 */
@Override
public boolean equals(Object obj) {
    if (obj == null || getClass() != obj.getClass()) {
        return false;
    }
    final JetwickQuery that = (JetwickQuery) obj;

    if (from != that.from || size != that.size) {
        return false;
    }
    boolean sameQuery = (queryString == null)
            ? that.queryString == null
            : queryString.equals(that.queryString);
    if (!sameQuery) {
        return false;
    }

    boolean sameSort = sortFields == that.sortFields
            || (sortFields != null && sortFields.equals(that.sortFields));
    boolean sameFilters = filterQueries == that.filterQueries
            || (filterQueries != null && filterQueries.equals(that.filterQueries));
    boolean sameFacetFields = facetFields == that.facetFields
            || (facetFields != null && facetFields.equals(that.facetFields));
    boolean sameFacetQueries = facetQueries == that.facetQueries
            || (facetQueries != null && facetQueries.equals(that.facetQueries));

    return sameSort && sameFilters && sameFacetFields && sameFacetQueries
            && dateFacets == that.dateFacets;
}
/**
 * Hash code over the same properties that equals compares.
 */
@Override
public int hashCode() {
    int[] parts = {
        from,
        size,
        queryString == null ? 0 : queryString.hashCode(),
        sortFields == null ? 0 : sortFields.hashCode(),
        filterQueries == null ? 0 : filterQueries.hashCode(),
        facetFields == null ? 0 : facetFields.hashCode(),
        facetQueries == null ? 0 : facetQueries.hashCode(),
        dateFacets ? 1 : 0
    };

    int hash = 5;
    for (int part : parts) {
        hash = 41 * hash + part;
    }
    return hash;
}
/**
 * Renders the given object as pretty printed JSON.
 *
 * @param tmp the object to render
 * @return the JSON string, or "&lt;ERROR:message&gt;" if rendering failed
 */
public static String toString(ToXContent tmp) {
    String rendered;
    try {
        rendered = tmp.toXContent(JsonXContent.contentBuilder(), ToXContent.EMPTY_PARAMS)
                .prettyPrint()
                .string();
    } catch (Exception failure) {
        rendered = "<ERROR:" + failure.getMessage() + ">";
    }
    return rendered;
}
/**
 * Stems the keys of the given entries, see doSnowballStemming.
 *
 * @param terms the entries whose keys should be stemmed
 * @return the stemmed keys
 */
protected Collection<String> doSnowballTermsStemming(Collection<Entry<String, Integer>> terms) {
    final Iterator<Entry<String, Integer>> entries = terms.iterator();
    // view on the entry iterator that only exposes the keys
    Iterator<String> keys = new Iterator<String>() {

        @Override
        public boolean hasNext() {
            return entries.hasNext();
        }

        @Override
        public String next() {
            return entries.next().getKey();
        }

        @Override
        public void remove() {
            entries.remove();
        }
    };
    Tokenizer keyTokenizer = new TokenizerFromSet(keys);
    return doSnowballStemming(keyTokenizer);
}
/**
 * Runs the token stream through an English snowball filter and collects
 * the resulting terms in encounter order without duplicates. An
 * IOException is logged; the terms collected so far are returned.
 *
 * @param ts the tokens to stem
 * @return the stemmed terms
 */
public Set<String> doSnowballStemming(TokenStream ts) {
    Set<String> stemmed = new LinkedHashSet<String>();
    TokenStream stemmer = new SnowballFilter(ts, "English");
    try {
        while (stemmer.incrementToken()) {
            String term = stemmer.getAttribute(TermAttribute.class).term();
            stemmed.add(term);
        }
    } catch (IOException ex) {
        logger.error("Exception while stemming to snoball", ex);
    }
    return stemmed;
}
/**
 * Adds a filter query of the form (name1 OR name2 OR ...) for the given
 * key. Nothing is added if the collection is empty.
 *
 * @param key the filter key
 * @param friends the screen names to combine
 * @return this query
 */
public JetwickQuery createFriendsQuery(String key, Collection<String> friends) {
    if (friends.isEmpty()) {
        return this;
    }

    StringBuilder orList = new StringBuilder("(");
    String separator = "";
    for (String screenName : friends) {
        orList.append(separator).append(screenName);
        separator = " OR ";
    }
    orList.append(")");

    addFilterQuery(key, orList.toString());
    return this;
}
}