/**
* Copyright (C) 2010 Peter Karich <jetwick_@_pannous_._info>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.jetwick.es;
import org.elasticsearch.index.query.FilterBuilder;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.FilterBuilders;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.QueryStringQueryBuilder.Operator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Date;
import de.jetwick.util.StrEntry;
import org.elasticsearch.search.facet.AbstractFacetBuilder;
import de.jetwick.util.Helper;
import de.jetwick.util.MyDate;
import org.elasticsearch.search.facet.FacetBuilders;
import org.elasticsearch.search.facet.range.RangeFacetBuilder;
import java.util.Map.Entry;
import java.util.Collection;
import static de.jetwick.es.ElasticTweetSearch.*;
import org.elasticsearch.action.search.SearchRequestBuilder;
/**
*
* @author Peter Karich, peat_hal 'at' users 'dot' sourceforge 'dot' net
*/
public class TweetQuery extends JetwickQuery {
private final Logger logger = LoggerFactory.getLogger(getClass());
private static final long serialVersionUID = 1L;
public TweetQuery() {
super();
}
public TweetQuery(boolean init) {
super(null, init);
}
public TweetQuery(String queryStr) {
super(queryStr, true);
}
public TweetQuery(String queryStr, boolean init) {
super(queryStr, init);
}
private transient FilterBuilder dateFilter = null;
@Override
public SearchRequestBuilder initRequestBuilder(SearchRequestBuilder srb) {
// the dateFilter should not apply to the date facets!
dateFilter = null;
srb = super.initRequestBuilder(srb);
if (dateFilter != null)
srb.setFilter(dateFilter);
if (isDateFacets()) {
// too much work to convert the generic case with all the date math
// so cheat for our case:
String name = ElasticTweetSearch.DATE_FACET;
RangeFacetBuilder rfb = FacetBuilders.rangeFacet(name).field(ElasticTweetSearch.DATE);
MyDate date = new MyDate();
// latest
rfb.addUnboundedTo(Helper.toLocalDateTime(date.clone().minusHours(8).castToHour().toDate()));
// first day
rfb.addUnboundedTo(Helper.toLocalDateTime(date.castToDay().toDate()));
for (int i = 0; i < 7; i++) {
// 'from' must be smaller than 'to'!
Date oldDate = date.toDate();
rfb.addRange(Helper.toLocalDateTime(date.minusDays(1).toDate()),
Helper.toLocalDateTime(oldDate));
}
// oldest
rfb.addUnboundedFrom(Helper.toLocalDateTime(date.toDate()));
srb.addFacet(rfb);
}
return srb;
}
@Override
public FilterBuilder fromFilterQuery(Entry<String, Object> entry) {
FilterBuilder tmp = super.fromFilterQuery(entry);
if (entry.getKey().equals(ElasticTweetSearch.DATE)) {
if (dateFilter != null)
dateFilter = FilterBuilders.andFilter(dateFilter, tmp);
else
dateFilter = tmp;
return null;
} else
return tmp;
}
@Override
public AbstractFacetBuilder fromFacetField(String ff, int limit) {
AbstractFacetBuilder facetBuilder;
// if (ff.equals(ElasticTweetSearch.FIRST_URL_TITLE) || ff.equals(ElasticTweetSearch.TAG)) {
// hmmh no real differences ... strange
// facetBuilder = FacetBuilders.termsStatsFacet(ff).keyField(ff).valueScript("doc.score").order(ComparatorType.TOTAL).size(limit);
// fb = FacetBuilders.termsStats(ff).keyField(ff).valueScript("doc.relevance.value").order(ComparatorType.TOTAL);//.size(15);
// fb = FacetBuilders.termsStats(ff).keyField(ff).valueScript("doc.relevance.value").order(ComparatorType.COUNT).size(15);
// } else
facetBuilder = super.fromFacetField(ff, limit);
if (dateFilter != null)
facetBuilder.facetFilter(dateFilter);
return facetBuilder;
}
@Override
public AbstractFacetBuilder fromFacetQuery(StrEntry e) {
AbstractFacetBuilder facetBuilder = super.fromFacetQuery(e);
if (dateFilter != null)
facetBuilder.facetFilter(dateFilter);
return facetBuilder;
}
@Override
public TweetQuery attachFacetibility() {
// setDateFacets(true).
addFacetField(TAG, 15).addFacetField(LANG).
// originality
addFacetField(IS_RT);
// addFacetField(FIRST_URL_TITLE);
// // latest
// q.addFacetQuery(FILTER_ENTRY_LATEST_DT);
// // archive
// q.addFacetQuery(FILTER_ENTRY_OLD_DT);
addFacetQuery(RT_COUNT, "[5 TO *]");
addFacetQuery(RT_COUNT, "[20 TO *]");
addFacetQuery(RT_COUNT, "[50 TO *]");
addFacetQuery(DUP_COUNT, "0");
addFacetQuery(DUP_COUNT, "[1 TO *]");
// spam
// q.addFacetQuery(FILTER_SPAM);
// q.addFacetQuery(FILTER_NO_SPAM);
// links
addFacetQuery(URL_COUNT, "[1 TO *]");
addFacetQuery(URL_COUNT, "0");
return this;
}
public TweetQuery createFriendsQuery(Collection<String> friends) {
return (TweetQuery) super.createFriendsQuery("user", friends);
}
@Override
protected QueryBuilder createQuery(String queryStr) {
QueryBuilder qb;
if (queryStr == null || queryStr.isEmpty())
qb = QueryBuilders.matchAllQuery();
else {
// fields can also contain patterns like so name.* to match more fields
qb = QueryBuilders.queryString(queryStr).defaultOperator(Operator.AND).
field(ElasticTweetSearch.TWEET_TEXT).field(ElasticTweetSearch.TITLE).field(ElasticTweetSearch.USER, 0).
allowLeadingWildcard(false).analyzer(getDefaultAnalyzer()).useDisMax(true);
}
return qb;
// return QueryBuilders.customScoreQuery(qb).script("_score * doc['relevancy'].value").lang("js");
// long time = new MyDate().castToHour().getTime();
// return customScoreQuery(qb)
// .script(
// "var boost = _score;"
// + "if(doc['tw_i'].value <= 30) boost *= 0.1;"
// + "if(doc['quality_i'].value <= 65) boost *= 0.1;"
// + "var retweet = doc['retw_i'].value;"
// + "var scale = 10000;"// time vs. retweet -> what should be more important? +0.1 because boost should end up to be 0 for 0 retweets
// + "if(retweet <= 100) boost *= 0.1 + retweet / scale; else boost *= 0.1 + 100 / scale;"
// + "boost / (3.6e-9 * (mynow - doc['dt'].value) + 1);"
// ).
// lang("js").param("mynow", time);
}
@Override
public JetwickQuery addLatestDateFilter(int hours) {
return addLatestDateFilter(new MyDate().minusHours(hours).castToHour());
}
@Override
public JetwickQuery addLatestDateFilter(MyDate date) {
addFilterQuery(DATE, "[" + date.toLocalString() + " TO *]");
return this;
}
}