Package de.jetwick.es

Source Code of de.jetwick.es.TweetQuery

/**
* Copyright (C) 2010 Peter Karich <jetwick_@_pannous_._info>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*         http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.jetwick.es;

import org.elasticsearch.index.query.FilterBuilder;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.FilterBuilders;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.QueryStringQueryBuilder.Operator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Date;
import de.jetwick.util.StrEntry;
import org.elasticsearch.search.facet.AbstractFacetBuilder;
import de.jetwick.util.Helper;
import de.jetwick.util.MyDate;
import org.elasticsearch.search.facet.FacetBuilders;
import org.elasticsearch.search.facet.range.RangeFacetBuilder;
import java.util.Map.Entry;
import java.util.Collection;
import static de.jetwick.es.ElasticTweetSearch.*;
import org.elasticsearch.action.search.SearchRequestBuilder;

/**
*
* @author Peter Karich, peat_hal 'at' users 'dot' sourceforge 'dot' net
*/
public class TweetQuery extends JetwickQuery {

    private final Logger logger = LoggerFactory.getLogger(getClass());
    private static final long serialVersionUID = 1L;

    public TweetQuery() {
        super();
    }

    public TweetQuery(boolean init) {
        super(null, init);
    }

    public TweetQuery(String queryStr) {
        super(queryStr, true);
    }

    public TweetQuery(String queryStr, boolean init) {
        super(queryStr, init);
    }
    private transient FilterBuilder dateFilter = null;

    @Override
    public SearchRequestBuilder initRequestBuilder(SearchRequestBuilder srb) {
        // the dateFilter should not apply to the date facets!
        dateFilter = null;
        srb = super.initRequestBuilder(srb);

        if (dateFilter != null)
            srb.setFilter(dateFilter);

        if (isDateFacets()) {
            // too much work to convert the generic case with all the date math
            // so cheat for our case:
            String name = ElasticTweetSearch.DATE_FACET;
            RangeFacetBuilder rfb = FacetBuilders.rangeFacet(name).field(ElasticTweetSearch.DATE);
            MyDate date = new MyDate();

            // latest
            rfb.addUnboundedTo(Helper.toLocalDateTime(date.clone().minusHours(8).castToHour().toDate()));
            // first day           
            rfb.addUnboundedTo(Helper.toLocalDateTime(date.castToDay().toDate()));

            for (int i = 0; i < 7; i++) {
                // 'from' must be smaller than 'to'!
                Date oldDate = date.toDate();
                rfb.addRange(Helper.toLocalDateTime(date.minusDays(1).toDate()),
                        Helper.toLocalDateTime(oldDate));
            }

            // oldest
            rfb.addUnboundedFrom(Helper.toLocalDateTime(date.toDate()));
            srb.addFacet(rfb);
        }

        return srb;
    }

    @Override
    public FilterBuilder fromFilterQuery(Entry<String, Object> entry) {
        FilterBuilder tmp = super.fromFilterQuery(entry);
        if (entry.getKey().equals(ElasticTweetSearch.DATE)) {
            if (dateFilter != null)
                dateFilter = FilterBuilders.andFilter(dateFilter, tmp);
            else
                dateFilter = tmp;
            return null;
        } else
            return tmp;
    }

    @Override
    public AbstractFacetBuilder fromFacetField(String ff, int limit) {
        AbstractFacetBuilder facetBuilder;
//        if (ff.equals(ElasticTweetSearch.FIRST_URL_TITLE) || ff.equals(ElasticTweetSearch.TAG)) {
        // hmmh no real differences ... strange
//            facetBuilder = FacetBuilders.termsStatsFacet(ff).keyField(ff).valueScript("doc.score").order(ComparatorType.TOTAL).size(limit);
//                    fb = FacetBuilders.termsStats(ff).keyField(ff).valueScript("doc.relevance.value").order(ComparatorType.TOTAL);//.size(15);
//                    fb = FacetBuilders.termsStats(ff).keyField(ff).valueScript("doc.relevance.value").order(ComparatorType.COUNT).size(15);
//        } else       
        facetBuilder = super.fromFacetField(ff, limit);

        if (dateFilter != null)
            facetBuilder.facetFilter(dateFilter);

        return facetBuilder;
    }

    @Override
    public AbstractFacetBuilder fromFacetQuery(StrEntry e) {
        AbstractFacetBuilder facetBuilder = super.fromFacetQuery(e);
        if (dateFilter != null)
            facetBuilder.facetFilter(dateFilter);
        return facetBuilder;
    }

    @Override
    public TweetQuery attachFacetibility() {
//        setDateFacets(true).
                addFacetField(TAG, 15).addFacetField(LANG).
                // originality
                addFacetField(IS_RT);
//                addFacetField(FIRST_URL_TITLE);

//        // latest
//        q.addFacetQuery(FILTER_ENTRY_LATEST_DT);
//        // archive
//        q.addFacetQuery(FILTER_ENTRY_OLD_DT);

        addFacetQuery(RT_COUNT, "[5 TO *]");
        addFacetQuery(RT_COUNT, "[20 TO *]");
        addFacetQuery(RT_COUNT, "[50 TO *]");

        addFacetQuery(DUP_COUNT, "0");
        addFacetQuery(DUP_COUNT, "[1 TO *]");

        // spam
//        q.addFacetQuery(FILTER_SPAM);
//        q.addFacetQuery(FILTER_NO_SPAM);

        // links
        addFacetQuery(URL_COUNT, "[1 TO *]");
        addFacetQuery(URL_COUNT, "0");

        return this;
    }

    public TweetQuery createFriendsQuery(Collection<String> friends) {
        return (TweetQuery) super.createFriendsQuery("user", friends);
    }

    @Override
    protected QueryBuilder createQuery(String queryStr) {
        QueryBuilder qb;
        if (queryStr == null || queryStr.isEmpty())
            qb = QueryBuilders.matchAllQuery();
        else {
            // fields can also contain patterns like so name.* to match more fields
            qb = QueryBuilders.queryString(queryStr).defaultOperator(Operator.AND).
                    field(ElasticTweetSearch.TWEET_TEXT).field(ElasticTweetSearch.TITLE).field(ElasticTweetSearch.USER, 0).
                    allowLeadingWildcard(false).analyzer(getDefaultAnalyzer()).useDisMax(true);
        }

        return qb;
//        return QueryBuilders.customScoreQuery(qb).script("_score * doc['relevancy'].value").lang("js");

//        long time = new MyDate().castToHour().getTime();
//        return customScoreQuery(qb)
//                .script(
//                "var boost = _score;"
//                + "if(doc['tw_i'].value <= 30) boost *= 0.1;"
//                + "if(doc['quality_i'].value <= 65) boost *= 0.1;"
//                + "var retweet = doc['retw_i'].value;"
//                + "var scale = 10000;"// time vs. retweet -> what should be more important? +0.1 because boost should end up to be 0 for 0 retweets
//                + "if(retweet <= 100) boost *= 0.1 + retweet / scale; else boost *= 0.1 + 100 / scale;"
//                + "boost / (3.6e-9 * (mynow - doc['dt'].value) + 1);"               
//                ).
//                lang("js").param("mynow", time);
    }

    @Override
    public JetwickQuery addLatestDateFilter(int hours) {
        return addLatestDateFilter(new MyDate().minusHours(hours).castToHour());
    }

    @Override
    public JetwickQuery addLatestDateFilter(MyDate date) {
        addFilterQuery(DATE, "[" + date.toLocalString() + " TO *]");
        return this;
    }
}
TOP

Related Classes of de.jetwick.es.TweetQuery

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.