Package org.fao.geonet.services.main

Source Code of org.fao.geonet.services.main.SearchSuggestion$FrequencyComparator

//=============================================================================
//===  Copyright (C) 2010 Food and Agriculture Organization of the
//===  United Nations (FAO-UN), United Nations World Food Programme (WFP)
//===  and United Nations Environment Programme (UNEP)
//===
//===  This program is free software; you can redistribute it and/or modify
//===  it under the terms of the GNU General Public License as published by
//===  the Free Software Foundation; either version 2 of the License, or (at
//===  your option) any later version.
//===
//===  This program is distributed in the hope that it will be useful, but
//===  WITHOUT ANY WARRANTY; without even the implied warranty of
//===  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
//===  General Public License for more details.
//===
//===  You should have received a copy of the GNU General Public License
//===  along with this program; if not, write to the Free Software
//===  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
//===
//===  Contact: Jeroen Ticheler - FAO - Viale delle Terme di Caracalla 2,
//===  Rome - Italy. email: geonetwork@osgeo.org
//==============================================================================

package org.fao.geonet.services.main;

import com.google.common.collect.ComparisonChain;
import jeeves.interfaces.Service;
import jeeves.server.ServiceConfig;
import jeeves.server.context.ServiceContext;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.WildcardQuery;
import org.fao.geonet.exceptions.BadParameterEx;
import org.fao.geonet.kernel.search.IndexAndTaxonomy;
import org.fao.geonet.kernel.search.LuceneConfig;
import org.fao.geonet.kernel.search.index.GeonetworkMultiReader;
import org.fao.geonet.utils.Log;
import org.fao.geonet.Util;
import org.apache.commons.lang.StringUtils;
import org.fao.geonet.GeonetContext;
import org.fao.geonet.constants.Geonet;
import org.fao.geonet.kernel.search.LuceneSearcher;
import org.fao.geonet.kernel.search.SearchManager;
import org.fao.geonet.kernel.search.SearchManager.TermFrequency;
import org.fao.geonet.utils.Xml;
import org.jdom.Attribute;
import org.jdom.Content;
import org.jdom.Element;

import java.io.IOException;
import java.util.*;

/**
* Return a list of suggestions for a field. The values can be filtered so that
* only values contained in the field values are returned.
*
* Two modes are defined. The first one browses the index for a field and returns
* a list of suggestions; for this mode, origin should be set to INDEX_TERM_VALUES.
* The second mode, using origin RECORDS_FIELD_VALUES, performs a search, extracts
* the field from all documents and returns the matching values.
*
* If no origin is set, the RECORDS_FIELD_VALUES mode is used first and, if no
* suggestion is found, the INDEX_TERM_VALUES mode is used.
*
*
* The response body is converted to JSON using search-suggestion.xsl
*
*
* OpenSearch suggestion specification:
* http://www.opensearch.org/Specifications/OpenSearch/Extensions/Suggestions/1.0
*
*/
public class SearchSuggestion implements Service {
    static final String RECORDS_FIELD_VALUES = "RECORDS_FIELD_VALUES";

    static final String INDEX_TERM_VALUES = "INDEX_TERM_VALUES";

    static final String PARAM_ORIGIN = "origin";
    static final String PARAM_FIELD = "field";
    static final String PARAM_Q = "q";
    static final String PARAM_MAX_NUMBER_OF_TERMS = "maxNumberOfTerms";
    static final String PARAM_THRESHOLD = "threshold";
    static final String PARAM_SORT_BY = "sortBy";
    static final String ELEM_ITEMS = "items";
    static final String ELEM_ITEM = "item";
    static final String ATT_TERM = "term";
    static final String ATT_FREQ = "freq";
    static final String CONFIG_PARAM_MAX_NUMBER_OF_TERMS = "max_number_of_terms";
    static final String CONFIG_PARAM_DEFAULT_SEARCH_FIELD = "default_search_field";
    static final String CONFIG_PARAM_THRESHOLD = PARAM_THRESHOLD;
    private static final String SUMMARY_FACET_CONFIG_KEY = "suggestions";

    /**
     * Max number of term's values to look in the index. For large catalogue
     * this value should be increased in order to get better results. If this
     * value is too high, then looking for terms could take more times. The use
     * of good analyzer should allow to reduce the number of useless values like
     * (a, the, ...).
     */
    private Integer _maxNumberOfTerms;

    /**
     * Minimum frequency for a term value to be proposed in suggestion.
     */
    private Integer _threshold;

    /**
     * Default field to search in. any is full-text search field.
     */
    private String _defaultSearchField = "any";

    private ServiceConfig _config;
   
    enum SORT_BY_OPTION {
        FREQUENCY, ALPHA, STARTSWITHFIRST
    }
   
    /**
     * Sort a TermFrequency collection by placing element starting with prefix on top
     * then alphabetical order.
     */
    public static class StartsWithComparator implements Comparator<TermFrequency> {
        private String prefix = "";

        public StartsWithComparator(String prefix) {
            this.prefix = prefix;
        }

        public int startsWith(String str) {
            return StringUtils.startsWithIgnoreCase(str, prefix) ? -1 : 1;
        }

        public int compare(TermFrequency term1, TermFrequency term2) {
            return ComparisonChain.start()
                    .compare(startsWith(term1.getTerm()), startsWith(term2.getTerm()))
                    .compare(term1.getTerm(), term2.getTerm()).result();
        }
    }
   
    /**
     * Sort a TermFrequency collection by decreasing frequency and alphabetical order
     */
    public static class FrequencyComparator implements Comparator<TermFrequency> {
        public int compare(TermFrequency term1, TermFrequency term2) {
            return ComparisonChain.start()
                    .compare(term2.getFrequency(), term1.getFrequency())
                    .compare(term1.getTerm(), term2.getTerm())
                    .result();
        }
    }
   
    /**
     * Set default parameters
     */
    public void init(String appPath, ServiceConfig config) throws Exception {
        _threshold = Integer.valueOf(config.getValue(PARAM_THRESHOLD));
        String maxNumberOfTerms = config.getValue(CONFIG_PARAM_MAX_NUMBER_OF_TERMS);
        _maxNumberOfTerms = Integer.valueOf(maxNumberOfTerms);
        _defaultSearchField = config.getValue(CONFIG_PARAM_DEFAULT_SEARCH_FIELD);
        _config = config;
    }

    /**
     * Browse the index and return suggestion list.
     */
    public Element exec(Element params, ServiceContext context)
            throws Exception {
        // The field to search in
        String fieldName = Util.getParam(params, PARAM_FIELD, _defaultSearchField);
        // The value to search for
        String searchValue = Util.getParam(params, PARAM_Q, "");
        String searchValueWithoutWildcard = searchValue.replaceAll("[*?]", "");

        // Search index term and/or index records
        String origin = Util.getParam(params, PARAM_ORIGIN, "");
        // The max number of terms to return - only apply while searching terms
        int maxNumberOfTerms = Util.getParam(params, PARAM_MAX_NUMBER_OF_TERMS,
                _maxNumberOfTerms);
        // The minimum frequency for a term value to be proposed in suggestion -
        // only apply while searching terms
        int threshold = Util.getParam(params, PARAM_THRESHOLD, _threshold);
       
        String sortBy = Util.getParam(params, PARAM_SORT_BY, SORT_BY_OPTION.FREQUENCY.toString());
       
        if (Log.isDebugEnabled(Geonet.SEARCH_ENGINE)) {
            Log.debug(Geonet.SEARCH_ENGINE,
                    "Autocomplete on field: '" + fieldName + "'" +
                    "\tsearching: '" + searchValue + "'" +
                    "\tthreshold: '" + threshold + "'" +
                    "\tmaxNumberOfTerms: '" + maxNumberOfTerms + "'" +
                    "\tsortBy: '" + sortBy + "'" +
                    "\tfrom: '" + origin + "'");
        }
       
        GeonetContext gc = (GeonetContext) context
                .getHandlerContext(Geonet.CONTEXT_NAME);
        SearchManager sm = gc.getBean(SearchManager.class);
        // The response element
        Element suggestionsResponse = new Element(ELEM_ITEMS);

        TreeSet<SearchManager.TermFrequency> listOfSuggestions;

        // Starts with element first
        if (sortBy.equalsIgnoreCase(SORT_BY_OPTION.STARTSWITHFIRST.toString())) {
            listOfSuggestions = new TreeSet<SearchManager.TermFrequency>(new StartsWithComparator(searchValueWithoutWildcard));
        } else if (sortBy.equalsIgnoreCase(SORT_BY_OPTION.ALPHA.toString())) {
            // Sort by alpha and frequency
            listOfSuggestions = new TreeSet<SearchManager.TermFrequency>();
        } else {
            listOfSuggestions = new TreeSet<SearchManager.TermFrequency>(new FrequencyComparator());
        }
       
        // If a field is stored, field values could be retrieved from the index
        // The main advantage is that only values from records visible to the
        // user are returned, because the search filter the results first.
        if (origin.equals("") || origin.equals(RECORDS_FIELD_VALUES)) {
            LuceneSearcher searcher = (LuceneSearcher) sm.newSearcher(SearchManager.LUCENE, Geonet.File.SEARCH_LUCENE);

            searcher.getSuggestionForFields(context, fieldName, searchValue, _config, maxNumberOfTerms, threshold, listOfSuggestions);
        }
        // No values found from the index records field value ...
        if (origin.equals(INDEX_TERM_VALUES)
                || (listOfSuggestions.size() == 0 && origin.equals(""))) {
            // If a field is not stored, field values could not be retrieved
            // In that case search the index
            listOfSuggestions.addAll(sm.getTermsFequency(
                    fieldName, searchValue, maxNumberOfTerms, threshold, context));
        }

        if (Log.isDebugEnabled(Geonet.SEARCH_ENGINE)) {
            Log.debug(Geonet.SEARCH_ENGINE,
                    "  Found: " + listOfSuggestions.size()
                            + " suggestions from " + origin + ".");
        }
       
        suggestionsResponse.setAttribute(new Attribute(PARAM_ORIGIN, origin));

        for (TermFrequency suggestion : listOfSuggestions) {
            Element md = new Element(ELEM_ITEM);
            // md.setAttribute("term", suggestion.replaceAll("\"",""));
            md.setAttribute(ATT_TERM, suggestion.getTerm());
            md.setAttribute(ATT_FREQ, suggestion.getFrequency() + "");
            suggestionsResponse.addContent(md);
        }

        return suggestionsResponse;

    }
}
TOP

Related Classes of org.fao.geonet.services.main.SearchSuggestion$FrequencyComparator

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.