Package org.jtalks.jcommune.model.search

Source Code of org.jtalks.jcommune.model.search.StopWordsFilter

/**
* Copyright (C) 2011  JTalks.org Team
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
* Lesser General Public License for more details.
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
*/
package org.jtalks.jcommune.model.search;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;

import org.apache.commons.lang.StringUtils;
import org.apache.lucene.util.Version;
import org.apache.solr.analysis.StopFilterFactory;
import org.hibernate.search.util.HibernateSearchResourceLoader;

/**
* Deletes stop words in the search text.
*
* @author Anuar_Nurmakanov
*
*/
public class StopWordsFilter implements SearchRequestFilter {
    private List<String> stopWordsFiles;
    private boolean ignoreCase;
   
    /**
     * @param stopWordsFiles list of files that contain stop words
     * @param ignoreCase ignore case
     */
    public StopWordsFilter(List<String> stopWordsFiles, boolean ignoreCase) {
        this.stopWordsFiles = stopWordsFiles;
        this.ignoreCase = ignoreCase;
    }
   
    /**
     * {@inheritDoc}
     */
    @Override
    public String filter(String searchText) {
        String result = searchText;
        for (String stopWordsFile : stopWordsFiles) {
            result = filter(result, stopWordsFile);
        }
        return result;
    }
   
    /**
     * This method performs a filtration of the search text.
     * 
     * @param searchText search text
     * @param stopWordsFile file that contains stop words
     * @return result of filtration
     */
    private String filter(String searchText, String stopWordsFile) {
        StopFilterFactory filterFactory = new StopFilterFactory();
        Map<String, String> arguments = new HashMap<>();
        arguments.put("words", stopWordsFile);
        arguments.put("luceneMatchVersion", String.valueOf(Version.LUCENE_31));
        arguments.put("ignoreCase", String.valueOf(ignoreCase));
        filterFactory.init(arguments);
        filterFactory.inform(new HibernateSearchResourceLoader());
       
        Set<String> stopWords = (Set<String>)filterFactory.getStopWords();
        List<String> searchTerms = splitSearchText(searchText);
        searchTerms.removeAll(stopWords);
        return joinSearchTerms(searchTerms);
    }
   
    /**
     * Performs a splitting the search text.
     *
     * @param searchText search text
     * @return list of terms
     */
    private List<String> splitSearchText(String searchText) {
        if (ignoreCase) {
            searchText = searchText.toLowerCase();
        }
        return new ArrayList<>(
                Arrays.asList(searchText.split("\\s"))
        );
    }
   
    /**
     * Creates the single string from list of terms.
     *
     * @param searchTerms search text
     * @return the single string from list of terms
     */
    private String joinSearchTerms(List<String> searchTerms) {
        return StringUtils.join(searchTerms, " ");
    }
}
TOP

Related Classes of org.jtalks.jcommune.model.search.StopWordsFilter

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.