Package org.dspace.search

Source Code of org.dspace.search.DSQuery

/*
* DSQuery.java
*
* Version: $Revision: 3705 $
*
* Date: $Date: 2009-04-11 17:02:24 +0000 (Sat, 11 Apr 2009) $
*
* Copyright (c) 2002-2005, Hewlett-Packard Company and Massachusetts
* Institute of Technology.  All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* - Neither the name of the Hewlett-Packard Company nor the name of the
* Massachusetts Institute of Technology nor the names of their
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
* TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*/
package org.dspace.search;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.apache.log4j.Logger;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.queryParser.TokenMgrError;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.dspace.content.Collection;
import org.dspace.content.Community;
import org.dspace.core.ConfigurationManager;
import org.dspace.core.Constants;
import org.dspace.core.Context;
import org.dspace.core.LogManager;
import org.dspace.sort.SortOption;

// issues
// need to filter query string for security
// cmd line query needs to process args correctly (seems to split them up)
/**
* DSIndexer contains various static methods for performing queries on indices,
* for collections and communities.
*
*/
public class DSQuery
{
    // Result types
    static final String ALL = "999";

    static final String ITEM = "" + Constants.ITEM;

    static final String COLLECTION = "" + Constants.COLLECTION;

    static final String COMMUNITY = "" + Constants.COMMUNITY;

    // cache a Lucene IndexSearcher for more efficient searches
    private static IndexSearcher searcher = null;

    private static String indexDir = null;
   
    private static String operator = null;
   
    private static long lastModified;
   
    /** log4j logger */
    private static Logger log = Logger.getLogger(DSQuery.class);

   
    static
    {
        String maxClauses = ConfigurationManager.getProperty("search.max-clauses");
        if (maxClauses != null)
        {
            BooleanQuery.setMaxClauseCount(Integer.parseInt(maxClauses));
        }
       
        indexDir = ConfigurationManager.getProperty("search.dir");
       
        operator = ConfigurationManager.getProperty("search.operator");  
    }

    /**
     * Do a query, returning a QueryResults object
     *
     * @param c  context
     * @param args query arguments in QueryArgs object
     *
     * @return query results QueryResults
     */
    public static QueryResults doQuery(Context c, QueryArgs args)
            throws IOException
    {
        String querystring = args.getQuery();
        QueryResults qr = new QueryResults();
        List hitHandles = new ArrayList();
        List hitIds     = new ArrayList();
        List hitTypes   = new ArrayList();

        // set up the QueryResults object
        qr.setHitHandles(hitHandles);
        qr.setHitIds(hitIds);
        qr.setHitTypes(hitTypes);
        qr.setStart(args.getStart());
        qr.setPageSize(args.getPageSize());
        qr.setEtAl(args.getEtAl());

        // massage the query string a bit
        querystring = checkEmptyQuery(querystring); // change nulls to an empty string
        // We no longer need to work around the Lucene bug with recent versions
        //querystring = workAroundLuceneBug(querystring); // logicals changed to && ||, etc.
        querystring = stripHandles(querystring); // remove handles from query string
        querystring = stripAsterisk(querystring); // remove asterisk from beginning of string

        try
        {
            // grab a searcher, and do the search
            Searcher searcher = getSearcher(c);

            QueryParser qp = new QueryParser("default", DSIndexer.getAnalyzer());
            log.debug("Final query string: " + querystring);
           
            if (operator == null || operator.equals("OR"))
            {
              qp.setDefaultOperator(QueryParser.OR_OPERATOR);
            }
            else
            {
              qp.setDefaultOperator(QueryParser.AND_OPERATOR);
            }
           
            Query myquery = qp.parse(querystring);
            Hits hits = null;

            try
            {
                if (args.getSortOption() == null)
                {
                    SortField[] sortFields = new SortField[] {
                            new SortField("search.resourcetype", true),
                            new SortField(null, SortField.SCORE, SortOption.ASCENDING.equals(args.getSortOrder()))
                        };
                    hits = searcher.search(myquery, new Sort(sortFields));
                }
                else
                {
                    SortField[] sortFields = new SortField[] {
                            new SortField("search.resourcetype", true),
                            new SortField("sort_" + args.getSortOption().getName(), SortOption.DESCENDING.equals(args.getSortOrder())),
                            SortField.FIELD_SCORE
                        };
                    hits = searcher.search(myquery, new Sort(sortFields));
                }
            }
            catch (Exception e)
            {
                // Lucene can throw an exception if it is unable to determine a sort time from the specified field
                // Provide a fall back that just works on relevancy.
                log.error("Unable to use speficied sort option: " + (args.getSortOption() == null ? "type/relevance": args.getSortOption().getName()));
                hits = searcher.search(myquery, new Sort(SortField.FIELD_SCORE));
            }
           
            // set total number of hits
            qr.setHitCount(hits.length());

            // We now have a bunch of hits - snip out a 'window'
            // defined in start, count and return the handles
            // from that window
            // first, are there enough hits?
            if (args.getStart() < hits.length())
            {
                // get as many as we can, up to the window size
                // how many are available after snipping off at offset 'start'?
                int hitsRemaining = hits.length() - args.getStart();

                int hitsToProcess = (hitsRemaining < args.getPageSize()) ? hitsRemaining
                        : args.getPageSize();

                for (int i = args.getStart(); i < (args.getStart() + hitsToProcess); i++)
                {
                    Document d = hits.doc(i);

                    String resourceId   = d.get("search.resourceid");
                    String resourceType = d.get("search.resourcetype");

                    String handleText = d.get("handle");
                    String handleType = d.get("type");

                    switch (Integer.parseInt( resourceType != null ? resourceType : handleType))
                    {
                        case Constants.ITEM:
                            hitTypes.add(new Integer(Constants.ITEM));
                            break;

                        case Constants.COLLECTION:
                            hitTypes.add(new Integer(Constants.COLLECTION));
                            break;

                        case Constants.COMMUNITY:
                            hitTypes.add(new Integer(Constants.COMMUNITY));
                            break;
                    }

                    hitHandles.add( handleText );
                    hitIds.add( resourceId == null ? null: Integer.parseInt(resourceId) );
                }
            }
        }
        catch (NumberFormatException e)
        {
            log.warn(LogManager.getHeader(c, "Number format exception", "" + e));
            qr.setErrorMsg("number-format-exception");
        }
        catch (ParseException e)
        {
            // a parse exception - log and return null results
            log.warn(LogManager.getHeader(c, "Invalid search string", "" + e));
            qr.setErrorMsg("invalid-search-string");
        }
        catch (TokenMgrError tme)
        {
            // Similar to parse exception
            log.warn(LogManager.getHeader(c, "Invalid search string", "" + tme));
            qr.setErrorMsg("invalid-search-string");
        }
        catch(BooleanQuery.TooManyClauses e)
        {
            log.warn(LogManager.getHeader(c, "Query too broad", e.toString()));
            qr.setErrorMsg("query-too-broad");
        }

        return qr;
    }

    static String checkEmptyQuery(String myquery)
    {
        if (myquery == null || myquery.equals("()") || myquery.equals(""))
        {
            myquery = "empty_query_string";
        }

        return myquery;
    }

    /**
     * Workaround Lucene bug that breaks wildcard searching.
     * This is no longer required with Lucene upgrades.
     *
     * @param myquery
     * @return
     * @deprecated
     */
    static String workAroundLuceneBug(String myquery)
    {
        // Lucene currently has a bug which breaks wildcard
        // searching when you have uppercase characters.
        // Here we substitute the boolean operators -- which
        // have to be uppercase -- before tranforming the
        // query string to lowercase.
        return myquery.replaceAll(" AND ", " && ")
                      .replaceAll(" OR ", " || ")
                      .replaceAll(" NOT ", " ! ")
                      .toLowerCase();
    }

    static String stripHandles(String myquery)
    {
        // Drop beginning pieces of full handle strings
        return myquery.replaceAll("^\\s*http://hdl\\.handle\\.net/", "")
                      .replaceAll("^\\s*hdl:", "");
    }

    static String stripAsterisk(String myquery)
    {
        // query strings (or words) begining with "*" cause a null pointer error
        return myquery.replaceAll("^\\*", "")
                      .replaceAll("\\s\\*", " ")
                      .replaceAll("\\(\\*", "(")
                      .replaceAll(":\\*", ":");
    }

    /**
     * Do a query, restricted to a collection
     *
     * @param c
     *            context
     * @param args
     *            query args
     * @param coll
     *            collection to restrict to
     *
     * @return QueryResults same results as doQuery, restricted to a collection
     */
    public static QueryResults doQuery(Context c, QueryArgs args,
            Collection coll) throws IOException
    {
        String querystring = args.getQuery();

        querystring = checkEmptyQuery(querystring);

        String location = "l" + (coll.getID());

        String newquery = new String("+(" + querystring + ") +location:\""
                + location + "\"");

        args.setQuery(newquery);

        return doQuery(c, args);
    }

    /**
     * Do a query, restricted to a community
     *
     * @param c
     *            context
     * @param args
     *            query args
     * @param comm
     *            community to restrict to
     *
     * @return QueryResults same results as doQuery, restricted to a collection
     */
    public static QueryResults doQuery(Context c, QueryArgs args, Community comm)
            throws IOException
    {
        String querystring = args.getQuery();

        querystring = checkEmptyQuery(querystring);

        String location = "m" + (comm.getID());

        String newquery = new String("+(" + querystring + ") +location:\""
                + location + "\"");

        args.setQuery(newquery);

        return doQuery(c, args);
    }


    /**
     * Do a query, printing results to stdout largely for testing, but it is
     * useful
     */
    public static void doCMDLineQuery(String query)
    {
        System.out.println("Command line query: " + query);
        System.out.println("Only reporting default-sized results list");

        try
        {
            Context c = new Context();

            QueryArgs args = new QueryArgs();
            args.setQuery(query);

            QueryResults results = doQuery(c, args);

            Iterator i = results.getHitHandles().iterator();
            Iterator j = results.getHitTypes().iterator();

            while (i.hasNext())
            {
                String thisHandle = (String) i.next();
                Integer thisType = (Integer) j.next();
                String type = Constants.typeText[thisType.intValue()];

                // also look up type
                System.out.println(type + "\t" + thisHandle);
            }
        }
        catch (Exception e)
        {
            System.out.println("Exception caught: " + e);
        }
    }

    /**
     * Close any IndexSearcher that is currently open.
     */
    public static void close()
    {
        if (searcher != null)
        {
            try
            {
                searcher.close();
                searcher = null;
            }
            catch (IOException ioe)
            {
                log.error("DSQuery: Unable to close open IndexSearcher", ioe);
            }
        }
    }
   
    public static void main(String[] args)
    {
        if (args.length > 0)
        {
            DSQuery.doCMDLineQuery(args[0]);
        }
    }

    /*---------  protected methods ----------*/

    /** 
     * get an IndexReader.
     * @throws IOException
     */
    protected static IndexReader getIndexReader()
      throws IOException
    {
      return getSearcher(null).getIndexReader();
    }
   
    /**
     * get an IndexSearcher, hopefully a cached one (gives much better
     * performance.) checks to see if the index has been modified - if so, it
     * creates a new IndexSearcher
     */
    protected static synchronized IndexSearcher getSearcher(Context c)
            throws IOException
    {
      
        // If we have already opened a searcher, check to see if the index has been updated
        // If it has, we need to close the existing searcher - we will open a new one later
        if (searcher != null && lastModified != IndexReader.getCurrentVersion(indexDir))
        {
            try
            {
                // Close the cached IndexSearcher
                searcher.close();
            }
            catch (IOException ioe)
            {
                // Index is probably corrupt. Log the error, but continue to either:
                // 1) Return existing searcher (may yet throw exception, no worse than throwing here)
                log.warn("DSQuery: Unable to check for updated index", ioe);
            }
            finally
            {
              searcher = null;
            }
        }

        // There is no existing searcher - either this is the first execution,
        // or the index has been updated and we closed the old index.
        if (searcher == null)
        {
            // So, open a new searcher
            lastModified = IndexReader.getCurrentVersion(indexDir);
            searcher = new IndexSearcher(indexDir){
              /*
               * TODO: Has Lucene fixed this bug yet?
               * Lucene doesn't release read locks in
               * windows properly on finalize. Our hack
               * extend IndexSearcher to force close().
               */
                protected void finalize() throws Throwable {
                this.close();
                super.finalize();
              }
            };
        }

        return searcher;
    }
}

// it's now up to the display page to do the right thing displaying
// items & communities & collections
TOP

Related Classes of org.dspace.search.DSQuery

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.