Source Code of org.apache.solr.search.QueryParsing$StrParser

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


package org.apache.solr.search;


import org.apache.lucene.document.Field;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.*;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.MapSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.core.SolrCore;
import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.function.FunctionQuery;


import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.regex.Pattern;


/**
 * Collection of static utilities useful for query parsing.
 *
 * @version $Id: QueryParsing.java 652124 2008-04-29 21:07:25Z yonik $
 */
public class QueryParsing {
  /** the SolrParam used to override the QueryParser "default operator" */
  public static final String OP = "q.op";
  public static final String V = "v";      // value of this parameter
  public static final String F = "f";      // field that a query or command pertains to
  public static final String TYPE = "type";// type of this query or command
  public static final String DEFTYPE = "defType"; // default type for any direct subqueries
  public static final String LOCALPARAM_START = "{!";
  public static final char LOCALPARAM_END = '}';


  /** 
   * Helper utility for parsing a query using the Lucene QueryParser syntax. 
   * @param qs query expression in standard Lucene syntax
   * @param schema used for default operator (overridden by params) and passed to the query parser for field format analysis information
   */
  public static Query parseQuery(String qs, IndexSchema schema) {
    return parseQuery(qs, null, schema);
  }


  /** 
   * Helper utility for parsing a query using the Lucene QueryParser syntax. 
   * @param qs query expression in standard Lucene syntax
   * @param defaultField default field used for unqualified search terms in the query expression
   * @param schema used for default operator (overridden by params) and passed to the query parser for field format analysis information
   */
  public static Query parseQuery(String qs, String defaultField, IndexSchema schema) {
    try {
      Query query = schema.getSolrQueryParser(defaultField).parse(qs);


      if (SolrCore.log.isLoggable(Level.FINEST)) {
        SolrCore.log.finest("After QueryParser:" + query);
      }


      return query;


    } catch (ParseException e) {
      SolrCore.log(e);
      throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,"Error parsing Lucene query",e);
    }
  }


  /**
   * Helper utility for parsing a query using the Lucene QueryParser syntax. 
   * @param qs query expression in standard Lucene syntax
   * @param defaultField default field used for unqualified search terms in the query expression
   * @param params used to determine the default operator, overriding the schema specified operator
   * @param schema used for default operator (overridden by params) and passed to the query parser for field format analysis information
   */
  public static Query parseQuery(String qs, String defaultField, SolrParams params, IndexSchema schema) {
    try {
      SolrQueryParser parser = schema.getSolrQueryParser(defaultField);
      String opParam = params.get(OP);
      if (opParam != null) {
        parser.setDefaultOperator("AND".equals(opParam) ? QueryParser.Operator.AND : QueryParser.Operator.OR);
      }
      Query query = parser.parse(qs);


      if (SolrCore.log.isLoggable(Level.FINEST)) {
        SolrCore.log.finest("After QueryParser:" + query);
      }


      return query;


    } catch (ParseException e) {
      SolrCore.log(e);
      throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,"Query parsing error: " + e.getMessage(),e);
    }
  }




  // note to self: something needs to detect infinite recursion when parsing queries
  static int parseLocalParams(String txt, int start, Map<String,String> target, SolrParams params) throws ParseException {
    int off=start;
    if (!txt.startsWith(LOCALPARAM_START,off)) return start;
    StrParser p = new StrParser(txt,start,txt.length());
    p.pos+=2; // skip over "{!"


    for(;;) {
      /*
      if (p.pos>=txt.length()) {
        throw new ParseException("Missing '}' parsing local params '" + txt + '"');
      }
      */
      char ch = p.peek();
      if (ch==LOCALPARAM_END) {
        return p.pos+1;
      }


      String id = p.getId();
      if (id.length()==0) {
        throw new ParseException("Expected identifier '}' parsing local params '" + txt + '"');


      }
      String val=null;


      ch = p.peek();
      if (ch!='=') {
        // single word... treat {!func} as type=func for easy lookup
        val = id;
        id = TYPE;
      } else {
        // saw equals, so read value
        p.pos++;
        ch = p.peek();
        if (ch=='\"' || ch=='\'') {
          val = p.getQuotedString();
        } else if (ch=='$') {
          p.pos++;
          // dereference parameter
          String pname = p.getId();
          if (params!=null) {
            val = params.get(pname);
          }
        } else {
          // read unquoted literal ended by whitespace or '}'
          // there is no escaping.
          int valStart = p.pos;
          for (;;) {
            if (p.pos >= p.end) {
              throw new ParseException("Missing end to unquoted value starting at " + valStart + " str='" + txt +"'");
            }
            char c = p.val.charAt(p.pos);
            if (c==LOCALPARAM_END || Character.isWhitespace(c)) {
              val = p.val.substring(valStart, p.pos);
              break;
            }
            p.pos++;
          }
        }
      }
      if (target != null) target.put(id,val);
    }
  }


  /**
   *  "foo" returns null
   *  "{!prefix f=myfield}yes" returns type="prefix",f="myfield",v="yes"
   *  "{!prefix f=myfield v=$p}" returns type="prefix",f="myfield",v=params.get("p")
   */
  public static SolrParams getLocalParams(String txt, SolrParams params) throws ParseException {
    if (txt==null || !txt.startsWith(LOCALPARAM_START)) {
      return null;      
    }
    Map<String,String> localParams = new HashMap<String,String>();
    int start = QueryParsing.parseLocalParams(txt, 0, localParams, params);


    String val;
    if (start >= txt.length()) {
      // if the rest of the string is empty, check for "v" to provide the value
      val = localParams.get(V);
      val = val==null ? "" : val;
    } else {
      val = txt.substring(start);
    }
    localParams.put(V,val);
    return new MapSolrParams(localParams);
  }




  private static Pattern sortSep = Pattern.compile(",");


  /**
   * Returns null if the sortSpec is the standard sort desc.
   *
   * <p>
   * The form of the sort specification string currently parsed is:
   * </p>
   * <pre>>
   * SortSpec ::= SingleSort [, SingleSort]*
   * SingleSort ::= <fieldname> SortDirection
   * SortDirection ::= top | desc | bottom | asc
   * </pre>
   * Examples:
   * <pre>
   *   score desc               #normal sort by score (will return null)
   *   weight bottom            #sort by weight ascending 
   *   weight desc              #sort by weight descending
   *   height desc,weight desc  #sort by height descending, and use weight descending to break any ties
   *   height desc,weight asc   #sort by height descending, using weight ascending as a tiebreaker
   * </pre>
   *
   */
  public static Sort parseSort(String sortSpec, IndexSchema schema) {
    if (sortSpec==null || sortSpec.length()==0) return null;


    String[] parts = sortSep.split(sortSpec.trim());
    if (parts.length == 0) return null;


    SortField[] lst = new SortField[parts.length];
    for( int i=0; i<parts.length; i++ ) {
      String part = parts[i].trim();
      boolean top=true;
        
      int idx = part.indexOf( ' ' );
      if( idx > 0 ) {
        String order = part.substring( idx+1 ).trim();
        if( "desc".equals( order ) || "top".equals(order) ) {
          top = true;
        }
        else if ("asc".equals(order) || "bottom".equals(order)) {
          top = false;
        }
        else {
          throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "Unknown sort order: "+order);
        }
        part = part.substring( 0, idx ).trim();
      }
      else {
        throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "Missing sort order." );
      }
      
      if( "score".equals(part) ) {
        if (top) {
          // If there is only one thing in the list, just do the regular thing...
          if( parts.length == 1 ) {
            return null; // do normal scoring...
          }
          lst[i] = SortField.FIELD_SCORE;
        }
        else {
          lst[i] = new SortField(null, SortField.SCORE, true);
        }
      } 
      else {
        // getField could throw an exception if the name isn't found
        SchemaField f = null;
        try{
          f = schema.getField(part);
        }
        catch( SolrException e ){
          throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "can not sort on undefined field: "+part, e );
        }
        if (f == null || !f.indexed()){
          throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "can not sort on unindexed field: "+part );
        }
        lst[i] = f.getType().getSortField(f,top);
      }
    }
    return new Sort(lst);
  }




  ///////////////////////////
  ///////////////////////////
  ///////////////////////////


  static FieldType writeFieldName(String name, IndexSchema schema, Appendable out, int flags) throws IOException {
    FieldType ft = null;
    ft = schema.getFieldTypeNoEx(name);
    out.append(name);
    if (ft==null) {
      out.append("(UNKNOWN FIELD "+name+')');
    }
    out.append(':');
    return ft;
  }


  static void writeFieldVal(String val, FieldType ft, Appendable out, int flags) throws IOException {
    if (ft!=null) {
      out.append(ft.toExternal(new Field("",val, Field.Store.YES, Field.Index.UN_TOKENIZED)));
    } else {
      out.append(val);
    }
  }
  /** @see #toString(Query,IndexSchema) */
  public static void toString(Query query, IndexSchema schema, Appendable out, int flags) throws IOException {
    boolean writeBoost=true;


    if (query instanceof TermQuery) {
      TermQuery q = (TermQuery)query;
      Term t = q.getTerm();
      FieldType ft = writeFieldName(t.field(), schema, out, flags);
      writeFieldVal(t.text(), ft, out, flags);
    } else if (query instanceof RangeQuery) {
      RangeQuery q = (RangeQuery)query;
      String fname = q.getField();
      FieldType ft = writeFieldName(fname, schema, out, flags);
      out.append( q.isInclusive() ? '[' : '{' );
      Term lt = q.getLowerTerm();
      Term ut = q.getUpperTerm();
      if (lt==null) {
        out.append('*');
      } else {
        writeFieldVal(lt.text(), ft, out, flags);
      }


      out.append(" TO ");


      if (ut==null) {
        out.append('*');
      } else {
        writeFieldVal(ut.text(), ft, out, flags);
      }


      out.append( q.isInclusive() ? ']' : '}' );


    } else if (query instanceof ConstantScoreRangeQuery) {
      ConstantScoreRangeQuery q = (ConstantScoreRangeQuery)query;
      String fname = q.getField();
      FieldType ft = writeFieldName(fname, schema, out, flags);
      out.append( q.includesLower() ? '[' : '{' );
      String lt = q.getLowerVal();
      String ut = q.getUpperVal();
      if (lt==null) {
        out.append('*');
      } else {
        writeFieldVal(lt, ft, out, flags);
      }


      out.append(" TO ");


      if (ut==null) {
        out.append('*');
      } else {
        writeFieldVal(ut, ft, out, flags);
      }


      out.append( q.includesUpper() ? ']' : '}' );
    } else if (query instanceof BooleanQuery) {
      BooleanQuery q = (BooleanQuery)query;
      boolean needParens=false;


      if (q.getBoost() != 1.0 || q.getMinimumNumberShouldMatch() != 0) {
        needParens=true;
      }
      if (needParens) {
        out.append('(');
      }
      boolean first=true;
      for (BooleanClause c : (List<BooleanClause>)q.clauses()) {
        if (!first) {
          out.append(' ');
        } else {
          first=false;
        }


        if (c.isProhibited()) {
          out.append('-');
        } else if (c.isRequired()) {
          out.append('+');
        }
        Query subQuery = c.getQuery();
        boolean wrapQuery=false;


        // TODO: may need to put parens around other types
        // of queries too, depending on future syntax.
        if (subQuery instanceof BooleanQuery) {
          wrapQuery=true;
        }


        if (wrapQuery) {
          out.append('(');
        }


        toString(subQuery, schema, out, flags);


        if (wrapQuery) {
          out.append(')');
        }
      }


      if (needParens) {
        out.append(')');
      }
      if (q.getMinimumNumberShouldMatch()>0) {
        out.append('~');
        out.append(Integer.toString(q.getMinimumNumberShouldMatch()));
      }


    } else if (query instanceof PrefixQuery) {
      PrefixQuery q = (PrefixQuery)query;
      Term prefix = q.getPrefix();
      FieldType ft = writeFieldName(prefix.field(), schema, out, flags);
      out.append(prefix.text());
      out.append('*');
    } else if (query instanceof ConstantScorePrefixQuery) {
      ConstantScorePrefixQuery q = (ConstantScorePrefixQuery)query;
      Term prefix = q.getPrefix();
      FieldType ft = writeFieldName(prefix.field(), schema, out, flags);
      out.append(prefix.text());
      out.append('*');
    } else if (query instanceof WildcardQuery) {
      out.append(query.toString());
      writeBoost=false;
    } else if (query instanceof FuzzyQuery) {
      out.append(query.toString());
      writeBoost=false;      
    } else if (query instanceof ConstantScoreQuery) {
      out.append(query.toString());
      writeBoost=false;
    } else {
      out.append(query.getClass().getSimpleName()
              + '(' + query.toString() + ')' );
      writeBoost=false;
    }


    if (writeBoost && query.getBoost() != 1.0f) {
      out.append("^");
      out.append(Float.toString(query.getBoost()));
    }


  }
  
  /**
   * Formats a Query for debugging, using the IndexSchema to make 
   * complex field types readable.
   *
   * <p>
   * The benefit of using this method instead of calling 
   * <code>Query.toString</code> directly is that it knows about the data
   * types of each field, so any field which is encoded in a particularly 
   * complex way is still readable. The downside is that it only knows 
   * about built in Query types, and will not be able to format custom 
   * Query classes.
   * </p>
   */
  public static String toString(Query query, IndexSchema schema) {
    try {
      StringBuilder sb = new StringBuilder();
      toString(query, schema, sb, 0);
      return sb.toString();
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
  }








  // simple class to help with parsing a string
  static class StrParser {
    String val;
    int pos;
    int end;


    StrParser(String val) {
      this(val,0,val.length());
    }


    StrParser(String val, int start, int end) {
      this.val = val;
      this.pos = start;
      this.end = end;
    }


    void eatws() {
      while (pos<end && Character.isWhitespace(val.charAt(pos))) pos++;
    }


    void skip(int nChars) {
      pos = Math.max(pos+nChars, end);
    }


    boolean opt(String s) {
      eatws();
      int slen=s.length();
      if (val.regionMatches(pos, s, 0, slen)) {
        pos+=slen;
        return true;
      }
      return false;
    }


    boolean opt(char ch) {
      eatws();
      if (val.charAt(pos) == ch) {
        pos++;
        return true;
      }
      return false;
    }




    void expect(String s) throws ParseException {
      eatws();
      int slen=s.length();
      if (val.regionMatches(pos, s, 0, slen)) {
        pos+=slen;
      } else {
        throw new ParseException("Expected '"+s+"' at position " + pos + " in '"+val+"'");
      }
    }


    float getFloat() throws ParseException {
      eatws();
      char[] arr = new char[end-pos];
      int i;
      for (i=0; i<arr.length; i++) {
        char ch = val.charAt(pos);
        if ( (ch>='0' && ch<='9')
             || ch=='+' || ch=='-'
             || ch=='.' || ch=='e' || ch=='E'
        ) {
          pos++;
          arr[i]=ch;
        } else {
          break;
        }
      }


      return Float.parseFloat(new String(arr,0,i));
    }


    String getId() throws ParseException {
      eatws();
      int id_start=pos;
      if (pos<end && Character.isJavaIdentifierStart(val.charAt(pos))) {
        pos++;
        while (pos<end) {
          char ch = val.charAt(pos);
          if (!Character.isJavaIdentifierPart(ch) && ch!='.') {
            break;
          }
          pos++;
        }
        return val.substring(id_start, pos);
      }
      throw new ParseException("Expected identifier at pos " + pos + " str='" + val + "'");
    }


    // return null if not a string
    String getQuotedString() throws ParseException {
      eatws();
      char delim = peekChar();
      if (!(delim=='\"' || delim=='\'')) {
        return null;
      }
      int val_start = ++pos;
      StringBuilder sb = new StringBuilder(); // needed for escaping
      for(;;) {
        if (pos>=end) {
          throw new ParseException("Missing end quote for string at pos " + (val_start-1) + " str='"+val+"'");
        }
        char ch = val.charAt(pos);
        if (ch=='\\') {
          ch = pos<end ? val.charAt(pos++) : 0;
        } else if (ch==delim) {
          pos++;
          return sb.toString();
        }
        sb.append(ch);
        pos++;
      }
    }


    // next non-whitespace char
    char peek() {
      eatws();
      return pos<end ? val.charAt(pos) : 0;
    }


    // next char
    char peekChar() {
      return pos<end ? val.charAt(pos) : 0;  
    }


    public String toString() {
      return "'" + val + "'" + ", pos=" + pos;
    }


  }


  /**
   * Builds a list of String which are stringified versions of a list of Queries
   */
  public static List<String> toString(List<Query> queries, IndexSchema schema) {
    List<String> out = new ArrayList<String>(queries.size());
    for (Query q : queries) {
      out.add(QueryParsing.toString(q, schema));
    }
    return out;
  }


  /** 
   * Parse a function, returning a FunctionQuery
   *
   * <p>
   * Syntax Examples....
   * </p>
   *
   * <pre>
   * // Numeric fields default to correct type
   * // (ie: IntFieldSource or FloatFieldSource)
   * // Others use explicit ord(...) to generate numeric field value
   * myfield
   *
   * // OrdFieldSource
   * ord(myfield)
   *
   * // ReverseOrdFieldSource
   * rord(myfield)
   *
   * // LinearFloatFunction on numeric field value
   * linear(myfield,1,2)
   *
   * // MaxFloatFunction of LinearFloatFunction on numeric field value or constant
   * max(linear(myfield,1,2),100)
   *
   * // ReciprocalFloatFunction on numeric field value
   * recip(myfield,1,2,3)
   *
   * // ReciprocalFloatFunction on ReverseOrdFieldSource
   * recip(rord(myfield),1,2,3)
   *
   * // ReciprocalFloatFunction on LinearFloatFunction on ReverseOrdFieldSource
   * recip(linear(rord(myfield),1,2),3,4,5)
   * </pre>
   */
  public static FunctionQuery parseFunction(String func, IndexSchema schema) throws ParseException {
    SolrCore core = SolrCore.getSolrCore();
    return (FunctionQuery)(QParser.getParser(func,"func",new LocalSolrQueryRequest(core,new HashMap())).parse());
    // return new FunctionQuery(parseValSource(new StrParser(func), schema));
  }


}
Source Code of org.apache.solr.search.QueryParsing$StrParser

Related Classes of org.apache.solr.search.QueryParsing$StrParser