Package org.apache.nutch.searcher

Examples of org.apache.nutch.searcher.Query


   * Limit number of query terms and extra query terms
   * @param input
   * @param output
   */
  public Query limitTerms(Query input) {
    Query output=new Query(input.getConf());
    Clause[] clauses = input.getClauses();
    int termsCounter=0;
    int termsExtraCounter=0;
       
    for (int i=0; i<clauses.length; i++) {
      Clause c = clauses[i];

        if (c.getField().equals(Clause.DEFAULT_FIELD) && !c.isProhibited() && termsCounter>=maxQueryTerms) { // is it is a term and reached the limiti
          continue;
        }     
        if ((!c.getField().equals(Clause.DEFAULT_FIELD) || c.isProhibited()) && termsExtraCounter>=maxQueryExtraTerms) // it is an exstra term or a not
          continue;                                

        if (c.isPhrase()) {                        
          Term[] terms = c.getPhrase().getTerms();
         
          int newLength=terms.length;
          if (c.getField().equals(Clause.DEFAULT_FIELD) && !c.isProhibited()) {
            if (terms.length+termsCounter>maxQueryTerms) {
                newLength=maxQueryTerms-termsCounter;
                termsCounter+=newLength;
            }
            else {
              termsCounter+=terms.length;
            }
          }
          else {
            if (terms.length+termsExtraCounter>maxQueryExtraTerms) {
                newLength=maxQueryExtraTerms-termsExtraCounter;
                termsExtraCounter+=newLength;
            }
            else {
              termsExtraCounter+=terms.length;
            }
          }
                   
            if (newLength!=terms.length) {             
              if (newLength==1) {
                output.addClause(new Clause(terms[0], c.isRequired(), c.isProhibited(), c.getConf()));
                }
              else {
                Term[] newTerms=new Term[newLength];
                System.arraycopy(terms, 0, newTerms, 0, newLength);
                    output.addClause(new Clause(new Phrase(newTerms), c.isRequired(), c.isProhibited(), c.getConf()));
                }
            }
            else {
              output.addClause(c);               
            }            
        }
        else {
          output.addClause(c);       
          if (c.getField().equals(Clause.DEFAULT_FIELD) && !c.isProhibited()) {
            termsCounter++; 
          }
          else {
            termsExtraCounter++;
View Full Code Here


      System.exit(-1);
    }

    Configuration conf = NutchConfiguration.create();
    NutchBean bean = new NutchBean(conf);
    Query query = Query.parse(args[0], conf);
    Hits hits = bean.search(query, 10);
    System.out.println("Total hits: " + hits.getTotal());
    int length = (int)Math.min(hits.getTotal(), 10);
    Hit[] show = hits.getHits(0, length);
    HitDetails[] details = bean.getDetails(show);
View Full Code Here

      if (lastRequest) { // BUG 200608
        break;
      }
       
        // optimize query by prohibiting more matches on some excluded values
        Query optQuery = (Query)query.clone();
        for (int i = 0; i < excludedValues.size(); i++) {
          if (i == MAX_PROHIBITED_TERMS)
            break;
          optQuery.addProhibitedTerm(((String)excludedValues.get(i)),dedupField);
        }
        numHitsRaw = (int)(numHitsRaw * rawHitsFactor);
        //if (LOG.isInfoEnabled()) {
          LOG.debug("re-searching for "+numHitsRaw+" raw hits, query: "+optQuery);
        //}
View Full Code Here

   * Limit number of query terms and extra query terms
   * @param input
   * @param output
   */
  public Query limitTerms(Query input) {
    Query output=new Query(input.getConf());
    Clause[] clauses = input.getClauses();
    int termsCounter=0;
    int termsExtraCounter=0;
       
    for (int i=0; i<clauses.length; i++) {
      Clause c = clauses[i];

        if (c.getField().equals(Clause.DEFAULT_FIELD) && !c.isProhibited() && termsCounter>=maxQueryTerms) { // is it is a term and reached the limiti
          continue;
        }     
        if ((!c.getField().equals(Clause.DEFAULT_FIELD) || c.isProhibited()) && termsExtraCounter>=maxQueryExtraTerms) // it is an exstra term or a not
          continue;                                

        if (c.isPhrase()) {                        
          Term[] terms = c.getPhrase().getTerms();
         
          int newLength=terms.length;
          if (c.getField().equals(Clause.DEFAULT_FIELD) && !c.isProhibited()) {
            if (terms.length+termsCounter>maxQueryTerms) {
                newLength=maxQueryTerms-termsCounter;
                termsCounter+=newLength;
            }
            else {
              termsCounter+=terms.length;
            }
          }
          else {
            if (terms.length+termsExtraCounter>maxQueryExtraTerms) {
                newLength=maxQueryExtraTerms-termsExtraCounter;
                termsExtraCounter+=newLength;
            }
            else {
              termsExtraCounter+=terms.length;
            }
          }
                   
            if (newLength!=terms.length) {             
              if (newLength==1) {
                output.addClause(new Clause(terms[0], c.isRequired(), c.isProhibited(), c.getConf()));
                }
              else {
                Term[] newTerms=new Term[newLength];
                System.arraycopy(terms, 0, newTerms, 0, newLength);
                    output.addClause(new Clause(new Phrase(newTerms), c.isRequired(), c.isProhibited(), c.getConf()));
                }
            }
            else {
              output.addClause(c);               
            }            
        }
        else {
          output.addClause(c);       
          if (c.getField().equals(Clause.DEFAULT_FIELD) && !c.isProhibited()) {
            termsCounter++; 
          }
          else {
            termsExtraCounter++;
View Full Code Here

      System.exit(-1);
    }

    Configuration conf = NutchConfiguration.create();
    NutchBean bean = new NutchBean(conf);
    Query query = Query.parse(args[0], conf);
    Hits hits = bean.search(query, 10);
    System.out.println("Total hits: " + hits.getTotal());
    int length = (int)Math.min(hits.getTotal(), 10);
    Hit[] show = hits.getHits(0, length);
    HitDetails[] details = bean.getDetails(show);
View Full Code Here

    }
  }

/**
 * Parse a query.
 *
 * Builds a new {@link Query} from the token stream of this parser. Each loop
 * iteration handles one clause: an optional {@code +}/{@code -} modifier, an
 * optional {@code field:} prefix, and then either a quoted phrase or an
 * unquoted compound term. A clause preceded by {@code -} is added as a
 * prohibited phrase; every other retained clause is added as a required
 * phrase. A single term that has no modifier, is not quoted, was left in the
 * default field and is reported by {@code isStopWord} is dropped.
 *
 * NOTE(review): the jj_* helpers and the labeled loop suggest this method is
 * JavaCC-generated; if so, changes belong in the grammar file — confirm.
 *
 * @param conf configuration passed to the {@link Query} constructor
 * @return the query assembled from all clauses that were parsed
 * @throws ParseException if the upcoming token matches none of the
 *         alternatives expected at that point
 */
  final public Query parse(Configuration conf) throws ParseException {
  Query query = new Query(conf);
  ArrayList terms;
  Token token;
  String field;
  boolean stop;
  boolean prohibited;
    // Consume whatever precedes the first clause.
    // NOTE(review): presumably skips non-operator, non-term tokens — confirm in nonOpOrTerm().
    nonOpOrTerm();
    label_1:
    while (true) {
      // Keep looping only while the next token can start a clause.
      switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
      case WORD:
      case ACRONYM:
      case SIGRAM:
      case PLUS:
      case MINUS:
      case QUOTE:
        ;
        break;
      default:
        jj_la1[0] = jj_gen;
        break label_1;
      }
      // Per-clause defaults: stop-word filtering allowed, not prohibited, default field.
      stop=true; prohibited=false; field = Clause.DEFAULT_FIELD;
      // Optional modifier: '+' or '-' both disable stop-word filtering; '-' also marks the clause prohibited.
      switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
      case PLUS:
      case MINUS:
        switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
        case PLUS:
          jj_consume_token(PLUS);
              stop=false;
          break;
        case MINUS:
          jj_consume_token(MINUS);
                                        stop=false;prohibited=true;
          break;
        default:
          jj_la1[1] = jj_gen;
          jj_consume_token(-1);
          throw new ParseException();
        }
        break;
      default:
        jj_la1[2] = jj_gen;
        ;
      }
      // Optional "field:" prefix, taken only when the lookahead check jj_2_1 succeeds.
      if (jj_2_1(2147483647)) {
        token = jj_consume_token(WORD);
        jj_consume_token(COLON);
                             field = token.image;
      } else {
        ;
      }
      // Clause body: a quoted phrase (never stop-word filtered) or an unquoted compound term.
      switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
      case QUOTE:
        terms = phrase(field);
                           stop=false;
        break;
      case WORD:
      case ACRONYM:
      case SIGRAM:
        // unquoted term: delegate to compound(field) for the term list
              terms = compound(field);
        break;
      default:
        jj_la1[3] = jj_gen;
        jj_consume_token(-1);
        throw new ParseException();
      }
      // Consume whatever follows this clause before the next loop test.
      nonOpOrTerm();
      String[] array = (String[])terms.toArray(new String[terms.size()]);

      // NOTE(review): 'field == Clause.DEFAULT_FIELD' compares references, so it holds only
      // while field still refers to the constant assigned above; a field name read from
      // token.image presumably never satisfies it, even if it has the same text. Confirm
      // this is intentional before switching to equals().
      if (stop
          && field == Clause.DEFAULT_FIELD
          && terms.size()==1
          && isStopWord(array[0])) {
        // ignore stop words only when single, unadorned terms in default field
      } else {
        if (prohibited)
          query.addProhibitedPhrase(array, field);
        else
          query.addRequiredPhrase(array, field);
      }
    }
    // The 'if (true)' wrapper lets the throw below compile; the throw itself is never reached.
    {if (true) return query;}
    throw new Error("Missing return statement in function");
  }
View Full Code Here

        + hitsPerPage
        + (sort == null ? "" : "&sort=" + sort
            + (reverse ? "&reverse=true" : "")
            + (dedupField == null ? "" : "&dedupField=" + dedupField));

    Query query = Query.parse(queryString, getServiceLocator()
        .getConfiguration());
    NutchBean.LOG.info("query: " + queryString);

    // execute the query
    Hits hits;
View Full Code Here

    NutchBean bean = locator.getNutchBean();

    Hit hit = new Hit(Integer.parseInt(request.getParameter("idx")), Integer
        .parseInt(request.getParameter("id")));
    HitDetails details = bean.getDetails(hit);
    Query query = Query.parse(request.getParameter("query"), locator
        .getConfiguration());

    // put explanation and hitDetails into request so view can access them
    request.setAttribute("explanation", bean.getExplanation(query, hit));
    request.setAttribute("hitDetails", details.toHtml());
View Full Code Here

    Configuration conf = NutchwaxConfiguration.getConfiguration();
   
    NutchBean bean = new NutchwaxBean(conf);
   
    Query query = Query.parse(args[0], conf);
   
    Hits hits = bean.search(query, 10);
    System.out.println("Total hits: " + hits.getTotal());
   
    int length = (int)Math.min(hits.getTotal(), 10);
View Full Code Here

public class TestQueryParser extends TestCase {

  private static Configuration conf = NutchConfiguration.create();
  public void assertQueryEquals(String query, String result) throws Exception {
    try {
      Query q = NutchAnalysis.parseQuery(query, conf);
      String s = q.toString();
      if (!s.equals(result)) {
        fail("Query /" + query + "/ yielded /" + s + "/, expecting /" + result
            + "/");
      }
    } catch (Exception e) {
View Full Code Here

TOP

Related Classes of org.apache.nutch.searcher.Query

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.