Package org.apache.nutch.searcher

Examples of org.apache.nutch.searcher.Query$Clause


    BufferedReader in = new BufferedReader(new InputStreamReader(System.in));

    String line;

    while ((line = in.readLine()) != null) {
      Query query = Query.parse(line, conf);
      SpellCheckerTerms terms = checker.checkSpelling(query, line);
      StringBuffer buf = new StringBuffer();

      for (int i = 0; i < terms.size(); i++) {
        SpellCheckerTerm currentTerm = terms.getSpellCheckerTerm(i);
View Full Code Here


* "subcollection".
*/
public class SubcollectionPreSearchExtension implements PreSearchExtensionPoint {

  public void doPreSearch(ServiceLocator locator) {
    Query original=locator.getSearch().getQuery();
    Query modified=(Query)original.clone();
    try{
      String value=locator.getSearchForm().getValueString(SubcollectionIndexingFilter.FIELD_NAME);
      if(value!=null && value.trim().length()>0) {
        modified.addRequiredTerm(value, SubcollectionIndexingFilter.FIELD_NAME);
      }
    } catch (Exception e){
     
    }
    locator.getSearch().setQuery(modified);
View Full Code Here

    assertFalse(m1.equals(m3));
  }
 
  private KeyMatch[] getKeyMatchesForString(String string) {
   
    Query q;
    HashMap context=new HashMap();
    context.put(CountFilter.KEY_COUNT,"1");
    try {
      q = Query.parse(string, conf);
      return km.getMatches(q,context);
View Full Code Here

    assertTrue(keymatch.equals(matched[0]));
  }

  private KeyMatch[] getKeyMatchesForString(String string) {
   
    Query q;
    HashMap context=new HashMap();
    context.put(CountFilter.KEY_COUNT,"1");
    try {
      q = Query.parse(string, conf);
      return km.getMatches(q,context);
View Full Code Here

    } finally {
      in.close();
    }
   
    // Convert the query string into a proper Query
    Query query = Query.parse(queryBuf.toString(), conf);
    System.out.println("Summary: '" + s.getSummary(body.toString(), query) + "'");
  }
View Full Code Here

    int numDupes = RequestUtils.getIntegerParameter(request, NUM_DUPES,
      defaultNumDupes);
    String[] fields = request.getParameterValues(FIELDS);

    // parse out the query
    Query queryObj = Query.parse(query, lang, this.conf);
    if (NutchBean.LOG.isInfoEnabled()) {
      NutchBean.LOG.info("query: " + query);
      NutchBean.LOG.info("lang: " + lang);
    }
View Full Code Here

    }
  }

/**
 * Parse a query.
 *
 * <p>Reads clauses from the token stream in a loop. Each clause is an optional
 * {@code +} or {@code -} prefix, an optional {@code WORD ':'} field prefix, and
 * then either a quoted phrase (via {@code phrase(field)}) or a
 * WORD/ACRONYM/SIGRAM term (via {@code compound(field)}). Every clause is added
 * to the result as a required phrase, or as a prohibited phrase when it was
 * prefixed with {@code -}. A clause is dropped when it is a single term with no
 * {@code +}/{@code -} prefix, not quoted, still carrying the default field
 * reference, and {@code isStopWord} accepts it.
 *
 * <p>NOTE(review): the {@code jj_*} members and the {@code label_1} loop look
 * like JavaCC-generated code; if so, changes belong in the grammar file rather
 * than here. Confirm before hand-editing.
 *
 * @param conf configuration passed to the {@code Query} constructor
 * @return the query built from the parsed clauses
 * @throws ParseException when the next token kind matches none of the
 *         alternatives expected at that point
 */
  final public Query parse(Configuration conf) throws ParseException {
  Query query = new Query(conf);
  ArrayList terms;
  Token token;
  String field;
  boolean stop;
  boolean prohibited;
    // presumably skips leading input that is neither an operator nor a term;
    // its body is not visible here -- TODO confirm
    nonOpOrTerm();
    label_1:
    while (true) {
      // Peek at the next token kind (fetched via jj_ntk() when the cached value
      // is -1): continue only if it can start a clause, otherwise leave the loop.
      switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
      case WORD:
      case ACRONYM:
      case SIGRAM:
      case PLUS:
      case MINUS:
      case QUOTE:
        ;
        break;
      default:
        jj_la1[0] = jj_gen;
        break label_1;
      }
      // Per-clause defaults: stop-word filtering allowed, clause required,
      // default field.
      stop=true; prohibited=false; field = Clause.DEFAULT_FIELD;
      // Optional '+' / '-' prefix. Either one disables stop-word filtering;
      // '-' additionally marks the clause as prohibited.
      switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
      case PLUS:
      case MINUS:
        switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
        case PLUS:
          jj_consume_token(PLUS);
              stop=false;
          break;
        case MINUS:
          jj_consume_token(MINUS);
                                        stop=false;prohibited=true;
          break;
        default:
          jj_la1[1] = jj_gen;
          jj_consume_token(-1);
          throw new ParseException();
        }
        break;
      default:
        jj_la1[2] = jj_gen;
        ;
      }
      // Optional field prefix: WORD followed by COLON; the word's text becomes
      // the field name. jj_2_1 is presumably a lookahead check for that pair --
      // TODO confirm.
      if (jj_2_1(2147483647)) {
        token = jj_consume_token(WORD);
        jj_consume_token(COLON);
                             field = token.image;
      } else {
        ;
      }
      // The clause body: a quoted phrase or a single (possibly compound) term.
      switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
      case QUOTE:
        terms = phrase(field);
                           // phrases are never dropped as stop words
                           stop=false;
        break;
      case WORD:
      case ACRONYM:
      case SIGRAM:
        // non-quoted term: delegate to compound(field)
              terms = compound(field);
        break;
      default:
        jj_la1[3] = jj_gen;
        jj_consume_token(-1);
        throw new ParseException();
      }
      nonOpOrTerm();
      String[] array = (String[])terms.toArray(new String[terms.size()]);

      // NOTE(review): 'field == Clause.DEFAULT_FIELD' is a reference comparison.
      // It holds when field still carries the default assigned at the top of
      // the loop; a field name taken from token.image would match only if it
      // were the same String instance. Confirm this is intended before
      // replacing it with equals().
      if (stop
          && field == Clause.DEFAULT_FIELD
          && terms.size()==1
          && isStopWord(array[0])) {
        // ignore stop words only when single, unadorned terms in default field
      } else {
        if (prohibited)
          query.addProhibitedPhrase(array, field);
        else
          query.addRequiredPhrase(array, field);
      }
    }
    // Always returns here; the throw below is never executed and is
    // presumably generator boilerplate to satisfy the compiler.
    {if (true) return query;}
    throw new Error("Missing return statement in function");
  }
View Full Code Here

    public void run() {

      numTotal.incrementAndGet();

      try {
        Query runner = Query.parse(query, conf);
        long start = System.currentTimeMillis();
        Hits hits = bean.search(runner, 10);
        long end = System.currentTimeMillis();
        numResolved.incrementAndGet();
        long total = (end - start);
View Full Code Here

        try {
          Configuration conf = NutchConfiguration.create();
          conf.set("arquivo.include.types","html|xhtml+xml|xml|pdf|postscript|text|msword|vnd.ms-powerpoint|rtf|richtext"); // at nutch-site.xml
       
          TRECFeaturesExtractor extractorBean=new TRECFeaturesExtractor(conf,new Path(args[1]));
          Query queryInput = null;         
          BooleanQuery queryOutput = null;
          TermQuery queryDocno = null;
          boolean debug=Boolean.parseBoolean(args[3]);
         
          /*
 
View Full Code Here

      if (lastRequest) { // BUG 200608
        break;
      }
       
        // optimize query by prohibiting more matches on some excluded values
        Query optQuery = (Query)query.clone();
        for (int i = 0; i < excludedValues.size(); i++) {
          if (i == MAX_PROHIBITED_TERMS)
            break;
          optQuery.addProhibitedTerm(((String)excludedValues.get(i)),dedupField);
        }
        numHitsRaw = (int)(numHitsRaw * rawHitsFactor);
        //if (LOG.isInfoEnabled()) {
          LOG.debug("re-searching for "+numHitsRaw+" raw hits, query: "+optQuery);
        //}
View Full Code Here

TOP

Related Classes of org.apache.nutch.searcher.Query$Clause

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.