Package org.apache.lucene.search

Examples of org.apache.lucene.search.BooleanQuery$TooManyClauses


      DebugFile.incIdent();
    }

    NewsMessageRecord[] aRetArr;
 
  BooleanQuery oQrx = new BooleanQuery();

  oQrx.add(new TermQuery(new Term("workarea",sWorkArea)),BooleanClause.Occur.MUST);

    if (null!=sNewsGroupCategoryName)
    oQrx.add(new TermQuery(new Term("container",sNewsGroupCategoryName)),BooleanClause.Occur.MUST);   

    if (dtFromDate!=null && dtToDate!=null)
    oQrx.add(new RangeQuery(new Term("created",DateTools.dateToString(dtFromDate, DateTools.Resolution.DAY)),
                  new Term("created",DateTools.dateToString(dtToDate, DateTools.Resolution.DAY)), true), BooleanClause.Occur.MUST);   
    else if (dtFromDate!=null)
    oQrx.add(new RangeQuery(new Term("created",DateTools.dateToString(dtFromDate, DateTools.Resolution.DAY)),
                  new Term("created",DateTools.dateToString(new Date(299,11,31), DateTools.Resolution.DAY)), true), BooleanClause.Occur.MUST);   
    else if (dtToDate!=null)
    oQrx.add(new RangeQuery(new Term("created",DateTools.dateToString(new Date(79,11,31), DateTools.Resolution.DAY)),
                  new Term("created",DateTools.dateToString(dtToDate, DateTools.Resolution.DAY)), true), BooleanClause.Occur.MUST);   

  BooleanQuery oQry = new BooleanQuery();

  if (null!=sAuthor)
    oQry.add(new TermQuery(new Term("author",sAuthor)),BooleanClause.Occur.SHOULD);

  if (null!=sTitle)
    oQry.add(new TermQuery(new Term("title",sTitle)),BooleanClause.Occur.SHOULD);

  if (null!=sText)
    oQry.add(new TermQuery(new Term("text",sText)),BooleanClause.Occur.SHOULD);

  oQrx.add(oQry, BooleanClause.Occur.MUST);
 
  String sSegments = Gadgets.chomp(sLuceneIndexPath,File.separator)+"k_newsmsgs"+File.separator+sWorkArea; 
    if (DebugFile.trace) DebugFile.writeln("new IndexSearcher("+sSegments+")");
View Full Code Here


 
  public void testSimpleSearch() throws Exception
  {
    IndexReader reader = searcher.getIndexReader();
   
    BooleanQuery bquery;
    SectionSearchQuery squery;
    Scorer scorer;
    int count;
   
    // 1. (+f1:aa +f2:aaa)
    bquery = new BooleanQuery();
    bquery.add(new TermQuery(new Term("f1","aa")), BooleanClause.Occur.MUST);
    bquery.add(new TermQuery(new Term("f2","aaa")), BooleanClause.Occur.MUST);

    scorer = bquery.weight(searcher).scorer(reader, true, true);
    count = 0;
    while(scorer.nextDoc() != Scorer.NO_MORE_DOCS) count++;
    assertEquals("non-section count mismatch", 4, count);
   
    squery = new SectionSearchQuery(bquery);
    scorer = squery.weight(searcher).scorer(reader, true, true);
    count = 0;
    while(scorer.nextDoc() != Scorer.NO_MORE_DOCS) count++;
    assertEquals("seciton count mismatch", 2, count);
   
    // 2. (+f1:bb + f2:aaa)
    bquery = new BooleanQuery();
    bquery.add(new TermQuery(new Term("f1","bb")), BooleanClause.Occur.MUST);
    bquery.add(new TermQuery(new Term("f2","aaa")), BooleanClause.Occur.MUST);

    scorer = bquery.weight(searcher).scorer(reader, true, true);
    count = 0;
    while(scorer.nextDoc() != Scorer.NO_MORE_DOCS) count++;
    assertEquals("non-section count mismatch", 4, count);
   
    squery = new SectionSearchQuery(bquery);
    scorer = squery.weight(searcher).scorer(reader, true, true);
    count = 0;
    while(scorer.nextDoc() != Scorer.NO_MORE_DOCS) count++;
    assertEquals("seciton count mismatch", 3, count);
   
    // 3. (+f1:aa +f2:bbb)
    bquery = new BooleanQuery();
    bquery.add(new TermQuery(new Term("f1","aa")), BooleanClause.Occur.MUST);
    bquery.add(new TermQuery(new Term("f2","bbb")), BooleanClause.Occur.MUST);

    scorer = bquery.weight(searcher).scorer(reader, true, true);
    count = 0;
    while(scorer.nextDoc() != Scorer.NO_MORE_DOCS) count++;
    assertEquals("non-section count mismatch", 3, count);
   
    squery = new SectionSearchQuery(bquery);
    scorer = squery.weight(searcher).scorer(reader, true, true);
    count = 0;
    while(scorer.nextDoc() != Scorer.NO_MORE_DOCS) count++;
    assertEquals("seciton count mismatch", 2, count);
   
    // 4. (+f1:aa +(f2:bbb f2:ccc))
    BooleanQuery bquery2 = new BooleanQuery();
    bquery2.add(new TermQuery(new Term("f2","bbb")), BooleanClause.Occur.SHOULD);
    bquery2.add(new TermQuery(new Term("f2","ccc")), BooleanClause.Occur.SHOULD);
    bquery = new BooleanQuery();
    bquery.add(new TermQuery(new Term("f1","aa")), BooleanClause.Occur.MUST);
    bquery.add(bquery2, BooleanClause.Occur.MUST);

    scorer = bquery.weight(searcher).scorer(reader, true, true);
    count = 0;
View Full Code Here

 
  private void metaDataSearch(IndexSearcher searcher) throws Exception
  {
    IndexReader reader = searcher.getIndexReader();
   
    BooleanQuery bquery;
    SectionSearchQuery squery;
    Scorer scorer;
    int count;
   
    // 1.
    bquery = new BooleanQuery();
    bquery.add(new TermQuery(new Term("f1","aa")), BooleanClause.Occur.MUST);
    bquery.add(new IntMetaDataQuery(intMetaTerm, new IntMetaDataQuery.SimpleValueValidator(100)), BooleanClause.Occur.MUST);
    squery = new SectionSearchQuery(bquery);
    scorer = squery.weight(searcher).scorer(reader, true, true);
    count = 0;
    while(scorer.nextDoc() != Scorer.NO_MORE_DOCS) count++;
    assertEquals("section count mismatch", 1, count);
   
    // 2.
    bquery = new BooleanQuery();
    bquery.add(new TermQuery(new Term("f1","aa")), BooleanClause.Occur.MUST);
    bquery.add(new IntMetaDataQuery(intMetaTerm, new IntMetaDataQuery.SimpleValueValidator(200)), BooleanClause.Occur.MUST);
    squery = new SectionSearchQuery(bquery);
    scorer = squery.weight(searcher).scorer(reader, true, true);
    count = 0;
    while(scorer.nextDoc() != Scorer.NO_MORE_DOCS) count++;
    assertEquals("section count mismatch", 1, count);
   
    // 3.
    bquery = new BooleanQuery();
    bquery.add(new TermQuery(new Term("f1","bb")), BooleanClause.Occur.MUST);
    bquery.add(new IntMetaDataQuery(intMetaTerm, new IntMetaDataQuery.SimpleValueValidator(200)), BooleanClause.Occur.MUST);
    squery = new SectionSearchQuery(bquery);
    scorer = squery.weight(searcher).scorer(reader, true, true);
    count = 0;
    while(scorer.nextDoc() != Scorer.NO_MORE_DOCS) count++;
    assertEquals("section count mismatch", 2, count);
   
    // 4.
    bquery = new BooleanQuery();
    bquery.add(new TermQuery(new Term("f1","aa")), BooleanClause.Occur.MUST);
    bquery.add(new IntMetaDataQuery(intMetaTerm, new IntMetaDataQuery.SimpleValueValidator(300)), BooleanClause.Occur.MUST);
    squery = new SectionSearchQuery(bquery);
    scorer = squery.weight(searcher).scorer(reader, true, true);
    count = 0;
    while(scorer.nextDoc() != Scorer.NO_MORE_DOCS) count++;
    assertEquals("section count mismatch", 1, count);
   
    // 5.
    bquery = new BooleanQuery();
    bquery.add(new TermQuery(new Term("f1","bb")), BooleanClause.Occur.MUST);
    bquery.add(new IntMetaDataQuery(intMetaTerm, new IntMetaDataQuery.SimpleValueValidator(300)), BooleanClause.Occur.MUST);
    squery = new SectionSearchQuery(bquery);
    scorer = squery.weight(searcher).scorer(reader, true, true);
    count = 0;
    while(scorer.nextDoc() != Scorer.NO_MORE_DOCS) count++;
    assertEquals("section count mismatch", 3, count);
View Full Code Here

      BrowseSelection idSel=new BrowseSelection("idRange");
      int rangeIndex = 2;
      idSel.addNotValue(_idRanges[0]);
      int expectedHitNum = 1;
      br.addSelection(idSel);
      BooleanQuery q = new BooleanQuery();
      q.add(NumericRangeQuery.newIntRange("NUM", 10, 10, true, true), Occur.MUST_NOT);
      q.add(new MatchAllDocsQuery(), Occur.MUST);
      br.setQuery(q);
     
      result = boboBrowser.browse(br);
     
      assertEquals(expectedHitNum,result.getNumHits());
View Full Code Here

      mQuery = null;
      if (queryText != null && queryText.length() > 0) {
        // start the creation of the lucene query object

        try {
          mQuery = new BooleanQuery();

          for (int k = 0; k < indexConfigs.length; k++) {

            String[] searchFieldArr = indexConfigs[k].getSearchFieldList();
            for (int i = 0; i < searchFieldArr.length; i++) {

              QueryParser parser = new QueryParser(Version.LUCENE_30, searchFieldArr[i], mAnalyzer);
              parser.setDefaultOperator(QueryParser.AND_OPERATOR);
              parser.setAllowLeadingWildcard(true);
              Query fieldQuery = parser.parse(queryText);

              // Add as OR
              mQuery.add(fieldQuery, Occur.SHOULD);
            }
            //System.out.println("Query: '" + queryText + "' -> '" + mQuery.toString() + "'");

          }
        } catch (ParseException exc) {
          throw new RegainException("Error while parsing search pattern '" + mQueryText +
                  "': " + exc.getMessage(), exc);
        }

        // Check whether access control is used
        if (allGroups != null && allGroups.length > 0) {
          // Create a query that matches any group
          BooleanQuery groupQuery = new BooleanQuery();
          for (int i = 0; i < allGroups.length; i++) {
            // Add as OR
            groupQuery.add(new TermQuery(new Term("groups", allGroups[i])), Occur.SHOULD);
          }

          // Create a main query that contains the group query and the search query
          // combined with AND
          BooleanQuery mainQuery = new BooleanQuery();
          mainQuery.add(mQuery, Occur.MUST);
          mainQuery.add(groupQuery, Occur.MUST);

          // Set the main query as query to use
          mQuery = mainQuery;
        }
      }

      // Add the mimetype field search
      if (mimeTypeFieldText != null) {
        BooleanQuery mimetypeFieldQuery = new BooleanQuery();
        mimetypeFieldQuery.add(new TermQuery(new Term("mimetype",
                mimeTypeFieldText.substring(9).replace("\"", ""))),
                Occur.SHOULD);
        BooleanQuery mainQuery = new BooleanQuery();
        if (mQuery != null) {
          mainQuery.add(mQuery, Occur.MUST);
        }
        mainQuery.add(mimetypeFieldQuery, Occur.MUST);

        // Set the main query as query to use
        mQuery = mainQuery;
      }
View Full Code Here

      Analyzer analyzer = DocumentCreator.createDocumentAnalyzer();
      QueryParser parser = new QueryParser(DocumentCreator.FIELD_TEXT,analyzer);
     
      TermQuery matchUserQuery = new TermQuery(new Term(DocumentCreator.FIELD_USER,username));
      Query inputQuery = parser.parse(query);
      BooleanQuery boolQuery = new BooleanQuery();
      boolQuery.add(matchUserQuery, BooleanClause.Occur.MUST);
      boolQuery.add(inputQuery,BooleanClause.Occur.MUST);
     
      Hits hits = searcher.search(boolQuery);
      List<BookmarkDoc> bmDocs = new ArrayList<BookmarkDoc>();
      if (offset >= 0 && offset < hits.length()) {
        if (count > 0) {         
View Full Code Here

  //*-------------------------------------------------------------------
  StringBuffer queryString = new StringBuffer();
  NumberFormat nf = NumberFormat.getInstance();
  nf.setMaximumIntegerDigits(3); nf.setMaximumFractionDigits(4);
  float wt = WT_QTYPE;      //*--- Weight for question type entities
  BooleanQuery theQuery = new BooleanQuery();
  LOOP: for (int i = 0; i < tokenList.size(); i++)
  {
   //*-- first try two word query tokens and then single word tokens
   String etype = null;
   if (i > 0) etype = qhash.get( tokenList.get(i - 1).termText() + " " + tokenList.get(i).termText() );
   if ( (etype == null) || (etype.length() < 2)) etype = qhash.get( tokenList.get(i).termText() );
    
   if ( (etype != null) && (etype.length() > 2) )
    { String[] etypes = etype.split("OR");
      for (int j = 0; j < etypes.length; j++)
      { queryString.append("contents:" + etypes[j].trim() + "^" + nf.format(wt) + " ");
        TermQuery tq = new TermQuery( new Term("contents", etypes[j])); tq.setBoost(wt);
        theQuery.add(tq, BooleanClause.Occur.SHOULD);
        entities.add(etypes[j]);
      }
     break LOOP;
    }
   }
  
  //*-------------------------------------------
  //*-- 2. Find entities in the question words
  //*-------------------------------------------
  wt = WT_ENTITY;
  for (int i = 0; i < tokenList.size(); i++)
  { if ( tokenList.get(i).type().equals("ENTITY") )  
    { String qword = tokenList.get(i).termText();
      queryString.append("contents:" + qword + "^" + nf.format(wt) + " ");
      TermQuery tq = new TermQuery( new Term("contents", qword)); tq.setBoost(wt);
      theQuery.add(tq, BooleanClause.Occur.SHOULD);
    }
  }
 
  //*-------------------------------------------------------------------------------
  //*-- 3. Create a list of weighted trigrams/bigrams/unigrams from the query
  //*-------------------------------------------------------------------------------
  int numNouns = nouns.size(); int numVerbs = verbs.size(); int numAdjectives = adjectives.size();
  String[] queryWords = question.split("\\s+"); int wordsLength = queryWords.length;
  boolean[] contentWord = new boolean[wordsLength];
  for (int i = 0; i < wordsLength; i++)
   { queryWords[i] = queryWords[i].toLowerCase(Constants.locale);
     contentWord[i] = false;
     for (int j = 0; j < nouns.size(); j++) if (queryWords[i].equalsIgnoreCase(nouns.get(j))) contentWord[i] = true;
     for (int j = 0; j < verbs.size(); j++) if (queryWords[i].equalsIgnoreCase(verbs.get(j))) contentWord[i] = true;
     for (int j = 0; j < adjectives.size(); j++) if (queryWords[i].equalsIgnoreCase(adjectives.get(j))) contentWord[i] = true;
   }
 
  String joinChar; 
  //*-- generate all possible bigrams with higher weights for bigrams that do not have stopwords
  float WT_NORM_BIGRAM = WT_BIGRAM;
  for (int i = 1; i < 4; i++) if (wordsLength > (Math.pow(2, (i + 1)))) WT_NORM_BIGRAM /= 2;
  LOOP2: for (int i = 1; i < wordsLength; i++)
  { 
   //*-- skip if the previous word was a question word
   //*-- if the previous word was a stop word use a underscore to build the bigram, otherwise use a space
   wt = 0;
   if ( !questionWords.contains(queryWords[i-1]) )
   {
     if (stopWords.contains(queryWords[i-1]) && stopWords.contains(queryWords[i])) continue LOOP2;
     joinChar = (stopWords.contains(queryWords[i-1]) || stopWords.contains(queryWords[i])) ? "_": " ";
     for (int j = i-1; j < i+1; j++) wt += (contentWord[j]) ? WT_NORM_BIGRAM: 0;
     String bigram = queryWords[i-1] + joinChar + queryWords[i];
     queryString.append("contents:\"" + bigram + "\"~0^" + wt + " ");
     PhraseQuery pq = new PhraseQuery(); pq.add( new Term("contents", bigram)); pq.setBoost(wt); pq.setSlop(0);
     theQuery.add(pq, BooleanClause.Occur.SHOULD);
     bigrams.add(bigram);
   }
  } //*-- end of for
 
  //*-- create unigrams from non-stop words and weigh unigrams near the start of the question
  //*-- higher than unigrams near the end of the question
  LOOP3: for (int i = 0; i < wordsLength; i++)
  { wt = WT_UNIGRAM;
 
    //*-- skip punctuation and very short words
    if ( (queryWords[i].length() < 2|| (!contentWord[i]) ) continue LOOP3;
   
    wt *=  ( (numNouns > 0) && (nouns.get(0).equalsIgnoreCase(queryWords[i])) ) ? 8:
           ( (numNouns > 1) && (nouns.get(1).equalsIgnoreCase(queryWords[i])) ) ? 4: 1;
    wt *=  ( (numVerbs > 0) && (verbs.get(0).equalsIgnoreCase(queryWords[i])) ) ? 4:
           ( (numVerbs > 1) && (verbs.get(1).equalsIgnoreCase(queryWords[i])) ) ? 2: 1;
    wt *=  ( (numAdjectives > 0) && (adjectives.get(0).equalsIgnoreCase(queryWords[i])) ) ? 4:
           ( (numAdjectives > 1) && (adjectives.get(1).equalsIgnoreCase(queryWords[i])) ) ? 2: 1;
  
   queryString.append("contents:" + queryWords[i] + "^" + nf.format(wt) + " ");
   TermQuery tq = new TermQuery( new Term("contents", queryWords[i])); tq.setBoost(wt);
   theQuery.add(tq, BooleanClause.Occur.SHOULD);
  } //*-- end of for

  //*--------------------------------------------------------------------------
  //*-- 4. Add the query transformation for the part. query type and add the synonyms
  //*--------------------------------------------------------------------------
/*  wt = WT_SYNONYMS;
  for (int j = 0; j < synonyms.length; j++)
  { queryString.append("contents:" + synonyms[j] + "^" + nf.format(wt) + " ");
    TermQuery tq = new TermQuery( new Term("contents", synonyms[j])); tq.setBoost(wt);
    theQuery.add(tq, BooleanClause.Occur.SHOULD);
  }
  */
  wt = WT_TRANSFORM;
  Matcher matcher = whatPattern.matcher(question);
  if ( (matcher.matches()) && (nouns.size() > 0) )
  {  String qTransform = "\"" + nouns.get(0) + "_is" + "\"";
     queryString.append("contents:" + qTransform + "^" + nf.format(wt) + " ");
     TermQuery tq = new TermQuery( new Term("contents", qTransform)); tq.setBoost(wt);
     theQuery.add(tq, BooleanClause.Occur.SHOULD);
     qTransform = "\"" + nouns.get(0) + "_was" + "\"";
     queryString.append("contents:" + qTransform + "^" + nf.format(wt) + " ");
     tq = new TermQuery( new Term("contents", qTransform)); tq.setBoost(wt);
     theQuery.add(tq, BooleanClause.Occur.SHOULD);
  }
 
  matcher = wherePattern.matcher(question);
  if ( (matcher.matches()) && (nouns.size() > 0) )
  {  String qTransform = "is_located" + "\"";
     queryString.append("contents:" + qTransform + "^" + nf.format(wt) + " ");
     TermQuery tq = new TermQuery( new Term("contents", qTransform)); tq.setBoost(wt);
     theQuery.add(tq, BooleanClause.Occur.SHOULD);
     qTransform = "\"located_at\"";
     queryString.append("contents:" + qTransform + "^" + nf.format(wt) + " ");
     tq = new TermQuery( new Term("contents", qTransform)); tq.setBoost(wt);
     theQuery.add(tq, BooleanClause.Occur.SHOULD);
  }
 
//  String query = queryString.toString();
//System.out.println("query string " + query);
//System.out.println("gen q: " + theQuery);
View Full Code Here

  if (query == null) return null;
 
  //*-- extract a list of tokens with entities
  analyzer.setExtractEntities(true);
  Token[] tokens = tokensFromAnalysis(analyzer, query)
  BooleanQuery fullQuery = new BooleanQuery();
  LOOP: for (int i = 0; i < tokens.length; i++)
   {
     String word = tokens[i].termText().toLowerCase(Constants.locale);
    
     //*-- skip punctuation and very short words
     if ( (word.length() < 2) || (stopWords.contains(word)) ) continue LOOP;
  
     TermQuery tq = new TermQuery(new Term("contents", word));
     tq.setBoost((float) 10.0);     //*-- give an average boost to the phrases
     fullQuery.add(tq, BooleanClause.Occur.SHOULD);
   }
 
  return (fullQuery);
}
View Full Code Here

{
  //*-- open the database environment
  boolean readOnly = true; boolean dupFlag = false;
  dbt.openDB(Constants.EXT_FILES_DB, readOnly, dupFlag);
 
  BooleanQuery query = new BooleanQuery();
  TupleBinding tpb = new IndexableDoc().getBdbBinding();
  DatabaseEntry data = new DatabaseEntry();
  if ( dbt.fetch(key, data) )
   {
     //*-- extract the text of the document
     IndexableDoc idoc = (IndexableDoc) tpb.entryToObject(data);
     String docText = idoc.getContents().toString();
   
     //*-- tokenize the text
     analyzer.setExtractEntities(false);
     Token[] tokens = null;
     try { tokens = tokensFromAnalysis(analyzer, docText); }
     catch (IOException ie) { logger.error("IO Error: Could not tokenize" + ie.getMessage()); }
   
     //*-- build a query from the individual tokens taken from the document, limit the
     //*-- number of tokens from the document to 100.
     int numTokens = 0;
     LOOP: for (int i = 0; i < tokens.length; i++)
      { String token = tokens[i].termText().toLowerCase(Constants.locale);
        if ( (stopWords.contains(token)) || (token.length() < 3)) continue;
  TermQuery tq = new TermQuery(new Term("contents", token));
  query.add(tq, BooleanClause.Occur.SHOULD);
  if (++numTokens >= 100) break LOOP;
      //*-- end of for
   } //*-- end of if
 
  dbt.closeDB();
View Full Code Here

        return new WildcardQuery(term);
      }

        /* Fuzzy Query */
      case SIMILIAR_TO: {
        BooleanQuery similarityQuery = new BooleanQuery();

        LowercaseWhitespaceAnalyzer analyzer = new LowercaseWhitespaceAnalyzer();
        TokenStream tokenStream = analyzer.tokenStream(String.valueOf(IEntity.ALL_FIELDS), new StringReader(value));
        Token token = null;
        while ((token = tokenStream.next()) != null) {
          Term term = new Term(fieldname, token.termText());
          similarityQuery.add(new BooleanClause(new FuzzyQuery(term), Occur.MUST));
        }

        return similarityQuery;
      }
    }
View Full Code Here

TOP

Related Classes of org.apache.lucene.search.BooleanQuery$TooManyClauses

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.