Package com.flaptor.indextank.index.term.query

Source Code of com.flaptor.indextank.index.term.query.TermBasedQueryMatcher

/*
* Copyright (c) 2011 LinkedIn, Inc
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/

package com.flaptor.indextank.index.term.query;

import java.util.List;
import java.util.NavigableMap;
import java.util.PriorityQueue;

import com.flaptor.indextank.index.DocId;
import com.flaptor.indextank.index.QueryMatcher;
import com.flaptor.indextank.index.ScoredMatch;
import com.flaptor.indextank.index.TopMatches;
import com.flaptor.indextank.index.results.SimpleScoredDocIds;
import com.flaptor.indextank.index.scorer.Faceter;
import com.flaptor.indextank.index.scorer.FacetingManager;
import com.flaptor.indextank.index.scorer.MatchFilter;
import com.flaptor.indextank.index.scorer.Scorer;
import com.flaptor.indextank.index.term.DocTermMatch;
import com.flaptor.indextank.index.term.TermMatcher;
import com.flaptor.indextank.query.AndQuery;
import com.flaptor.indextank.query.DifferenceQuery;
import com.flaptor.indextank.query.MatchAllQuery;
import com.flaptor.indextank.query.OrQuery;
import com.flaptor.indextank.query.PrefixTermQuery;
import com.flaptor.indextank.query.Query;
import com.flaptor.indextank.query.QueryNode;
import com.flaptor.indextank.query.RangeQuery;
import com.flaptor.indextank.query.SimplePhraseQuery;
import com.flaptor.indextank.query.TermQuery;
import com.flaptor.indextank.util.AbstractSkippableIterable;
import com.flaptor.indextank.util.SkippableIterable;
import com.flaptor.indextank.util.SkippableIterator;
import com.flaptor.indextank.util.Skippables;
import com.flaptor.indextank.util.Union;
import com.google.common.base.Function;
import com.google.common.base.Predicate;
import com.google.common.base.Predicates;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;

public class TermBasedQueryMatcher implements QueryMatcher {

  private final TermMatcher matcher;
  private final Scorer scorer;
  private final FacetingManager facetingManager;

  public TermBasedQueryMatcher(Scorer scorer, TermMatcher matcher, FacetingManager facetingManager) {
    this.matcher = matcher;
    this.scorer = scorer;
    this.facetingManager = facetingManager;
  }
 
  public TopMatches findMatches(Query query, Predicate<DocId> idFilter, int limit, int scoringFunctionIndex) {
    Iterable<RawMatch> rawMatches = match(query.getRoot());
    return getBestResults(rawMatches, idFilter, limit, query, scoringFunctionIndex);
  }

  @Override
  public TopMatches findMatches(Query query, int limit, int scoringFunctionIndex) {
    return findMatches(query, Predicates.<DocId>alwaysTrue(), limit, scoringFunctionIndex);
  }

  public int countMatches(Query query, Predicate<DocId> idFilter) {
      return getCount(match(query.getRoot()), idFilter);
  }
 
  @Override
  public int countMatches(Query query) {
      return countMatches(query, Predicates.<DocId>alwaysTrue());
  }
 
  @Override
  public boolean hasChanges(DocId docid) {
    return matcher.hasChanges(docid);
  }

  private TopMatches getBestResults(Iterable<RawMatch> rawMatches, Predicate<DocId> docFilter, int n, Query query, int scoringFunctionIndex) {
      String property = System.getProperty("limitTermBasedQueryMatcher");
      int limit = Integer.MAX_VALUE;
      int minTime = Integer.MAX_VALUE;
      if (property != null) {
          String[] parts = property.split(",");
          limit = Integer.parseInt(parts[0]);
          minTime = Integer.parseInt(parts[1]);
      }
     
      Faceter faceter = facetingManager.createFaceter();
     
      long startTime = System.currentTimeMillis();
    PriorityQueue<ScoredMatch> top = new PriorityQueue<ScoredMatch>(n, ScoredMatch.INVERSE_ORDER);
   
    int totalCount = 0;

    Iterable<ScoredMatch> matches = matcher.decode(rawMatches, query.getRoot().getBoostedNorm());

    MatchFilter facetFilter = null;
    if (query.getFilteringFacets() != null) {
      facetFilter = facetingManager.getFacetFilter(query.getFilteringFacets());
    }
   
    for (ScoredMatch match : matches) {
      if (docFilter.apply(match.getDocId())) {
        if (facetFilter == null || facetFilter.matches(match.getDocId(), match.getScore(), query.getNow(), query.getVars())) {
          if (query.getRangeFilter() == null || query.getRangeFilter().matches(match.getDocId(), match.getScore(), query.getNow(), query.getVars())) {
              rescore(match, query, scoringFunctionIndex);
              faceter.computeDocument(match.getDocId());
             
              if (top.size() < n || top.peek().compareTo(match) > 0) {
                  ScoredMatch newMatch;
                  if (top.size() == n) {
                      newMatch = top.remove();
                      newMatch.getDocId().updateFrom(match.getDocId());
                      newMatch.setScore(match.getScore());
                  } else {
                      newMatch = new ScoredMatch(match.getScore(), match.getDocId().copy(256));
                  }
                        top.add(newMatch);
              }
            totalCount++;
           
            if (totalCount > limit) {
                if (System.currentTimeMillis() - startTime > minTime) {
                    totalCount = -totalCount;
                    break;
                }
            }
          }
        }
      }
    }
    List<ScoredMatch> list = Lists.newArrayList(new ScoredMatch[top.size()]);
    for (int i = top.size()-1; i >= 0; i--) {
        list.set(i, top.poll());
        }
    return new SimpleScoredDocIds(list, n, totalCount, faceter.getFacets());
  }
 
  private int getCount(Iterable<RawMatch> rawMatches, Predicate<DocId> docFilter) {
      int totalCount = 0;
      Iterable<ScoredMatch> matches = matcher.decode(rawMatches, 1d);
     
      for (ScoredMatch m : matches) {
          if (docFilter.apply(m.getDocId())) {
              totalCount++;
          }
      }
      return totalCount;
  }

  private ScoredMatch rescore(ScoredMatch match, Query query, int functionIndex) {
      match.setScore(scorer.scoreDocument(match.getDocId(), match.getScore(), query.getNow(), query.getVars(), functionIndex));
      return match;
  }
 
  private SkippableIterable<RawMatch> match(QueryNode query) {
    // dispatch to specific methods based on query type
        if      (query instanceof TermQuery)         return matchTerm       (         (TermQuery) query );
    else if (query instanceof PrefixTermQuery)   return matchPrefix     (   (PrefixTermQuery) query );
    else if (query instanceof AndQuery)          return matchAnd        (          (AndQuery) query );
    else if (query instanceof OrQuery)           return matchOr         (           (OrQuery) query );
    else if (query instanceof DifferenceQuery)   return matchDifference (   (DifferenceQuery) query );
    else if (query instanceof SimplePhraseQuery) return matchPhrase     ( (SimplePhraseQuery) query );
    else if (query instanceof MatchAllQuery)     return matchAll        (     (MatchAllQuery) query );
    else if (query instanceof RangeQuery)        throw new IllegalArgumentException("Range queries not supported yet");
    else throw new IllegalArgumentException("Unsupported query type: " + query.getClass());
    }

    private SkippableIterable<RawMatch> matchTerm(TermQuery query) {
    final SkippableIterable<DocTermMatch> items = matcher.getMatches(query.getField(), query.getTerm());
        final double boost = query.getBoost();
    return new AbstractSkippableIterable<RawMatch>() {
            public SkippableIterator<RawMatch> iterator() {
                return new SkippableIterator<RawMatch>() {
                    RawMatch m = new RawMatch(0, 0d, boost);
                    SkippableIterator<DocTermMatch> it = items.iterator();
                    public void skipTo(int i) {
                        it.skipTo(i);
                    }
                    @Override
                    public boolean hasNext() {
                        return it.hasNext();
                    }
                    @Override
                    public RawMatch next() {
                        DocTermMatch dtm = it.next();
                        m.setRawId(dtm.getRawId());
                        m.setScore(dtm.getTermScore());
                        m.setBoost(boost);
                        //System.out.println("TERM: [S:"+m.getScore()+", B:"+m.getBoost()+", N:"+m.getNorm()+"]");
                        return m;
                    }
                    @Override
                    public void remove() {
                        it.remove();
                    }
                };
            }
        };
  }

    private SkippableIterable<RawMatch> matchPrefix(PrefixTermQuery query) {
        NavigableMap<String, SkippableIterable<DocTermMatch>> matches = matcher.getMatches(query.getField(), query.getTerm(), getNextPrefix(query.getTerm()));
        final RawMatch rawMatch = new RawMatch(0, 0d, query.getBoost());

        Union<DocTermMatch, RawMatch> union = new Union<DocTermMatch, RawMatch>(matches.values()) {
            @Override
            protected RawMatch transform(DocTermMatch k) {
                rawMatch.setRawId(k.getRawId());
                rawMatch.setScore(0d);
                return rawMatch;
            }
           
            @Override
            protected boolean shouldUse(RawMatch v, List<DocTermMatch> ks) {
                for (DocTermMatch docTermMatch : ks) {
                    rawMatch.setScore(rawMatch.getScore() + docTermMatch.getSquareTermScore());
                }
               
                rawMatch.setScore(Math.sqrt(rawMatch.getScore()));
               
                return true;
            }

            @Override
            protected int comp(DocTermMatch a, DocTermMatch b) {
                return a.getRawId() - b.getRawId();
            }
        };
       
        return union;
    }
   
    private static String getNextPrefix(String prefix) {
        return prefix.substring(0, prefix.length() - 1) + (char)(prefix.charAt(prefix.length() - 1) + 1);     
    }
   
  private SkippableIterable<RawMatch> matchAnd(AndQuery query) {
    SkippableIterable<RawMatch> left = match(query.getLeftQuery());
    SkippableIterable<RawMatch> right = match(query.getRightQuery());
        double boost = query.getBoost();
    SkippableIterable<RawMatch> am = new AndMerger2(left, right, boost);
        return am;
  }

  private SkippableIterable<RawMatch> matchPhrase(final SimplePhraseQuery query) {
    String field = query.getField();
    List<String> terms = query.getTerms();
    int[] termPositions = query.getTermPositions();
        double boost = query.getBoost();
    // each term gets converted to its item list by matching it to the given field
    return new PhraseMerger(Iterables.transform(terms, getFieldMatcher(field)), termPositions, boost);
  }

  private SkippableIterable<RawMatch> matchOr(OrQuery query) {
        QueryNode leftQuery = query.getLeftQuery();
        QueryNode rightQuery = query.getRightQuery();
    SkippableIterable<RawMatch> left = match(leftQuery);
    SkippableIterable<RawMatch> right = match(rightQuery);
    return new OrMerger2(left, right, query.getBoost(), leftQuery.getBoost(), rightQuery.getBoost(), leftQuery.getNorm(), rightQuery.getNorm());
  }

  private SkippableIterable<RawMatch> matchDifference(DifferenceQuery query) {
    SkippableIterable<RawMatch> left = match(query.getLeftQuery());
    SkippableIterable<RawMatch> right = match(query.getRightQuery());
        double boost = query.getBoost();
    return new DifferenceMerger(left, right, boost);
  }
 
  private SkippableIterable<RawMatch> matchAll(MatchAllQuery query) {
      return Skippables.transform(matcher.getAllDocs(), new Function<Integer, RawMatch>() {
            @Override
            public RawMatch apply(Integer i) {
                return new RawMatch(i, 1d, 1d);
            }
        });
  }
 
  private Function<String, SkippableIterable<DocTermMatch>> getFieldMatcher(final String field) {
    return new Function<String, SkippableIterable<DocTermMatch>>() {
      @Override
      public SkippableIterable<DocTermMatch> apply(String term) {
        return matcher.getMatches(field, term);
      }
    };
  }

}
TOP

Related Classes of com.flaptor.indextank.index.term.query.TermBasedQueryMatcher

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.