Package: org.apache.accumulo.examples.wikisearch.sample

Examples of org.apache.accumulo.examples.wikisearch.sample.Results


    }
   
    StringWriter xml = new StringWriter();
    StringWriter html = new StringWriter();
   
    Results results = query(query, auths);
    try {
      // Marshall the query results object
      JAXBContext ctx = JAXBContext.newInstance(Results.class);
      Marshaller m = ctx.createMarshaller();
      m.marshal(results, xml);
View Full Code Here


   
    Set<Range> ranges = new HashSet<Range>();
    Set<String> typeFilter = types;
    String array[] = authorizations.toArray(new String[0]);
    Authorizations auths = new Authorizations(array);
    Results results = new Results();
   
    // Get the query string
    String queryString = query;
   
    StopWatch abstractQueryLogic = new StopWatch();
    StopWatch optimizedQuery = new StopWatch();
    StopWatch queryGlobalIndex = new StopWatch();
    StopWatch optimizedEventQuery = new StopWatch();
    StopWatch fullScanQuery = new StopWatch();
    StopWatch processResults = new StopWatch();
   
    abstractQueryLogic.start();
   
    StopWatch parseQuery = new StopWatch();
    parseQuery.start();
   
    QueryParser parser;
    try {
      if (log.isDebugEnabled()) {
        log.debug("ShardQueryLogic calling QueryParser.execute");
      }
      parser = new QueryParser();
      parser.execute(queryString);
    } catch (org.apache.commons.jexl2.parser.ParseException e1) {
      throw new IllegalArgumentException("Error parsing query", e1);
    }
    int hash = parser.getHashValue();
    parseQuery.stop();
    if (log.isDebugEnabled()) {
      log.debug(hash + " Query: " + queryString);
    }
   
    Set<String> fields = new HashSet<String>();
    for (String f : parser.getQueryIdentifiers()) {
      fields.add(f);
    }
    if (log.isDebugEnabled()) {
      log.debug("getQueryIdentifiers: " + parser.getQueryIdentifiers().toString());
    }
    // Remove any negated fields from the fields list, we don't want to lookup negated fields
    // in the index.
    fields.removeAll(parser.getNegatedTermsForOptimizer());
   
    if (log.isDebugEnabled()) {
      log.debug("getQueryIdentifiers: " + parser.getQueryIdentifiers().toString());
    }
    // Get the mapping of field name to QueryTerm object from the query. The query term object
    // contains the operator, whether its negated or not, and the literal to test against.
    Multimap<String,QueryTerm> terms = parser.getQueryTerms();
   
    // Find out which terms are indexed
    // TODO: Should we cache indexed terms or does that not make sense since we are always
    // loading data.
    StopWatch queryMetadata = new StopWatch();
    queryMetadata.start();
    Map<String,Multimap<String,Class<? extends Normalizer>>> metadataResults;
    try {
      metadataResults = findIndexedTerms(connector, auths, fields, typeFilter);
    } catch (Exception e1) {
      throw new RuntimeException("Error in metadata lookup", e1);
    }
   
    // Create a map of indexed term to set of normalizers for it
    Multimap<String,Normalizer> indexedTerms = HashMultimap.create();
    for (Entry<String,Multimap<String,Class<? extends Normalizer>>> entry : metadataResults.entrySet()) {
      // Get the normalizer from the normalizer cache
      for (Class<? extends Normalizer> clazz : entry.getValue().values()) {
        indexedTerms.put(entry.getKey(), normalizerCacheMap.get(clazz));
      }
    }
    queryMetadata.stop();
    if (log.isDebugEnabled()) {
      log.debug(hash + " Indexed Terms: " + indexedTerms.toString());
    }
   
    Set<String> orTerms = parser.getOrTermsForOptimizer();
   
    // Iterate over the query terms to get the operators specified in the query.
    ArrayList<String> unevaluatedExpressions = new ArrayList<String>();
    boolean unsupportedOperatorSpecified = false;
    for (Entry<String,QueryTerm> entry : terms.entries()) {
      if (null == entry.getValue()) {
        continue;
      }
     
      if (null != this.unevaluatedFields && this.unevaluatedFields.contains(entry.getKey().trim())) {
        unevaluatedExpressions.add(entry.getKey().trim() + " " + entry.getValue().getOperator() + " " + entry.getValue().getValue());
      }
     
      int operator = JexlOperatorConstants.getJJTNodeType(entry.getValue().getOperator());
      if (!(operator == ParserTreeConstants.JJTEQNODE || operator == ParserTreeConstants.JJTNENODE || operator == ParserTreeConstants.JJTLENODE
          || operator == ParserTreeConstants.JJTLTNODE || operator == ParserTreeConstants.JJTGENODE || operator == ParserTreeConstants.JJTGTNODE || operator == ParserTreeConstants.JJTERNODE)) {
        unsupportedOperatorSpecified = true;
        break;
      }
    }
    if (null != unevaluatedExpressions)
      unevaluatedExpressions.trimToSize();
    if (log.isDebugEnabled()) {
      log.debug(hash + " unsupportedOperators: " + unsupportedOperatorSpecified + " indexedTerms: " + indexedTerms.toString() + " orTerms: "
          + orTerms.toString() + " unevaluatedExpressions: " + unevaluatedExpressions.toString());
    }
   
    // We can use the intersecting iterator over the field index as an optimization under the
    // following conditions
    //
    // 1. No unsupported operators in the query.
    // 2. No 'or' operators and at least one term indexed
    // or
    // 1. No unsupported operators in the query.
    // 2. and all terms indexed
    // or
    // 1. All or'd terms are indexed. NOTE, this will potentially skip some queries and push to a full table scan
    // // WE should look into finding a better way to handle whether we do an optimized query or not.
    boolean optimizationSucceeded = false;
    boolean orsAllIndexed = false;
    if (orTerms.isEmpty()) {
      orsAllIndexed = false;
    } else {
      orsAllIndexed = indexedTerms.keySet().containsAll(orTerms);
    }
   
    if (log.isDebugEnabled()) {
      log.debug("All or terms are indexed");
    }
   
    if (!unsupportedOperatorSpecified
        && (((null == orTerms || orTerms.isEmpty()) && indexedTerms.size() > 0) || (fields.size() > 0 && indexedTerms.size() == fields.size()) || orsAllIndexed)) {
      optimizedQuery.start();
      // Set up intersecting iterator over field index.
     
      // Get information from the global index for the indexed terms. The results object will contain the term
      // mapped to an object that contains the total count, and partitions where this term is located.
     
      // TODO: Should we cache indexed term information or does that not make sense since we are always loading data
      queryGlobalIndex.start();
      IndexRanges termIndexInfo;
      try {
        // If fields is null or zero, then it's probably the case that the user entered a value
        // to search for with no fields. Check for the value in index.
        if (fields.isEmpty()) {
          termIndexInfo = this.getTermIndexInformation(connector, auths, queryString, typeFilter);
          if (null != termIndexInfo && termIndexInfo.getRanges().isEmpty()) {
            // Then we didn't find anything in the index for this query. This may happen for an indexed term that has wildcards
            // in unhandled locations.
            // Break out of here by throwing a named exception and do full scan
            throw new DoNotPerformOptimizedQueryException();
          }
          // We need to rewrite the query string here so that it's valid.
          if (termIndexInfo instanceof UnionIndexRanges) {
            UnionIndexRanges union = (UnionIndexRanges) termIndexInfo;
            StringBuilder buf = new StringBuilder();
            String sep = "";
            for (String fieldName : union.getFieldNamesAndValues().keySet()) {
              buf.append(sep).append(fieldName).append(" == ");
              if (!(queryString.startsWith("'") && queryString.endsWith("'"))) {
                buf.append("'").append(queryString).append("'");
              } else {
                buf.append(queryString);
              }
              sep = " or ";
            }
            if (log.isDebugEnabled()) {
              log.debug("Rewrote query for non-fielded single term query: " + queryString + " to " + buf.toString());
            }
            queryString = buf.toString();
          } else {
            throw new RuntimeException("Unexpected IndexRanges implementation");
          }
        } else {
          RangeCalculator calc = this.getTermIndexInformation(connector, auths, indexedTerms, terms, this.getIndexTableName(), this.getReverseIndexTableName(),
              queryString, this.queryThreads, typeFilter);
          if (null == calc.getResult() || calc.getResult().isEmpty()) {
            // Then we didn't find anything in the index for this query. This may happen for an indexed term that has wildcards
            // in unhandled locations.
            // Break out of here by throwing a named exception and do full scan
            throw new DoNotPerformOptimizedQueryException();
          }
          termIndexInfo = new UnionIndexRanges();
          termIndexInfo.setIndexValuesToOriginalValues(calc.getIndexValues());
          termIndexInfo.setFieldNamesAndValues(calc.getIndexEntries());
          termIndexInfo.getTermCardinality().putAll(calc.getTermCardinalities());
          for (Range r : calc.getResult()) {
            // foo is a placeholder and is ignored.
            termIndexInfo.add("foo", r);
          }
        }
      } catch (TableNotFoundException e) {
        log.error(this.getIndexTableName() + "not found", e);
        throw new RuntimeException(this.getIndexTableName() + "not found", e);
      } catch (org.apache.commons.jexl2.parser.ParseException e) {
        throw new RuntimeException("Error determining ranges for query: " + queryString, e);
      } catch (DoNotPerformOptimizedQueryException e) {
        log.info("Indexed fields not found in index, performing full scan");
        termIndexInfo = null;
      }
      queryGlobalIndex.stop();
     
      // Determine if we should proceed with optimized query based on results from the global index
      boolean proceed = false;
      if (null == termIndexInfo || termIndexInfo.getFieldNamesAndValues().values().size() == 0) {
        proceed = false;
      } else if (null != orTerms && orTerms.size() > 0 && (termIndexInfo.getFieldNamesAndValues().values().size() == indexedTerms.size())) {
        proceed = true;
      } else if (termIndexInfo.getFieldNamesAndValues().values().size() > 0) {
        proceed = true;
      } else if (orsAllIndexed) {
        proceed = true;
      } else {
        proceed = false;
      }
      if (log.isDebugEnabled()) {
        log.debug("Proceed with optimized query: " + proceed);
        if (null != termIndexInfo)
          log.debug("termIndexInfo.getTermsFound().size(): " + termIndexInfo.getFieldNamesAndValues().values().size() + " indexedTerms.size: "
              + indexedTerms.size() + " fields.size: " + fields.size());
      }
      if (proceed) {
       
        if (log.isDebugEnabled()) {
          log.debug(hash + " Performing optimized query");
        }
        // Use the scan ranges from the GlobalIndexRanges object as the ranges for the batch scanner
        ranges = termIndexInfo.getRanges();
        if (log.isDebugEnabled()) {
          log.info(hash + " Ranges: count: " + ranges.size() + ", " + ranges.toString());
        }
       
        // Create BatchScanner, set the ranges, and setup the iterators.
        optimizedEventQuery.start();
        BatchScanner bs = null;
        try {
          bs = connector.createBatchScanner(this.getTableName(), auths, queryThreads);
          bs.setRanges(ranges);
          IteratorSetting si = new IteratorSetting(21, "eval", OptimizedQueryIterator.class);
         
          if (log.isDebugEnabled()) {
            log.debug("Setting scan option: " + EvaluatingIterator.QUERY_OPTION + " to " + queryString);
          }
          // Set the query option
          si.addOption(EvaluatingIterator.QUERY_OPTION, queryString);
          // Set the Indexed Terms List option. This is the field name and normalized field value pair separated
          // by a comma.
          StringBuilder buf = new StringBuilder();
          String sep = "";
          for (Entry<String,String> entry : termIndexInfo.getFieldNamesAndValues().entries()) {
            buf.append(sep);
            buf.append(entry.getKey());
            buf.append(":");
            buf.append(termIndexInfo.getIndexValuesToOriginalValues().get(entry.getValue()));
            buf.append(":");
            buf.append(entry.getValue());
            if (sep.equals("")) {
              sep = ";";
            }
          }
          if (log.isDebugEnabled()) {
            log.debug("Setting scan option: " + FieldIndexQueryReWriter.INDEXED_TERMS_LIST + " to " + buf.toString());
          }
          FieldIndexQueryReWriter rewriter = new FieldIndexQueryReWriter();
          String q = "";
          try {
            q = queryString;
            q = rewriter.applyCaseSensitivity(q, true, false);// Set upper/lower case for fieldname/fieldvalue
            Map<String,String> opts = new HashMap<String,String>();
            opts.put(FieldIndexQueryReWriter.INDEXED_TERMS_LIST, buf.toString());
            q = rewriter.removeNonIndexedTermsAndInvalidRanges(q, opts);
            q = rewriter.applyNormalizedTerms(q, opts);
            if (log.isDebugEnabled()) {
              log.debug("runServerQuery, FieldIndex Query: " + q);
            }
          } catch (org.apache.commons.jexl2.parser.ParseException ex) {
            log.error("Could not parse query, Jexl ParseException: " + ex);
          } catch (Exception ex) {
            log.error("Problem rewriting query, Exception: " + ex.getMessage());
          }
          si.addOption(BooleanLogicIterator.FIELD_INDEX_QUERY, q);
         
          // Set the term cardinality option
          sep = "";
          buf.delete(0, buf.length());
          for (Entry<String,Long> entry : termIndexInfo.getTermCardinality().entrySet()) {
            buf.append(sep);
            buf.append(entry.getKey());
            buf.append(":");
            buf.append(entry.getValue());
            sep = ",";
          }
          if (log.isDebugEnabled())
            log.debug("Setting scan option: " + BooleanLogicIterator.TERM_CARDINALITIES + " to " + buf.toString());
          si.addOption(BooleanLogicIterator.TERM_CARDINALITIES, buf.toString());
          if (this.useReadAheadIterator) {
            if (log.isDebugEnabled()) {
              log.debug("Enabling read ahead iterator with queue size: " + this.readAheadQueueSize + " and timeout: " + this.readAheadTimeOut);
            }
            si.addOption(ReadAheadIterator.QUEUE_SIZE, this.readAheadQueueSize);
            si.addOption(ReadAheadIterator.TIMEOUT, this.readAheadTimeOut);
           
          }
         
          if (null != unevaluatedExpressions) {
            StringBuilder unevaluatedExpressionList = new StringBuilder();
            String sep2 = "";
            for (String exp : unevaluatedExpressions) {
              unevaluatedExpressionList.append(sep2).append(exp);
              sep2 = ",";
            }
            if (log.isDebugEnabled())
              log.debug("Setting scan option: " + EvaluatingIterator.UNEVALUTED_EXPRESSIONS + " to " + unevaluatedExpressionList.toString());
            si.addOption(EvaluatingIterator.UNEVALUTED_EXPRESSIONS, unevaluatedExpressionList.toString());
          }
         
          bs.addScanIterator(si);
         
          processResults.start();
          processResults.suspend();
          long count = 0;
          for (Entry<Key,Value> entry : bs) {
            count++;
            // The key that is returned by the EvaluatingIterator is not the same key that is in
            // the table. The value that is returned by the EvaluatingIterator is a kryo
            // serialized EventFields object.
            processResults.resume();
            Document d = this.createDocument(entry.getKey(), entry.getValue());
            results.getResults().add(d);
            processResults.suspend();
          }
          log.info(count + " matching entries found in optimized query.");
          optimizationSucceeded = true;
          processResults.stop();
        } catch (TableNotFoundException e) {
          log.error(this.getTableName() + "not found", e);
          throw new RuntimeException(this.getIndexTableName() + "not found", e);
        } finally {
          if (bs != null) {
            bs.close();
          }
        }
        optimizedEventQuery.stop();
      }
      optimizedQuery.stop();
    }
   
    // WE should look into finding a better way to handle whether we do an optimized query or not.
    // We are not setting up an else condition here because we may have aborted the logic early in the if statement.
    if (!optimizationSucceeded || ((null != orTerms && orTerms.size() > 0) && (indexedTerms.size() != fields.size()) && !orsAllIndexed)) {
      // if (!optimizationSucceeded || ((null != orTerms && orTerms.size() > 0) && (indexedTerms.size() != fields.size()))) {
      fullScanQuery.start();
      if (log.isDebugEnabled()) {
        log.debug(hash + " Performing full scan query");
      }
     
      // Set up a full scan using the date ranges from the query
      // Create BatchScanner, set the ranges, and setup the iterators.
      BatchScanner bs = null;
      try {
        // The ranges are the start and end dates
        Collection<Range> r = getFullScanRange(beginDate, endDate, terms);
        ranges.addAll(r);
       
        if (log.isDebugEnabled()) {
          log.debug(hash + " Ranges: count: " + ranges.size() + ", " + ranges.toString());
        }
       
        bs = connector.createBatchScanner(this.getTableName(), auths, queryThreads);
        bs.setRanges(ranges);
        IteratorSetting si = new IteratorSetting(22, "eval", EvaluatingIterator.class);
        // Create datatype regex if needed
        if (null != typeFilter) {
          StringBuilder buf = new StringBuilder();
          String s = "";
          for (String type : typeFilter) {
            buf.append(s).append(type).append(".*");
            s = "|";
          }
          if (log.isDebugEnabled())
            log.debug("Setting colf regex iterator to: " + buf.toString());
          IteratorSetting ri = new IteratorSetting(21, "typeFilter", RegExFilter.class);
          RegExFilter.setRegexs(ri, null, buf.toString(), null, null, false);
          bs.addScanIterator(ri);
        }
        if (log.isDebugEnabled()) {
          log.debug("Setting scan option: " + EvaluatingIterator.QUERY_OPTION + " to " + queryString);
        }
        si.addOption(EvaluatingIterator.QUERY_OPTION, queryString);
        if (null != unevaluatedExpressions) {
          StringBuilder unevaluatedExpressionList = new StringBuilder();
          String sep2 = "";
          for (String exp : unevaluatedExpressions) {
            unevaluatedExpressionList.append(sep2).append(exp);
            sep2 = ",";
          }
          if (log.isDebugEnabled())
            log.debug("Setting scan option: " + EvaluatingIterator.UNEVALUTED_EXPRESSIONS + " to " + unevaluatedExpressionList.toString());
          si.addOption(EvaluatingIterator.UNEVALUTED_EXPRESSIONS, unevaluatedExpressionList.toString());
        }
        bs.addScanIterator(si);
        long count = 0;
        processResults.start();
        processResults.suspend();
        for (Entry<Key,Value> entry : bs) {
          count++;
          // The key that is returned by the EvaluatingIterator is not the same key that is in
          // the partition table. The value that is returned by the EvaluatingIterator is a kryo
          // serialized EventFields object.
          processResults.resume();
          Document d = this.createDocument(entry.getKey(), entry.getValue());
          results.getResults().add(d);
          processResults.suspend();
        }
        processResults.stop();
        log.info(count + " matching entries found in full scan query.");
      } catch (TableNotFoundException e) {
View Full Code Here

    this.tableName = tableName;
  }
 
  public Results runQuery(Connector connector, String query, List<String> authorizations) {
   
    Results results = new Results();
    Authorizations auths = new Authorizations(StringUtils.join(authorizations, "|"));
   
    Matcher match = queryPattern.matcher(query);
    if (!match.matches()) {
      throw new IllegalArgumentException("Query does not match the pattern: DOCUMENT:partitionId/wikitype/uid, your query: " + query.toString());
    } else {
      String partitionId = match.group(1);
      String wikitype = match.group(2);
      String id = match.group(3);
     
      log.debug("Received pieces: " + partitionId + ", " + wikitype + ", " + id);
     
      // Create the Range
      Key startKey = new Key(partitionId, WikipediaMapper.DOCUMENT_COLUMN_FAMILY, wikitype + NULL_BYTE + id);
      Key endKey = new Key(partitionId, WikipediaMapper.DOCUMENT_COLUMN_FAMILY, wikitype + NULL_BYTE + id + NULL_BYTE);
      Range r = new Range(startKey, true, endKey, false);
     
      log.debug("Setting range: " + r);
     
      try {
        Scanner scanner = connector.createScanner(this.getTableName(), auths);
        scanner.setRange(r);
        // This should in theory only match one thing.
        for (Entry<Key,Value> entry : scanner) {
          Document doc = new Document();
          doc.setId(id);
          Field val = new Field();
          val.setFieldName("DOCUMENT");
          val.setFieldValue(new String(Base64.decodeBase64(entry.getValue().toString())));
          doc.getFields().add(val);
          results.getResults().add(doc);
        }
      } catch (TableNotFoundException e) {
        throw new RuntimeException("Table not found: " + this.getTableName(), e);
      }
     
View Full Code Here

  public void testTitle() {
    Logger.getLogger(AbstractQueryLogic.class).setLevel(Level.OFF);
    Logger.getLogger(RangeCalculator.class).setLevel(Level.OFF);
    List<String> auths = new ArrayList<String>();
    auths.add("enwiki");
    Results results = table.runQuery(c, auths, "TITLE == 'afghanistanhistory'", null, null, null);
    for (Document doc : results.getResults()) {
      System.out.println("id: " + doc.getId());
      for (Field field : doc.getFields())
        System.out.println(field.getFieldName() + " -> " + field.getFieldValue());
    }
  }
View Full Code Here

  public void testTitle() {
    // Suppress noisy query-internals logging for the duration of the test.
    Logger.getLogger(AbstractQueryLogic.class).setLevel(Level.OFF);
    Logger.getLogger(RangeCalculator.class).setLevel(Level.OFF);
    // Query is executed under the 'enwiki' authorization only.
    List<String> auths = new ArrayList<String>();
    auths.add("enwiki");
    // NOTE(review): the three trailing null arguments presumably disable optional
    // filters (e.g. date range / type filter) — confirm against runQuery's signature.
    Results results = table.runQuery(c, auths, "TITLE == 'afghanistanhistory'", null, null, null);
    // Dump every matching document and its fields to stdout for visual verification.
    for (Document doc : results.getResults()) {
      System.out.println("id: " + doc.getId());
      for (Field field : doc.getFields())
        System.out.println(field.getFieldName() + " -> " + field.getFieldValue());
    }
  }
View Full Code Here

TOP

Related Classes of org.apache.accumulo.examples.wikisearch.sample.Results

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.