Package org.apache.accumulo.core.client

Examples of org.apache.accumulo.core.client.BatchScanner
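The excerpts below come from larger programs. As orientation, here is a minimal, self-contained sketch of the typical BatchScanner lifecycle (create, set ranges, iterate, close). The Connector, table name, row list, and thread count are assumed to be supplied by the caller, and an empty Authorizations object stands in for real scan authorizations (it is equivalent to the Constants.NO_AUTHS used in the excerpts).

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.accumulo.core.client.BatchScanner;
import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.core.client.TableNotFoundException;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Range;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.security.Authorizations;
import org.apache.hadoop.io.Text;

public class BatchScannerBasics {

  // Looks up a batch of rows in parallel and returns row -> value for every entry found.
  static Map<Text,Value> lookupRows(Connector connector, String table, List<Text> rows, int queryThreads)
      throws TableNotFoundException {
    // One single-row Range per lookup; the BatchScanner fans these out across its threads.
    List<Range> ranges = new ArrayList<Range>();
    for (Text row : rows)
      ranges.add(new Range(row));

    BatchScanner bs = connector.createBatchScanner(table, new Authorizations(), queryThreads);
    Map<Text,Value> found = new HashMap<Text,Value>();
    try {
      bs.setRanges(ranges); // ranges must be set before iterating
      for (Entry<Key,Value> entry : bs)
        found.put(entry.getKey().getRow(), entry.getValue());
    } finally {
      bs.close(); // releases the scanner's threads and server-side sessions
    }
    return found;
  }
}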


      ranges.add(new Range(row));
    }
   
    // logger.setLevel(Level.TRACE);
   
    BatchScanner bs = getConnector().createBatchScanner("bss", Constants.NO_AUTHS, 4);
   
    HashMap<Text,Value> found = new HashMap<Text,Value>();
   
    for (int i = 0; i < 20; i++) {
     
      found.clear();
     
      long t1 = System.currentTimeMillis();
     
      bs.setRanges(ranges);
     
      for (Entry<Key,Value> entry : bs) {
        found.put(entry.getKey().getRow(), entry.getValue());
      }
     


          log.info(hash + " Ranges: count: " + ranges.size() + ", " + ranges.toString());
        }
       
        // Create BatchScanner, set the ranges, and setup the iterators.
        optimizedEventQuery.start();
        BatchScanner bs = null;
        try {
          bs = connector.createBatchScanner(this.getTableName(), auths, queryThreads);
          bs.setRanges(ranges);
          IteratorSetting si = new IteratorSetting(21, "eval", OptimizedQueryIterator.class);
         
          if (log.isDebugEnabled()) {
            log.debug("Setting scan option: " + EvaluatingIterator.QUERY_OPTION + " to " + queryString);
          }
          // Set the query option
          si.addOption(EvaluatingIterator.QUERY_OPTION, queryString);
          // Set the Indexed Terms List option. Each entry is the field name, original value, and normalized
          // value separated by colons; entries are separated by semicolons.
          StringBuilder buf = new StringBuilder();
          String sep = "";
          for (Entry<String,String> entry : termIndexInfo.getFieldNamesAndValues().entries()) {
            buf.append(sep);
            buf.append(entry.getKey());
            buf.append(":");
            buf.append(termIndexInfo.getIndexValuesToOriginalValues().get(entry.getValue()));
            buf.append(":");
            buf.append(entry.getValue());
            if (sep.equals("")) {
              sep = ";";
            }
          }
          if (log.isDebugEnabled()) {
            log.debug("Setting scan option: " + FieldIndexQueryReWriter.INDEXED_TERMS_LIST + " to " + buf.toString());
          }
          FieldIndexQueryReWriter rewriter = new FieldIndexQueryReWriter();
          String q = "";
          try {
            q = queryString;
            q = rewriter.applyCaseSensitivity(q, true, false);// Set upper/lower case for fieldname/fieldvalue
            Map<String,String> opts = new HashMap<String,String>();
            opts.put(FieldIndexQueryReWriter.INDEXED_TERMS_LIST, buf.toString());
            q = rewriter.removeNonIndexedTermsAndInvalidRanges(q, opts);
            q = rewriter.applyNormalizedTerms(q, opts);
            if (log.isDebugEnabled()) {
              log.debug("runServerQuery, FieldIndex Query: " + q);
            }
          } catch (org.apache.commons.jexl2.parser.ParseException ex) {
            log.error("Could not parse query, Jexl ParseException: " + ex);
          } catch (Exception ex) {
            log.error("Problem rewriting query, Exception: " + ex.getMessage());
          }
          si.addOption(BooleanLogicIterator.FIELD_INDEX_QUERY, q);
         
          // Set the term cardinality option
          sep = "";
          buf.delete(0, buf.length());
          for (Entry<String,Long> entry : termIndexInfo.getTermCardinality().entrySet()) {
            buf.append(sep);
            buf.append(entry.getKey());
            buf.append(":");
            buf.append(entry.getValue());
            sep = ",";
          }
          if (log.isDebugEnabled())
            log.debug("Setting scan option: " + BooleanLogicIterator.TERM_CARDINALITIES + " to " + buf.toString());
          si.addOption(BooleanLogicIterator.TERM_CARDINALITIES, buf.toString());
          if (this.useReadAheadIterator) {
            if (log.isDebugEnabled()) {
              log.debug("Enabling read ahead iterator with queue size: " + this.readAheadQueueSize + " and timeout: " + this.readAheadTimeOut);
            }
            si.addOption(ReadAheadIterator.QUEUE_SIZE, this.readAheadQueueSize);
            si.addOption(ReadAheadIterator.TIMEOUT, this.readAheadTimeOut);
           
          }
         
          if (null != unevaluatedExpressions) {
            StringBuilder unevaluatedExpressionList = new StringBuilder();
            String sep2 = "";
            for (String exp : unevaluatedExpressions) {
              unevaluatedExpressionList.append(sep2).append(exp);
              sep2 = ",";
            }
            if (log.isDebugEnabled())
              log.debug("Setting scan option: " + EvaluatingIterator.UNEVALUTED_EXPRESSIONS + " to " + unevaluatedExpressionList.toString());
            si.addOption(EvaluatingIterator.UNEVALUTED_EXPRESSIONS, unevaluatedExpressionList.toString());
          }
         
          bs.addScanIterator(si);
         
          processResults.start();
          processResults.suspend();
          long count = 0;
          for (Entry<Key,Value> entry : bs) {
            count++;
            // The key that is returned by the EvaluatingIterator is not the same key that is in
            // the table. The value that is returned by the EvaluatingIterator is a kryo
            // serialized EventFields object.
            processResults.resume();
            Document d = this.createDocument(entry.getKey(), entry.getValue());
            results.getResults().add(d);
            processResults.suspend();
          }
          log.info(count + " matching entries found in optimized query.");
          optimizationSucceeded = true;
          processResults.stop();
        } catch (TableNotFoundException e) {
          log.error(this.getTableName() + " not found", e);
          throw new RuntimeException(this.getIndexTableName() + " not found", e);
        } finally {
          if (bs != null) {
            bs.close();
          }
        }
        optimizedEventQuery.stop();
      }
      optimizedQuery.stop();
    }
   
    // We should look into a better way of deciding whether to run an optimized query.
    // We are not setting up an else condition here because we may have aborted the logic early in the if statement.
    if (!optimizationSucceeded || ((null != orTerms && orTerms.size() > 0) && (indexedTerms.size() != fields.size()) && !orsAllIndexed)) {
      // if (!optimizationSucceeded || ((null != orTerms && orTerms.size() > 0) && (indexedTerms.size() != fields.size()))) {
      fullScanQuery.start();
      if (log.isDebugEnabled()) {
        log.debug(hash + " Performing full scan query");
      }
     
      // Set up a full scan using the date ranges from the query
      // Create BatchScanner, set the ranges, and setup the iterators.
      BatchScanner bs = null;
      try {
        // The ranges are the start and end dates
        Collection<Range> r = getFullScanRange(beginDate, endDate, terms);
        ranges.addAll(r);
       
        if (log.isDebugEnabled()) {
          log.debug(hash + " Ranges: count: " + ranges.size() + ", " + ranges.toString());
        }
       
        bs = connector.createBatchScanner(this.getTableName(), auths, queryThreads);
        bs.setRanges(ranges);
        IteratorSetting si = new IteratorSetting(22, "eval", EvaluatingIterator.class);
        // Create datatype regex if needed
        if (null != typeFilter) {
          StringBuilder buf = new StringBuilder();
          String s = "";
          for (String type : typeFilter) {
            buf.append(s).append(type).append(".*");
            s = "|";
          }
          if (log.isDebugEnabled())
            log.debug("Setting colf regex iterator to: " + buf.toString());
          IteratorSetting ri = new IteratorSetting(21, "typeFilter", RegExFilter.class);
          RegExFilter.setRegexs(ri, null, buf.toString(), null, null, false);
          bs.addScanIterator(ri);
        }
        if (log.isDebugEnabled()) {
          log.debug("Setting scan option: " + EvaluatingIterator.QUERY_OPTION + " to " + queryString);
        }
        si.addOption(EvaluatingIterator.QUERY_OPTION, queryString);
        if (null != unevaluatedExpressions) {
          StringBuilder unevaluatedExpressionList = new StringBuilder();
          String sep2 = "";
          for (String exp : unevaluatedExpressions) {
            unevaluatedExpressionList.append(sep2).append(exp);
            sep2 = ",";
          }
          if (log.isDebugEnabled())
            log.debug("Setting scan option: " + EvaluatingIterator.UNEVALUTED_EXPRESSIONS + " to " + unevaluatedExpressionList.toString());
          si.addOption(EvaluatingIterator.UNEVALUTED_EXPRESSIONS, unevaluatedExpressionList.toString());
        }
        bs.addScanIterator(si);
        long count = 0;
        processResults.start();
        processResults.suspend();
        for (Entry<Key,Value> entry : bs) {
          count++;
          // The key that is returned by the EvaluatingIterator is not the same key that is in
          // the partition table. The value that is returned by the EvaluatingIterator is a kryo
          // serialized EventFields object.
          processResults.resume();
          Document d = this.createDocument(entry.getKey(), entry.getValue());
          results.getResults().add(d);
          processResults.suspend();
        }
        processResults.stop();
        log.info(count + " matching entries found in full scan query.");
      } catch (TableNotFoundException e) {
        log.error(this.getTableName() + " not found", e);
      } finally {
        if (bs != null) {
          bs.close();
        }
      }
      fullScanQuery.stop();
    }
   
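The query above wires application-specific iterators (OptimizedQueryIterator, EvaluatingIterator, BooleanLogicIterator, ReadAheadIterator) into the scan through IteratorSetting options. The wiring pattern itself is generic: build an IteratorSetting, add its options, attach it with addScanIterator, then iterate and close. A minimal sketch of that pattern using Accumulo's built-in GrepIterator follows; the table name, search term, and thread count are caller-supplied placeholders.

import java.util.Collections;
import java.util.Map.Entry;

import org.apache.accumulo.core.client.BatchScanner;
import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.core.client.IteratorSetting;
import org.apache.accumulo.core.client.TableNotFoundException;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Range;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.iterators.user.GrepIterator;
import org.apache.accumulo.core.security.Authorizations;

public class ScanIteratorSketch {

  // Counts entries whose key or value contains the given term, matched server side.
  static long grepCount(Connector connector, String table, String term, int queryThreads)
      throws TableNotFoundException {
    BatchScanner bs = null;
    try {
      bs = connector.createBatchScanner(table, new Authorizations(), queryThreads);
      bs.setRanges(Collections.singletonList(new Range())); // whole table

      // Configure the iterator: priority 30, instance name "grep", then set its option.
      IteratorSetting si = new IteratorSetting(30, "grep", GrepIterator.class);
      GrepIterator.setTerm(si, term); // helper that adds the term as an iterator option
      bs.addScanIterator(si);

      long count = 0;
      for (Entry<Key,Value> entry : bs) {
        count++; // only entries that passed the server-side filter arrive here
      }
      return count;
    } finally {
      if (bs != null)
        bs.close(); // mirror the try/finally pattern used in the query above
    }
  }
}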

      Mutation m = new Mutation(asText(r));
      m.put(asText(random.nextInt()), asText(random.nextInt()), new Value(Integer.toHexString(r).getBytes()));
      bw.addMutation(m);
    }
    bw.close();
    BatchScanner s = c.createBatchScanner("test", Constants.NO_AUTHS, 2);
    s.setRanges(Collections.singletonList(new Range()));
    Key key = null;
    int count = 0;
    for (Entry<Key,Value> entry : s) {
      if (key != null)
        assertTrue(key.compareTo(entry.getKey()) < 0);

    System.out.printf("Multiple thread scan time %6.2f \n", time2 / 1000.0);
   
  }
 
  private long batchScan(List<Range> ranges, int threads) throws Exception {
    BatchScanner bs = getConnector().createBatchScanner("test_ingest", TestIngest.AUTHS, threads);
   
    bs.setRanges(ranges);
   
    int count = 0;
   
    long t1 = System.currentTimeMillis();
   
    byte[] rval = new byte[50];
    Random random = new Random();
   
    for (Entry<Key,Value> entry : bs) {
      count++;
      int row = VerifyIngest.getRow(entry.getKey());
      int col = VerifyIngest.getCol(entry.getKey());
     
      if (row < 0 || row >= NUM_TO_INGEST) {
        throw new Exception("unexcepted row " + row);
      }
     
      rval = TestIngest.genRandomValue(random, rval, 2, row, col);
     
      if (entry.getValue().compareTo(rval) != 0) {
        throw new Exception("unexcepted value row=" + row + " col=" + col);
      }
    }
   
    long t2 = System.currentTimeMillis();
   
    bs.close();
   
    if (count != NUM_TO_INGEST) {
      throw new Exception("Batch Scan did not return expected number of values " + count);
    }
   

   
    if (!caught)
      throw new Exception("Scan did not fail");
   
    // try to batch scan the table
    BatchScanner bs = getConnector().createBatchScanner("tt", Constants.NO_AUTHS, 2);
    bs.setRanges(Collections.singleton(new Range()));
   
    caught = false;
    try {
      for (Entry<Key,Value> entry : bs) {
        entry.getKey();

      columns[index++] = new Text(term);
    }
   
    log.debug("Looking up terms " + searchTerms + " expect to find " + docID);
   
    BatchScanner bs = state.getConnector().createBatchScanner(indexTableName, Constants.NO_AUTHS, 10);
    IteratorSetting ii = new IteratorSetting(20, "ii", IntersectingIterator.class);
    IntersectingIterator.setColumnFamilies(ii, columns);
    bs.addScanIterator(ii);
    bs.setRanges(Collections.singleton(new Range()));
   
    boolean sawDocID = false;
   
    for (Entry<Key,Value> entry2 : bs) {
      if (entry2.getKey().getColumnQualifier().equals(docID)) {
        sawDocID = true;
        // TODO breaking w/o reading all data causes batch reader to spew exceptions
        // break;
      }
    }
   
    bs.close();
   
    if (!sawDocID)
      throw new Exception("Did not see doc " + docID + " in index.  terms:" + searchTerms + " " + indexTableName + " " + dataTableName);
  }
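The index lookup above depends on helpers from the surrounding test (state, columns, docID). A compact, self-contained variant of the same term-intersection lookup is sketched below; it assumes the document-partitioned index layout IntersectingIterator expects, where the row is the partition, the column family is the term, and the column qualifier is the document id.

import java.util.Collections;
import java.util.HashSet;
import java.util.Map.Entry;
import java.util.Set;

import org.apache.accumulo.core.client.BatchScanner;
import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.core.client.IteratorSetting;
import org.apache.accumulo.core.client.TableNotFoundException;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Range;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.iterators.user.IntersectingIterator;
import org.apache.accumulo.core.security.Authorizations;
import org.apache.hadoop.io.Text;

public class IndexIntersectionSketch {

  // Returns the ids of documents that contain every one of the given terms.
  static Set<Text> docsContainingAll(Connector connector, String indexTable, Text[] terms, int queryThreads)
      throws TableNotFoundException {
    BatchScanner bs = connector.createBatchScanner(indexTable, new Authorizations(), queryThreads);
    Set<Text> docIds = new HashSet<Text>();
    try {
      // The intersection is computed within each partition row, so scan every partition.
      bs.setRanges(Collections.singleton(new Range()));

      IteratorSetting ii = new IteratorSetting(20, "ii", IntersectingIterator.class);
      IntersectingIterator.setColumnFamilies(ii, terms); // the terms to AND together
      bs.addScanIterator(ii);

      for (Entry<Key,Value> entry : bs)
        docIds.add(entry.getKey().getColumnQualifier()); // qualifier holds the document id
    } finally {
      bs.close();
    }
    return docIds;
  }
}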

    }
   
    log.debug("Found " + hashes.size() + " hashes starting at " + uuid);
   
    // use batch scanner to verify all of these exist in index
    BatchScanner indexScanner = conn.createBatchScanner(indexTableName, Constants.NO_AUTHS, 3);
    ArrayList<Range> ranges = new ArrayList<Range>();
    for (Text row : hashes.keySet()) {
      ranges.add(new Range(row));
    }
   
    indexScanner.setRanges(ranges);
   
    Map<Text,Text> hashes2 = new HashMap<Text,Text>();
   
    for (Entry<Key,Value> entry : indexScanner)
      hashes2.put(entry.getKey().getRow(), new Text(entry.getValue().get()));
   
    log.debug("Looked up " + ranges.size() + " ranges, found " + hashes2.size());
   
    if (!hashes.equals(hashes2)) {
      log.error("uuids from doc table : " + hashes.values());
      log.error("uuids from index     : " + hashes2.values());
      throw new Exception("Mismatch between document table and index " + indexTableName + " " + imageTableName);
    }
   
    indexScanner.close();
   
  }

      ranges.add(new Range(new Text(String.format("%08x", (i << 8) - 16))));
    }
   
    getConnector().tableOperations().create("t3");
    getConnector().tableOperations().addSplits("t3", splits);
    BatchScanner bs = getConnector().createBatchScanner("t3", Constants.NO_AUTHS, 3);
    bs.setRanges(ranges);
    count = 0;
    for (Entry<Key,Value> entry : bs) {
      if (entry != null)
        count++;
    }
   
    if (count != 0) {
      throw new Exception("Did not see expected number of entries, count = " + count);
    }
   
    bs.close();
   
  }

   
    for (int i = 0; i < words.length; i++) {
      words[i] = new Text(Insert.generateRandomWord(rand));
    }
   
    BatchScanner bs = state.getConnector().createBatchScanner(indexTableName, Constants.NO_AUTHS, 16);
    IteratorSetting ii = new IteratorSetting(20, "ii", IntersectingIterator.class.getName());
    IntersectingIterator.setColumnFamilies(ii, words);
    bs.addScanIterator(ii);
    bs.setRanges(Collections.singleton(new Range()));
   
    HashSet<Text> documentsFoundInIndex = new HashSet<Text>();
   
    for (Entry<Key,Value> entry2 : bs) {
      documentsFoundInIndex.add(entry2.getKey().getColumnQualifier());
    }
   
    bs.close();
   
    bs = state.getConnector().createBatchScanner(dataTableName, Constants.NO_AUTHS, 16);
   
    for (int i = 0; i < words.length; i++) {
      IteratorSetting more = new IteratorSetting(20 + i, "ii" + i, RegExFilter.class);
      RegExFilter.setRegexs(more, null, null, null, "(^|(.*\\s))" + words[i] + "($|(\\s.*))", false);
      bs.addScanIterator(more);
    }
   
    bs.setRanges(Collections.singleton(new Range()));
   
    HashSet<Text> documentsFoundByGrep = new HashSet<Text>();
   
    for (Entry<Key,Value> entry2 : bs) {
      documentsFoundByGrep.add(entry2.getKey().getRow());
    }
   
    bs.close();
   
    if (!documentsFoundInIndex.equals(documentsFoundByGrep)) {
      throw new Exception("Set of documents found not equal for words " + Arrays.asList(words).toString() + " " + documentsFoundInIndex + " "
          + documentsFoundByGrep);
    }
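The grep half of the comparison above stacks one RegExFilter per word on the data-table scan. A smaller, self-contained sketch of a single value-regex scan follows; the table name, regular expression, and thread count are assumed placeholders.

import java.util.Collections;
import java.util.HashSet;
import java.util.Map.Entry;
import java.util.Set;

import org.apache.accumulo.core.client.BatchScanner;
import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.core.client.IteratorSetting;
import org.apache.accumulo.core.client.TableNotFoundException;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Range;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.iterators.user.RegExFilter;
import org.apache.accumulo.core.security.Authorizations;
import org.apache.hadoop.io.Text;

public class ValueRegexScanSketch {

  // Returns the rows whose values match the given regular expression.
  static Set<Text> rowsWithMatchingValues(Connector connector, String table, String valueRegex, int queryThreads)
      throws TableNotFoundException {
    BatchScanner bs = connector.createBatchScanner(table, new Authorizations(), queryThreads);
    Set<Text> matchingRows = new HashSet<Text>();
    try {
      bs.setRanges(Collections.singleton(new Range())); // whole table

      // Only the value regex is set; null means "do not filter" on that part of the key.
      IteratorSetting ri = new IteratorSetting(20, "valueRegex", RegExFilter.class);
      RegExFilter.setRegexs(ri, null, null, null, valueRegex, false);
      bs.addScanIterator(ri);

      for (Entry<Key,Value> entry : bs)
        matchingRows.add(entry.getKey().getRow());
    } finally {
      bs.close();
    }
    return matchingRows;
  }
}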

    for (Entry<Key,Value> entry : scanner)
      documentsToDelete.add(new Range(entry.getKey().getColumnQualifier()));
   
    if (documentsToDelete.size() > 0) {
      // use a batch scanner to fetch all documents
      BatchScanner bscanner = state.getConnector().createBatchScanner(docTableName, Constants.NO_AUTHS, 8);
      bscanner.setRanges(documentsToDelete);
     
      BatchWriter ibw = state.getMultiTableBatchWriter().getBatchWriter(indexTableName);
      BatchWriter dbw = state.getMultiTableBatchWriter().getBatchWriter(docTableName);
     
      int count = 0;
     
      for (Entry<Key,Value> entry : bscanner) {
        String docID = entry.getKey().getRow().toString();
        String doc = entry.getValue().toString();
       
        Insert.unindexDocument(ibw, doc, docID, numPartitions);
       
        Mutation m = new Mutation(docID);
        m.putDelete("doc", "");
       
        dbw.addMutation(m);
        count++;
      }
     
      bscanner.close();
     
      state.getMultiTableBatchWriter().flush();
     
      if (count != documentsToDelete.size()) {
        throw new Exception("Batch scanner did not return expected number of docs " + count + " " + documentsToDelete.size());
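The delete logic above relies on helpers from the surrounding test (Insert.unindexDocument, the shared MultiTableBatchWriter). The core pattern, reading a batch of document ranges with a BatchScanner and turning each result into a delete Mutation, is sketched below; the BatchWriter is taken as a parameter so the sketch stays independent of how it was created, and the "doc" column family simply follows the convention used in the snippet above.

import java.util.Collection;
import java.util.Map.Entry;

import org.apache.accumulo.core.client.BatchScanner;
import org.apache.accumulo.core.client.BatchWriter;
import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.core.client.MutationsRejectedException;
import org.apache.accumulo.core.client.TableNotFoundException;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Mutation;
import org.apache.accumulo.core.data.Range;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.security.Authorizations;
import org.apache.hadoop.io.Text;

public class BatchDeleteSketch {

  // Scans the given document ranges and writes a delete for the "doc" column of each entry found.
  static int deleteDocuments(Connector connector, String docTable, Collection<Range> documentRanges,
      BatchWriter writer) throws TableNotFoundException, MutationsRejectedException {
    BatchScanner bs = connector.createBatchScanner(docTable, new Authorizations(), 8);
    int count = 0;
    try {
      bs.setRanges(documentRanges);
      for (Entry<Key,Value> entry : bs) {
        Mutation m = new Mutation(entry.getKey().getRow());
        m.putDelete(new Text("doc"), new Text("")); // delete the stored document body
        writer.addMutation(m);
        count++;
      }
    } finally {
      bs.close();
    }
    writer.flush(); // push the deletes before the caller verifies the table contents
    return count;
  }
}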
