Package mil.nga.giat.geowave.accumulo.mapreduce.input

Source Code of mil.nga.giat.geowave.accumulo.mapreduce.input.GeoWaveRecordReader

package mil.nga.giat.geowave.accumulo.mapreduce.input;

import java.io.Closeable;
import java.io.IOException;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.math.RoundingMode;
import java.util.Collection;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.NoSuchElementException;
import java.util.Set;

import mil.nga.giat.geowave.accumulo.AccumuloOperations;
import mil.nga.giat.geowave.accumulo.mapreduce.JobContextAdapterStore;
import mil.nga.giat.geowave.accumulo.query.InputFormatAccumuloRangeQuery;
import mil.nga.giat.geowave.accumulo.util.CloseableIteratorWrapper;
import mil.nga.giat.geowave.store.CloseableIterator;
import mil.nga.giat.geowave.store.filter.QueryFilter;
import mil.nga.giat.geowave.store.index.Index;
import mil.nga.giat.geowave.store.query.DistributableQuery;

import org.apache.accumulo.core.client.AccumuloException;
import org.apache.accumulo.core.client.AccumuloSecurityException;
import org.apache.accumulo.core.data.ByteSequence;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.PartialKey;
import org.apache.accumulo.core.data.Range;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.log4j.Logger;

import com.google.common.base.Preconditions;
import com.google.common.collect.Iterators;

/**
* This class is used by the GeoWaveInputFormat to read data from an Accumulo
* data store.
*
* @param <T>
*            The native type for the reader
*/
public class GeoWaveRecordReader<T> extends
    RecordReader<GeoWaveInputKey, T>
{
  private static final class RangeIndexPair
  {
    private final Range range;
    private final Index index;

    public RangeIndexPair(
        final Range range,
        final Index index ) {
      this.range = range;
      this.index = index;
    }

    @Override
    public int hashCode() {
      final int prime = 31;
      int result = 1;
      result = (prime * result) + ((index == null) ? 0 : index.hashCode());
      result = (prime * result) + ((range == null) ? 0 : range.hashCode());
      return result;
    }

    @Override
    public boolean equals(
        final Object obj ) {
      if (this == obj) {
        return true;
      }
      if (obj == null) {
        return false;
      }
      if (getClass() != obj.getClass()) {
        return false;
      }
      final RangeIndexPair other = (RangeIndexPair) obj;
      if (index == null) {
        if (other.index != null) {
          return false;
        }
      }
      else if (!index.equals(other.index)) {
        return false;
      }
      if (range == null) {
        if (other.range != null) {
          return false;
        }
      }
      else if (!range.equals(other.range)) {
        return false;
      }
      return true;
    }
  }

  protected static class ProgressPerRange
  {
    private final float startProgress;
    private final float deltaProgress;

    public ProgressPerRange(
        final float startProgress,
        final float endProgress ) {
      this.startProgress = startProgress;
      this.deltaProgress = endProgress - startProgress;
    }

    public float getOverallProgress(
        final float rangeProgress ) {
      return startProgress + (rangeProgress * deltaProgress);
    }
  }

  protected static final Logger LOGGER = Logger.getLogger(GeoWaveRecordReader.class);
  protected long numKeysRead;
  protected CloseableIterator<?> iterator;
  protected Key currentAccumuloKey = null;
  protected Map<RangeIndexPair, ProgressPerRange> progressPerRange;
  protected GeoWaveInputKey currentGeoWaveKey = null;
  protected RangeIndexPair currentGeoWaveRangeIndexPair = null;
  protected T currentValue = null;
  protected GeoWaveInputSplit split;

  /**
   * Initialize a scanner over the given input split using this task attempt
   * configuration.
   */
  @Override
  public void initialize(
      final InputSplit inSplit,
      final TaskAttemptContext attempt )
      throws IOException {
    split = (GeoWaveInputSplit) inSplit;

    try {
      final AccumuloOperations operations = GeoWaveInputFormat.getAccumuloOperations(attempt);

      final JobContextAdapterStore adapterStore = GeoWaveInputFormat.getDataAdapterStore(
          attempt,
          operations);
      final DistributableQuery query = GeoWaveInputFormat.getQuery(attempt);
      final String[] additionalAuthorizations = GeoWaveInputFormat.getAuthorizations(attempt);

      numKeysRead = 0;
      final Map<RangeIndexPair, CloseableIterator<?>> iteratorsPerRange = new LinkedHashMap<RangeIndexPair, CloseableIterator<?>>();

      final Set<Index> indices = split.getIndices();
      for (final Index i : indices) {
        final List<Range> ranges = split.getRanges(i);
        List<QueryFilter> queryFilters = null;
        if (query != null) {
          queryFilters = query.createFilters(i.getIndexModel());
        }
        for (final Range r : ranges) {
          iteratorsPerRange.put(
              new RangeIndexPair(
                  r,
                  i),
              new InputFormatAccumuloRangeQuery(
                  adapterStore.getAdapterIds(),
                  i,
                  r,
                  queryFilters,
                  additionalAuthorizations).query(
                  operations,
                  adapterStore,
                  null,
                  true));

        }
      }
      // calculate max cardinality
      final Collection<RangeIndexPair> pairs = iteratorsPerRange.keySet();
      int maxCardinality = 1;
      for (final RangeIndexPair p : pairs) {
        maxCardinality = Math.max(
            maxCardinality,
            GeoWaveInputFormat.getMaxCardinalityFromRange(p.range));
      }
      // calculate big integer sum of all ranges at this cardinality, and
      // keep track of the incremental sum at each range
      BigInteger sum = BigInteger.ZERO;
      final Map<RangeIndexPair, BigInteger> incrementalRangeSums = new LinkedHashMap<RangeIndexPair, BigInteger>();
      for (final RangeIndexPair p : pairs) {
        incrementalRangeSums.put(
            p,
            sum);
        final BigInteger range = GeoWaveInputFormat.getRange(
            p.range,
            maxCardinality);
        sum = sum.add(range);
      }
      // finally we can compute percent progress
      progressPerRange = new LinkedHashMap<RangeIndexPair, ProgressPerRange>();
      RangeIndexPair prevRangeIndex = null;
      float prevProgress = 0f;
      final BigDecimal decimalSum = new BigDecimal(
          sum);
      for (final Entry<RangeIndexPair, BigInteger> entry : incrementalRangeSums.entrySet()) {
        final BigInteger value = entry.getValue();
        final float progress = (float) new BigDecimal(
            value).divide(
            decimalSum,
            RoundingMode.HALF_UP).doubleValue();
        if (prevRangeIndex != null) {
          progressPerRange.put(
              prevRangeIndex,
              new ProgressPerRange(
                  prevProgress,
                  progress));
        }
        prevRangeIndex = entry.getKey();
        prevProgress = progress;
      }
      progressPerRange.put(
          prevRangeIndex,
          new ProgressPerRange(
              prevProgress,
              1f));
      // concatenate iterators
      iterator = new CloseableIteratorWrapper<Object>(
          new Closeable() {
            @Override
            public void close()
                throws IOException {
              for (final CloseableIterator<?> it : iteratorsPerRange.values()) {
                it.close();
              }
            }
          },
          concatenateWithCallback(
              iteratorsPerRange.entrySet().iterator(),
              new NextRangeCallback() {

                @Override
                public void setRange(
                    final RangeIndexPair indexPair ) {
                  currentGeoWaveRangeIndexPair = indexPair;
                }
              }));
    }
    catch (AccumuloException | AccumuloSecurityException e) {
      LOGGER.error(
          "Unable to query accumulo for range input split",
          e);
    }
  }

  @Override
  public void close() {
    if (iterator != null) {
      try {
        iterator.close();
      }
      catch (final IOException e) {
        LOGGER.warn(
            "Unable to close iterator",
            e);
      }
    }
  }

  @Override
  public boolean nextKeyValue()
      throws IOException,
      InterruptedException {
    if (iterator != null) {
      if (iterator.hasNext()) {
        ++numKeysRead;
        final Object value = iterator.next();
        if (value instanceof Entry) {
          final Entry<GeoWaveInputKey, T> entry = (Entry<GeoWaveInputKey, T>) value;
          currentGeoWaveKey = entry.getKey();
          if (currentGeoWaveKey == null) {
            currentAccumuloKey = null;
          }
          else {
            currentAccumuloKey = currentGeoWaveKey.getAccumuloKey();
          }
          currentValue = entry.getValue();
        }
        return true;
      }
    }
    return false;
  }

  @Override
  public float getProgress()
      throws IOException {
    if ((numKeysRead > 0) && (currentAccumuloKey == null)) {
      return 1.0f;
    }
    if (currentGeoWaveRangeIndexPair == null) {
      return 0.0f;
    }
    final ProgressPerRange progress = progressPerRange.get(currentGeoWaveRangeIndexPair);
    if (progress == null) {
      return getProgressForRange(
          currentGeoWaveRangeIndexPair.range,
          currentAccumuloKey);
    }
    return getOverallProgress(
        currentGeoWaveRangeIndexPair.range,
        currentAccumuloKey,
        progress);
  }

  private static float getOverallProgress(
      final Range range,
      final Key currentKey,
      final ProgressPerRange progress ) {
    final float rangeProgress = getProgressForRange(
        range,
        currentKey);
    return progress.getOverallProgress(rangeProgress);
  }

  private static float getProgressForRange(
      final ByteSequence start,
      final ByteSequence end,
      final ByteSequence position ) {
    final int maxDepth = Math.min(
        Math.max(
            end.length(),
            start.length()),
        position.length());
    final BigInteger startBI = new BigInteger(
        GeoWaveInputFormat.extractBytes(
            start,
            maxDepth));
    final BigInteger endBI = new BigInteger(
        GeoWaveInputFormat.extractBytes(
            end,
            maxDepth));
    final BigInteger positionBI = new BigInteger(
        GeoWaveInputFormat.extractBytes(
            position,
            maxDepth));
    return (float) (positionBI.subtract(
        startBI).doubleValue() / endBI.subtract(
        startBI).doubleValue());
  }

  private static float getProgressForRange(
      final Range range,
      final Key currentKey ) {
    if (currentKey == null) {
      return 0f;
    }
    if ((range.getStartKey() != null) && (range.getEndKey() != null)) {
      if (!range.getStartKey().equals(
          range.getEndKey(),
          PartialKey.ROW)) {
        // just look at the row progress
        return getProgressForRange(
            range.getStartKey().getRowData(),
            range.getEndKey().getRowData(),
            currentKey.getRowData());
      }
      else if (!range.getStartKey().equals(
          range.getEndKey(),
          PartialKey.ROW_COLFAM)) {
        // just look at the column family progress
        return getProgressForRange(
            range.getStartKey().getColumnFamilyData(),
            range.getEndKey().getColumnFamilyData(),
            currentKey.getColumnFamilyData());
      }
      else if (!range.getStartKey().equals(
          range.getEndKey(),
          PartialKey.ROW_COLFAM_COLQUAL)) {
        // just look at the column qualifier progress
        return getProgressForRange(
            range.getStartKey().getColumnQualifierData(),
            range.getEndKey().getColumnQualifierData(),
            currentKey.getColumnQualifierData());
      }
    }
    // if we can't figure it out, then claim no progress
    return 0f;
  }

  @Override
  public GeoWaveInputKey getCurrentKey()
      throws IOException,
      InterruptedException {
    return currentGeoWaveKey;
  }

  @Override
  public T getCurrentValue()
      throws IOException,
      InterruptedException {
    return currentValue;
  }

  private static interface NextRangeCallback
  {
    public void setRange(
        RangeIndexPair indexPair );
  }

  /**
   * Mostly guava's concatenate method, but there is a need for a callback
   * between iterators
   */
  private static Iterator<Object> concatenateWithCallback(
      final Iterator<Entry<RangeIndexPair, CloseableIterator<?>>> inputs,
      final NextRangeCallback nextRangeCallback ) {
    Preconditions.checkNotNull(inputs);
    return new Iterator<Object>() {
      Iterator<?> currentIterator = Iterators.emptyIterator();
      Iterator<?> removeFrom;
      RangeIndexPair currentRangeIndex;

      @Override
      public boolean hasNext() {
        boolean currentHasNext;
        while (!(currentHasNext = Preconditions.checkNotNull(
            currentIterator).hasNext()) && inputs.hasNext()) {
          final Entry<RangeIndexPair, CloseableIterator<?>> entry = inputs.next();
          nextRangeCallback.setRange(entry.getKey());
          currentIterator = entry.getValue();
        }
        return currentHasNext;
      }

      @Override
      public Object next() {
        if (!hasNext()) {
          throw new NoSuchElementException();
        }
        removeFrom = currentIterator;
        return currentIterator.next();
      }

      @Override
      public void remove() {
        Preconditions.checkState(
            removeFrom != null,
            "no calls to next() since last call to remove()");
        removeFrom.remove();
        removeFrom = null;
      }
    };
  }
}
TOP

Related Classes of mil.nga.giat.geowave.accumulo.mapreduce.input.GeoWaveRecordReader

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.