Package com.google.appengine.tools.mapreduce.inputs

Source Code of com.google.appengine.tools.mapreduce.inputs.DatastoreShardStrategy$DoubleSplitter

package com.google.appengine.tools.mapreduce.inputs;

import static com.google.appengine.api.datastore.Entity.KEY_RESERVED_PROPERTY;
import static com.google.appengine.api.datastore.Entity.SCATTER_RESERVED_PROPERTY;
import static com.google.appengine.api.datastore.FetchOptions.Builder.withLimit;
import static com.google.appengine.api.datastore.Query.CompositeFilterOperator.AND;
import static com.google.appengine.api.datastore.Query.FilterOperator.GREATER_THAN_OR_EQUAL;
import static com.google.appengine.api.datastore.Query.FilterOperator.LESS_THAN;
import static com.google.appengine.api.datastore.Query.FilterOperator.LESS_THAN_OR_EQUAL;
import static com.google.appengine.api.datastore.Query.SortDirection.ASCENDING;
import static com.google.appengine.api.datastore.Query.SortDirection.DESCENDING;
import static com.google.appengine.tools.mapreduce.inputs.BaseDatastoreInput.createQuery;

import com.google.appengine.api.datastore.DatastoreFailureException;
import com.google.appengine.api.datastore.DatastoreService;
import com.google.appengine.api.datastore.DatastoreTimeoutException;
import com.google.appengine.api.datastore.Entity;
import com.google.appengine.api.datastore.EntityNotFoundException;
import com.google.appengine.api.datastore.Key;
import com.google.appengine.api.datastore.PreparedQuery;
import com.google.appengine.api.datastore.Query;
import com.google.appengine.api.datastore.Query.CompositeFilter;
import com.google.appengine.api.datastore.Query.CompositeFilterOperator;
import com.google.appengine.api.datastore.Query.Filter;
import com.google.appengine.api.datastore.Query.FilterPredicate;
import com.google.appengine.api.datastore.Query.SortDirection;
import com.google.appengine.api.datastore.Rating;
import com.google.appengine.tools.cloudstorage.ExceptionHandler;
import com.google.appengine.tools.cloudstorage.RetryHelper;
import com.google.appengine.tools.cloudstorage.RetryParams;
import com.google.appengine.tools.mapreduce.impl.util.SerializationUtil;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableList.Builder;
import com.google.common.collect.ImmutableMap;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.ConcurrentModificationException;
import java.util.Date;
import java.util.List;
import java.util.Map;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.concurrent.Callable;
import java.util.logging.Logger;

/**
* Splits an arbitrary datastore query. This is used by {@link DatastoreInput} to shard queries.
* This is done in one of two ways:
*
*  1. If the query contains an inequality filter, the lower and upper bounds are determined (this
* may involve querying the datastore) then the range is split naively. This works well when the
* property that is being queried on is uniformly distributed.
*
*  2. If the query does not contain an inequality filter, the query is partitioned by the
* entity key. This is done by using the "__scatter__" property to get a random sample of the
* keyspace and partitioning based on that. This can result in a poor distribution if there are
* equality filters on the query that bias the selection with respect to certain regions of
* keyspace.
*
*  The following clauses are not supported by this class: An inequality filter of unsupported type.
* (Only numeric and date types are currently supported: {@link "https://developers.google.com/appengine/docs/java/datastore/entities#Java_Properties_and_value_types"}
* )
*
*  Filters that are incompatible with datastore cursors, such as: combining multiple clauses with
* an OR, a filter on a value being NOT_EQUAL, or a filter on a value being IN a set. {@link "https://developers.google.com/appengine/docs/java/datastore/queries#Java_Limitations_of_cursors"}
*/
public class DatastoreShardStrategy {

  private static class Range {
    private FilterPredicate lowerBound;
    private FilterPredicate upperBound;

    Range() {}

    Range(FilterPredicate lowerBound, FilterPredicate upperBound) {
      this.lowerBound = lowerBound;
      this.upperBound = upperBound;
      checkPropertyNameMatches();
    }

    private void checkPropertyNameMatches() {
      if (lowerBound != null && upperBound != null
          && !lowerBound.getPropertyName().equals(upperBound.getPropertyName())) {
        throw new IllegalArgumentException("A range must be defined on only one property found: "
            + lowerBound + " and " + upperBound);
      }
    }

    void setLowerBound(FilterPredicate lowerBound) {
      if (this.lowerBound != null) {
        throw new UnsupportedOperationException(
            "Found both: " + this.lowerBound + " and " + lowerBound + " on the same query.");
      }
      this.lowerBound = lowerBound;
      checkPropertyNameMatches();
    }

    void setUpperBound(FilterPredicate upperBound) {
      if (this.upperBound != null) {
        throw new UnsupportedOperationException(
            "Found both: " + this.upperBound + " and " + upperBound + " on the same query.");
      }
      this.upperBound = upperBound;
      checkPropertyNameMatches();
    }

    String getPropertyName() {
      if (lowerBound != null) {
        return lowerBound.getPropertyName();
      }
      if (upperBound != null) {
        return upperBound.getPropertyName();
      }
      return null;
    }

    FilterPredicate getLowerBound() {
      return lowerBound;
    }

    FilterPredicate getUpperBound() {
      return upperBound;
    }
  }

  /**
   * Strategy for choosing the boundary values used to cut a {@link Range} into sub-ranges.
   *
   * @param <T> the type of the boundary values produced
   */
  private interface Splitter<T extends Serializable & Comparable<T>> {
    /**
     * Computes the boundary values for dividing {@code range}.
     *
     * @param range the range to divide; the implementations in this file read the values of both
     *        its lower and its upper bound
     * @param numSplits the requested number of sub-ranges
     * @return the boundary values as a sorted set
     */
    SortedSet<T> getSplitPoints(Range range, int numSplits);
  }

  private static class DoubleSplitter implements Splitter<Double> {
    @Override
    public SortedSet<Double> getSplitPoints(Range range, int numSplits) {
      return splitRange(((Number) range.getLowerBound().getValue()).doubleValue(),
          ((Number) range.getUpperBound().getValue()).doubleValue(), numSplits);
    }
  }

  private static class LongSplitter implements Splitter<Long> {
    @Override
    public SortedSet<Long> getSplitPoints(Range range, int numSplits) {
      return splitRange(((Number) range.getLowerBound().getValue()).longValue(),
          ((Number) range.getUpperBound().getValue()).longValue(), numSplits);
    }
  }

  private static class DateSplitter implements Splitter<Long> {
    @Override
    public SortedSet<Long> getSplitPoints(Range range, int numSplits) {
      return splitRange(((Date) range.getLowerBound().getValue()).getTime(),
          ((Date) range.getUpperBound().getValue()).getTime(), numSplits);
    }
  }

  private static class RatingSplitter implements Splitter<Long> {
    @Override
    public SortedSet<Long> getSplitPoints(Range range, int numSplits) {
      return splitRange(((Rating) range.getLowerBound().getValue()).getRating(),
          ((Rating) range.getUpperBound().getValue()).getRating(), numSplits);
    }
  }

  /** Logger for this class, named after the class. */
  private static final Logger logger = Logger.getLogger(DatastoreShardStrategy.class.getName());

  /**
   * Exception policy passed to {@link RetryHelper} by {@code runQuery}. It is built with
   * {@code retryOn} for {@link ConcurrentModificationException}, {@link DatastoreTimeoutException}
   * and {@link DatastoreFailureException}, and with {@code abortOn} for
   * {@link EntityNotFoundException}.
   */
  private static final ExceptionHandler EXCEPTION_HANDLER = new ExceptionHandler.Builder().retryOn(
      ConcurrentModificationException.class, DatastoreTimeoutException.class,
      DatastoreFailureException.class).abortOn(EntityNotFoundException.class).build();

  /**
   * Maps the runtime class of an inequality filter value to the {@link Splitter} used to divide
   * its range. Consulted by {@code getSplitter}, which rejects any class that is not listed here.
   */
  private static final Map<Class<?>, Splitter<?>> typeMap =
      ImmutableMap.<Class<?>, Splitter<?>>builder()
          // Floating point types are split as doubles.
          .put(Float.class, new DoubleSplitter()).put(Double.class, new DoubleSplitter())
          // Integer types are split as longs.
          .put(Byte.class, new LongSplitter())
          .put(Short.class, new LongSplitter())
          .put(Integer.class, new LongSplitter())
          .put(Long.class, new LongSplitter())
          // Dates and ratings have dedicated splitters.
          .put(Date.class, new DateSplitter())
          .put(Rating.class, new RatingSplitter())
          .build();

  /** Datastore service through which {@code runQuery} prepares and executes its queries. */
  private final DatastoreService datastore;


  /**
   * Creates a strategy that issues its queries through the given datastore service.
   *
   * @param datastoreService the service used to prepare queries
   */
  DatastoreShardStrategy(DatastoreService datastoreService) {
    this.datastore = datastoreService;
  }

  /**
   * @return A list of query objects that together cover the same input data as inputQuery.
   * @param query the query to divide into multiple queries
   * @param numSegments the number of queries to divide the inputQuery into. The actual result may
   *        contain fewer if it cannot be divided that finely.
   */
  @SuppressWarnings("deprecation")
  public List<Query> splitQuery(Query query, int numSegments) {
    if (query.getFilterPredicates() != null && !query.getFilterPredicates().isEmpty()) {
      throw new IllegalArgumentException(
          "FilterPredicates are not supported call Query.setFiler() instead.");
    }
    List<Filter> equalityFilters = new ArrayList<>();
    Range range = new Range();
    filterToEqualityListAndRange(query.getFilter(), equalityFilters, range);

    String property = range.getPropertyName();
    List<Range> ranges;
    if (property == null) {
      ranges = getScatterSplitPoints(query.getNamespace(), query.getKind(), numSegments);
    } else {
      if (range.getUpperBound() == null) {
        FilterPredicate predicate = findFirstPredicate(query.getNamespace(), query.getKind(),
            equalityFilters, property, DESCENDING);
        if (predicate == null) {
          return ImmutableList.of(query);
        }
        range.setUpperBound(predicate);
      }
      if (range.getLowerBound() == null) {
        FilterPredicate predicate = findFirstPredicate(query.getNamespace(), query.getKind(),
            equalityFilters, property, ASCENDING);
        if (predicate == null) {
          return ImmutableList.of(query);
        }
        range.setLowerBound(predicate);
      }
      ranges = boundriesToRanges(range, getSplitter(range).getSplitPoints(range, numSegments));
    }
    return toQueries(query, equalityFilters, ranges);
  }

  /**
   * Uses the scatter property to distribute ranges to segments.
   *
   *  A random scatter property is added to 1 out of every 512 entities see:
   * http://code.google.com/p/appengine-mapreduce/wiki/ScatterPropertyImplementation
   *
   *  Retrieving the entities with the highest scatter values provides a random sample of entities.
   * Because they are randomly selected, their distribution in keyspace should be the same as other
   * entities.
   *
   *  Looking at Keyspace, It looks something like this:
   * |---*------*------*---*--------*-----*--------*--| Where "*" is a scatter entity and "-" is
   * some other entity.
   *
   *  So once sample entities are obtained them by key allows them to serve as boundaries between
   * ranges of keyspace.
   */
  private List<Range> getScatterSplitPoints(String namespace, String kind, final int numSegments) {
    Query query = createQuery(namespace, kind).addSort(SCATTER_RESERVED_PROPERTY).setKeysOnly();
    List<Key> splitPoints = sortKeys(runQuery(query, numSegments - 1));
    List<Range> result = new ArrayList<>(splitPoints.size() + 1);
    FilterPredicate lower = null;
    for (Key k : splitPoints) {
      result.add(new Range(lower, new FilterPredicate(KEY_RESERVED_PROPERTY, LESS_THAN, k)));
      lower = new FilterPredicate(KEY_RESERVED_PROPERTY, GREATER_THAN_OR_EQUAL, k);
    }
    result.add(new Range(lower, null));
    logger.info("Requested " + numSegments + " segments, retrieved " + result.size());
    return result;
  }

  private List<Key> sortKeys(List<Entity> entities) {
    List<Key> result = new ArrayList<Key>(entities.size());
    for (Entity e : entities) {
      result.add(e.getKey());
    }
    Collections.sort(result);
    return result;
  }

  private List<Query> toQueries(Query query, List<Filter> equalityFilters, List<Range> split) {
    List<Query> result = new ArrayList<>(split.size());
    for (Range r : split) {
      Query subQuery = SerializationUtil.clone(query);
      Builder<Filter> b = ImmutableList
          .<Filter>builder()
          .addAll(equalityFilters);
      if (r.getLowerBound() != null) {
        b.add(r.getLowerBound());
      }
      if (r.getUpperBound() != null) {
        b.add(r.getUpperBound());
      }
      ImmutableList<Filter> filters = b.build();
      if (!filters.isEmpty()) {
        if (filters.size() > 1) {
          subQuery.setFilter(new CompositeFilter(AND, filters));
        } else {
          subQuery.setFilter(filters.get(0));
        }
      }
      result.add(subQuery);
    }
    return result;
  }

  private void filterToEqualityListAndRange(Filter filter, List<Filter> equality, Range range) {
    if (filter == null) {
      return;
    }
    if (filter instanceof FilterPredicate) {
      FilterPredicate predicate = (FilterPredicate) filter;
      switch (predicate.getOperator()) {
        case EQUAL:
          equality.add(predicate);
          break;
        case GREATER_THAN:
        case GREATER_THAN_OR_EQUAL:
          range.setLowerBound(predicate);
          break;
        case IN:
          throw new UnsupportedOperationException(
              "Queries using an IN filter are unsupported because they do not work with cursors.");
        case LESS_THAN:
        case LESS_THAN_OR_EQUAL:
          range.setUpperBound(predicate);
          break;
        case NOT_EQUAL:
          throw new UnsupportedOperationException("Queries using an NOT_EQUAL filter are"
              + " unsupported because they do not work with cursors.");
        default:
          throw new UnsupportedOperationException("Unsupported query FilterPredicate");
      }
    } else if (filter instanceof CompositeFilter) {
      CompositeFilter composite = (CompositeFilter) filter;
      if (CompositeFilterOperator.AND != composite.getOperator()) {
        throw new UnsupportedOperationException(
            "AND is the only supported CompositeFilterOperator for datastore queries.");
      }
      for (Filter f : composite.getSubFilters()) {
        filterToEqualityListAndRange(f, equality, range);
      }
    } else {
      throw new UnsupportedOperationException(
          "Only FilterPredicate or CompositeFilter are supported for datastore queries.");
    }
  }

  private Splitter<?> getSplitter(Range range) {
    Object value = range.getLowerBound().getValue();
    Object type = value == null ? null : value.getClass();
    Splitter<?> splitter = typeMap.get(type);
    if (splitter == null) {
      throw new IllegalArgumentException("Unsupported value type for inequality filter: " + type);
    }
    return splitter;
  }

  private static <T> List<Range> boundriesToRanges(Range orig, SortedSet<T> boundrySet) {
    List<Range> result = new ArrayList<>();
    List<T> boundries = new ArrayList<>(boundrySet);
    String property = orig.getPropertyName();
    FilterPredicate lower =
        new FilterPredicate(property, orig.getLowerBound().getOperator(), boundries.get(0));
    for (int i = 1; i < boundries.size() - 1; i++) {
      result.add(new Range(lower, new FilterPredicate(property, LESS_THAN, boundries.get(i))));
      lower = new FilterPredicate(property, GREATER_THAN_OR_EQUAL, boundries.get(i));
    }
    result.add(new Range(lower, new FilterPredicate(property, orig.getUpperBound().getOperator(),
        boundries.get(boundries.size() - 1))));
    return result;
  }

  private FilterPredicate findFirstPredicate(String namespace, String kind,
      List<Filter> equalityFilters, String propertyName, SortDirection direction) {
    Query q = createQuery(namespace, kind).addSort(propertyName, direction);
    if (!equalityFilters.isEmpty()) {
      if (equalityFilters.size() == 1) {
        q.setFilter(equalityFilters.get(0));
      } else {
        q.setFilter(new CompositeFilter(AND, equalityFilters));
      }
    }
    List<Entity> item = runQuery(q, 1);
    if (item.isEmpty()) {
      return null;
    }
    return new FilterPredicate(propertyName,
        direction == DESCENDING ? LESS_THAN_OR_EQUAL : GREATER_THAN_OR_EQUAL, item.get(0));
  }

  private List<Entity> runQuery(Query q, final int limit) {
    final PreparedQuery preparedQuery = datastore.prepare(q);
    return RetryHelper.runWithRetries(new Callable<List<Entity>>() {
      @Override
      public List<Entity> call() {
        List<Entity> list = preparedQuery.asList(withLimit(limit));
        list.size(); // Forces the loading of all the data.
        return list;
      }
    },RetryParams.getDefaultInstance(), EXCEPTION_HANDLER);
  }

  @VisibleForTesting
  static SortedSet<Long> splitRange(long start, long end, int numSplits) {
    SortedSet<Long> result = new TreeSet<>();
    result.add(start);
    long range = end - start;
    for (int i = 1; i < numSplits; i++) {
      double fraction = i / (double) numSplits;
      result.add(start + Math.round(range * fraction));
    }
    result.add(end);
    return result;
  }

  @VisibleForTesting
  static SortedSet<Double> splitRange(double start, double end, int numSplits) {
    SortedSet<Double> result = new TreeSet<>();
    result.add(start);
    double range = end - start;
    for (int i = 1; i < numSplits; i++) {
      double fraction = i / (double) numSplits;
      result.add(start + range * fraction);
    }
    result.add(end);
    return result;
  }
}
TOP

Related Classes of com.google.appengine.tools.mapreduce.inputs.DatastoreShardStrategy$DoubleSplitter

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.