Source Code of co.cask.cdap.api.dataset.lib.TimeseriesDataset$Entry

/*
* Copyright © 2014 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/

package co.cask.cdap.api.dataset.lib;

import co.cask.cdap.api.annotation.Property;
import co.cask.cdap.api.common.Bytes;
import co.cask.cdap.api.dataset.DatasetSpecification;
import co.cask.cdap.api.dataset.table.Put;
import co.cask.cdap.api.dataset.table.Row;
import co.cask.cdap.api.dataset.table.Table;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.collect.AbstractIterator;
import com.google.common.collect.Lists;

import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;

/**
* An abstract class for time series Datasets.
*/
abstract class TimeseriesDataset extends AbstractDataset {

  public static final String ATTR_TIME_INTERVAL_TO_STORE_PER_ROW = "timeIntervalToStorePerRow";

  /**
   * See the {@link TimeseriesTable} javadoc for a description.
   */
  public static final long DEFAULT_TIME_INTERVAL_PER_ROW = TimeUnit.HOURS.toMillis(1);

  // This is a hard limit on the number of rows to read per read operation. It is a safety check, not something user
  // code should rely on. The current implementation needs this check; it may change if a readHigherOrEq()-style
  // read is added.
  // This means the maximum time range that can be scanned is
  // rowPartitionIntervalSize * MAX_ROWS_TO_SCAN_PER_READ,
  // which is ~70 days for 1-minute intervals and ~11.5 years for 1-hour intervals.
  /**
   * Limit on the number of rows to scan per read.
   */
  public static final int MAX_ROWS_TO_SCAN_PER_READ = 100000;
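
  // Illustrative arithmetic behind the figures above (an editorial sketch, not part of the original source):
  //   TimeUnit.MINUTES.toMillis(1) * MAX_ROWS_TO_SCAN_PER_READ = 60,000 ms * 100,000    = 6.0e9 ms  ~ 69 days
  //   TimeUnit.HOURS.toMillis(1)   * MAX_ROWS_TO_SCAN_PER_READ = 3,600,000 ms * 100,000 = 3.6e11 ms ~ 11.4 years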

  protected final Table table;

  @Property
  long rowPartitionIntervalSize;

  /**
   * Base constructor that configures the underlying {@link Table} and the row partition interval size
   * from the {@link DatasetSpecification}.
   */
  TimeseriesDataset(DatasetSpecification spec, Table table) {
    super(spec.getName(), table);
    this.rowPartitionIntervalSize = spec.getLongProperty(ATTR_TIME_INTERVAL_TO_STORE_PER_ROW,
                                                         DEFAULT_TIME_INTERVAL_PER_ROW);
    this.table = table;
  }

  /**
   * Writes a constructed value. This implementation overwrites any existing value.
   * Subclasses can override this method to apply their own update logic (e.g. incrementing a counter).
   *
   * @param row row key to write to
   * @param columnName column name to write to
   * @param value the value to write, as passed in with the {@link Entry}
   */
  void write(byte[] row, byte[] columnName, byte[] value) {
    Put put = new Put(row, columnName, value);
    table.put(put);
  }

  void write(byte[] key, byte[] value, long timestamp, byte[]... tags) {

    // Note: no need to validate the entry here, since its completeness is enforced by its constructor.
    // Please see the class javadoc for details of the stored data format.

    byte[] row = createRow(key, timestamp, rowPartitionIntervalSize);

    // Note: the sorting code could live in Entry, but the same constructor is used when reading, and tags do not
    // need to be sorted during reads (they are already stored in ascending order by the storage format).
    byte[][] sortedTags = tags.clone();
    sortTags(sortedTags);

    byte[] columnName = createColumnName(timestamp, sortedTags);

    write(row, columnName, value);
  }

  long internalIncrement(byte[] counter, long amount, long timestamp, byte[]... tags) {
    byte[][] sortedTags = tags.clone();
    sortTags(sortedTags);
    byte[] columnName = createColumnName(timestamp, sortedTags);
    byte[] rowName = createRow(counter, timestamp, rowPartitionIntervalSize);
    return table.incrementAndGet(rowName, columnName, amount);
  }
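
  // Editorial note with a hedged sketch (not part of the original source): Table.incrementAndGet() treats the stored
  // cell as an 8-byte long counter, so a subclass that exposes counters might simply delegate to the method above:
  //
  //   public long increment(byte[] counter, long amount, long timestamp, byte[]... tags) {
  //     return internalIncrement(counter, amount, timestamp, tags);
  //   }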

  private int applyLimitOnRowsToRead(long timeIntervalsCount) {
    return (timeIntervalsCount > MAX_ROWS_TO_SCAN_PER_READ) ? MAX_ROWS_TO_SCAN_PER_READ : (int) timeIntervalsCount;
  }

  /**
   * Returns the value that will be used as the actual row key.
   * It has the following format:
   * {@code <key>[<timestamp>/<rowPartitionIntervalSize>]}.
   *
   * @param key a user-provided entry key value
   * @param timestamp timestamp of the entry, used to determine which time interval (row) the entry belongs to
   * @param rowPartitionIntervalSize the size of the time interval used for partitioning data into rows; this is a
   *                                 performance optimization. Please refer to {@link TimeseriesTable} for more
   *                                 details, including how to choose this value.
   * @return a composite value used as the row key
   */
  @VisibleForTesting
  static byte[] createRow(byte[] key, long timestamp, long rowPartitionIntervalSize) {
    return Bytes.add(key, Bytes.toBytes(getRowKeyTimestampPart(timestamp, rowPartitionIntervalSize)));
  }
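
  // A minimal sketch of the resulting row key (editorial illustration, not part of the original source):
  //
  //   byte[] key = Bytes.toBytes("metric1");
  //   long ts = 1404000000000L;                      // some timestamp in ms
  //   long interval = TimeUnit.HOURS.toMillis(1);    // the default row partition interval
  //   byte[] row = createRow(key, ts, interval);
  //   // row == Bytes.add(key, Bytes.toBytes(ts / interval)), i.e. every entry written for "metric1"
  //   // within the same hour lands in the same row.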

  private static long getRowKeyTimestampPart(final long timestamp, final long rowPartitionIntervalSize) {
    return timestamp / rowPartitionIntervalSize;
  }

  private static void sortTags(byte[][] tags) {
    Arrays.sort(tags, Bytes.BYTES_COMPARATOR);
  }

  /**
   * Returns the value that will be used as the actual column name.
   * The column name has the following format: {@code <timestamp><tags>}. Tags are sorted so that reads can
   * filter efficiently on the provided tags.
   *
   * @param timestamp user-provided entry timestamp, encoded as an 8-byte long
   * @param tags encoded user-provided entry tags, formatted as {@code [<tag_length><tag_value>]*}, where the tag
   *             length is the 4-byte encoded int length of the tag and the tags are sorted in ascending order
   * @return a composite value used as the column name
   */
  @VisibleForTesting
  static byte[] createColumnName(long timestamp, byte[][] tags) {
    // hint: possible perf improvement: we could calculate the column length ahead of time and avoid creating many
    //       intermediate array objects

    // hint: possible perf improvement: we could store just the diff from the timestamp encoded in the row key
    //       and thereby reduce the footprint of every stored entry
    // hint: consider a different column name format: we may want to know "sooner" how many tags there are, so that
    //       other parts of the code can run faster and create fewer array objects. This should be easy to do, since
    //       the column name is immutable.
    byte[] columnName = createColumnNameFirstPart(timestamp);
    for (byte[] tag : tags) {
      // hint: possible perf improvement: use a compressed int (see Bytes.intToByte()) or at least
      //       Bytes.toBytes(short), which should be more than enough
      columnName = Bytes.add(columnName, Bytes.toBytes(tag.length), tag);
    }

    return columnName;
  }
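
  // A minimal sketch of the resulting column name layout (editorial illustration, not part of the original source):
  //
  //   byte[][] tags = {Bytes.toBytes("host1"), Bytes.toBytes("cluster1")};
  //   sortTags(tags);                                // sorts to {"cluster1", "host1"}
  //   byte[] column = createColumnName(1404000000000L, tags);
  //   // column == <8-byte timestamp><4-byte int 8>"cluster1"<4-byte int 5>"host1"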

  private static byte[] createColumnNameFirstPart(final long timestamp) {
    return Bytes.toBytes(timestamp);
  }

  @VisibleForTesting
  static long getTimeIntervalsCount(final long startTime, final long endTime,
                                    final long rowPartitionIntervalSize) {
    return (getRowKeyTimestampPart(endTime, rowPartitionIntervalSize) -
      getRowKeyTimestampPart(startTime, rowPartitionIntervalSize) + 1);
  }

  @VisibleForTesting
  static byte[] getRowOfKthInterval(final byte[] key,
                                    final long timeRangeStart,
                                    // zero-based
                                    final int intervalIndex,
                                    final long rowPartitionIntervalSize) {
    return createRow(key, timeRangeStart + intervalIndex * rowPartitionIntervalSize, rowPartitionIntervalSize);
  }

  @VisibleForTesting
  static boolean hasTags(final byte[] columnName) {
    // if columnName only has timestamp, then there's no tags encoded into column name
    return (columnName.length > Bytes.SIZEOF_LONG);
  }

  @VisibleForTesting
  static long parseTimeStamp(final byte[] columnName) {
    return Bytes.toLong(columnName, 0);
  }

  /**
   * Reads entries for a given time range and returns an {@code Iterator<Entry>}. This method is intended to be
   * used by subclasses to define their own public <code>read</code> methods.
   * NOTE: a limit is placed on the maximum number of time intervals (rows) scanned during a read, as defined by
   * {@link #MAX_ROWS_TO_SCAN_PER_READ}.
   *
   * @param key key of the entries to read
   * @param startTime defines the start of the time range to read, inclusive
   * @param endTime defines the end of the time range to read, inclusive
   * @param tags defines a set of tags that MUST be present in every returned entry.
   *        NOTE: a returned entry contains all tags that were provided when it was written, not only the filter tags
   * @return an iterator over entries that satisfy the provided conditions
   */
  final Iterator<Entry> readInternal(byte[] key, long startTime, long endTime, byte[]... tags) {
    // validating params
    Preconditions.checkArgument(startTime <= endTime,
                                "Provided time range condition is incorrect: startTime > endTime");

    return new EntryScanner(key, startTime, endTime, tags);
  }
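
  // A hedged sketch of how a subclass might expose this method as its public read API (illustrative only; see
  // TimeseriesTable for the actual public dataset class):
  //
  //   public Iterator<Entry> read(byte[] key, long startTime, long endTime, byte[]... tags) {
  //     return readInternal(key, startTime, endTime, tags);
  //   }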

  /**
   * Creates an {@link Entry}, checking that the filter tags are contained in the column name and parsing the
   * encoded tags in a single pass.
   *
   * @param key key of the entry
   * @param value value of the entry
   * @param columnName column name of the entry, which encodes its timestamp and tags
   * @param tags the tags to filter entries by
   * @return an Entry with tags parsed from the column name if the column name contains all filter tags; otherwise
   *         <code>null</code>
   */
  private Entry createEntry(final byte[] key, final byte[] value, final byte[] columnName, final byte[][] tags) {
    // columnName doesn't contain tags.
    if (!hasTags(columnName)) {
      if (tags == null || tags.length == 0) {
        return new Entry(key, value, parseTimeStamp(columnName));
      }
      return null;
    }

    // columnName contains tags.
    byte[][] sortedTags = null;
    if (tags != null) {
      sortedTags = tags.clone();
      sortTags(sortedTags);
    }

    // Since we know that tags are sorted we can test match in one pass (like in merge sort)
    int curPos = Bytes.SIZEOF_LONG;
    int curTagToCheck = 0;
    List<byte[]> parsedTags = Lists.newArrayList();

    while (curPos < columnName.length - 1) {
      int tagLength = Bytes.toInt(columnName, curPos);
      curPos += Bytes.SIZEOF_INT;
      int tagStartPos = curPos;
      curPos += tagLength;

      // parse the tag from the column name; give up if the encoded tag length runs past the end of the array
      if (tagStartPos + tagLength > columnName.length) {
        return null;
      } else {
        byte[] tag = new byte[tagLength];
        System.arraycopy(columnName, tagStartPos, tag, 0, tagLength);
        parsedTags.add(tag);
      }
      // we need to parse all tags in the column name if no filter tags were passed, and we still need to parse the
      // remaining tags in the column name after all filter tags have been matched.
      if (sortedTags == null || sortedTags.length == 0 || curTagToCheck == sortedTags.length) {
        continue;
      }
      // check the tag encoded in the column name against sortedTags.
      // The tag occupies tagLength bytes of the columnName array, starting at tagStartPos.
      int tagsMatch = Bytes.compareTo(columnName, tagStartPos, tagLength,
                                      sortedTags[curTagToCheck], 0, sortedTags[curTagToCheck].length);
      if (tagsMatch == 0) {
        // Tags match, advancing to the next tag to be checked.
        curTagToCheck++;
      } else if (tagsMatch > 0) {
        // Tags do NOT match and the fetched tag is greater than the one we are matching against. Since tags are
        // encoded in sorted order, the tag we are matching against cannot appear later in the column name.
        return null;
      }
      // tagsMatch < 0 means we can advance and check against the next tag encoded in the column name
    }
    if (sortedTags != null && curTagToCheck < sortedTags.length) {
      // this means we didn't find all required tags in the entry data
      return null;
    }
    return new Entry(key, value, parseTimeStamp(columnName), parsedTags.toArray(new byte[parsedTags.size()][]));
  }
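
  // Worked example of the one-pass matching above (editorial illustration, not part of the original source):
  // filtering with tags = {"b"} against a column name that encodes the sorted tags {"a", "b", "c"}:
  //   "a" compares less than "b"  -> tagsMatch < 0, keep scanning ("a" is still added to parsedTags);
  //   "b" matches                 -> curTagToCheck reaches sortedTags.length;
  //   "c" is parsed but no longer compared against the filter tags;
  // all filter tags were found, so the entry is returned carrying tags {"a", "b", "c"}.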

  /**
   * An iterator over entries.
   */
  public final class EntryScanner extends AbstractIterator<Entry> {
    private final byte[] key;
    private final long startTime;
    private final byte[][] tags;

    // the number of rows to fetch
    private final long timeIntervalsCount;

    private final byte[] startColumnName;
    private final byte[] endColumnName;

    // track the number of rows scanned through
    private int rowScanned;

    // use an internal iterator to avoid leaking AbstractIterator methods to the outside
    private Iterator<Map.Entry<byte[], byte[]>> internalIterator;

    /**
     * Constructs an EntryScanner. Should only be called by TimeseriesTable.
     *
     * @param key key of the entries to read
     * @param startTime defines the start of the time range to read, inclusive
     * @param endTime defines the end of the time range to read, inclusive
     * @param tags defines a set of tags that MUST be present in every returned entry.
     *        NOTE: a returned entry contains all tags that were provided when it was written, not only the filter tags
     */
    @VisibleForTesting
    EntryScanner(byte[] key, long startTime, long endTime, byte[][] tags) {
      this.key = key;
      this.startTime = startTime;
      this.tags = tags;

      // calculating time intervals (i.e. rows, as one row = one time interval) to fetch.
      long timeIntervals = getTimeIntervalsCount(startTime, endTime, rowPartitionIntervalSize);
      timeIntervalsCount = applyLimitOnRowsToRead(timeIntervals);
      // Note: do NOT use tags when calculating start/stop column keys due to the column name format.
      startColumnName = createColumnNameFirstPart(startTime);
      endColumnName = createColumnNameFirstPart(endTime + 1);
      internalIterator = null;
    }

    @Override
    protected Entry computeNext() {
      while ((internalIterator == null || !internalIterator.hasNext()) && rowScanned < timeIntervalsCount) {
        byte[] row = getRowOfKthInterval(key, startTime, rowScanned, rowPartitionIntervalSize);
        internalIterator = createIterator(row);
        rowScanned++;
      }
      if (rowScanned <= timeIntervalsCount && internalIterator != null && internalIterator.hasNext()) {
        Map.Entry<byte[], byte[]> entry = internalIterator.next();
        Entry returnValue = createEntry(key, entry.getValue(), entry.getKey(), tags);
        if (returnValue == null) {
          return computeNext();
        }
        return returnValue;
      }
      return endOfData();
    }

    private Iterator<Map.Entry<byte[], byte[]>> createIterator(byte[] row) {
      Row currentRow = table.get(row,
                                 // we only need to set the left bound on the first row: other rows cannot have
                                 // records with a timestamp less than startTime
                                 (rowScanned == 0) ? startColumnName : null,
                                 // we only need to set the right bound on the last row: other rows cannot have
                                 // records with a timestamp greater than endTime
                                 (rowScanned == timeIntervalsCount - 1) ? endColumnName : null,
                                 // read all
                                 -1);

      if (!currentRow.isEmpty()) {
        return currentRow.getColumns().entrySet().iterator();
      }

      return null;
    }
  }

  /**
   * Time series Dataset entry.
   */
  static class Entry {
    private byte[] key;
    private byte[] value;
    private long timestamp;
    private byte[][] tags;

    /**
     * Creates instance of the time series entry.
     *
     * @param key key of the entry. E.g. "metric1"
     * @param value value to store
     * @param timestamp timestamp of the entry
     * @param tags optional list of tags associated with the entry
     */
    public Entry(final byte[] key, final byte[] value, final long timestamp, final byte[]... tags) {
      this.key = key;
      this.value = value;
      this.timestamp = timestamp;
      this.tags = tags;
    }

    /**
     * Returns the key of the entry.
     * @return the key of the entry
     */
    public byte[] getKey() {
      return key;
    }

    /**
     * Returns the value of the entry.
     * @return the value of the entry
     */
    public byte[] getValue() {
      return value;
    }

    /**
     * Returns the timestamp of the entry.
     * @return the timestamp of the entry
     */
    public long getTimestamp() {
      return timestamp;
    }

    /**
     * Returns the tags associated with the entry.
     * @return the tags associated with the entry
     */
    public byte[][] getTags() {
      return tags;
    }
  }
}