Package com.google.appengine.tools.mapreduce.inputs

Source Code of com.google.appengine.tools.mapreduce.inputs.BlobstoreInput

// Copyright 2011 Google Inc. All Rights Reserved.
package com.google.appengine.tools.mapreduce.inputs;

import com.google.appengine.api.blobstore.BlobInfoFactory;
import com.google.appengine.api.blobstore.BlobKey;
import com.google.appengine.tools.mapreduce.Input;
import com.google.appengine.tools.mapreduce.InputReader;
import com.google.common.base.Preconditions;

import java.util.ArrayList;
import java.util.List;

/**
* BlobstoreLineInput shards files in Blobstore on separator boundaries.
*
*/
public final class BlobstoreInput extends Input<byte[]> {

  private static final long MIN_SHARD_SIZE = 1024L;
  private static final long serialVersionUID = 2235444204028285444L;

  private final String blobKey;
  private final byte separator;
  private final int shardCount;

  public BlobstoreInput(String blobKey, byte separator, int shardCount) {
    this.blobKey = blobKey;
    this.separator = separator;
    this.shardCount = shardCount;
  }

  @Override
  public List<? extends InputReader<byte[]>> createReaders() {
    long blobSize = new BlobInfoFactory().loadBlobInfo(new BlobKey(blobKey)).getSize();
    return split(blobKey, blobSize, shardCount);
  }

  private List<? extends InputReader<byte[]>> split(String blobKey, long blobSize, int shardCount) {
    Preconditions.checkNotNull(blobKey);
    Preconditions.checkArgument(shardCount > 0);
    Preconditions.checkArgument(blobSize >= 0);

    // Sanity check
    if (shardCount * MIN_SHARD_SIZE > blobSize) {
      shardCount = (int) (blobSize / MIN_SHARD_SIZE) + 1;
    }

    long splitLength = blobSize / shardCount;

    List<BlobstoreInputReader> result = new ArrayList<>();

    long startOffset = 0L;
    for (int i = 1; i < shardCount; i++) {
      long endOffset = i * splitLength;
      result.add(new BlobstoreInputReader(blobKey, startOffset, endOffset, separator));
      startOffset = endOffset;
    }
    result.add(new BlobstoreInputReader(blobKey, startOffset, blobSize, separator));
    return result;
  }
}
TOP

Related Classes of com.google.appengine.tools.mapreduce.inputs.BlobstoreInput

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.