Package com.atlantbh.hadoop.s3.io

Source Code of com.atlantbh.hadoop.s3.io.S3BucketReader

package com.atlantbh.hadoop.s3.io;

import java.io.IOException;
import java.io.InputStream;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.amazonaws.auth.PropertiesCredentials;
import com.amazonaws.services.s3.AmazonS3Client;
import com.amazonaws.services.s3.model.GetObjectRequest;
import com.amazonaws.services.s3.model.ListObjectsRequest;
import com.amazonaws.services.s3.model.ObjectListing;
import com.amazonaws.services.s3.model.S3Object;
import com.amazonaws.services.s3.model.S3ObjectSummary;

/**
* Class used for reading the objects from Amazon S3. Amazon S3 credentials are read from
* "AwsCredentials.properties" file in jar
*
* @author seljaz
*
*/
public class S3BucketReader {
  Logger LOG = LoggerFactory.getLogger(S3BucketReader.class);

  AmazonS3Client s3Client;

  ListObjectsRequest listObjectsRequest;
  ObjectListing objectListing;

  GetObjectRequest getObjectRequest;

  int currentPosition;
  S3ObjectSummary currentObject = null;
  /**
   * Initializes S3 bucket reader for reading the keys from S3 bucket, having same prefix and staring with
   * key equals to marker
   * @param bucketName S3 bucket name
   * @param keyPrefix prefix of the keys
   * @param marker first key to read from
   * @param maxKeys maximal number of keys to retrieve from S3 in single call
   * @throws IOException
   */
  public S3BucketReader(String bucketName, String keyPrefix, String marker, int maxKeys) throws IOException {
    listObjectsRequest = new ListObjectsRequest(bucketName, keyPrefix, marker, "", maxKeys);
    s3Client = new AmazonS3Client(new PropertiesCredentials(
        S3RecordReader.class.getResourceAsStream("/AwsCredentials.properties")));
  }

  /**
   * Reads next key from S3
   * @return {@link S3ObjectSummary} of the key or <code>null</code> if there are no more keys to read
   */
  public S3ObjectSummary getNextKey() {
    if (objectListing == null) {
      LOG.debug("Listing objects");
      objectListing = s3Client.listObjects(listObjectsRequest);
    }

    // we have more elements in list
    if (currentPosition < objectListing.getObjectSummaries().size()) {
      currentObject = objectListing.getObjectSummaries().get(currentPosition++);
    } else if (objectListing.isTruncated()) {
      LOG.debug("Current batch reached its end. Fetching next page.");
      // when we're at the end, check if there's more data to read
      listObjectsRequest.setMarker(objectListing.getNextMarker());
      LOG.debug("New marker is set to {}", listObjectsRequest.getMarker());

      objectListing = s3Client.listObjects(listObjectsRequest);
      currentPosition = 0;

      if (currentPosition < objectListing.getObjectSummaries().size()) {
        currentObject = objectListing.getObjectSummaries().get(currentPosition++);
      } else {
        LOG.debug("No more objects to read");
        currentObject = null;
      }
    } else {
      currentObject = null;
    }

    return currentObject;
  }

  /**
   * Get S3 object using object summary
   *
   * @param objectSummary {@link S3ObjectSummary} for key to retrieve
   * @return {@link S3Object} read from S3
   */
  public S3Object getObject(S3ObjectSummary objectSummary) {
    getObjectRequest = new GetObjectRequest(objectSummary.getBucketName(), objectSummary.getKey());
    return s3Client.getObject(getObjectRequest);
  }

  /**
   * List S3 objects from bucket with same prefix
   *
   * @param bucketName S3 bucket name
   * @param keyPrefix prefix of the keys
   * @param maxKeys maximal number of keys to return
   * @return S3 object listing
   */
  public ObjectListing listObjects(String bucketName, String keyPrefix, int maxKeys) {

    ListObjectsRequest request = new ListObjectsRequest(bucketName, keyPrefix, null, null, maxKeys);
    return s3Client.listObjects(request);
  }

  /**
   * Get next batch of S3 objects
   *
   * @param objectListing instance of {@link ObjectListing} used in previous listObject call
   * @return the next set of S3 keys
   */
  public ObjectListing listObjects(ObjectListing objectListing) {

    return s3Client.listNextBatchOfObjects(objectListing);
  }
}
TOP

Related Classes of com.atlantbh.hadoop.s3.io.S3BucketReader

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.