Package com.backtype.cascading.tap

Source Code of com.backtype.cascading.tap.VersionedTap

package com.backtype.cascading.tap;

import java.io.IOException;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.RecordReader;

import com.backtype.hadoop.datastores.VersionedStore;
import com.backtype.support.CascadingUtils;
import cascading.flow.FlowProcess;
import cascading.scheme.Scheme;
import cascading.tap.hadoop.Hfs;

public class VersionedTap extends Hfs {
  public static enum TapMode {SOURCE, SINK}

  public Long version = null;

  // a sane default for the number of versions of your data to keep around
  private int versionsToKeep = 3;

  // source-specific
  public TapMode mode;

  // sink-specific
  private String newVersionPath;

  public VersionedTap(String dir, Scheme<JobConf,RecordReader,OutputCollector,?,?> scheme, TapMode mode)
      throws IOException {
    super(scheme, dir);
    this.mode = mode;
  }


  public VersionedTap setVersion(long version) {
    this.version = version;
    return this;
  }

  /**
    * Sets the number of versions of your data to keep. Unneeded versions are cleaned up on creation
    * of a new one. Pass a negative number to keep all versions.
    */
  public VersionedTap setVersionsToKeep(int versionsToKeep) {
    this.versionsToKeep = versionsToKeep;
    return this;
  }

  public int getVersionsToKeep() {
    return this.versionsToKeep;
  }

  public String getOutputDirectory() {
    return getPath().toString();
  }

  public VersionedStore getStore(JobConf conf) throws IOException {
    return new VersionedStore(FileSystem.get(conf), getOutputDirectory());
  }

  public String getSourcePath(JobConf conf) {
    VersionedStore store;
    try {
      store = getStore(conf);
      return (version != null) ? store.versionPath(version) : store.mostRecentVersionPath();
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  }

  public String getSinkPath(JobConf conf) {
    try {
      VersionedStore store = getStore(conf);
      return version == null ? store.createVersion() : store.createVersion(version);
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  }

  @Override
  public void sourceConfInit(FlowProcess<JobConf> process, JobConf conf) {
    super.sourceConfInit(process, conf);
    FileInputFormat.setInputPaths(conf, getSourcePath(conf));
  }

  @Override
  public void sinkConfInit(FlowProcess<JobConf> process, JobConf conf) {
    super.sinkConfInit(process, conf);

    if (newVersionPath == null)
      newVersionPath = getSinkPath(conf);

    FileOutputFormat.setOutputPath(conf, new Path(newVersionPath));
  }

  @Override
  public boolean resourceExists(JobConf jc) throws IOException {
    return getStore(jc).mostRecentVersion() != null;
  }

  @Override
  public boolean createResource(JobConf jc) throws IOException {
    throw new UnsupportedOperationException("Not supported yet.");
  }

  @Override
  public boolean deleteResource(JobConf jc) throws IOException {
    throw new UnsupportedOperationException("Not supported yet.");
  }

  @Override
  public String getIdentifier() {
    String outDir = getOutputDirectory();
    String versionString = (version == null) ? "LATEST" : version.toString();
    return "manhattan"
           + ((mode == TapMode.SINK) ? "sink" : "source")
           + ":" + outDir + ":" + versionString;
  }

  @Override
  public boolean commitResource(JobConf conf) throws IOException {
    VersionedStore store = new VersionedStore(FileSystem.get(conf), getOutputDirectory());

    if (newVersionPath != null) {
      store.succeedVersion(newVersionPath);
      CascadingUtils.markSuccessfulOutputDir(new Path(newVersionPath), conf);
      newVersionPath = null;
      store.cleanup(getVersionsToKeep());
    }

    return true;
  }

  @Override
  public boolean rollbackResource(JobConf conf) throws IOException {
    if (newVersionPath != null) {
      getStore(conf).failVersion(newVersionPath);
      newVersionPath = null;
    }

    return true;
  }
}
TOP

Related Classes of com.backtype.cascading.tap.VersionedTap

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.