Package com.oreilly.springdata.hadoop.streaming

Source Code of com.oreilly.springdata.hadoop.streaming.AbstractHdfsWriter

package com.oreilly.springdata.hadoop.streaming;

import java.util.Collection;
import java.util.concurrent.atomic.AtomicLong;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.springframework.data.hadoop.fs.FsShell;


/**
* Logic for writing to files of a specified size or other strategies go here as they are shared across
* implementations
*
*/
public abstract class AbstractHdfsWriter implements HdfsWriter {
 
  //TODO need to initialize the counter based on directory contents.
  private final AtomicLong counter = new AtomicLong(0L);
 
  private final AtomicLong bytesWritten = new AtomicLong(0L);
 
  private volatile boolean initialized;
 
  private String baseFilename = HdfsTextFileWriterFactory.DEFAULT_BASE_FILENAME;
  private String basePath = HdfsTextFileWriterFactory.DEFAULT_BASE_PATH;
  private String fileSuffix = HdfsTextFileWriterFactory.DEFAULT_FILE_SUFFIX;
  private long rolloverThresholdInBytes = HdfsTextFileWriterFactory.DEFAULT_ROLLOVER_THRESHOLD_IN_BYTES;

 
  public abstract FileSystem getFileSystem();
 
  protected void initializeCounterIfNecessary() {
    if (!initialized) {
      FsShell fsShell = new FsShell(getFileSystem().getConf(), getFileSystem());
      int maxCounter = 0;
      boolean foundFile = false;
      Collection<FileStatus> fileStats = fsShell.ls(this.getBasePath());
      for (FileStatus fileStatus : fileStats) {
        String shortName = fileStatus.getPath().getName();
        int counterFromName = getCounterFromName(shortName);
        if (counterFromName != -1) {
          foundFile = true;
        }
        if (counterFromName > maxCounter) {
          maxCounter = counterFromName;
        }
      }
      if (foundFile) {
        this.setCounter(maxCounter+1);       
      }
     
      initialized = true;
    }
  }


  protected int getCounterFromName(String shortName) {
    Pattern pattern = Pattern.compile("([\\d+]{1,})");
    Matcher matcher = pattern.matcher(shortName);
    if (matcher.find()) {
      return Integer.parseInt(matcher.group());
    }
    return -1;     
  }
 
  public long getRolloverThresholdInBytes() {
    return rolloverThresholdInBytes;
  }

  public void setRolloverThresholdInBytes(long rolloverThresholdInBytes) {
    this.rolloverThresholdInBytes = rolloverThresholdInBytes;
  }


  public String getFileSuffix() {
    return fileSuffix;
  }

  public void setFileSuffix(String fileSuffix) {
    this.fileSuffix = fileSuffix;
  }

 
  public String getBaseFilename() {
    return baseFilename;
  }

  public void setBaseFilename(String baseFilename) {
    this.baseFilename = baseFilename;
  }

  public String getBasePath() {
    return basePath;
  }

  public void setBasePath(String basePath) {
    this.basePath = basePath;
  }

  public long getCounter() {
    return counter.get();
  }
 
  public void setCounter(long value) {
    counter.set(value);
  }
 
  public void incrementCounter() {
    counter.incrementAndGet();
  }
 
  public void incrementBytesWritten(long bytesWritten) {
    this.bytesWritten.addAndGet(bytesWritten);
  }
 
  public void resetBytesWritten() {
    this.bytesWritten.set(0L);
  }
 
  public long getBytesWritten() {
    return bytesWritten.get();
  }
 
  public String getFileName() {
    //TODO configure file suffix
    return basePath + baseFilename + "-" + getCounter() + "." + fileSuffix;
  }
 
}
TOP

Related Classes of com.oreilly.springdata.hadoop.streaming.AbstractHdfsWriter

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.