Package org.apache.hadoop.tools.rumen.state

Source Code of org.apache.hadoop.tools.rumen.state.StatePool$StatePair

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.tools.rumen.state;

import java.io.DataInput;
import java.io.DataInputStream;
import java.io.DataOutput;
import java.io.DataOutputStream;
import java.io.IOException;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.HashMap;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.tools.rumen.Anonymizer;
import org.apache.hadoop.tools.rumen.datatypes.DataType;
import org.codehaus.jackson.JsonEncoding;
import org.codehaus.jackson.JsonFactory;
import org.codehaus.jackson.JsonGenerator;
import org.codehaus.jackson.JsonParser;
import org.codehaus.jackson.Version;
import org.codehaus.jackson.annotate.JsonIgnore;
import org.codehaus.jackson.map.DeserializationConfig;
import org.codehaus.jackson.map.ObjectMapper;
import org.codehaus.jackson.map.SerializationConfig;
import org.codehaus.jackson.map.module.SimpleModule;

/**
* A pool of states. States used by {@link DataType}s can be managed by the
* {@link StatePool}. {@link StatePool} also supports persistence. Persistence
* is key to share states across multiple {@link Anonymizer} runs.
*/
@SuppressWarnings("unchecked")
public class StatePool {
  // Version constant for the pool; not referenced in the visible part of
  // this class.
  private static final long VERSION = 1L;
  // Dirty flag: set when a state is added, or when isUpdated() finds a pooled
  // state that reports itself as updated.
  private boolean isUpdated = false;
  // Guards initialize() against being run more than once.
  private boolean isInitialized = false;
  // Configuration handed to initialize(); used to resolve the FileSystem of
  // the state files.
  private Configuration conf;
 
  // persistence configuration
  // Key naming the directory that holds the persisted state files.
  public static final String DIR_CONFIG = "rumen.anonymization.states.dir";
  // Boolean key (read with a default of false) enabling reload on initialize().
  public static final String RELOAD_CONFIG =
    "rumen.anonymization.states.reload";
  // Boolean key (read with a default of false) enabling persist().
  public static final String PERSIST_CONFIG =
    "rumen.anonymization.states.persist";
 
  // internal state management configs
  // File name, inside the state directory, of the committed state that
  // reload() reads.
  private static final String COMMIT_STATE_FILENAME = "latest";
  // File name, inside the state directory, that persist() resolves as the
  // current (in-progress) state file.
  private static final String CURRENT_STATE_FILENAME = "temp";
 
  // Formatted wall-clock time captured at the end of initialize().
  private String timeStamp;
  // State directory as a Path; only assigned when reload or persist is enabled.
  private Path persistDirPath;
  // Value of RELOAD_CONFIG as read by initialize().
  private boolean reload;
  // Value of PERSIST_CONFIG as read by initialize().
  private boolean persist;
 
  /**
   * A wrapper class that binds the state implementation to its implementing
   * class name.
   */
  public static class StatePair {
    private String className;
    private State state;
   
    public StatePair(State state) {
      this.className = state.getClass().getName();
      this.state = state;
    }
   
    public String getClassName() {
      return className;
    }
   
    public void setClassName(String className) {
      this.className = className;
    }
   
    public State getState() {
      return state;
    }
   
    public void setState(State state) {
      this.state = state;
    }
  }
 
  /**
   * Caches {@link State}s, keyed by the name of the class they were
   * registered under.
   */
  private HashMap<String, StatePair> pool = new HashMap<String, StatePair>();
 
  public void addState(Class id, State state) {
    if (pool.containsKey(id.getName())) {
      throw new RuntimeException("State '" + state.getName() + "' added for the"
          + " class " + id.getName() + " already exists!");
    }
    isUpdated = true;
    pool.put(id.getName(), new StatePair(state));
  }
 
  public State getState(Class clazz) {
    return pool.containsKey(clazz.getName())
           ? pool.get(clazz.getName()).getState()
           : null;
  }
 
  // For testing
  @JsonIgnore
  public boolean isUpdated() {
    if (!isUpdated) {
      for (StatePair statePair : pool.values()) {
        // if one of the states have changed, then the pool is dirty
        if (statePair.getState().isUpdated()) {
          isUpdated = true;
          return true;
        }
      }
    }
    return isUpdated;
  }
 
  /**
   * Initialized the {@link StatePool}. This API also reloads the previously
   * persisted state. Note that the {@link StatePool} should be initialized only
   * once.
   */
  public void initialize(Configuration conf) throws Exception {
    if (isInitialized) {
      throw new RuntimeException("StatePool is already initialized!");
    }
   
    this.conf = conf;
    String persistDir = conf.get(DIR_CONFIG);
    reload = conf.getBoolean(RELOAD_CONFIG, false);
    persist = conf.getBoolean(PERSIST_CONFIG, false);
   
    // reload if configured
    if (reload || persist) {
      System.out.println("State Manager initializing. State directory : "
                         + persistDir);
      System.out.println("Reload:" + reload + " Persist:" + persist);
      if (persistDir == null) {
        throw new RuntimeException("No state persist directory configured!"
                                   + " Disable persistence.");
      } else {
        this.persistDirPath = new Path(persistDir);
      }
    } else {
      System.out.println("State Manager disabled.");
    }
   
    // reload
    reload();
   
    // now set the timestamp
    DateFormat formatter =
      new SimpleDateFormat("dd-MMM-yyyy-hh'H'-mm'M'-ss'S'");
    Calendar calendar = Calendar.getInstance();
    calendar.setTimeInMillis(System.currentTimeMillis());
    timeStamp = formatter.format(calendar.getTime());
   
    isInitialized = true;
  }
 
  private void reload() throws Exception {
    if (reload) {
      // Reload persisted entries
      Path stateFilename = new Path(persistDirPath, COMMIT_STATE_FILENAME);
      FileSystem fs = stateFilename.getFileSystem(conf);
      if (fs.exists(stateFilename)) {
        reloadState(stateFilename, conf);
      } else {
        throw new RuntimeException("No latest state persist directory found!"
                                   + " Disable persistence and run.");
      }
    }
  }
 
  private void reloadState(Path stateFile, Configuration conf)
  throws Exception {
    FileSystem fs = stateFile.getFileSystem(conf);
    if (fs.exists(stateFile)) {
      System.out.println("Reading state from " + stateFile.toString());
      FSDataInputStream in = fs.open(stateFile);
     
      read(in);
      in.close();
    } else {
      System.out.println("No state information found for " + stateFile);
    }
  }
 
  private void read(DataInput in) throws IOException {
    ObjectMapper mapper = new ObjectMapper();
    mapper.configure(
        DeserializationConfig.Feature.CAN_OVERRIDE_ACCESS_MODIFIERS, true);
   
    // define a module
    SimpleModule module = new SimpleModule("State Serializer"
        new Version(0, 1, 1, "FINAL"));
    // add the state deserializer
    module.addDeserializer(StatePair.class, new StateDeserializer());

    // register the module with the object-mapper
    mapper.registerModule(module);

    JsonParser parser =
      mapper.getJsonFactory().createJsonParser((DataInputStream)in);
    StatePool statePool = mapper.readValue(parser, StatePool.class);
    this.setStates(statePool.getStates());
    parser.close();
  }
 
  /**
   * Persists the current state to the state directory. The state will be
   * persisted to the 'latest' file in the state directory.
   */
  public void persist() throws IOException {
    if (!persist) {
      return;
    }
    if (isUpdated()) {
      System.out.println("State is updated! Committing.");
      Path currStateFile = new Path(persistDirPath, CURRENT_STATE_FILENAME);
      Path commitStateFile = new Path(persistDirPath, COMMIT_STATE_FILENAME);
      FileSystem fs = currStateFile.getFileSy
TOP

Related Classes of org.apache.hadoop.tools.rumen.state.StatePool$StatePair

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.