Package com.ikanow.infinit.e.utility.hadoop

Source Code of com.ikanow.infinit.e.utility.hadoop.HadoopPrototypingTool$MyPermissionCollection

/*******************************************************************************
* Copyright 2012 The Infinit.e Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*   http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
******************************************************************************/
package com.ikanow.infinit.e.utility.hadoop;

import java.io.FilePermission;
import java.io.IOException;
import java.io.InputStream;
import java.net.SocketPermission;
import java.security.AllPermission;
import java.security.CodeSource;
import java.security.Permission;
import java.security.PermissionCollection;
import java.security.Policy;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Enumeration;
import java.util.Iterator;
import java.util.PropertyPermission;
import java.util.Scanner;

import javax.script.Invocable;
import javax.script.ScriptEngine;
import javax.script.ScriptEngineManager;
import javax.script.ScriptException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.ReduceContext;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.TaskInputOutputContext;
import org.apache.hadoop.util.ToolRunner;
import org.bson.BSONObject;
import org.bson.BasicBSONObject;
import org.bson.types.BasicBSONList;

import com.ikanow.infinit.e.data_model.custom.InfiniteMongoConfig;
import com.ikanow.infinit.e.data_model.custom.InfiniteMongoConfigUtil;
import com.ikanow.infinit.e.data_model.store.MongoDbUtil;
import com.ikanow.infinit.e.data_model.utils.IkanowSecurityManager;
import com.mongodb.BasicDBList;
import com.mongodb.BasicDBObject;
import com.mongodb.hadoop.io.BSONWritable;
import com.mongodb.hadoop.util.MongoTool;
import com.mongodb.util.JSON;

// This protoptype version is very slow, should run with the actual Rhino script engine,
// and there are a number of unnecessary memory copies for simplicity.
// We'll see if this is noticeably slow (it should really only be run on small datasets anyway)

public class HadoopPrototypingTool extends MongoTool {

    private static final Log log = LogFactory.getLog(HadoopPrototypingTool.class);
   
  public static class JavascriptUtils {
    private ScriptEngineManager _factory = null;
    private ScriptEngine _engine = null;
    private boolean _memoryOptimized = false;
    private TaskInputOutputContext<?, ?, BSONWritable, BSONWritable> _context = null;
    private ReduceContext<BSONWritable, BSONWritable, BSONWritable, BSONWritable> _reduceContext = null;
   
    private IkanowSecurityManager _secManager;   
   
    //////////////////////////////////////////////////////
   
    // (Java side) Interface
   
    protected boolean isInitialized() { return (_engine != null); }
   
    protected boolean isMemoryOptimized() { return _memoryOptimized; }
   
    // (JS side) Interface
   
    public Object clone(Boolean topLevel) { return topLevel ? new BSONWritable() : new BasicDBObject(); }
   
    // "Streaming" output (less efficient, more memory friendly)
   
    public void write(BSONWritable key, BSONWritable value) throws IOException, InterruptedException {
      try {
        _secManager.setSecureFlag(false);
        _context.write(key, value);
      }
      finally {
        _secManager.setSecureFlag(true);       
      }
    }//TESTED
   
    // "Streaming" input (less efficient, more memory friendly)
   
    public boolean hasNext() throws IOException, InterruptedException {
      try {
        _secManager.setSecureFlag(false);
        boolean hasNext = _reduceContext.getValues().iterator().hasNext();
        return hasNext;
      }
      finally {
        _secManager.setSecureFlag(true);       
      }
    }//TESTED
   
    public String next() throws IOException, InterruptedException {
      try {
        _secManager.setSecureFlag(false);
        BSONWritable next = _reduceContext.getValues().iterator().next();
        if (null == next) {
          return null;
        }
        else {
          return JSON.serialize(MongoDbUtil.convert(next));
        }
      }
      finally {
        _secManager.setSecureFlag(true);       
      }
    }//TESTED
   
    //////////////////////////////////////////////////////
   
    protected void setupJavascript(String script,
        TaskInputOutputContext<?, ?, BSONWritable, BSONWritable> context, // for output
        ReduceContext<BSONWritable, BSONWritable, BSONWritable, BSONWritable> reduceContext // for streaming input
        )
    { 
      Policy.setPolicy(new MinimalPolicy());
      _secManager = new IkanowSecurityManager(context.getConfiguration().getBoolean(InfiniteMongoConfigUtil.IS_ADMIN, false));
     
      _context = context;
      _reduceContext = reduceContext;
      if (null == _engine) {
        _factory = new ScriptEngineManager();
        _engine = _factory.getEngineByName("JavaScript")
        try {
          InputStream in1 = HadoopPrototypingTool.class.getResourceAsStream("javascript_utils_part1.js");
          _engine.eval(new Scanner(in1).useDelimiter("\\A").next());
          _engine.eval(script);
          InputStream in2 = HadoopPrototypingTool.class.getResourceAsStream("javascript_utils_part2.js");
          _engine.eval(new Scanner(in2).useDelimiter("\\A").next());
         
          // We've loaded the script so now we can check for memory-optimization mode
          try {
            Object memOptimizationMode = _engine.get("_memoryOptimization");
            if (null != memOptimizationMode) {
              if (memOptimizationMode instanceof Boolean) {
                _memoryOptimized = (Boolean)memOptimizationMode;
              }
              else {
                _memoryOptimized = Boolean.parseBoolean(memOptimizationMode.toString());
              }
            }//TESTED
           
            if (_memoryOptimized) {
              // (do it this way because in the future might want to separate these 2 out)
              _engine.put("_inContext", this);
              _engine.put("_outContext", this);
              setupOutput();
            }
          }
          catch (Exception e) {} // we don't care about errors for this clause specifically
         
          // Now overwrite it to ensure it's always present
          _engine.put("_memoryOptimization", _memoryOptimized);
         
        }
        catch (ScriptException e) {
          throw new RuntimeException("setupJavascript: " + e.getMessage(), e);
        }
       
      }
    }   
   
    protected void setupOutput() {
      JavascriptUtils objFactory = this; // (clone creates BSONWritables)
      BasicBSONList listFactory = new BasicBSONList();
      _engine.put("objFactory", objFactory);
      _engine.put("listFactory", listFactory);
    }
   
    protected BasicBSONList generateOutput()
    {     
      setupOutput();
      BasicBSONList outList = new BasicBSONList();
      _engine.put("outList", outList);     
     
      try {
        _engine.eval("s1(_emit_list); ");
      } catch (ScriptException e) {
        throw new RuntimeException("generateOutput: " + e.getMessage());
      }
      return outList;     
    }
   
    //////////////////////////////////////////////////////
   
    protected void map(Object key, BSONObject value)
    {
      _engine.put("_map_input_key", key.toString());
      String valueStr = value.toString(); // (these BSON objects are actually DBObjects, hence have a sensible toString())
      _engine.put("_map_input_value", valueStr);
      try {
        _secManager.setSecureFlag(true);
        ((Invocable) _engine).invokeFunction("internal_mapper", (Object[])null);
      }
      catch (Exception e) {
        Object internalError = _engine.get("_internalError");
        if ((internalError instanceof Boolean) && ((Boolean)internalError)) {
          log.warn("Internal error on doc: " + key);
        }
        else {
          throw new RuntimeException("map1: " + e.getMessage(), e);
        }
      }
      finally {
        _secManager.setSecureFlag(false);       
      }
    }
    protected void combine(BSONWritable key, Iterable<BSONWritable> values)
    {
      _engine.put("_combine_input_key", JSON.serialize(new BasicBSONObject(key.toMap())));
      if (!this.isMemoryOptimized()) {
       
        ArrayList<BasicBSONObject> tmpValues = new ArrayList<BasicBSONObject>();
        for (BSONWritable val: values) {
          tmpValues.add(new BasicBSONObject(val.toMap()));
        }
        _engine.put("_combine_input_values", JSON.serialize(tmpValues));
      }
      try {
        _secManager.setSecureFlag(true);
        ((Invocable) _engine).invokeFunction("internal_combiner", (Object[])null);
      }
      catch (ScriptException e) {
        if ((null != e.getMessage()) && e.getMessage().contains("ReferenceError: \"combine\" is not defined")) {
          _secManager.setSecureFlag(false); // (set again in the reducer)
          reduce(key, values);
        }
        else {
          throw new RuntimeException("combine: " + e.getMessage(), e);
        }
      }
      catch (NoSuchMethodException e) {
        _secManager.setSecureFlag(false); // (set again in the reducer)
        reduce(key, values);
      }
      finally {
        _secManager.setSecureFlag(false);       
      }
    }
    protected void reduce(BSONWritable key, Iterable<BSONWritable> values)
    {
      _engine.put("_reduce_input_key", JSON.serialize(new BasicBSONObject(key.toMap())));
      if (!this.isMemoryOptimized()) {
        ArrayList<BasicBSONObject> tmpValues = new ArrayList<BasicBSONObject>();
        for (BSONWritable val: values) {
          tmpValues.add(new BasicBSONObject(val.toMap()));
        }
        _engine.put("_reduce_input_values", JSON.serialize(tmpValues));
      }
      try {
        _secManager.setSecureFlag(true);
        ((Invocable) _engine).invokeFunction("internal_reducer", (Object[])null);
      }
      catch (ScriptException e) {
        throw new RuntimeException("reduce: " + e.getMessage(), e);
      }
      catch (NoSuchMethodException e) {
        throw new RuntimeException("reduce: " + e.getMessage(), e);
      }     
      finally {
        _secManager.setSecureFlag(false);       
      }
    }
  }
 
  public static class JavascriptMapper extends Mapper<Object, BSONObject, BSONWritable, BSONWritable> {
    private JavascriptUtils _javascript = new JavascriptUtils();

    @Override
    public void setup(Context context) {
      _javascript.setupJavascript(context.getConfiguration().get("arguments"), context, null);
      _javascript._engine.put("_query", context.getConfiguration().get("mongo.input.query"));
     
      // Set up cache if one is specified
      InfiniteMongoConfig config = new InfiniteMongoConfig(context.getConfiguration());
      BasicDBList caches = config.getCacheList();
      if ((null != caches) && !caches.isEmpty()) {       
        try {
          CacheUtils.addJSONCachesToEngine(caches, _javascript._engine, _javascript._secManager, (config.getLimit() > 0));
        } catch (Exception e) {
          throw new RuntimeException("Error setting up caches: " + caches);
        }
      }
    }
   
    @Override
    public void map( Object key, BSONObject value, Context context ) throws IOException, InterruptedException
    {
      _javascript.map(key, value);
     
      if (!_javascript.isMemoryOptimized()) {
        BasicBSONList out = _javascript.generateOutput();
        for (Object bson: out) {
          BSONWritable bsonObj = (BSONWritable) bson;
          BSONWritable outkey = (BSONWritable) bsonObj.get("key");
          BSONWritable outval = (BSONWritable) bsonObj.get("val");
          if (null == outkey) {
            throw new IOException("Map: Can't output a null key from " + value.get("_id"));         
          }
          if (null == outval) {         
            throw new IOException("Map: Can't output a null value, key: " + MongoDbUtil.convert(outkey) + " from " + value.get("_id"));
          }
          context.write(outkey, outval);
        }
      }
    }
  }
 
  public static class JavascriptCombiner extends Reducer<BSONWritable, BSONWritable, BSONWritable, BSONWritable>
  {
    private JavascriptUtils _javascript = new JavascriptUtils();
   
    @Override
    public void setup(Context context) {
      _javascript.setupJavascript(context.getConfiguration().get("arguments"), context, context);
      _javascript._engine.put("_query", context.getConfiguration().get("mongo.input.query"));

      // Set up cache if one is specified
      InfiniteMongoConfig config = new InfiniteMongoConfig(context.getConfiguration());
      BasicDBList caches = config.getCacheList();
      if ((null != caches) && !caches.isEmpty()) {       
        try {
          CacheUtils.addJSONCachesToEngine(caches, _javascript._engine, _javascript._secManager, (config.getLimit() > 0));
        } catch (Exception e) {
          throw new RuntimeException("Error setting up caches: " + caches);
        }
      }
    }
   
    public void reduce( BSONWritable key, Iterable<BSONWritable> values, Context context )
    throws IOException, InterruptedException
    {
      _javascript.combine(key, values);

      if (!_javascript.isMemoryOptimized()) {
        BasicBSONList out = _javascript.generateOutput();
        for (Object bson: out) {
          BSONWritable bsonObj = (BSONWritable) bson;
          BSONWritable outkey = (BSONWritable) bsonObj.get("key");
          BSONWritable outval = (BSONWritable) bsonObj.get("val");
          if (null == outkey) {
            throw new IOException("Combine: Can't output a null key from " + key);         
          }
          if (null == outval) {         
            throw new IOException("Combine: Can't output a null value, key: " + MongoDbUtil.convert(outkey) + " from " + key);
          }
          context.write(outkey, outval);
        }
      }
    }
  }

  public static class JavascriptReducer extends Reducer<BSONWritable, BSONWritable, BSONWritable, BSONWritable>
  {
    private JavascriptUtils _javascript = new JavascriptUtils();
   
    @Override
    public void setup(Context context) {
      _javascript.setupJavascript(context.getConfiguration().get("arguments"), context, context);
      _javascript._engine.put("_query", context.getConfiguration().get("mongo.input.query"));

      // Set up cache if one is specified
      InfiniteMongoConfig config = new InfiniteMongoConfig(context.getConfiguration());
      BasicDBList caches = config.getCacheList();
      if ((null != caches) && !caches.isEmpty()) {       
        try {
          CacheUtils.addJSONCachesToEngine(caches, _javascript._engine, _javascript._secManager, (config.getLimit() > 0));
        } catch (Exception e) {
          throw new RuntimeException("Error setting up caches: " + caches);
        }
      }
    }
   
    public void reduce( BSONWritable key, Iterable<BSONWritable> values, Context context )
    throws IOException, InterruptedException
    {
      _javascript.reduce(key, values);
     
      if (!_javascript.isMemoryOptimized()) {
        BasicBSONList out = _javascript.generateOutput();
        for (Object bson: out) {
          BSONWritable bsonObj = (BSONWritable) bson;
          BSONWritable outkey = (BSONWritable) bsonObj.get("key");
          BSONWritable outval = (BSONWritable) bsonObj.get("val");
          if (null == outkey) {
            throw new IOException("Reduce: Can't output a null key from " + key);         
          }
          if (null == outval) {         
            throw new IOException("Reduce: Can't output a null value, key: " + MongoDbUtil.convert(outkey) + " from " + key);
          }
          context.write(outkey, outval);
        }
      }
    }
  }
    public static void main( String[] args ) throws Exception{
        final int exitCode = ToolRunner.run( new HadoopPrototypingTool(), args );
        System.exit( exitCode );
    }
   
  //////////////////////////////////////////////////////

    // Specify security policy:
   
    public static class MinimalPolicy extends Policy {

        private static PermissionCollection perms;

        public MinimalPolicy() {
            super();
            if (perms == null) {
                perms = new MyPermissionCollection();
                addPermissions();
            }
        }

        @Override
        public PermissionCollection getPermissions(CodeSource codesource) {
            return perms;
        }

        private void addPermissions() {
            SocketPermission socketPermission = new SocketPermission("*:1024-", "connect, resolve");
            PropertyPermission propertyPermission = new PropertyPermission("*", "read, write");
            FilePermission filePermission = new FilePermission("<<ALL FILES>>", "read");
            AllPermission allPermission = new AllPermission();

            perms.add(socketPermission);
            perms.add(propertyPermission);
            perms.add(filePermission);
            perms.add(allPermission);
        }

    }
    public static class MyPermissionCollection extends PermissionCollection {

        private static final long serialVersionUID = 614300921365729272L;

        ArrayList<Permission> perms = new ArrayList<Permission>();

        public void add(Permission p) {
            perms.add(p);
        }

        public boolean implies(Permission p) {
            for (Iterator<Permission> i = perms.iterator(); i.hasNext();) {
                if (((Permission) i.next()).implies(p)) {
                    return true;
                }
            }
            return false;
        }

        public Enumeration<Permission> elements() {
            return Collections.enumeration(perms);
        }

        public boolean isReadOnly() {
            return false;
        }

    }
}
TOP

Related Classes of com.ikanow.infinit.e.utility.hadoop.HadoopPrototypingTool$MyPermissionCollection

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.