// Source code of com.esri.hadoop.examples.MapperClass

package com.esri.hadoop.examples;
import java.io.IOException;


import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;


import com.esri.core.geometry.Envelope;
import com.esri.core.geometry.Envelope2D;
import com.esri.core.geometry.GeometryEngine;
import com.esri.core.geometry.Point;
import com.esri.core.geometry.QuadTree;
import com.esri.core.geometry.QuadTree.QuadTreeIterator;
import com.esri.core.geometry.SpatialReference;
import com.esri.json.EsriFeatureClass;




public class MapperClass extends Mapper<LongWritable, Text, Text, IntWritable> {
  
  // column indices for values in the CSV
  int longitudeIndex;
  int latitudeIndex;
  


  // in ca_counties.json, the label for the polygon is "NAME"
  String labelAttribute;
  
  EsriFeatureClass featureClass;
  SpatialReference spatialReference;
  QuadTree quadTree;
  QuadTreeIterator quadTreeIter;
  
  private void buildQuadTree(){
    quadTree = new QuadTree(new Envelope2D(-180, -90, 180, 90), 8);
    
    Envelope envelope = new Envelope();
    for (int i=0;i<featureClass.features.length;i++){
      featureClass.features[i].geometry.queryEnvelope(envelope);
      quadTree.insert(i, new Envelope2D(envelope.getXMin(), envelope.getYMin(), envelope.getXMax(), envelope.getYMax()));
    }
    
    quadTreeIter = quadTree.getIterator();
  }
  
  /**
   * Query the quadtree for the feature containing the given point
   * 
   * @param pt point as longitude, latitude
   * @return index to feature in featureClass or -1 if not found
   */
  private int queryQuadTree(Point pt)
  {
    // reset iterator to the quadrant envelope that contains the point passed
    quadTreeIter.resetIterator(pt, 0);
    
    int elmHandle = quadTreeIter.next();
    
    while (elmHandle >= 0){
      int featureIndex = quadTree.getElement(elmHandle);
      
      // we know the point and this feature are in the same quadrant, but we need to make sure the feature
      // actually contains the point
      if (GeometryEngine.contains(featureClass.features[featureIndex].geometry, pt, spatialReference)){
        return featureIndex;
      }
      
      elmHandle = quadTreeIter.next();
    }
    
    // feature not found
    return -1;
  }
  
  
  /**
   * Sets up mapper with filter geometry provided as argument[0] to the jar
   */
  @Override
  public void setup(Context context)
  {
    Configuration config = context.getConfiguration();
    
    spatialReference = SpatialReference.create(4326);


    // first pull values from the configuration    
    String featuresPath = config.get("sample.features.input");
    labelAttribute = config.get("sample.features.keyattribute", "NAME");
    latitudeIndex = config.getInt("samples.csvdata.columns.lat", 1);
    longitudeIndex = config.getInt("samples.csvdata.columns.long", 2);
    
    FSDataInputStream iStream = null;
    
    spatialReference = SpatialReference.create(4326);
    
    try {
      // load the JSON file provided as argument 0
      FileSystem hdfs = FileSystem.get(config);
      iStream = hdfs.open(new Path(featuresPath));
      featureClass = EsriFeatureClass.fromJson(iStream);
    } 
    catch (Exception e)
    {
      e.printStackTrace();
    } 
    finally
    {
      if (iStream != null)
      {
        try {
          iStream.close();
        } catch (IOException e) { }
      }
    }
    
    // build a quadtree of our features for fast queries
    if (featureClass != null){
      buildQuadTree();
    }
  }
  
  @Override
  public void map(LongWritable key, Text val, Context context)
      throws IOException, InterruptedException {
    
    /* 
     * The TextInputFormat we set in the configuration, by default, splits a text file line by line.
     * The key is the byte offset to the first character in the line.  The value is the text of the line.
     */
    
    String line = val.toString();
    String [] values = line.split(",");
    
    // Note: We know the data coming in is clean, but in practice it's best not to
    //       assume clean data.  This is especially true with big data processing
    float latitude = Float.parseFloat(values[latitudeIndex]);
    float longitude = Float.parseFloat(values[longitudeIndex]);
    
    // Create our Point directly from longitude and latitude
    Point point = new Point(longitude, latitude);
    
    // Each map only processes one earthquake record at a time, so we start out with our count 
    // as 1.  Aggregation will occur in the combine/reduce stages
    IntWritable one = new IntWritable(1);
    
    int featureIndex = queryQuadTree(point);
    
    if (featureIndex >= 0){
      String name = (String)featureClass.features[featureIndex].attributes.get(labelAttribute);
      
      if (name == null) 
        name = "???";
      
      context.write(new Text(name), one);
    } else {
      context.write(new Text("*Outside Feature Set"), one);
    }
  }
}
// Source code of com.esri.hadoop.examples.MapperClass

// Related classes of com.esri.hadoop.examples.MapperClass