Package sounder.pig.points

Source Code of sounder.pig.points.KDTree$KDPointComparator

package sounder.pig.points;

import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;

import org.apache.pig.EvalFunc;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.BagFactory;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.impl.logicalLayer.schema.Schema;
import org.apache.pig.impl.util.Utils;

/**
   Constructs a k-d tree from the passed in databag containing
   points. NOTE: This is intended as a proof-of-concept and is
   unlikely to be production worthy.
*/
public class KDTree extends EvalFunc<DataBag> {
    private static Comparator<KDPoint> comparators[];
    private static final Integer ID_FIELD = 0;
    private static final Integer IS_ROOT_FIELD = 1;
    private static final Integer AXIS_FIELD = 2;
    private static final Integer ABOVE_CHILD_FIELD = 3;
    private static final Integer BELOW_CHILD_FIELD = 4;
    private static final Integer POINT_FIELD = 5;
   
    public DataBag exec(Tuple input) throws IOException {
        if (input == null || input.size() < 1 || input.isNull(0)) { return null; }

        DataBag points = (DataBag)input.get(0);       // {(id, point:(x1,x2,...,xK))}
        KDPoint[] asPoints = toPoints(points);

        return generateTree(asPoints);       
    }

    /**
       Check if the input tuple can make a valid KDPoint object
     */
    private boolean isValidPoint(Tuple t) throws ExecException {
        if (t.isNull(0) || t.isNull(1)) { return false; }
        return true;
    }

    /**
       Construct an array of KDPoint objects from the passed in DataBag
       of tuples
     */
    private KDPoint[] toPoints(DataBag points) throws ExecException {
        KDPoint[] result = new KDPoint[((Long)points.size()).intValue()];
        int idx = 0;
        for (Tuple t : points) {
            if (isValidPoint(t)) {
                result[idx] = new KDPoint(t);
                idx++;
            }
        }
        return result;
    }

    /**
       Recursively generate a k-d tree from the passed in array of points
     */
    private DataBag generateTree(KDPoint[] points) throws ExecException {
        if (points.length == 0) { return null; }

        int maxD = points[0].getDimensionality();
        comparators = new Comparator[maxD];
        for (int i = 0; i < maxD; i++) {
            comparators[i] = new KDPointComparator(i);
        }
        KDPoint root = generate(0, maxD, points, 0, points.length-1);
        root.isRoot = true;
        return root.toBag();
    }

    private KDPoint generate(int d, int maxD, KDPoint[] points, int left, int right) throws ExecException {
        if (right < left) { return null; }
        if (right == left) {
            KDPoint returnPoint = points[left];
            if (returnPoint != null) { returnPoint.setAxis(d); }
            return returnPoint;
        }

        int m = (right-left)/2;
        // Yes, sort every time. Not super efficient
        Arrays.sort(points, left, right+1, comparators[d]);

        KDPoint medianPoint = points[left+m];
        medianPoint.setAxis(d);
       
        if (++d >= maxD) { d = 0; }
       
        medianPoint.setBelowChild(generate(d, maxD, points, left, left+m-1));
  medianPoint.setAboveChild(generate(d, maxD, points, left+m+1, right));
        return medianPoint;
    }

    /**
       Set the appropriate output schema so pig doesn't get confused
     */
    public Schema outputSchema(Schema input) {
        Schema schema = null;
        try {
            schema = Utils.getSchemaFromString("result:bag{t:tuple(id:chararray, is_root:int, axis:int, above_child:chararray, below_child:chararray, point:tuple(lng:double, lat:double))}");
        } catch (Exception e) {
            e.printStackTrace();
        }
        return schema;
    }
   
    /**
       Simple representation of a multi-dimensional point
     */
    class KDPoint {
       
        final String pointId;
        final int dimensionality;
        public boolean isRoot;
        protected String aboveChildId;
        protected String belowChildId;
        protected KDPoint aboveChild; // Above child, right in 2-D case
        protected KDPoint belowChild; // Below child, left in 2-D case
        protected Integer axis; // Splitting axis for this node (0 or 1) in 2-D case
        double values[];

        /**
           Construct a KDPoint from the passed in tuple representation
         */
        public KDPoint(Tuple pointTuple) throws ExecException {
            this.pointId = (String)pointTuple.get(0);
            Tuple point = (Tuple)pointTuple.get(1);
           
            int d = this.dimensionality = point.size();
            values = new double[d];
            for (int i = 0; i < d; i++) {
                values[i] = (Double)point.get(i);
            }
        }

        public KDPoint getAboveChild() { return aboveChild; }
        public String getAboveChildId() { return aboveChildId; }
        public KDPoint getBelowChild() { return belowChild; }
        public String getBelowChildId() { return belowChildId; }
        public Integer getAxis() { return axis; }
        public String getPointId() { return pointId; }
        public int getDimensionality() { return dimensionality; }
        public double getCoordinate(int d) { return values[d]; }

        public void setAboveChild(KDPoint child) {
            this.aboveChild = child;
            if (child != null) { this.aboveChildId = child.getPointId(); }
        }
       
        public void setAboveChildId(String childId) { this.aboveChildId = childId; }
       
        public void setBelowChild(KDPoint child) {
            this.belowChild = child;
            if (child != null) { this.belowChildId = child.getPointId(); }
        }
       
        public void setBelowChildId(String childId) { this.belowChildId = childId; }
       
        public void setAxis(Integer axis) { this.axis = axis; }

        public Tuple toTuple() throws ExecException {
            TupleFactory tfact = TupleFactory.getInstance();
            Tuple result = tfact.newTuple(6);
            Tuple point = tfact.newTuple(dimensionality);

            for (int i = 0; i < dimensionality; i++) {
                point.set(i, values[i]);
            }
           
            result.set(0, pointId);
            result.set(1, (isRoot ? 1 : 0));
            result.set(2, axis);
            result.set(3, aboveChildId);
            result.set(4, belowChildId);
            result.set(5, point);
            return result;
        }

        public DataBag toBag() throws ExecException {
            DataBag result = BagFactory.getInstance().newDefaultBag();
            result.add(toTuple());
            if (aboveChild != null) {
                result.addAll(aboveChild.toBag());
            }

            if (belowChild != null) {
                result.addAll(belowChild.toBag());
            }
            return result;
        }
    }

    /**
       Simple comparator class for sorting KDPoints along a particular dimension
     */
    public class KDPointComparator implements Comparator<KDPoint> {
        public final int d;
        public static final double epsilon = 1E-9;
       
        public KDPointComparator (int d) {
            this.d = d;
  }

        public int compare(KDPoint p1, KDPoint p2) {
            double d1 = p1.getCoordinate(d);
            double d2 = p2.getCoordinate(d);
            if (lesser(d1, d2)) { return -1; }
            if (same(d1, d2)) { return 0; }   
            return +1;
  }

        public double value(double x) {
            if ((x >= 0) && (x <= epsilon)) { return 0.0; }
           
            if ((x < 0) && (-x <= epsilon)) { return 0.0; }
           
            return x;
  }

        public boolean lesser(double x, double y) { return value(x-y) < 0; }

        public boolean same (double d1, double d2) {
            if (Double.isNaN(d1)) { return Double.isNaN(d2); }
           
            if (d1 == d2) { return true; }
           
            if (Double.isInfinite(d1)) { return false; }
           
            return value (d1-d2) == 0;
  }
    }
}
TOP

Related Classes of sounder.pig.points.KDTree$KDPointComparator

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.