Package org.apache.pig.data

Examples of org.apache.pig.data.DataBag


    BasicTable.Writer writer1 = new BasicTable.Writer(path, conf);
    int part = 0;
    TableInserter inserter = writer1.getInserter("part" + part, true);
    TypesUtils.resetTuple(tuple);
    DataBag bag1 = TypesUtils.createBag();
    Schema schColl = schema.getColumn(0).getSchema().getColumn(0).getSchema();
    Tuple tupColl1 = TypesUtils.createTuple(schColl);
    Tuple tupColl2 = TypesUtils.createTuple(schColl);

    DataBag bag2 = TypesUtils.createBag();
    Schema schColl2 = schema.getColumn(1).getSchema().getColumn(0).getSchema();
    Tuple tupColl2_1 = TypesUtils.createTuple(schColl2);
    Tuple tupColl2_2 = TypesUtils.createTuple(schColl2);
    Tuple collRecord1;
    try {
      collRecord1 = TypesUtils.createTuple(new Schema("f1:int, f2:string"));
    } catch (ParseException e) {
      e.printStackTrace();
      throw new IOException(e);
    }
    Tuple collRecord2;
    try {
      collRecord2 = TypesUtils.createTuple(new Schema("f1:int, f2:string"));
    } catch (ParseException e) {
      e.printStackTrace();
      throw new IOException(e);
    }

    // c3:collection(c3_1:collection(e:int,f:bool))
    DataBag bag3 = TypesUtils.createBag();
    DataBag bag3_1 = TypesUtils.createBag();
    DataBag bag3_2 = TypesUtils.createBag();

    Tuple tupColl3_1 = null;
    try {
      tupColl3_1 = TypesUtils.createTuple(new Schema("e:int,f:bool"));
    } catch (ParseException e) {
      e.printStackTrace();
      throw new IOException(e);
    }
    Tuple tupColl3_2;
    try {
      tupColl3_2 = TypesUtils.createTuple(new Schema("e:int,f:bool"));
    } catch (ParseException e) {
      e.printStackTrace();
      throw new IOException(e);
    }

    Tuple tupColl3_3 = null;
    try {
      tupColl3_3 = TypesUtils.createTuple(new Schema("e:int,f:bool"));
    } catch (ParseException e) {
      e.printStackTrace();
      throw new IOException(e);
    }
    Tuple tupColl3_4;
    try {
      tupColl3_4 = TypesUtils.createTuple(new Schema("e:int,f:bool"));
    } catch (ParseException e) {
      e.printStackTrace();
      throw new IOException(e);
    }

    byte[] abs1 = new byte[3];
    byte[] abs2 = new byte[4];
    tupColl1.set(0, 3.1415926);
    tupColl1.set(1, 1.6);
    abs1[0] = 11;
    abs1[1] = 12;
    abs1[2] = 13;
    tupColl1.set(2, new DataByteArray(abs1));
    bag1.add(tupColl1);
    tupColl2.set(0, 123.456789);
    tupColl2.set(1, 100);
    abs2[0] = 21;
    abs2[1] = 22;
    abs2[2] = 23;
    abs2[3] = 24;
    tupColl2.set(2, new DataByteArray(abs2));
    bag1.add(tupColl2);
    tuple.set(0, bag1);

    collRecord1.set(0, 1);
    collRecord1.set(1, "record1_string1");
    tupColl2_1.set(0, collRecord1);
    tupColl2_1.set(1, "hello1");
    bag2.add(tupColl2_1);

    collRecord2.set(0, 2);
    collRecord2.set(1, "record2_string1");
    tupColl2_2.set(0, collRecord2);
    tupColl2_2.set(1, "hello2");
    bag2.add(tupColl2_2);
    tuple.set(1, bag2);

    TypesUtils.resetTuple(tupColl3_1);
    TypesUtils.resetTuple(tupColl3_2);
    tupColl3_1.set(0, 1);
    tupColl3_1.set(1, true);
    tupColl3_2.set(0, 2);
    tupColl3_2.set(1, false);
    bag3_1.add(tupColl3_1);
    bag3_1.add(tupColl3_2);
    bag3.addAll(bag3_1);

    tupColl3_3.set(0, 3);
    tupColl3_3.set(1, true);
    tupColl3_4.set(0, 4);
    tupColl3_4.set(1, false);
    bag3_2.add(tupColl3_3);
    bag3_2.add(tupColl3_4);
    bag3.addAll(bag3_2);
    tuple.set(2, bag3);

    int row = 0;
    inserter.insert(new BytesWritable(String.format("k%d%d", part + 1, row + 1).getBytes()), tuple);

    row++;

    bag1.clear();
    bag2.clear();
    bag3.clear();
    bag3_1.clear();
    bag3_2.clear();
    TypesUtils.resetTuple(tupColl1);
    TypesUtils.resetTuple(tupColl2);
    TypesUtils.resetTuple(tupColl2_1);
    TypesUtils.resetTuple(tupColl2_2);
    TypesUtils.resetTuple(collRecord1);
    TypesUtils.resetTuple(collRecord2);
    TypesUtils.resetTuple(tupColl3_1);
    TypesUtils.resetTuple(tupColl3_2);
    TypesUtils.resetTuple(tupColl3_3);
    TypesUtils.resetTuple(tupColl3_4);

    tupColl1.set(0, 7654.321);
    tupColl1.set(1, 0.0001);
    abs1[0] = 31;
    abs1[1] = 32;
    abs1[2] = 33;
    tupColl1.set(2, new DataByteArray(abs1));
    bag1.add(tupColl1);
    tupColl2.set(0, 0.123456789);
    tupColl2.set(1, 0.3333);
    abs2[0] = 41;
    abs2[1] = 42;
    abs2[2] = 43;
    abs2[3] = 44;
    tupColl2.set(2, new DataByteArray(abs2));
    bag1.add(tupColl2);
    tuple.set(0, bag1);

    collRecord1.set(0, 3);
    collRecord1.set(1, "record1_string2");
    tupColl2_1.set(0, collRecord1);
    tupColl2_1.set(1, "hello1_2");
    bag2.add(tupColl2_1);

    collRecord2.set(0, 4);
    collRecord2.set(1, "record2_string2");
    tupColl2_2.set(0, collRecord2);
    tupColl2_2.set(1, "hello2_2");
    bag2.add(tupColl2_2);
    tuple.set(1, bag2);

    tupColl3_1.set(0, 5);
    tupColl3_1.set(1, true);
    tupColl3_2.set(0, 6);
    tupColl3_2.set(1, false);
    bag3_1.add(tupColl3_1);
    bag3_1.add(tupColl3_2);
    bag3.addAll(bag3_1);

    tupColl3_3.set(0, 7);
    tupColl3_3.set(1, true);
    tupColl3_4.set(0, 8);
    tupColl3_4.set(1, false);
    bag3_2.add(tupColl3_3);
    bag3_2.add(tupColl3_4);
    bag3.addAll(bag3_2);
    tuple.set(2, bag3);

    inserter.insert(new BytesWritable(String.format("k%d%d", part + 1, row + 1)
        .getBytes()), tuple);
View Full Code Here


        //Construct key
        Object key = t.get(1);
       
    // Construct an output bag and feed in the tuples
    DataBag opBag = mBagFactory.newDefaultBag();

    //Put the index, key, and value
    //in a tuple and return
    Pair <Integer, Integer> indexes = reducerMap.get(key)// first -> min, second ->max
 
    // For non skewed keys, we set the partition index to be -1
    if (indexes == null) {
      indexes = new Pair <Integer, Integer>(-1,0);
    }

    for (Integer reducerIdx=indexes.first, cnt=0; cnt <= indexes.second; reducerIdx++, cnt++) {
      if (reducerIdx >= totalReducers) {
        reducerIdx = 0;
      }
      Tuple opTuple = mTupleFactory.newTuple(4);
      opTuple.set(0, t.get(0));
      // set the partition index
      opTuple.set(1, reducerIdx.intValue());
      opTuple.set(2, key);
      opTuple.set(3, t.get(2));
     
      opBag.add(opTuple);
    }
   
    return opBag;
    }
View Full Code Here

        Tuple output = mTupleFactory.newTuple(2);
        output.set(0, key);
        // put the value in a bag so that the initial
        // version of the Algebraics will get a bag as
        // they would expect.
        DataBag bg = new SingleTupleBag(value);
        output.set(1, bg);
        return output;
    }
View Full Code Here

    public Map<String, Object> exec(Tuple in) throws IOException {
        Map<String, Object> output = new HashMap<String, Object>();
        if(in==null || in.size()==0)
            return null;
        Integer numQuantiles = null;
        DataBag samples = null;
        ArrayList<Tuple> quantilesList = new ArrayList<Tuple>();
        InternalMap weightedParts = new InternalMap();
        // the sample file has a tuple as under:
        // (numQuantiles, bag of samples)
        // numQuantiles here is the reduce parallelism
        try{
            numQuantiles = (Integer)in.get(0);
            samples = (DataBag)in.get(1);
           
            long numSamples = samples.size();
            long toSkip = numSamples / numQuantiles;
            if(toSkip == 0) {
                // numSamples is < numQuantiles;
                // set numQuantiles to numSamples
                numQuantiles = (int)numSamples;
View Full Code Here

    BasicTable.Writer writer1 = new BasicTable.Writer(path, conf);
    int part = 0;
    TableInserter inserter = writer1.getInserter("part" + part, true);
    TypesUtils.resetTuple(tuple);
    DataBag bagColl = TypesUtils.createBag();
    Schema schColl = schema.getColumn(0).getSchema().getColumn(0).getSchema();
    Tuple tupColl1 = TypesUtils.createTuple(schColl);
    Tuple tupColl2 = TypesUtils.createTuple(schColl);
    byte[] abs1 = new byte[3];
    byte[] abs2 = new byte[4];
    tupColl1.set(0, 3.1415926);
    tupColl1.set(1, 1.6);
    abs1[0] = 11;
    abs1[1] = 12;
    abs1[2] = 13;
    tupColl1.set(2, new DataByteArray(abs1));
    bagColl.add(tupColl1);
    tupColl2.set(0, 123.456789);
    tupColl2.set(1, 100);
    abs2[0] = 21;
    abs2[1] = 22;
    abs2[2] = 23;
    abs2[3] = 24;
    tupColl2.set(2, new DataByteArray(abs2));
    bagColl.add(tupColl2);
    tuple.set(0, bagColl);

    int row = 0;
    inserter.insert(new BytesWritable(String.format("k%d%d", part + 1, row + 1)
        .getBytes()), tuple);

    bagColl.clear();
    TypesUtils.resetTuple(tupColl1);
    TypesUtils.resetTuple(tupColl2);
    tupColl1.set(0, 7654.321);
    tupColl1.set(1, 0.0001);
    abs1[0] = 31;
    abs1[1] = 32;
    abs1[2] = 33;
    tupColl1.set(2, new DataByteArray(abs1));
    bagColl.add(tupColl1);
    tupColl2.set(0, 0.123456789);
    tupColl2.set(1, 0.3333);
    abs2[0] = 41;
    abs2[1] = 42;
    abs2[2] = 43;
    abs2[3] = 44;
    tupColl2.set(2, new DataByteArray(abs2));
    bagColl.add(tupColl2);
    tuple.set(0, bagColl);
    inserter.insert(new BytesWritable(String.format("k%d%d", part + 1, row + 1)
        .getBytes()), tuple);

    inserter.close();
View Full Code Here

    map.put("a", "x");
    map.put("b", "y");
    map.put("c", "z");
    tuple.set(2, map);

    DataBag bagColl = TypesUtils.createBag();
    Schema schColl = schema.getColumn(3).getSchema().getColumn(0).getSchema();
    Tuple tupColl1 = TypesUtils.createTuple(schColl);
    Tuple tupColl2 = TypesUtils.createTuple(schColl);
    byte[] abs1 = new byte[3];
    byte[] abs2 = new byte[4];
    tupColl1.set(0, 3.1415926);
    tupColl1.set(1, 1.6);
    abs1[0] = 11;
    abs1[1] = 12;
    abs1[2] = 13;
    tupColl1.set(2, new DataByteArray(abs1));
    bagColl.add(tupColl1);
    tupColl2.set(0, 123.456789);
    tupColl2.set(1, 100);
    abs2[0] = 21;
    abs2[1] = 22;
    abs2[2] = 23;
    abs2[3] = 24;
    tupColl2.set(2, new DataByteArray(abs2));
    bagColl.add(tupColl2);
    tuple.set(3, bagColl);

    int row = 0;
    inserter.insert(new BytesWritable(String.format("k%d%d", part + 1, row + 1)
        .getBytes()), tuple);
View Full Code Here

    map.put("a", "x");
    map.put("b", "y");
    map.put("c", "z");
    tuple.set(2, map);

    DataBag bagColl = TypesUtils.createBag();
    Schema schColl = schema.getColumn(3).getSchema().getColumn(0).getSchema();
    Tuple tupColl1 = TypesUtils.createTuple(schColl);
    Tuple tupColl2 = TypesUtils.createTuple(schColl);
    byte[] abs1 = new byte[3];
    byte[] abs2 = new byte[4];
    tupColl1.set(0, 3.1415926);
    tupColl1.set(1, 1.6);
    abs1[0] = 11;
    abs1[1] = 12;
    abs1[2] = 13;
    tupColl1.set(2, new DataByteArray(abs1));
    bagColl.add(tupColl1);
    tupColl2.set(0, 123.456789);
    tupColl2.set(1, 100);
    abs2[0] = 21;
    abs2[1] = 22;
    abs2[2] = 23;
    abs2[3] = 24;
    tupColl2.set(2, new DataByteArray(abs2));
    bagColl.add(tupColl2);
    tuple.set(3, bagColl);

    int row = 0;
    inserter.insert(new BytesWritable(String.format("k%d%d", part + 1, row + 1)
        .getBytes()), tuple);
View Full Code Here

    map.put("a", "x");
    map.put("b", "y");
    map.put("c", "z");
    tuple.set(2, map);

    DataBag bagColl = TypesUtils.createBag();
    Schema schColl = schema.getColumn(3).getSchema().getColumn(0).getSchema();
    Tuple tupColl1 = TypesUtils.createTuple(schColl);
    Tuple tupColl2 = TypesUtils.createTuple(schColl);
    byte[] abs1 = new byte[3];
    byte[] abs2 = new byte[4];
    tupColl1.set(0, 3.1415926);
    tupColl1.set(1, 1.6);
    abs1[0] = 11;
    abs1[1] = 12;
    abs1[2] = 13;
    tupColl1.set(2, new DataByteArray(abs1));
    bagColl.add(tupColl1);
    tupColl2.set(0, 123.456789);
    tupColl2.set(1, 100);
    abs2[0] = 21;
    abs2[1] = 22;
    abs2[2] = 23;
    abs2[3] = 24;
    tupColl2.set(2, new DataByteArray(abs2));
    bagColl.add(tupColl2);
    tuple.set(3, bagColl);

    int row = 0;
    inserter.insert(new BytesWritable(String.format("k%d%d", part + 1, row + 1)
        .getBytes()), tuple);
View Full Code Here

        return res;
    }

    public Result getNext(DataBag db) throws ExecException {
        Result ret = null;
        DataBag tmpBag = BagFactory.getInstance().newDefaultBag();
        for(ret = getNext(dummyTuple);ret.returnStatus!=POStatus.STATUS_EOP;ret=getNext(dummyTuple)){
            if(ret.returnStatus == POStatus.STATUS_ERR) return ret;
            tmpBag.add((Tuple)ret.result);
        }
        ret.result = tmpBag;
        ret.returnStatus = (tmpBag.size() == 0)? POStatus.STATUS_EOP : POStatus.STATUS_OK;
        return ret;
    }
View Full Code Here

    public DataBag exec(Tuple input) throws IOException {
        try{
            numInputs = (Integer)input.get(0);
            myNumber = (Integer)input.get(1);
       
            DataBag output = mBagFactory.newDefaultBag();
           
            numGroupsPerInput = (int) Math.ceil(Math.pow(DEFAULT_PARALLELISM, 1.0/numInputs));
            int numGroupsGoingTo = (int) Math.pow(numGroupsPerInput,numInputs - 1);
               
            int[] digits = new int[numInputs];
            for (int i=0; i<numInputs; i++){
                if (i == myNumber){
                    Random r = new Random(System.currentTimeMillis());
                    digits[i] = r.nextInt(numGroupsPerInput);
                }else{
                    digits[i] = 0;
                }
            }
               
            for (int i=0; i<numGroupsGoingTo; i++){
                output.add(toTuple(digits));
                next(digits);
            }           
   
            return output;
        }catch(ExecException e){
View Full Code Here

TOP

Related Classes of org.apache.pig.data.DataBag

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.