Package org.apache.pig

Examples of org.apache.pig.ResourceSchema


        ResourceFieldSchema mapfs = new ResourceFieldSchema();
        mapfs.setType(DataType.MAP);
       
        ResourceFieldSchema tuplefs = getSmallTupleFieldSchema();
       
        ResourceSchema outSchema = new ResourceSchema();
        outSchema.setFields(new ResourceFieldSchema[]{bagfs, stringfs, stringfs, doublefs, floatfs,
                intfs, longfs, mapfs, tuplefs});
        ResourceFieldSchema outfs = new ResourceFieldSchema();
        outfs.setSchema(outSchema);
        outfs.setType(DataType.TUPLE);
       
View Full Code Here


    }

    /**
     * Builds a field schema of type {@link DataType#BAG} whose nested schema holds
     * exactly one field: the field schema returned by
     * {@code getSmallBagTextTupleFieldSchema()}.
     *
     * <p>NOTE(review): the helper's name suggests the inner field is a tuple schema,
     * but its definition is not visible here; confirm before relying on that.
     *
     * @return a new bag-typed {@code ResourceFieldSchema} wrapping the helper's field schema
     * @throws IOException declared by the signature; which call raises it is not
     *         visible in this excerpt
     */
    public static ResourceFieldSchema getFullTupTextDataBagFieldSchema() throws IOException{
        // The single element schema of the bag comes from a sibling helper.
        ResourceFieldSchema tuplefs = getSmallBagTextTupleFieldSchema();
       
        // Wrap that one field in a schema, then attach the schema to a BAG-typed field.
        ResourceSchema outBagSchema = new ResourceSchema();
        outBagSchema.setFields(new ResourceFieldSchema[]{tuplefs});
        ResourceFieldSchema outBagfs = new ResourceFieldSchema();
        outBagfs.setSchema(outBagSchema);
        outBagfs.setType(DataType.BAG);
       
        return outBagfs;
View Full Code Here

   
    /**
     * Builds a bag field schema whose single element is a tuple field schema with
     * {@code column} fields.
     *
     * <p>Every slot of the tuple refers to the same {@code ResourceFieldSchema}
     * instance, which is typed {@link DataType#INTEGER}.
     *
     * <p>NOTE(review): the method name says "Float" but the element type set below is
     * {@code DataType.INTEGER}; confirm with the callers which one is intended.
     *
     * @param column number of fields in the inner tuple; a negative value makes the
     *        array allocation throw {@code NegativeArraySizeException}
     * @return a new {@link DataType#BAG}-typed field schema wrapping the tuple schema
     * @throws IOException declared by the signature; which call raises it is not
     *         visible in this excerpt
     */
    public static ResourceFieldSchema getFloatDataBagFieldSchema(int column) throws IOException {
        // One shared element schema, reused for every tuple position.
        ResourceFieldSchema intfs = new ResourceFieldSchema();
        intfs.setType(DataType.INTEGER);
       
        // Tuple schema: `column` references to the shared element schema.
        ResourceSchema tupleSchema = new ResourceSchema();
        ResourceFieldSchema[] fss = new ResourceFieldSchema[column];
        for (int i=0;i<column;i++) {
            fss[i] = intfs;
        }
        tupleSchema.setFields(fss);
        ResourceFieldSchema tuplefs = new ResourceFieldSchema();
        tuplefs.setSchema(tupleSchema);
        tuplefs.setType(DataType.TUPLE);
       
        // Bag schema: a single field, the tuple built above.
        ResourceSchema bagSchema = new ResourceSchema();
        bagSchema.setFields(new ResourceFieldSchema[]{tuplefs});
        ResourceFieldSchema bagfs = new ResourceFieldSchema();
        bagfs.setSchema(bagSchema);
        bagfs.setType(DataType.BAG);
       
        return bagfs;
View Full Code Here

        ResourceFieldSchema doublefs = new ResourceFieldSchema();
        doublefs.setType(DataType.DOUBLE);
        ResourceFieldSchema booleanfs = new ResourceFieldSchema();
        booleanfs.setType(DataType.BOOLEAN);
       
        ResourceSchema tupleSchema = new ResourceSchema();
        tupleSchema.setFields(new ResourceFieldSchema[]{stringfs, longfs, intfs, doublefs, floatfs, stringfs, intfs, doublefs, floatfs, booleanfs});
        ResourceFieldSchema tuplefs = new ResourceFieldSchema();
        tuplefs.setSchema(tupleSchema);
        tuplefs.setType(DataType.TUPLE);
       
        return tuplefs;
View Full Code Here

    }
   
    public void testBytesToComplexTypeMisc() throws IOException, ParseException {
        String s = "(a,b";
        Schema schema = Utils.getSchemaFromString("t:tuple(a:chararray, b:chararray)");
        ResourceFieldSchema rfs = new ResourceSchema(schema).getFields()[0];
        Tuple t = ps.getLoadCaster().bytesToTuple(s.getBytes(), rfs);
        assertTrue(t==null);
       
        s = "{(a,b}";
        schema = Utils.getSchemaFromString("b:bag{t:tuple(a:chararray, b:chararray)}");
        rfs = new ResourceSchema(schema).getFields()[0];
        DataBag b = ps.getLoadCaster().bytesToBag(s.getBytes(), rfs);
        assertTrue(b==null);
       
        s = "{(a,b)";
        schema = Utils.getSchemaFromString("b:bag{t:tuple(a:chararray, b:chararray)}");
        rfs = new ResourceSchema(schema).getFields()[0];
        b = ps.getLoadCaster().bytesToBag(s.getBytes(), rfs);
        assertTrue(b==null);
       
        s = "[ab]";
        Map<String, Object> m = ps.getLoadCaster().bytesToMap(s.getBytes());
        assertTrue(m==null);
       
        s = "[a#b";
        m = ps.getLoadCaster().bytesToMap(s.getBytes());
        assertTrue(m==null);
       
        s = "[a#]";
        m = ps.getLoadCaster().bytesToMap(s.getBytes());
        Map.Entry<String, Object> entry = m.entrySet().iterator().next();
        assertTrue(entry.getKey().equals("a"));
        assertTrue(entry.getValue()==null);
       
        s = "[#]";
        m = ps.getLoadCaster().bytesToMap(s.getBytes());
        assertTrue(m==null);
       
        s = "(a,b)";
        schema = Utils.getSchemaFromString("t:tuple()");
        rfs = new ResourceSchema(schema).getFields()[0];
        t = ps.getLoadCaster().bytesToTuple(s.getBytes(), rfs);
        assertTrue(t.size()==1);
        assertTrue(t.get(0) instanceof DataByteArray);
        assertTrue(t.get(0).toString().equals("a,b"));
       
        s = "[a#(1,2,3)]";
        m = ps.getLoadCaster().bytesToMap(s.getBytes());
        entry = m.entrySet().iterator().next();
        assertTrue(entry.getKey().equals("a"));
        assertTrue(entry.getValue() instanceof DataByteArray);
        assertTrue(entry.getValue().toString().equals("(1,2,3)"));
       
        s = "(a,b,(123,456,{(1,2,3)}))";
        schema = Utils.getSchemaFromString("t:tuple()");
        rfs = new ResourceSchema(schema).getFields()[0];
        t = ps.getLoadCaster().bytesToTuple(s.getBytes(), rfs);
        assertTrue(t.size()==1);
        assertTrue(t.get(0) instanceof DataByteArray);
        assertTrue(t.get(0).toString().equals("a,b,(123,456,{(1,2,3)})"));
       
        s = "(a,b,(123,456,{(1,2,3}))";
        schema = Utils.getSchemaFromString("t:tuple()");
        rfs = new ResourceSchema(schema).getFields()[0];
        t = ps.getLoadCaster().bytesToTuple(s.getBytes(), rfs);
        assertTrue(t==null);
    }
View Full Code Here

    }

    private LogicalSchema getSchemaFromMetaData() throws FrontendException {
        if (getLoadFunc()!=null && getLoadFunc() instanceof LoadMetadata) {
            try {
                ResourceSchema resourceSchema = ((LoadMetadata)loadFunc).getSchema(getFileSpec().getFileName(), new Job(conf));
                Schema oldSchema = Schema.getPigSchema(resourceSchema);
                return Util.translateSchema(oldSchema);
            } catch (IOException e) {
                throw new FrontendException("Cannot get schema from loadFunc " + loadFunc.getClass().getName(), 2245, e);
            }
View Full Code Here

                int errCode = 2104;
                String msg = "Error while determining schema of SequenceFileStorage data.";
                throw new ExecException(msg, errCode, PigException.BUG, e);
            }
        }
        return new ResourceSchema(s);
    }
View Full Code Here

        Which is to say, columns that have metadata will be returned as named tuples, but unknown columns will go into a bag.
        This way, wide rows can still be handled by the bag, but known columns can easily be referenced.
         */

        // top-level schema, no type
        ResourceSchema schema = new ResourceSchema();

        // get default marshallers and validators
        Map<MarshallerType, AbstractType> marshallers = getDefaultMarshallers(cfDef);
        Map<ByteBuffer,AbstractType> validators = getValidatorMap(cfDef);

        // add key
        ResourceFieldSchema keyFieldSchema = new ResourceFieldSchema();
        keyFieldSchema.setName("key");
        keyFieldSchema.setType(getPigType(marshallers.get(MarshallerType.KEY_VALIDATOR)));

        ResourceSchema bagSchema = new ResourceSchema();
        ResourceFieldSchema bagField = new ResourceFieldSchema();
        bagField.setType(DataType.BAG);
        bagField.setName("columns");
        // inside the bag, place one tuple with the default comparator/validator schema
        ResourceSchema bagTupleSchema = new ResourceSchema();
        ResourceFieldSchema bagTupleField = new ResourceFieldSchema();
        bagTupleField.setType(DataType.TUPLE);
        ResourceFieldSchema bagcolSchema = new ResourceFieldSchema();
        ResourceFieldSchema bagvalSchema = new ResourceFieldSchema();
        bagcolSchema.setName("name");
        bagvalSchema.setName("value");
        bagcolSchema.setType(getPigType(marshallers.get(MarshallerType.COMPARATOR)));
        bagvalSchema.setType(getPigType(marshallers.get(MarshallerType.DEFAULT_VALIDATOR)));
        bagTupleSchema.setFields(new ResourceFieldSchema[] { bagcolSchema, bagvalSchema });
        bagTupleField.setSchema(bagTupleSchema);
        bagSchema.setFields(new ResourceFieldSchema[] { bagTupleField });
        bagField.setSchema(bagSchema);

        // will contain all fields for this schema
        List<ResourceFieldSchema> allSchemaFields = new ArrayList<ResourceFieldSchema>();
        // add the key first, then the indexed columns, and finally the bag
        allSchemaFields.add(keyFieldSchema);

        if (!widerows && (cfInfo.compactCqlTable || !cfInfo.cql3Table))
        {
            // defined validators/indexes
            for (ColumnDef cdef : cfDef.column_metadata)
            {
                // make a new tuple for each col/val pair
                ResourceSchema innerTupleSchema = new ResourceSchema();
                ResourceFieldSchema innerTupleField = new ResourceFieldSchema();
                innerTupleField.setType(DataType.TUPLE);
                innerTupleField.setSchema(innerTupleSchema);
                innerTupleField.setName(new String(cdef.getName()));

                ResourceFieldSchema idxColSchema = new ResourceFieldSchema();
                idxColSchema.setName("name");
                idxColSchema.setType(getPigType(marshallers.get(MarshallerType.COMPARATOR)));

                ResourceFieldSchema valSchema = new ResourceFieldSchema();
                AbstractType validator = validators.get(cdef.name);
                if (validator == null)
                    validator = marshallers.get(MarshallerType.DEFAULT_VALIDATOR);
                valSchema.setName("value");
                valSchema.setType(getPigType(validator));

                innerTupleSchema.setFields(new ResourceFieldSchema[] { idxColSchema, valSchema });
                allSchemaFields.add(innerTupleField);
            }  
        }

        // bag at the end for unknown columns
View Full Code Here

            throws IOException {
        Schema schema = new Schema();
        schema.add(new FieldSchema("subject", DataType.CHARARRAY));
        schema.add(new FieldSchema("object", DataType.CHARARRAY));
        schema.add(new FieldSchema("lang", DataType.CHARARRAY));
        return new ResourceSchema(schema);
    }
View Full Code Here

    public ResourceSchema getSchema(String location, Job job)
            throws IOException {
        Schema schema = new Schema();
        schema.add(new FieldSchema("subject", DataType.CHARARRAY));
        schema.add(new FieldSchema("object", DataType.CHARARRAY));
        return new ResourceSchema(schema);
    }
View Full Code Here

TOP

Related Classes of org.apache.pig.ResourceSchema

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle, Inc. Contact coftware#gmail.com.