Package org.apache.pig

Examples of org.apache.pig.ResourceSchema$ResourceFieldSchema


                createSchema("{t:(chararray, chararray)}"))), is(expected));
    }

    private ResourceSchema createSchema(String schema) {
        try {
            return new ResourceSchema(Utils.getSchemaFromString(schema));
        } catch (Exception ex) {
            throw new RuntimeException(ex);
        }
    }
View Full Code Here


                        Object object = pt.getTuple().get(i);
                        if (DataType.isAtomic(type)) {
                            return object.toString();
                        }
                        else if (type == DataType.TUPLE) {
                            ResourceSchema rs = new ResourceSchema();
                            rs.setFields(new ResourceFieldSchema[] { field });
                            PigTuple rpt = new PigTuple(rs);
                            rpt.setTuple((Tuple) object);
                        }
                        else {
                            Assert.isTrue(false, String.format("Unsupported data type [%s] for field [%s]; use only 'primitives' or 'tuples'", DataType.findTypeName(type), getFieldName()));
View Full Code Here

                createSchema("bag: {t:(chararray, chararray)}")))), is(expected));
    }

    private ResourceSchema createSchema(String schema) {
        try {
            return new ResourceSchema(Utils.getSchemaFromString(schema));
        } catch (Exception ex) {
            throw new RuntimeException(ex);
        }
    }
View Full Code Here

            throw new EsHadoopSerializationException("Big integers are not supported by Elasticsearch - consider using a different type (such as string)");
        // DateTime introduced in Pig 12
        case 70: //DataType.BIGDECIMAL
            throw new EsHadoopSerializationException("Big decimals are not supported by Elasticsearch - consider using a different type (such as string)");
        case DataType.MAP:
            ResourceSchema nestedSchema = field.getSchema();

            // empty tuple shortcut
            if (nestedSchema == null) {
                generator.writeBeginObject();
                generator.writeEndObject();
                break;
            }

            ResourceFieldSchema[] nestedFields = nestedSchema.getFields();

            generator.writeBeginObject();
            // Pig maps are actually String -> Object association so we can save the key right away
            for (Map.Entry<?, ?> entry : ((Map<?, ?>) object).entrySet()) {
                generator.writeFieldName(alias.toES(entry.getKey().toString()));
                if (!write(entry.getValue(), nestedFields[0], generator)) {
                    return false;
                }
            }
            generator.writeEndObject();
            break;

        case DataType.TUPLE:
            return writeTuple(object, field, generator, useTupleFieldNames, false);

        case DataType.BAG:
            nestedSchema = field.getSchema();

            // empty tuple shortcut
            if (nestedSchema == null) {
                generator.writeBeginArray();
                generator.writeEndArray();
                break;
            }

            ResourceFieldSchema bagType = nestedSchema.getFields()[0];

            generator.writeBeginArray();
            for (Tuple tuple : (DataBag) object) {
                if (!write(tuple, bagType, generator)) {
                    return false;
View Full Code Here

    private boolean writeRootTuple(Tuple tuple, ResourceFieldSchema field, Generator generator, boolean writeTupleFieldNames) {
        return writeTuple(tuple, field, generator, writeTupleFieldNames, true);
    }

    private boolean writeTuple(Object object, ResourceFieldSchema field, Generator generator, boolean writeTupleFieldNames, boolean isRoot) {
        ResourceSchema nestedSchema = field.getSchema();

        boolean result = true;
        boolean writeAsObject = isRoot || writeTupleFieldNames;

        boolean isEmpty = (nestedSchema == null);

        if (!isEmpty) {
            // check if the tuple contains only empty fields
            boolean allEmpty = true;
            for (ResourceFieldSchema nestedField : nestedSchema.getFields()) {
                allEmpty &= (nestedField.getSchema() == null && PigUtils.isComplexType(nestedField));
            }
            isEmpty = allEmpty;
        }

        // empty tuple shortcut
        if (isEmpty) {
            if (!isRoot) {
                generator.writeBeginArray();
            }
            if (writeAsObject) {
                generator.writeBeginObject();
                generator.writeEndObject();
            }
            if (!isRoot) {
                generator.writeEndArray();
            }
            return result;
        }

        ResourceFieldSchema[] nestedFields = nestedSchema.getFields();

        // use getAll instead of get(int) to avoid having to handle Exception...
        List<Object> tuples = ((Tuple) object).getAll();

        if (!isRoot) {
View Full Code Here

        } else {
            log.warn("Could not find schema file for "+location);
            return null;
        }
        log.debug("Found schema file: "+schemaFile.toString());
        ResourceSchema resourceSchema = null;
        try {
            resourceSchema = new ObjectMapper().readValue(schemaFile.open(), ResourceSchema.class);
        } catch (JsonParseException e) {
            log.warn("Unable to load Resource Schema for "+location);
            e.printStackTrace();
View Full Code Here

            Properties p = UDFContext.getUDFContext().getUDFProperties(this.getClass(),
                    new String[] {signature});
            String serializedSchema = p.getProperty(signature+".schema");
            if (serializedSchema == null) return tup;
            try {
                schema = new ResourceSchema(Utils.getSchemaFromString(serializedSchema));
            } catch (ParseException e) {
                mLog.error("Unable to parse serialized schema " + serializedSchema, e);
            }
        }
View Full Code Here

       
        pig.registerQuery("A = LOAD 'originput2' using org.apache.pig.piggybank.storage.PigStorageSchema() " +
        "as (f:int);");
        pig.registerQuery("B = group A by f;");
        Schema origSchema = pig.dumpSchema("B");
        ResourceSchema rs1 = new ResourceSchema(origSchema);
        pig.registerQuery("STORE B into 'bout' using org.apache.pig.piggybank.storage.PigStorageSchema();");
       
        pig.registerQuery("C = LOAD 'bout' using org.apache.pig.piggybank.storage.PigStorageSchema();");
        Schema genSchema = pig.dumpSchema("C");
        ResourceSchema rs2 = new ResourceSchema(genSchema);
        Assert.assertTrue("generated schema equals original" , ResourceSchema.equals(rs1, rs2));
       
        pig.registerQuery("C1 = LOAD 'bout' as (a0:int, A: {t: (f:int) } );");
        pig.registerQuery("D = foreach C1 generate a0, SUM(A);");
View Full Code Here

        pig.registerQuery("A = LOAD 'originput' using org.apache.pig.piggybank.storage.PigStorageSchema(',') " +
        "as (f1:chararray, f2:int);");
        pig.registerQuery("B = group A by f1;");
        Schema origSchema = pig.dumpSchema("B");
        ResourceSchema rs1 = new ResourceSchema(origSchema);
        pig.registerQuery("STORE B into 'cout' using org.apache.pig.piggybank.storage.PigStorageSchema();");
       
        pig.registerQuery("C = LOAD 'cout' using org.apache.pig.piggybank.storage.PigStorageSchema();");
        Schema genSchema = pig.dumpSchema("C");
        ResourceSchema rs2 = new ResourceSchema(genSchema);
        Assert.assertTrue("generated schema equals original" , ResourceSchema.equals(rs1, rs2));
       
        pig.registerQuery("C1 = LOAD 'cout' as (a0:chararray, A: {t: (f1:chararray, f2:int) } );");
        pig.registerQuery("D = foreach C1 generate a0, SUM(A.f2);");
View Full Code Here

        ResourceFieldSchema stringfs = new ResourceFieldSchema();
        stringfs.setType(DataType.CHARARRAY);
        ResourceFieldSchema intfs = new ResourceFieldSchema();
        intfs.setType(DataType.INTEGER);
       
        ResourceSchema tupleSchema = new ResourceSchema();
        tupleSchema.setFields(new ResourceFieldSchema[]{intfs, stringfs});
        ResourceFieldSchema tuplefs = new ResourceFieldSchema();
        tuplefs.setSchema(tupleSchema);
        tuplefs.setType(DataType.TUPLE);
       
        return tuplefs;
View Full Code Here

TOP

Related Classes of org.apache.pig.ResourceSchema$ResourceFieldSchema

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.