Package org.apache.hcatalog.data.schema

Examples of org.apache.hcatalog.data.schema.HCatSchema
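
Before the collected examples, a minimal sketch of building an HCatSchema by hand and looking fields up by name and by position. The class and column names are illustrative, not taken from any project below.

import java.util.ArrayList;
import java.util.List;

import org.apache.hcatalog.common.HCatException;
import org.apache.hcatalog.data.schema.HCatFieldSchema;
import org.apache.hcatalog.data.schema.HCatFieldSchema.Type;
import org.apache.hcatalog.data.schema.HCatSchema;

public class HCatSchemaBasics {
    public static void main(String[] args) throws HCatException {
        // Build a two-column schema field by field.
        List<HCatFieldSchema> fields = new ArrayList<HCatFieldSchema>();
        fields.add(new HCatFieldSchema("id", Type.INT, "row id"));
        fields.add(new HCatFieldSchema("name", Type.STRING, "display name"));
        HCatSchema schema = new HCatSchema(fields);

        // Name-based and position-based lookup, as used throughout the
        // examples below; getPosition returns null for unknown columns.
        Integer pos = schema.getPosition("name");   // 1
        HCatFieldSchema first = schema.get(0);      // the "id" column
        System.out.println(pos + ", " + first.getName());
    }
}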


    /**
     * Gets the table schema for the table specified by an earlier
     * HCatInputFormat.setInput call on this job; the information is only
     * available once setInput has stored the InputJobInfo in the
     * configuration.
     *
     * @param conf the job Configuration
     * @return the table schema, data columns followed by partition columns
     * @throws IOException if HCatInputFormat.setInput has not been called
     *                     for the current context
     */
    public static HCatSchema getTableSchema(Configuration conf)
        throws IOException {
        InputJobInfo inputJobInfo = getJobInfo(conf);
        HCatSchema allCols = new HCatSchema(new LinkedList<HCatFieldSchema>());
        // Data columns first, then partition columns, matching Hive's layout.
        for (HCatFieldSchema field :
            inputJobInfo.getTableInfo().getDataColumns().getFields()) {
            allCols.append(field);
        }
        for (HCatFieldSchema field :
            inputJobInfo.getTableInfo().getPartitionColumns().getFields()) {
            allCols.append(field);
        }
        return allCols;
    }
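A hedged usage sketch for the method above. It assumes the method lives on the input-format class (HCatBaseInputFormat in HCatalog's hierarchy) and that HCatInputFormat.setInput has already been called for the job, which is what stores the InputJobInfo this method reads.

    // Sketch only: `job` is the MapReduce Job on which setInput was called.
    Configuration conf = job.getConfiguration();
    HCatSchema tableSchema = HCatBaseInputFormat.getTableSchema(conf);

    // Data columns come first, partition columns last, matching the append
    // order in getTableSchema above.
    for (HCatFieldSchema field : tableSchema.getFields()) {
        System.out.println(field.getName() + " : " + field.getTypeString());
    }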


                    throw new HCatException(ErrorType.ERROR_EXCEED_MAXPART, "total number of partitions is " + parts.size());
                }

                // populate partition info
                for (Partition ptn : parts) {
                    HCatSchema schema = HCatUtil.extractSchema(
                        new org.apache.hadoop.hive.ql.metadata.Partition(table, ptn));
                    PartInfo partInfo = extractPartInfo(schema, ptn.getSd(),
                        ptn.getParameters(), conf, inputJobInfo);
                    partInfo.setPartitionValues(InternalUtil.createPtnKeyValueMap(table, ptn));
                    partInfoList.add(partInfo);
                }

            } else {
                // Non-partitioned table.
                HCatSchema schema = HCatUtil.extractSchema(table);
                PartInfo partInfo = extractPartInfo(schema, table.getTTable().getSd(),
                    table.getParameters(), conf, inputJobInfo);
                partInfo.setPartitionValues(new HashMap<String, String>());
                partInfoList.add(partInfo);
            }

        // We also need to update the output schema with these deletions.

        // Note that output storage handlers never see partition columns in the
        // data or the schema.

        HCatSchema schemaWithoutParts = new HCatSchema(schema.getFields());
        for (String partKey : partMap.keySet()) {
            Integer idx = schema.getPosition(partKey);
            if (idx != null) {
                posOfPartCols.add(idx);
                schemaWithoutParts.remove(schema.get(partKey));
            }
        }

        // Also, if dynamic partitioning is being used, record the columns that
        // will be specified dynamically. These are partition keys too, so they
        // must likewise be removed from the output schema and from partcols.

        if (jobInfo.isDynamicPartitioningUsed()) {
            for (String partKey : jobInfo.getDynamicPartitioningKeys()) {
                Integer idx = schema.getPosition(partKey);
                if (idx != null) {
                    posOfPartCols.add(idx);
                    posOfDynPartCols.add(idx);
                    schemaWithoutParts.remove(schema.get(partKey));
                }
            }
        }

        HCatUtil.validatePartitionSchema(
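The pruning above can be tried in isolation. A small sketch, run inside a method declared to throw HCatException, that treats "dt" as the lone partition key (the schema string and key name are made up):

    HCatSchema schema = HCatSchemaUtils.getHCatSchema("id:int,name:string,dt:string");
    // Rebuild a schema from the same field list so partition keys can be
    // removed without touching the original.
    HCatSchema schemaWithoutParts = new HCatSchema(schema.getFields());

    Integer idx = schema.getPosition("dt");          // 2: remember where it sat
    if (idx != null) {
        schemaWithoutParts.remove(schema.get("dt")); // handler now sees id, name only
    }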

            // else - Pig's optimizer never invoked the pushProjection method, so
            // all fields are needed; hand HCatInputFormat the full table schema
            // as its output schema rather than a pruned one.
            if (HCatUtil.checkJobContextIfRunningFromBackend(job)) {
                try {
                    HCatSchema hcatTableSchema = (HCatSchema) udfProps.get(HCatConstants.HCAT_TABLE_SCHEMA);
                    outputSchema = hcatTableSchema;
                    HCatInputFormat.setOutputSchema(job, outputSchema);
                } catch (Exception e) {
                    throw new IOException(e);
                }
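For contrast, a hedged sketch of the other branch, where Pig did push a projection: the loader narrows the stored table schema before handing it to HCatInputFormat (the projected column name is invented):

    // Keep only the requested columns, then ask HCatInputFormat to produce
    // exactly that narrowed schema.
    HCatSchema tableSchema = (HCatSchema) udfProps.get(HCatConstants.HCAT_TABLE_SCHEMA);
    List<HCatFieldSchema> projected = new ArrayList<HCatFieldSchema>();
    projected.add(tableSchema.get("name"));  // hypothetical projected column
    HCatInputFormat.setOutputSchema(job, new HCatSchema(projected));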

            .setBoolean(HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION, true);

        Table table = phutil.getTable(location,
            hcatServerUri != null ? hcatServerUri : PigHCatUtil.getHCatServerUri(job),
            PigHCatUtil.getHCatServerPrincipal(job));
        HCatSchema hcatTableSchema = HCatUtil.getTableSchemaWithPtnCols(table);
        try {
            PigHCatUtil.validateHCatTableSchemaFollowsPigRules(hcatTableSchema);
        } catch (IOException e) {
                throw new PigException(
                    "Table schema incompatible for reading through HCatLoader: " + e.getMessage()
                        + " [Table schema was " + hcatTableSchema.toString() + "]",
                    PigHCatUtil.PIG_EXCEPTION_CODE, e);
        }
        storeInUDFContext(signature, HCatConstants.HCAT_TABLE_SCHEMA, hcatTableSchema);
        outputSchema = hcatTableSchema;
        return PigHCatUtil.getResourceSchema(hcatTableSchema);

                fieldSchemas.add(getHCatFSFromPigFS(fSchema, hcatFieldSchema));
            } catch (HCatException he) {
                throw new FrontendException(he.getMessage(), PigHCatUtil.PIG_EXCEPTION_CODE, he);
            }
        }
        return new HCatSchema(fieldSchemas);
    }
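End to end, a Pig schema such as name:chararray, age:int should come out of this converter as its HCat equivalent. A one-line sketch of that expected result, assuming HCatSchemaUtils' comma-separated parser (exercised by the test near the end of this page):

    HCatSchema expected = HCatSchemaUtils.getHCatSchema("name:string,age:int");
    System.out.println(expected.getFields().size());  // 2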

                field = bagSchema.getField(0).schema.getField(0);
            } else {
                field = bagSchema.getField(0);
            }
            arrFields.add(getHCatFSFromPigFS(field, hcatFieldSchema == null ? null : hcatFieldSchema.getArrayElementSchema().get(0)));
            return new HCatFieldSchema(fSchema.alias, Type.ARRAY, new HCatSchema(arrFields), "");

        case DataType.TUPLE:
            List<String> fieldNames = new ArrayList<String>();
            List<HCatFieldSchema> hcatFSs = new ArrayList<HCatFieldSchema>();
            HCatSchema structSubSchema = hcatFieldSchema == null ? null : hcatFieldSchema.getStructSubSchema();
            List<FieldSchema> fields = fSchema.schema.getFields();
            for (int i = 0; i < fields.size(); i++) {
                FieldSchema fieldSchema = fields.get(i);
                fieldNames.add(fieldSchema.alias);
                hcatFSs.add(getHCatFSFromPigFS(fieldSchema, structSubSchema == null ? null : structSubSchema.get(i)));
            }
            return new HCatFieldSchema(fSchema.alias, Type.STRUCT, new HCatSchema(hcatFSs), "");

        case DataType.MAP: {
            // Pig's schema contains no type information about a map's keys and
            // values. So, if it is a new column, assume <string,string>; if it
            // is an existing column, return whatever that column contains.

            HCatFieldSchema valFS;
            List<HCatFieldSchema> valFSList = new ArrayList<HCatFieldSchema>(1);

            if (hcatFieldSchema != null) {
                return new HCatFieldSchema(fSchema.alias, Type.MAP, Type.STRING, hcatFieldSchema.getMapValueSchema(), "");
            }

            // Column not found in target table, so it is a new column; its
            // schema is map<string,string>.
            valFS = new HCatFieldSchema(fSchema.alias, Type.STRING, "");
            valFSList.add(valFS);
            return new HCatFieldSchema(fSchema.alias, Type.MAP, Type.STRING, new HCatSchema(valFSList), "");
        }

        default:
            throw new FrontendException("Unsupported type: " + type + "  in Pig's schema", PigHCatUtil.PIG_EXCEPTION_CODE);
        }
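The MAP fall-through is worth pinning down with a concrete sketch: a brand-new Pig map column m with untyped values lands as map<string,string> on the HCat side, built exactly as in the case above (the column name is invented):

    // HCat map keys are strings here, and with no type information from Pig
    // the value side defaults to string as well.
    List<HCatFieldSchema> valFSList = new ArrayList<HCatFieldSchema>(1);
    valFSList.add(new HCatFieldSchema("m", Type.STRING, ""));
    HCatFieldSchema mapCol =
        new HCatFieldSchema("m", Type.MAP, Type.STRING, new HCatSchema(valFSList), "");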

            case STRUCT:
                if (pigObj == null) {
                    return null;
                }
                HCatSchema structSubSchema = hcatFS.getStructSubSchema();
                // Unwrap the tuple.
                List<Object> all = ((Tuple) pigObj).getAll();
                ArrayList<Object> converted = new ArrayList<Object>(all.size());
                for (int i = 0; i < all.size(); i++) {
                    converted.add(getJavaObj(all.get(i), structSubSchema.get(i)));
                }
                return converted;

            case ARRAY:
                if (pigObj == null) {
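A hedged, self-contained rendering of the STRUCT branch: the Pig tuple is unwrapped and each element converted against the matching field of the struct sub-schema. Here the elements are already primitives, so they pass through; field names and values are made up, and the fragment assumes a method that may throw Exception:

    HCatSchema structSubSchema = HCatSchemaUtils.getHCatSchema("name:string,age:int");
    Tuple pigObj = TupleFactory.getInstance().newTuple(2);
    pigObj.set(0, "ada");
    pigObj.set(1, 36);

    List<Object> converted = new ArrayList<Object>(pigObj.size());
    for (int i = 0; i < pigObj.size(); i++) {
        // The real code recurses: getJavaObj(all.get(i), structSubSchema.get(i)).
        converted.add(pigObj.get(i));
    }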

                    // Map values can be primitive or complex
                }
                break;

            case DataType.BAG:
                HCatSchema arrayElementSchema = hcatField == null ? null : hcatField.getArrayElementSchema();
                for (FieldSchema innerField : pigField.schema.getField(0).schema.getFields()) {
                    validateSchema(innerField, getColFromSchema(pigField.alias, arrayElementSchema));
                }
                break;

            case DataType.TUPLE:
                HCatSchema structSubSchema = hcatField == null ? null : hcatField.getStructSubSchema();
                for (FieldSchema innerField : pigField.schema.getFields()) {
                    validateSchema(innerField, getColFromSchema(pigField.alias, structSubSchema));
                }
                break;
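The accessors these branches lean on can be seen against a concrete column. A sketch, assuming the type-string parser accepts complex types, for tags:array<struct<a:int,b:string>>:

    HCatSchema s = HCatSchemaUtils.getHCatSchema("tags:array<struct<a:int,b:string>>");
    HCatFieldSchema tags = s.get("tags");

    // For an array of structs the element schema wraps a single struct field,
    // hence the getArrayElementSchema().get(0) pattern used earlier on this page.
    HCatSchema elementSchema = tags.getArrayElementSchema();
    HCatSchema structSchema = elementSchema.get(0).getStructSubSchema();
    System.out.println(structSchema.getFields().size());  // 2: a and b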

     * Tests the type-specific getters and setters on HCatRecord, each driven
     * by an HCatSchema.
     * @throws HCatException
     */
    public void testGetSetByType1() throws HCatException {
        HCatRecord inpRec = getHCatRecords()[0];
        HCatRecord newRec = new DefaultHCatRecord(inpRec.size());
        HCatSchema hsch =
                HCatSchemaUtils.getHCatSchema(
                        "a:tinyint,b:smallint,c:int,d:bigint,e:float,f:double,g:boolean,h:string,i:binary,j:string");


        newRec.setByte("a", hsch, inpRec.getByte("a", hsch));
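In the same spirit as the test, a minimal round-trip through the typed setters and getters, inside a method declared to throw HCatException (column names and values invented):

    HCatSchema hsch = HCatSchemaUtils.getHCatSchema("a:tinyint,h:string");
    HCatRecord rec = new DefaultHCatRecord(hsch.getFields().size());

    // Both the setter and the getter resolve the column position via the schema.
    rec.setByte("a", hsch, (byte) 7);
    rec.setString("h", hsch, "hello");
    System.out.println(rec.getByte("a", hsch) + " / " + rec.getString("h", hsch));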
