Package org.apache.pig.impl.logicalLayer.schema

Examples of org.apache.pig.impl.logicalLayer.schema.Schema


                              "8", "9"});
       
        pig.registerQuery("A = LOAD 'originput2' using org.apache.pig.piggybank.storage.PigStorageSchema() " +
        "as (f:int);");
        pig.registerQuery("B = group A by f;");
        Schema origSchema = pig.dumpSchema("B");
        ResourceSchema rs1 = new ResourceSchema(origSchema);
        pig.registerQuery("STORE B into 'bout' using org.apache.pig.piggybank.storage.PigStorageSchema();");
       
        pig.registerQuery("C = LOAD 'bout' using org.apache.pig.piggybank.storage.PigStorageSchema();");
        Schema genSchema = pig.dumpSchema("C");
        ResourceSchema rs2 = new ResourceSchema(genSchema);
        Assert.assertTrue("generated schema equals original" , ResourceSchema.equals(rs1, rs2));
       
        pig.registerQuery("C1 = LOAD 'bout' as (a0:int, A: {t: (f:int) } );");
        pig.registerQuery("D = foreach C1 generate a0, SUM(A);");
View Full Code Here


    public void testSchemaConversion2() throws Exception {  
        pig.registerQuery("A = LOAD 'originput' using org.apache.pig.piggybank.storage.PigStorageSchema(',') " +
        "as (f1:chararray, f2:int);");
        pig.registerQuery("B = group A by f1;");
        Schema origSchema = pig.dumpSchema("B");
        ResourceSchema rs1 = new ResourceSchema(origSchema);
        pig.registerQuery("STORE B into 'cout' using org.apache.pig.piggybank.storage.PigStorageSchema();");
       
        pig.registerQuery("C = LOAD 'cout' using org.apache.pig.piggybank.storage.PigStorageSchema();");
        Schema genSchema = pig.dumpSchema("C");
        ResourceSchema rs2 = new ResourceSchema(genSchema);
        Assert.assertTrue("generated schema equals original" , ResourceSchema.equals(rs1, rs2));
       
        pig.registerQuery("C1 = LOAD 'cout' as (a0:chararray, A: {t: (f1:chararray, f2:int) } );");
        pig.registerQuery("D = foreach C1 generate a0, SUM(A.f2);");
View Full Code Here

       
        //TEST TOBAG
        TOBAG tb = new TOBAG();

        //test output schema of udf
        Schema expectedSch =
            Schema.generateNestedSchema(DataType.BAG, DataType.INTEGER);

        //check schema of TOBAG when given input tuple having only integers
        Schema inputSch = new Schema();
        inputSch.add(new FieldSchema(null, DataType.INTEGER));
        assertEquals("schema of tobag when input has only ints",
                expectedSch, tb.outputSchema(inputSch));

        //add another int column
        inputSch.add(new FieldSchema(null, DataType.INTEGER));
        assertEquals("schema of tobag when input has only ints",
                expectedSch, tb.outputSchema(inputSch));

        //add a long column
        inputSch.add(new FieldSchema(null, DataType.LONG));
        //expect null inner schema
        expectedSch =
            Schema.generateNestedSchema(DataType.BAG, DataType.NULL);
        assertEquals("schema of tobag when input has ints and long",
                expectedSch, tb.outputSchema(inputSch));

       
        //test schema when input is a tuple with inner schema
        Schema tupInSchema = new Schema(new FieldSchema("x", DataType.CHARARRAY));
        inputSch = new Schema();
        inputSch.add(new FieldSchema("a", tupInSchema, DataType.TUPLE));
        Schema inputSchCp = new Schema(inputSch);
        inputSchCp.getField(0).alias = null;
        expectedSch = new Schema(new FieldSchema(null, inputSchCp, DataType.BAG));
        assertEquals("schema of tobag when input has cols of type tuple ",
                expectedSch, tb.outputSchema(inputSch));
       
        inputSch.add(new FieldSchema("b", tupInSchema, DataType.TUPLE));
        assertEquals("schema of tobag when input has cols of type tuple ",
                expectedSch, tb.outputSchema(inputSch));
       
        //add a column of type tuple with different inner schema
        tupInSchema = new Schema(new FieldSchema("x", DataType.BYTEARRAY));
        inputSch.add(new FieldSchema("c", tupInSchema, DataType.TUPLE));
        //expect null inner schema
        expectedSch =
            Schema.generateNestedSchema(DataType.BAG, DataType.NULL);
        assertEquals("schema of tobag when input has cols of type tuple with diff inner schema",
View Full Code Here

  }
 
  public static Schema toPigSchema(
      org.apache.hadoop.zebra.schema.Schema tschema)
      throws FrontendException {
    Schema ret = new Schema();
    for (String col : tschema.getColumns()) {
      org.apache.hadoop.zebra.schema.Schema.ColumnSchema columnSchema =
        tschema.getColumn(col);
      if (columnSchema != null) {
        ColumnType ct = columnSchema.getType();
        if (ct == org.apache.hadoop.zebra.schema.ColumnType.RECORD ||
            ct == org.apache.hadoop.zebra.schema.ColumnType.COLLECTION)
          ret.add(new FieldSchema(col, toPigSchema(columnSchema.getSchema()), ct.pigDataType()));
        else
          ret.add(new FieldSchema(col, ct.pigDataType()));
      } else {
        ret.add(new FieldSchema(null, null));
      }
    }
    return ret;
  }
View Full Code Here

class TableOutputFormat implements OutputFormat<BytesWritable, Tuple> {
  @Override
  public void checkOutputSpecs(FileSystem ignored, JobConf job) throws IOException {
    StoreConfig storeConfig = MapRedUtil.getStoreConfig(job);
    String location = storeConfig.getLocation(), schemaStr;
    Schema schema = storeConfig.getSchema();
    org.apache.pig.SortInfo pigSortInfo = storeConfig.getSortInfo();

    /* TODO
     * use a home-brewn comparator ??
     */
    String comparator = null;
    String sortColumnNames = null;
    if (pigSortInfo != null)
    {
      List<org.apache.pig.SortColInfo> sortColumns = pigSortInfo.getSortColInfoList();
      StringBuilder sb = new StringBuilder();
      if (sortColumns != null && sortColumns.size() >0)
      {
        org.apache.pig.SortColInfo sortColumn;
        String sortColumnName;
        boolean descending = false;
        for (int i = 0; i < sortColumns.size(); i++)
        {
          sortColumn = sortColumns.get(i);
          sortColumnName = sortColumn.getColName();
          if (sortColumnName == null)
            throw new IOException("Zebra does not support column positional reference yet");
          if (sortColumn.getSortOrder() == Order.DESCENDING)
          {
            Log LOG = LogFactory.getLog(TableLoader.class);
            LOG.warn("Sorting in descending order is not supported by Zebra and the table will be unsorted.");
            descending = true;
            break;
          }
          if (!org.apache.pig.data.DataType.isAtomic(schema.getField(sortColumnName).type))
            throw new IOException(schema.getField(sortColumnName).alias+" is not of simple type as required for a sort column now.");
          if (i > 0)
            sb.append(",");
          sb.append(sortColumnName);
        }
        if (!descending)
View Full Code Here

      keyClass = (Class<Writable>) reader.getKeyClass();
      valClass = (Class<Writable>) reader.getValueClass();
    } catch (ClassCastException e) {
      throw new RuntimeException("SequenceFile contains non-Writable objects", e);
    }
    Schema schema = new Schema();
    setKeyValueTypes(keyClass, valClass)
    schema.add(new Schema.FieldSchema(null, keyType));
    schema.add(new Schema.FieldSchema(null, valType));
    return schema;
  }
View Full Code Here

    }

    private void readBaseData(List<LogicalOperator> loads) throws ExecException, FrontendException {
        baseData = new HashMap<LOLoad, DataBag>();
        for (LogicalOperator op : loads) {
            Schema schema = op.getSchema();
            if(schema == null) {
                throw new ExecException("Example Generator requires a schema. Please provide a schema while loading data.");
            }
           
            DataBag opBaseData = BagFactory.getInstance().newDefaultBag();
View Full Code Here

         */
        public ResourceFieldSchema(FieldSchema fieldSchema) {
            type = fieldSchema.type;
            name = fieldSchema.alias;
            description = "autogenerated from Pig Field Schema";
            Schema inner = fieldSchema.schema;
            if (type == DataType.BAG && fieldSchema.schema != null
                    && !fieldSchema.schema.isTwoLevelAccessRequired()) {
                log.info("Insert two-level access to Resource Schema");
                FieldSchema fs = new FieldSchema("t", fieldSchema.schema);
                inner = new Schema(fs);               
            }
           
            // allow partial schema
            if ((type == DataType.BAG || type == DataType.TUPLE)
                    && inner != null) {
View Full Code Here

            for (int i = 0; i < ctorArgs.length; i++) {
                // Can use the same strings, they're immutable
                args[i] = ctorArgs[i];
            }
        }
        Schema s = null;
        if (inputArgsSchema != null) s = inputArgsSchema.clone();
        return new FuncSpec(className, args, s);
    }
View Full Code Here

    private LogicalSchema getSchemaFromMetaData() throws FrontendException {
        if (getLoadFunc()!=null && getLoadFunc() instanceof LoadMetadata) {
            try {
                ResourceSchema resourceSchema = ((LoadMetadata)loadFunc).getSchema(getFileSpec().getFileName(), new Job(conf));
                Schema oldSchema = Schema.getPigSchema(resourceSchema);
                return Util.translateSchema(oldSchema);
            } catch (IOException e) {
                throw new FrontendException("Cannot get schema from loadFunc " + loadFunc.getClass().getName(), 2245, e);
            }
        }
View Full Code Here

TOP

Related Classes of org.apache.pig.impl.logicalLayer.schema.Schema

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., now owned by Oracle Inc. Contact coftware#gmail.com.