Package org.apache.hadoop.hive.serde2

Examples of org.apache.hadoop.hive.serde2.SerDe
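
All of the excerpts below exercise the same small lifecycle: a SerDe is initialized from a Configuration and the table Properties (column names and types), deserialize turns the Writable produced by the table's InputFormat into a row object described by getObjectInspector(), and serialize turns a row plus an ObjectInspector back into a Writable for a RecordWriter. The following is a minimal, self-contained sketch of that lifecycle; it is not taken from any of the excerpts, assumes the older org.apache.hadoop.hive.serde2.SerDe interface these excerpts use, and picks LazySimpleSerDe with two hypothetical string columns "a" and "b" purely for illustration.

import java.util.Properties;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde2.SerDe;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;

// Minimal sketch, not from the excerpts below: assumes LazySimpleSerDe and two
// hypothetical string columns "a" and "b".
public class SerDeLifecycleSketch {
  public static void main(String[] args) throws Exception {
    // 1. Initialize the SerDe with the table's column names and types, using the
    //    same "columns" / "columns.types" properties the excerpts below set.
    Properties props = new Properties();
    props.setProperty("columns", "a,b");
    props.setProperty("columns.types", "string,string");
    SerDe serde = new LazySimpleSerDe();
    serde.initialize(new Configuration(), props);

    // 2. Deserialize a Writable (as an InputFormat would hand it over) into a row
    //    object described by the SerDe's ObjectInspector. LazySimpleSerDe's default
    //    field delimiter is Ctrl-A (\u0001).
    Object row = serde.deserialize(new Text("hello\u0001world"));
    ObjectInspector rowOI = serde.getObjectInspector();

    // 3. Serialize the row (plus its ObjectInspector) back into a Writable, ready to
    //    be passed to a RecordWriter.
    Writable out = serde.serialize(row, rowOI);
    System.out.println(out);
  }
}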


  // Excerpt (windowing): evaluate a query's select list over a Partition, apply an
  // optional WHERE predicate, and either forward each row as objects or serialize it
  // with the output SerDe.
  public static void executeSelectList(QueryDef qDef, Partition oPart, ForwardSink rS)
      throws WindowingException
  {
    ArrayList<ColumnDef> cols = qDef.getSelectList().getColumns();
    ObjectInspector selectOI = qDef.getSelectList().getOI();
    SerDe oSerDe = qDef.getOutput().getSerDe();
    Object[] output = new Object[cols.size()];

    WhereDef whDef = qDef.getWhere();
    boolean applyWhere = whDef != null;
    Converter whConverter = !applyWhere ? null
        : ObjectInspectorConverters
            .getConverter(
                whDef.getOI(),
                PrimitiveObjectInspectorFactory.javaBooleanObjectInspector);
    ExprNodeEvaluator whCondEval = !applyWhere ? null : whDef
        .getExprEvaluator();

    Writable value = null;
    PartitionIterator<Object> pItr = oPart.iterator();
    RuntimeUtils.connectLeadLagFunctionsToPartition(qDef, pItr);
    while (pItr.hasNext())
    {
      int colCnt = 0;
      ArrayList<Object> selectList = new ArrayList<Object>();
      Object oRow = pItr.next();

      if (applyWhere)
      {
        Object whCond = null;
        try
        {
          whCond = whCondEval.evaluate(oRow);
          whCond = whConverter.convert(whCond);
        }
        catch (HiveException he)
        {
          throw new WindowingException(he);
        }
        if (whCond == null || !((Boolean) whCond).booleanValue())
        {
          continue;
        }
      }

      for (ColumnDef cDef : cols)
      {
        try
        {
          Object newCol = cDef.getExprEvaluator().evaluate(oRow);
          output[colCnt++] = newCol;
          selectList.add(newCol);
        }
        catch (HiveException he)
        {
          throw new WindowingException(he);
        }
      }
     
      //if this implementation of ForwardSink accepts objects, forward the row
      //to the next operator in the chain; otherwise serialize it and collect
      //the resulting writable key-value pair for the output stream
      if(rS.acceptObject()){
        rS.collectOutput(output);
      }else{
        try
        {
          value = oSerDe.serialize(selectList, selectOI);
        }
        catch (SerDeException se)
        {
          throw new WindowingException(se);
        }
        // ... excerpt truncated here; per the comment above, the serialized value is
        // collected for the output stream and the loop and method close in the full source


    // Excerpt: construct a Partition for a table function, using the input's SerDe
    // and ObjectInspector.
    TableFunctionEvaluator tEval = tabDef.getFunction();
    String partClassName = tEval.getPartitionClass();
    int partMemSize = tEval.getPartitionMemSize();

    Partition part = null;
    SerDe serde = tabDef.getInput().getSerde();
    part = new Partition(partClassName, partMemSize, serde,
        (StructObjectInspector) oi);
    return part;

  }

    // Excerpt (ORC test): write three MyRow rows with OrcSerde and OrcOutputFormat,
    // then read them back through OrcInputFormat, with and without column pruning.
    synchronized (TestOrcFile.class) {
      inspector = (StructObjectInspector)
          ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class,
              ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    SerDe serde = new OrcSerde();
    HiveOutputFormat<?, ?> outFormat = new OrcOutputFormat();
    FileSinkOperator.RecordWriter writer =
        outFormat.getHiveRecordWriter(conf, testFilePath, MyRow.class, true,
            properties, Reporter.NULL);
    ReaderWriterProfiler.setProfilerOptions(conf);
    writer.write(serde.serialize(new MyRow(1,2), inspector));
    writer.write(serde.serialize(new MyRow(2,2), inspector));
    writer.write(serde.serialize(new MyRow(3,2), inspector));
    writer.close(true);
    serde = new OrcSerde();
    properties.setProperty("columns", "x,y");
    properties.setProperty("columns.types", "int:int");
    serde.initialize(conf, properties);
    assertEquals(OrcSerde.OrcSerdeRow.class, serde.getSerializedClass());
    inspector = (StructObjectInspector) serde.getObjectInspector();
    assertEquals("struct<x:int,y:int>", inspector.getTypeName());
    InputFormat<?,?> in = new OrcInputFormat();
    FileInputFormat.setInputPaths(conf, testFilePath.toString());
    InputSplit[] splits = in.getSplits(conf, 1);
    assertEquals(1, splits.length);

    // then exercise the InputFormatChecker.validateInput method
    ArrayList<FileStatus> fileList = new ArrayList<FileStatus>();
    assertEquals(false,
        ((InputFormatChecker) in).validateInput(fs, new HiveConf(), fileList));
    fileList.add(fs.getFileStatus(testFilePath));
    assertEquals(true,
        ((InputFormatChecker) in).validateInput(fs, new HiveConf(), fileList));
    fileList.add(fs.getFileStatus(workDir));
    assertEquals(false,
        ((InputFormatChecker) in).validateInput(fs, new HiveConf(), fileList));


    // read the whole file
    org.apache.hadoop.mapred.RecordReader reader =
        in.getRecordReader(splits[0], conf, Reporter.NULL);
    Object key = reader.createKey();
    Writable value = (Writable) reader.createValue();
    int rowNum = 0;
    List<? extends StructField> fields = inspector.getAllStructFieldRefs();
    IntObjectInspector intInspector =
        (IntObjectInspector) fields.get(0).getFieldObjectInspector();
    assertEquals(0.0, reader.getProgress(), 0.00001);
    assertEquals(0, reader.getPos());
    while (reader.next(key, value)) {
      assertEquals(++rowNum, intInspector.get(inspector.
          getStructFieldData(serde.deserialize(value), fields.get(0))));
      assertEquals(2, intInspector.get(inspector.
          getStructFieldData(serde.deserialize(value), fields.get(1))));
    }
    assertEquals(3, rowNum);
    assertEquals(1.0, reader.getProgress(), 0.00001);
    reader.close();

    // read just the first column
    conf.set("hive.io.file.readcolumn.ids", "0");
    reader = in.getRecordReader(splits[0], conf, Reporter.NULL);
    key = reader.createKey();
    value = (Writable) reader.createValue();
    rowNum = 0;
    fields = inspector.getAllStructFieldRefs();
    while (reader.next(key, value)) {
      assertEquals(++rowNum, intInspector.get(inspector.
          getStructFieldData(value, fields.get(0))));
      assertEquals(null, inspector.getStructFieldData(value, fields.get(1)));
    }
    assertEquals(3, rowNum);
    reader.close();

    // test the mapping of empty string to all columns
    conf.set("hive.io.file.readcolumn.ids", "");
    reader = in.getRecordReader(splits[0], conf, Reporter.NULL);
    key = reader.createKey();
    value = (Writable) reader.createValue();
    rowNum = 0;
    fields = inspector.getAllStructFieldRefs();
    while (reader.next(key, value)) {
      assertEquals(++rowNum, intInspector.get(inspector.
          getStructFieldData(value, fields.get(0))));
      assertEquals(2, intInspector.get(inspector.
          getStructFieldData(serde.deserialize(value), fields.get(1))));
    }
    assertEquals(3, rowNum);
    reader.close();
  }

    // Excerpt: write NestedRow rows with OrcSerde, re-initialize the SerDe with a
    // struct column type, and prune the read to the nested column.
    synchronized (TestOrcFile.class) {
      inspector = (StructObjectInspector)
          ObjectInspectorFactory.getReflectionObjectInspector(NestedRow.class,
              ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    SerDe serde = new OrcSerde();
    OutputFormat<?, ?> outFormat = new OrcOutputFormat();
    RecordWriter writer =
        outFormat.getRecordWriter(fs, conf, testFilePath.toString(),
            Reporter.NULL);
    writer.write(NullWritable.get(),
        serde.serialize(new NestedRow(1,2,3), inspector));
    writer.write(NullWritable.get(),
        serde.serialize(new NestedRow(4,5,6), inspector));
    writer.write(NullWritable.get(),
        serde.serialize(new NestedRow(7,8,9), inspector));
    writer.close(Reporter.NULL);
    serde = new OrcSerde();
    properties.setProperty("columns", "z,r");
    properties.setProperty("columns.types", "int:struct<x:int,y:int>");
    serde.initialize(conf, properties);
    inspector = (StructObjectInspector) serde.getObjectInspector();
    InputFormat<?,?> in = new OrcInputFormat();
    FileInputFormat.setInputPaths(conf, testFilePath.toString());
    InputSplit[] splits = in.getSplits(conf, 1);
    assertEquals(1, splits.length);
    conf.set("hive.io.file.readcolumn.ids", "1");
    // ... excerpt truncated here; the full test continues in the original source

    // Excerpt (begins mid-statement, assigning the record writer): create a writer,
    // close it without writing any rows, and verify the reader sees an empty file and
    // that the SerDe reports no stats.
        outFormat.getHiveRecordWriter(conf, testFilePath, MyRow.class, true,
            properties, Reporter.NULL);
    writer.close(true);
    properties.setProperty("columns", "x,y");
    properties.setProperty("columns.types", "int:int");
    SerDe serde = new OrcSerde();
    serde.initialize(conf, properties);
    InputFormat<?,?> in = new OrcInputFormat();
    FileInputFormat.setInputPaths(conf, testFilePath.toString());
    InputSplit[] splits = in.getSplits(conf, 1);
    assertEquals(1, splits.length);

    // read the whole file
    conf.set("hive.io.file.readcolumn.ids", "0,1");
    org.apache.hadoop.mapred.RecordReader reader =
        in.getRecordReader(splits[0], conf, Reporter.NULL);
    Object key = reader.createKey();
    Object value = reader.createValue();
    assertEquals(0.0, reader.getProgress(), 0.00001);
    assertEquals(0, reader.getPos());
    assertEquals(false, reader.next(key, value));
    reader.close();
    assertEquals(null, serde.getSerDeStats());
  }

    // Excerpt: write StringRow rows with OrcSerde, then re-initialize the SerDe with
    // column metadata and open the file through OrcInputFormat.
    synchronized (TestOrcFile.class) {
      inspector = (StructObjectInspector)
          ObjectInspectorFactory.getReflectionObjectInspector(StringRow.class,
              ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    SerDe serde = new OrcSerde();
    HiveOutputFormat<?, ?> outFormat = new OrcOutputFormat();
    FileSinkOperator.RecordWriter writer =
        outFormat.getHiveRecordWriter(conf, testFilePath, StringRow.class,
            true, properties, Reporter.NULL);
    writer.write(serde.serialize(new StringRow("owen"), inspector));
    writer.write(serde.serialize(new StringRow("beth"), inspector));
    writer.write(serde.serialize(new StringRow("laurel"), inspector));
    writer.write(serde.serialize(new StringRow("hazen"), inspector));
    writer.write(serde.serialize(new StringRow("colin"), inspector));
    writer.write(serde.serialize(new StringRow("miles"), inspector));
    writer.close(true);
    serde = new OrcSerde();
    properties.setProperty("columns", "str,str2");
    serde.initialize(conf, properties);
    inspector = (StructObjectInspector) serde.getObjectInspector();
    assertEquals("struct<str:string,str2:string>", inspector.getTypeName());
    InputFormat<?,?> in = new OrcInputFormat();
    FileInputFormat.setInputPaths(conf, testFilePath.toString());
    InputSplit[] splits = in.getSplits(conf, 1);
    assertEquals(1, splits.length);
    // ... excerpt truncated here; the full test continues in the original source

    // Excerpt (HCatalog, begins mid-statement): deserialize the job info, then build
    // the storage handler's SerDe via reflection and record its serialized class as
    // the job's output value class.
      .deserialize(jobInfoString);
    StorerInfo storeInfo = jobInfo.getTableInfo().getStorerInfo();
    HCatStorageHandler storageHandler = HCatUtil.getStorageHandler(
      context.getConfiguration(), storeInfo);
    Class<? extends SerDe> serde = storageHandler.getSerDeClass();
    SerDe sd = (SerDe) ReflectionUtils.newInstance(serde,
      context.getConfiguration());
    context.getConfiguration().set("mapred.output.value.class",
      sd.getSerializedClass().getName());

    RecordWriter<WritableComparable<?>, HCatRecord> rw;
    if (HCatBaseOutputFormat.getJobInfo(context).isDynamicPartitioningUsed()){
      // When Dynamic partitioning is used, the RecordWriter instance initialized here isn't used. Can use null.
      // (That's because records can't be written until the values of the dynamic partitions are deduced.
      // ... excerpt truncated here; the full method continues in the original source

  /**
   * Test the RegexSerDe class.
   */
  public void testRegexSerDe() throws Throwable {
    try {
      // Create the SerDe
      SerDe serDe = createSerDe(
          "host,identity,user,time,request,status,size,referer,agent",
          "string,string,string,string,string,string,string,string,string",
          "([^ ]*) ([^ ]*) ([^ ]*) (-|\\[[^\\]]*\\]) ([^ \"]*|\"[^\"]*\") "
          + "([0-9]*) ([0-9]*) ([^ \"]*|\"[^\"]*\") ([^ \"]*|\"[^\"]*\")",
          "%1$s %2$s %3$s %4$s %5$s %6$s %7$s %8$s %9$s");

      // Data
      Text t = new Text(
          "127.0.0.1 - - [26/May/2009:00:00:00 +0000] "
              + "\"GET /someurl/?track=Blabla(Main) HTTP/1.1\" 200 5864 - "
              + "\"Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) "
              + "AppleWebKit/525.19 (KHTML, like Gecko) Chrome/1.0.154.65 Safari/525.19\"");

      // Deserialize
      Object row = serDe.deserialize(t);
      ObjectInspector rowOI = serDe.getObjectInspector();

      System.out.println("Deserialized row: " + row);

      // Serialize
      Text serialized = (Text) serDe.serialize(row, rowOI);
      assertEquals(t, serialized);

      // Do some changes (optional)
      ObjectInspector standardWritableRowOI = ObjectInspectorUtils
          .getStandardObjectInspector(rowOI, ObjectInspectorCopyOption.WRITABLE);
      Object standardWritableRow = ObjectInspectorUtils.copyToStandardObject(
          row, rowOI, ObjectInspectorCopyOption.WRITABLE);

      // Serialize
      serialized = (Text) serDe.serialize(standardWritableRow,
          standardWritableRowOI);
      assertEquals(t, serialized);

    } catch (Throwable e) {
      e.printStackTrace();
      // ... excerpt truncated here; the catch block and method close in the full source

    // Excerpt (HCatalog, variant using HiveStorageHandler): the same pattern as the
    // excerpt above: build the storage handler's SerDe via reflection and record its
    // serialized class as the job's output value class.
      .deserialize(jobInfoString);
    StorerInfo storeInfo = jobInfo.getTableInfo().getStorerInfo();
    HiveStorageHandler storageHandler = HCatUtil.getStorageHandler(
      context.getConfiguration(), storeInfo);
    Class<? extends SerDe> serde = storageHandler.getSerDeClass();
    SerDe sd = (SerDe) ReflectionUtils.newInstance(serde,
      context.getConfiguration());
    context.getConfiguration().set("mapred.output.value.class",
      sd.getSerializedClass().getName());

    RecordWriter<WritableComparable<?>, HCatRecord> rw;
    if (HCatBaseOutputFormat.getJobInfo(context).isDynamicPartitioningUsed()){
      // When Dynamic partitioning is used, the RecordWriter instance initialized here isn't used. Can use null.
      // (That's because records can't be written until the values of the dynamic partitions are deduced.
      // ... excerpt truncated here; the full method continues in the original source

    // Excerpt: serialize a wider struct (MyTestClassBigger) with one SerDe and
    // deserialize it with a SerDe built for the narrower MyTestClass schema, then
    // compare the two structs field by field.
    StructObjectInspector rowOI1 = (StructObjectInspector) ObjectInspectorFactory
        .getReflectionObjectInspector(MyTestClassBigger.class,
        ObjectInspectorOptions.JAVA);
    String fieldNames1 = ObjectInspectorUtils.getFieldNames(rowOI1);
    String fieldTypes1 = ObjectInspectorUtils.getFieldTypes(rowOI1);
    SerDe serde1 = getSerDe(fieldNames1, fieldTypes1);
    serde1.getObjectInspector();

    StructObjectInspector rowOI2 = (StructObjectInspector) ObjectInspectorFactory
        .getReflectionObjectInspector(MyTestClass.class,
        ObjectInspectorOptions.JAVA);
    String fieldNames2 = ObjectInspectorUtils.getFieldNames(rowOI2);
    String fieldTypes2 = ObjectInspectorUtils.getFieldTypes(rowOI2);
    SerDe serde2 = getSerDe(fieldNames2, fieldTypes2);
    ObjectInspector serdeOI2 = serde2.getObjectInspector();

    int num = 100;
    for (int itest = 0; itest < num; itest++) {
      int randField = r.nextInt(11);
      Byte b = randField > 0 ? null : Byte.valueOf((byte) r.nextInt());
      Short s = randField > 1 ? null : Short.valueOf((short) r.nextInt());
      Integer n = randField > 2 ? null : Integer.valueOf(r.nextInt());
      Long l = randField > 3 ? null : Long.valueOf(r.nextLong());
      Float f = randField > 4 ? null : Float.valueOf(r.nextFloat());
      Double d = randField > 5 ? null : Double.valueOf(r.nextDouble());
      String st = randField > 6 ? null : TestBinarySortableSerDe
          .getRandString(r);
      HiveDecimal bd = randField > 7 ? null : TestBinarySortableSerDe.getRandHiveDecimal(r);
      Date date = randField > 8 ? null : TestBinarySortableSerDe.getRandDate(r);
      MyTestInnerStruct is = randField > 9 ? null : new MyTestInnerStruct(r
          .nextInt(5) - 2, r.nextInt(5) - 2);
      List<Integer> li = randField > 10 ? null : TestBinarySortableSerDe
          .getRandIntegerArray(r);
      byte[] ba  = TestBinarySortableSerDe.getRandBA(r, itest);
      Map<String, List<MyTestInnerStruct>> mp = new HashMap<String, List<MyTestInnerStruct>>();
      String key = TestBinarySortableSerDe.getRandString(r);
      List<MyTestInnerStruct> value = randField > 9 ? null
          : getRandStructArray(r);
      mp.put(key, value);
      String key1 = TestBinarySortableSerDe.getRandString(r);
      mp.put(key1, null);
      String key2 = TestBinarySortableSerDe.getRandString(r);
      List<MyTestInnerStruct> value2 = getRandStructArray(r);
      mp.put(key2, value2);

      MyTestClassBigger input = new MyTestClassBigger(b, s, n, l, f, d, st, bd, date, is,
          li, ba, mp);
      BytesWritable bw = (BytesWritable) serde1.serialize(input, rowOI1);
      Object output = serde2.deserialize(bw);

      if (0 != compareDiffSizedStructs(input, rowOI1, output, serdeOI2)) {
        System.out.println("structs      = "
            + SerDeUtils.getJSONString(input, rowOI1));
        System.out.println("deserialized = "
        // ... excerpt truncated here; the println statement and the rest of the test
        // continue in the full source
