Package org.apache.hadoop.hive.serde2.columnar

Examples of org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable
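A BytesRefArrayWritable holds one RCFile row as an array of BytesRefWritable entries, one per column; each entry is an (array, offset, length) reference into a byte buffer rather than a copy of the bytes. The snippets below come from the Hive sources and tests (RCFile.Writer, RCFile.Reader, TestRCFile). First, a minimal self-contained sketch of the class on its own; the class name BytesRefArrayWritableDemo and the column values are invented for illustration:

import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable;
import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable;

public class BytesRefArrayWritableDemo {
  public static void main(String[] args) throws Exception {
    // One row with three columns; each column is a plain byte range.
    byte[][] columns = {"123".getBytes("UTF-8"),
                        "hive".getBytes("UTF-8"),
                        "hadoop".getBytes("UTF-8")};

    BytesRefArrayWritable row = new BytesRefArrayWritable(columns.length);
    for (int i = 0; i < columns.length; i++) {
      // A BytesRefWritable points at (bytes, offset, length); nothing is copied.
      row.set(i, new BytesRefWritable(columns[i], 0, columns[i].length));
    }

    for (int i = 0; i < row.size(); i++) {
      // getBytesCopy() materializes the referenced range into a new array.
      System.out.println(new String(row.get(i).getBytesCopy(), "UTF-8"));
    }
  }
}

The same instance is typically reused across rows: clear() drops the valid entries and resetValid(n) resizes for the next row, which is why the tests below call bytes.clear() between appends.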


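Writing rows: the column count and record interval are set on the configuration, a single row is packed into a BytesRefArrayWritable, and that row is appended writeCount times (bytesArray, intervalRecordCount, writeCount, cloneConf, fs, testFile, and codec are defined earlier in the enclosing test method):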
    RCFileOutputFormat.setColumnNumber(cloneConf, bytesArray.length);
    cloneConf.setInt(RCFile.RECORD_INTERVAL_CONF_STR, intervalRecordCount);

    RCFile.Writer writer = new RCFile.Writer(fs, cloneConf, testFile, null, codec);

    BytesRefArrayWritable bytes = new BytesRefArrayWritable(bytesArray.length);
    for (int i = 0; i < bytesArray.length; i++) {
      BytesRefWritable cu = new BytesRefWritable(bytesArray[i], 0,
          bytesArray[i].length);
      bytes.set(i, cu);
    }
    for (int i = 0; i < writeCount; i++) {
      writer.append(bytes);
    }
    writer.close();

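Inside RCFile.Reader, fetching one projected column (a fragment; selColIdx, rest, recordsNumInValBuffer, and currentValue belong to the enclosing reader class):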
      // The requested column is not among the projected columns, so there
      // is nothing to fetch.
      if (selColIdx == -1) {
        return null;
      }

      // Reuse the caller-supplied array when one is passed in.
      if (rest == null) {
        rest = new BytesRefArrayWritable();
      }

      // One entry per record in the current value buffer.
      rest.resetValid(recordsNumInValBuffer);

      if (!currentValue.inited) {

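Inside RCFile.Writer.append: the writer accepts only BytesRefArrayWritable rows, buffers each column separately, and pads short rows (columnNumber, columnBuffers, columnValuePlainLength, columnBufferSize, and bufferedRecords are writer fields):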
      if (!(val instanceof BytesRefArrayWritable)) {
        throw new UnsupportedOperationException(
            "Currently the writer can only accept BytesRefArrayWritable");
      }

      BytesRefArrayWritable columns = (BytesRefArrayWritable) val;
      int size = columns.size();
      for (int i = 0; i < size; i++) {
        BytesRefWritable cu = columns.get(i);
        int plainLen = cu.getLength();
        // Track the uncompressed size of the buffered data, per column and
        // in total, and append the column's bytes to its column buffer.
        columnBufferSize += plainLen;
        columnValuePlainLength[i] += plainLen;
        columnBuffers[i].append(cu);
      }

      // Rows narrower than the declared column count are padded with
      // zero-length columns.
      if (size < columnNumber) {
        for (int i = columns.size(); i < columnNumber; i++) {
          columnBuffers[i].append(BytesRefWritable.ZeroBytesRefWritable);
        }
      }

      bufferedRecords++;

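A write/read round trip with user metadata: two records are appended, the reader checks the metadata pairs (apple -> block, cat -> dog) through both metadata accessors, and each row is deserialized through the SerDe (a fragment; the snippet opens mid-way through the RCFile.Writer constructor call):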
                        RCFile.createMetadata(new Text("apple"),
                                              new Text("block"),
                                              new Text("cat"),
                                              new Text("dog")),
                        new DefaultCodec());
    BytesRefArrayWritable bytes = new BytesRefArrayWritable(record_1.length);
    for (int i = 0; i < record_1.length; i++) {
      BytesRefWritable cu = new BytesRefWritable(record_1[i], 0,
          record_1[i].length);
      bytes.set(i, cu);
    }
    writer.append(bytes);
    bytes.clear();
    for (int i = 0; i < record_2.length; i++) {
      BytesRefWritable cu = new BytesRefWritable(record_2[i], 0,
          record_2[i].length);
      bytes.set(i, cu);
    }
    writer.append(bytes);
    writer.close();

    Object[] expectedRecord_1 = {new ByteWritable((byte) 123),
        new ShortWritable((short) 456), new IntWritable(789),
        new LongWritable(1000), new DoubleWritable(5.3),
        new Text("hive and hadoop"), null, null};

    Object[] expectedRecord_2 = {new ByteWritable((byte) 100),
        new ShortWritable((short) 200), new IntWritable(123),
        new LongWritable(1000), new DoubleWritable(5.3),
        new Text("hive and hadoop"), null, null};

    RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
    assertEquals(new Text("block"),
                 reader.getMetadata().get(new Text("apple")));
    assertEquals(new Text("block"),
                 reader.getMetadataValueOf(new Text("apple")));
    assertEquals(new Text("dog"),
                 reader.getMetadataValueOf(new Text("cat")));
    LongWritable rowID = new LongWritable();

    for (int i = 0; i < 2; i++) {
      reader.next(rowID);
      BytesRefArrayWritable cols = new BytesRefArrayWritable();
      reader.getCurrentRow(cols);
      cols.resetValid(8);
      Object row = serDe.deserialize(cols);

      StructObjectInspector oi = (StructObjectInspector) serDe
          .getObjectInspector();
      List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();

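Reading column-by-column: after two records are written, reader.getColumn is called for each of the eight columns; the first call allocates the result array, and later calls are expected to reuse the instance passed in (a fragment; the snippet opens mid-way through a record's byte-array literal):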
        "5.3".getBytes("UTF-8"),
        "hive and hadoop".getBytes("UTF-8"),
        new byte[0],
        "NULL".getBytes("UTF-8")};
   
    BytesRefArrayWritable bytes = new BytesRefArrayWritable(record_1.length);
    for (int i = 0; i < record_1.length; i++) {
      BytesRefWritable cu = new BytesRefWritable(record_1[i], 0,
          record_1[i].length);
      bytes.set(i, cu);
    }
    writer.append(bytes);
    bytes.clear();
    for (int i = 0; i < record_2.length; i++) {
      BytesRefWritable cu = new BytesRefWritable(record_2[i], 0,
          record_2[i].length);
      bytes.set(i, cu);
    }
    writer.append(bytes);
    writer.close();

    RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
   
    LongWritable rowID = new LongWritable();
    assertTrue(reader.next(rowID));
    assertEquals(0L, rowID.get());

    assertTrue(reader.next(rowID));
    assertEquals(1L, rowID.get());
   
    BytesRefArrayWritable result = null;
    BytesRefWritable brw;
    for (int col=0; col < 8; col++) {
      BytesRefArrayWritable result2 = reader.getColumn(col, result);
      if (result == null) {
        assertNotNull(result2);
        result = result2;
      } else {
        // The two-argument getColumn(col, rest) should return the instance passed in:

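Tolerating corruption: 100 rows of random integers are written, junk bytes are spliced into the middle of the file on local disk, and the read is expected to succeed once hive.io.rcfile.tolerate.corruptions is set: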
    byte[][] record = {null, null, null, null, null, null, null, null};

    RCFileOutputFormat.setColumnNumber(conf, expectedFieldsData.length);
    RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null,
        new DefaultCodec());
    BytesRefArrayWritable bytes = new BytesRefArrayWritable(record.length);
    final int recCount = 100;
    Random rand = new Random();
    for (int recIdx = 0; recIdx < recCount; recIdx++) {
      for (int i = 0; i < record.length; i++) {
        record[i] = Integer.toString(rand.nextInt()).getBytes("UTF-8");
      }
      for (int i = 0; i < record.length; i++) {
        BytesRefWritable cu = new BytesRefWritable(record[i], 0,
            record[i].length);
        bytes.set(i, cu);
      }
      writer.append(bytes);
      bytes.clear();
    }
    writer.close();

    // Insert junk in middle of file. Assumes file is on local disk.
    RandomAccessFile raf = new RandomAccessFile(file.toUri().getPath(), "rw");
    long corruptOffset = raf.length() / 2;
    LOG.info("corrupting " + raf + " at offset " + corruptOffset);
    raf.seek(corruptOffset);
    raf.writeBytes("junkjunkjunkjunkjunkjunkjunkjunk");
    raf.close();

    // Set the option for tolerating corruptions. The read should succeed.
    Configuration tmpConf = new Configuration(conf);
    tmpConf.setBoolean("hive.io.rcfile.tolerate.corruptions", true);
    RCFile.Reader reader = new RCFile.Reader(fs, file, tmpConf);

    LongWritable rowID = new LongWritable();

    while (reader.next(rowID)) {
      BytesRefArrayWritable cols = new BytesRefArrayWritable();
      reader.getCurrentRow(cols);
      cols.resetValid(8);
    }

    reader.close();
  }

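Backward compatibility: a version-0 RCFile from the test data directory is read and its single row is compared column-by-column against the expected strings: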
    String[] row = new String[]{"Tester", "Bart", "333 X St.", "Reno", "NV",
                                "USA"};
    RCFile.Reader reader =
      new RCFile.Reader(fs, new Path("src/test/data/rc-file-v0.rc"), conf);
    LongWritable rowID = new LongWritable();
    BytesRefArrayWritable cols = new BytesRefArrayWritable();
    assertTrue("old file reader first row", reader.next(rowID));
    reader.getCurrentRow(cols);
    assertEquals(row.length, cols.size());
    for (int i=0; i < cols.size(); ++i) {
      assertEquals(row[i], new String(cols.get(i).getBytesCopy()));
    }
    assertFalse("old file reader end", reader.next(rowID));
    reader.close();
  }

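Measuring file size: the same row is appended count times and the resulting file length is printed: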
    RCFileOutputFormat.setColumnNumber(conf, fieldsData.length);
    RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null,
        new DefaultCodec());

    BytesRefArrayWritable bytes = new BytesRefArrayWritable(fieldsData.length);
    for (int i = 0; i < fieldsData.length; i++) {
      BytesRefWritable cu = new BytesRefWritable(fieldsData[i], 0,
          fieldsData[i].length);
      bytes.set(i, cu);
    }

    for (int i = 0; i < count; i++) {
      writer.append(bytes);
    }
    writer.close();
    long fileLen = fs.getFileStatus(file).getLen();
    System.out.println("RCFile with " + bytes.size() + " columns and "
        + count + " rows has file length " + fileLen + " bytes");
  }

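A full-read round trip: with all columns selected, each row is deserialized, every field is checked against the expected writables, and the row is serialized back and compared to the original BytesRefArrayWritable (s in the final assertion is the row written earlier in the enclosing test):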
    ColumnProjectionUtils.setFullyReadColumns(conf);
    RCFile.Reader reader = new RCFile.Reader(fs, file, conf);

    LongWritable rowID = new LongWritable();
    int actualRead = 0;
    BytesRefArrayWritable cols = new BytesRefArrayWritable();
    while (reader.next(rowID)) {
      reader.getCurrentRow(cols);
      cols.resetValid(8);
      Object row = serDe.deserialize(cols);

      StructObjectInspector oi = (StructObjectInspector) serDe
          .getObjectInspector();
      List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
      assertEquals("Field size should be 8", 8, fieldRefs.size());
      for (int i = 0; i < fieldRefs.size(); i++) {
        Object fieldData = oi.getStructFieldData(row, fieldRefs.get(i));
        Object standardWritableData = ObjectInspectorUtils
            .copyToStandardObject(fieldData, fieldRefs.get(i)
            .getFieldObjectInspector(), ObjectInspectorCopyOption.WRITABLE);
        assertEquals("Field " + i, standardWritableData, expectedFieldsData[i]);
      }
      // Serialize
      assertEquals(
          "Class of the serialized object should be BytesRefArrayWritable",
          BytesRefArrayWritable.class, serDe.getSerializedClass());
      BytesRefArrayWritable serializedText = (BytesRefArrayWritable) serDe
          .serialize(row, oi);
      assertEquals("Serialized data", s, serializedText);
      actualRead++;
    }
    reader.close();
