Package org.apache.hadoop.hive.ql.io.orc

Examples of org.apache.hadoop.hive.ql.io.orc.Reader
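
All of the snippets on this page follow the same basic pattern: open a Reader with OrcFile.createReader, obtain a StructObjectInspector describing the schema, and pull rows through a RecordReader. A minimal, self-contained sketch of that pattern, using the older createReader(FileSystem, Path) overload seen throughout this page (later Hive versions prefer createReader(Path, OrcFile.readerOptions(conf))); the class name and the tab-separated printing are illustrative only:

    import java.util.List;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hive.ql.io.orc.OrcFile;
    import org.apache.hadoop.hive.ql.io.orc.Reader;
    import org.apache.hadoop.hive.ql.io.orc.RecordReader;
    import org.apache.hadoop.hive.serde2.objectinspector.StructField;
    import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;

    public class OrcDump {
        public static void main(String[] args) throws Exception {
            Configuration conf = new Configuration();
            Path path = new Path(args[0]);               // ORC file to read
            FileSystem fs = path.getFileSystem(conf);

            Reader orc = OrcFile.createReader(fs, path); // reads footer metadata
            StructObjectInspector soi =
                    (StructObjectInspector) orc.getObjectInspector();
            List<? extends StructField> fields = soi.getAllStructFieldRefs();

            RecordReader rows = orc.rows(null);          // null => read all columns
            Object row = null;
            while (rows.hasNext()) {
                row = rows.next(row);                    // the row object is reused
                for (StructField field : fields) {
                    System.out.print(soi.getStructFieldData(row, field) + "\t");
                }
                System.out.println();
            }
            rows.close();
        }
    }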


            if (LOG.isInfoEnabled()) {
                LOG.info(MessageFormat.format(
                        "Loading ORCFile metadata ({0}): {1}",
                        descriptor.getDataModelClass().getSimpleName(),
                        path));
            }
            Reader orc = OrcFile.createReader(fileSystem, path);
            StructObjectInspector sourceInspector = (StructObjectInspector) orc.getObjectInspector();
            driver = new DataModelDriver(descriptor, sourceInspector, configuration);
            if (LOG.isInfoEnabled()) {
                LOG.info(MessageFormat.format(
                        "Loading ORCFile contents ({0}): path={1}, range={2}+{3}",
                        descriptor.getDataModelClass().getSimpleName(),
                        path,
                        offset,
                        fragmentSize));
            }
            // Restrict the reader to this fragment's byte range and column set.
            boolean[] availableColumns = computeAvailableColumns(orc);
            reader = orc.rows(offset, fragmentSize, availableColumns);
            currentReader = reader;
        }
        return reader;
    }
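
The three-argument rows(offset, length, include) overload restricts reading to the stripes whose first byte falls inside [offset, offset + length); computeAvailableColumns above is the host application's own helper for the column mask. A hedged sketch of the same byte-range trick, reading exactly one stripe with all columns (SingleStripe and openStripe are illustrative names, not part of the ORC API):

    import java.io.IOException;

    import org.apache.hadoop.hive.ql.io.orc.Reader;
    import org.apache.hadoop.hive.ql.io.orc.RecordReader;
    import org.apache.hadoop.hive.ql.io.orc.StripeInformation;

    final class SingleStripe {
        /** Opens a RecordReader over just the n-th stripe; a null include mask reads all columns. */
        static RecordReader openStripe(Reader orc, int n) throws IOException {
            int index = 0;
            for (StripeInformation stripe : orc.getStripes()) {
                if (index++ == n) {
                    // Only stripes starting inside [offset, offset + length) are read.
                    return orc.rows(stripe.getOffset(), stripe.getLength(), null);
                }
            }
            throw new IllegalArgumentException("no such stripe: " + n);
        }
    }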


            if (LOG.isInfoEnabled()) {
                LOG.info(MessageFormat.format(
                        "Loading ORCFile metadata ({0}): {1}",
                        context.getDataType().getSimpleName(),
                        status.getPath()));
            }
            Reader orc = OrcFile.createReader(context.getFileSystem(), status.getPath());
            if (LOG.isInfoEnabled()) {
                LOG.info(MessageFormat.format(
                        "Loaded ORCFile metadata ({0}): path={1}, rows={2}, deser-size={3}",
                        context.getDataType().getSimpleName(),
                        status.getPath(),
                        orc.getNumberOfRows(),
                        orc.getRawDataSize()));
            }
            BlockMap blockMap = BlockMap.create(
                    status.getPath().toString(),
                    status.getLen(),
                    BlockMap.computeBlocks(context.getFileSystem(), status),
                    false);
            // TODO configurable split
            for (StripeInformation stripe : orc.getStripes()) {
                long begin = stripe.getOffset();
                long end = begin + stripe.getLength();
                DirectInputFragment fragment = blockMap.get(begin, end);
                if (LOG.isDebugEnabled()) {
                    LOG.debug(MessageFormat.format(
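
This snippet only consults footer metadata: the row count, the raw data size, and the stripe list that drives split computation (BlockMap and DirectInputFragment belong to the host application, not to ORC). A small sketch that prints the same footer-level numbers the log statements above report (OrcMetadata is an illustrative name):

    import org.apache.hadoop.hive.ql.io.orc.Reader;
    import org.apache.hadoop.hive.ql.io.orc.StripeInformation;

    final class OrcMetadata {
        /** Prints the footer-level metadata without reading any row data. */
        static void summarize(Reader orc) {
            System.out.println("rows:          " + orc.getNumberOfRows());
            System.out.println("raw data size: " + orc.getRawDataSize());
            System.out.println("compression:   " + orc.getCompression());
            int stripes = 0;
            for (StripeInformation stripe : orc.getStripes()) {
                stripes++;
            }
            System.out.println("stripes:       " + stripes);
        }
    }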

        if (path == null) {
            log.info("Cannot find any ORC files from " + location +
                    ". Probably multiple load store in script.");
            return null;
        }
        Reader reader = OrcFile.createReader(fs, path);
        ObjectInspector oip = reader.getObjectInspector();
        return TypeInfoUtils.getTypeInfoFromObjectInspector(oip);
    }
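
TypeInfoUtils.getTypeInfoFromObjectInspector turns the reader's inspector into a serde TypeInfo, from which the schema can be rendered as a Hive type string. A minimal sketch (OrcSchema and typeString are illustrative names):

    import java.io.IOException;

    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hive.ql.io.orc.OrcFile;
    import org.apache.hadoop.hive.ql.io.orc.Reader;
    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

    final class OrcSchema {
        /** Returns the file's schema as a type string, e.g. "struct<a0:int,a1:string>". */
        static String typeString(FileSystem fs, Path path) throws IOException {
            Reader reader = OrcFile.createReader(fs, path);
            TypeInfo info =
                    TypeInfoUtils.getTypeInfoFromObjectInspector(reader.getObjectInspector());
            return info.getTypeName();
        }
    }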

    @Test
    public void testSimpleStore() throws Exception {
        pigServer.registerQuery("A = load '" + INPUT1 + "' as (a0:int, a1:chararray);");
        pigServer.store("A", OUTPUT1, "OrcStorage");
        Path outputFilePath = new Path(new Path(OUTPUT1), "part-m-00000");
        Reader reader = OrcFile.createReader(fs, outputFilePath);
        assertEquals(2, reader.getNumberOfRows());

        RecordReader rows = reader.rows(null);
        Object row = rows.next(null);
        StructObjectInspector soi = (StructObjectInspector)reader.getObjectInspector();
        IntWritable intWritable = (IntWritable)soi.getStructFieldData(row,
                soi.getAllStructFieldRefs().get(0));
        Text text = (Text)soi.getStructFieldData(row,
                soi.getAllStructFieldRefs().get(1));
        assertEquals(65536, intWritable.get());
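
The bare casts to IntWritable and Text work because ORC's default object model hands back Hadoop writables. A more defensive variant asks each field's inspector to unwrap the value instead of casting; this sketch assumes the field is primitive (RowFields and javaValue are illustrative names):

    import org.apache.hadoop.hive.serde2.objectinspector.StructField;
    import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
    import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspector;

    final class RowFields {
        /** Extracts a primitive field as a plain Java object via its inspector. */
        static Object javaValue(StructObjectInspector soi, Object row, int fieldIndex) {
            StructField field = soi.getAllStructFieldRefs().get(fieldIndex);
            // Assumes the field is primitive; nested fields need their own inspectors.
            PrimitiveObjectInspector poi =
                    (PrimitiveObjectInspector) field.getFieldObjectInspector();
            // Unwraps IntWritable -> Integer, Text -> String, and so on.
            return poi.getPrimitiveJavaObject(soi.getStructFieldData(row, field));
        }
    }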

        pigServer.registerQuery("store B into '" + OUTPUT2 + "' using OrcStorage();");
        pigServer.registerQuery("store B into '" + OUTPUT3 +"' using OrcStorage('-c SNAPPY');");
        pigServer.executeBatch();

        Path outputFilePath = new Path(new Path(OUTPUT2), "part-r-00000");
        Reader reader = OrcFile.createReader(fs, outputFilePath);
        assertEquals(2, reader.getNumberOfRows());
        assertEquals(CompressionKind.ZLIB, reader.getCompression());

        Path outputFilePath2 = new Path(new Path(OUTPUT3), "part-r-00000");
        reader = OrcFile.createReader(fs, outputFilePath2);
        assertEquals(2, reader.getNumberOfRows());
        assertEquals(CompressionKind.SNAPPY, reader.getCompression());

        verifyData(outputFilePath, outputFilePath2, fs, 2);
    }
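
OrcStorage's '-c SNAPPY' constructor argument selects the codec that getCompression() later reports. When writing ORC directly against this same API generation, the codec goes to OrcFile.createWriter. A sketch, assuming the older eight-argument overload; the stripe size, buffer size, and row-index stride below are arbitrary placeholder values:

    import java.io.IOException;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hive.ql.io.orc.CompressionKind;
    import org.apache.hadoop.hive.ql.io.orc.OrcFile;
    import org.apache.hadoop.hive.ql.io.orc.Writer;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions;

    final class SnappyWriter {
        /** A row type inspected via reflection; field names become column names. */
        public static class Row {
            public int a0;
            public String a1;
            Row(int a0, String a1) { this.a0 = a0; this.a1 = a1; }
        }

        static void write(FileSystem fs, Path path, Configuration conf) throws IOException {
            ObjectInspector inspector = ObjectInspectorFactory
                    .getReflectionObjectInspector(Row.class, ObjectInspectorOptions.JAVA);
            Writer writer = OrcFile.createWriter(fs, path, conf, inspector,
                    64L * 1024 * 1024,      // stripe size (placeholder)
                    CompressionKind.SNAPPY, // the codec asserted on above
                    256 * 1024,             // compression buffer size (placeholder)
                    10000);                 // row index stride (placeholder)
            writer.addRow(new Row(1, "x"));
            writer.addRow(new Row(2, "y"));
            writer.close();
        }
    }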

    private void verifyData(Path orcFile, Iterator<Tuple> iter, FileSystem fs, int expectedTotalRows) throws Exception {

        int expectedRows = 0;
        int actualRows = 0;
        Reader orcReader = OrcFile.createReader(fs, orcFile);
        ObjectInspector oi = orcReader.getObjectInspector();
        StructObjectInspector soi = (StructObjectInspector) oi;

        RecordReader reader = orcReader.rows(null);
        Object row = null;

        while (reader.hasNext()) {
            row = reader.next(row);
            expectedRows++;
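
The excerpt cuts off inside the read loop. A sketch of how such a verification can be completed end to end: walk the ORC rows and the Pig tuples in lockstep, compare field by field, and check the total afterwards. Comparing string renderings sidesteps the writable-versus-Java type mismatch; the real test may compare differently (OrcVsTuples and verify are illustrative names):

    import java.util.Iterator;
    import java.util.List;

    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hive.ql.io.orc.OrcFile;
    import org.apache.hadoop.hive.ql.io.orc.Reader;
    import org.apache.hadoop.hive.ql.io.orc.RecordReader;
    import org.apache.hadoop.hive.serde2.objectinspector.StructField;
    import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
    import org.apache.pig.data.Tuple;

    import static org.junit.Assert.assertEquals;
    import static org.junit.Assert.assertTrue;

    final class OrcVsTuples {
        /** Compares every ORC row against the corresponding Pig tuple, field by field. */
        static void verify(Path orcFile, Iterator<Tuple> tuples, FileSystem fs,
                int expectedTotalRows) throws Exception {
            Reader orcReader = OrcFile.createReader(fs, orcFile);
            StructObjectInspector soi = (StructObjectInspector) orcReader.getObjectInspector();
            List<? extends StructField> fields = soi.getAllStructFieldRefs();

            RecordReader rows = orcReader.rows(null);
            Object row = null;
            int count = 0;
            while (rows.hasNext()) {
                row = rows.next(row);
                assertTrue("ran out of tuples before ORC rows", tuples.hasNext());
                Tuple tuple = tuples.next();
                for (int i = 0; i < fields.size(); i++) {
                    Object orcValue = soi.getStructFieldData(row, fields.get(i));
                    // String renderings: writables and Pig values differ in Java type.
                    assertEquals(String.valueOf(tuple.get(i)), String.valueOf(orcValue));
                }
                count++;
            }
            rows.close();
            assertEquals(expectedTotalRows, count);
        }
    }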

    private void verifyData(Path orcFile, Path pigOrcFile, FileSystem fs, int expectedTotalRows) throws Exception {

        int expectedRows = 0;
        int actualRows = 0;
        Reader orcReaderExpected = OrcFile.createReader(fs, orcFile);
        StructObjectInspector soiExpected = (StructObjectInspector) orcReaderExpected.getObjectInspector();
        Reader orcReaderActual = OrcFile.createReader(fs, pigOrcFile);  // the Pig-written file under comparison
        StructObjectInspector soiActual = (StructObjectInspector) orcReaderActual.getObjectInspector();

        RecordReader readerExpected = orcReaderExpected.rows(null);
        Object expectedRow = null;
        RecordReader readerActual = orcReaderActual.rows(null);
        Object actualRow = null;

        while (readerExpected.hasNext()) {
            expectedRow = readerExpected.next(expectedRow);
            expectedRows++;

        RecordReader recordReader;
        try {
            FileSystem fileSystem = path.getFileSystem(configuration);
            Reader reader = OrcFile.createReader(fileSystem, path);
            // Map the requested column names onto flattened ORC type ids.
            boolean[] include = findIncludedColumns(reader.getTypes(), columns);
            recordReader = reader.rows(start, length, include);
        }
        catch (Exception e) {
            throw Throwables.propagate(e);
        }
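
findIncludedColumns maps the requested columns onto the flattened type ids in reader.getTypes(), where id 0 is the root struct and root.getSubtypes(i) gives the id of field i. A hedged sketch covering flat schemas only; real implementations also descend into each field's nested subtree (IncludedColumns and forTopLevelFields are illustrative names):

    import java.util.List;

    import org.apache.hadoop.hive.ql.io.orc.OrcProto;

    final class IncludedColumns {
        /**
         * Builds the boolean include mask expected by Reader.rows(...) for a
         * flat schema. Nested types would require walking their subtrees too.
         */
        static boolean[] forTopLevelFields(List<OrcProto.Type> types,
                List<Integer> fieldIndexes) {
            boolean[] include = new boolean[types.size()];
            include[0] = true;                           // root struct, always read
            OrcProto.Type root = types.get(0);
            for (int field : fieldIndexes) {
                include[root.getSubtypes(field)] = true; // flattened id of field
            }
            return include;
        }
    }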
