Package parquet.hadoop.metadata

Examples of parquet.hadoop.metadata.ParquetMetadata
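A ParquetMetadata instance is the in-memory form of a Parquet file footer: the list of row groups (BlockMetaData), the file schema, and any application-level key/value metadata. As a starting point, a minimal sketch of loading a footer with the classic parquet-mr API that the excerpts below also use (the file path is hypothetical):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import parquet.hadoop.ParquetFileReader;
    import parquet.hadoop.metadata.ParquetMetadata;

    Configuration conf = new Configuration();
    Path path = new Path("/data/example.parquet"); // hypothetical location
    // readFooter parses only the footer; no row group data is loaded
    ParquetMetadata footer = ParquetFileReader.readFooter(conf, path);
    System.out.println(footer.getFileMetaData().getSchema());
    System.out.println(footer.getBlocks().size() + " row groups");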


            if (LOG.isInfoEnabled()) {
                LOG.info(MessageFormat.format(
                        "Loading Parquet file metadata ({0}): {1}",
                        descriptor.getDataModelClass().getSimpleName(),
                        path));
            }
            // read the footer, then keep only the row groups assigned to this fragment
            ParquetMetadata footer = ParquetFileReader.readFooter(hadoopConfiguration, path);
            List<BlockMetaData> blocks = filterBlocks(footer.getBlocks());
            if (blocks.isEmpty()) {
                return null;
            }
            long totalRecords = computeTotalRecords(blocks);
            this.averageBytesPerRecord = (double) fragmentSize / totalRecords;
            if (LOG.isInfoEnabled()) {
                LOG.info(MessageFormat.format(
                        "Loading Parquet file contents ({0}): path={1}, range={2}+{3}",
                        descriptor.getDataModelClass().getSimpleName(),
                        path,
                        offset,
                        fragmentSize));
            }
            // open a reader over the selected row groups, then build the record materializer and column I/O tree
            this.fileReader = new ParquetFileReader(
                    hadoopConfiguration,
                    path,
                    blocks,
                    footer.getFileMetaData().getSchema().getColumns());
            this.materializer = new DataModelMaterializer(
                    descriptor,
                    footer.getFileMetaData().getSchema(),
                    mappingConfiguration);
            this.columnIo = new ColumnIOFactory().getColumnIO(
                    materializer.getMaterializeSchema(),
                    footer.getFileMetaData().getSchema());
        }
        return fileReader.readNextRowGroup();
    }
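The excerpt above wires a ParquetFileReader to a project-specific DataModelMaterializer and a column I/O tree, then pulls row groups with readNextRowGroup(). A self-contained sketch of the same read loop, substituting parquet-mr's bundled example Group model for the custom materializer (class name and file path are illustrative):

    import java.io.IOException;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import parquet.column.page.PageReadStore;
    import parquet.example.data.Group;
    import parquet.example.data.simple.convert.GroupRecordConverter;
    import parquet.hadoop.ParquetFileReader;
    import parquet.hadoop.metadata.ParquetMetadata;
    import parquet.io.ColumnIOFactory;
    import parquet.io.MessageColumnIO;
    import parquet.io.RecordReader;
    import parquet.schema.MessageType;

    public class ReadParquetSketch {
        public static void main(String[] args) throws IOException {
            Configuration conf = new Configuration();
            Path path = new Path("/data/example.parquet"); // hypothetical
            ParquetMetadata footer = ParquetFileReader.readFooter(conf, path);
            MessageType schema = footer.getFileMetaData().getSchema();
            ParquetFileReader reader = new ParquetFileReader(
                    conf, path, footer.getBlocks(), schema.getColumns());
            MessageColumnIO columnIO = new ColumnIOFactory().getColumnIO(schema);
            try {
                PageReadStore rowGroup;
                while ((rowGroup = reader.readNextRowGroup()) != null) {
                    RecordReader<Group> records =
                            columnIO.getRecordReader(rowGroup, new GroupRecordConverter(schema));
                    for (long i = 0, n = rowGroup.getRowCount(); i < n; i++) {
                        Group record = records.read(); // one fully materialized record
                        System.out.println(record);
                    }
                }
            } finally {
                reader.close();
            }
        }
    }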


      // resolve this read entry's file path before loading its footers
      Path path = new Path(readEntryWithPath.getPath());
      List<Footer> footers = ParquetFileReader.readFooters(this.storageEngine.getHadoopConfig(), path);

      for (Footer footer : footers) {
        int index = 0;
        ParquetMetadata metadata = footer.getParquetMetadata();
        for (BlockMetaData rowGroup : metadata.getBlocks()){
          // need to grab block information from HDFS
          columnChunkMetaData = rowGroup.getColumns().iterator().next();
          start = columnChunkMetaData.getFirstDataPageOffset();
          // this field is not being populated correctly, but the column chunks know their sizes, so sum them for now (see the sketch below)
          //end = start + rowGroup.getTotalByteSize();
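As the comment notes, the row group's total byte size is not reliable in this version, but each column chunk reports its own on-disk size. A sketch of deriving the row group's extent by summing the chunks, reusing the rowGroup variable from the excerpt:

    import parquet.hadoop.metadata.ColumnChunkMetaData;

    long start = Long.MAX_VALUE;
    long length = 0;
    for (ColumnChunkMetaData column : rowGroup.getColumns()) {
        // the earliest first-data-page offset marks where the row group starts
        start = Math.min(start, column.getFirstDataPageOffset());
        // sum on-disk chunk sizes instead of trusting rowGroup.getTotalByteSize()
        length += column.getTotalSize();
    }
    long end = start + length;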

    assertEquals(3, metadata.size());
    for (Footer footer : metadata) {
      final File file = new File(footer.getFile().toUri());
      assertTrue(file.getName(), file.getName().startsWith("part"));
      assertTrue(file.getPath(), file.exists());
      final ParquetMetadata parquetMetadata = footer.getParquetMetadata();
      assertEquals(2, parquetMetadata.getBlocks().size());
      final Map<String, String> keyValueMetaData = parquetMetadata.getFileMetaData().getKeyValueMetaData();
      assertEquals("bar", keyValueMetaData.get("foo"));
      assertEquals(footer.getFile().getName(), keyValueMetaData.get(footer.getFile().getName()));
    }
  }
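The test above verifies per-file key/value metadata stored in each footer. Reading such metadata back outside a test is straightforward once the footer is loaded (the path is hypothetical):

    import java.util.Map;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import parquet.hadoop.ParquetFileReader;
    import parquet.hadoop.metadata.ParquetMetadata;

    ParquetMetadata footer = ParquetFileReader.readFooter(
            new Configuration(), new Path("/data/part-00000.parquet")); // hypothetical
    Map<String, String> keyValueMetaData = footer.getFileMetaData().getKeyValueMetaData();
    String value = keyValueMetaData.get("foo"); // "bar" in the test above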


        throw new IOException(String.format("Unable to find footer for file %s", status.getPath().getName()));
      }

      for (Footer footer : footers) {
        int index = 0;
        ParquetMetadata metadata = footer.getParquetMetadata();
        for (BlockMetaData rowGroup : metadata.getBlocks()) {
          long valueCountInGrp = 0;
          // need to grab block information from HDFS
          columnChunkMetaData = rowGroup.getColumns().iterator().next();
          start = columnChunkMetaData.getFirstDataPageOffset();
          // this field is not being populated correctly, but the column chunks know their sizes, so sum them for now
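A sketch of how valueCountInGrp could be filled from the same metadata: each column chunk reports its value count, and BlockMetaData also exposes the row count directly (names follow the excerpt):

    import parquet.hadoop.metadata.ColumnChunkMetaData;

    long valueCountInGrp = 0;
    for (ColumnChunkMetaData column : rowGroup.getColumns()) {
        valueCountInGrp += column.getValueCount(); // values across all columns
    }
    long rows = rowGroup.getRowCount(); // rows in this row group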

                  footers.get(e.getPath()),
                  rowGroupScan.getColumns()
              )
          );
        } else {
          ParquetMetadata footer = footers.get(e.getPath());
          readers.add(new DrillParquetReader(footer, e, columns, conf));
        }
        if (rowGroupScan.getSelectionRoot() != null) {
          // compare the selection root with the file path to derive partition directory columns
          String[] r = rowGroupScan.getSelectionRoot().split("/");
          String[] p = e.getPath().split("/");

    ParquetInputSplit split;
    if (oldSplit instanceof FileSplit) {
      final Path finalPath = ((FileSplit) oldSplit).getPath();
      final JobConf cloneJob = projectionPusher.pushProjectionsAndFilters(conf, finalPath.getParent());

      // read the footer; the read support below derives the projected Hive schema from it
      final ParquetMetadata parquetMetadata = ParquetFileReader.readFooter(cloneJob, finalPath);
      final List<BlockMetaData> blocks = parquetMetadata.getBlocks();
      final FileMetaData fileMetaData = parquetMetadata.getFileMetaData();

      final ReadContext readContext = new DataWritableReadSupport()
          .init(cloneJob, fileMetaData.getKeyValueMetaData(), fileMetaData.getSchema());
      schemaSize = MessageTypeParser.parseMessageType(readContext.getReadSupportMetadata()
          .get(DataWritableReadSupport.HIVE_SCHEMA_KEY)).getFieldCount();
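The wrapper above recovers the Hive-projected schema as a string from the read-support metadata and parses it back into a MessageType. A minimal sketch of that round trip with MessageTypeParser (the schema literal is illustrative):

    import parquet.schema.MessageType;
    import parquet.schema.MessageTypeParser;

    MessageType schema = MessageTypeParser.parseMessageType(
            "message hive_schema {\n"
          + "  optional int32 id;\n"
          + "  optional binary name (UTF8);\n"
          + "}");
    int schemaSize = schema.getFieldCount(); // 2 fields in this example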

