Package parquet.hadoop.metadata

Examples of parquet.hadoop.metadata.ParquetMetadata


    // For every footer: resolve the file it describes, look up that file's status and
    // block locations on its file system, and append the splits generated for it to `splits`.
    for (Footer footer : footers) {
      final Path file = footer.getFile();
      LOG.debug(file);
      // The file system is obtained from the path itself using the job configuration.
      FileSystem fs = file.getFileSystem(configuration);
      FileStatus fileStatus = fs.getFileStatus(file);
      ParquetMetadata parquetMetaData = footer.getParquetMetadata();
      List<BlockMetaData> blocks = parquetMetaData.getBlocks();
      // Request block locations for the whole file: offset 0 through the file length.
      BlockLocation[] fileBlockLocations = fs.getFileBlockLocations(fileStatus, 0, fileStatus.getLen());
      // generateSplits receives the footer's blocks, the block locations, the file-level
      // metadata and the read-support details (class, requested schema as a string, metadata).
      splits.addAll(
          generateSplits(
              blocks,
              fileBlockLocations,
              fileStatus,
              parquetMetaData.getFileMetaData(),
              readSupportClass,
              readContext.getRequestedSchema().toString(),
              readContext.getReadSupportMetadata())
          );
    }
View Full Code Here


        // Submit one footer-reading task per file to the thread pool and keep the
        // resulting futures in `footers` so they can be collected below.
        for (final FileStatus currentFile : statuses) {
          footers.add(threadPool.submit(new Callable<ParquetMetadata>() {
            @Override
            public ParquetMetadata call() throws Exception {
              try {
                ParquetMetadata footer = ParquetFileReader.readFooter(configuration, currentFile);
                return footer;
              } catch (Exception e) {
                // Any failure is rethrown as a ParquetDecodingException that keeps the original cause.
                throw new ParquetDecodingException("could not read footer", e);
              }
            }
          }));
        }
        // Progress-bar state: `n` is the width of the bar in characters and
        // `previousPercent` counts how many bar cells have been filled so far.
        int previousPercent = 0;
        int n = 60;
        // Draw an empty bar of the form "0% [<n spaces>] 100%".
        System.out.print("0% [");
        for (int j = 0; j < n; j++) {
          System.out.print(" ");

        }
        System.out.print("] 100%");
        // Emit n + 6 backspaces (the n spaces plus the 6 characters of "] 100%") so that
        // later output starts just after the "[" — assumes the console honors '\b'.
        for (int j = 0; j < n + 6; j++) {
          System.out.print('\b');
        }
        while (!footers.isEmpty()) {
          Future<ParquetMetadata> futureFooter = footers.removeFirst();
          if (!futureFooter.isDone()) {
            footers.addLast(futureFooter);
            continue;
          }
          ParquetMetadata footer = futureFooter.get();
          int currentPercent = (++i * n / statuses.size());
          while (currentPercent > previousPercent) {
            System.out.print("*");
            previousPercent ++;
          }
View Full Code Here

   * @return the global meta data for all the footers
   */
  static GlobalMetaData getGlobalMetaData(List<Footer> footers) {
    GlobalMetaData fileMetaData = null;
    for (Footer footer : footers) {
      ParquetMetadata currentMetadata = footer.getParquetMetadata();
      fileMetaData = mergeInto(currentMetadata.getFileMetaData(), fileMetaData);
    }
    return fileMetaData;
  }
View Full Code Here

  }

  public static List<Footer> readSummaryFile(Configuration configuration, FileStatus summaryStatus) throws IOException {
    final Path parent = summaryStatus.getPath().getParent();
    ParquetMetadata mergedFooters = readFooter(configuration, summaryStatus);
    Map<Path, ParquetMetadata> footers = new HashMap<Path, ParquetMetadata>();
    List<BlockMetaData> blocks = mergedFooters.getBlocks();
    for (BlockMetaData block : blocks) {
      String path = block.getPath();
      Path fullPath = new Path(parent, path);
      ParquetMetadata current = footers.get(fullPath);
      if (current == null) {
        current = new ParquetMetadata(mergedFooters.getFileMetaData(), new ArrayList<BlockMetaData>());
        footers.put(fullPath, current);
      }
      current.getBlocks().add(block);
    }
    List<Footer> result = new ArrayList<Footer>();
    for (Entry<Path, ParquetMetadata> entry : footers.entrySet()) {
      result.add(new Footer(entry.getKey(), entry.getValue()));
    }
View Full Code Here

   * @throws IOException
   */
  public void end(Map<String, String> extraMetaData) throws IOException {
    state = state.end();
    if (DEBUG) LOG.debug(out.getPos() + ": end");
    ParquetMetadata footer = new ParquetMetadata(new FileMetaData(schema, extraMetaData, Version.FULL_VERSION), blocks);
    serializeFooter(footer, out);
    out.close();
  }
View Full Code Here

    Path metaDataPath = new Path(outputPath, PARQUET_METADATA_FILE);
    FileSystem fs = outputPath.getFileSystem(configuration);
    outputPath = outputPath.makeQualified(fs);
    FSDataOutputStream metadata = fs.create(metaDataPath);
    metadata.write(MAGIC);
    ParquetMetadata metadataFooter = mergeFooters(outputPath, footers);
    serializeFooter(metadataFooter, metadata);
    metadata.close();
  }
View Full Code Here

      for (BlockMetaData block : footer.getParquetMetadata().getBlocks()) {
        block.setPath(path);
        blocks.add(block);
      }
    }
    return new ParquetMetadata(fileMetaData.merge(), blocks);
  }
View Full Code Here

                  footers.get(e.getPath()),
                  rowGroupScan.getColumns()
              )
          );
        } else {
          ParquetMetadata footer = footers.get(e.getPath());
          readers.add(new DrillParquetReader(footer, e, columns, conf));
        }
        if (rowGroupScan.getSelectionRoot() != null) {
          String[] r = rowGroupScan.getSelectionRoot().split("/");
          String[] p = e.getPath().split("/");
View Full Code Here

        throw new IOException(String.format("Unable to find footer for file %s", status.getPath().getName()));
      }

      for (Footer footer : footers) {
        int index = 0;
        ParquetMetadata metadata = footer.getParquetMetadata();
        for (BlockMetaData rowGroup : metadata.getBlocks()) {
          long valueCountInGrp = 0;
          // need to grab block information from HDFS
          columnChunkMetaData = rowGroup.getColumns().iterator().next();
          start = columnChunkMetaData.getFirstDataPageOffset();
          // this field is not being populated correctly, but the column chunks know their sizes, just summing them for
View Full Code Here

    assertEquals(3, metadata.size());
    for (Footer footer : metadata) {
      final File file = new File(footer.getFile().toUri());
      assertTrue(file.getName(), file.getName().startsWith("part"));
      assertTrue(file.getPath(), file.exists());
      final ParquetMetadata parquetMetadata = footer.getParquetMetadata();
      assertEquals(2, parquetMetadata.getBlocks().size());
      final Map<String, String> keyValueMetaData = parquetMetadata.getFileMetaData().getKeyValueMetaData();
      assertEquals("bar", keyValueMetaData.get("foo"));
      assertEquals(footer.getFile().getName(), keyValueMetaData.get(footer.getFile().getName()));
    }
  }
View Full Code Here

TOP

Related Classes of parquet.hadoop.metadata.ParquetMetadata

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.