Package org.apache.hadoop.io.SequenceFile

Examples of org.apache.hadoop.io.SequenceFile.Reader

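Before the project-specific snippets below, here is a minimal sketch of the read pattern they all share: open a SequenceFile.Reader against a FileSystem, Path, and Configuration, reuse a pair of Writable instances, call next(key, value) until it returns false, and close the reader in a finally block. The path and the Text key/value types in this sketch are illustrative assumptions; a real file must be read with the key and value classes it was written with (see reader.getKeyClass() and reader.getValueClass()).

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class SequenceFileReadSketch {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    Path path = new Path("/tmp/example.seq"); // hypothetical input file

    // Open the reader; this is the constructor used throughout the examples below.
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
    try {
      // Reuse the same key/value instances across calls to next().
      Text key = new Text();
      Text value = new Text();
      while (reader.next(key, value)) {
        System.out.println(key + "\t" + value);
      }
    } finally {
      reader.close();
    }
  }
}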

      reader.close();
    } else if (Format.SEQUENCE == inFmt && Format.SEQUENCE == outFmt) {
      /*
       * The record reader will produce keys and values that are custom writables.
       */
      Reader reader = new Reader(fs, new Path(crushOutput.getAbsolutePath()), job);

      assertThat(reader.isCompressed(), is(compressionType != CompressionType.NONE));

      if (reader.isCompressed()) {
        assertThat(reader.isBlockCompressed(), is(compressionType == CompressionType.BLOCK));
        assertThat(reader.getCompressionCodec().getClass(), equalTo((Object) CustomCompressionCodec.class));
      }

      CustomWritable key = new CustomWritable();
      CustomWritable value = new CustomWritable();

      for (String fileName : fileNames) {
        int max = Integer.parseInt(fileName.substring(4));

        for (int k = 1, v = max * 100 + 1; k <= max; k++, v++) {
          reader.next(key, value);

          assertThat(fileName, key.get(), equalTo((long) k));
          assertThat(fileName, value.get(), equalTo((long) v));
        }
      }

      assertThat("Should be at end of crush output file" + crushedOutFileName, reader.next(key, value), is(false));

      reader.close();
    } else if (Format.TEXT == inFmt && Format.SEQUENCE == outFmt) {

      Reader reader = new Reader(fs, new Path(crushOutput.getAbsolutePath()), job);

      assertThat(reader.isCompressed(), is(compressionType != CompressionType.NONE));

      if (reader.isCompressed()) {
        assertThat(reader.isBlockCompressed(), is(compressionType == CompressionType.BLOCK));
        assertThat(reader.getCompressionCodec().getClass(), equalTo((Object) CustomCompressionCodec.class));
      }

      Text key = new Text();
      Text value = new Text();

      for (String fileName : fileNames) {
        int max = Integer.parseInt(fileName.substring(4));

        for (int k = 1, v = max * 100 + 1; k <= max; k++, v++) {
          reader.next(key, value);

          assertThat(fileName, key.toString(), equalTo(Integer.toString(k)));
          assertThat(fileName, value.toString(), equalTo(Integer.toString(v)));
        }
      }

      assertThat("Should be at end of crush output file" + crushedOutFileName, reader.next(key, value), is(false));

      reader.close();
    } else {
      fail();
    }
  }


    List<String> actualBucketFiles = new ArrayList<String>();

    Text key = new Text();
    Text value = new Text();

    Reader reader = new Reader(FileSystem.get(job), crush.getBucketFiles(), job);

    while(reader.next(key, value)) {
      actualBucketFiles.add(format("%s\t%s", key, value));
    }

    reader.close();

    Collections.sort(expectedBucketFiles);
    Collections.sort(actualBucketFiles);

    assertThat(actualBucketFiles, equalTo(expectedBucketFiles));

    /*
     * Verify the partition map.
     */
    Reader partitionMapReader = new Reader(FileSystem.get(job), crush.getPartitionMap(), job);

    IntWritable partNum = new IntWritable();

    Map<String, Integer> actualPartitions = new HashMap<String, Integer>();

    while (partitionMapReader.next(key, partNum)) {
      actualPartitions.put(key.toString(), partNum.get());
    }

    partitionMapReader.close();

    /*
     * These crush files need to be allocated into 5 partitions:
     *
     * in/2-1            55 bytes

    bucketToPartition = new HashMap<Text, Integer>(100);

    try {
      FileSystem fs = FileSystem.get(job);

      Reader reader = new Reader(fs, new Path(path), job);

      Text bucket = new Text();
      IntWritable partNum = new IntWritable();

      while (reader.next(bucket, partNum)) {
        int partNumValue = partNum.get();

        if (partNumValue < 0 || partNumValue >= expPartitions) {
          throw new IllegalArgumentException("Partition " + partNumValue + " not allowed with " + expPartitions + " reduce tasks");
        }

  public void testAppend() throws IOException {
    final int COL_COUNT = 10;
    final byte [] regionName = Bytes.toBytes("regionname");
    final byte [] tableName = Bytes.toBytes("tablename");
    final byte [] row = Bytes.toBytes("row");
    Reader reader = null;
    HLog log = new HLog(fs, dir, this.conf, null);
    try {
      // Write columns named "column:0", "column:1", etc., each with a
      // single-byte value of the corresponding ASCII digit.
      long timestamp = System.currentTimeMillis();
      List<KeyValue> cols = new ArrayList<KeyValue>();
      for (int i = 0; i < COL_COUNT; i++) {
        cols.add(new KeyValue(row, Bytes.toBytes("column:" + Integer.toString(i)),
          timestamp, new byte[] { (byte)(i + '0') }));
      }
      log.append(regionName, tableName, cols, false, System.currentTimeMillis());
      long logSeqId = log.startCacheFlush();
      log.completeCacheFlush(regionName, tableName, logSeqId);
      log.close();
      Path filename = log.computeFilename(log.getFilenum());
      log = null;
      // Now open a reader on the log and assert append worked.
      reader = new SequenceFile.Reader(fs, filename, conf);
      HLogKey key = new HLogKey();
      KeyValue val = new KeyValue();
      for (int i = 0; i < COL_COUNT; i++) {
        reader.next(key, val);
        assertTrue(Bytes.equals(regionName, key.getRegionName()));
        assertTrue(Bytes.equals(tableName, key.getTablename()));
        assertTrue(Bytes.equals(row, val.getRow()));
        assertEquals((byte)(i + '0'), val.getValue()[0]);
        System.out.println(key + " " + val);
      }
      while (reader.next(key, val)) {
        // Assert only one more row... the meta flushed row.
        assertTrue(Bytes.equals(regionName, key.getRegionName()));
        assertTrue(Bytes.equals(tableName, key.getTablename()));
        assertTrue(Bytes.equals(HLog.METAROW, val.getRow()));
        assertTrue(Bytes.equals(HLog.METAFAMILY, val.getFamily()));
        assertEquals(0, Bytes.compareTo(HLog.COMPLETE_CACHE_FLUSH,
          val.getValue()));
        System.out.println(key + " " + val);
      }
    } finally {
      if (log != null) {
        log.closeAndDelete();
      }
      if (reader != null) {
        reader.close();
      }
    }
  }

        Path dest = new Path(fullyQualifiedFileName + ".recovered");
        log.debug("Sorting log file to DSF " + dest);
        fs.mkdirs(dest);
        int part = 0;
       
        Reader reader = new SequenceFile.Reader(local, new Path(findLocalFilename(localLog)), fs.getConf());
        try {
          final ArrayList<Pair<LogFileKey,LogFileValue>> kv = new ArrayList<Pair<LogFileKey,LogFileValue>>();
          long memorySize = 0;
          while (true) {
            final long position = reader.getPosition();
            final LogFileKey key = new LogFileKey();
            final LogFileValue value = new LogFileValue();
            try {
              if (!reader.next(key, value))
                break;
            } catch (EOFException e) {
              log.warn("Unexpected end of file reading write ahead log " + localLog);
              break;
            }
            kv.add(new Pair<LogFileKey,LogFileValue>(key, value));
            memorySize += reader.getPosition() - position;
            if (memorySize > SORT_BUFFER_SIZE) {
              writeSortedEntries(dest, part++, kv);
              kv.clear();
              memorySize = 0;
            }
          }
          if (!kv.isEmpty())
            writeSortedEntries(dest, part++, kv);
          fs.create(new Path(dest, "finished")).close();
        } finally {
          reader.close();
        }
      }
     
      private void writeSortedEntries(Path dest, int part, final List<Pair<LogFileKey,LogFileValue>> kv) throws IOException {
        String path = dest + String.format("/part-r-%05d", part);
        log.debug("Writing partial log file to DSF " + path);
        log.debug("Sorting");
        Span span = Trace.start("Logger sort");
        span.data("logfile", dest.getName());
        Collections.sort(kv, new Comparator<Pair<LogFileKey,LogFileValue>>() {
          @Override
          public int compare(Pair<LogFileKey,LogFileValue> o1, Pair<LogFileKey,LogFileValue> o2) {
            return o1.getFirst().compareTo(o2.getFirst());
          }
        });
        span.stop();
        span = Trace.start("Logger write");
        span.data("logfile", dest.getName());
        MapFile.Writer writer = new MapFile.Writer(fs.getConf(), fs, path, LogFileKey.class, LogFileValue.class);
        short replication = (short) acuConf.getCount(Property.LOGGER_RECOVERY_FILE_REPLICATION);
        fs.setReplication(new Path(path + "/" + MapFile.DATA_FILE_NAME), replication);
        fs.setReplication(new Path(path + "/" + MapFile.INDEX_FILE_NAME), replication);
        try {
          for (Pair<LogFileKey,LogFileValue> entry : kv)
            writer.append(entry.getFirst(), entry.getSecond());
        } finally {
          writer.close();
          span.stop();
        }
      }
     
      private void copyLog(final String localLog, final String fullyQualifiedFileName) throws IOException {
        Path dest = new Path(fullyQualifiedFileName + ".copy");
        log.debug("Copying log file to DSF " + dest);
        fs.delete(dest, true);
        LogFileKey key = new LogFileKey();
        LogFileValue value = new LogFileValue();
        Writer writer = null;
        Reader reader = null;
        try {
          short replication = (short) acuConf.getCount(Property.LOGGER_RECOVERY_FILE_REPLICATION);
          writer = SequenceFile.createWriter(fs, fs.getConf(), dest, LogFileKey.class, LogFileValue.class, fs.getConf().getInt("io.file.buffer.size", 4096),
              replication, fs.getDefaultBlockSize(), SequenceFile.CompressionType.BLOCK, new DefaultCodec(), null, new Metadata());
          FileSystem local = TraceFileSystem.wrap(FileSystem.getLocal(fs.getConf()).getRaw());
          reader = new SequenceFile.Reader(local, new Path(findLocalFilename(localLog)), fs.getConf());
          while (reader.next(key, value)) {
            writer.append(key, value);
          }
        } catch (IOException ex) {
          log.warn("May have a partial copy of a recovery file: " + localLog, ex);
        } finally {
          if (reader != null)
            reader.close();
          if (writer != null)
            writer.close();
        }
        // Make file appear in the shared file system as the target name only after it is completely copied
        fs.rename(dest, new Path(fullyQualifiedFileName));

        Writer writer = SequenceFile.createWriter(fs, conf, file, Text.class,
                BehemothDocument.class);
        writer.append(new Text("test"), doc);
        writer.close();

        Reader reader = new org.apache.hadoop.io.SequenceFile.Reader(fs, file,
                conf);
        Text key2 = new Text();
        BehemothDocument doc2 = new BehemothDocument();
        reader.next(key2, doc2);
        reader.close();

        fs.delete(file, true);

        // check the values
    }

        Path dest = new Path(fullyQualifiedFileName + ".recovered");
        log.debug("Sorting log file to DSF " + dest);
        fs.mkdirs(dest);
        int part = 0;
       
        Reader reader = new SequenceFile.Reader(local, localFilename(localLog), conf);
        try {
          final ArrayList<Pair<LogFileKey,LogFileValue>> kv = new ArrayList<Pair<LogFileKey,LogFileValue>>();
          long memorySize = 0;
          while (true) {
            final long position = reader.getPosition();
            final LogFileKey key = new LogFileKey();
            final LogFileValue value = new LogFileValue();
            try {
              if (!reader.next(key, value))
                break;
            } catch (EOFException e) {
              log.warn("Unexpected end of file reading write ahead log " + localLog);
              break;
            }
            kv.add(new Pair<LogFileKey,LogFileValue>(key, value));
            memorySize += reader.getPosition() - position;
            if (memorySize > SORT_BUFFER_SIZE) {
              writeSortedEntries(dest, part++, kv);
              kv.clear();
              memorySize = 0;
            }
          }
          if (!kv.isEmpty())
            writeSortedEntries(dest, part++, kv);
          fs.create(new Path(dest, "finished")).close();
        } finally {
          reader.close();
        }
      }
     
      private void writeSortedEntries(Path dest, int part, final List<Pair<LogFileKey,LogFileValue>> kv) throws IOException {
        String path = dest + String.format("/part-r-%05d", part);
        log.debug("Writing partial log file to DSF " + path);
        log.debug("Sorting");
        Span span = Trace.start("Logger sort");
        span.data("logfile", dest.getName());
        Collections.sort(kv, new Comparator<Pair<LogFileKey,LogFileValue>>() {
          @Override
          public int compare(Pair<LogFileKey,LogFileValue> o1, Pair<LogFileKey,LogFileValue> o2) {
            return o1.getFirst().compareTo(o2.getFirst());
          }
        });
        span.stop();
        span = Trace.start("Logger write");
        span.data("logfile", dest.getName());
        MapFile.Writer writer = new MapFile.Writer(conf, fs, path, LogFileKey.class, LogFileValue.class);
        short replication = 1;
        fs.setReplication(new Path(path + "/" + MapFile.DATA_FILE_NAME), replication);
        fs.setReplication(new Path(path + "/" + MapFile.INDEX_FILE_NAME), replication);
        try {
          for (Pair<LogFileKey,LogFileValue> entry : kv)
            writer.append(entry.getFirst(), entry.getSecond());
        } finally {
          writer.close();
          span.stop();
        }
      }
     
      private void copyLog(final String localLog, final String fullyQualifiedFileName) throws IOException {
        Path dest = new Path(fullyQualifiedFileName + ".copy");
        log.debug("Copying log file to DSF " + dest);
        fs.delete(dest, true);
        LogFileKey key = new LogFileKey();
        LogFileValue value = new LogFileValue();
        Writer writer = null;
        Reader reader = null;
        try {
          short replication = 1;
          writer = SequenceFile.createWriter(fs, conf, dest, LogFileKey.class, LogFileValue.class, fs.getConf().getInt("io.file.buffer.size", 4096),
              replication, fs.getDefaultBlockSize(), SequenceFile.CompressionType.BLOCK, new DefaultCodec(), null, new Metadata());
          FileSystem local = FileSystem.getLocal(conf).getRaw();
          reader = new SequenceFile.Reader(local, localFilename(localLog), conf);
          while (reader.next(key, value)) {
            writer.append(key, value);
          }
        } catch (IOException ex) {
          log.warn("May have a partial copy of a recovery file: " + localLog, ex);
        } finally {
          if (reader != null)
            reader.close();
          if (writer != null)
            writer.close();
        }
        // Make file appear in the shared file system as the target name only after it is completely copied
        fs.rename(dest, new Path(fullyQualifiedFileName));
