Package org.apache.hadoop.io.SequenceFile

Examples of org.apache.hadoop.io.SequenceFile.Writer

SequenceFile is Hadoop's flat-file format for binary key/value pairs, and SequenceFile.Writer is the class that appends records to such a file. The excerpts below are drawn from several open-source projects that write SequenceFiles.
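
As a minimal, self-contained sketch of the basic write path (the path and the Text/IntWritable types are illustrative, and imports from org.apache.hadoop.conf, org.apache.hadoop.fs, and org.apache.hadoop.io are omitted here, as they are in the excerpts below):

    Configuration conf = new Configuration();
    Path path = new Path("/tmp/example.seq");
    FileSystem fs = path.getFileSystem(conf);

    // Create a writer for Text keys and IntWritable values.
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, path,
        Text.class, IntWritable.class);
    try {
      writer.append(new Text("answer"), new IntWritable(42));
    } finally {
      writer.close(); // flushes and closes the stream the writer opened
    }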


    // From a Flume write benchmark: stream events from an in-memory source
    // ("mem") into a SequenceFile on HDFS, recording benchmark marks on "b"
    // (both are defined in the enclosing test, not shown here).
    FlumeConfiguration conf = FlumeConfiguration.get();
    Path path = new Path("hdfs://localhost/testfile");
    FileSystem hdfs = path.getFileSystem(conf);
    hdfs.deleteOnExit(path);

    Writer w = SequenceFile.createWriter(hdfs, conf, path,
        WriteableEventKey.class, WriteableEvent.class);
    b.mark("hdfs_fileopen_started");

    Event e = null;
    while ((e = mem.next()) != null) {
      // writing
      w.append(new WriteableEventKey(e), new WriteableEvent(e));
    }
    w.close();
    b.mark("seqfile_hdfs_write");

    hdfs.close();
    b.done();
  }


    // From Flume's FlushingSequenceFileWriter factory. SequenceFile rejects
    // GzipCodec unless the native Hadoop zlib library is loaded; the start
    // of this condition is cut off in the excerpt, but the message shows it
    // tests whether the configured codec is GzipCodec.
        && !ZlibFactory.isNativeZlibLoaded(conf)) {
      throw new IllegalArgumentException("SequenceFile doesn't work with "
          + "GzipCodec without native-hadoop code!");
    }

    Writer writer = null;
    // Wrap a raw local file in an FSDataOutputStream so it can back a writer.
    FSDataOutputStream out = new FSDataOutputStream(
        new FlushingSequenceFileWriter.LocalFSFileOutputStream(new File(fname),
            true), null);
    if (!compress) {
      writer = new FlushingSequenceFileWriter(conf, out, keyClass, valClass,
          // ... (excerpt truncated here)
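If the native zlib library is not loaded, a portable alternative is to ask for the pure-Java-capable DefaultCodec instead (a sketch; fs, conf, path, and the key/value types are illustrative):

    SequenceFile.Writer w = SequenceFile.createWriter(fs, conf, path,
        Text.class, BytesWritable.class,
        SequenceFile.CompressionType.BLOCK, new DefaultCodec());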

    // From a Flume round-trip test: write a single event, check the file
    // length, then read it back ("conf" and "path" are set up earlier).
    FileSystem hdfs = path.getFileSystem(conf);
    hdfs.deleteOnExit(path);

    Event e = new EventImpl("EVENT".getBytes());

    Writer w = SequenceFile.createWriter(hdfs, conf, path,
        WriteableEventKey.class, WriteableEvent.class);

    // writing
    w.append(new WriteableEventKey(e), new WriteableEvent(e));
    w.close();

    FileStatus stats = hdfs.getFileStatus(path);
    assertTrue(stats.getLen() > 0);

    // reading
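The excerpt breaks off at the read. A sketch of the reading half, assuming WriteableEventKey and WriteableEvent have the no-argument constructors that Writable types require:

    SequenceFile.Reader r = new SequenceFile.Reader(hdfs, path, conf);
    WriteableEventKey k = new WriteableEventKey();
    WriteableEvent v = new WriteableEvent();
    while (r.next(k, v)) {
      // each call deserializes one key/value pair into k and v
    }
    r.close();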

    // From a Flume test of the stream-based factory method: this overload
    // wraps an existing FSDataOutputStream, so the writer does NOT own the
    // output stream and will not close it itself.
    FSDataOutputStream dos = hdfs.create(path);
    hdfs.deleteOnExit(path);

    Writer writer = SequenceFile.createWriter(conf, dos,
        WriteableEventKey.class, WriteableEvent.class,
        SequenceFile.CompressionType.NONE, new DefaultCodec());

    Event e = new EventImpl("EVENT".getBytes());

    writer.append(new WriteableEventKey(e), new WriteableEvent(e));
    writer.sync();
    writer.close();

    // Because the writer does not own the stream, the caller must close the
    // underlying FSDataOutputStream itself; until then, nothing written by
    // this writer is visible in the file.
    dos.close();
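By contrast, the path-based factory methods return a writer that owns its output stream, so a single close() suffices (a sketch reusing the names from the excerpt above):

    Writer owning = SequenceFile.createWriter(hdfs, conf, path,
        WriteableEventKey.class, WriteableEvent.class);
    owning.append(new WriteableEventKey(e), new WriteableEvent(e));
    owning.close(); // also closes the stream the writer opened itself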

     
      // From Hadoop's MapTask: merge the per-spill map output files into one
      // final file, writing one SequenceFile segment per partition.
      if (numSpills == 0) {
        //create dummy files
        for (int i = 0; i < partitions; i++) {
          segmentStart = finalOut.getPos();
          Writer writer = SequenceFile.createWriter(job, finalOut,
                                                    job.getMapOutputKeyClass(),
                                                    job.getMapOutputValueClass(),
                                                    compressionType, codec);
          finalIndexOut.writeLong(segmentStart);
          finalIndexOut.writeLong(finalOut.getPos() - segmentStart);
          writer.close();
        }
        finalOut.close();
        finalIndexOut.close();
        return;
      }
      {
        //create a sorter object as we need access to the SegmentDescriptor
        //class and merge methods
        Sorter sorter = new Sorter(localFs, job.getOutputKeyComparator(), valClass, job);
        sorter.setProgressable(reporter);
       
        for (int parts = 0; parts < partitions; parts++){
          List<SegmentDescriptor> segmentList =
            new ArrayList<SegmentDescriptor>(numSpills);
          for(int i = 0; i < numSpills; i++) {
            FSDataInputStream indexIn = localFs.open(indexFileName[i]);
            // each index record is two longs (offset, length) = 16 bytes
            indexIn.seek(parts * 16);
            long segmentOffset = indexIn.readLong();
            long segmentLength = indexIn.readLong();
            indexIn.close();
            SegmentDescriptor s = sorter.new SegmentDescriptor(segmentOffset,
                                                               segmentLength, filename[i]);
            s.preserveInput(true);
            s.doSync();
            segmentList.add(i, s);
          }
          segmentStart = finalOut.getPos();
          RawKeyValueIterator kvIter = sorter.merge(segmentList, new Path(getTaskId()));
          SequenceFile.Writer writer = SequenceFile.createWriter(job, finalOut,
                                                                 job.getMapOutputKeyClass(), job.getMapOutputValueClass(),
                                                                 compressionType, codec);
          sorter.writeFile(kvIter, writer);
          //close the file - required esp. for block compression to ensure
          //partition data don't span partition boundaries
          writer.close();
          //when we write the offset/length to the final index file, we write
          //longs for both. This helps us to reliably seek directly to the
          //offset/length for a partition when we start serving the byte-ranges
          //to the reduces. We probably waste some space in the file by doing
          //this as opposed to writing VLong but it helps us later on.
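The fixed-width index is what makes serving byte-ranges cheap: partition p's record always starts at byte p * 16. A sketch of the lookup (indexFile and partition are illustrative names):

    // Index layout: [offset:long][length:long] per partition, 16 bytes each.
    FSDataInputStream idx = localFs.open(indexFile);
    idx.seek(partition * 16L);
    long offset = idx.readLong(); // where this partition's segment starts
    long length = idx.readLong(); // how many bytes the segment spans
    idx.close();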

    // From a k-means input-preparation routine: parse one tab-separated
    // vector per line, append every vector to the data file, and copy the
    // first k vectors into the centers file.
    if (fs.exists(in))
      fs.delete(in, true);

    final NullWritable value = NullWritable.get();

    Writer centerWriter = new SequenceFile.Writer(fs, conf, center,
        VectorWritable.class, NullWritable.class);

    final SequenceFile.Writer dataWriter = SequenceFile.createWriter(fs, conf,
        in, VectorWritable.class, NullWritable.class, CompressionType.NONE);

    int i = 0;

    BufferedReader br = new BufferedReader(
        new InputStreamReader(fs.open(txtIn)));
    String line;
    while ((line = br.readLine()) != null) {
      String[] split = line.split("\t");
      DenseDoubleVector vec = new DenseDoubleVector(split.length);
      for (int j = 0; j < split.length; j++) {
        vec.set(j, Double.parseDouble(split[j]));
      }
      VectorWritable vector = new VectorWritable(vec);
      dataWriter.append(vector, value);
      if (k > i) {
        // the first k vectors double as the initial cluster centers
        assert centerWriter != null;
        centerWriter.append(vector, value);
      } else if (centerWriter != null) {
        // after the first k vectors the centers file is complete; close it
        centerWriter.close();
        centerWriter = null;
      }
      i++;
    }

  // From Hadoop's SequenceFile serialization test: plain Long/String keys
  // and values work only when "io.serializations" is configured to include
  // JavaSerialization (done in the test's setUp, not shown here).
  public void testJavaSerialization() throws Exception {
    Path file = new Path(System.getProperty("test.build.data", ".") +
        "/testseqser.seq");
   
    fs.delete(file, true);
    Writer writer = SequenceFile.createWriter(fs, conf, file, Long.class,
        String.class);
   
    writer.append(1L, "one");
    writer.append(2L, "two");
   
    writer.close();
   
    Reader reader = new Reader(fs, file, conf);
    assertEquals(1L, reader.next((Object) null));
    assertEquals("one", reader.getCurrentValue((Object) null));
    assertEquals(2L, reader.next((Object) null));
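What makes plain Long and String usable here is Hadoop's pluggable serialization: the configuration must register JavaSerialization (a sketch of that setup):

    Configuration conf = new Configuration();
    // Accept java.io.Serializable keys/values such as Long and String.
    conf.set("io.serializations",
        "org.apache.hadoop.io.serializer.JavaSerialization");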

     

    // A fuller variant of the k-means preparation above: when hasKey is set,
    // the first tab-separated column is a row name and the remaining columns
    // are the vector components.
    if (fs.exists(in))
      fs.delete(in, true);

    final NullWritable value = NullWritable.get();

    Writer centerWriter = new SequenceFile.Writer(fs, conf, center,
        VectorWritable.class, NullWritable.class);

    final SequenceFile.Writer dataWriter = SequenceFile.createWriter(fs, conf,
        in, VectorWritable.class, NullWritable.class, CompressionType.NONE);

    int i = 0;

    BufferedReader br = new BufferedReader(
        new InputStreamReader(fs.open(txtIn)));
    String line;
    while ((line = br.readLine()) != null) {
      String[] split = line.split("\t");
      int columnLength = split.length;
      int indexPos = 0;
      if (hasKey) {
        columnLength = columnLength - 1;
        indexPos++;
      }

      DenseDoubleVector vec = new DenseDoubleVector(columnLength);
      for (int j = 0; j < columnLength; j++) {
        vec.set(j, Double.parseDouble(split[j + indexPos]));
      }

      VectorWritable vector;
      if (hasKey) {
        NamedDoubleVector named = new NamedDoubleVector(split[0], vec);
        vector = new VectorWritable(named);
      } else {
        vector = new VectorWritable(vec);
      }

      dataWriter.append(vector, value);
      if (k > i) {
        // the first k vectors double as the initial cluster centers
        centerWriter.append(vector, value);
      }
      i++;
    }
    br.close();
    centerWriter.close();
    dataWriter.close();
    return in;
  }
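A sketch of reading the prepared data file back, assuming VectorWritable has the no-argument constructor Writable types require:

    SequenceFile.Reader reader = new SequenceFile.Reader(fs, in, conf);
    VectorWritable vec = new VectorWritable();
    NullWritable nul = NullWritable.get();
    while (reader.next(vec, nul)) {
      // one parsed input vector per record
    }
    reader.close();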

    // From a Mahout decision-forest test: store the output in a sequence
    // file, closing the writer via Guava's Closeables even if an append throws.
    Path base = getTestTempDirPath("testdata");
    FileSystem fs = base.getFileSystem(conf);

    Path outputFile = new Path(base, "PartialBuilderTest.seq");
    Writer writer = SequenceFile.createWriter(fs, conf, outputFile,
        TreeID.class, MapredOutput.class);

    try {
      for (int index = 0; index < NUM_TREES; index++) {
        writer.append(keys[index], values[index]);
      }
    } finally {
      Closeables.close(writer, false);
    }
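To read such a file back without hard-coding the key and value types, a reader can instantiate whatever classes the file header records (a generic sketch):

    SequenceFile.Reader reader = new SequenceFile.Reader(fs, outputFile, conf);
    // The file header stores the key and value classes used at write time.
    Writable key = (Writable) ReflectionUtils.newInstance(
        reader.getKeyClass(), conf);
    Writable value = (Writable) ReflectionUtils.newInstance(
        reader.getValueClass(), conf);
    while (reader.next(key, value)) {
      // here: one TreeID/MapredOutput pair per iteration
    }
    reader.close();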
