Examples of org.apache.hadoop.fs.FSDataOutputStream

org.apache.hadoop.fs.FSDataOutputStream
Utility that wraps a {@link OutputStream} in a {@link DataOutputStream}, buffers output through a {@link BufferedOutputStream} and creates a checksumfile.

      long length = fs.getFileStatus(new Path(zipFilePath)).getLen();


        ZipFile zipFile = new ZipFile(in,length, CHINESE_CHARSET,true);
        Enumeration<?> emu = zipFile.getEntries();
        BufferedInputStream bis;
        FSDataOutputStream fos;
        BufferedOutputStream bos;
        Path file, parentFile;
        ZipEntry entry;
        byte[] cache = new byte[CACHE_SIZE];
        while (emu.hasMoreElements()) {
            entry = (ZipEntry) emu.nextElement();
            if (entry.isDirectory()) {
              fs2.mkdirs(new Path(destDir , entry.getName()));
                continue;
            }
            bis = new BufferedInputStream(zipFile.getInputStream(entry));
            file = new Path(destDir , entry.getName());
            parentFile = file.getParent();
            if (parentFile != null && (!fs2.exists(parentFile))) {
              fs2.mkdirs(parentFile);
            }
            fos =fs2.create(file,true);
            bos = new BufferedOutputStream(fos, CACHE_SIZE);
            int nRead = 0;
            while ((nRead = bis.read(cache, 0, CACHE_SIZE)) != -1) {
                fos.write(cache, 0, nRead);
            }
            bos.flush();
            bos.close();
            fos.close();
            bis.close();
        }
        zipFile.close();
    }

View Full Code Here

    }
    
    
      IndexUtils.truncate(fs, A);
      IndexUtils.truncate(fs,B);
     FSDataOutputStream outputA= fs.create(A,true);    
     FSDataOutputStream outputB= fs.create(B,true);    
        
    
    FileStatus[] filelist = fs.listStatus(dir);
  
    Long bytesRead = 0l;
    long maxsize = 1024l * 1024 * 1024 * 100;
    
  
    for (FileStatus f : filelist) {
      if (!f.isDir() && !f.getPath().getName().startsWith("_")) {
        FSDataInputStream in = fs.open(f.getPath());
        BufferedReader bf=new BufferedReader(new InputStreamReader(in)); 
        String line;
        while ((line = bf.readLine()) != null) {
          bytesRead += line.getBytes().length;
          String towrite=line;
          towrite=line.replaceAll(",", "_").replaceAll("\001", ",").replaceAll("\t", ",").replaceAll("\"", "");
          if(!towrite.isEmpty()&&towrite.indexOf("rep0")>=0)
          {


            String[] cols=towrite.split(",");
            for(String s:cols)
            {
              if(s.indexOf("@abtest@")>=0)
              {
                String[] sss=s.split("@abtest@");
                if(sss.length>1)
                {
                  
                  outputA.writeUTF(sss[1]+"\r\n");
                break;
                }


              }
            }
          
          }
          
          if(!towrite.isEmpty()&&towrite.indexOf("rep1")>=0)
          {


            String[] cols=towrite.split(",");
            for(String s:cols)
            {
              if(s.indexOf("@abtest@")>=0)
              {
                String[] sss=s.split("@abtest@");
                if(sss.length>1)
                {
                  outputB.writeUTF(sss[1]+"\r\n");
                break;
                }


              }
            }
          
          }
          if (bytesRead >= maxsize) {
            bf.close();
            in.close();
            return;
          }
        }
        bf.close();
        in.close();
      }
    }
    
     outputA.close();
     outputB.close();
     


    return;
    
    }

View Full Code Here

      Path indexlinks = new Path(rtnpath, "indexLinks");
      if (iscopy) {
        if (lfs.exists(indexlinks)) {
          lfs.delete(indexlinks, true);
        }
        FSDataOutputStream outlinks = lfs.create(indexlinks);
        outlinks.write((new String(storepath.toString() + "\r\n"))
            .getBytes());
        outlinks.close();
        completePath.mkdirs();
      }
    }


    return workPath.getAbsolutePath();

View Full Code Here


  public void writeToFile(Path loc, JobConf job, Checksum crc)
      throws IOException {
    final FileSystem rfs = FileSystem.getLocal(job).getRaw();
    CheckedOutputStream chk = null;
    final FSDataOutputStream out = rfs.create(loc);
    try {
      if (crc != null) {
        crc.reset();
        chk = new CheckedOutputStream(out, crc);
        chk.write(buf.array());
        out.writeLong(chk.getChecksum().getValue());
      } else {
        out.write(buf.array());
      }
    } finally {
      if (chk != null) {
        chk.close();
      } else {
        out.close();
      }
    }
  }

View Full Code Here

  private File path1, path2, path3;
  private MiniDFSCluster cluster;


  private void writeFile(FileSystem fileSys, Path name, int repl)
  throws IOException {
    FSDataOutputStream stm = fileSys.create(name, true,
        fileSys.getConf().getInt("io.file.buffer.size", 4096),
        (short)repl, (long)blockSize);
    byte[] buffer = new byte[fileSize];
    Random rand = new Random(seed);
    rand.nextBytes(buffer);
    stm.write(buffer);
    stm.close();
  }

View Full Code Here

  public void store(JobInProgress job) {
    if (active && retainTime > 0) {
      JobID jobId = job.getStatus().getJobID();
      Path jobStatusFile = getInfoFilePath(jobId);
      try {
        FSDataOutputStream dataOut = fs.create(jobStatusFile);


        job.getStatus().write(dataOut);


        job.getProfile().write(dataOut);


        job.getCounters().write(dataOut);


        TaskCompletionEvent[] events = 
                job.getTaskCompletionEvents(0, Integer.MAX_VALUE);
        dataOut.writeInt(events.length);
        for (TaskCompletionEvent event : events) {
          event.write(dataOut);
        }


        dataOut.close();
      } catch (IOException ex) {
        LOG.warn("Could not store [" + jobId + "] job info : " +
                 ex.getMessage(), ex);
        try {
          fs.delete(jobStatusFile, true);

View Full Code Here

    MiniDFSCluster cluster = new MiniDFSCluster(conf, repl, true, null);
    FileSystem fs1 = cluster.getFileSystem();
    FileSystem fs2 = AppendTestUtil.createHdfsWithDifferentUsername(fs1.getConf());    
    
    Path path = new Path("/testWriterInterrupted");
    FSDataOutputStream stm = fs1.create(path);
    byte[] toWrite = AppendTestUtil.randomBytes(0, 5);
    
    CountDownLatch countdown = new CountDownLatch(1);
    AtomicReference<Throwable> thrown = new AtomicReference<Throwable>();
    WriterThread writerThread = new AppendTestUtil.WriterThread(

View Full Code Here

      //buffer + header lengths for the partitions
      long size = (bufend >= bufstart
          ? bufend - bufstart
          : (bufvoid - bufend) + bufstart) +
                  partitions * APPROX_HEADER_LENGTH;
      FSDataOutputStream out = null;
      try {
        // create spill file
        final SpillRecord spillRec = new SpillRecord(partitions);
        final Path filename =
            mapOutputFile.getSpillFileForWrite(numSpills, size);
        out = rfs.create(filename);


        final int endPosition = (kvend > kvstart)
          ? kvend
          : kvoffsets.length + kvend;
        sorter.sort(MapOutputBuffer.this, kvstart, endPosition, reporter);
        int spindex = kvstart;
        IndexRecord rec = new IndexRecord();
        InMemValBytes value = new InMemValBytes();
        for (int i = 0; i < partitions; ++i) {
          IFile.Writer<K, V> writer = null;
          try {
            long segmentStart = out.getPos();
            writer = new Writer<K, V>(job, out, keyClass, valClass, codec,
                                      spilledRecordsCounter);
            if (combinerRunner == null) {
              // spill directly
              DataInputBuffer key = new DataInputBuffer();
              while (spindex < endPosition &&
                  kvindices[kvoffsets[spindex % kvoffsets.length]
                            + PARTITION] == i) {
                final int kvoff = kvoffsets[spindex % kvoffsets.length];
                getVBytesForOffset(kvoff, value);
                key.reset(kvbuffer, kvindices[kvoff + KEYSTART],
                          (kvindices[kvoff + VALSTART] - 
                           kvindices[kvoff + KEYSTART]));
                writer.append(key, value);
                ++spindex;
              }
            } else {
              int spstart = spindex;
              while (spindex < endPosition &&
                  kvindices[kvoffsets[spindex % kvoffsets.length]
                            + PARTITION] == i) {
                ++spindex;
              }
              // Note: we would like to avoid the combiner if we've fewer
              // than some threshold of records for a partition
              if (spstart != spindex) {
                combineCollector.setWriter(writer);
                RawKeyValueIterator kvIter =
                  new MRResultIterator(spstart, spindex);
                combinerRunner.combine(kvIter, combineCollector);
              }
            }


            // close the writer
            writer.close();


            // record offsets
            rec.startOffset = segmentStart;
            rec.rawLength = writer.getRawLength();
            rec.partLength = writer.getCompressedLength();
            spillRec.putIndex(rec, i);


            writer = null;
          } finally {
            if (null != writer) writer.close();
          }
        }


        if (totalIndexCacheMemory >= INDEX_CACHE_MEMORY_LIMIT) {
          // create spill index file
          Path indexFilename =
              mapOutputFile.getSpillIndexFileForWrite(numSpills, partitions
                  * MAP_OUTPUT_INDEX_RECORD_LENGTH);
          spillRec.writeToFile(indexFilename, job);
        } else {
          indexCacheList.add(spillRec);
          totalIndexCacheMemory +=
            spillRec.size() * MAP_OUTPUT_INDEX_RECORD_LENGTH;
        }
        LOG.info("Finished spill " + numSpills);
        ++numSpills;
      } finally {
        if (out != null) out.close();
      }
    }

View Full Code Here

     * directly to a spill file. Consider this "losing".
     */
    private void spillSingleRecord(final K key, final V value,
                                   int partition) throws IOException {
      long size = kvbuffer.length + partitions * APPROX_HEADER_LENGTH;
      FSDataOutputStream out = null;
      try {
        // create spill file
        final SpillRecord spillRec = new SpillRecord(partitions);
        final Path filename =
            mapOutputFile.getSpillFileForWrite(numSpills, size);
        out = rfs.create(filename);
        
        // we don't run the combiner for a single record
        IndexRecord rec = new IndexRecord();
        for (int i = 0; i < partitions; ++i) {
          IFile.Writer<K, V> writer = null;
          try {
            long segmentStart = out.getPos();
            // Create a new codec, don't care!
            writer = new IFile.Writer<K,V>(job, out, keyClass, valClass, codec,
                                            spilledRecordsCounter);


            if (i == partition) {
              final long recordStart = out.getPos();
              writer.append(key, value);
              // Note that our map byte count will not be accurate with
              // compression
              mapOutputByteCounter.increment(out.getPos() - recordStart);
            }
            writer.close();


            // record offsets
            rec.startOffset = segmentStart;
            rec.rawLength = writer.getRawLength();
            rec.partLength = writer.getCompressedLength();
            spillRec.putIndex(rec, i);


            writer = null;
          } catch (IOException e) {
            if (null != writer) writer.close();
            throw e;
          }
        }
        if (totalIndexCacheMemory >= INDEX_CACHE_MEMORY_LIMIT) {
          // create spill index file
          Path indexFilename =
              mapOutputFile.getSpillIndexFileForWrite(numSpills, partitions
                  * MAP_OUTPUT_INDEX_RECORD_LENGTH);
          spillRec.writeToFile(indexFilename, job);
        } else {
          indexCacheList.add(spillRec);
          totalIndexCacheMemory +=
            spillRec.size() * MAP_OUTPUT_INDEX_RECORD_LENGTH;
        }
        ++numSpills;
      } finally {
        if (out != null) out.close();
      }
    }

View Full Code Here

          mapOutputFile.getOutputFileForWrite(finalOutFileSize);
      Path finalIndexFile =
          mapOutputFile.getOutputIndexFileForWrite(finalIndexFileSize);


      //The output stream for the final single output file
      FSDataOutputStream finalOut = rfs.create(finalOutputFile, true, 4096);


      if (numSpills == 0) {
        //create dummy files
        IndexRecord rec = new IndexRecord();
        SpillRecord sr = new SpillRecord(partitions);
        try {
          for (int i = 0; i < partitions; i++) {
            long segmentStart = finalOut.getPos();
            Writer<K, V> writer =
              new Writer<K, V>(job, finalOut, keyClass, valClass, codec, null);
            writer.close();
            rec.startOffset = segmentStart;
            rec.rawLength = writer.getRawLength();
            rec.partLength = writer.getCompressedLength();
            sr.putIndex(rec, i);
          }
          sr.writeToFile(finalIndexFile, job);
        } finally {
          finalOut.close();
        }
        return;
      }
      {
        IndexRecord rec = new IndexRecord();
        final SpillRecord spillRec = new SpillRecord(partitions);
        for (int parts = 0; parts < partitions; parts++) {
          //create the segments to be merged
          List<Segment<K,V>> segmentList =
            new ArrayList<Segment<K, V>>(numSpills);
          for(int i = 0; i < numSpills; i++) {
            IndexRecord indexRecord = indexCacheList.get(i).getIndex(parts);


            Segment<K,V> s =
              new Segment<K,V>(job, rfs, filename[i], indexRecord.startOffset,
                               indexRecord.partLength, codec, true);
            segmentList.add(i, s);


            if (LOG.isDebugEnabled()) {
              LOG.debug("MapId=" + mapId + " Reducer=" + parts +
                  "Spill =" + i + "(" + indexRecord.startOffset + "," +
                  indexRecord.rawLength + ", " + indexRecord.partLength + ")");
            }
          }


          //merge
          @SuppressWarnings("unchecked")
          RawKeyValueIterator kvIter = Merger.merge(job, rfs,
                         keyClass, valClass, codec,
                         segmentList, job.getInt("io.sort.factor", 100),
                         new Path(mapId.toString()),
                         job.getOutputKeyComparator(), reporter,
                         null, spilledRecordsCounter);


          //write merged output to disk
          long segmentStart = finalOut.getPos();
          Writer<K, V> writer =
              new Writer<K, V>(job, finalOut, keyClass, valClass, codec,
                               spilledRecordsCounter);
          if (combinerRunner == null || numSpills < minSpillsForCombine) {
            Merger.writeFile(kvIter, writer, reporter, job);
          } else {
            combineCollector.setWriter(writer);
            combinerRunner.combine(kvIter, combineCollector);
          }


          //close
          writer.close();


          // record offsets
          rec.startOffset = segmentStart;
          rec.rawLength = writer.getRawLength();
          rec.partLength = writer.getCompressedLength();
          spillRec.putIndex(rec, parts);
        }
        spillRec.writeToFile(finalIndexFile, job);
        finalOut.close();
        for(int i = 0; i < numSpills; i++) {
          rfs.delete(filename[i],true);
        }
      }
    }

View Full Code Here

0 1 2 3 4 5 6 7 8 9

TOP

Related Classes of org.apache.hadoop.fs.FSDataOutputStream

com.alimama.mdrill.ui.service.utils.WebServiceParams

com.asakusafw.runtime.util.cache.HadoopFileCacheRepository

com.asakusafw.thundergate.runtime.cache.CacheStorageTest

com.bah.geterdun.HeaderManagement

com.dianping.cat.hadoop.hdfs.UploaderAndCleaner

com.hadoop.compression.lzo.LzoIndex

edu.umd.cloud9.io.FSProperty

ivory.core.preprocess.BuildTermDocVectors$DocLengthDataWriterMapper

ivory.core.preprocess.BuildTermDocVectors$MyMapper

ivory.core.util.CLIRUtils

All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.