Package org.apache.hadoop.util

Examples of org.apache.hadoop.util.LineReader


    assertEquals("split on fake newline", "abc\u200axyz", line.toString());
  }

  @Test
  public void testNewLines() throws Exception {
    LineReader in = makeStream("a\nbb\n\nccc\rdddd\r\neeeee");
    Text out = new Text();
    in.readLine(out);
    assertEquals("line1 length", 1, out.getLength());
    in.readLine(out);
    assertEquals("line2 length", 2, out.getLength());
    in.readLine(out);
    assertEquals("line3 length", 0, out.getLength());
    in.readLine(out);
    assertEquals("line4 length", 3, out.getLength());
    in.readLine(out);
    assertEquals("line5 length", 4, out.getLength());
    in.readLine(out);
    assertEquals("line5 length", 5, out.getLength());
    assertEquals("end of file", 0, in.readLine(out));
  }
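
As a standalone illustration, here is a minimal sketch of the same newline handling outside JUnit. It assumes only hadoop-common on the classpath, and the class name is ours. LineReader treats '\n', '\r', and "\r\n" all as line terminators, and readLine() returns the number of bytes consumed (including the terminator), or 0 at end of file.

import java.io.ByteArrayInputStream;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.LineReader;

public class LineReaderNewlineDemo {
  public static void main(String[] args) throws Exception {
    // Six lines: "a", "bb", "", "ccc", "dddd", "eeeee" -- '\r', '\n',
    // and "\r\n" are all recognized as terminators.
    LineReader in = new LineReader(
        new ByteArrayInputStream("a\nbb\n\nccc\rdddd\r\neeeee".getBytes("UTF-8")));
    Text line = new Text();
    // readLine() returns the bytes consumed, including the terminator,
    // and 0 once the stream is exhausted.
    while (in.readLine(line) > 0) {
      System.out.println("[" + line + "] (" + line.getLength() + " bytes)");
    }
    in.close();
  }
}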


    for (int i=0;i<numberOfCharToFillTheBuffer;i++) { 
      fillerString.append('a'); // char 'a' as a filler for the test string
    }

    TestData = fillerString + TestPartOfInput;
    lineReader = new LineReader(
        new ByteArrayInputStream(TestData.getBytes()), Delimiter.getBytes());
   
    line = new Text();
   
    lineReader.readLine(line);
    Assert.assertEquals(fillerString.toString(),line.toString());
   
    lineReader.readLine(line);
    Assert.assertEquals(Expected, line.toString());
   
    /* TEST_2
     * Scenario: the characters immediately preceding the delimiter
     * match the starting characters of the delimiter itself.
     */
   
    Delimiter = "record";
    StringBuilder TestStringBuilder = new StringBuilder();
   
    TestStringBuilder.append(Delimiter+"Kerala ");
    TestStringBuilder.append(Delimiter+"Bangalore");
    TestStringBuilder.append(Delimiter+" North Korea");
    TestStringBuilder.append(Delimiter+Delimiter+
                        "Guantanamo");
    TestStringBuilder.append(Delimiter+"ecord"+"recor"+"core"); //~EOF with 're'
   
    TestData=TestStringBuilder.toString();
   
    lineReader = new LineReader(
        new ByteArrayInputStream(TestData.getBytes()),Delimiter.getBytes());
   
    lineReader.readLine(line);
    Assert.assertEquals("",line.toString());
    lineReader.readLine(line);
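
The same constructor accepts custom delimiter bytes, which is what drives the test above: records are the byte runs between occurrences of the delimiter, so input that starts with the delimiter yields an empty first record. A self-contained sketch (the data and class name are ours):

import java.io.ByteArrayInputStream;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.LineReader;

public class CustomDelimiterDemo {
  public static void main(String[] args) throws Exception {
    String delimiter = "record";
    String data = "recordKerala recordBangalore";
    // Split on the bytes of "record" instead of newlines.
    LineReader reader = new LineReader(
        new ByteArrayInputStream(data.getBytes("UTF-8")),
        delimiter.getBytes("UTF-8"));
    Text record = new Text();
    // Prints "[]", "[Kerala ]", "[Bangalore]": the text before the
    // first delimiter is an empty record.
    while (reader.readLine(record) > 0) {
      System.out.println("[" + record + "]");
    }
    reader.close();
  }
}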

          continue;
        }
        Path path = file.getPath();
        FileSystem fs = path.getFileSystem(job.getConfiguration());
        FSDataInputStream fileIn = fs.open(path);
        LineReader in = new LineReader(fileIn, job.getConfiguration());
        int lineLen = 0;
        while(true) {
          Text lineText = new Text();
          lineLen = in.readLine(lineText);
          if (lineLen <= 0) {
            break;
          }
          Matcher m = LINE_PATTERN.matcher(lineText.toString());
          if (m.matches()) { // matcher() never returns null
            TableName tableName = TableName.valueOf(m.group(1));
            int startRow = Integer.parseInt(m.group(2));
            int rows = Integer.parseInt(m.group(3));
            int totalRows = Integer.parseInt(m.group(4));
            float sampleRate = Float.parseFloat(m.group(5));
            int clients = Integer.parseInt(m.group(6));
            boolean flushCommits = Boolean.parseBoolean(m.group(7));
            boolean writeToWAL = Boolean.parseBoolean(m.group(8));
            boolean reportLatency = Boolean.parseBoolean(m.group(9));

            LOG.debug("tableName=" + tableName +
                      " split["+ splitList.size() + "] " +
                      " startRow=" + startRow +
                      " rows=" + rows +
                      " totalRows=" + totalRows +
                      " sampleRate=" + sampleRate +
                      " clients=" + clients +
                      " flushCommits=" + flushCommits +
                      " writeToWAL=" + writeToWAL +
                      " reportLatency=" + reportLatency);

            PeInputSplit newSplit =
              new PeInputSplit(tableName, startRow, rows, totalRows, sampleRate, clients,
                flushCommits, writeToWAL, reportLatency);
            splitList.add(newSplit);
          }
        }
        in.close();
      }

      LOG.info("Total # of splits: " + splitList.size());
      return splitList;
    }
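
The while(true)/break pattern above can be folded into the loop condition, since readLine() returns 0 at end of file. A hedged sketch of the same read-everything loop (the class name and the argument handling are ours):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.LineReader;

public class ReadAllLines {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Path path = new Path(args[0]);
    FileSystem fs = path.getFileSystem(conf);
    FSDataInputStream fileIn = fs.open(path);
    LineReader in = new LineReader(fileIn, conf);
    try {
      Text line = new Text();
      // readLine() returns 0 at EOF, so the exit test moves into the
      // loop condition instead of an explicit break.
      while (in.readLine(line) > 0) {
        System.out.println(line);
      }
    } finally {
      in.close(); // also closes the wrapped FSDataInputStream
    }
  }
}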

    LOG.info("Extracting text file");
    long end = start + length;
    FileSystem fs = file.getFileSystem(conf);
    FSDataInputStream filestream = fs.open(file);
    CompressionCodec codec = (new CompressionCodecFactory(conf)).getCodec(file);
    LineReader filereader;
    Seekable fileseeker = filestream;

    // Hadoop 1.0 does not support custom record delimiters, so we
    // support only the default one.
    // We might add another "else if" case for SplittableCompressionCodec once
    // we drop support for Hadoop 1.0.
    if (codec == null) {
      filestream.seek(start);
      filereader = new LineReader(filestream);
    } else {
      filereader = new LineReader(codec.createInputStream(filestream,
          codec.createDecompressor()), conf);
      fileseeker = filestream;
    }
    if (start != 0) {
      // Always throw away the first record: the reader of the previous
      // split already consumed one extra line past its end.
      start += filereader.readLine(new Text(), 0);
    }
    int size;
    LOG.info("Start position: " + String.valueOf(start));
    long next = start;
    while (next <= end) {
      Text line = new Text();
      size = filereader.readLine(line, Integer.MAX_VALUE);
      if (size == 0) {
        break;
      }
      if (codec == null) {
        next += size;
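
The skip-first-record logic is how LineReader-based record readers share one file between splits: every split except the first starts mid-line, so each reader discards its first (partial) line and reads one line past its own end, and every line ends up processed exactly once. A sketch of the pattern for an uncompressed file (the class, method, and signature are ours):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.LineReader;

public class SplitReadDemo {
  /** Prints the lines belonging to the byte range [start, start + length). */
  static void readSplit(Configuration conf, Path file, long start, long length)
      throws Exception {
    long end = start + length;
    FileSystem fs = file.getFileSystem(conf);
    FSDataInputStream in = fs.open(file);
    in.seek(start);
    LineReader reader = new LineReader(in, conf);
    if (start != 0) {
      // Discard the partial first line; the previous split's reader
      // already consumed it. maxLineLength 0 skips without storing.
      start += reader.readLine(new Text(), 0);
    }
    long pos = start;
    Text line = new Text();
    // Keep reading while the next line starts at or before 'end', so
    // the line straddling the boundary belongs to this split.
    while (pos <= end) {
      int size = reader.readLine(line);
      if (size == 0) {
        break; // end of file
      }
      pos += size;
      System.out.println(line);
    }
    reader.close();
  }
}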

     
      for (FileStatus file: listStatus(job)) {
        Path path = file.getPath();
        FileSystem fs = path.getFileSystem(job.getConfiguration());
        FSDataInputStream fileIn = fs.open(path);
        LineReader in = new LineReader(fileIn, job.getConfiguration());
        int lineLen = 0;
        while(true) {
          Text lineText = new Text();
          lineLen = in.readLine(lineText);
          if (lineLen <= 0) {
            break;
          }
          Matcher m = LINE_PATTERN.matcher(lineText.toString());
          if (m.matches()) { // matcher() never returns null
            int startRow = Integer.parseInt(m.group(1));
            int rows = Integer.parseInt(m.group(2));
            int totalRows = Integer.parseInt(m.group(3));
            int clients = Integer.parseInt(m.group(4));
            int rowsPerPut = Integer.parseInt(m.group(5));

            LOG.debug("split["+ splitList.size() + "] " +
                     " startRow=" + startRow +
                     " rows=" + rows +
                     " totalRows=" + totalRows +
                     " clients=" + clients +
                     " rowsPerPut=" + rowsPerPut);

            PeInputSplit newSplit =
              new PeInputSplit(startRow, rows, totalRows, clients, rowsPerPut);
            splitList.add(newSplit);
          }
        }
        in.close();
      }
     
      LOG.info("Total # of splits: " + splitList.size());
      return splitList;
    }

    LOG.info("\t of length " + length);

    FileSystem fs = file.getFileSystem(conf);
    FSDataInputStream filestream = fs.open(file);
    CompressionCodec codec = (new CompressionCodecFactory(conf)).getCodec(file);
    LineReader filereader;
    Seekable fileseeker = filestream;

    // Hadoop 1.0 does not support custom record delimiters, so we
    // support only the default one.
    // We might add another "else if" case for SplittableCompressionCodec once
    // we drop support for Hadoop 1.0.
    if (codec == null) {
      filestream.seek(start);
      filereader = new LineReader(filestream);
    } else {
      filereader = new LineReader(
          codec.createInputStream(filestream, codec.createDecompressor()), conf);
      fileseeker = filestream;
    }

    if (start != 0) {
      // Always throw away the first record: the reader of the previous
      // split already consumed one extra line past its end.
      start += filereader.readLine(new Text(), 0);
    }
    int size;
    LOG.info("Start position: " + String.valueOf(start));
    long next = start;
    while (next <= end) {
      Text line = new Text();
      size = filereader.readLine(line, Integer.MAX_VALUE);
      if (size == 0) {
        break;
      }
      if (codec == null) {
        next += size;

     
      for (FileStatus file: listStatus(job)) {
        Path path = file.getPath();
        FileSystem fs = path.getFileSystem(job.getConfiguration());
        FSDataInputStream fileIn = fs.open(path);
        LineReader in = new LineReader(fileIn, job.getConfiguration());
        int lineLen = 0;
        while(true) {
          Text lineText = new Text();
          lineLen = in.readLine(lineText);
          if (lineLen <= 0) {
            break;
          }
          Matcher m = LINE_PATTERN.matcher(lineText.toString());
          if (m.matches()) { // matcher() never returns null
            int startRow = Integer.parseInt(m.group(1));
            int rows = Integer.parseInt(m.group(2));
            int totalRows = Integer.parseInt(m.group(3));
            int clients = Integer.parseInt(m.group(4));
           
            LOG.debug("split["+ splitList.size() + "] " +
                     " startRow=" + startRow +
                     " rows=" + rows +
                     " totalRows=" + totalRows +
                     " clients=" + clients);
           
            PeInputSplit newSplit = new PeInputSplit(startRow, rows, totalRows, clients);
            splitList.add(newSplit);
          }
        }
        in.close();
      }
     
      LOG.info("Total # of splits: " + splitList.size());
      return splitList;
    }

  private static Path workDir =
    new Path(new Path(System.getProperty("test.build.data", "/tmp")),
             "TestConcatenatedCompressedInput").makeQualified(localFs);

  private static LineReader makeStream(String str) throws IOException {
    return new LineReader(new ByteArrayInputStream(str.getBytes("UTF-8")),
                          defaultConf);
  }
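
For illustration, a small hypothetical test using the helper above (the byte counts returned by readLine() include the '\n' terminators):

  @Test
  public void testMakeStream() throws Exception {
    LineReader in = makeStream("one\ntwo\n");
    Text out = new Text();
    assertEquals("first line bytes", 4, in.readLine(out)); // "one" + '\n'
    assertEquals("one", out.toString());
    assertEquals("second line bytes", 4, in.readLine(out)); // "two" + '\n'
    assertEquals("end of file", 0, in.readLine(out));
    in.close();
  }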

    final FileInputStream in2 = new FileInputStream(fnLocal2.toString());
    assertEquals("concat bytes available", 2734, in1.available());
    assertEquals("concat bytes available", 3413, in2.available()); // w/hdr CRC

    CompressionInputStream cin2 = gzip.createInputStream(in2);
    LineReader in = new LineReader(cin2);
    Text out = new Text();

    int numBytes, totalBytes = 0, lineNum = 0;
    while ((numBytes = in.readLine(out)) > 0) {
      ++lineNum;
      totalBytes += numBytes;
    }
    in.close();
    assertEquals("total uncompressed bytes in concatenated test file",
                 5346, totalBytes);
    assertEquals("total uncompressed lines in concatenated test file",
                 84, lineNum);
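
The codec plumbing generalizes: here is a hedged sketch that counts lines and uncompressed bytes in any file whose extension CompressionCodecFactory recognizes. The class name and argument handling are ours, and a file without a recognized extension would make getCodec() return null, which this sketch does not handle.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.util.LineReader;

public class CompressedLineCount {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Path file = new Path(args[0]); // e.g. a path ending in ".gz"
    FileSystem fs = file.getFileSystem(conf);
    // Pick the codec by file extension, then let LineReader split the
    // decompressed byte stream into lines.
    CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(file);
    LineReader in = new LineReader(codec.createInputStream(fs.open(file)), conf);
    Text line = new Text();
    int lineNum = 0;
    long totalBytes = 0;
    int numBytes;
    while ((numBytes = in.readLine(line)) > 0) {
      ++lineNum;
      totalBytes += numBytes;
    }
    in.close();
    System.out.println(lineNum + " lines, " + totalBytes + " uncompressed bytes");
  }
}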

   * @throws Exception
   */
  public static void main(String[] args) throws Exception {
    for (String arg : args) {
      System.out.println("Working on " + arg);
      LineReader reader = makeStream(unquote(arg));
      Text line = new Text();
      int size = reader.readLine(line);
      while (size > 0) {
        System.out.println("Got: " + line.toString());
        size = reader.readLine(line);
      }
      reader.close();
    }
  }
