Package org.apache.hadoop.util

Examples of org.apache.hadoop.util.LineReader


    assertEquals("split on fake newline", "abc\u200axyz", line.toString());
  }

  @Test
  public void testNewLines() throws Exception {
    LineReader in = makeStream("a\nbb\n\nccc\rdddd\r\neeeee");
    Text out = new Text();
    in.readLine(out);
    assertEquals("line1 length", 1, out.getLength());
    in.readLine(out);
    assertEquals("line2 length", 2, out.getLength());
    in.readLine(out);
    assertEquals("line3 length", 0, out.getLength());
    in.readLine(out);
    assertEquals("line4 length", 3, out.getLength());
    in.readLine(out);
    assertEquals("line5 length", 4, out.getLength());
    in.readLine(out);
    assertEquals("line5 length", 5, out.getLength());
    assertEquals("end of file", 0, in.readLine(out));
  }
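
As a standalone illustration, here is a minimal sketch of the same newline handling outside JUnit. It assumes only hadoop-common on the classpath, and the class name is ours. LineReader treats '\n', '\r', and "\r\n" all as line terminators, and readLine() returns the number of bytes consumed (including the terminator), or 0 at end of file.

import java.io.ByteArrayInputStream;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.LineReader;

public class LineReaderNewlineDemo {
  public static void main(String[] args) throws Exception {
    // Six lines: "a", "bb", "", "ccc", "dddd", "eeeee" -- '\r', '\n',
    // and "\r\n" are all recognized as terminators.
    LineReader in = new LineReader(
        new ByteArrayInputStream("a\nbb\n\nccc\rdddd\r\neeeee".getBytes("UTF-8")));
    Text line = new Text();
    // readLine() returns the bytes consumed, including the terminator,
    // and 0 once the stream is exhausted.
    while (in.readLine(line) > 0) {
      System.out.println("[" + line + "] (" + line.getLength() + " bytes)");
    }
    in.close();
  }
}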


    for (int i=0;i<numberOfCharToFillTheBuffer;i++) { 
      fillerString.append('a'); // char 'a' as a filler for the test string
    }

    TestData = fillerString + TestPartOfInput;
    lineReader = new LineReader(
        new ByteArrayInputStream(TestData.getBytes()), Delimiter.getBytes());
   
    line = new Text();
   
    lineReader.readLine(line);
    Assert.assertEquals(fillerString.toString(),line.toString());
   
    lineReader.readLine(line);
    Assert.assertEquals(Expected, line.toString());
   
    /* TEST_2
     * Scenario: the characters immediately preceding the delimiter
     * match the starting characters of the delimiter itself.
     */
   
    Delimiter = "record";
    StringBuilder TestStringBuilder = new StringBuilder();
   
    TestStringBuilder.append(Delimiter+"Kerala ");
    TestStringBuilder.append(Delimiter+"Bangalore");
    TestStringBuilder.append(Delimiter+" North Korea");
    TestStringBuilder.append(Delimiter+Delimiter+
                        "Guantanamo");
    TestStringBuilder.append(Delimiter+"ecord"+"recor"+"core"); //~EOF with 're'
   
    TestData=TestStringBuilder.toString();
   
    lineReader = new LineReader(
        new ByteArrayInputStream(TestData.getBytes()),Delimiter.getBytes());
   
    lineReader.readLine(line);
    Assert.assertEquals("",line.toString());
    lineReader.readLine(line);
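
The same constructor accepts custom delimiter bytes, which is what drives the test above: records are the byte runs between occurrences of the delimiter, so input that starts with the delimiter yields an empty first record. A self-contained sketch (the data and class name are ours):

import java.io.ByteArrayInputStream;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.LineReader;

public class CustomDelimiterDemo {
  public static void main(String[] args) throws Exception {
    String delimiter = "record";
    String data = "recordKerala recordBangalore";
    // Split on the bytes of "record" instead of newlines.
    LineReader reader = new LineReader(
        new ByteArrayInputStream(data.getBytes("UTF-8")),
        delimiter.getBytes("UTF-8"));
    Text record = new Text();
    // Prints "[]", "[Kerala ]", "[Bangalore]": the text before the
    // first delimiter is an empty record.
    while (reader.readLine(record) > 0) {
      System.out.println("[" + record + "]");
    }
    reader.close();
  }
}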

          continue;
        }
        Path path = file.getPath();
        FileSystem fs = path.getFileSystem(job.getConfiguration());
        FSDataInputStream fileIn = fs.open(path);
        LineReader in = new LineReader(fileIn, job.getConfiguration());
        int lineLen = 0;
        while(true) {
          Text lineText = new Text();
          lineLen = in.readLine(lineText);
          if (lineLen <= 0) {
            break;
          }
          Matcher m = LINE_PATTERN.matcher(lineText.toString());
          if (m.matches()) { // matcher() never returns null
            TableName tableName = TableName.valueOf(m.group(1));
            int startRow = Integer.parseInt(m.group(2));
            int rows = Integer.parseInt(m.group(3));
            int totalRows = Integer.parseInt(m.group(4));
            float sampleRate = Float.parseFloat(m.group(5));
            int clients = Integer.parseInt(m.group(6));
            boolean flushCommits = Boolean.parseBoolean(m.group(7));
            boolean writeToWAL = Boolean.parseBoolean(m.group(8));
            boolean reportLatency = Boolean.parseBoolean(m.group(9));

            LOG.debug("tableName=" + tableName +
                      " split["+ splitList.size() + "] " +
                      " startRow=" + startRow +
                      " rows=" + rows +
                      " totalRows=" + totalRows +
                      " sampleRate=" + sampleRate +
                      " clients=" + clients +
                      " flushCommits=" + flushCommits +
                      " writeToWAL=" + writeToWAL +
                      " reportLatency=" + reportLatency);

            PeInputSplit newSplit =
              new PeInputSplit(tableName, startRow, rows, totalRows, sampleRate, clients,
                flushCommits, writeToWAL, reportLatency);
            splitList.add(newSplit);
          }
        }
        in.close();
      }

      LOG.info("Total # of splits: " + splitList.size());
      return splitList;
    }
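
The while(true)/break pattern above can be folded into the loop condition, since readLine() returns 0 at end of file. A hedged sketch of the same read-everything loop (the class name and the argument handling are ours):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.LineReader;

public class ReadAllLines {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Path path = new Path(args[0]);
    FileSystem fs = path.getFileSystem(conf);
    FSDataInputStream fileIn = fs.open(path);
    LineReader in = new LineReader(fileIn, conf);
    try {
      Text line = new Text();
      // readLine() returns 0 at EOF, so the exit test moves into the
      // loop condition instead of an explicit break.
      while (in.readLine(line) > 0) {
        System.out.println(line);
      }
    } finally {
      in.close(); // also closes the wrapped FSDataInputStream
    }
  }
}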

    LOG.info("Extracting text file");
    long end = start + length;
    FileSystem fs = file.getFileSystem(conf);
    FSDataInputStream filestream = fs.open(file);
    CompressionCodec codec = (new CompressionCodecFactory(conf)).getCodec(file);
    LineReader filereader;
    Seekable fileseeker = filestream;

    // Hadoop 1.0 does not support custom record delimiters, so we
    // support only the default one.
    // We might add another "else if" case for SplittableCompressionCodec once
    // we drop support for Hadoop 1.0.
    if (codec == null) {
      filestream.seek(start);
      filereader = new LineReader(filestream);
    } else {
      filereader = new LineReader(codec.createInputStream(filestream,
          codec.createDecompressor()), conf);
      fileseeker = filestream;
    }
    if (start != 0) {
      // Always throw away the first record: the reader of the previous
      // split already consumed one extra line past its end.
      start += filereader.readLine(new Text(), 0);
    }
    int size;
    LOG.info("Start position: " + String.valueOf(start));
    long next = start;
    while (next <= end) {
      Text line = new Text();
      size = filereader.readLine(line, Integer.MAX_VALUE);
      if (size == 0) {
        break;
      }
      if (codec == null) {
        next += size;
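
The skip-first-record logic is how LineReader-based record readers share one file between splits: every split except the first starts mid-line, so each reader discards its first (partial) line and reads one line past its own end, and every line ends up processed exactly once. A sketch of the pattern for an uncompressed file (the class, method, and signature are ours):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.LineReader;

public class SplitReadDemo {
  /** Prints the lines belonging to the byte range [start, start + length). */
  static void readSplit(Configuration conf, Path file, long start, long length)
      throws Exception {
    long end = start + length;
    FileSystem fs = file.getFileSystem(conf);
    FSDataInputStream in = fs.open(file);
    in.seek(start);
    LineReader reader = new LineReader(in, conf);
    if (start != 0) {
      // Discard the partial first line; the previous split's reader
      // already consumed it. maxLineLength 0 skips without storing.
      start += reader.readLine(new Text(), 0);
    }
    long pos = start;
    Text line = new Text();
    // Keep reading while the next line starts at or before 'end', so
    // the line straddling the boundary belongs to this split.
    while (pos <= end) {
      int size = reader.readLine(line);
      if (size == 0) {
        break; // end of file
      }
      pos += size;
      System.out.println(line);
    }
    reader.close();
  }
}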

     
      for (FileStatus file: listStatus(job)) {
        Path path = file.getPath();
        FileSystem fs = path.getFileSystem(job.getConfiguration());
        FSDataInputStream fileIn = fs.open(path);
        LineReader in = new LineReader(fileIn, job.getConfiguration());
        int lineLen = 0;
        while(true) {
          Text lineText = new Text();
          lineLen = in.readLine(lineText);
          if (lineLen <= 0) {
            break;
          }
          Matcher m = LINE_PATTERN.matcher(lineText.toString());
          if (m.matches()) { // matcher() never returns null
            int startRow = Integer.parseInt(m.group(1));
            int rows = Integer.parseInt(m.group(2));
            int totalRows = Integer.parseInt(m.group(3));
            int clients = Integer.parseInt(m.group(4));
            int rowsPerPut = Integer.parseInt(m.group(5));

            LOG.debug("split["+ splitList.size() + "] " +
                     " startRow=" + startRow +
                     " rows=" + rows +
                     " totalRows=" + totalRows +
                     " clients=" + clients +
                     " rowsPerPut=" + rowsPerPut);

            PeInputSplit newSplit =
              new PeInputSplit(startRow, rows, totalRows, clients, rowsPerPut);
            splitList.add(newSplit);
          }
        }
        in.close();
      }
     
      LOG.info("Total # of splits: " + splitList.size());
      return splitList;
    }

    LOG.info("\t of length " + length);

    FileSystem fs = file.getFileSystem(conf);
    FSDataInputStream filestream = fs.open(file);
    CompressionCodec codec = (new CompressionCodecFactory(conf)).getCodec(file);
    LineReader filereader;
    Seekable fileseeker = filestream;

    // Hadoop 1.0 does not support custom record delimiters, so we
    // support only the default one.
    // We might add another "else if" case for SplittableCompressionCodec once
    // we drop support for Hadoop 1.0.
    if (codec == null) {
      filestream.seek(start);
      filereader = new LineReader(filestream);
    } else {
      filereader = new LineReader(
          codec.createInputStream(filestream, codec.createDecompressor()), conf);
      fileseeker = filestream;
    }

    if (start != 0) {
      // Always throw away the first record: the reader of the previous
      // split already consumed one extra line past its end.
      start += filereader.readLine(new Text(), 0);
    }
    int size;
    LOG.info("Start position: " + String.valueOf(start));
    long next = start;
    while (next <= end) {
      Text line = new Text();
      size = filereader.readLine(line, Integer.MAX_VALUE);
      if (size == 0) {
        break;
      }
      if (codec == null) {
        next += size;

     
      for (FileStatus file: listStatus(job)) {
        Path path = file.getPath();
        FileSystem fs = path.getFileSystem(job.getConfiguration());
        FSDataInputStream fileIn = fs.open(path);
        LineReader in = new LineReader(fileIn, job.getConfiguration());
        int lineLen = 0;
        while(true) {
          Text lineText = new Text();
          lineLen = in.readLine(lineText);
          if (lineLen <= 0) {
            break;
          }
          Matcher m = LINE_PATTERN.matcher(lineText.toString());
          if (m.matches()) { // matcher() never returns null
            int startRow = Integer.parseInt(m.group(1));
            int rows = Integer.parseInt(m.group(2));
            int totalRows = Integer.parseInt(m.group(3));
            int clients = Integer.parseInt(m.group(4));
           
            LOG.debug("split["+ splitList.size() + "] " +
                     " startRow=" + startRow +
                     " rows=" + rows +
                     " totalRows=" + totalRows +
                     " clients=" + clients);
           
            PeInputSplit newSplit = new PeInputSplit(startRow, rows, totalRows, clients);
            splitList.add(newSplit);
          }
        }
        in.close();
      }
     
      LOG.info("Total # of splits: " + splitList.size());
      return splitList;
    }

  private static Path workDir =
    new Path(new Path(System.getProperty("test.build.data", "/tmp")),
             "TestConcatenatedCompressedInput").makeQualified(localFs);

  private static LineReader makeStream(String str) throws IOException {
    return new LineReader(new ByteArrayInputStream(str.getBytes("UTF-8")),
                          defaultConf);
  }
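
For illustration, a small hypothetical test using the helper above (the byte counts returned by readLine() include the '\n' terminators):

  @Test
  public void testMakeStream() throws Exception {
    LineReader in = makeStream("one\ntwo\n");
    Text out = new Text();
    assertEquals("first line bytes", 4, in.readLine(out)); // "one" + '\n'
    assertEquals("one", out.toString());
    assertEquals("second line bytes", 4, in.readLine(out)); // "two" + '\n'
    assertEquals("end of file", 0, in.readLine(out));
    in.close();
  }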

    final FileInputStream in2 = new FileInputStream(fnLocal2.toString());
    assertEquals("concat bytes available", 2734, in1.available());
    assertEquals("concat bytes available", 3413, in2.available()); // w/hdr CRC

    CompressionInputStream cin2 = gzip.createInputStream(in2);
    LineReader in = new LineReader(cin2);
    Text out = new Text();

    int numBytes, totalBytes = 0, lineNum = 0;
    while ((numBytes = in.readLine(out)) > 0) {
      ++lineNum;
      totalBytes += numBytes;
    }
    in.close();
    assertEquals("total uncompressed bytes in concatenated test file",
                 5346, totalBytes);
    assertEquals("total uncompressed lines in concatenated test file",
                 84, lineNum);
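
The codec plumbing generalizes: here is a hedged sketch that counts lines and uncompressed bytes in any file whose extension CompressionCodecFactory recognizes. The class name and argument handling are ours, and a file without a recognized extension would make getCodec() return null, which this sketch does not handle.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.util.LineReader;

public class CompressedLineCount {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Path file = new Path(args[0]); // e.g. a path ending in ".gz"
    FileSystem fs = file.getFileSystem(conf);
    // Pick the codec by file extension, then let LineReader split the
    // decompressed byte stream into lines.
    CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(file);
    LineReader in = new LineReader(codec.createInputStream(fs.open(file)), conf);
    Text line = new Text();
    int lineNum = 0;
    long totalBytes = 0;
    int numBytes;
    while ((numBytes = in.readLine(line)) > 0) {
      ++lineNum;
      totalBytes += numBytes;
    }
    in.close();
    System.out.println(lineNum + " lines, " + totalBytes + " uncompressed bytes");
  }
}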

   * @throws Exception
   */
  public static void main(String[] args) throws Exception {
    for (String arg : args) {
      System.out.println("Working on " + arg);
      LineReader reader = makeStream(unquote(arg));
      Text line = new Text();
      int size = reader.readLine(line);
      while (size > 0) {
        System.out.println("Got: " + line.toString());
        size = reader.readLine(line);
      }
      reader.close();
    }
  }
