Package org.apache.hadoop.util

Examples of org.apache.hadoop.util.LineReader
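org.apache.hadoop.util.LineReader reads lines from an InputStream into a Text buffer, treating CR, LF, or CRLF as line terminators. Each call to readLine(Text) returns the number of bytes consumed, including the terminator, and returns 0 at end of stream. As a quick orientation before the examples below, here is a minimal sketch of the common pattern; the helper method name and the way the file is chosen are hypothetical, not taken from any of the examples.

    import java.io.IOException;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.util.LineReader;

    // Hypothetical helper: print every line of the given file.
    static void printLines(Configuration conf, Path path) throws IOException {
      FileSystem fs = path.getFileSystem(conf);
      LineReader reader = new LineReader(fs.open(path), conf);
      try {
        Text line = new Text();
        while (reader.readLine(line) > 0) { // bytes consumed; 0 at end of stream
          System.out.println(line);
        }
      } finally {
        reader.close();
      }
    }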


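    // Example: read a file of "username[,group]*" lines and create a proxy
    // UserGroupInformation for each user; any groups on the line are ignored.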
    final Text rawUgi = new Text();
    final FileSystem fs = userloc.getFileSystem(conf);
    final ArrayList<UserGroupInformation> ugiList =
        new ArrayList<UserGroupInformation>();

    LineReader in = null;
    try {
      in = new LineReader(fs.open(userloc));
      while (in.readLine(rawUgi) > 0) { // each line is of the form username[,group]*
        // e is the end position of the user name on this line
        int e = rawUgi.find(",");
        if (rawUgi.getLength() == 0 || e == 0) {
          throw new IOException("Missing username: " + rawUgi);
        }
        if (e == -1) {
          e = rawUgi.getLength();
        }
        final String username = Text.decode(rawUgi.getBytes(), 0, e);
        UserGroupInformation ugi = null;
        try {
          ugi = UserGroupInformation.createProxyUser(username,
                    UserGroupInformation.getLoginUser());
        } catch (IOException ioe) {
          LOG.error("Error while creating a proxy user", ioe);
        }
        if (ugi != null) {
          ugiList.add(ugi);
        }
        // No need to parse groups, even if they exist. Go to next line
      }
    } finally {
      if (in != null) {
        in.close();
      }
    }
    return ugiList;
  }
View Full Code Here


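      // Example: turn every line of each input file into its own FileSplit, using
      // readLine's return value (bytes consumed, including the terminator) as the split length.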
      Text key = new Text();
      for (FileStatus file: files) {
        Path path = file.getPath();
        FileSystem fs = path.getFileSystem(job.getConfiguration());
        LineReader reader = new LineReader(fs.open(path));
        long pos = 0;
        int n;
        try {
          while ((n = reader.readLine(key)) > 0) {
            String[] hosts = getStoreDirHosts(fs, path);
            splits.add(new FileSplit(path, pos, n, hosts));
            pos += n;
          }
        } finally {
          reader.close();
        }
      }

      return splits;
    }
View Full Code Here

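    // Example: parse a Hadoop archive (HAR) master index and archive index,
    // using readLine's return value to track how many bytes have been read.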
    private void parseMetaData() throws IOException {
      FSDataInputStream in = fs.open(masterIndexPath);
      FileStatus masterStat = fs.getFileStatus(masterIndexPath);
      masterIndexTimestamp = masterStat.getModificationTime();
      LineReader lin = new LineReader(in, getConf());
      Text line = new Text();
      long read = lin.readLine(line);

      // the first line contains the version of the index file
      String versionLine = line.toString();
      String[] arr = versionLine.split(" ");
      version = Integer.parseInt(arr[0]);
      // reject index versions newer than this client understands; older versions remain readable
      if (this.version > HarFileSystem.VERSION) {
        throw new IOException("Invalid version " +
            this.version + " expected " + HarFileSystem.VERSION);
      }

      // each remaining line contains a hash range and the begin/end offsets of that range in the archive index
      String[] readStr = null;
      while(read < masterStat.getLen()) {
        int b = lin.readLine(line);
        read += b;
        readStr = line.toString().split(" ");
        int startHash = Integer.parseInt(readStr[0]);
        int endHash  = Integer.parseInt(readStr[1]);
        stores.add(new Store(Long.parseLong(readStr[2]),
            Long.parseLong(readStr[3]), startHash,
            endHash));
        line.clear();
      }
      try {
        // close the master index
        lin.close();
      } catch (IOException io) {
        // ignore: we were only reading
      }

      FSDataInputStream aIn = fs.open(archiveIndexPath);
      FileStatus archiveStat = fs.getFileStatus(archiveIndexPath);
      archiveIndexTimestamp = archiveStat.getModificationTime();
      LineReader aLin;
      // now start reading the real index file
      for (Store s: stores) {
        read = 0;
        aIn.seek(s.begin);
        aLin = new LineReader(aIn, getConf());
        while (read + s.begin < s.end) {
          int tmp = aLin.readLine(line);
          read += tmp;
          String lineFeed = line.toString();
          String[] parsed = lineFeed.split(" ");
          parsed[0] = decodeFileName(parsed[0]);
          archive.put(new Path(parsed[0]), new HarStatus(lineFeed));
View Full Code Here


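      // Example: record-reader initialization; wrap the raw or decompressed stream in a
      // LineReader and, for any split after the first, skip the leading partial line.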
      // open the file and seek to the start of the split
      FileSystem fs = file.getFileSystem(conf);
      FSDataInputStream fileIn = fs.open(split.getPath());
      if (codec != null) {
        in = new LineReader(codec.createInputStream(fileIn), conf);
        end = Long.MAX_VALUE;
      } else {
        if (start != 0) {
          // skip the first (possibly partial) line because this is not the first split,
          // so start may not fall on a record boundary
          skipFirstLine = true;
          --start;
          fileIn.seek(start);
        }
        in = new LineReader(fileIn, conf);
      }
      if (skipFirstLine) {
        // skip the line and re-establish "start".
        start += in.readLine(new Text(), 0, (int) Math.min((long) Integer.MAX_VALUE, end - start));
      }
View Full Code Here

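          // Example: read tab-separated "docno<TAB>length" lines and record each document's length.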
          if (fileStats[i].getPath().getName().startsWith("_")) {
            continue;
          }

          LOG.info("processing " + fileStats[i].getPath());
          LineReader reader = new LineReader(fs.open(fileStats[i].getPath()));

          Text line = new Text();
          while (reader.readLine(line) > 0) {
            String[] arr = line.toString().split("\\t+", 2);

            int docno = Integer.parseInt(arr[0]);
            int len = Integer.parseInt(arr[1]);

            // Note that because of speculative execution there may be multiple copies of the
            // doclength data, so we can't simply count the number of doclengths read. Instead,
            // keep track of the largest docno encountered.
            if (docno < docnoOffset) {
              throw new RuntimeException("Error: docno " + docno + " < docnoOffset " + docnoOffset
                  + "!");
            }

            doclengths[docno - docnoOffset] = len;

            if (docno > maxDocno) {
              maxDocno = docno;
            }
            if (docno < minDocno) {
              minDocno = docno;
            }
          }
          reader.close();
          context.getCounter(DocLengths.Files).increment(1);
        }

        LOG.info("min docno: " + minDocno);
        LOG.info("max docno: " + maxDocno);
View Full Code Here

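      // Example (test code): compare a "gold" file and a result file line by line using two LineReaders.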
        event = parser.nextEvent();
      }

      printStream.close();

      LineReader goldLines = new LineReader(inputGoldStream);
      LineReader resultLines =
          new LineReader(new PossiblyDecompressedInputStream(resultPath, conf));

      int lineNumber = 1;

      try {
        Text goldLine = new Text();
        Text resultLine = new Text();

        int goldRead = goldLines.readLine(goldLine);
        int resultRead = resultLines.readLine(resultLine);

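        // readLine returns 0 at end of stream, so the product becomes 0 once either file is exhausted.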
        while (goldRead * resultRead != 0) {
          if (!goldLine.equals(resultLine)) {
            assertEquals("Type mismatch detected", goldLine, resultLine);
            break;
          }

          goldRead = goldLines.readLine(goldLine);
          resultRead = resultLines.readLine(resultLine);

          ++lineNumber;
        }

        if (goldRead != resultRead) {
          assertEquals("the " + (goldRead > resultRead ? "gold" : "result")
              + " file contains more text at line " + lineNumber, goldRead,
              resultRead);
        }

        success = true;
      } finally {
        goldLines.close();
        resultLines.close();

        if (success) {
          lfs.delete(resultPath, false);
        }
      }
View Full Code Here


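  // Example (unit test): wrap an in-memory UTF-8 string in a LineReader and check that
  // multi-byte characters survive readLine and that U+200A is not treated as a line separator.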
  private LineReader makeStream(String str) throws IOException {
    return new LineReader(new ByteArrayInputStream
                                           (str.getBytes("UTF-8")),
                                           defaultConf);
  }
View Full Code Here

 
  public void testUTF8() throws Exception {
    LineReader in = null;

    try {
      in = makeStream("abcd\u20acbdcd\u20ac");
      Text line = new Text();
      in.readLine(line);
      assertEquals("readLine changed utf8 characters",
                   "abcd\u20acbdcd\u20ac", line.toString());
      in = makeStream("abc\u200axyz");
      in.readLine(line);
      assertEquals("split on fake newline", "abc\u200axyz", line.toString());
    } finally {
      if (in != null) {
        in.close();
      }
    }
  }
View Full Code Here
