Package org.apache.hadoop.util

Examples of org.apache.hadoop.util.LineReader
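org.apache.hadoop.util.LineReader reads lines from an InputStream into a Text buffer, treating CR, LF, or CRLF as line terminators. Each call to readLine(Text) returns the number of bytes consumed, including the terminator, and returns 0 at end of stream. As a quick orientation before the examples below, here is a minimal sketch of the common pattern; the helper method name and the way the file is chosen are hypothetical, not taken from any of the examples.

    import java.io.IOException;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.util.LineReader;

    // Hypothetical helper: print every line of the given file.
    static void printLines(Configuration conf, Path path) throws IOException {
      FileSystem fs = path.getFileSystem(conf);
      LineReader reader = new LineReader(fs.open(path), conf);
      try {
        Text line = new Text();
        while (reader.readLine(line) > 0) { // bytes consumed; 0 at end of stream
          System.out.println(line);
        }
      } finally {
        reader.close();
      }
    }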


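    // Example: read a file of "username[,group]*" lines and create a proxy
    // UserGroupInformation for each user; any groups on the line are ignored.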
    final Text rawUgi = new Text();
    final FileSystem fs = userloc.getFileSystem(conf);
    final ArrayList<UserGroupInformation> ugiList =
        new ArrayList<UserGroupInformation>();

    LineReader in = null;
    try {
      in = new LineReader(fs.open(userloc));
      while (in.readLine(rawUgi) > 0) { // each line is of the form username[,group]*
        // e is the end position of the user name on this line
        int e = rawUgi.find(",");
        if (rawUgi.getLength() == 0 || e == 0) {
          throw new IOException("Missing username: " + rawUgi);
        }
        if (e == -1) {
          e = rawUgi.getLength();
        }
        final String username = Text.decode(rawUgi.getBytes(), 0, e);
        UserGroupInformation ugi = null;
        try {
          ugi = UserGroupInformation.createProxyUser(username,
                    UserGroupInformation.getLoginUser());
        } catch (IOException ioe) {
          LOG.error("Error while creating a proxy user", ioe);
        }
        if (ugi != null) {
          ugiList.add(ugi);
        }
        // No need to parse groups, even if they exist. Go to next line
      }
    } finally {
      if (in != null) {
        in.close();
      }
    }
    return ugiList;
  }
View Full Code Here


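      // Example: turn every line of each input file into its own FileSplit, using
      // readLine's return value (bytes consumed, including the terminator) as the split length.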
      Text key = new Text();
      for (FileStatus file: files) {
        Path path = file.getPath();
        FileSystem fs = path.getFileSystem(job.getConfiguration());
        LineReader reader = new LineReader(fs.open(path));
        long pos = 0;
        int n;
        try {
          while ((n = reader.readLine(key)) > 0) {
            String[] hosts = getStoreDirHosts(fs, path);
            splits.add(new FileSplit(path, pos, n, hosts));
            pos += n;
          }
        } finally {
          reader.close();
        }
      }

      return splits;
    }
View Full Code Here

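    // Example: parse a Hadoop archive (HAR) master index and archive index,
    // using readLine's return value to track how many bytes have been read.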
    private void parseMetaData() throws IOException {
      FSDataInputStream in = fs.open(masterIndexPath);
      FileStatus masterStat = fs.getFileStatus(masterIndexPath);
      masterIndexTimestamp = masterStat.getModificationTime();
      LineReader lin = new LineReader(in, getConf());
      Text line = new Text();
      long read = lin.readLine(line);

      // the first line contains the version of the index file
      String versionLine = line.toString();
      String[] arr = versionLine.split(" ");
      version = Integer.parseInt(arr[0]);
      // reject index versions newer than this client understands; older versions remain readable
      if (this.version > HarFileSystem.VERSION) {
        throw new IOException("Invalid version " +
            this.version + " expected " + HarFileSystem.VERSION);
      }

      // each remaining line contains a hash range and the begin/end offsets of that range in the archive index
      String[] readStr = null;
      while(read < masterStat.getLen()) {
        int b = lin.readLine(line);
        read += b;
        readStr = line.toString().split(" ");
        int startHash = Integer.parseInt(readStr[0]);
        int endHash  = Integer.parseInt(readStr[1]);
        stores.add(new Store(Long.parseLong(readStr[2]),
            Long.parseLong(readStr[3]), startHash,
            endHash));
        line.clear();
      }
      try {
        // close the master index
        lin.close();
      } catch (IOException io) {
        // ignore: we were only reading
      }

      FSDataInputStream aIn = fs.open(archiveIndexPath);
      FileStatus archiveStat = fs.getFileStatus(archiveIndexPath);
      archiveIndexTimestamp = archiveStat.getModificationTime();
      LineReader aLin;
      // now start reading the real index file
      for (Store s: stores) {
        read = 0;
        aIn.seek(s.begin);
        aLin = new LineReader(aIn, getConf());
        while (read + s.begin < s.end) {
          int tmp = aLin.readLine(line);
          read += tmp;
          String lineFeed = line.toString();
          String[] parsed = lineFeed.split(" ");
          parsed[0] = decodeFileName(parsed[0]);
          archive.put(new Path(parsed[0]), new HarStatus(lineFeed));
View Full Code Here


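      // Example: record-reader initialization; wrap the raw or decompressed stream in a
      // LineReader and, for any split after the first, skip the leading partial line.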
      // open the file and seek to the start of the split
      FileSystem fs = file.getFileSystem(conf);
      FSDataInputStream fileIn = fs.open(split.getPath());
      if (codec != null) {
        in = new LineReader(codec.createInputStream(fileIn), conf);
        end = Long.MAX_VALUE;
      } else {
        if (start != 0) {
          // skip the first (possibly partial) line because this is not the first split,
          // so start may not fall on a record boundary
          skipFirstLine = true;
          --start;
          fileIn.seek(start);
        }
        in = new LineReader(fileIn, conf);
      }
      if (skipFirstLine) {
        // skip the line and re-establish "start".
        start += in.readLine(new Text(), 0, (int) Math.min((long) Integer.MAX_VALUE, end - start));
      }
View Full Code Here

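          // Example: read tab-separated "docno<TAB>length" lines and record each document's length.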
          if (fileStats[i].getPath().getName().startsWith("_")) {
            continue;
          }

          LOG.info("processing " + fileStats[i].getPath());
          LineReader reader = new LineReader(fs.open(fileStats[i].getPath()));

          Text line = new Text();
          while (reader.readLine(line) > 0) {
            String[] arr = line.toString().split("\\t+", 2);

            int docno = Integer.parseInt(arr[0]);
            int len = Integer.parseInt(arr[1]);

            // Note that because of speculative execution there may be multiple copies of the
            // doclength data, so we can't simply count the number of doclengths read. Instead,
            // keep track of the largest docno encountered.
            if (docno < docnoOffset) {
              throw new RuntimeException("Error: docno " + docno + " < docnoOffset " + docnoOffset
                  + "!");
            }

            doclengths[docno - docnoOffset] = len;

            if (docno > maxDocno) {
              maxDocno = docno;
            }
            if (docno < minDocno) {
              minDocno = docno;
            }
          }
          reader.close();
          context.getCounter(DocLengths.Files).increment(1);
        }

        LOG.info("min docno: " + minDocno);
        LOG.info("max docno: " + maxDocno);
View Full Code Here

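      // Example (test code): compare a "gold" file and a result file line by line using two LineReaders.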
        event = parser.nextEvent();
      }

      printStream.close();

      LineReader goldLines = new LineReader(inputGoldStream);
      LineReader resultLines =
          new LineReader(new PossiblyDecompressedInputStream(resultPath, conf));

      int lineNumber = 1;

      try {
        Text goldLine = new Text();
        Text resultLine = new Text();

        int goldRead = goldLines.readLine(goldLine);
        int resultRead = resultLines.readLine(resultLine);

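        // readLine returns 0 at end of stream, so the product becomes 0 once either file is exhausted.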
        while (goldRead * resultRead != 0) {
          if (!goldLine.equals(resultLine)) {
            assertEquals("Type mismatch detected", goldLine, resultLine);
            break;
          }

          goldRead = goldLines.readLine(goldLine);
          resultRead = resultLines.readLine(resultLine);

          ++lineNumber;
        }

        if (goldRead != resultRead) {
          assertEquals("the " + (goldRead > resultRead ? "gold" : "result")
              + " file contains more text at line " + lineNumber, goldRead,
              resultRead);
        }

        success = true;
      } finally {
        goldLines.close();
        resultLines.close();

        if (success) {
          lfs.delete(resultPath, false);
        }
      }
View Full Code Here


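  // Example (unit test): wrap an in-memory UTF-8 string in a LineReader and check that
  // multi-byte characters survive readLine and that U+200A is not treated as a line separator.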
  private LineReader makeStream(String str) throws IOException {
    return new LineReader(new ByteArrayInputStream
                                           (str.getBytes("UTF-8")),
                                           defaultConf);
  }
View Full Code Here

 
  public void testUTF8() throws Exception {
    LineReader in = null;

    try {
      in = makeStream("abcd\u20acbdcd\u20ac");
      Text line = new Text();
      in.readLine(line);
      assertEquals("readLine changed utf8 characters",
                   "abcd\u20acbdcd\u20ac", line.toString());
      in = makeStream("abc\u200axyz");
      in.readLine(line);
      assertEquals("split on fake newline", "abc\u200axyz", line.toString());
    } finally {
      if (in != null) {
        in.close();
      }
    }
  }
View Full Code Here
