Examples of water.fvec.Vec$VectorGroup

water.fvec.Vec
Class representing the group of vectors. Vectors from the same group have same distribution of chunks among nodes. Each vector is member of exactly one group. Default group of one vector is created for each vector. Group of each vector can be retrieved by calling group() method; The expected mode of operation is that user wants to add new vectors matching the source. E.g. parse creates several vectors (one for each column) which are all colocated and are colocated with the original bytevector. To do this, user should first ask for the set of keys for the new vectors by calling addVecs method on the target group. Vectors in the group will have the same keys except for the prefix which specifies index of the vector inside the group. The only information the group object carries is it's own key and the number of vectors it contains(deleted vectors still count). Because vectors(and chunks) share the same key-pattern with the group, default group with only one vector does not have to be actually created, it is implicit. @author tomasnykodym

    ArrayList<Vec> avecs = new ArrayList<>(); // adapted vectors
    ArrayList<String> anames = new ArrayList<>(); // names for adapted vector


    for( int c=0; c<map.length; c++ ) // Iterate over columns
      if(map[c] != null) { // Column needs adaptation
        Vec adaptedVec;
        if (toEnum[c]) { // Vector was flipped to column already, compose transformation
          adaptedVec = TransfVec.compose((TransfVec) frvecs[c], map[c], vfr.domains()[c], false);
        } else adaptedVec = frvecs[c].makeTransf(map[c], vfr.domains()[c]);
        avecs.add(frvecs[c] = adaptedVec);
        anames.add(names[c]); // Collect right names

View Full Code Here

   */
  public static H2ODrm drmFromSeqfile(String filename, int parMin) {
    long rows = 0;
    int cols = 0;
    Frame frame = null;
    Vec labels = null;


    SequenceFile.Reader reader = null;
    try {
      String uri = filename;
      Configuration conf = new Configuration();
      Path path = new Path(uri);
      FileSystem fs = FileSystem.get(URI.create(uri), conf);
      Vec.Writer writers[];
      Vec.Writer labelwriter = null;
      boolean isIntKey = false, isLongKey = false, isStringKey = false;


      reader = new SequenceFile.Reader(fs, path, conf);


      if (reader.getValueClass() != VectorWritable.class) {
        System.out.println("ValueClass in file " + filename +
                           "must be VectorWritable, but found " +
                           reader.getValueClassName());
        return null;
      }


      Writable key = (Writable)
        ReflectionUtils.newInstance(reader.getKeyClass(), conf);
      VectorWritable value = (VectorWritable)
        ReflectionUtils.newInstance(reader.getValueClass(), conf);


      long start = reader.getPosition();


      if (reader.getKeyClass() == Text.class) {
        isStringKey = true;
      } else if (reader.getKeyClass() == LongWritable.class) {
        isLongKey = true;
      } else {
        isIntKey = true;
      }


      while (reader.next(key, value)) {
        if (cols == 0) {
          Vector v = value.get();
          cols = Math.max(v.size(), cols);
        }
        if (isLongKey) {
          rows = Math.max(((LongWritable)(key)).get()+1, rows);
        }
        if (isIntKey) {
          rows = Math.max(((IntWritable)(key)).get()+1, rows);
        }
        if (isStringKey) {
          rows++;
        }
      }
      reader.seek(start);


      frame = H2OHelper.emptyFrame(rows, cols, parMin, -1);
      writers = new Vec.Writer[cols];
      for (int i = 0; i < writers.length; i++) {
        writers[i] = frame.vecs()[i].open();
      }


      if (reader.getKeyClass() == Text.class) {
        labels = frame.anyVec().makeZero();
        labelwriter = labels.open();
      }


      long r = 0;
      while (reader.next(key, value)) {
        Vector v = value.get();

View Full Code Here

   * @param filename Filename to create and store DRM data in.
   * @param drm DRM object storing Matrix data in memory.
   */
  public static void drmToFile(String filename, H2ODrm drm) throws java.io.IOException {
    Frame frame = drm.frame;
    Vec labels = drm.keys;
    String uri = filename;
    Configuration conf = new Configuration();
    Path path = new Path(uri);
    FileSystem fs = FileSystem.get(URI.create(uri), conf);
    SequenceFile.Writer writer = null;
    boolean isSparse = H2OHelper.isSparse(frame);
    ValueString vstr = new ValueString();


    if (labels != null) {
      writer = SequenceFile.createWriter(fs, conf, path, Text.class, VectorWritable.class);
    } else {
      writer = SequenceFile.createWriter(fs, conf, path, IntWritable.class, VectorWritable.class);
    }


    for (long r = 0; r < frame.anyVec().length(); r++) {
      Vector v = null;
      if (isSparse) {
        v = new SequentialAccessSparseVector(frame.numCols());
      } else {
        v = new DenseVector(frame.numCols());
      }


      for (int c = 0; c < frame.numCols(); c++) {
        v.setQuick(c, frame.vecs()[c].at(r));
      }


      if (labels != null) {
        writer.append(new Text(labels.atStr(vstr, r).toString()), new VectorWritable(v));
      } else {
        writer.append(new IntWritable((int)r), new VectorWritable(v));
      }
    }

View Full Code Here

   * @param B in-core Mahout Matrix.
   * @return new DRM containing drmA times B.
   */
  public static H2ODrm exec(H2ODrm drmA, Matrix B) {
    Frame A = drmA.frame;
    Vec keys = drmA.keys;
    Frame AinCoreB = null;


    if (B instanceof DiagonalMatrix) {
      AinCoreB = execDiagonal(A, B.viewDiagonal());
    } else {

View Full Code Here

   * @param x in-core Mahout Vector.
   * @return new DRM containing Ax.
   */
  public static H2ODrm exec(H2ODrm drmA, Vector x) {
    Frame A = drmA.frame;
    Vec keys = drmA.keys;
    final H2OBCast<Vector> bx = new H2OBCast<Vector>(x);


    // Ax is written into nc (single element, not array) with an MRTask on A,
    // and therefore will be similarly partitioned as A.
    //

View Full Code Here

   * @param R Range object specifying the start and end row numbers to filter.
   * @return new DRM with just the filtered rows.
   */
  public static H2ODrm exec(H2ODrm drmA, final Range R) {
    Frame A = drmA.frame;
    Vec keys = drmA.keys;


    // Run a filtering MRTask on A. If row number falls within R.start() and
    // R.end(), then the row makes it into the output
    Frame Arr = new MRTask() {
        public void map(Chunk chks[], NewChunk ncs[]) {
          int chunkSize = chks[0].len();
          long chunkStart = chks[0].start();


          // First check if the entire chunk even overlaps with R
          if (chunkStart > R.end() || (chunkStart + chunkSize) < R.start()) {
            return;
          }


          // This chunk overlaps, filter out just the overlapping rows
          for (int r = 0; r < chunkSize; r++) {
            if (!R.contains(chunkStart + r)) {
              continue;
            }


            for (int c = 0; c < chks.length; c++) {
              ncs[c].addNum(chks[c].at0(r));
            }
          }
        }
      }.doAll(A.numCols(), A).outputFrame(null, null);


    Vec Vrr = (keys == null) ? null : new MRTask() {
        // This is a String keyed DRM. Do the same thing as above,
        // but this time just one column of Strings.
        public void map(Chunk chk, NewChunk nc) {
          int chunkSize = chk.len();
          long chunkStart = chk.start();

View Full Code Here

   * @return new DRM containing A (element-wise) B.
   */
  public static H2ODrm exec(H2ODrm drmA, H2ODrm drmB, final String op) {
    final Frame A = drmA.frame;
    final Frame B = drmB.frame;
    Vec keys = drmA.keys;
    int AewB_cols = A.numCols();


    // AewB is written into ncs[] with an MRTask on A, and therefore will
    // be similarly partitioned as A.
    //
    // B may or may not be similarly partitioned as A, but must have the
    // same dimensions of A.
    Frame AewB = new MRTask() {
        private double opfn(String op, double a, double b) {
          if (a == 0.0 && b == 0.0) {
            return 0.0;
          }
          if (op.equals("+")) {
            return a + b;
          } else if (op.equals("-")) {
            return a - b;
          } else if (op.equals("*")) {
            return a * b;
          } else if (op.equals("/")) {
            return a / b;
          }
          return 0.0;
        }
        @Override
        public void map(Chunk chks[], NewChunk ncs[]) {
          int chunkSize = chks[0].len();
          Vec B_vecs[] = B.vecs();
          long start = chks[0].start();


          for (int c = 0; c < chks.length; c++) {
            for (int r = 0; r < chunkSize; r++) {
              ncs[c].addNum(opfn(op, chks[c].at0(r), B_vecs[c].at(start + r)));

View Full Code Here

   * @param drmB DRM representing matrix B.
   * @return new DRM containing rows of B below A.
   */
  public static H2ODrm exec(H2ODrm drmA, H2ODrm drmB) {
    final Frame fra = drmA.frame;
    final Vec keysa = drmA.keys;
    final Frame frb = drmB.frame;
    final Vec keysb = drmB.keys;


    // Create new frame and copy A's data at the top, and B's data below.
    // Create the frame in the same VectorGroup as A, so A's data does not
    // cross the wire during copy. B's data could potentially cross the wire.
    Frame frbind = H2OHelper.emptyFrame(fra.numRows() + frb.numRows(), fra.numCols(),
            -1, -1, fra.anyVec().group());
    Vec keys = null;


    MRTask task = new MRTask() {
        public void map(Chunk chks[], NewChunk nc) {
          Vec A_vecs[] = fra.vecs();
          Vec B_vecs[] = frb.vecs();
          long A_rows = fra.numRows();
          long B_rows = frb.numRows();
          long start = chks[0].start();
          int chunkSize = chks[0].len();
          ValueString vstr = new ValueString();

View Full Code Here

   * @param op Element-wise operator encoded as a String.
   * @return new DRM containing A (element-wise) B.
   */
  public static H2ODrm exec(H2ODrm drmA, final double s, final String op) {
    Frame A = drmA.frame;
    Vec keys = drmA.keys;
    int AewScalar_cols = A.numCols();


    // AewScalar is written into ncs[] with an MRTask on A, and therefore will
    // be similarly partitioned as A.
    Frame AewScalar = new MRTask() {

View Full Code Here

   * @param drmB DRM representing matrix B.
   * @return new DRM containing columns of A and B adjacent.
   */
  public static H2ODrm exec(H2ODrm drmA, H2ODrm drmB) {
    Frame fra = drmA.frame;
    Vec keysa = drmA.keys;
    Frame frb = drmB.frame;
    Vec keysb = drmB.keys;


    // If A and B are similarly partitioned, ..
    if (fra.anyVec().group() == frb.anyVec().group()) {
      // .. then, do a light weight zip()
      return zip(fra, keysa, frb, keysb);

View Full Code Here

0 1 2 3

TOP

Related Classes of water.fvec.Vec$VectorGroup

org.apache.mahout.h2obindings.H2OHdfs

org.apache.mahout.h2obindings.H2OHelper

org.apache.mahout.h2obindings.ops.ABt

org.apache.mahout.h2obindings.ops.AewB

org.apache.mahout.h2obindings.ops.AewScalar

org.apache.mahout.h2obindings.ops.At

org.apache.mahout.h2obindings.ops.AtA

org.apache.mahout.h2obindings.ops.AtB

org.apache.mahout.h2obindings.ops.Ax

org.apache.mahout.h2obindings.ops.Cbind

All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.