/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.mahout.h2obindings.ops;
import org.apache.mahout.math.Matrix;
import org.apache.mahout.h2obindings.H2OBlockMatrix;
import org.apache.mahout.h2obindings.drm.H2ODrm;
import water.MRTask;
import water.fvec.Frame;
import water.fvec.Vec;
import water.fvec.Chunk;
import water.fvec.NewChunk;
import java.io.Serializable;
import java.util.Arrays;
import scala.reflect.ClassTag;
/**
* MapBlock operator.
*/
public class MapBlock {
/**
* Execute a BlockMapFunction on DRM partitions to create a new DRM.
*
* @param drmA DRM representing matrix A.
* @param ncol Number of columns output by BMF.
* @param bmf BlockMapFunction which maps input DRM partition to output.
* @param isRstr flag indicating if key type of output DRM is a String.
* @param k ClassTag of intput DRM key type.
* @param r ClassTag of output DRM key type.
* @return new DRM constructed from mapped blocks of drmA through bmf.
*/
public static <K,R> H2ODrm exec(H2ODrm drmA, int ncol, Object bmf, final boolean isRstr,
final ClassTag<K> k, final ClassTag<R> r) {
Frame A = drmA.frame;
Vec keys = drmA.keys;
/**
* MRTask to execute bmf on partitions. Partitions are
* made accessible to bmf in the form of H2OBlockMatrix.
*/
class MRTaskBMF extends MRTask<MRTaskBMF> {
Serializable bmf;
Vec labels;
MRTaskBMF(Object _bmf, Vec _labels) {
// BlockMapFun does not implement Serializable,
// but Scala closures are _always_ Serializable.
//
// So receive the object as a plain Object (else
// compilation fails) and typcast it with conviction,
// that Scala always tags the actually generated
// closure functions with Serializable.
bmf = (Serializable)_bmf;
labels = _labels;
}
/** Create H2OBlockMatrix from the partition */
private Matrix blockify(Chunk chks[]) {
return new H2OBlockMatrix(chks);
}
/** Ingest the output of bmf into the output partition */
private void deblockify(Matrix out, NewChunk ncs[]) {
// assert (out.colSize() == ncs.length)
for (int c = 0; c < out.columnSize(); c++) {
for (int r = 0; r < out.rowSize(); r++) {
ncs[c].addNum(out.getQuick(r, c));
}
}
}
// Input:
// chks.length == A.numCols()
//
// Output:
// ncs.length == (A.numCols() + 1) if String keyed
// (A.numCols() + 0) if Int or Long keyed
//
// First A.numCols() ncs[] elements are fed back the output
// of bmf() output's _2 in deblockify()
//
// If String keyed, then MapBlockHelper.exec() would have
// filled in the Strings into ncs[ncol] already
//
public void map(Chunk chks[], NewChunk ncs[]) {
long start = chks[0].start();
NewChunk nclabel = isRstr ? ncs[ncs.length - 1] : null;
deblockify(MapBlockHelper.exec(bmf, blockify(chks), start, labels, nclabel, k, r), ncs);
// assert chks[i]._len == ncs[j]._len
}
}
int ncolRes = ncol + (isRstr ? 1 : 0);
Frame fmap = new MRTaskBMF(bmf, keys).doAll(ncolRes, A).outputFrame(null, null);
Vec vmap = null;
if (isRstr) {
// If output was String keyed, then the last Vec in fmap is the String vec.
// If so, peel it out into a separate Vec (vmap) and set fmap to be the
// Frame with just the first ncol Vecs
vmap = fmap.vecs()[ncol];
fmap = new Frame(Arrays.copyOfRange(fmap.vecs(), 0, ncol));
}
return new H2ODrm(fmap, vmap);
}
}