Package org.apache.mahout.df.mapreduce.partial

Source Code of org.apache.mahout.df.mapreduce.partial.InterResults

/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.mahout.df.mapreduce.partial;

import java.io.IOException;

import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.mahout.df.node.Node;

/**
* Stores/Loads the intermediate results of step1 needed by step2.<br>
* This class should not be needed outside of the partial package, so all its
* methods are protected.<br>
*/
public class InterResults {
  private InterResults() {
  }

  /**
   * Load the trees and the keys returned from the first step
   *
   * @param fs forest path file system
   * @param forestPath file path to the (key,tree) file
   * @param numMaps number of map tasks
   * @param numTrees total number of trees in the forest
   * @param partition current partition
   * @param keys array of size numTrees, will contain the loaded keys
   * @param trees array of size numTrees, will contain the loaded trees
   * @return number of instances in the current partition
   * @throws IOException
   */
  public static int load(FileSystem fs, Path forestPath, int numMaps,
      int numTrees, int partition, TreeID[] keys, Node[] trees)
      throws IOException {
    if (keys.length != trees.length) {
      throw new IllegalArgumentException("keys.length != trees.length");
    }

    FSDataInputStream in = fs.open(forestPath);

    TreeID key = new TreeID();
    int numInstances = -1;

    try {
      // get current partition's size
      for (int p = 0; p < numMaps; p++) {
        if (p == partition) {
          numInstances = in.readInt();
        } else {
          in.readInt();
        }
      }

      // load (key, tree)
      int current = 0;
      for (int index = 0; index < numTrees; index++) {
        key.readFields(in);

        if (key.partition() != partition) {
          keys[current] = key.clone();
          trees[current] = Node.read(in);

          current++;
        } else {
          // skip the trees of the current partition
          Node.read(in);
        }
      }

      if (current != keys.length) {
        throw new IllegalStateException("loaded less keys/trees than expected");
      }
    } finally {
      in.close();
    }

    return numInstances;
  }

  /**
   * Write the forest trees into a file
   *
   * @param fs File System
   * @param keys keys returned by the first step
   * @param trees trees returned by the first step
   * @param sizes partitions' sizes in hadoop order
   * @throws IOException
   */
  public static void store(FileSystem fs, Path forestPath,
      TreeID[] keys, Node[] trees, int[] sizes) throws IOException {
    if (keys.length != trees.length) {
      throw new IllegalArgumentException("keys.length != trees.length");
    }

    int numTrees = keys.length;
    int numMaps = sizes.length;

    FSDataOutputStream out = fs.create(forestPath);

    // write partitions' sizes
    for (int p = 0; p < numMaps; p++) {
      out.writeInt(sizes[p]);
    }

    // write the data
    for (int index = 0; index < numTrees; index++) {
      keys[index].write(out);
      trees[index].write(out);
    }

    out.close();
  }

}
TOP

Related Classes of org.apache.mahout.df.mapreduce.partial.InterResults

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.