Package org.apache.mahout.clustering.spectral.eigencuts

Source Code of org.apache.mahout.clustering.spectral.eigencuts.EigencutsAffinityCutsJob$EigencutsAffinityCutsReducer

/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.mahout.clustering.spectral.eigencuts;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.mahout.clustering.spectral.common.VertexWritable;
import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@Deprecated
public final class EigencutsAffinityCutsJob {

  /** Logger shared with the nested task classes; the combiner uses it for per-key debug output. */
  private static final Logger log = LoggerFactory.getLogger(EigencutsAffinityCutsJob.class);

  /** Not instantiable: the job is launched through the static {@code runjob} method. */
  private EigencutsAffinityCutsJob() {
  }

  /** Hadoop counter identifiers reported by this job. */
  enum CUTSCOUNTER {
    /**
     * Incremented by the combiner once for every key group in which a cut is applied;
     * its final value is what {@code runjob} returns.
     */
    NUM_CUTS
  }

  /**
   * Runs a single iteration of defining cluster boundaries, based on
   * previous calculations and the formation of the "cut matrix".
   *
   * @param currentAffinity Path to the current affinity matrix.
   * @param cutMatrix Path to the sensitivity matrix.
   * @param nextAffinity Output path for the new affinity matrix.
   */
  public static long runjob(Path currentAffinity, Path cutMatrix, Path nextAffinity, Configuration conf)
    throws IOException, ClassNotFoundException, InterruptedException {
   
    // these options allow us to differentiate between the two vectors
    // in the mapper and reducer - we'll know from the working path
    // which SequenceFile we're accessing
    conf.set(EigencutsKeys.AFFINITY_PATH, currentAffinity.getName());
    conf.set(EigencutsKeys.CUTMATRIX_PATH, cutMatrix.getName());
   
    Job job = new Job(conf, "EigencutsAffinityCutsJob");
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(VertexWritable.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(VectorWritable.class);
    job.setMapperClass(EigencutsAffinityCutsMapper.class);
    job.setCombinerClass(EigencutsAffinityCutsCombiner.class);
    job.setReducerClass(EigencutsAffinityCutsReducer.class);
   
    //FileInputFormat.addInputPath(job, currentAffinity);
    FileInputFormat.addInputPath(job, cutMatrix);
    FileOutputFormat.setOutputPath(job, nextAffinity);
   
    boolean succeeded = job.waitForCompletion(true);
    if (!succeeded) {
      throw new IllegalStateException("Job failed!");
    }

    return job.getCounters().findCounter(CUTSCOUNTER.NUM_CUTS).getValue();
  }
 
  public static class EigencutsAffinityCutsMapper
    extends Mapper<IntWritable, VectorWritable, Text, VertexWritable> {
   
    @Override
    protected void map(IntWritable key, VectorWritable row, Context context)
      throws IOException, InterruptedException {
     
      // all this method does is construct a bunch of vertices, mapping those
      // together which have the same *combination* of indices; for example,
      // (1, 3) will have the same key as (3, 1) but a different key from (1, 1)
      // and (3, 3) (which, incidentally, will also not be grouped together)
      String type = context.getWorkingDirectory().getName();
      Vector vector = row.get();
      for (Vector.Element e : vector.all()) {
        String newkey = Math.max(key.get(), e.index()) + "_" + Math.min(key.get(), e.index());
        context.write(new Text(newkey), new VertexWritable(key.get(), e.index(), e.get(), type));
      }
    }
  }
 
  public static class EigencutsAffinityCutsCombiner
    extends Reducer<Text, VertexWritable, Text, VertexWritable> {
   
    @Override
    protected void reduce(Text t, Iterable<VertexWritable> vertices,
        Context context) throws IOException, InterruptedException {
      // there should be exactly 4 items in the iterable; two from the
      // first Path source, and two from the second with matching (i, j) indices
     
      // the idea here is that we want the two vertices of the "cut" matrix,
      // and if either of them has a non-zero value, we want to:
      //
      // 1) zero out the two affinity vertices, and
      // 2) add their former values to the (i, i) and (j, j) coordinates
      //
      // though obviously we want to perform these steps in reverse order
      Configuration conf = context.getConfiguration();
      log.debug("{}", t);
      boolean zero = false;
      int i = -1;
      int j = -1;
      double k = 0;
      int count = 0;
      for (VertexWritable v : vertices) {
        count++;
        if (v.getType().equals(conf.get(EigencutsKeys.AFFINITY_PATH))) {
          i = v.getRow();
          j = v.getCol();
          k = v.getValue();
        } else if (v.getValue() != 0.0) {
          zero = true;
        }
      }
      // if there are only two vertices, we have a diagonal
      // we want to preserve whatever is currently in the diagonal,
      // since this is acting as a running sum of all other values
      // that have been "cut" so far - simply return this element as is
      if (count == 2) {
        VertexWritable vw = new VertexWritable(i, j, k, "unimportant");
        context.write(new Text(String.valueOf(i)), vw);
        return;
      }
     
      // do we zero out the values?
      VertexWritable outI = new VertexWritable();
      VertexWritable outJ = new VertexWritable();
      if (zero) {
        // increment the cut counter
        context.getCounter(CUTSCOUNTER.NUM_CUTS).increment(1);
       
        // we want the values to exist on the diagonal
        outI.setCol(i);
        outJ.setCol(j);
       
        // also, set the old values to zero
        VertexWritable zeroI = new VertexWritable();
        VertexWritable zeroJ = new VertexWritable();
        zeroI.setCol(j);
        zeroI.setValue(0);
        zeroJ.setCol(i);
        zeroJ.setValue(0);
        zeroI.setType("unimportant");
        zeroJ.setType("unimportant");
        context.write(new Text(String.valueOf(i)), zeroI);
        context.write(new Text(String.valueOf(j)), zeroJ);
      } else {
        outI.setCol(j);
        outJ.setCol(i);
      }
     
      // set the values and write them
      outI.setValue(k);
      outJ.setValue(k);
      outI.setType("unimportant");
      outJ.setType("unimportant");
      context.write(new Text(String.valueOf(i)), outI);
      context.write(new Text(String.valueOf(j)), outJ);
    }
  }
 
  public static class EigencutsAffinityCutsReducer
    extends Reducer<Text, VertexWritable, IntWritable, VectorWritable> {
   
    @Override
    protected void reduce(Text row, Iterable<VertexWritable> entries,
        Context context) throws IOException, InterruptedException {
      // now to assemble the vectors
      RandomAccessSparseVector output = new RandomAccessSparseVector(
          context.getConfiguration().getInt(EigencutsKeys.AFFINITY_DIMENSIONS, Integer.MAX_VALUE), 100);
      int rownum = Integer.parseInt(row.toString());
      for (VertexWritable e : entries) {
        // first, are we setting a diagonal?
        if (e.getCol() == rownum) {
          // add to what's already present
          output.setQuick(e.getCol(), output.getQuick(e.getCol()) + e.getValue());
        } else {
          // simply set the value
          output.setQuick(e.getCol(), e.getValue());
        }
      }
      context.write(new IntWritable(rownum), new VectorWritable(output));
    }
  }
}
TOP

Related Classes of org.apache.mahout.clustering.spectral.eigencuts.EigencutsAffinityCutsJob$EigencutsAffinityCutsReducer

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.