Package Recommender

Source Code of Recommender.ParallelKNN$CalcReduce

package Recommender;

import Database.KDDParser;
import Database.Primitives.Similarity;
import Database.Primitives.Song;
import Database.Primitives.User;
import Database.Similarities;
import Database.Songs;
import Database.Users;
import Main.Main;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Mapper.Context;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
*
* @author ninj0x
*/
public class ParallelKNN extends Configured implements Recommender {

    // Path of the text file that lists one chunk path per line; it is
    // (re)written by createChunkNameFile and used as the job's input.
    final static String CHUNK_LIST_NAME = "chunkNameList.txt";
    // Configuration key under which runCalc stores the file name of the
    // chunk placed in the distributed cache; CalcMap.setup reads it back.
    final static String MY_CHUNK_NAME_ID = "mychunk";
    // Configuration key carrying the rating-count threshold to the mappers.
    final static String THRESHOLD = "threshold";

    @Override
    public void createNeighborhoods() {
        FileStatus[] chunks;
        try {
            Configuration conf = new Configuration();
            FileSystem fs = FileSystem.get(conf);
            Path chunkDir = new Path(Main.getOptions().getArgumentList().get(0));
            chunks = fs.listStatus(chunkDir);
            for (FileStatus chunk : chunks) {
                runCalc(chunk.getPath(), chunks);
            }
        } catch (IOException ex) {
            Logger.getLogger(ParallelKNN.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    @Override
    public void recommendSong(String activeUserFile, double threshold) throws FileNotFoundException {
        throw new UnsupportedOperationException("Not supported yet.");
//        try {
//            Configuration conf = new Configuration();
//            FileSystem fs = FileSystem.get(conf);
//            Path chunkDir = new Path(Main.getOptions().getArgumentList().get(0));
//
//        } catch (IOException ex) {
//            Logger.getLogger(ParallelKNN.class.getName()).log(Level.SEVERE, null, ex);
//        }       
    }

    /**
     * *************************************************************************
     * Calculate Neighborhoods
     */
    /**
     * Map Class for Calculating Neighbors Input: KDD Database in Preprocessed
     * chunks based on Song Output: Neighborhoods for all songs Compare chunks
     * to each other and reduce to get the top K neighbors
     */
    public static class CalcMap
            extends Mapper<Object, Text, IntWritable, Text> {

        private Users myUsers = new Users();
        private Songs mySongs = new Songs();
        private int threshold;

        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            threshold = context.getConfiguration().getInt(THRESHOLD, 1);
            Path[] cachedFiles = DistributedCache.getLocalCacheFiles(context.getConfiguration());
            String myChunkName = context.getConfiguration().get(MY_CHUNK_NAME_ID);
            for (Path file : cachedFiles) {
                if (file.getName().matches(myChunkName)) {
                    KDDParser parser = new KDDParser(file.toString());
                    parser.parse(mySongs, myUsers);
                }
            }
        }

        @Override
        public void map(Object key, Text chunkURI, Context context) throws IOException, InterruptedException {

            Users otherUsers = new Users();
            Songs otherSongs = new Songs();
            KDDParser parser = new KDDParser(chunkURI.toString(), context.getConfiguration());
            parser.parse(otherSongs, otherUsers);


            //forall items i  //ith iteration
            for (Song i : mySongs) {

                //    forall items j  //split this into N parts
                for (Song j : otherSongs) {
                    double numerator = 0, denominator_left = 0, denominator_right = 0;

                    if (j.equals(i)) {
                        continue;
                    }

                    //        forall users user
                    int userCount = 0;
                    for (User user : myUsers) {
                        double num = 0, den_l = 0, den_r = 0;
//                        System.out.println("comparing! song: " + i.getID() + " and " + j.getID());
                        User other = otherUsers.getUser(user.getID());
                        if (other == null) {
                            continue;
                        }
                        if (user.rated(i) && other.rated(j)) {
//                            System.out.println("if (user.rated(i) && user.rated(j))! song: " + i.getID() + " and " + j.getID());
                            userCount++;
                            double iTmp = user.getRating(i) - user.getAvgRating();
                            double jTmp = other.getRating(j) - other.getAvgRating();

                            num = iTmp * jTmp;
                            den_l = iTmp * iTmp;
                            den_r = jTmp * jTmp;
                        }
                        numerator += num;
                        denominator_left += den_l;
                        denominator_right += den_r;
                    }
                    /*
                     * sim will equal 1.0 if both songs are rated the same (have
                     * the same difference, for each user) sim will equal -1.0
                     * if the songs have the same difference but different signs
                     * NaN if no users have rated both songs If negative, should
                     * not recommend...
                     */

                    double sim = numerator / Math.sqrt(denominator_left * denominator_right);
                    if (userCount > threshold) {
                        Similarity is = new Similarity(j, sim);
                        context.write(new IntWritable(i.getID()), new Text(is.toString()));
                    }
                }
            }
        }
    }

    public static class CalcReduce
            extends Reducer<IntWritable, Text, IntWritable, Text> {

        @Override
        public void reduce(IntWritable songId, Iterable<Text> sim,
                Context context) throws IOException, InterruptedException {
            Similarities neih = new Similarities();
            for (Text s : sim) {
//                System.out.println("sim " + s.toString());
                neih.insert(new Similarity(s.toString()));
            }

            for (Similarity s : neih) {
                context.write(songId, new Text(s.toString()));
            }
        }
    }

    private int runCalc(Path myChunk, FileStatus[] chunks) {
        Configuration conf = new Configuration();

        //need to add MainChunk to DistributedCache
        DistributedCache.addCacheFile(myChunk.toUri(), conf);

        conf.set(MY_CHUNK_NAME_ID, myChunk.getName());
        conf.setInt(THRESHOLD, Main.getOptions().getRatingCountThreshold());
        Job job;
        try {
            job = new Job(conf, "KNNParallelRecommender");
            job.setJarByClass(ParallelKNN.class);
            job.setMapperClass(CalcMap.class);
            job.setCombinerClass(CalcReduce.class);
            job.setReducerClass(CalcReduce.class);
            job.setOutputKeyClass(IntWritable.class);
            job.setOutputValueClass(Text.class);
            FileInputFormat.setMaxInputSplitSize(job, myChunk.toString().getBytes().length+5);
            FileInputFormat.addInputPath(job, createChunkNameFile(conf, chunks));
            FileOutputFormat.setOutputPath(job, getOutputPath(conf, myChunk));
            try {
                System.out.println("Running on chunk "+myChunk.toString());
                job.waitForCompletion(true);
                System.out.println("Done!");
            } catch (InterruptedException ex) {
                Logger.getLogger(ParallelKNN.class.getName()).log(Level.SEVERE, null, ex);
            } catch (ClassNotFoundException ex) {
                Logger.getLogger(ParallelKNN.class.getName()).log(Level.SEVERE, null, ex);
            }
        } catch (IOException ex) {
            Logger.getLogger(ParallelKNN.class.getName()).log(Level.SEVERE, null, ex);
        }


        return 0;
    }

    private Path createChunkNameFile(Configuration conf, FileStatus[] chunks) {
        FileSystem fs;
        Path chuckNameList = new Path(CHUNK_LIST_NAME);
        try {
            fs = FileSystem.get(conf);
            if (fs.exists(chuckNameList)) {
                fs.delete(chuckNameList, true);
            }

            FSDataOutputStream out = fs.create(chuckNameList);

            for (FileStatus chunk : chunks) {
                out.writeBytes(chunk.getPath().toString() + "\n");
            }

            out.close();
        } catch (IOException ex) {
            Logger.getLogger(ParallelKNN.class.getName()).log(Level.SEVERE, null, ex);
        }
        return chuckNameList;
    }

    private Path getOutputPath(Configuration conf, Path chunk) {
        String outputDir = Main.getOptions().getArgumentList().get(1);
        int indexOfExtension = chunk.getName().lastIndexOf(".");
        outputDir += "/";
        outputDir += chunk.getName().substring(0, indexOfExtension);
        Path out = new Path(outputDir);
        removeOldOutputDir(conf, out);
        return out;
    }

    private void removeOldOutputDir(Configuration conf, Path out) {
        try {
            FileSystem fs = FileSystem.get(conf);
            if (fs.exists(out)) {
                fs.delete(out, true);
            }
        } catch (IOException ex) {
            Logger.getLogger(ParallelKNN.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
}
TOP

Related Classes of Recommender.ParallelKNN$CalcReduce

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.