Package finderbots.recommenders.hadoop

Source Code of finderbots.recommenders.hadoop.VectorsToCSVFunction$Context

package finderbots.recommenders.hadoop;

import cascading.flow.FlowProcess;
import cascading.operation.BaseOperation;
import cascading.operation.Function;
import cascading.operation.FunctionCall;
import cascading.tuple.Fields;
import cascading.tuple.Tuple;
import cascading.tuple.TupleEntry;
import com.google.common.collect.BiMap;
import com.google.common.collect.HashBiMap;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.Logger;
import org.apache.mahout.common.Pair;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;

/**
* User: pat
* Date: 8/12/13
* Time: 4:45 PM
*/

public class VectorsToCSVFunction extends BaseOperation implements Function {
    private static Logger LOGGER = Logger.getRootLogger();
    private static HashBiMap<String,String> itemIndex;
    private static HashBiMap<String,String> rowIndex;

    static class Context{
        public static int i;
    }

    public VectorsToCSVFunction(Fields outfields){
        super(outfields);
    }

    public void operate( FlowProcess flowProcess, FunctionCall functionCall )
    {
        TupleEntry arguments = functionCall.getArguments();
        int key = arguments.getInteger(arguments.getFields().get(0));
        try {
            String doJoinString = (String)flowProcess.getProperty("joining");
            String itemIDString = rowIndex.inverse().get(String.valueOf(key));
            Vector va = ((VectorWritable)arguments.getObject(arguments.getFields().get(1))).get();
            String vaDoc = createOrderedDoc(va, itemIndex);
            Tuple tuple;
            if(doJoinString.equals("true")){
                Vector vb = ((VectorWritable)arguments.getObject(arguments.getFields().get(3))).get();
                String vbDoc = createOrderedDoc(vb, itemIndex);
                tuple = new Tuple(itemIDString, vaDoc, vbDoc);
            } else { // not joining, just converting to CSV
                tuple = new Tuple(itemIDString, vaDoc);
            }
            functionCall.getOutputCollector().add(tuple);

        } catch (Exception e) {
            e.printStackTrace()//To change body of catch statement use File | Settings | File Templates.
        }
        //now do some magic to write fields to the output tuple.
        int i = 0;
    }

    @Override
    public void prepare(cascading.flow.FlowProcess flowProcess, cascading.operation.OperationCall operationCall) {
        try {
            String itemIndexPath = (String)flowProcess.getProperty("itemIndexPath");
            String rowIndexPath = (String)flowProcess.getProperty("rowIndexPath");
            itemIndex = Utils.readIndex(new Path(itemIndexPath));
            if(!itemIndexPath.equals(rowIndexPath)){
                rowIndex = Utils.readIndex(new Path(rowIndexPath));
            } else { //identical indexes
                rowIndex = itemIndex;
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    private String createOrderedDoc( Vector v, BiMap<String, String> elementIndex){
        String doc = new String("");
        //sort the vector by element weight
        class VectorElementComparator implements Comparator<Pair<Integer,Double>> {

            @Override
            public int compare(Pair<Integer,Double> o1, Pair<Integer,Double> o2) {
                if (o1.getSecond() > o2.getSecond()) {
                    return -1;
                } else if (o1.getSecond() < o2.getSecond()) {
                    return 1;
                } else if (o1.getFirst() > o2.getFirst()) {
                    return -1;
                } else if (o1.getFirst() < o2.getFirst()) {
                    return 1;
                } else {
                    return 0;
                }
            }
        }

        ArrayList<Pair<Integer,Double>> itemList = new ArrayList<Pair<Integer,Double>>();
        for(Vector.Element ve : v.nonZeroes()){
            Pair<Integer,Double> item = new Pair<Integer, Double>(ve.index(),ve.get());
            itemList.add(item);
        }
        Collections.sort(itemList, new VectorElementComparator());
        for(Pair<Integer,Double> item : itemList){
            int i = item.getFirst();
            String s = String.valueOf(i);
            String exID = elementIndex.inverse().get(s);
            doc += exID+" ";
        }
        return doc;
    }

    //Don't use for DRMs because the vectors are not sorted and the docs should have terms ordered by strength
    String createDoc(Vector v, HashBiMap<String,String> index){
        String doc = "";
        for(Vector.Element ve : v.nonZeroes()){
            doc += index.inverse().get(String.valueOf(ve.index()))+" ";
        }
        return doc;
    }


}
TOP

Related Classes of finderbots.recommenders.hadoop.VectorsToCSVFunction$Context

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.