Package dumplucene

Source Code of dumplucene.TFIDF_Anliz

/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package dumplucene;


import java.io.BufferedReader;
import java.io.DataInputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.mahout.math.NamedVector;
import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.Vector.Element;
import org.apache.mahout.math.VectorWritable;

/**
*
* @author Sony
*/
public class TFIDF_Anliz {
   
    public static void main(String args[]) throws Exception {
       
        HashMap dict = new HashMap();
        String str[];
        try {
            // Open the file that is the first
            // command line parameter
            FileInputStream fstream = new FileInputStream(System.getProperty("dict")) ; //"resources\\outputDictionnaryPathAndFilename");
            // Get the object of DataInputStream
            DataInputStream in = new DataInputStream(fstream);
            BufferedReader br = new BufferedReader(new InputStreamReader(in));
            String strLine;
            strLine = br.readLine();
            strLine = br.readLine();
            //Read File Line By Line
            while ((strLine = br.readLine()) != null) {
                // Print the content on the console
                //System.out.println(strLine);
                str = strLine.split("\t");
                dict.put(new Integer (str[2]) .intValue(), str[0]);
                //System.out.println( dict.get(str[2]));// + " = "+str[1]);
            }
            //Close the input stream
            in.close();
        } catch (Exception e) {//Catch exception if any
            System.err.println("Error: " + e.getMessage());
        }
  
       
       
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        String vectorsPath = System.getProperty("vect"); //"resources\\outputVectorPathAndFilename"; //
        Path path = new Path(vectorsPath);
       
        //Make a map
       

        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
        LongWritable key = new LongWritable();
        VectorWritable value = new VectorWritable();
        while (reader.next(key, value)) {
            NamedVector namedVector = (NamedVector) value.get();
            RandomAccessSparseVector vect = (RandomAccessSparseVector) namedVector.getDelegate();

            for (Element e : vect) {
                if (e.get() > 0) {
                    //System.out.println("Token: " + e.index() + ", TF-IDF weight: " + e.get());
                    System.out.println(dict.get(e.index()) + "," + e.get());
                }
            }
        }
        reader.close();
    }
}
TOP

Related Classes of dumplucene.TFIDF_Anliz

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.