/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package dumplucene;
import java.io.BufferedReader;
import java.io.DataInputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.mahout.math.NamedVector;
import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.Vector.Element;
import org.apache.mahout.math.VectorWritable;
/**
 * Command-line tool that reads vectors from a Hadoop {@code SequenceFile} and
 * prints, for every strictly positive element, the dictionary term mapped to
 * the element's index followed by the element's value
 * (presumably a TF-IDF weight, judging by the class name).
 *
 * <p>Inputs are taken from JVM system properties:
 * <ul>
 * <li>{@code dict} - path of the tab-separated dictionary file;</li>
 * <li>{@code vect} - path of the sequence file holding the vectors.</li>
 * </ul>
 *
 * @author Sony
 */
public class TFIDF_Anliz {

    /**
     * Number of leading lines of the dictionary file that are discarded before
     * parsing (presumably header lines - confirm against the dictionary writer).
     */
    private static final int DICT_HEADER_LINES = 2;

    /**
     * Loads the dictionary, then writes one {@code term,weight} line to standard
     * output for every vector element whose value is greater than zero.
     *
     * <p>Dictionary loading is best-effort: a failure is reported on standard
     * error and the dump continues with whatever entries were read so far, so
     * unknown indices are printed as {@code null}.
     *
     * @param args unused; inputs come from the {@code dict} and {@code vect}
     *             system properties
     * @throws IllegalArgumentException if the {@code vect} property is not set
     * @throws Exception if the sequence file cannot be opened or read
     */
    public static void main(String args[]) throws Exception {
        Map<Integer, String> dict = new HashMap<Integer, String>();
        try {
            loadDictionary(System.getProperty("dict"), dict);
        } catch (Exception e) {
            // Deliberately broad: any dictionary problem (I/O, malformed number,
            // missing property) is reported and the dump proceeds regardless.
            System.err.println("Error: " + e.getMessage());
        }

        String vectorsPath = System.getProperty("vect");
        if (vectorsPath == null) {
            throw new IllegalArgumentException(
                    "System property 'vect' (path of the vectors sequence file) is not set");
        }

        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path(vectorsPath);

        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
        try {
            LongWritable key = new LongWritable();
            VectorWritable value = new VectorWritable();
            while (reader.next(key, value)) {
                // Each record is expected to hold a NamedVector wrapping a
                // RandomAccessSparseVector; any other layout fails with a
                // ClassCastException, exactly as before.
                NamedVector namedVector = (NamedVector) value.get();
                RandomAccessSparseVector vect =
                        (RandomAccessSparseVector) namedVector.getDelegate();
                for (Element e : vect) {
                    if (e.get() > 0) {
                        System.out.println(dict.get(e.index()) + "," + e.get());
                    }
                }
            }
        } finally {
            // Release the reader even when iteration fails part-way through.
            reader.close();
        }
    }

    /**
     * Reads the dictionary file into {@code dict}, mapping the integer in the
     * third tab-separated field of each line to the string in the first field.
     * The second field is ignored. Entries are added as they are parsed, so the
     * map keeps everything read before a failure.
     *
     * @param dictPath path of the dictionary file
     * @param dict map that receives the index-to-term entries
     * @throws IllegalArgumentException if {@code dictPath} is {@code null}
     * @throws IOException if the file cannot be read or a non-empty line has
     *                     fewer than three fields
     * @throws NumberFormatException if the third field is not a valid integer
     */
    private static void loadDictionary(String dictPath, Map<Integer, String> dict)
            throws IOException {
        if (dictPath == null) {
            throw new IllegalArgumentException(
                    "system property 'dict' (path of the dictionary file) is not set");
        }
        // NOTE(review): the file is decoded with the platform default charset,
        // as in the original code - confirm this matches how it was written.
        BufferedReader br = new BufferedReader(
                new InputStreamReader(new FileInputStream(dictPath)));
        try {
            for (int i = 0; i < DICT_HEADER_LINES; i++) {
                br.readLine();
            }
            int lineNo = DICT_HEADER_LINES;
            String line;
            while ((line = br.readLine()) != null) {
                lineNo++;
                if (line.length() == 0) {
                    // A blank line carries no entry; skip it instead of aborting.
                    continue;
                }
                String[] fields = line.split("\t");
                if (fields.length < 3) {
                    throw new IOException("malformed dictionary line " + lineNo
                            + ": expected at least 3 tab-separated fields but found "
                            + fields.length);
                }
                dict.put(Integer.parseInt(fields[2]), fields[0]);
            }
        } finally {
            // Always close the file, including when a line fails to parse.
            br.close();
        }
    }
}