}
public static Map<Integer, PriorityQueue<Pair<Double,String>>> getTopDocIdsByTopic(Path docTopicsPath, int numDocs) {
Map<Integer, PriorityQueue<Pair<Double,String>>> docIdMap = new HashMap<Integer, PriorityQueue<Pair<Double,String>>>();
Map<Integer, Double> maxDocScores = new HashMap<Integer,Double>();
SequenceFileDirectoryReader pointsReader = null;
try {
Text k = new Text();
VectorWritable vw = new VectorWritable();
pointsReader = new SequenceFileDirectoryReader(docTopicsPath);
while (pointsReader.next(k, vw)) {
String docId = k.toString();
Vector normGamma = vw.get();
Iterator<Element> iter = normGamma.iterateNonZero();
double maxTopicScore = 0.0;
int idx = 0;
int topic = 0;
while(iter.hasNext()) {
Element e = iter.next();
double score = e.get();
if (score > maxTopicScore) {
maxTopicScore = score;
topic = idx;
}
idx++;
}
PriorityQueue<Pair<Double,String>> docIdsForTopic = docIdMap.get(topic);
if (docIdsForTopic == null) {
docIdsForTopic = new PriorityQueue<Pair<Double,String>>(numDocs);
}
Double maxDocScoreForTopic = maxDocScores.get(topic);
if (maxDocScoreForTopic == null) {
maxDocScoreForTopic = 0.0;
}
if (maxTopicScore > maxDocScoreForTopic) {
maxDocScores.put(topic, maxTopicScore);
}
enqueue(docIdsForTopic, docId, maxTopicScore, numDocs);
docIdMap.put(topic, docIdsForTopic);
}
} catch (IOException e) {
LOG.error("IOException caught while reading clustered points", e);
} finally {
if (pointsReader != null) {
pointsReader.close();
}
}
for (Map.Entry<Integer, Double> entry : maxDocScores.entrySet()) {
System.out.println("For topic: " + entry.getKey() + " max score: " + entry.getValue());