package uk.ac.cam.ha293.tweetlabel.eval;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.util.List;
import java.util.Map;
import java.util.HashMap;
import java.util.Set;
import java.util.HashSet;
import uk.ac.cam.ha293.tweetlabel.classify.FullAlchemyClassification;
import uk.ac.cam.ha293.tweetlabel.classify.FullCalaisClassification;
import uk.ac.cam.ha293.tweetlabel.classify.FullTextwiseClassification;
import uk.ac.cam.ha293.tweetlabel.topics.FullLLDAClassification;
import uk.ac.cam.ha293.tweetlabel.topics.FullSVMClassification;
import uk.ac.cam.ha293.tweetlabel.util.Tools;
//Compares inferred topic classifications (L-LDA and SVM) against the baseline
//classifications (Alchemy, Calais, Textwise) using cosine similarity, and also
//works out which topics are inferred most reliably.
public class CosineManager {
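//Per-user cosine similarity between the baseline classification for topicType and the
//L-LDA classification inferred with the given alpha; prints the mean and population
//standard deviation (tab-separated).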
public static void similarityReport(String topicType, double alpha) {
List<Long> uids = Tools.getCSVUserIDs();
double cosineSum = 0.0;
int cosineCount = 0;
double squareSum = 0.0;
for(Long uid : uids) {
if(topicType.equals("alchemy")) {
FullAlchemyClassification baseline = new FullAlchemyClassification(uid);
FullLLDAClassification inferred = new FullLLDAClassification(topicType,alpha,uid);
double sim = inferred.cosineSimilarity(baseline);
cosineSum += sim;
squareSum += sim*sim;
cosineCount++;
} else if(topicType.equals("calais")) {
FullCalaisClassification baseline = new FullCalaisClassification(uid);
FullLLDAClassification inferred = new FullLLDAClassification(topicType,alpha,uid);
double sim = inferred.cosineSimilarity(baseline);
cosineSum += sim;
squareSum += sim*sim;
cosineCount++;
} else if(topicType.equals("textwise")) {
FullTextwiseClassification baseline = new FullTextwiseClassification(uid,true);
FullLLDAClassification inferred = new FullLLDAClassification("textwiseproper",alpha,uid);
double sim = inferred.cosineSimilarity(baseline);
cosineSum += sim;
squareSum += sim*sim;
cosineCount++;
}
//System.out.println("UID:"+uid+", CS:"+sim);
//if(cosineCount % 100 == 0) System.out.println(cosineCount);
}
double sd = Math.sqrt((1.0/cosineCount)*squareSum - Math.pow((cosineSum/cosineCount),2));
double avg = cosineSum/cosineCount;
System.out.println(topicType+"\t"+alpha+"\t"+avg+"\t"+sd);
}
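//All of the report methods in this class accumulate a sum, a sum of squares and a count,
//then print mean = sum/n and the population standard deviation sqrt(sumSq/n - mean^2).
//A shared helper along these lines (a hypothetical sketch, not called anywhere in this class)
//could replace the duplicated arithmetic:
private static void printMeanAndSd(String label, double sum, double squareSum, int count) {
double mean = sum/count;
double sd = Math.sqrt(squareSum/count - mean*mean);
System.out.println(label+"\t"+mean+"\t"+sd);
}
//Same report as similarityReport, but cosine similarity is restricted to the top k
//categories of each profile (see the cosineKSimilarity overloads below).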
public static void kSimilarityReport(String topicType, double alpha, int k) {
List<Long> uids = Tools.getCSVUserIDs();
double cosineSum = 0.0;
int cosineCount = 0;
double squareSum = 0.0;
for(Long uid : uids) {
if(topicType.equals("alchemy")) {
FullAlchemyClassification baseline = new FullAlchemyClassification(uid);
FullLLDAClassification inferred = new FullLLDAClassification(topicType,alpha,uid);
double sim = cosineKSimilarity(baseline,inferred,k);
cosineSum += sim;
squareSum += sim*sim;
cosineCount++;
} else if(topicType.equals("calais")) {
FullCalaisClassification baseline = new FullCalaisClassification(uid);
FullLLDAClassification inferred = new FullLLDAClassification(topicType,alpha,uid);
double sim = cosineKSimilarity(baseline,inferred,k);
cosineSum += sim;
squareSum += sim*sim;
cosineCount++;
} else if(topicType.equals("textwise")) {
FullTextwiseClassification baseline = new FullTextwiseClassification(uid,true);
FullLLDAClassification inferred = new FullLLDAClassification("textwiseproper",alpha,uid);
double sim = cosineKSimilarity(baseline,inferred,k);
cosineSum += sim;
squareSum += sim*sim;
cosineCount++;
}
//System.out.println("UID:"+uid+", CS:"+sim);
//if(cosineCount % 100 == 0) System.out.println(cosineCount);
}
double sd = Math.sqrt((1.0/cosineCount)*squareSum - Math.pow((cosineSum/cosineCount),2));
double avg = cosineSum/cosineCount;
System.out.println(topicType+"\t"+alpha+"\t"+avg+"\t"+sd);
}
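//Variant of similarityReport for L-LDA models trained on a reduced profile set
//(fewerProfiles/reduction); users with an empty inferred category set are skipped.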
public static void similarityReport(String topicType, double alpha, boolean fewerProfiles, int reduction) {
List<Long> uids = Tools.getCSVUserIDs();
double cosineSum = 0.0;
int cosineCount = 0;
double squareSum = 0.0;
for(Long uid : uids) {
if(topicType.equals("alchemy")) {
FullAlchemyClassification baseline = new FullAlchemyClassification(uid);
FullLLDAClassification inferred = new FullLLDAClassification(topicType,alpha,fewerProfiles,reduction,uid);
if(inferred.getCategorySet().isEmpty()) continue;
double sim = inferred.cosineSimilarity(baseline);
cosineSum += sim;
squareSum += sim*sim;
cosineCount++;
} else if(topicType.equals("calais")) {
FullCalaisClassification baseline = new FullCalaisClassification(uid);
FullLLDAClassification inferred = new FullLLDAClassification(topicType,alpha,fewerProfiles,reduction,uid);
if(inferred.getCategorySet().isEmpty()) continue;
double sim = inferred.cosineSimilarity(baseline);
cosineSum += sim;
squareSum += sim*sim;
cosineCount++;
} else if(topicType.equals("textwise")) {
FullTextwiseClassification baseline = new FullTextwiseClassification(uid,true);
FullLLDAClassification inferred = new FullLLDAClassification("textwiseproper",alpha,fewerProfiles,reduction,uid);
if(inferred.getCategorySet().isEmpty()) continue;
double sim = inferred.cosineSimilarity(baseline);
cosineSum += sim;
squareSum += sim*sim;
cosineCount++;
}
//System.out.println("UID:"+uid+", CS:"+sim);
//if(cosineCount % 100 == 0) System.out.println(cosineCount);
}
double sd = Math.sqrt((1.0/cosineCount)*squareSum - Math.pow((cosineSum/cosineCount),2));
double avg = cosineSum/cosineCount;
System.out.println(topicType+"\t"+alpha+"\t"+avg+"\t"+sd);
}
public static void kSimilarityReport(String topicType, double alpha, boolean fewerProfiles, int reduction, int k) {
List<Long> uids = Tools.getCSVUserIDs();
double cosineSum = 0.0;
int cosineCount = 0;
double squareSum = 0.0;
for(Long uid : uids) {
if(topicType.equals("alchemy")) {
FullAlchemyClassification baseline = new FullAlchemyClassification(uid);
FullLLDAClassification inferred = new FullLLDAClassification(topicType,alpha,fewerProfiles,reduction,uid);
if(inferred.getCategorySet().isEmpty()) continue;
double sim = cosineKSimilarity(baseline,inferred,k);
cosineSum += sim;
squareSum += sim*sim;
cosineCount++;
} else if(topicType.equals("calais")) {
FullCalaisClassification baseline = new FullCalaisClassification(uid);
FullLLDAClassification inferred = new FullLLDAClassification(topicType,alpha,fewerProfiles,reduction,uid);
if(inferred.getCategorySet().isEmpty()) continue;
double sim = cosineKSimilarity(baseline,inferred,k);
cosineSum += sim;
squareSum += sim*sim;
cosineCount++;
} else if(topicType.equals("textwise")) {
FullTextwiseClassification baseline = new FullTextwiseClassification(uid,true);
FullLLDAClassification inferred = new FullLLDAClassification("textwiseproper",alpha,fewerProfiles,reduction,uid);
if(inferred.getCategorySet().isEmpty()) continue;
double sim = cosineKSimilarity(baseline,inferred,k);
cosineSum += sim;
squareSum += sim*sim;
cosineCount++;
}
//System.out.println("UID:"+uid+", CS:"+sim);
//if(cosineCount % 100 == 0) System.out.println(cosineCount);
}
double sd = Math.sqrt((1.0/cosineCount)*squareSum - Math.pow((cosineSum/cosineCount),2));
double avg = cosineSum/cosineCount;
System.out.println(topicType+"\t"+alpha+"\t"+avg+"\t"+sd);
}
public static void fullSimilarityReport() {
String[] topicTypes = {"alchemy","calais","textwise"};
double[] alphas = {0.25,0.5,0.75,1.0,1.25,1.5,1.75,2.0};
for(String topicType : topicTypes) {
for(double alpha : alphas) {
similarityReport(topicType,alpha);
}
}
}
public static void fullKSimilarityReport(int k) {
String[] topicTypes = {"alchemy","calais","textwise"};
double[] alphas = {0.25,0.5,0.75,1.0,1.25,1.5,1.75,2.0};
for(String topicType : topicTypes) {
for(double alpha : alphas) {
kSimilarityReport(topicType,alpha,k);
}
}
}
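//Compares the SVM classifications against the baseline classifications (instead of L-LDA)
//and prints the mean and population standard deviation of the per-user cosine similarity.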
public static void svmSimilarityReport(String topicType) {
double cosineSum = 0.0;
int cosineCount = 0;
double squareSum = 0.0;
for(Long uid : Tools.getCSVUserIDs()) {
if(topicType.equals("alchemy")) {
FullAlchemyClassification fac = new FullAlchemyClassification(uid);
FullSVMClassification fsm = new FullSVMClassification(topicType,uid);
double sim = fsm.cosineSimilarity(fac);
cosineSum += sim;
squareSum += sim*sim;
cosineCount++;
} else if(topicType.equals("calais")) {
FullCalaisClassification fcc = new FullCalaisClassification(uid);
FullSVMClassification fsm = new FullSVMClassification(topicType,uid);
double sim = fsm.cosineSimilarity(fcc);
cosineSum += sim;
squareSum += sim*sim;
cosineCount++;
} else if(topicType.equals("textwise")) {
FullTextwiseClassification ftc = new FullTextwiseClassification(uid,true);
FullSVMClassification fsm = new FullSVMClassification(topicType,uid);
double sim = fsm.cosineSimilarity(ftc);
cosineSum += sim;
squareSum += sim*sim;
cosineCount++;
}
}
double sd = Math.sqrt((1.0/cosineCount)*squareSum - Math.pow((cosineSum/cosineCount),2));
double avg = cosineSum/cosineCount;
System.out.println(topicType+"\t"+avg+"\t"+sd);
}
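//Top-k variant of svmSimilarityReport: cosine similarity is restricted to the top k
//categories of the baseline and SVM classifications.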
public static void topKSVMSimilarityReport(String topicType, int k) {
double cosineSum = 0.0;
int cosineCount = 0;
double squareSum = 0.0;
for(Long uid : Tools.getCSVUserIDs()) {
if(topicType.equals("alchemy")) {
FullAlchemyClassification fac = new FullAlchemyClassification(uid);
FullSVMClassification fsm = new FullSVMClassification(topicType,uid);
double sim = cosineKSimilarity(fac,fsm,k);
cosineSum += sim;
squareSum += sim*sim;
cosineCount++;
} else if(topicType.equals("calais")) {
FullCalaisClassification fcc = new FullCalaisClassification(uid);
FullSVMClassification fsm = new FullSVMClassification(topicType,uid);
double sim = cosineKSimilarity(fcc,fsm,k);
cosineSum += sim;
squareSum += sim*sim;
cosineCount++;
} else if(topicType.equals("textwise")) {
FullTextwiseClassification ftc = new FullTextwiseClassification(uid,true);
FullSVMClassification fsm = new FullSVMClassification(topicType,uid);
double sim = cosineKSimilarity(ftc,fsm,k);
cosineSum += sim;
squareSum += sim*sim;
cosineCount++;
}
}
double sd = Math.sqrt((1.0/cosineCount)*squareSum - Math.pow((cosineSum/cosineCount),2));
double avg = cosineSum/cosineCount;
System.out.println(topicType+"\t"+avg+"\t"+sd);
}
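//Groups users by the top topic of their baseline classification, then measures how well
//the L-LDA classification recovers that top topic: per-topic average cosine similarities
//are accumulated (their printout is currently commented out) and the proportion of
//profiles whose top topic was correctly inferred is printed per topic.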
public static void topicAnalysis(String topicType, double alpha) {
List<Long> uids = Tools.getCSVUserIDs();
Map<String,Set<Long>> profileSets = new HashMap<String,Set<Long>>();
Map<Long,String> userSetLookup = new HashMap<Long,String>();
//firstly create the baseline profile sets
//IMPORTANT: profiles with no classifications aren't included in sets!
for(Long uid : uids) {
String topTopic = "";
if(topicType.equals("alchemy")) {
FullAlchemyClassification c = new FullAlchemyClassification(uid);
if(c.getCategorySet().size()==0) continue;
topTopic = c.getCategorySet().toArray(new String[0])[0];
} else if(topicType.equals("calais")) {
FullCalaisClassification c = new FullCalaisClassification(uid);
if(c.getCategorySet().size()==0) continue;
topTopic = c.getCategorySet().toArray(new String[0])[0];
} else if(topicType.equals("textwiseproper")) {
FullTextwiseClassification c = new FullTextwiseClassification(uid,true);
if(c.getCategorySet().size()==0) continue;
topTopic = c.getCategorySet().toArray(new String[0])[0];
}
if(profileSets.containsKey(topTopic)) {
profileSets.get(topTopic).add(uid);
} else {
Set<Long> newSet = new HashSet<Long>();
newSet.add(uid);
profileSets.put(topTopic,newSet);
}
userSetLookup.put(uid, topTopic);
}
//System.out.println("Created top-topic profile sets, "+profileSets.size()+" sets in total");
//then look up the inferred topics for each profile and compare it to the sets
Map<String,Double> averageTopicCosinesSum = new HashMap<String,Double>();
Map<String,Integer> averageTopicCosinesCount = new HashMap<String,Integer>();
Map<String,Integer> correctTopTopicClassifications = new HashMap<String,Integer>();
Map<Long,Double> similarities = CosineManager.loadCosineSimilarities(topicType, alpha);
for(Long uid : uids) {
//skip users that had no baseline classification: they were never added to a profile set above
String setName = userSetLookup.get(uid);
if(setName == null) continue;
//work out cosine similarity, contribute to the per-topic average
double sim = similarities.get(uid);
if(averageTopicCosinesSum.containsKey(setName)) {
averageTopicCosinesSum.put(setName, averageTopicCosinesSum.get(setName)+sim);
averageTopicCosinesCount.put(setName, averageTopicCosinesCount.get(setName)+1);
} else {
averageTopicCosinesSum.put(setName, sim);
averageTopicCosinesCount.put(setName, 1);
}
//work out top-topic, does it match?
FullLLDAClassification llda = new FullLLDAClassification(topicType,alpha,uid);
if(llda.getCategorySet().size()==0) continue;
String topTopic = llda.getCategorySet().toArray(new String[0])[0];
if(topTopic.equals(userSetLookup.get(uid))) {
//managed to correctly infer the most prominent topic
if(correctTopTopicClassifications.containsKey(topTopic)) {
correctTopTopicClassifications.put(topTopic, correctTopTopicClassifications.get(topTopic)+1);
} else {
correctTopTopicClassifications.put(topTopic, 1);
}
}
}
/*
//normalise and sort hashmaps for topic average cosines
for(String topic : averageTopicCosinesSum.keySet()) {
averageTopicCosinesSum.put(topic, averageTopicCosinesSum.get(topic)/averageTopicCosinesCount.get(topic));
}
Map<String,Double> averageTopicCosines = Tools.sortMapByValueDesc(averageTopicCosinesSum);
System.out.println("Average cosine similarities per top-topic profile for "+topicType+" "+alpha);
for(String topic : averageTopicCosines.keySet()) {
System.out.println(topic+","+averageTopicCosines.get(topic));
}
System.out.println();
*/
//normalise and sort hashmaps for top-topic assignments
Map<String,Double> correctTopTopicProportions = new HashMap<String,Double>();
for(String topic : correctTopTopicClassifications.keySet()) {
//change the map so that it stores (# of correct classifications for topic / total number of profiles in topic)
correctTopTopicProportions.put(topic, (double)correctTopTopicClassifications.get(topic)/profileSets.get(topic).size());
}
correctTopTopicProportions = Tools.sortMapByValueDesc(correctTopTopicProportions);
System.out.println("% of profiles with top-topic correctly inferred for "+topicType+" "+alpha);
/*
for(String topic : correctTopTopicProportions.keySet()) {
System.out.println(topic+","+correctTopTopicProportions.get(topic));
}*/
for(String topic : correctTopTopicProportions.keySet()) {
System.out.println(topic);
}
for(String topic : correctTopTopicProportions.keySet()) {
System.out.println(correctTopTopicProportions.get(topic));
}
}
public static void fullTopicAnalysis() {
String[] topicTypes = {"alchemy","calais","textwise"};
double[] alphas = {0.25,0.5,0.75,1.0,1.25,1.5,1.75,2.0};
for(String topicType : topicTypes) {
for(double alpha : alphas) {
topicAnalysis(topicType,alpha);
}
}
}
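//Writes one CSV per topic type and alpha to cosinesims/<topicType>-<alpha>.csv, containing
//the per-user cosine similarity between the baseline and L-LDA classifications.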
public static void createCosineSimilarities() {
//String[] topicTypes = {"alchemy","calais","textwise"};
String[] topicTypes = {"textwiseproper"};
double[] alphas = {0.25,0.5,0.75,1.0,1.25,1.5,1.75,2.0};
List<Long> uids = Tools.getCSVUserIDs();
try {
for(String topicType : topicTypes) {
for(double alpha : alphas) {
System.out.println("Saving cosine sims for "+topicType+" "+alpha);
FileOutputStream fileOut = new FileOutputStream("cosinesims/"+topicType+"-"+alpha+".csv");
PrintWriter writeOut = new PrintWriter(fileOut);
writeOut.println("\"uid\",\"similarity\"");
if(topicType.equals("alchemy")) {
for(long uid : uids) {
FullAlchemyClassification baseline = new FullAlchemyClassification(uid);
FullLLDAClassification llda = new FullLLDAClassification(topicType,alpha,uid);
writeOut.println(uid+","+llda.cosineSimilarity(baseline));
}
} else if(topicType.equals("calais")) {
for(long uid : uids) {
FullCalaisClassification baseline = new FullCalaisClassification(uid);
FullLLDAClassification llda = new FullLLDAClassification(topicType,alpha,uid);
writeOut.println(uid+","+llda.cosineSimilarity(baseline));
}
} else if(topicType.equals("textwise")) {
for(long uid : uids) {
FullTextwiseClassification baseline = new FullTextwiseClassification(uid,false);
FullLLDAClassification llda = new FullLLDAClassification(topicType,alpha,uid);
writeOut.println(uid+","+llda.cosineSimilarity(baseline));
}
}else if(topicType.equals("textwiseproper")) {
for(long uid : uids) {
FullTextwiseClassification baseline = new FullTextwiseClassification(uid,true);
FullLLDAClassification llda = new FullLLDAClassification(topicType,alpha,uid);
writeOut.println(uid+","+llda.cosineSimilarity(baseline));
}
}
writeOut.close();
fileOut.close();
}
}
} catch (Exception e) {
e.printStackTrace();
}
}
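//Loads a per-user cosine-similarity CSV previously written by createCosineSimilarities,
//e.g. loadCosineSimilarities("alchemy", 1.0) reads cosinesims/alchemy-1.0.csv.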
public static Map<Long,Double> loadCosineSimilarities(String topicType, double alpha) {
Map<Long,Double> result = new HashMap<Long,Double>();
String nextLine = "";
String[] split = new String[2];
try {
FileInputStream fileIn = new FileInputStream("cosinesims/"+topicType+"-"+alpha+".csv");
BufferedReader buffer = new BufferedReader(new InputStreamReader(fileIn));
buffer.readLine(); //skip the CSV header line
while(true) {
nextLine = buffer.readLine();
//stop once the end of the file has been reached
if(nextLine == null) {
break;
}
split = nextLine.split(",");
result.put(Long.parseLong(split[0]), Double.parseDouble(split[1]));
}
} catch (Exception e) {
System.out.println(nextLine);
System.out.println(split[0]);
e.printStackTrace();
}
return result;
}
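//Cosine similarity between two sparse category-score vectors held as maps; returns 0.0
//for zero vectors (i.e. profiles with no classifications) rather than NaN.
//Worked example with made-up scores: {sports=0.8, music=0.6} and {sports=0.6, politics=0.8}
//both have magnitude 1.0 and share only "sports", giving (0.8*0.6)/(1.0*1.0) = 0.48.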
public static double cosineSimilarity(Map<String,Double> v1, Map<String,Double> v2) {
double sum = 0.0;
for(String cat : v1.keySet()) {
sum += (v1.get(cat)*v1.get(cat));
}
double v1mag = Math.sqrt(sum);
sum = 0.0;
for(String cat : v2.keySet()) {
sum += (v2.get(cat)*v2.get(cat));
}
double v2mag = Math.sqrt(sum);
Set<String> catSet = new HashSet<String>();
catSet.addAll(v1.keySet());
catSet.addAll(v2.keySet());
double score = 0.0;
for(String cat : catSet) {
if(v1.containsKey(cat) && v2.containsKey(cat)) {
score += (v1.get(cat) * v2.get(cat));
}
}
double magnitudes = v1mag * v2mag;
score /= magnitudes;
if(Double.isNaN(score)) {
return 0.0; //NaN caused by zero vectors ie no classifications!
}
else return score;
}
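//Same per-user report as similarityReport, but using Jensen-Shannon divergence between
//the baseline and L-LDA classifications instead of cosine similarity (lower is more similar).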
public static void jsSimilarityReport(String topicType, double alpha) {
//System.out.println("JS Divergences to Baseline for "+topicType+" "+alpha+":");
List<Long> uids = Tools.getCSVUserIDs();
double cosineSum = 0.0;
double squareSum = 0.0;
int cosineCount = 0;
for(Long uid : uids) {
//System.out.println(cosineCount);
if(topicType.equals("alchemy")) {
FullAlchemyClassification baseline = new FullAlchemyClassification(uid);
FullLLDAClassification inferred = new FullLLDAClassification(topicType,alpha,uid);
double sim = inferred.jsDivergence(baseline);
cosineSum += sim;
squareSum += sim*sim;
cosineCount++;
} else if(topicType.equals("calais")) {
FullCalaisClassification baseline = new FullCalaisClassification(uid);
FullLLDAClassification inferred = new FullLLDAClassification(topicType,alpha,uid);
double sim = inferred.jsDivergence(baseline);
cosineSum += sim;
squareSum += sim*sim;
cosineCount++;
} else if(topicType.equals("textwise")) {
FullTextwiseClassification baseline = new FullTextwiseClassification(uid,true);
FullLLDAClassification inferred = new FullLLDAClassification("textwiseproper",alpha,uid);
double sim = inferred.jsDivergence(baseline);
cosineSum += sim;
squareSum += sim*sim;
cosineCount++;
}
//System.out.println("UID:"+uid+", CS:"+sim);
//if(cosineCount % 100 == 0) System.out.println(cosineCount);
}
double sd = Math.sqrt((1.0/cosineCount)*squareSum - Math.pow((cosineSum/cosineCount),2));
System.out.println(topicType+","+alpha+","+cosineSum/cosineCount+","+sd);
}
public static void fullJSSimilarityReport() {
String[] topicTypes = {"alchemy","calais","textwise"};
double[] alphas = {0.25,0.5,0.75,1.0,1.25,1.5,1.75,2.0};
for(String topicType : topicTypes) {
for(double alpha : alphas) {
jsSimilarityReport(topicType,alpha);
}
}
}
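//Treats each user's top baseline topic as ground truth and the top L-LDA (or SVM) topic as
//the prediction, then prints per-topic precision, recall and F measure. Calais profiles whose
//top topic is "Other" fall back to their second topic, and profiles with no baseline
//classification are excluded.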
public static void precisionRecallReport(String topicType, double alpha, boolean svm) {
Map<String,Set<Long>> gtTopicSets = new HashMap<String,Set<Long>>();
Map<String,Set<Long>> lldaTopicSets = new HashMap<String,Set<Long>>();
for(String topic : Tools.getTopics(topicType)) {
gtTopicSets.put(topic, new HashSet<Long>());
lldaTopicSets.put(topic, new HashSet<Long>());
}
Set<Long> noClassifications = new HashSet<Long>();
for(Long uid : Tools.getCSVUserIDs()) {
String topTopic = "";
if(topicType.equals("alchemy")) {
FullAlchemyClassification c = new FullAlchemyClassification(uid);
if(c.getCategorySet().size()==0) {
noClassifications.add(uid);
continue;
}
topTopic = c.getCategorySet().toArray(new String[1])[0];
} else if(topicType.equals("calais")) {
FullCalaisClassification c = new FullCalaisClassification(uid);
if(c.getCategorySet().size()==0) {
noClassifications.add(uid);
continue;
}
topTopic = c.getCategorySet().toArray(new String[1])[0];
if(topTopic.equals("Other") && c.getCategorySet().size() < 2) {
noClassifications.add(uid);
continue;
} else if(topTopic.equals("Other")) {
topTopic = c.getCategorySet().toArray(new String[1])[1];
}
} else if(topicType.equals("textwise")) {
FullTextwiseClassification c = new FullTextwiseClassification(uid,true);
if(c.getCategorySet().size()==0) {
noClassifications.add(uid);
continue;
}
topTopic = c.getCategorySet().toArray(new String[1])[0];
}
if(svm) {
FullSVMClassification svmClassification = new FullSVMClassification(topicType,uid);
String topSVMTopic = svmClassification.getCategorySet().toArray(new String[1])[0];
gtTopicSets.get(topTopic).add(uid);
lldaTopicSets.get(topSVMTopic).add(uid);
} else {
FullLLDAClassification llda = new FullLLDAClassification(topicType,alpha,uid);
if(topicType.equals("textwise")) llda = new FullLLDAClassification("textwiseproper",alpha,uid);
String topLLDATopic = llda.getCategorySet().toArray(new String[1])[0];
gtTopicSets.get(topTopic).add(uid);
lldaTopicSets.get(topLLDATopic).add(uid);
}
}
Map<String,Double> topicPrecisions = new HashMap<String,Double>();
Map<String,Double> topicRecalls = new HashMap<String,Double>();
Map<String,Double> topicFs = new HashMap<String,Double>();
for(String topic : gtTopicSets.keySet()) {
Set<Long> gtSet = gtTopicSets.get(topic);
Set<Long> lldaSet = lldaTopicSets.get(topic);
Set<Long> intersection = new HashSet<Long>(gtSet);
intersection.retainAll(lldaSet);
//System.out.println(topic+", intersection: "+intersection.size()+", retrieved: "+lldaSet.size()+", relevant: "+gtSet.size());
double precision = (double)intersection.size() / lldaSet.size();
double recall = (double)intersection.size() / gtSet.size();
double f = (2*precision*recall)/(precision+recall);
topicPrecisions.put(topic, precision);
topicRecalls.put(topic, recall);
topicFs.put(topic, f);
}
topicFs = Tools.sortMapByValueDesc(topicFs);
System.out.println("TOPIC RESULTS FOR "+topicType+" alpha "+alpha);
System.out.println("topic\tprecision\trecall\tF");
for(String topic : topicFs.keySet()) {
System.out.println(topic+"\t"+topicPrecisions.get(topic)+"\t"+topicRecalls.get(topic)+"\t"+topicFs.get(topic));
}
}
public static void fullPR(boolean svm) {
String[] topicTypes = {"textwise"};
double[] alphas = {0.25,0.5,0.75,1.0,1.25,1.5,1.75,2.0};
for(String topicType : topicTypes) {
for(double alpha : alphas) {
precisionRecallReport(topicType,alpha,svm);
}
}
}
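//The overloads below compute cosine similarity restricted to the top k categories of each
//classification. The category sets are assumed to be ordered by descending score, and the
//magnitudes are accumulated over the shared top-k categories only.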
public static double cosineKSimilarity(FullAlchemyClassification c1, FullSVMClassification c2, int k) {
Set<String> catSet = new HashSet<String>();
String[] topics1 = c1.getCategorySet().toArray(new String[1]);
String[] topics2 = c2.getCategorySet().toArray(new String[1]);
for(int i=0;i<k;i++) {
if(i<topics1.length) catSet.add(topics1[i]);
if(i<topics2.length) catSet.add(topics2[i]);
}
double mag1 = 0.0;
double mag2 = 0.0;
double score = 0.0;
for(String cat : catSet) {
if(c1.hasCategory(cat) && c2.hasCategory(cat)) {
score += (c1.getScore(cat) * c2.getScore(cat));
mag1 += c1.getScore(cat)*c1.getScore(cat);
mag2 += c2.getScore(cat)*c2.getScore(cat);
}
}
mag1 = Math.sqrt(mag1);
mag2 = Math.sqrt(mag2);
//normalise by the two top-k magnitudes
double magnitudes = mag1 * mag2;
score /= magnitudes;
if(Double.isNaN(score)) {
return 0.0; //NaN caused by zero vectors ie no classifications!
}
else return score;
}
public static double cosineKSimilarity(FullCalaisClassification c1, FullSVMClassification c2, int k) {
Set<String> catSet = new HashSet<String>();
String[] topics1 = c1.getCategorySet().toArray(new String[1]);
String[] topics2 = c2.getCategorySet().toArray(new String[1]);
for(int i=0;i<k;i++) {
if(i<topics1.length) catSet.add(topics1[i]);
if(i<topics2.length) catSet.add(topics2[i]);
}
double mag1 = 0.0;
double mag2 = 0.0;
double score = 0.0;
for(String cat : catSet) {
if(c1.hasCategory(cat) && c2.hasCategory(cat)) {
score += (c1.getScore(cat) * c2.getScore(cat));
mag1 += c1.getScore(cat)*c1.getScore(cat);
mag2 += c2.getScore(cat)*c2.getScore(cat);
}
}
mag1 = Math.sqrt(mag1);
mag2 = Math.sqrt(mag2);
//normalise by the two top-k magnitudes
double magnitudes = mag1 * mag2;
score /= magnitudes;
if(Double.isNaN(score)) {
return 0.0; //NaN caused by zero vectors ie no classifications!
}
else return score;
}
public static double cosineKSimilarity(FullTextwiseClassification c1, FullSVMClassification c2, int k) {
Set<String> catSet = new HashSet<String>();
String[] topics1 = c1.getCategorySet().toArray(new String[1]);
String[] topics2 = c2.getCategorySet().toArray(new String[1]);
for(int i=0;i<k;i++) {
if(i<topics1.length) catSet.add(topics1[i]);
if(i<topics2.length) catSet.add(topics2[i]);
}
double mag1 = 0.0;
double mag2 = 0.0;
double score = 0.0;
for(String cat : catSet) {
if(c1.hasCategory(cat) && c2.hasCategory(cat)) {
score += (c1.getScore(cat) * c2.getScore(cat));
mag1 += c1.getScore(cat)*c1.getScore(cat);
mag2 += c2.getScore(cat)*c2.getScore(cat);
}
}
mag1 = Math.sqrt(mag1);
mag2 = Math.sqrt(mag2);
//normalise by the two top-k magnitudes
double magnitudes = mag1 * mag2;
score /= magnitudes;
if(Double.isNaN(score)) {
return 0.0; //NaN caused by zero vectors ie no classifications!
}
else return score;
}
public static double cosineKSimilarity(FullAlchemyClassification c1, FullLLDAClassification c2, int k) {
Set<String> catSet = new HashSet<String>();
String[] topics1 = c1.getCategorySet().toArray(new String[1]);
String[] topics2 = c2.getCategorySet().toArray(new String[1]);
for(int i=0;i<k;i++) {
if(i<topics1.length) catSet.add(topics1[i]);
if(i<topics2.length) catSet.add(topics2[i]);
}
double mag1 = 0.0;
double mag2 = 0.0;
double score = 0.0;
for(String cat : catSet) {
if(c1.hasCategory(cat) && c2.hasCategory(cat)) {
score += (c1.getScore(cat) * c2.getScore(cat));
mag1 += c1.getScore(cat)*c1.getScore(cat);
mag2 += c2.getScore(cat)*c2.getScore(cat);
}
}
mag1 = Math.sqrt(mag1);
mag2 = Math.sqrt(mag2);
//normalise by magnitudes
double magnitudes = mag1 * mag2;
score /= magnitudes;
if(Double.isNaN(score)) {
return 0.0; //NaN caused by zero vectors ie no classifications!
}
else return score;
}
public static double cosineKSimilarity(FullCalaisClassification c1, FullLLDAClassification c2, int k) {
Set<String> catSet = new HashSet<String>();
String[] topics1 = c1.getCategorySet().toArray(new String[1]);
String[] topics2 = c2.getCategorySet().toArray(new String[1]);
for(int i=0;i<k;i++) {
if(i<topics1.length) catSet.add(topics1[i]);
if(i<topics2.length) catSet.add(topics2[i]);
}
double mag1 = 0.0;
double mag2 = 0.0;
double score = 0.0;
for(String cat : catSet) {
if(c1.hasCategory(cat) && c2.hasCategory(cat)) {
score += (c1.getScore(cat) * c2.getScore(cat));
mag1 += c1.getScore(cat)*c1.getScore(cat);
mag2 += c2.getScore(cat)*c2.getScore(cat);
}
}
mag1 = Math.sqrt(mag1);
mag2 = Math.sqrt(mag2);
//normalise by magnitudes
double magnitudes = mag1 * mag2;
score /= magnitudes;
if(Double.isNaN(score)) {
return 0.0; //NaN caused by zero vectors ie no classifications!
}
else return score;
}
public static double cosineKSimilarity(FullTextwiseClassification c1, FullLLDAClassification c2, int k) {
Set<String> catSet = new HashSet<String>();
String[] topics1 = c1.getCategorySet().toArray(new String[1]);
String[] topics2 = c2.getCategorySet().toArray(new String[1]);
for(int i=0;i<k;i++) {
if(i<topics1.length) catSet.add(topics1[i]);
if(i<topics2.length) catSet.add(topics2[i]);
}
double mag1 = 0.0;
double mag2 = 0.0;
double score = 0.0;
for(String cat : catSet) {
if(c1.hasCategory(cat) && c2.hasCategory(cat)) {
score += (c1.getScore(cat) * c2.getScore(cat));
mag1 += c1.getScore(cat)*c1.getScore(cat);
mag2 += c2.getScore(cat)*c2.getScore(cat);
}
}
mag1 = Math.sqrt(mag1);
mag2 = Math.sqrt(mag2);
//normalise by magnitudes
double magnitudes = mag1 * mag2;
score /= magnitudes;
if(Double.isNaN(score)) {
return 0.0; //NaN caused by zero vectors ie no classifications!
}
else return score;
}
public static double cosineKSimilarity(FullLLDAClassification c1, FullLLDAClassification c2, int k) {
Set<String> catSet = new HashSet<String>();
String[] topics1 = c1.getCategorySet().toArray(new String[1]);
String[] topics2 = c2.getCategorySet().toArray(new String[1]);
for(int i=0;i<k;i++) {
if(i<topics1.length) catSet.add(topics1[i]);
if(i<topics2.length) catSet.add(topics2[i]);
}
double mag1 = 0.0;
double mag2 = 0.0;
double score = 0.0;
for(String cat : catSet) {
if(c1.hasCategory(cat) && c2.hasCategory(cat)) {
score += (c1.getScore(cat) * c2.getScore(cat));
mag1 += c1.getScore(cat)*c1.getScore(cat);
mag2 += c2.getScore(cat)*c2.getScore(cat);
}
}
mag1 = Math.sqrt(mag1);
mag2 = Math.sqrt(mag2);
//normalise by magnitudes
double magnitudes = mag1 * mag2;
score /= magnitudes;
if(Double.isNaN(score)) {
return 0.0; //NaN caused by zero vectors ie no classifications!
}
else return score;
}
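//The six overloads above share the same logic. As an illustrative sketch only (it is not
//referenced anywhere in this class), the computation could be collapsed into a single
//helper operating on ordered category-to-score maps, assuming each classification type can
//expose such a map (a hypothetical accessor, not an existing method):
private static double cosineKSimilarity(Map<String,Double> scores1, Map<String,Double> scores2, int k) {
//take the top k categories from each side (assumes map iteration order is by descending score)
Set<String> catSet = new HashSet<String>();
int i = 0;
for(String cat : scores1.keySet()) {
if(i++ >= k) break;
catSet.add(cat);
}
i = 0;
for(String cat : scores2.keySet()) {
if(i++ >= k) break;
catSet.add(cat);
}
double mag1 = 0.0;
double mag2 = 0.0;
double score = 0.0;
for(String cat : catSet) {
if(scores1.containsKey(cat) && scores2.containsKey(cat)) {
score += scores1.get(cat) * scores2.get(cat);
mag1 += scores1.get(cat) * scores1.get(cat);
mag2 += scores2.get(cat) * scores2.get(cat);
}
}
//normalise by the top-k magnitudes; zero vectors would give NaN, so return 0.0 instead
score /= (Math.sqrt(mag1) * Math.sqrt(mag2));
if(Double.isNaN(score)) return 0.0;
return score;
}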
}