Package uk.ac.cam.ha293.tweetlabel.classify

Examples of uk.ac.cam.ha293.tweetlabel.classify.TextwiseClassifier


    Map<String,Integer> urlCategoryCounts = new HashMap<String,Integer>();
   
    for(SimpleTweet tweet : tweets) {
      //System.err.println(tweet.getText());
     
      TextwiseClassification textClassification = TextwiseClassifier.classify(tweet.getText(), false);
     
      textClassification.print();
     
      for(String category : textClassification.getCategories()) {
        if(textCategoryScores.containsKey(category)) {
          textCategoryScores.put(category, textCategoryScores.get(category) + textClassification.lookupScore(category));
          textCategoryCounts.put(category, textCategoryCounts.get(category) + 1);
        } else {
          textCategoryScores.put(category, textClassification.lookupScore(category));
          textCategoryCounts.put(category, 1);
        }
      }
     
      for(String url : tweet.getUrls()) {
       
        //System.err.println(url);
       
        TextwiseClassification urlClassification = TextwiseClassifier.classify(url, true);
       
        urlClassification.print();
       
        for(String category : urlClassification.getCategories()) {
          if(urlCategoryScores.containsKey(category)) {
            urlCategoryScores.put(category, urlCategoryScores.get(category) + urlClassification.lookupScore(category));
            urlCategoryCounts.put(category, urlCategoryCounts.get(category) + 1);
          } else {
            urlCategoryScores.put(category, urlClassification.lookupScore(category));
            urlCategoryCounts.put(category, 1);
          }
        }
      }
     
View Full Code Here


    }
   
    Map<String,Double> classifications = new HashMap<String,Double>();
    Map<String,Integer> classificationsCount = new HashMap<String,Integer>();
    for(int i=0; i<concat.length()-500; i+=500) {
      TextwiseClassification textClassification;
      if(i+500>=concat.length()) textClassification = TextwiseClassifier.classify(concat.substring(i,concat.length()), false);
      else textClassification = TextwiseClassifier.classify(concat.substring(i,i+500), false)
      Map<String,Double> scores = textClassification.getCategoryScores();
      for(String cat : textClassification.getCategories()) {
        if(classifications.containsKey(cat)) {
          classifications.put(cat,classifications.get(cat)+scores.get(cat));
          classificationsCount.put(cat,classificationsCount.get(cat)+1);
        } else {
          classifications.put(cat,scores.get(cat));
View Full Code Here

  public static void similarityStuff() {
    String[] topicTypes = {"alchemy","calais","textwiseproper"};
    double[] alphas = {0.25,0.5,0.75,1.0,1.25,1.5,1.75,2.0};
   
    for(double alpha : alphas) {
      SimilarityMatrix lda = SimilarityMatrix.load("lda-50-1000-100-"+alpha);
      for(String topicType : topicTypes) {
        SimilarityMatrix llda = SimilarityMatrix.load("llda-"+topicType+"-"+alpha);
        System.out.println(topicType+"\t"+alpha+"\t"+SpearmanRank.srcc(lda,llda));
      }
    }

   
View Full Code Here

  public static void srccKFinder(String topicType, double alpha) {
    System.out.println("Finding optimal SRCC K for "+topicType+" "+alpha);
    String[] topics = Tools.getTopics(topicType);
    int maxK = topics.length;
    for(int k=1; k<=maxK; k++) {
      SimilarityMatrix baseline = new SimilarityMatrix(2506);
      baseline.fillRestricted(true, topicType, k, 0);
      SimilarityMatrix llda = new SimilarityMatrix(2506);
      llda.fillRestricted(false, topicType, k, alpha);
      System.out.println(alpha+","+topicType+","+k+","+SpearmanRank.jscSRCC(baseline, llda));
    }
  }
View Full Code Here

        //if(fac.getScore(topic) < scoreThreshold) break; //stop getting low-prob topics
        topics.add(topic);
        count++;
      }
    }else if(topicType.equals("liwc")) {
      FullLIWCClassification flc = new FullLIWCClassification(false,userID);
      int topTopics = 3;
      int count = 0;
      for(String topic : flc.getCategorySet()) {
        if(count == topTopics) break;
        topics.add(topic);
        count++;
      }
    } else if(topicType.equals("liwcnb")) {
      FullLIWCClassification flc = new FullLIWCClassification(true,userID);
      int topTopics = 3;
      double threshold = 0.2;
      int count = 0;
      for(String topic : flc.getCategorySet()) {
        if(count == topTopics) break;
        if(flc.getScore(topic) < threshold) break;
        topics.add(topic);
        count++;
      }
    } else {
      System.out.println("invalid topic type");
View Full Code Here

 
  public void fillLIWC(boolean naiveBayes) {
    System.out.println("Filling from LIWC classifications, NB="+naiveBayes);
    FullLIWCClassification[] classifications = new FullLIWCClassification[d];
    for(long id : Tools.getCSVUserIDs()) {
      classifications[indexLookup.get(id)] = new FullLIWCClassification(naiveBayes,id);
    }
   
    //cosine similarities!
    for(int m=0; m<d; m++) {
      System.out.println("On row "+m);
View Full Code Here

  public void fillLDAAndSave(int numTopics, int burn, int sample, double alpha) {
    //get classifications
    System.out.println("Filling from LDA classifications");
    FullLDAClassification[] classifications = new FullLDAClassification[d];
    for(long id : Tools.getCSVUserIDs()) {
      classifications[indexLookup.get(id)] = new FullLDAClassification(id,numTopics,burn,sample,alpha);
    }
   
    //cosine similarities!
    for(int m=0; m<d; m++) {
      System.out.println("On row "+m);
View Full Code Here

  public void fillLDAJS(int numTopics, int burn, int sample, double alpha) {
    //get classifications
    System.out.println("Filling from LDA classifications");
    FullLDAClassification[] classifications = new FullLDAClassification[d];
    for(long id : Tools.getCSVUserIDs()) {
      classifications[indexLookup.get(id)] = new FullLDAClassification(id,numTopics,burn,sample,alpha);
    }
   
    //cosine similarities!
    for(int m=0; m<d; m++) {
      System.out.println("On row "+m);
View Full Code Here

  }
 
  public static Set<Double> diversitySet(String topicType, double alpha, long uid) {
    Set<Double> valueSet = new HashSet<Double>();
    if(topicType.equals("lda")) {
      FullLDAClassification c = new FullLDAClassification(uid,1000,100,0,alpha);
      for(String cat : c.getCategorySet()) {
        valueSet.add(c.getScore(cat));
      }
    } else if(topicType.equals("alchemy")) {
      FullLLDAClassification c = new FullLLDAClassification("alchemy",alpha,uid);
      for(String cat : c.getCategorySet()) {
        valueSet.add(c.getScore(cat));
      }
    } else if(topicType.equals("calais")) {
      FullLLDAClassification c = new FullLLDAClassification("calais",alpha,uid);
      for(String cat : c.getCategorySet()) {
        valueSet.add(c.getScore(cat));
      }
    } else if(topicType.equals("textwise")) {
      FullLLDAClassification c = new FullLLDAClassification("textwise",alpha,uid);
      for(String cat : c.getCategorySet()) {
        valueSet.add(c.getScore(cat));
      }
    }
    return valueSet;
  }
View Full Code Here

 
  public void fillLLDA(String topicType, double alpha) {
    System.out.println("Filling from LLDA-inferred "+topicType+" classifications");
    FullLLDAClassification[] classifications = new FullLLDAClassification[d];
    for(long id : Tools.getCSVUserIDs()) {
      classifications[indexLookup.get(id)] = new FullLLDAClassification(topicType,alpha,id);
    }
   
    //cosine similarities!
    for(int m=0; m<d; m++) {
      System.out.println("On row "+m);
View Full Code Here

TOP

Related Classes of uk.ac.cam.ha293.tweetlabel.classify.TextwiseClassifier

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.