Package uk.ac.cam.ha293.tweetlabel.topics

Examples of uk.ac.cam.ha293.tweetlabel.topics.MalletLDA


    Map<String,Double> urlCategoryScores = new HashMap<String,Double>();
    Map<String,Integer> urlCategoryCounts = new HashMap<String,Integer>();
   
    for(SimpleTweet tweet : tweets) {
      //Handle tweet content classification
      AlchemyClassification textClassification = AlchemyClassifier.classifyText(tweet.getText());
      if(textClassification == null) {
        System.err.println("Daily Transaction Limit has been reached (or a null classification was given weirdly)");
        return false;
      }
     
      writeOutTweets.print(tweet.getTweetID()+","+textClassification.getCategory()+","+textClassification.getScore());
      System.err.println("JUST WROTE TWEEET OUT");
     
      if(textCategoryScores.containsKey(textClassification.getCategory())) {
        //Add the new value, since the key already exists
        textCategoryScores.put(textClassification.getCategory(), textCategoryScores.get(textClassification.getCategory()) + textClassification.getScore());
        textCategoryCounts.put(textClassification.getCategory(), textCategoryCounts.get(textClassification.getCategory()) + 1);
      } else {
        //Insert the category into the map
        textCategoryScores.put(textClassification.getCategory(), textClassification.getScore());
        textCategoryCounts.put(textClassification.getCategory(), 1);
      }
     
      //Handle URL classification
      for(String url : tweet.getUrls()) {
        AlchemyClassification urlClassification = AlchemyClassifier.classifyURL(url);
        if(urlClassification == null) {
          System.err.println("Daily Transaction Limit has been reached (or a null classification was given weirdly)");
          return false;
        }
       
        writeOutTweets.print(","+urlClassification.getCategory()+","+urlClassification.getScore());
       
        if(urlCategoryScores.containsKey(urlClassification.getCategory())) {
          //Add the new value, since the key already exists
          urlCategoryScores.put(urlClassification.getCategory(), urlCategoryScores.get(urlClassification.getCategory()) + urlClassification.getScore());
          urlCategoryCounts.put(urlClassification.getCategory(), urlCategoryCounts.get(urlClassification.getCategory()) + 1);
        } else {
          //Insert the category into the map
          urlCategoryScores.put(urlClassification.getCategory(), urlClassification.getScore());
          urlCategoryCounts.put(urlClassification.getCategory(), 1);
        }
      }

      //writeOutTweets.println();
    }
View Full Code Here


        if(count < startFromHere) {count++;continue;}
        String[] split = nextLine.split("\t");
        long uid = Long.parseLong(split[0]);
        BigInteger tid = new BigInteger(split[1]);
        String tweet = split[2];
        AlchemyClassification c = AlchemyClassifier.classifyText(tweet);
        while(c == null) {
          c = AlchemyClassifier.classifyText(tweet);
         
        }
        System.out.println(count+"\t"+uid+"\t"+tid+"\t"+c.getCategory()+"\t"+c.getScore());
        //writeOut += uid+"\t"+tid+"\t"+c.getCategory()+"\t"+c.getScore()+"\n";
        count++;
      }
      //write.print(writeOut);
      //write.close();
View Full Code Here

   
    Map<String,Double> resultsMap = new HashMap<String,Double>();
    Map<String,Integer> countsMap = new HashMap<String,Integer>();
   
    for(SimpleTweet tweet : tweets) {
      CalaisClassification classification = CalaisClassifier.classifyText(tweet.getText());
      if(classification == null) {
        //Normally this is because of a <100 char tweet
        continue;
      }
      for(String category : classification.getCategories()) {
        double score = classification.lookupScore(category);     
        if(resultsMap.containsKey(category)) {
          resultsMap.put(category, resultsMap.get(category) + score);
          countsMap.put(category, countsMap.get(category) + 1);
        } else {
          resultsMap.put(category, score);
View Full Code Here

  //For use in Corpus' getFullProfileCorpus(topicType) method
  public Document asLLDADocument(String topicType) {
    Set<String> topics = new HashSet<String>();
    //Note: if not LIWC or LIWCNB, we have no topics yet!
    if(topicType.equals("alchemy")) {
      FullAlchemyClassification fac = new FullAlchemyClassification(userID);
      int topTopics = 3;
      //alchemy too sparse to threshold
      int count = 0;
      for(String topic : fac.getCategorySet()) {
        if(count == topTopics) break; //stop getting more than 3 topics
        //if(fac.getScore(topic) < scoreThreshold) break; //stop getting low-prob topics
        topics.add(topic);
        count++;
      }
View Full Code Here

public class Diversity {
 
  private static Set<Double> diversitySet(String topicType, long uid) {
    Set<Double> valueSet = new HashSet<Double>();
    if(topicType.equals("alchemy")) {
      FullAlchemyClassification c = new FullAlchemyClassification(uid);
      for(String cat : c.getCategorySet()) {
        valueSet.add(c.getScore(cat));
      }
    } else if(topicType.equals("calais")) {
      FullCalaisClassification c = new FullCalaisClassification(uid);
      for(String cat : c.getCategorySet()) {
        valueSet.add(c.getScore(cat));
      }
    } else if(topicType.equals("textwise")) {
      FullCalaisClassification c = new FullCalaisClassification(uid);
      for(String cat : c.getCategorySet()) {
        valueSet.add(c.getScore(cat));
      }
    }
    return valueSet;
  }
View Full Code Here

          //get average cosine similarity to baseline
          double cosineSum = 0.0;
          int cosineCount = 0;
          for(Long uid : uids) {
            if(topicType.equals("alchemy")) {
              FullAlchemyClassification baseline = new FullAlchemyClassification(uid);
              FullLLDAClassification llda = new FullLLDAClassification(topicType,alpha,false,reduction,uid);
              double sim = llda.cosineSimilarity(baseline);
              cosineSum += sim;
              cosineCount++;
            } else if(topicType.equals("calais")) {
View Full Code Here

          if(kCount == k) break;
          kCount++;
          lldaTopicSet.add(topic);
        }
        if(topicType.equals("alchemy")) {
          FullAlchemyClassification baseline = new FullAlchemyClassification(uid);
          kCount=0;
          for(String topic : baseline.getCategorySet()) {
            if(kCount == k) break;
            kCount++;
            baselineTopicSet.add(topic);
          }
        } else if(topicType.equals("calais")) {
          FullCalaisClassification baseline = new FullCalaisClassification(uid);
          kCount=0;
          for(String topic : baseline.getCategorySet()) {
            if(kCount == k) break;
            if(topic.equals("Other")) continue;
            kCount++;
            baselineTopicSet.add(topic);
          }
        } else if(topicType.equals("textwise")) {
          FullTextwiseClassification baseline = new FullTextwiseClassification(uid,true);
          kCount=0;
          for(String topic : baseline.getCategorySet()) {
            if(kCount == k) break;
            kCount++;
            baselineTopicSet.add(topic);
          }
        }
View Full Code Here

          kCount++;
          svmTopicSet.add(topic);
          System.out.println("Adding topic "+topic+" "+svm.getScore(topic));
        }
        if(topicType.equals("alchemy")) {
          FullAlchemyClassification baseline = new FullAlchemyClassification(uid);
          kCount=0;
          for(String topic : baseline.getCategorySet()) {
            if(kCount == k) break;
            kCount++;
            baselineTopicSet.add(topic);
          }
        } else if(topicType.equals("calais")) {
          FullCalaisClassification baseline = new FullCalaisClassification(uid);
          kCount=0;
          for(String topic : baseline.getCategorySet()) {
            if(kCount == k) break;
            if(topic.equals("Other")) continue;
            kCount++;
            baselineTopicSet.add(topic);
          }
        } else if(topicType.equals("textwise")) {
          FullTextwiseClassification baseline = new FullTextwiseClassification(uid,true);
          kCount=0;
          for(String topic : baseline.getCategorySet()) {
            if(kCount == k) break;
            kCount++;
            baselineTopicSet.add(topic);
          }
        }
View Full Code Here

    for(String topic : Tools.getTopics(topicType)) {
      topicCounts.put(topic, 0);
    }
    for(Long uid : Tools.getCSVUserIDs()) {
      if(topicType.equals("alchemy")) {
        FullAlchemyClassification c = new FullAlchemyClassification(uid);
        if(c.getCategorySet().size() == 0) continue;
        String topTopic = c.getCategorySet().toArray(new String[1])[0];
        topicCounts.put(topTopic,topicCounts.get(topTopic)+1);
      } else if(topicType.equals("calais")) {
        FullCalaisClassification c = new FullCalaisClassification(uid);
        if(c.getCategorySet().size() == 0) continue;
        String topTopic = c.getCategorySet().toArray(new String[1])[0];
        if(topTopic.equals("Other") && c.getCategorySet().size() > 1topTopic = c.getCategorySet().toArray(new String[1])[1];
        else if(topTopic.equals("Other")) continue;
        topicCounts.put(topTopic,topicCounts.get(topTopic)+1);
      } else if(topicType.equals("textwise")) {
        FullTextwiseClassification c = new FullTextwiseClassification(uid,true);
        if(c.getCategorySet().size() == 0) continue;
        String topTopic = c.getCategorySet().toArray(new String[1])[0];
        topicCounts.put(topTopic,topicCounts.get(topTopic)+1);
      }
      count++;
    }
    double sum = 0.0;
View Full Code Here

        if(kCount == k) break;
        kCount++;
        lldaTopicSet.add(topic);
      }
      if(topicType.equals("alchemy")) {
        FullAlchemyClassification baseline = new FullAlchemyClassification(uid);
        kCount=0;
        for(String topic : baseline.getCategorySet()) {
          if(kCount == k) break;
          kCount++;
          baselineTopicSet.add(topic);
        }
      } else if(topicType.equals("calais")) {
        FullCalaisClassification baseline = new FullCalaisClassification(uid);
        kCount=0;
        for(String topic : baseline.getCategorySet()) {
          if(kCount == k) break;
          if(topic.equals("Other")) continue;
          kCount++;
          baselineTopicSet.add(topic);
        }
      } else if(topicType.equals("textwise")) {
        FullTextwiseClassification baseline = new FullTextwiseClassification(uid,true);
        kCount=0;
        for(String topic : baseline.getCategorySet()) {
          if(kCount == k) break;
          kCount++;
          baselineTopicSet.add(topic);
        }
      }
View Full Code Here

TOP

Related Classes of uk.ac.cam.ha293.tweetlabel.topics.MalletLDA

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.