Package cc.twittertools.search.api

Examples of cc.twittertools.search.api.TrecSearchThriftClient


      e.printStackTrace();
      System.exit(-1);
    }
   
    // instantiate search client
    TrecSearchThriftClient client = new TrecSearchThriftClient(params.getParamValue(HOST_OPTION),
        trainingPort, group, token);

    SimpleSearcher searcher = new SimpleSearcher(client, numResults);
   
    err.println("=== Train Queries ===");
   
    List<Double> thresholds = new ArrayList<Double>();
    double averageThreshold = 0;
    Iterator<GQuery> queryIterator = trainingQueries.iterator();
    while(queryIterator.hasNext()) {
      GQuery query = queryIterator.next();
     
      Map<Long, TResult> seenResults = searcher.search(query);
     
      SimpleJaccardClusterer clusterer = new SimpleJaccardClusterer(new ArrayList<TResult>(seenResults.values()));
     
      // sweep through jaccard steps, calculating F1
      double maxF1 = 0;
      double maxF1Threshold = 1;
      for (double j = 1.0; j >= 0.0; j -= stepSize) { // for each jaccard threshold step
        Clusters clusters = clusterer.cluster(j);
       
        // all clusters are created now, get a finalized set of results
        Set<Long> allResults = new HashSet<Long>(seenResults.keySet());
        allResults.removeAll(clusters.getAllClusteredResults()); // allResults includes unclustered plus one representative from each cluster
        for (Cluster c : clusters) {
          allResults.add(c.getFirstMember());
        }
       
        // calculate f1 on the finalized set
        Clusters seenClusters = new Clusters();
        Clusters trueClusters = clusterMembership.get(query.getTitle());
        Iterator<Long> resultIt = allResults.iterator();
        while (resultIt.hasNext()) {
          long result = resultIt.next();
          Cluster trueCluster = trueClusters.findCluster(result);
          if (trueCluster != null) { // if it is relevant, it will have a true cluster; if this is null, it's non-relevant
            seenClusters.add(trueCluster);
          }
        }
       
        int numRetrievedClusters = seenClusters.size();
        int numResultsReturned = allResults.size();
        int numTrueClusters = trueClusters.size();

        double precision = 0;
        double recall = 0;
        double f1 = 0;
        if (evalType.equals("unweighted")) {
          precision = numRetrievedClusters / (double) numResultsReturned;
          recall = numRetrievedClusters / (double) numTrueClusters;
          f1 = 2 * precision * recall / (precision + recall);
        } else {       
          // for weighted measurements, we need the weight of each cluster
          int retrievedWeight = 0;
          for (Cluster cluster : seenClusters) {
            int w = cluster.getWeight(query, qrels);
            retrievedWeight += w;
          }
          int resultsWeight = 0;
          for (long result : allResults) {
            int w = 0;
            if (seenClusters.findCluster(result) == null)
            resultsWeight += w;
          }
          int trueWeight = 0;
          for (Cluster cluster : trueClusters) {
            int w = cluster.getWeight(query, qrels);
            trueWeight += w;
          }
         
          precision = retrievedWeight / (double) resultsWeight; // <--- ??????
          recall = retrievedWeight / (double) trueWeight;
          f1 = 2 * precision * recall / (precision + recall);
        }
        if (f1 > maxF1) {
          maxF1 = f1;
          maxF1Threshold = j;
        }
      }
      thresholds.add(maxF1Threshold);
      err.println("F1: "+df.format(maxF1)+"; Jaccard: "+df.format(maxF1Threshold));
     
    }
   
    // get the average threshold
    for (double threshold : thresholds) {
      averageThreshold += threshold;
    }
    averageThreshold /= thresholds.size();
    err.println("Average Jaccard: "+averageThreshold);
   
    err.println("=== Test Queries ===");
   
    // now cluster the test queries and output
    queryIterator = queries.iterator();
    while(queryIterator.hasNext()) {
      GQuery query = queryIterator.next();
      err.println(query.getTitle());
     
      client = new TrecSearchThriftClient(params.getParamValue(HOST_OPTION), testingPort, group, token);
      searcher = new SimpleSearcher(client, numResults);
      Map<Long, TResult> seenResults = searcher.search(query);
     
      SimpleJaccardClusterer clusterer = new SimpleJaccardClusterer(new ArrayList<TResult>(seenResults.values()));
      Clusters clusters = clusterer.cluster(averageThreshold);
View Full Code Here


    if(group==null) {
      err.println("Invalid " + TOKEN_OPTION + ": must set a valid authentication token");
      System.exit(-1);
    }

    TrecSearchThriftClient client = new TrecSearchThriftClient(params.getParamValue(HOST_OPTION),
        Integer.parseInt(params.getParamValue(PORT_OPTION)), group, token);

    Iterator<GQuery> queryIterator = queries.iterator();
    while(queryIterator.hasNext()) {
      GQuery query = queryIterator.next();
      System.err.println(query.getTitle());
      String queryText = query.getText();
     
      // stupid hack.  need to lowercase the query vector
      FeatureVector temp = new FeatureVector(null);
      Iterator<String> qTerms = query.getFeatureVector().iterator();
      while(qTerms.hasNext()) {
        String term = qTerms.next();
        temp.addTerm(term.toLowerCase(), query.getFeatureVector().getFeaturetWeight(term));
      }
      temp.normalizeToOne();
      query.setFeatureVector(temp);
     
     
      // if we're doing feedback
      if(fbDocs > 0 && fbTerms > 0) {
        List<TResult> results = client.search(queryText, query.getQuerytweettime(), fbDocs);
        FeedbackRelevanceModel fb = new FeedbackRelevanceModel();
        fb.setOriginalQuery(query);
        fb.setRes(results);
        fb.build(stopper);
       
        FeatureVector fbVector = fb.asFeatureVector();
        fbVector.pruneToSize(fbTerms);
        fbVector.normalizeToOne();
        fbVector = FeatureVector.interpolate(query.getFeatureVector(), fbVector, ORIG_QUERY_WEIGHT);
   
        System.err.println(fbVector);
       
        StringBuilder builder = new StringBuilder();
        Iterator<String> terms = fbVector.iterator();
        while(terms.hasNext()) {
          String term = terms.next();
          if(term.length() < 2)
            continue;
          double prob = fbVector.getFeaturetWeight(term);
          builder.append(term + "^" + prob + " ");
        }
        queryText = builder.toString().trim();
       
      }
     
      List<TResult> results = client.search(queryText, query.getQuerytweettime(), numResults);
      String runTag = params.getParamValue(RUNTAG_OPTION);
      if(runTag==null)
        runTag = DEFAULT_RUNTAG;

      int i = 1;
View Full Code Here

    this.authToken = authToken;
   
    seenDocs = new HashMap<String,String>();
   
    try {
      client = new TrecSearchThriftClient(hostname, port, groupId, authToken);
    } catch (Exception e) {

    }
  }
View Full Code Here

TOP

Related Classes of cc.twittertools.search.api.TrecSearchThriftClient

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., now owned by Oracle Corporation. Contact coftware#gmail.com.