Package org.apache.mahout.common

Examples of org.apache.mahout.common.StringTuple


  public static void loadFeatureWeights(InMemoryBayesDatastore datastore,
                                        FileSystem fs,
                                        Path pathPattern,
                                        Configuration conf) throws IOException {
   
    StringTuple key = new StringTuple();
    DoubleWritable value = new DoubleWritable();
   
    FileStatus[] outputFiles = fs.globStatus(pathPattern);
    for (FileStatus fileStatus : outputFiles) {
      Path path = fileStatus.getPath();
      log.info("{}", path);
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
     
      // the key is either _label_ or label,feature
      long count = 0;
      while (reader.next(key, value)) {
        // Sum of weights for a Feature
        if (key.stringAt(0).equals(BayesConstants.FEATURE_SUM)) {
          datastore.setSumFeatureWeight(key.stringAt(1), value.get());
          count++;
          if (count % 50000 == 0) {
            log.info("Read {} feature weights", count);
          }
        }
View Full Code Here


                        Context context) throws IOException, InterruptedException {
    Set<String> outputValues = new HashSet<String>();
    for (StringTuple value : values) {
      outputValues.addAll(value.getEntries());
    }
    context.write(key, new StringTuple(outputValues));
  }
View Full Code Here

        @Override
        public boolean apply(String label, double sigmaJ) {
          double weight = Math.log((value.get() + alphaI) / (sigmaJSigmaK - sigmaJ + vocabCount));
         
          reporter.setStatus("Complementary Bayes Theta Normalizer Mapper: " + label + " => " + weight);
          StringTuple normalizerTuple = new StringTuple(BayesConstants.LABEL_THETA_NORMALIZER);
          normalizerTuple.add(label);
          try {
            output.collect(normalizerTuple, new DoubleWritable(weight));
          } catch (IOException e) {
            throw new IllegalStateException(e);
          } // output Sigma_j
          return true;
        }
      });
     
    } else {
      String label = key.stringAt(1);
     
      double dIJ = value.get();
      double denominator = 0.5 * (sigmaJSigmaK / vocabCount + dIJ * this.labelWeightSum.size());
      double weight = Math.log(1.0 - dIJ / denominator);
     
      reporter.setStatus("Complementary Bayes Theta Normalizer Mapper: " + label + " => " + weight);
     
      StringTuple normalizerTuple = new StringTuple(BayesConstants.LABEL_THETA_NORMALIZER);
      normalizerTuple.add(label);
     
      // output -D_ij
      output.collect(normalizerTuple, new DoubleWritable(weight));
     
    }
View Full Code Here

  public static void loadLabelWeights(InMemoryBayesDatastore datastore,
                                      FileSystem fs,
                                      Path pathPattern,
                                      Configuration conf) throws IOException {
   
    StringTuple key = new StringTuple();
    DoubleWritable value = new DoubleWritable();
   
    FileStatus[] outputFiles = fs.globStatus(pathPattern);
    for (FileStatus fileStatus : outputFiles) {
      Path path = fileStatus.getPath();
      log.info("{}", path);
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
     
      long count = 0;
      while (reader.next(key, value)) {
        // Sum of weights in a Label
        if (key.stringAt(0).equals(BayesConstants.LABEL_SUM)) {
          datastore.setSumLabelWeight(key.stringAt(1), value.get());
          count++;
          if (count % 10000 == 0) {
            log.info("Read {} label weights", count);
          }
        }
View Full Code Here

  public static void loadThetaNormalizer(InMemoryBayesDatastore datastore,
                                         FileSystem fs,
                                         Path pathPattern,
                                         Configuration conf) throws IOException {
   
    StringTuple key = new StringTuple();
    DoubleWritable value = new DoubleWritable();
   
    FileStatus[] outputFiles = fs.globStatus(pathPattern);
    for (FileStatus fileStatus : outputFiles) {
      Path path = fileStatus.getPath();
      log.info("{}", path);
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
     
      long count = 0;
      while (reader.next(key, value)) {
        // Theta normalizer for a Label
        if (key.stringAt(0).equals(BayesConstants.LABEL_THETA_NORMALIZER)) {
          datastore.setThetaNormalizer(key.stringAt(1), value.get());
          count++;
          if (count % 50000 == 0) {
            log.info("Read {} theta norms", count);
          }
        }
View Full Code Here

  public static void loadSumWeight(InMemoryBayesDatastore datastore,
                                   FileSystem fs,
                                   Path pathPattern,
                                   Configuration conf) throws IOException {
   
    StringTuple key = new StringTuple();
    DoubleWritable value = new DoubleWritable();
   
    FileStatus[] outputFiles = fs.globStatus(pathPattern);
    for (FileStatus fileStatus : outputFiles) {
      Path path = fileStatus.getPath();
      log.info("{}", path);
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
     
      // the key is _label
      while (reader.next(key, value)) {
       
        if (key.stringAt(0).equals(BayesConstants.TOTAL_SUM)) { // Sum of
          // weights for
          // all Features and all Labels
          datastore.setSigmaJSigmaK(value.get());
          log.info("{}", value.get());
        }
View Full Code Here

 
  public static Map<String,Double> readLabelSums(FileSystem fs,
                                                 Path pathPattern,
                                                 Configuration conf) throws IOException {
    Map<String,Double> labelSum = new HashMap<String,Double>();
    StringTuple key = new StringTuple();
    DoubleWritable value = new DoubleWritable();
   
    FileStatus[] outputFiles = fs.globStatus(pathPattern);
   
    for (FileStatus fileStatus : outputFiles) {
      Path path = fileStatus.getPath();
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
      // the key is either _label_ or label,feature
      while (reader.next(key, value)) {
        if (key.stringAt(0).equals(BayesConstants.LABEL_SUM)) { // Sum of counts
          // of labels
          labelSum.put(key.stringAt(1), value.get());
        }
       
      }
    }
   
View Full Code Here

 
  public static Map<String,Double> readLabelDocumentCounts(FileSystem fs,
                                                           Path pathPattern,
                                                           Configuration conf) throws IOException {
    Map<String,Double> labelDocumentCounts = new HashMap<String,Double>();
    StringTuple key = new StringTuple();
    DoubleWritable value = new DoubleWritable();
   
    FileStatus[] outputFiles = fs.globStatus(pathPattern);
    for (FileStatus fileStatus : outputFiles) {
      Path path = fileStatus.getPath();
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
      // the key is either _label_ or label,feature
      while (reader.next(key, value)) {
        // Count of Documents in a Label
        if (key.stringAt(0).equals(BayesConstants.LABEL_COUNT)) {
          labelDocumentCounts.put(key.stringAt(1), value.get());
        }
       
      }
    }
   
View Full Code Here

    return labelDocumentCounts;
  }
 
  public static double readSigmaJSigmaK(FileSystem fs, Path pathPattern, Configuration conf) throws IOException {
    Map<String,Double> weightSum = new HashMap<String,Double>();
    StringTuple key = new StringTuple();
    DoubleWritable value = new DoubleWritable();
   
    FileStatus[] outputFiles = fs.globStatus(pathPattern);
    for (FileStatus fileStatus : outputFiles) {
      Path path = fileStatus.getPath();
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
      while (reader.next(key, value)) {
        if (weightSum.size() > 1) {
          throw new IOException("Incorrect Sum File");
        } else if (key.stringAt(0).equals(BayesConstants.TOTAL_SUM)) {
          weightSum.put(BayesConstants.TOTAL_SUM, value.get());
        }
       
      }
    }
View Full Code Here

    return weightSum.get(BayesConstants.TOTAL_SUM);
  }
 
  public static double readVocabCount(FileSystem fs, Path pathPattern, Configuration conf) throws IOException {
    Map<String,Double> weightSum = new HashMap<String,Double>();
    StringTuple key = new StringTuple();
    DoubleWritable value = new DoubleWritable();
   
    FileStatus[] outputFiles = fs.globStatus(pathPattern);
    for (FileStatus fileStatus : outputFiles) {
      Path path = fileStatus.getPath();
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
      while (reader.next(key, value)) {
        if (weightSum.size() > 1) {
          throw new IOException("Incorrect vocabCount File");
        }
        if (key.stringAt(0).equals(BayesConstants.FEATURE_SET_SIZE)) {
          weightSum.put(BayesConstants.FEATURE_SET_SIZE, value.get());
        }
       
      }
    }
View Full Code Here

TOP

Related Classes of org.apache.mahout.common.StringTuple

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle, Inc. Contact coftware#gmail.com.