Package org.apache.mahout.common

Examples of org.apache.mahout.common.StringTuple


    String label = key.stringAt(1);

    reporter.setStatus("Bayes Theta Normalizer Mapper: " + label);
   
    double weight = Math.log((value.get() + alpha_i) / (labelWeightSum.get(label) + vocabCount));
    StringTuple thetaNormalizerTuple = new StringTuple(BayesConstants.LABEL_THETA_NORMALIZER);
    thetaNormalizerTuple.add(label);
    output.collect(thetaNormalizerTuple, new DoubleWritable(weight));
  }
View Full Code Here


    return theSequenceFileOutputFormat.getRecordWriter(fs, job, name, arg3);
  }

  @Override
  protected String generateFileNameForKeyValue(WritableComparable<?> k, Writable v, String name) {
    StringTuple key = (StringTuple) k;
    if(key.length() == 3)
    {
      if(key.stringAt(0).equals(BayesConstants.WEIGHT))
        return "trainer-wordFreq/" + name;
      else if(key.stringAt(0).equals(BayesConstants.DOCUMENT_FREQUENCY))
        return "trainer-termDocCount/" + name;
    }
    else if(key.length() == 2)
    {
      if(key.stringAt(0).equals(BayesConstants.FEATURE_COUNT))
        return "trainer-featureCount/" + name;
      else if(key.stringAt(0).equals(BayesConstants.LABEL_COUNT))
        return "trainer-docCount/" + name;
    }
    throw new RuntimeException("Unrecognized Tuple: " + key);   
  }
View Full Code Here

  }

  public static void loadWeightMatrix(InMemoryBayesDatastore datastore,
      FileSystem fs, Path pathPattern, Configuration conf) throws IOException {

    StringTuple key = new StringTuple();
    DoubleWritable value = new DoubleWritable();

    FileStatus[] outputFiles = fs.globStatus(pathPattern);
    for (FileStatus fileStatus : outputFiles) {
      Path path = fileStatus.getPath();
      log.info("{}", path);
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);

      // the key is label,feature
      while (reader.next(key, value)) {

        datastore.loadFeatureWeight(key.stringAt(2), key.stringAt(1), value
            .get());

      }
    }
  }
View Full Code Here

  }

  public static void loadFeatureWeights(InMemoryBayesDatastore datastore,
      FileSystem fs, Path pathPattern, Configuration conf) throws IOException {

    StringTuple key = new StringTuple();
    DoubleWritable value = new DoubleWritable();

    FileStatus[] outputFiles = fs.globStatus(pathPattern);
    for (FileStatus fileStatus : outputFiles) {
      Path path = fileStatus.getPath();
      log.info("{}", path);
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);

      // the key is either _label_ or label,feature
      long count = 0;
      while (reader.next(key, value)) {

        if (key.stringAt(0).equals(BayesConstants.FEATURE_SUM)) { // Sum of
                                                                  // weights for
                                                                  // a Feature
          datastore.setSumFeatureWeight(key.stringAt(1), value.get());
          count++;
          if (count % 50000 == 0) {
            log.info("Read {} feature weights", count);
          }
        }
View Full Code Here

  }

  public static void loadLabelWeights(InMemoryBayesDatastore datastore,
      FileSystem fs, Path pathPattern, Configuration conf) throws IOException {

    StringTuple key = new StringTuple();
    DoubleWritable value = new DoubleWritable();

    FileStatus[] outputFiles = fs.globStatus(pathPattern);
    for (FileStatus fileStatus : outputFiles) {
      Path path = fileStatus.getPath();
      log.info("{}", path);
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);

      long count = 0;
      while (reader.next(key, value)) {
        if (key.stringAt(0).equals(BayesConstants.LABEL_SUM)) { // Sum of
                                                                // weights in a
                                                                // Label
          datastore.setSumLabelWeight(key.stringAt(1), value.get());
          count++;
          if (count % 10000 == 0) {
            log.info("Read {} label weights", count);
          }
        }
View Full Code Here

  }

  public static void loadThetaNormalizer(InMemoryBayesDatastore datastore,
      FileSystem fs, Path pathPattern, Configuration conf) throws IOException {

    StringTuple key = new StringTuple();
    DoubleWritable value = new DoubleWritable();

    FileStatus[] outputFiles = fs.globStatus(pathPattern);
    for (FileStatus fileStatus : outputFiles) {
      Path path = fileStatus.getPath();
      log.info("{}", path);
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);

      long count = 0;
      while (reader.next(key, value)) {
        if (key.stringAt(0).equals(BayesConstants.LABEL_THETA_NORMALIZER)) { // Sum
                                                                             // of
                                                                             // weights
                                                                             // in
                                                                             // a
                                                                             // Label
          datastore.setThetaNormalizer(key.stringAt(1), value.get());
          count++;
          if (count % 50000 == 0) {
            log.info("Read {} theta norms", count);
          }
        }
View Full Code Here

  }

  public static void loadSumWeight(InMemoryBayesDatastore datastore,
      FileSystem fs, Path pathPattern, Configuration conf) throws IOException {

    StringTuple key = new StringTuple();
    DoubleWritable value = new DoubleWritable();

    FileStatus[] outputFiles = fs.globStatus(pathPattern);
    for (FileStatus fileStatus : outputFiles) {
      Path path = fileStatus.getPath();
      log.info("{}", path);
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);

      // the key is _label
      while (reader.next(key, value)) {

        if (key.stringAt(0).equals(BayesConstants.TOTAL_SUM)) { // Sum of
                                                                // weights for
          // all Features and all Labels
          datastore.setSigma_jSigma_k(value.get());
          log.info("{}", value.get());
        }
View Full Code Here

  }

  public static Map<String, Double> readLabelSums(FileSystem fs,
      Path pathPattern, Configuration conf) throws IOException {
    Map<String, Double> labelSum = new HashMap<String, Double>();
    StringTuple key = new StringTuple();
    DoubleWritable value = new DoubleWritable();

    FileStatus[] outputFiles = fs.globStatus(pathPattern);

    for (FileStatus fileStatus : outputFiles) {
      Path path = fileStatus.getPath();
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
      // the key is either _label_ or label,feature
      while (reader.next(key, value)) {
        if (key.stringAt(0).equals(BayesConstants.LABEL_SUM)) { // Sum of counts
                                                                // of labels
          labelSum.put(key.stringAt(1), value.get());
        }

      }
    }
View Full Code Here

  }

  public static Map<String, Double> readLabelDocumentCounts(FileSystem fs,
      Path pathPattern, Configuration conf) throws IOException {
    Map<String, Double> labelDocumentCounts = new HashMap<String, Double>();
    StringTuple key = new StringTuple();
    DoubleWritable value = new DoubleWritable();

    FileStatus[] outputFiles = fs.globStatus(pathPattern);
    for (FileStatus fileStatus : outputFiles) {
      Path path = fileStatus.getPath();
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
      // the key is either _label_ or label,feature
      while (reader.next(key, value)) {
        if (key.stringAt(0).equals(BayesConstants.LABEL_COUNT)) { // Count of
                                                                  // Documents
                                                                  // in a Label
          labelDocumentCounts.put(key.stringAt(1), value.get());
        }

      }
    }
View Full Code Here

  }

  public static double readSigma_jSigma_k(FileSystem fs, Path pathPattern,
      Configuration conf) throws IOException {
    Map<String, Double> weightSum = new HashMap<String, Double>();
    StringTuple key = new StringTuple();
    DoubleWritable value = new DoubleWritable();

    FileStatus[] outputFiles = fs.globStatus(pathPattern);
    for (FileStatus fileStatus : outputFiles) {
      Path path = fileStatus.getPath();
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
      // the key is *
      while (reader.next(key, value)) {
        if (weightSum.size() > 1) {
          throw new IOException("Incorrect Sum File");
        } else if (key.stringAt(0).equals(BayesConstants.TOTAL_SUM)) {
          weightSum.put(BayesConstants.TOTAL_SUM, value.get());
        }

      }
    }
View Full Code Here

TOP

Related Classes of org.apache.mahout.common.StringTuple

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.