Package org.apache.mahout.common

Examples of org.apache.mahout.common.StringTuple


                                                                   PathType.GLOB,
                                                                   null,
                                                                   null,
                                                                   true,
                                                                   conf)) {
      StringTuple key = record.getFirst();
      DoubleWritable value = record.getSecond();
      // Sum of weights in a Label
      if (key.stringAt(0).equals(BayesConstants.LABEL_THETA_NORMALIZER)) {
        datastore.setThetaNormalizer(key.stringAt(1), value.get());
        if (++count % 50000 == 0) {
          log.info("Read {} theta norms", count);
        }
      }
    }
View Full Code Here


                                                                   PathType.GLOB,
                                                                   null,
                                                                   null,
                                                                   true,
                                                                   conf)) {
      StringTuple key = record.getFirst();
      DoubleWritable value = record.getSecond();
      if (key.stringAt(0).equals(BayesConstants.TOTAL_SUM)) {
        // Sum of weights for all Features and all Labels
        datastore.setSigmaJSigmaK(value.get());
        log.info("{}", value.get());
      }
    }
View Full Code Here

                                                                   PathType.GLOB,
                                                                   null,
                                                                   null,
                                                                   true,
                                                                   conf)) {
      StringTuple key = record.getFirst();
      DoubleWritable value = record.getSecond();
      if (key.stringAt(0).equals(BayesConstants.LABEL_SUM)) {
        // Sum of counts of labels
        labelSum.put(key.stringAt(1), value.get());
      }
    }
    return labelSum;
  }
View Full Code Here

                                                                   PathType.GLOB,
                                                                   null,
                                                                   null,
                                                                   true,
                                                                   conf)) {
      StringTuple key = record.getFirst();
      DoubleWritable value = record.getSecond();
      // Count of Documents in a Label
      if (key.stringAt(0).equals(BayesConstants.LABEL_COUNT)) {
        labelDocumentCounts.put(key.stringAt(1), value.get());
      }
    }
    return labelDocumentCounts;
  }
View Full Code Here

                                                                   PathType.GLOB,
                                                                   null,
                                                                   null,
                                                                   true,
                                                                   conf)) {
      StringTuple key = record.getFirst();
      DoubleWritable value = record.getSecond();
      if (weightSum.size() > 1) {
        throw new IllegalStateException("Incorrect Sum File");
      } else if (key.stringAt(0).equals(BayesConstants.TOTAL_SUM)) {
        weightSum.put(BayesConstants.TOTAL_SUM, value.get());
      }
    }
    return weightSum.get(BayesConstants.TOTAL_SUM);
  }
View Full Code Here

                                                                   true,
                                                                   conf)) {
      if (weightSum.size() > 1) {
        throw new IllegalStateException("Incorrect vocabCount File");
      }
      StringTuple key = record.getFirst();
      DoubleWritable value = record.getSecond();
      if (key.stringAt(0).equals(BayesConstants.FEATURE_SET_SIZE)) {
        weightSum.put(BayesConstants.FEATURE_SET_SIZE, value.get());
      }
    }
    return weightSum.get(BayesConstants.FEATURE_SET_SIZE);
  }
View Full Code Here

                                                                   PathType.GLOB,
                                                                   null,
                                                                   null,
                                                                   true,
                                                                   conf)) {
      StringTuple key = record.getFirst();
      DoubleWritable value = record.getSecond();
      String correctLabel = key.stringAt(1);
      String classifiedLabel = key.stringAt(2);
      Map<String,Integer> rowMatrix = confusionMatrix.get(correctLabel);
      if (rowMatrix == null) {
        rowMatrix = new HashMap<String,Integer>();
      }
      Integer count = Double.valueOf(value.get()).intValue();
View Full Code Here

    String label = key.stringAt(1);
   
    reporter.setStatus("Bayes Theta Normalizer Mapper: " + label);
   
    double weight = Math.log((value.get() + alphaI) / (labelWeightSum.get(label) + vocabCount));
    StringTuple thetaNormalizerTuple = new StringTuple(BayesConstants.LABEL_THETA_NORMALIZER);
    thetaNormalizerTuple.add(label);
    output.collect(thetaNormalizerTuple, new DoubleWritable(weight));
  }
View Full Code Here

    FileStatus[] statuses = fs.listStatus(output);
    assertEquals(1, statuses.length);
    Path filePath = statuses[0].getPath();
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, filePath, configuration);
    Text key = reader.getKeyClass().asSubclass(Text.class).newInstance();
    StringTuple value = reader.getValueClass().asSubclass(StringTuple.class).newInstance();

    reader.next(key, value);
    assertEquals(documentId1, key.toString());
    assertEquals(Arrays.asList("test", "document", "processor"), value.getEntries());
    reader.next(key, value);
    assertEquals(documentId2, key.toString());
    assertEquals(Arrays.asList("another", "one"), value.getEntries());
  }
View Full Code Here

  @Override
  protected void map(Text key, Text value, Context context) throws IOException, InterruptedException {
    TokenStream stream = analyzer.tokenStream(key.toString(), new StringReader(value.toString()));
    CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
    StringTuple document = new StringTuple();
    while (stream.incrementToken()) {
      if (termAtt.length() > 0) {
        document.add(new String(termAtt.buffer(), 0, termAtt.length()));
      }
    }
    context.write(key, document);
  }
View Full Code Here

TOP

Related Classes of org.apache.mahout.common.StringTuple

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.