Package org.apache.mahout.common

Examples of org.apache.mahout.common.StringTuple


                        Context context) throws IOException, InterruptedException {
    Set<String> outputValues = Sets.newHashSet();
    for (StringTuple value : values) {
      outputValues.addAll(value.getEntries());
    }
    context.write(key, new StringTuple(outputValues));
  }
View Full Code Here


    List<String> oValue = Lists.newArrayList();
    for (int selectedField : selectedFields) {
      oValue.add(fields[selectedField]);
    }
   
    context.write(new Text(oKey.toString()), new StringTuple(oValue));
   
  }
View Full Code Here

    FileStatus[] statuses = fs.listStatus(output, PathFilters.logsCRCFilter());
    assertEquals(1, statuses.length);
    Path filePath = statuses[0].getPath();
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, filePath, configuration);
    Text key = ClassUtils.instantiateAs((Class<? extends Text>) reader.getKeyClass(), Text.class);
    StringTuple value =
        ClassUtils.instantiateAs((Class<? extends StringTuple>) reader.getValueClass(), StringTuple.class);
    reader.next(key, value);
    assertEquals(documentId1, key.toString());
    assertEquals(Arrays.asList("test", "document", "processor"), value.getEntries());
    reader.next(key, value);
    assertEquals(documentId2, key.toString());
    assertEquals(Arrays.asList("another", "one"), value.getEntries());
  }
View Full Code Here

  }

  @Override
  protected String generateFileNameForKeyValue(WritableComparable<?> k,
      Writable v, String name) {
    StringTuple key = (StringTuple) k;

    if (key.length() == 1
        && key.stringAt(0).equals(BayesConstants.FEATURE_SET_SIZE))
      return "trainer-vocabCount/" + name;
    else
      return "trainer-tfIdf/" + name;
  }
View Full Code Here

  }

  private static ConfusionMatrix readResult(FileSystem fs, Path pathPattern, Configuration conf, BayesParameters params)
      throws IOException {
  
    StringTuple key = new StringTuple();
    DoubleWritable value = new DoubleWritable();
    String defaultLabel = params.get("defaultCat");
    FileStatus[] outputFiles = fs.globStatus(pathPattern);
    Map<String, Map<String, Integer>> confusionMatrix = new HashMap<String, Map<String, Integer>>();
   
    for (FileStatus fileStatus : outputFiles) {
      Path path = fileStatus.getPath();
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
      while (reader.next(key, value)) {       
        String correctLabel = key.stringAt(1);
        String classifiedLabel = key.stringAt(2);
        Map<String, Integer> rowMatrix = confusionMatrix.get(correctLabel);
        if(rowMatrix == null)
          rowMatrix = new HashMap<String, Integer>();       
        Integer count = Double.valueOf(value.get()).intValue();
        rowMatrix.put(classifiedLabel, count);
View Full Code Here

    // Output Length Normalized + TF Transformed Frequency per Word per Class
    // Log(1 + D_ij)/SQRT( SIGMA(k, D_kj) )
    for (Map.Entry<String, int[]> entry : wordList.entrySet()) {
      // key is label,word
      String token = entry.getKey();
      StringTuple tuple = new StringTuple();
      tuple.add(BayesConstants.WEIGHT);
      tuple.add(label);
      tuple.add(token);
      DoubleWritable f = new DoubleWritable(Math.log(1.0 + entry.getValue()[0]) / lengthNormalisation);
      output.collect(tuple, f);
    }
    reporter.setStatus("Bayes Feature Mapper: Document Label: " + label)
   
    // Output Document Frequency per Word per Class
   
    for (String token : wordList.keySet()) {
      // key is label,word
     
      StringTuple dfTuple = new StringTuple();
      dfTuple.add(BayesConstants.DOCUMENT_FREQUENCY);
      dfTuple.add(label);
      dfTuple.add(token);     
      output.collect(dfTuple, one);
     
      StringTuple tokenCountTuple = new StringTuple();
      tokenCountTuple.add(BayesConstants.FEATURE_COUNT);
      tokenCountTuple.add(token);
      output.collect(tokenCountTuple, one);

    }

    // output that we have seen the label to calculate the Count of Document per
    // class
    StringTuple labelCountTuple = new StringTuple();
    labelCountTuple.add(BayesConstants.LABEL_COUNT);
    labelCountTuple.add(label);
    output.collect(labelCountTuple, one);
  }
View Full Code Here

          .toArray(new String[ngrams.size()]), defaultCategory);
    
      String correctLabel = label;
      String classifiedLabel = result.getLabel();
     
      StringTuple outputTuple = new StringTuple(BayesConstants.CLASSIFIER_TUPLE);
      outputTuple.add(correctLabel);
      outputTuple.add(classifiedLabel);
     
      output.collect(outputTuple, new DoubleWritable(1.0));
    } catch (InvalidDatastoreException e) {
      throw new IOException(e.toString());
    }
View Full Code Here

  }

  @Override
  protected String generateFileNameForKeyValue(WritableComparable<?> k, Writable v,
                                               String name) {
    StringTuple key = (StringTuple) k;

    if(key.length() == 1 && key.stringAt(0).equals(BayesConstants.TOTAL_SUM))
    {
      return "Sigma_kSigma_j/" + name;
    }
    else{
      if(key.stringAt(0).equals(BayesConstants.FEATURE_SUM))
      {
        return "Sigma_j/" + name;
      }
      else if(key.stringAt(0).equals(BayesConstants.LABEL_SUM))
        return "Sigma_k/" + name;
      else
        throw new IllegalArgumentException("Unexpected StringTuple: " + key);
    }
  }
View Full Code Here

      OutputCollector<StringTuple, DoubleWritable> output, Reporter reporter)
      throws IOException {
    String label = key.stringAt(1);
    String feature = key.stringAt(2);
    reporter.setStatus("Bayes Weight Summer Mapper: " + key);
    StringTuple featureSum = new StringTuple(BayesConstants.FEATURE_SUM);
    featureSum.add(feature);
    output.collect(featureSum, value);// sum of weight for all labels for a
                                      // feature Sigma_j
    StringTuple labelSum = new StringTuple(BayesConstants.LABEL_SUM);
    labelSum.add(label);
    output.collect(labelSum, value);// sum of weight for all features for a
                                    // label Sigma_k
    StringTuple totalSum = new StringTuple(BayesConstants.TOTAL_SUM);
    output.collect(totalSum, value);// sum of weight of all features for all
                                    // label Sigma_kSigma_j

  }
View Full Code Here

      for (Map.Entry<String, Double> stringDoubleEntry : labelWeightSum.entrySet()) {
        String label = stringDoubleEntry.getKey();
        double weight = Math.log((value.get() + alpha_i) / (sigma_jSigma_k - stringDoubleEntry.getValue() + vocabCount));
       
        reporter.setStatus("Complementary Bayes Theta Normalizer Mapper: " + stringDoubleEntry + " => " + weight);
        StringTuple normalizerTuple = new StringTuple(BayesConstants.LABEL_THETA_NORMALIZER);
        normalizerTuple.add(label);       
        output.collect(normalizerTuple, new DoubleWritable(weight)); //output Sigma_j

      }

    } else {
      String label = key.stringAt(1);
        
      double D_ij = value.get();
      double denominator = 0.5 * ((sigma_jSigma_k / vocabCount) + (D_ij * this.labelWeightSum.size()));
      double weight = Math.log(1.0 - D_ij / denominator);

      reporter.setStatus("Complementary Bayes Theta Normalizer Mapper: " + label + " => " + weight);
    
      StringTuple normalizerTuple = new StringTuple(BayesConstants.LABEL_THETA_NORMALIZER);
      normalizerTuple.add(label);   
     
      output.collect(normalizerTuple, new DoubleWritable(weight));//output -D_ij
    

    }
View Full Code Here

TOP

Related Classes of org.apache.mahout.common.StringTuple

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.