Package org.apache.mahout.common

Examples of org.apache.mahout.common.StringTuple


    return theSequenceFileOutputFormat.getRecordWriter(fs, job, name, arg3);
  }
 
  @Override
  protected String generateFileNameForKeyValue(WritableComparable<?> k, Writable v, String name) {
    StringTuple key = (StringTuple) k;
   
    if ((key.length() == 1) && key.stringAt(0).equals(BayesConstants.FEATURE_SET_SIZE)) {
      return "trainer-vocabCount/" + name;
    } else {
      return "trainer-tfIdf/" + name;
    }
  }
View Full Code Here


    // Log(1 + D_ij)/SQRT( SIGMA(k, D_kj) )
    wordList.forEachPair(new ObjectIntProcedure<String>() {
      @Override
      public boolean apply(String token, int dKJ) {
        try {
          StringTuple tuple = new StringTuple();
          tuple.add(BayesConstants.WEIGHT);
          tuple.add(label);
          tuple.add(token);
          DoubleWritable f = new DoubleWritable(Math.log(1.0 + dKJ) / lengthNormalisation);
          output.collect(tuple, f);
        } catch (IOException e) {
          throw new IllegalStateException(e);
        }
        return true;
      }
    });
    reporter.setStatus("Bayes Feature Mapper: Document Label: " + label);
   
    // Output Document Frequency per Word per Class
    // Corpus Document Frequency (FEATURE_COUNT)
    // Corpus Term Frequency (FEATURE_TF)
    wordList.forEachPair(new ObjectIntProcedure<String>() {
      @Override
      public boolean apply(String token, int dKJ) {
        try {
          StringTuple dfTuple = new StringTuple();
          dfTuple.add(BayesConstants.DOCUMENT_FREQUENCY);
          dfTuple.add(label);
          dfTuple.add(token);
          output.collect(dfTuple, ONE);
         
          StringTuple tokenCountTuple = new StringTuple();
          tokenCountTuple.add(BayesConstants.FEATURE_COUNT);
          tokenCountTuple.add(token);
          output.collect(tokenCountTuple, ONE);
         
          StringTuple tokenTfTuple = new StringTuple();
          tokenTfTuple.add(BayesConstants.FEATURE_TF);
          tokenTfTuple.add(token);
          output.collect(tokenTfTuple, new DoubleWritable(dKJ));
        } catch (IOException e) {
          throw new IllegalStateException(e);
        }
        return true;
      }
    });
   
    // output that we have seen the label to calculate the Count of Document per
    // class
    StringTuple labelCountTuple = new StringTuple();
    labelCountTuple.add(BayesConstants.LABEL_COUNT);
    labelCountTuple.add(label);
    output.collect(labelCountTuple, ONE);
  }
View Full Code Here

    return theSequenceFileOutputFormat.getRecordWriter(fs, job, name, arg3);
  }
 
  @Override
  protected String generateFileNameForKeyValue(WritableComparable<?> k, Writable v, String name) {
    StringTuple key = (StringTuple) k;
   
    if ((key.length() == 1) && key.stringAt(0).equals(BayesConstants.TOTAL_SUM)) {
      return "Sigma_kSigma_j/" + name;
    } else {
      if (key.stringAt(0).equals(BayesConstants.FEATURE_SUM)) {
        return "Sigma_j/" + name;
      } else if (key.stringAt(0).equals(BayesConstants.LABEL_SUM)) {
        return "Sigma_k/" + name;
      } else {
        throw new IllegalArgumentException("Unexpected StringTuple: " + key);
      }
    }
View Full Code Here

                  OutputCollector<StringTuple,DoubleWritable> output,
                  Reporter reporter) throws IOException {
    String label = key.stringAt(1);
    String feature = key.stringAt(2);
    reporter.setStatus("Bayes Weight Summer Mapper: " + key);
    StringTuple featureSum = new StringTuple(BayesConstants.FEATURE_SUM);
    featureSum.add(feature);
    output.collect(featureSum, value); // sum of weight for all labels for a
    // feature Sigma_j
    StringTuple labelSum = new StringTuple(BayesConstants.LABEL_SUM);
    labelSum.add(label);
    output.collect(labelSum, value); // sum of weight for all features for a
    // label Sigma_k
    StringTuple totalSum = new StringTuple(BayesConstants.TOTAL_SUM);
    output.collect(totalSum, value); // sum of weight of all features for all
    // label Sigma_kSigma_j
   
  }
View Full Code Here

    super(StringTuple.class, true);
  }
 
  @Override
  public int compare(WritableComparable a, WritableComparable b) {
    StringTuple ta = (StringTuple) a;
    StringTuple tb = (StringTuple) b;
    Preconditions.checkArgument(ta.length() >= 2 && ta.length() <= 3 && tb.length() >= 2 && tb.length() <= 3, "StringTuple length out of bounds");
    // token
    String tmpa = ta.length() == 2 ? ta.stringAt(1) : ta.stringAt(2);
    String tmpb = tb.length() == 2 ? tb.stringAt(1) : tb.stringAt(2);
    int cmp = tmpa.compareTo(tmpb);
    if (cmp != 0) {
      return cmp;
    }
   
    // type, FEATURE_TF first, then FEATURE_COUNT, then DF or anything else.
    cmp = ta.stringAt(0).compareTo(tb.stringAt(0));
    if (cmp != 0) {
      if (ta.stringAt(0).equals(BayesConstants.FEATURE_TF)) {
        return -1;
      } else if (tb.stringAt(0).equals(BayesConstants.FEATURE_TF)) {
        return 1;
      } else if (ta.stringAt(0).equals(BayesConstants.FEATURE_COUNT)) {
        return -1;
      } else if (tb.stringAt(0).equals(BayesConstants.FEATURE_COUNT)) {
        return 1;
      } else {
        return cmp;
      }
    }

    // label or empty.
    tmpa = ta.length() == 2 ? "" : ta.stringAt(1);
    tmpb = tb.length() == 2 ? "" : tb.stringAt(1);
   
    cmp = tmpa.compareTo(tmpb);
    return cmp;
  }
View Full Code Here

  public static ConfusionMatrix readResult(FileSystem fs,
                                           Path pathPattern,
                                           Configuration conf,
                                           Parameters params) throws IOException {
   
    StringTuple key = new StringTuple();
    DoubleWritable value = new DoubleWritable();
    String defaultLabel = params.get("defaultCat");
    FileStatus[] outputFiles = fs.globStatus(pathPattern);
    Map<String,Map<String,Integer>> confusionMatrix = new HashMap<String,Map<String,Integer>>();
   
    for (FileStatus fileStatus : outputFiles) {
      Path path = fileStatus.getPath();
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
      while (reader.next(key, value)) {
        String correctLabel = key.stringAt(1);
        String classifiedLabel = key.stringAt(2);
        Map<String,Integer> rowMatrix = confusionMatrix.get(correctLabel);
        if (rowMatrix == null) {
          rowMatrix = new HashMap<String,Integer>();
        }
        Integer count = Double.valueOf(value.get()).intValue();
View Full Code Here

        defaultCategory);
     
      String correctLabel = label;
      String classifiedLabel = result.getLabel();
     
      StringTuple outputTuple = new StringTuple(BayesConstants.CLASSIFIER_TUPLE);
      outputTuple.add(correctLabel);
      outputTuple.add(classifiedLabel);
     
      output.collect(outputTuple, new DoubleWritable(1.0));
    } catch (InvalidDatastoreException e) {
      throw new IOException(e.toString());
    }
View Full Code Here

    return theSequenceFileOutputFormat.getRecordWriter(fs, job, name, arg3);
  }
 
  @Override
  protected String generateFileNameForKeyValue(WritableComparable<?> k, Writable v, String name) {
    StringTuple key = (StringTuple) k;
    if (key.length() == 3) {
      if (key.stringAt(0).equals(BayesConstants.WEIGHT)) {
        return "trainer-wordFreq/" + name;
      } else if (key.stringAt(0).equals(BayesConstants.DOCUMENT_FREQUENCY)) {
        return "trainer-termDocCount/" + name;
      }
    } else if (key.length() == 2) {
      if (key.stringAt(0).equals(BayesConstants.FEATURE_COUNT)) {
        return "trainer-featureCount/" + name;
      } else if (key.stringAt(0).equals(BayesConstants.LABEL_COUNT)) {
        return "trainer-docCount/" + name;
      }
    }
    throw new IllegalArgumentException("Unrecognized Tuple: " + key);
  }
View Full Code Here

    return iterator.hasNext();
  }
 
  @Override
  public List<String> next() {
    StringTuple transaction = iterator.next();
    return transaction.getEntries();
  }
View Full Code Here

  public static void loadWeightMatrix(InMemoryBayesDatastore datastore,
                                      FileSystem fs,
                                      Path pathPattern,
                                      Configuration conf) throws IOException {
   
    StringTuple key = new StringTuple();
    DoubleWritable value = new DoubleWritable();
   
    FileStatus[] outputFiles = fs.globStatus(pathPattern);
    for (FileStatus fileStatus : outputFiles) {
      Path path = fileStatus.getPath();
      log.info("{}", path);
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
     
      // the key is label,feature
      while (reader.next(key, value)) {
       
        datastore.loadFeatureWeight(key.stringAt(2), key.stringAt(1), value.get());
       
      }
    }
  }
View Full Code Here

TOP

Related Classes of org.apache.mahout.common.StringTuple

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.