Package org.apache.hadoop.io

Examples of org.apache.hadoop.io.DoubleWritable$Comparator


         
          reporter.setStatus("Complementary Bayes Theta Normalizer Mapper: " + label + " => " + weight);
          StringTuple normalizerTuple = new StringTuple(BayesConstants.LABEL_THETA_NORMALIZER);
          normalizerTuple.add(label);
          try {
            output.collect(normalizerTuple, new DoubleWritable(weight));
          } catch (IOException e) {
            throw new IllegalStateException(e);
          } // output Sigma_j
          return true;
        }
      });
     
    } else {
      String label = key.stringAt(1);
     
      double dIJ = value.get();
      double denominator = 0.5 * (sigmaJSigmaK / vocabCount + dIJ * this.labelWeightSum.size());
      double weight = Math.log(1.0 - dIJ / denominator);
     
      reporter.setStatus("Complementary Bayes Theta Normalizer Mapper: " + label + " => " + weight);
     
      StringTuple normalizerTuple = new StringTuple(BayesConstants.LABEL_THETA_NORMALIZER);
      normalizerTuple.add(label);
     
      // output -D_ij
      output.collect(normalizerTuple, new DoubleWritable(weight));
     
    }
   
  }
View Full Code Here


    FileSystem fs = new Path(dir).getFileSystem(job);
   
    List<PriorityQueue<StringDoublePair>> queues = new ArrayList<PriorityQueue<StringDoublePair>>();
   
    IntPairWritable key = new IntPairWritable();
    DoubleWritable value = new DoubleWritable();
    for (FileStatus status : fs.globStatus(new Path(dir, "*"))) {
      Path path = status.getPath();
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, job);
      while (reader.next(key, value)) {
        int topic = key.getFirst();
        int word = key.getSecond();
       
        ensureQueueSize(queues, topic);
        if (word >= 0 && topic >= 0) {
          double score = value.get();
          String realWord = wordList.get(word);
          maybeEnqueue(queues.get(topic), realWord, score, numWordsToPrint);
        }
      }
      reader.close();
View Full Code Here

        bu.add(Bytes.toBytes(BayesConstants.HBASE_COLUMN_FAMILY), Bytes.toBytes(label), Bytes
            .toBytes(weightSumPerLabel));
        table.put(bu);
      }
    }
    output.collect(key, new DoubleWritable(weightSumPerLabel));
   
  }
View Full Code Here

        totalScore += freqBigram.getFrequency();
      }
      // normalize the co-occurrence based counts.
      for (Bigram.Frequency freqBigram : freqBigrams) {
        key = freqBigram.getBigram();
        value = new DoubleWritable(freqBigram.getFrequency() / totalScore);
        output.collect(key, value);
      }
      freqBigrams.clear();
    }
View Full Code Here

        try {
          StringTuple tuple = new StringTuple();
          tuple.add(BayesConstants.WEIGHT);
          tuple.add(label);
          tuple.add(token);
          DoubleWritable f = new DoubleWritable(Math.log(1.0 + dKJ) / lengthNormalisation);
          output.collect(tuple, f);
        } catch (IOException e) {
          throw new IllegalStateException(e);
        }
        return true;
View Full Code Here

    reporter.setStatus("Bayes Theta Normalizer Mapper: " + label);
   
    double weight = Math.log((value.get() + alphaI) / (labelWeightSum.get(label) + vocabCount));
    StringTuple thetaNormalizerTuple = new StringTuple(BayesConstants.LABEL_THETA_NORMALIZER);
    thetaNormalizerTuple.add(label);
    output.collect(thetaNormalizerTuple, new DoubleWritable(weight));
  }
View Full Code Here

    while (values.hasNext()) {
      reporter.setStatus("Feature Reducer:" + key);
      sum += values.next().get();
    }
    reporter.setStatus("Bayes Feature Reducer: " + key + " => " + sum);
    output.collect(key, new DoubleWritable(sum));
  }
View Full Code Here

                                           Path pathPattern,
                                           Configuration conf,
                                           Parameters params) throws IOException {
   
    StringTuple key = new StringTuple();
    DoubleWritable value = new DoubleWritable();
    String defaultLabel = params.get("defaultCat");
    FileStatus[] outputFiles = fs.globStatus(pathPattern);
    Map<String,Map<String,Integer>> confusionMatrix = new HashMap<String,Map<String,Integer>>();
   
    for (FileStatus fileStatus : outputFiles) {
      Path path = fileStatus.getPath();
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
      while (reader.next(key, value)) {
        String correctLabel = key.stringAt(1);
        String classifiedLabel = key.stringAt(2);
        Map<String,Integer> rowMatrix = confusionMatrix.get(correctLabel);
        if (rowMatrix == null) {
          rowMatrix = new HashMap<String,Integer>();
        }
        Integer count = Double.valueOf(value.get()).intValue();
        rowMatrix.put(classifiedLabel, count);
        confusionMatrix.put(correctLabel, rowMatrix);
       
      }
    }
View Full Code Here

      } else if (key.stringAt(0).equals(BayesConstants.DOCUMENT_FREQUENCY)) {
        String label = key.stringAt(1);
        Double labelDocumentCount = labelDocumentCounts.get(label);
        double logIdf = Math.log(labelDocumentCount / value.get());
        key.replaceAt(0, BayesConstants.WEIGHT);
        output.collect(key, new DoubleWritable(logIdf));
        reporter.setStatus("Bayes TfIdf Mapper: log(Idf): " + key);
      } else {
        throw new IllegalArgumentException("Unrecognized Tuple: " + key);
      }
    } else if (key.length() == 2) {
View Full Code Here

     
      StringTuple outputTuple = new StringTuple(BayesConstants.CLASSIFIER_TUPLE);
      outputTuple.add(correctLabel);
      outputTuple.add(classifiedLabel);
     
      output.collect(outputTuple, new DoubleWritable(1.0));
    } catch (InvalidDatastoreException e) {
      throw new IOException(e.toString());
    }
  }
View Full Code Here

TOP

Related Classes of org.apache.hadoop.io.DoubleWritable$Comparator

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., and is owned by ORACLE Inc. Contact coftware#gmail.com.