Package org.apache.hadoop.io

Examples of org.apache.hadoop.io.DoubleWritable$Comparator


      for (DoubleWritable vw : values) {
        double v = vw.get();
        Preconditions.checkArgument(!Double.isNaN(v), "Found NaN for topic=(%d,%d)", topicWord.getFirst(), topicWord.getSecond());
        accum += v;
      }
      context.write(topicWord, new DoubleWritable(accum));
    } else { // log sum sufficient statistics.
      double accum = Double.NEGATIVE_INFINITY;
      for (DoubleWritable vw : values) {
        double v = vw.get();
        Preconditions.checkArgument(!Double.isNaN(v), "Found NaN for topic = (%d,%d)", topicWord.getFirst(), topicWord.getSecond());
        accum = LDAUtil.logSum(accum, v);
        Preconditions.checkArgument(!Double.isNaN(accum), "Accumulated NaN for topic = (%d,%d)", topicWord.getFirst(), topicWord.getSecond());
      }
      context.write(topicWord, new DoubleWritable(accum));
    }
  }
View Full Code Here


    DenseMatrix pWgT = new DenseMatrix(numTopics, numWords);
    double[] logTotals = new double[numTopics];
    double ll = 0.0;

    IntPairWritable key = new IntPairWritable();
    DoubleWritable value = new DoubleWritable();
    for (FileStatus status : fs.globStatus(new Path(dir, "part-*"))) {
      Path path = status.getPath();
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, job);
      while (reader.next(key, value)) {
        int topic = key.getFirst();
        int word = key.getSecond();
        if (word == TOPIC_SUM_KEY) {
          logTotals[topic] = value.get();
          Preconditions.checkArgument(!Double.isInfinite(value.get()));
        } else if (topic == LOG_LIKELIHOOD_KEY) {
          ll = value.get();
        } else {
          Preconditions.checkArgument(topic >= 0, "topic should be non-negative, not %d", topic);
          Preconditions.checkArgument(word >= 0, "word should be non-negative not %d", word);
          Preconditions.checkArgument(pWgT.getQuick(topic, word) == 0.0);

          pWgT.setQuick(topic, word, value.get());
          Preconditions.checkArgument(!Double.isInfinite(pWgT.getQuick(topic, word)));
        }
      }
      reader.close();
    }
View Full Code Here

  private static void writeInitialState(Path statePath, int numTopics, int numWords) throws IOException {
    Configuration job = new Configuration();
    FileSystem fs = statePath.getFileSystem(job);

    DoubleWritable v = new DoubleWritable();

    Random random = RandomUtils.getRandom();

    for (int k = 0; k < numTopics; ++k) {
      Path path = new Path(statePath, "part-" + k);
      SequenceFile.Writer writer = new SequenceFile.Writer(fs, job, path, IntPairWritable.class, DoubleWritable.class);

      double total = 0.0; // total number of pseudo counts we made
      for (int w = 0; w < numWords; ++w) {
        Writable kw = new IntPairWritable(k, w);
        // A small amount of random noise, minimized by having a floor.
        double pseudocount = random.nextDouble() + 1.0E-8;
        total += pseudocount;
        v.set(Math.log(pseudocount));
        writer.append(kw, v);
      }
      Writable kTsk = new IntPairWritable(k, TOPIC_SUM_KEY);
      v.set(Math.log(total));
      writer.append(kTsk, v);

      writer.close();
    }
  }
View Full Code Here

    FileSystem fs = statePath.getFileSystem(job);

    double ll = 0.0;

    IntPairWritable key = new IntPairWritable();
    DoubleWritable value = new DoubleWritable();
    for (FileStatus status : fs.globStatus(new Path(statePath, "part-*"))) {
      Path path = status.getPath();
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, job);
      while (reader.next(key, value)) {
        if (key.getFirst() == LOG_LIKELIHOOD_KEY) {
          ll = value.get();
          break;
        }
      }
      reader.close();
    }
View Full Code Here

      long itemID = indexItemIDMap.get(itemIDIndex);
      for (SimilarItem similarItem : mostSimilarItems) {
        long otherItemID = similarItem.getItemID();
        if (itemID < otherItemID) {
          ctx.write(new EntityEntityWritable(itemID, otherItemID), new DoubleWritable(similarItem.getSimilarity()));
        } else {
          ctx.write(new EntityEntityWritable(otherItemID, itemID), new DoubleWritable(similarItem.getSimilarity()));
        }
      }
    }
  }
View Full Code Here

    indexItemIDMap.put(56, 56L);

    Mapper<IntWritable,VectorWritable,EntityEntityWritable,DoubleWritable>.Context context =
      EasyMock.createMock(Mapper.Context.class);

    context.write(new EntityEntityWritable(34L, 56L), new DoubleWritable(0.9));

    EasyMock.replay(context);

    Vector vector = new RandomAccessSparseVector(Integer.MAX_VALUE);
    vector.set(12, 0.2);
View Full Code Here

  @Test
  public void testMostSimilarItemPairsReducer() throws Exception {
    Reducer<EntityEntityWritable,DoubleWritable,EntityEntityWritable,DoubleWritable>.Context context =
      EasyMock.createMock(Reducer.Context.class);

    context.write(new EntityEntityWritable(123L, 456L), new DoubleWritable(0.5));

    EasyMock.replay(context);

    new MostSimilarItemPairsReducer().reduce(new EntityEntityWritable(123L, 456L),
        Arrays.asList(new DoubleWritable(0.5), new DoubleWritable(0.5)), context);

    EasyMock.verify(context);
  }
View Full Code Here

                  Context context) throws IOException, InterruptedException {
    Object candidate = StringUtils.fromString(value.toString());
   
    double fitness = evaluator.getFitness(candidate, null);
   
    context.write(key, new DoubleWritable(fitness));
  }
View Full Code Here

    Path output = new Path(outpath, "output.sorted");
    sorter.merge(outfiles, output);
   
    // import the evaluations
    LongWritable key = new LongWritable();
    DoubleWritable value = new DoubleWritable();
    Reader reader = new Reader(fs, output, conf);
    try {
      while (reader.next(key, value)) {
        evaluations.add(value.get());
      }
    } finally {
      reader.close();
    }
  }
View Full Code Here

  protected void reduce(Gram ngram, Iterable<Gram> values, Context context) throws IOException, InterruptedException {

    int[] gramFreq = {-1, -1};

    if (ngram.getType() == Gram.Type.UNIGRAM && emitUnigrams) {
      DoubleWritable dd = new DoubleWritable(ngram.getFrequency());
      Text t = new Text(ngram.getString());
      context.write(t, dd);
      return;
    }
    // TODO better way to handle errors? Wouldn't an exception thrown here
    // cause hadoop to re-try the job?
    String[] gram = new String[2];
    for (Gram value : values) {

      int pos = value.getType() == Gram.Type.HEAD ? 0 : 1;

      if (gramFreq[pos] != -1) {
        log.warn("Extra {} for {}, skipping", value.getType(), ngram);
        if (value.getType() == Gram.Type.HEAD) {
          context.getCounter(Skipped.EXTRA_HEAD).increment(1);
        } else {
          context.getCounter(Skipped.EXTRA_TAIL).increment(1);
        }
        return;
      }

      gram[pos] = value.getString();
      gramFreq[pos] = value.getFrequency();
    }

    if (gramFreq[0] == -1) {
      log.warn("Missing head for {}, skipping.", ngram);
      context.getCounter(Skipped.MISSING_HEAD).increment(1);
      return;
    } else if (gramFreq[1] == -1) {
      log.warn("Missing tail for {}, skipping", ngram);
      context.getCounter(Skipped.MISSING_TAIL).increment(1);
      return;
    }

    int k11 = ngram.getFrequency(); /* a&b */
    int k12 = gramFreq[0] - ngram.getFrequency(); /* a&!b */
    int k21 = gramFreq[1] - ngram.getFrequency(); /* !b&a */
    int k22 = (int) (ngramTotal - (gramFreq[0] + gramFreq[1] - ngram.getFrequency())); /* !a&!b */

    try {
      double llr = ll.logLikelihoodRatio(k11, k12, k21, k22);
      if (llr < minLLRValue) {
        context.getCounter(Skipped.LESS_THAN_MIN_LLR).increment(1);
        return;
      }
      DoubleWritable dd = new DoubleWritable(llr);
      Text t = new Text(ngram.getString());
      context.write(t, dd);
    } catch (IllegalArgumentException ex) {
      context.getCounter(Skipped.LLR_CALCULATION_ERROR).increment(1);
      log.error("Problem calculating LLR ratio: " + ex.getMessage());
View Full Code Here

TOP

Related Classes of org.apache.hadoop.io.DoubleWritable$Comparator

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.