Package org.apache.mahout.common

Examples of org.apache.mahout.common.IntPairWritable$Frequency


      SequenceFile.Writer writer = new SequenceFile.Writer(fs, job, path, IntPairWritable.class,
          DoubleWritable.class);
     
      double total = 0.0; // total number of pseudo counts we made
      for (int w = 0; w < numWords; ++w) {
        IntPairWritable kw = new IntPairWritable(k, w);
        // A small amount of random noise, minimized by having a floor.
        double pseudocount = random.nextDouble() + 1.0E-8;
        total += pseudocount;
        v.set(Math.log(pseudocount));
        writer.append(kw, v);
      }
      IntPairWritable kTsk = new IntPairWritable(k, TOPIC_SUM_KEY);
      v.set(Math.log(total));
      writer.append(kTsk, v);
     
      writer.close();
    }
View Full Code Here


    Path dir = new Path(statePath);
    FileSystem fs = dir.getFileSystem(job);
   
    double ll = 0.0;
   
    IntPairWritable key = new IntPairWritable();
    DoubleWritable value = new DoubleWritable();
    for (FileStatus status : fs.globStatus(new Path(dir, "part-*"))) {
      Path path = status.getPath();
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, job);
      while (reader.next(key, value)) {
        if (key.getFirst() == LOG_LIKELIHOOD_KEY) {
          ll = value.get();
          break;
        }
      }
      reader.close();
View Full Code Here

   
    DenseMatrix pWgT = new DenseMatrix(numTopics, numWords);
    double[] logTotals = new double[numTopics];
    double ll = 0.0;
   
    IntPairWritable key = new IntPairWritable();
    DoubleWritable value = new DoubleWritable();
    for (FileStatus status : fs.globStatus(new Path(dir, "part-*"))) {
      Path path = status.getPath();
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, job);
      while (reader.next(key, value)) {
        int topic = key.getFirst();
        int word = key.getSecond();
        if (word == TOPIC_SUM_KEY) {
          logTotals[topic] = value.get();
          if (Double.isInfinite(value.get())) {
            throw new IllegalArgumentException();
          }
View Full Code Here

      int w = e.index();
     
      for (int k = 0; k < state.getNumTopics(); ++k) {
        v.set(doc.phi(k, w) + Math.log(e.get()));
       
        IntPairWritable kw = new IntPairWritable(k, w);
       
        // ouput (topic, word)'s logProb contribution
        context.write(kw, v);
        logTotals[k] = LDAUtil.logSum(logTotals[k], v.get());
      }
    }
   
    // Output the totals for the statistics. This is to make
    // normalizing a lot easier.
    for (int k = 0; k < state.getNumTopics(); ++k) {
      IntPairWritable kw = new IntPairWritable(k, LDADriver.TOPIC_SUM_KEY);
      v.set(logTotals[k]);
      assert !Double.isNaN(v.get());
      context.write(kw, v);
    }
    IntPairWritable llk = new IntPairWritable(LDADriver.LOG_LIKELIHOOD_KEY, LDADriver.LOG_LIKELIHOOD_KEY);
    // Output log-likelihoods.
    v.set(doc.getLogLikelihood());
    context.write(llk, v);
  }
View Full Code Here

    DenseMatrix pWgT = new DenseMatrix(numTopics, numWords);
    double[] logTotals = new double[numTopics];
    double ll = 0.0;

    IntPairWritable key = new IntPairWritable();
    DoubleWritable value = new DoubleWritable();
    for (FileStatus status : fs.globStatus(new Path(dir, "part-*"))) {
      Path path = status.getPath();
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, job);
      while (reader.next(key, value)) {
        int topic = key.getFirst();
        int word = key.getSecond();
        if (word == TOPIC_SUM_KEY) {
          logTotals[topic] = value.get();
          Preconditions.checkArgument(!Double.isInfinite(value.get()));
        } else if (topic == LOG_LIKELIHOOD_KEY) {
          ll = value.get();
View Full Code Here

      Path path = new Path(statePath, "part-" + k);
      SequenceFile.Writer writer = new SequenceFile.Writer(fs, job, path, IntPairWritable.class, DoubleWritable.class);

      double total = 0.0; // total number of pseudo counts we made
      for (int w = 0; w < numWords; ++w) {
        Writable kw = new IntPairWritable(k, w);
        // A small amount of random noise, minimized by having a floor.
        double pseudocount = random.nextDouble() + 1.0E-8;
        total += pseudocount;
        v.set(Math.log(pseudocount));
        writer.append(kw, v);
      }
      Writable kTsk = new IntPairWritable(k, TOPIC_SUM_KEY);
      v.set(Math.log(total));
      writer.append(kTsk, v);

      writer.close();
    }
View Full Code Here

  private static double findLL(Path statePath, Configuration job) throws IOException {
    FileSystem fs = statePath.getFileSystem(job);

    double ll = 0.0;

    IntPairWritable key = new IntPairWritable();
    DoubleWritable value = new DoubleWritable();
    for (FileStatus status : fs.globStatus(new Path(statePath, "part-*"))) {
      Path path = status.getPath();
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, job);
      while (reader.next(key, value)) {
        if (key.getFirst() == LOG_LIKELIHOOD_KEY) {
          ll = value.get();
          break;
        }
      }
      reader.close();
View Full Code Here

                                                                        PathType.GLOB,
                                                                        null,
                                                                        null,
                                                                        true,
                                                                        job)) {
      IntPairWritable key = record.getFirst();
      DoubleWritable value = record.getSecond();
      int topic = key.getFirst();
      int word = key.getSecond();
      if (word == TOPIC_SUM_KEY) {
        logTotals[topic] = value.get();
        Preconditions.checkArgument(!Double.isInfinite(value.get()));
      } else if (topic == LOG_LIKELIHOOD_KEY) {
        ll = value.get();
View Full Code Here

      SequenceFile.Writer writer = new SequenceFile.Writer(fs, job, path, IntPairWritable.class, DoubleWritable.class);

      try {
        double total = 0.0; // total number of pseudo counts we made
        for (int w = 0; w < numWords; ++w) {
          Writable kw = new IntPairWritable(k, w);
          // A small amount of random noise, minimized by having a floor.
          double pseudocount = random.nextDouble() + 1.0E-8;
          total += pseudocount;
          v.set(Math.log(pseudocount));
          writer.append(kw, v);
        }
        Writable kTsk = new IntPairWritable(k, TOPIC_SUM_KEY);
        v.set(Math.log(total));
        writer.append(kTsk, v);
      } finally {
        Closeables.closeQuietly(writer);
      }
View Full Code Here

      Path path = new Path(statePath, "part-" + k);
      SequenceFile.Writer writer = new SequenceFile.Writer(fs, job, path, IntPairWritable.class, DoubleWritable.class);

      try {
        for (int w = 0; w < state.getNumWords(); ++w) {
          Writable kw = new IntPairWritable(k, w);
          v.set(state.logProbWordGivenTopic(w,k) + state.getLogTotal(k));
          writer.append(kw, v);
        }
        Writable kTsk = new IntPairWritable(k, TOPIC_SUM_KEY);
        v.set(state.getLogTotal(k));
        writer.append(kTsk, v);
      } finally {
        Closeables.closeQuietly(writer);
      }
    }
    Path path = new Path(statePath, "part-" + LOG_LIKELIHOOD_KEY);
    SequenceFile.Writer writer = new SequenceFile.Writer(fs, job, path, IntPairWritable.class, DoubleWritable.class);
    try {
      Writable kTsk = new IntPairWritable(LOG_LIKELIHOOD_KEY,LOG_LIKELIHOOD_KEY);
      v.set(state.getLogLikelihood());
      writer.append(kTsk, v);
    } finally {
      Closeables.closeQuietly(writer);
    }
View Full Code Here

TOP

Related Classes of org.apache.mahout.common.IntPairWritable$Frequency

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.