Package org.apache.mahout.common

Examples of org.apache.mahout.common.IntPairWritable$Frequency


    List<Queue<Pair<String,Double>>> queues = Lists.newArrayList();
    Map<Integer,Double> expSums = Maps.newHashMap();
    for (Pair<IntPairWritable,DoubleWritable> record :
         new SequenceFileDirIterable<IntPairWritable, DoubleWritable>(
             new Path(dir, "part-*"), PathType.GLOB, null, null, true, job)) {
      IntPairWritable key = record.getFirst();
      int topic = key.getFirst();
      int word = key.getSecond();
      ensureQueueSize(queues, topic);
      if (word >= 0 && topic >= 0) {
        double score = record.getSecond().get();
        if (expSums.get(topic) == null) {
          expSums.put(topic, 0.0);
View Full Code Here


                                                      int numWordsToPrint) throws IOException {
    FileSystem fs = new Path(dir).getFileSystem(job);
   
    List<PriorityQueue<StringDoublePair>> queues = new ArrayList<PriorityQueue<StringDoublePair>>();
   
    IntPairWritable key = new IntPairWritable();
    DoubleWritable value = new DoubleWritable();
    for (FileStatus status : fs.globStatus(new Path(dir, "part-*"))) {
      Path path = status.getPath();
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, job);
      while (reader.next(key, value)) {
        int topic = key.getFirst();
        int word = key.getSecond();
       
        ensureQueueSize(queues, topic);
        if (word >= 0 && topic >= 0) {
          double score = value.get();
          String realWord = wordList.get(word);
View Full Code Here

    List<Queue<Pair<String,Double>>> queues = Lists.newArrayList();
    Map<Integer,Double> expSums = Maps.newHashMap();
    for (Pair<IntPairWritable,DoubleWritable> record
        : new SequenceFileDirIterable<IntPairWritable, DoubleWritable>(
            new Path(dir, "part-*"), PathType.GLOB, null, null, true, job)) {
      IntPairWritable key = record.getFirst();
      int topic = key.getFirst();
      int word = key.getSecond();
      ensureQueueSize(queues, topic);
      if (word >= 0 && topic >= 0) {
        double score = record.getSecond().get();
        if (expSums.get(topic) == null) {
          expSums.put(topic, 0.0);
View Full Code Here

    List<PriorityQueue<StringDoublePair>> queues = new ArrayList<PriorityQueue<StringDoublePair>>();
    Map<Integer,Double> expSums = new HashMap<Integer, Double>();
    for (Pair<IntPairWritable,DoubleWritable> record :
         new SequenceFileDirIterable<IntPairWritable, DoubleWritable>(
             new Path(dir, "part-*"), PathType.GLOB, null, null, true, job)) {
      IntPairWritable key = record.getFirst();
      int topic = key.getFirst();
      int word = key.getSecond();
      ensureQueueSize(queues, topic);
      if (word >= 0 && topic >= 0) {
        double score = record.getSecond().get();
        if(expSums.get(topic) == null) {
          expSums.put(topic, 0d);
View Full Code Here

    protected void map(LongWritable key, Text value, Context ctx) throws IOException, InterruptedException {
      String[] tokens = TasteHadoopUtils.splitPrefTokens(value.toString());
      int userIDIndex = TasteHadoopUtils.idToIndex(Long.parseLong(tokens[0]));
      int itemIDIndex = TasteHadoopUtils.idToIndex(Long.parseLong(tokens[1]));
      double rating = Double.parseDouble(tokens[2]);
      ctx.write(new IntPairWritable(userIDIndex, itemIDIndex), new DoubleWritable(rating));
    }
View Full Code Here

      int w = e.index();
     
      for (int k = 0; k < state.getNumTopics(); ++k) {
        v.set(doc.phi(k, w) + Math.log(e.get()));
       
        IntPairWritable kw = new IntPairWritable(k, w);
       
        // output (topic, word)'s logProb contribution
        context.write(kw, v);
        logTotals[k] = LDAUtil.logSum(logTotals[k], v.get());
      }
    }

    // Output the totals for the statistics. This is to make
    // normalizing a lot easier.
    for (int k = 0; k < state.getNumTopics(); ++k) {
      IntPairWritable kw = new IntPairWritable(k, LDADriver.TOPIC_SUM_KEY);
      v.set(logTotals[k]);
      assert !Double.isNaN(v.get());
      context.write(kw, v);
    }
    IntPairWritable llk = new IntPairWritable(LDADriver.LOG_LIKELIHOOD_KEY, LDADriver.LOG_LIKELIHOOD_KEY);
    // Output log-likelihoods.
    v.set(doc.getLogLikelihood());
    context.write(llk, v);
  }
View Full Code Here

                                                                        PathType.GLOB,
                                                                        null,
                                                                        null,
                                                                        true,
                                                                        job)) {
      IntPairWritable key = record.getFirst();
      DoubleWritable value = record.getSecond();
      int topic = key.getFirst();
      int word = key.getSecond();
      if (word == TOPIC_SUM_KEY) {
        logTotals[topic] = value.get();
        Preconditions.checkArgument(!Double.isInfinite(value.get()));
      } else if (topic == LOG_LIKELIHOOD_KEY) {
        ll = value.get();
View Full Code Here

      Path path = new Path(statePath, "part-" + k);
      SequenceFile.Writer writer = new SequenceFile.Writer(fs, job, path, IntPairWritable.class, DoubleWritable.class);

      double total = 0.0; // total number of pseudo counts we made
      for (int w = 0; w < numWords; ++w) {
        Writable kw = new IntPairWritable(k, w);
        // A small amount of random noise, minimized by having a floor.
        double pseudocount = random.nextDouble() + 1.0E-8;
        total += pseudocount;
        v.set(Math.log(pseudocount));
        writer.append(kw, v);
      }
      Writable kTsk = new IntPairWritable(k, TOPIC_SUM_KEY);
      v.set(Math.log(total));
      writer.append(kTsk, v);

      writer.close();
    }
View Full Code Here

    for (int k = 0; k < state.getNumTopics(); ++k) {
      Path path = new Path(statePath, "part-" + k);
      SequenceFile.Writer writer = new SequenceFile.Writer(fs, job, path, IntPairWritable.class, DoubleWritable.class);

      for (int w = 0; w < state.getNumWords(); ++w) {
        Writable kw = new IntPairWritable(k, w);
        v.set(state.logProbWordGivenTopic(w,k) + state.getLogTotal(k));
        writer.append(kw, v);
      }
      Writable kTsk = new IntPairWritable(k, TOPIC_SUM_KEY);
      v.set(state.getLogTotal(k));
      writer.append(kTsk, v);
      writer.close();
    }
    Path path = new Path(statePath, "part-" + LOG_LIKELIHOOD_KEY);
    SequenceFile.Writer writer = new SequenceFile.Writer(fs, job, path, IntPairWritable.class, DoubleWritable.class);
    Writable kTsk = new IntPairWritable(LOG_LIKELIHOOD_KEY,LOG_LIKELIHOOD_KEY);
    v.set(state.getLogLikelihood());
    writer.append(kTsk, v);
    writer.close();
  }
View Full Code Here

    List<Queue<Pair<String,Double>>> queues = Lists.newArrayList();
    Map<Integer,Double> expSums = Maps.newHashMap();
    for (Pair<IntPairWritable,DoubleWritable> record :
         new SequenceFileDirIterable<IntPairWritable, DoubleWritable>(
             new Path(dir, "part-*"), PathType.GLOB, null, null, true, job)) {
      IntPairWritable key = record.getFirst();
      int topic = key.getFirst();
      int word = key.getSecond();
      ensureQueueSize(queues, topic);
      if (word >= 0 && topic >= 0) {
        double score = record.getSecond().get();
        if (expSums.get(topic) == null) {
          expSums.put(topic, 0.0);
View Full Code Here

TOP

Related Classes of org.apache.mahout.common.IntPairWritable$Frequency

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.