Package org.apache.mahout.matrix

Examples of org.apache.mahout.matrix.DenseMatrix


    return v;
  }

  private LDAState generateRandomState(int numWords, int numTopics) {
    double topicSmoothing = 50.0 / numTopics; // whatever
    Matrix m = new DenseMatrix(numTopics,numWords);
    double[] logTotals = new double[numTopics];
    for(int k = 0; k < numTopics; ++k) {
      double total = 0.0; // total number of pseudo counts we made
      for(int w = 0; w < numWords; ++w) {
        // A small amount of random noise, minimized by having a floor.
        double pseudocount = random.nextDouble() + 1.0E-10;
        total += pseudocount;
        m.setQuick(k,w,Math.log(pseudocount));
      }

      logTotals[k] = Math.log(total);
    }
View Full Code Here


    return v;
  }

  private LDAState generateRandomState(int numWords, int numTopics) {
    double topicSmoothing = 50.0 / numTopics; // whatever
    Matrix m = new DenseMatrix(numTopics, numWords);
    double[] logTotals = new double[numTopics];

    for (int k = 0; k < numTopics; ++k) {
      double total = 0.0; // total number of pseudo counts we made
      for (int w = 0; w < numWords; ++w) {
        // A small amount of random noise, minimized by having a floor.
        double pseudocount = random.nextDouble() + 1.0E-10;
        total += pseudocount;
        m.setQuick(k, w, Math.log(pseudocount));
      }

      logTotals[k] = Math.log(total);
    }
View Full Code Here

    // initialize variational approximation to p(z|doc)
    Vector gamma = new DenseVector(state.numTopics);
    gamma.assign(state.topicSmoothing + docTotal / state.numTopics);
    Vector nextGamma = new DenseVector(state.numTopics);

    DenseMatrix phi = new DenseMatrix(state.numTopics, docLength);

    // digamma is expensive, precompute
    Vector digammaGamma = digamma(gamma);
    // and log normalize:
    double digammaSumGamma = digamma(gamma.zSum());
    digammaGamma = digammaGamma.plus(-digammaSumGamma);

    Map<Integer, Integer> columnMap = new HashMap<Integer, Integer>();

    int iteration = 0;
    final int MAX_ITER = 20;

    boolean converged = false;
    double oldLL = 1;
    while (!converged && iteration < MAX_ITER) {
      nextGamma.assign(state.topicSmoothing); // nG := alpha, for all topics

      int mapping = 0;
      for (Iterator<Vector.Element> iter = wordCounts.iterateNonZero();
          iter.hasNext();) {
      Vector.Element e = iter.next();
        int word = e.index();
        Vector phiW = eStepForWord(word, digammaGamma);
        phi.assignColumn(mapping, phiW);
        if (iteration == 0) { // first iteration
          columnMap.put(word, mapping);
        }

        for (int k = 0; k < nextGamma.size(); ++k) {
View Full Code Here

    double topicSmoothing = Double.parseDouble(job.get(TOPIC_SMOOTHING_KEY));

    Path dir = new Path(statePath);
    FileSystem fs = dir.getFileSystem(job);

    DenseMatrix pWgT = new DenseMatrix(numTopics, numWords);
    double[] logTotals = new double[numTopics];
    double ll = 0.0;

    IntPairWritable key = new IntPairWritable();
    DoubleWritable value = new DoubleWritable();
    for (FileStatus status : fs.globStatus(new Path(dir, "part-*"))) {
      Path path = status.getPath();
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, job);
      while (reader.next(key, value)) {
        int topic = key.getX();
        int word = key.getY();
        if (word == TOPIC_SUM_KEY) {
          logTotals[topic] = value.get();
          if (Double.isInfinite(value.get())) {
            throw new IllegalArgumentException();
          }
        } else if (topic == LOG_LIKELIHOOD_KEY) {
          ll = value.get();
        } else {
          //System.out.println(topic + " " + word);
          if (!(topic >= 0 && word >= 0)) {
            throw new IllegalArgumentException(topic + " " + word);
          }
          if (pWgT.getQuick(topic, word) != 0.0) {
            throw new IllegalArgumentException();
          }
          pWgT.setQuick(topic, word, value.get());
          if (Double.isInfinite(pWgT.getQuick(topic, word))) {
            throw new IllegalArgumentException();
          }
        }
      }
      reader.close();
View Full Code Here

TOP

Related Classes of org.apache.mahout.matrix.DenseMatrix

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.