Package org.apache.mahout.math

Examples of org.apache.mahout.math.Vector


  public void reduce(Text key,
                     Iterator<VectorWritable> values,
                     OutputCollector<Text,Canopy> output,
                     Reporter reporter) throws IOException {
    while (values.hasNext()) {
      Vector point = values.next().get();
      canopyClusterer.addPointToCanopies(point, canopies, reporter);
    }
    for (Canopy canopy : canopies) {
      output.collect(new Text(canopy.getIdentifier()), canopy);
    }
View Full Code Here


  }
 
  @Override
  public void close() throws IOException {
    for (Canopy canopy : canopies) {
      Vector centroid = canopy.computeCentroid();
      VectorWritable vw = new VectorWritable();
      vw.set(centroid);
      outputCollector.collect(new Text("centroid"), vw);
    }
    super.close();
View Full Code Here

  public void reduce(WritableComparable<?> key,
                     Iterator<VectorWritable> values,
                     OutputCollector<WritableComparable<?>,VectorWritable> output,
                     Reporter reporter) throws IOException {
   
    Vector vector = new RandomAccessSparseVector(key.toString(), dimension, 10);
    while (values.hasNext()) {
      VectorWritable value = values.next();
      value.get().addTo(vector);
    }
    if (normPower != PartialVectorMerger.NO_NORMALIZING) {
      vector = vector.normalize(normPower);
    }
    if (sequentialAccess) {
      vector = new SequentialAccessSparseVector(vector);
    }
    vectorWritable.set(vector);
View Full Code Here

     * is empty.
     */
    int nextCanopyId = 0;
    while (!points.isEmpty()) {
      Iterator<Vector> ptIter = points.iterator();
      Vector p1 = ptIter.next();
      ptIter.remove();
      Canopy canopy = new Canopy(p1, nextCanopyId++);
      canopies.add(canopy);
      while (ptIter.hasNext()) {
        Vector p2 = ptIter.next();
        double dist = measure.distance(p1, p2);
        // Put all points that are within distance threshold T1 into the canopy
        if (dist < t1) {
          canopy.addPoint(p2);
        }
View Full Code Here

    int beginIndex = formattedString.indexOf('{');
    String id = formattedString.substring(0, beginIndex);
    String centroid = formattedString.substring(beginIndex);
    if (id.charAt(0) == 'C') {
      int canopyId = Integer.parseInt(formattedString.substring(1, beginIndex - 2));
      Vector canopyCentroid = AbstractVector.decodeVector(centroid);
      return new Canopy(canopyCentroid, canopyId);
    }
    return null;
  }
View Full Code Here

  @Override
  public void paint(Graphics g) {
    super.plotSampleData(g);
    Graphics2D g2 = (Graphics2D) g;
   
    Vector dv = new DenseVector(2);
    int i = DisplayDirichlet.result.size() - 1;
    for (Model<VectorWritable>[] models : result) {
      g2.setStroke(new BasicStroke(i == 0 ? 3 : 1));
      g2.setColor(colors[Math.min(DisplayDirichlet.colors.length - 1, i--)]);
      for (Model<VectorWritable> m : models) {
        AsymmetricSampledNormalModel mm = (AsymmetricSampledNormalModel) m;
        dv.set(0, mm.getStdDev().get(0) * 3);
        dv.set(1, mm.getStdDev().get(1) * 3);
        if (DisplayDirichlet.isSignificant(mm)) {
          DisplayDirichlet.plotEllipse(g2, mm.getMean(), dv);
        }
      }
    }
View Full Code Here

                    int desiredRank,
                    Matrix eigenVectors,
                    List<Double> eigenValues,
                    boolean isSymmetric) {
    log.info("Finding {} singular vectors of matrix with {} rows, via Lanczos", desiredRank, corpus.numRows());
    Vector currentVector = getInitialVector(corpus);
    Vector previousVector = new DenseVector(currentVector.size());
    Matrix basis = new SparseRowMatrix(new int[]{desiredRank, corpus.numCols()});
    basis.assignRow(0, currentVector);
    double alpha = 0;
    double beta = 0;
    DoubleMatrix2D triDiag = new DenseDoubleMatrix2D(desiredRank, desiredRank);
    for (int i = 1; i < desiredRank; i++) {
      startTime(TimingSection.ITERATE);
      Vector nextVector = isSymmetric ? corpus.times(currentVector) : corpus.timesSquared(currentVector);
      log.info("{} passes through the corpus so far...", i);
      calculateScaleFactor(nextVector);
      nextVector.assign(new Scale(1 / scaleFactor));
      nextVector.assign(previousVector, new PlusMult(-beta));
      // now orthogonalize
      alpha = currentVector.dot(nextVector);
      nextVector.assign(currentVector, new PlusMult(-alpha));
      endTime(TimingSection.ITERATE);
      startTime(TimingSection.ORTHOGANLIZE);
      orthoganalizeAgainstAllButLast(nextVector, basis);
      endTime(TimingSection.ORTHOGANLIZE);
      // and normalize
      beta = nextVector.norm(2);
      if (outOfRange(beta) || outOfRange(alpha)) {
        log.warn("Lanczos parameters out of range: alpha = {}, beta = {}.  Bailing out early!", alpha, beta);
        break;
      }
      final double b = beta;
      nextVector.assign(new Scale(1 / b));
      basis.assignRow(i, nextVector);
      previousVector = currentVector;
      currentVector = nextVector;
      // save the projections and norms!
      triDiag.set(i - 1, i - 1, alpha);
      if (i < desiredRank - 1) {
        triDiag.set(i - 1, i, beta);
        triDiag.set(i, i - 1, beta);
      }
    }
    startTime(TimingSection.TRIDIAG_DECOMP);

    log.info("Lanczos iteration complete - now to diagonalize the tri-diagonal auxiliary matrix.");
    // at this point, have tridiag all filled out, and basis is all filled out, and orthonormalized
    EigenvalueDecomposition decomp = new EigenvalueDecomposition(triDiag);

    DoubleMatrix2D eigenVects = decomp.getV();
    DoubleMatrix1D eigenVals = decomp.getRealEigenvalues();
    endTime(TimingSection.TRIDIAG_DECOMP);
    startTime(TimingSection.FINAL_EIGEN_CREATE);

    for (int i = 0; i < basis.numRows() - 1; i++) {
      Vector realEigen = new DenseVector(corpus.numCols());
      // the eigenvectors live as columns of V, in reverse order.  Weird but true.
      DoubleMatrix1D ejCol = eigenVects.viewColumn(basis.numRows() - i - 1);
      for (int j = 0; j < ejCol.size(); j++) {
        double d = ejCol.getQuick(j);
        realEigen.assign(basis.getRow(j), new PlusMult(d));
      }
      realEigen = realEigen.normalize();
      eigenVectors.assignRow(i, realEigen);
      log.info("Eigenvector {} found with eigenvalue {}", i, eigenVals.get(i));
      eigenValues.add(eigenVals.get(i));
    }
    log.info("LanczosSolver finished.");
View Full Code Here

   *          int number of words in the vocabulary
   * @param sparsity
   *          E[count] for each word
   */
  private Vector generateRandomDoc(int numWords, double sparsity) throws MathException {
    Vector v = new DenseVector(numWords);
    PoissonDistribution dist = new PoissonDistributionImpl(sparsity);
    for (int i = 0; i < numWords; i++) {
      // random integer
      v.setQuick(i, dist.inverseCumulativeProbability(random.nextDouble()) + 1);
    }
    return v;
  }
View Full Code Here

 
  private void runTest(int numWords, double sparsity, int numTests) throws MathException {
    LDAState state = generateRandomState(numWords, NUM_TOPICS);
    LDAInference lda = new LDAInference(state);
    for (int t = 0; t < numTests; ++t) {
      Vector v = generateRandomDoc(numWords, sparsity);
      LDAInference.InferredDocument doc = lda.infer(v);
     
      assertEquals("wordCounts", doc.getWordCounts(), v);
      assertNotNull("gamma", doc.getGamma());
      for (Iterator<Vector.Element> iter = v.iterateNonZero(); iter.hasNext();) {
        int w = iter.next().index();
        for (int k = 0; k < NUM_TOPICS; ++k) {
          double logProb = doc.phi(k, w);
          assertTrue(k + " " + w + " logProb " + logProb, logProb <= 0.0);
        }
View Full Code Here

      nextVector.assign(basis.getRow(i), new PlusMult(-alpha));
    }
  }

  protected Vector getInitialVector(VectorIterable corpus) {
    Vector v = null;
    for (MatrixSlice slice : corpus) {
      Vector vector;
      if (slice == null || (vector = slice.vector()) == null || vector.getLengthSquared() == 0) {
        continue;
      }
      scaleFactor += vector.getLengthSquared();
      if (v == null) {
        v = new DenseVector(vector.size()).plus(vector);
      } else {
        v.assign(vector, plus);
      }
    }
    v.assign(div(v.norm(2)));
View Full Code Here

TOP

Related Classes of org.apache.mahout.math.Vector

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.