Package org.apache.mahout.math

Examples of org.apache.mahout.math.RandomAccessSparseVector$NonZeroIterator


                       Iterator<VectorWritable> it,
                       OutputCollector<IntWritable,VectorWritable> out,
                       Reporter reporter) throws IOException {
      Vector accumulator;
      if(it.hasNext()) {
        accumulator = new RandomAccessSparseVector(it.next().get());
      } else {
        return;
      }
      while(it.hasNext()) {
        Vector row = it.next().get();
View Full Code Here


        if(!(inputVector instanceof SequentialAccessSparseVector || inputVector instanceof DenseVector)) {
          inputVector = new SequentialAccessSparseVector(inputVector);
        }
        int outDim = conf.getInt(OUTPUT_VECTOR_DIMENSION, Integer.MAX_VALUE);
        outputVector = conf.getBoolean(IS_SPARSE_OUTPUT, false)
                     ? new RandomAccessSparseVector(outDim, 10)
                     : new DenseVector(outDim);
      } catch (IOException ioe) {
        throw new IllegalStateException(ioe);
      }
    }
View Full Code Here

 
  public static List<VectorWritable> getPointsWritable(double[][] raw) {
    List<VectorWritable> points = new ArrayList<VectorWritable>();
    int i = 0;
    for (double[] fr : raw) {
      Vector vec = new RandomAccessSparseVector(String.valueOf(i++), fr.length);
      vec.assign(fr);
      points.add(new VectorWritable(vec));
    }
    return points;
  }
View Full Code Here

 
  private static List<VectorWritable> getPointsWritable(double[][] raw) {
    List<VectorWritable> points = new ArrayList<VectorWritable>();
    int i = 0;
    for (double[] fr : raw) {
      Vector vec = new RandomAccessSparseVector(String.valueOf(i++), fr.length);
      vec.assign(fr);
      points.add(new VectorWritable(vec));
    }
    return points;
  }
View Full Code Here

 
  private static List<Vector> getPoints(double[][] raw) {
    List<Vector> points = new ArrayList<Vector>();
    int i = 0;
    for (double[] fr : raw) {
      Vector vec = new RandomAccessSparseVector(String.valueOf(i++), fr.length);
      vec.assign(fr);
      points.add(vec);
    }
    return points;
  }
View Full Code Here

    @Override
    public void configure(JobConf conf) {
      int outputDimension = conf.getInt(OUTPUT_VECTOR_DIMENSION, Integer.MAX_VALUE);
      outputVector = conf.getBoolean(IS_SPARSE_OUTPUT, false)
                   ? new RandomAccessSparseVector(outputDimension, 10)
                   : new DenseVector(outputDimension);
    }
View Full Code Here

 
  private static List<VectorWritable> getPoints(double[][] raw) {
    List<VectorWritable> points = new ArrayList<VectorWritable>();
    int i = 0;
    for (double[] fr : raw) {
      Vector vec = new RandomAccessSparseVector(String.valueOf(i++), fr.length);
      vec.assign(fr);
      points.add(new VectorWritable(vec));
    }
    return points;
  }
View Full Code Here

  }
 
  @Override
  public void setExpectations(String field, int numTerms, boolean storeOffsets, boolean storePositions) {
    this.field = field;
    vector = new RandomAccessSparseVector(termInfo.totalTerms(field));
    this.numTerms = numTerms;
  }
View Full Code Here

    if (values.hasNext() == false) {
      return;
    }
    StringTuple value = values.next();
   
    Vector vector = new RandomAccessSparseVector(key.toString(), dimension, value.length()); // guess at
                                                                                             // initial size
   
    if (maxNGramSize >= 2) {
      ShingleFilter sf = new ShingleFilter(new IteratorTokenStream(value.getEntries().iterator()),
          maxNGramSize);
     
      do {
        String term = ((TermAttribute) sf.getAttribute(TermAttribute.class)).term();
        if (term.length() > 0) { // ngram
          if (dictionary.containsKey(term) == false) {
            continue;
          }
          int termId = dictionary.get(term);
          vector.setQuick(termId, vector.getQuick(termId) + 1);
        }
      } while (sf.incrementToken());
     
      sf.end();
      sf.close();
    } else {
      for (String term : value.getEntries()) {
        if (term.length() > 0) { // unigram
          if (dictionary.containsKey(term) == false) {
            continue;
          }
          int termId = dictionary.get(term);
          vector.setQuick(termId, vector.getQuick(termId) + 1);
        }
      }
    }
    if (sequentialAccess) {
      vector = new SequentialAccessSparseVector(vector);
    }
    // if the vector has no nonZero entries (nothing in the dictionary), let's not waste space sending it to disk.
    if(vector.getNumNondefaultElements() > 0) {
      vectorWritable.set(vector);
      output.collect(key, vectorWritable);
    } else {
      reporter.incrCounter("TFParticalVectorReducer", "emptyVectorCount", 1);
    }
View Full Code Here

  public void reduce(WritableComparable<?> key,
                     Iterator<VectorWritable> values,
                     OutputCollector<WritableComparable<?>,VectorWritable> output,
                     Reporter reporter) throws IOException {
   
    Vector vector = new RandomAccessSparseVector(key.toString(), dimension, 10);
    while (values.hasNext()) {
      VectorWritable value = values.next();
      value.get().addTo(vector);
    }
    if (normPower != PartialVectorMerger.NO_NORMALIZING) {
      vector = vector.normalize(normPower);
    }
    if (sequentialAccess) {
      vector = new SequentialAccessSparseVector(vector);
    }
    vectorWritable.set(vector);
View Full Code Here

TOP

Related Classes of org.apache.mahout.math.RandomAccessSparseVector$NonZeroIterator

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., now owned by Oracle Inc. Contact coftware#gmail.com.