Package de.lmu.ifi.dbs.elki.math

Examples of de.lmu.ifi.dbs.elki.math.MeanVariance


    // Euclidean
    {
      KNNJoin<DoubleVector, DoubleDistance, ?, ?> knnjoin = new KNNJoin<DoubleVector, DoubleDistance, RStarTreeNode, SpatialEntry>(EuclideanDistanceFunction.STATIC, 2);
      DataStore<KNNList<DoubleDistance>> result = knnjoin.run(db);

      MeanVariance meansize = new MeanVariance();
      for(DBID id : relation.getDBIDs()) {
        KNNList<DoubleDistance> knnlist = result.get(id);
        meansize.put(knnlist.size());
      }
      org.junit.Assert.assertEquals("Euclidean mean 2NN", mean2nnEuclid, meansize.getMean(), 0.00001);
      org.junit.Assert.assertEquals("Euclidean variance 2NN", var2nnEuclid, meansize.getSampleVariance(), 0.00001);
    }
    // Manhattan
    {
      KNNJoin<DoubleVector, DoubleDistance, ?, ?> knnjoin = new KNNJoin<DoubleVector, DoubleDistance, RStarTreeNode, SpatialEntry>(ManhattanDistanceFunction.STATIC, 2);
      DataStore<KNNList<DoubleDistance>> result = knnjoin.run(db);

      MeanVariance meansize = new MeanVariance();
      for(DBID id : relation.getDBIDs()) {
        KNNList<DoubleDistance> knnlist = result.get(id);
        meansize.put(knnlist.size());
      }
      org.junit.Assert.assertEquals("Manhattan mean 2NN", mean2nnManhattan, meansize.getMean(), 0.00001);
      org.junit.Assert.assertEquals("Manhattan variance 2NN", var2nnManhattan, meansize.getSampleVariance(), 0.00001);
    }
  }
View Full Code Here


      inter = meanv[1] - slope * meanv[0];
    }

    // calculate mean and variance for error
    WritableDataStore<Double> scores = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, Double.class);
    MeanVariance mv = new MeanVariance();
    for(DBID id : relation.iterDBIDs()) {
      // Compute the error from the linear regression
      double y_i = relation.get(id).doubleValue(1);
      double e = means.get(id) - (slope * y_i + inter);
      scores.put(id, e);
      mv.put(e);
    }

    // Normalize scores
    DoubleMinMax minmax = new DoubleMinMax();
    {
      final double mean = mv.getMean();
      final double variance = mv.getNaiveStddev();
      for(DBID id : relation.iterDBIDs()) {
        double score = Math.abs((scores.get(id) - mean) / variance);
        minmax.put(score);
        scores.put(id, score);
      }
View Full Code Here

   */
  public OutlierResult run(Relation<N> nrel, Relation<? extends NumberVector<?, ?>> relation) {
    final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(nrel);

    // Compute the global mean and variance
    MeanVariance globalmv = new MeanVariance();
    for(DBID id : relation.iterDBIDs()) {
      globalmv.put(relation.get(id).doubleValue(1));
    }

    DoubleMinMax minmax = new DoubleMinMax();
    WritableDataStore<Double> scores = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, Double.class);

    // calculate normalized attribute values
    // calculate neighborhood average of normalized attribute values.
    for(DBID id : relation.iterDBIDs()) {
      // Compute global z score
      final double globalZ = (relation.get(id).doubleValue(1) - globalmv.getMean()) / globalmv.getNaiveStddev();
      // Compute local average z score
      Mean localm = new Mean();
      for(DBID n : npred.getNeighborDBIDs(id)) {
        if(id.equals(n)) {
          continue;
        }
        localm.put((relation.get(n).doubleValue(1) - globalmv.getMean()) / globalmv.getNaiveStddev());
      }
      // if neighbors.size == 0
      final double localZ;
      if(localm.getCount() > 0) {
        localZ = localm.getMean();
View Full Code Here

  }

  @Override
  public void prepare(OutlierResult or) {
    if(fixedmean == null) {
      MeanVariance mv = new MeanVariance();
      for(DBID id : or.getScores().iterDBIDs()) {
        double val = or.getScores().get(id);
        if(!Double.isNaN(val) && !Double.isInfinite(val)) {
          mv.put(val);
        }
      }
      mean = mv.getMean();
      factor = lambda * mv.getSampleStddev() * MathUtil.SQRT2;
      if (factor == 0.0) {
        factor = Double.MIN_NORMAL;
      }
    }
    else {
View Full Code Here

        }
      }
      min = mm.getMin();
    }
    if(mean == null) {
      MeanVariance mv = new MeanVariance();
      for(DBID id : or.getScores().iterDBIDs()) {
        double val = or.getScores().get(id);
        val = (val <= min) ? 0 : Math.sqrt(val - min);
        mv.put(val);
      }
      mean = mv.getMean();
      factor = lambda * mv.getSampleStddev() * MathUtil.SQRT2;
    }
    else {
      double sqsum = 0;
      int cnt = 0;
      for(DBID id : or.getScores().iterDBIDs()) {
View Full Code Here

    layer.appendChild(itag);
  }

  @Reference(authors = "D. W. Scott", title = "Multivariate density estimation", booktitle = "Multivariate Density Estimation: Theory, Practice, and Visualization", url = "http://dx.doi.org/10.1002/9780470316849.fmatter")
  private double[] initializeBandwidth(double[][] data) {
    MeanVariance mv0 = new MeanVariance();
    MeanVariance mv1 = new MeanVariance();
    // For Kernel bandwidth.
    for(double[] projected : data) {
      mv0.put(projected[0]);
      mv1.put(projected[1]);
    }
    // Set bandwidths according to Scott's rule:
    // Note: in projected space, d=2.
    double[] bandwidth = new double[2];
    bandwidth[0] = MathUtil.SQRT5 * mv0.getSampleStddev() * Math.pow(rel.size(), -1 / 6.);
    bandwidth[1] = MathUtil.SQRT5 * mv1.getSampleStddev() * Math.pow(rel.size(), -1 / 6.);
    return bandwidth;
  }
View Full Code Here

   */
  public OutlierResult run(Database database, Relation<N> nrel, Relation<? extends NumberVector<?, ?>> relation) {
    final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(nrel);
    WritableDataStore<Double> scores = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, Double.class);

    MeanVariance zmv = new MeanVariance();
    for(DBID id : relation.iterDBIDs()) {
      DBIDs neighbors = npred.getNeighborDBIDs(id);
      // Compute Mean of neighborhood
      Mean localmean = new Mean();
      for(DBID n : neighbors) {
        if(id.equals(n)) {
          continue;
        }
        else {
          localmean.put(relation.get(n).doubleValue(1));
        }
      }
      final double localdiff;
      if(localmean.getCount() > 0) {
        localdiff = relation.get(id).doubleValue(1) - localmean.getMean();
      }
      else {
        localdiff = 0.0;
      }
      scores.put(id, localdiff);
      zmv.put(localdiff);
    }

    // Normalize scores using mean and variance
    DoubleMinMax minmax = new DoubleMinMax();
    for(DBID id : relation.iterDBIDs()) {
      double score = Math.abs(scores.get(id) - zmv.getMean()) / zmv.getSampleStddev();
      minmax.put(score);
      scores.put(id, score);
    }

    // Wrap result
View Full Code Here

    ypos = addBarChart(svgp, parent, ypos, "Precision", 1, edit.editDistanceFirst());
    ypos = addBarChart(svgp, parent, ypos, "Recall", 1, edit.editDistanceSecond());

    ypos = addHeader(svgp, parent, ypos, "Gini measures");

    final MeanVariance gini = cont.averageSymmetricGini();
    ypos = addBarChart(svgp, parent, ypos, "Mean +-" + FormatUtil.format(gini.getSampleStddev(), FormatUtil.NF4), 1, gini.getMean());

    // scale vis
    double cols = 10; // Math.max(10, (int) (i * task.getHeight() /
    // task.getWidth()));
    double rows = ypos;
View Full Code Here

    DistanceQuery<O, D> distanceQuery = relation.getDatabase().getDistanceQuery(relation, distanceFunction);

    MetricalIndexTree<O, D, N, E> index = getMetricalIndex(relation);

    storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, List.class);
    MeanVariance pagesize = new MeanVariance();
    MeanVariance ksize = new MeanVariance();
    if(getLogger().isVerbose()) {
      getLogger().verbose("Approximating nearest neighbor lists to database objects");
    }

    List<E> leaves = index.getLeaves();
    FiniteProgress progress = getLogger().isVerbose() ? new FiniteProgress("Processing leaf nodes.", leaves.size(), getLogger()) : null;
    for(E leaf : leaves) {
      N node = index.getNode(leaf);
      int size = node.getNumEntries();
      pagesize.put(size);
      if(getLogger().isDebuggingFinest()) {
        getLogger().debugFinest("NumEntires = " + size);
      }
      // Collect the ids in this node.
      DBID[] ids = new DBID[size];
      for(int i = 0; i < size; i++) {
        ids[i] = ((LeafEntry) node.getEntry(i)).getDBID();
      }
      HashMap<DBIDPair, D> cache = new HashMap<DBIDPair, D>(size * size * 3 / 8);
      for(DBID id : ids) {
        KNNHeap<D> kNN = new KNNHeap<D>(k, distanceQuery.infiniteDistance());
        for(DBID id2 : ids) {
          DBIDPair key = DBIDUtil.newPair(id, id2);
          D d = cache.remove(key);
          if(d != null) {
            // consume the previous result.
            kNN.add(d, id2);
          }
          else {
            // compute new and store the previous result.
            d = distanceQuery.distance(id, id2);
            kNN.add(d, id2);
            // put it into the cache, but with the keys reversed
            key = DBIDUtil.newPair(id2, id);
            cache.put(key, d);
          }
        }
        ksize.put(kNN.size());
        storage.put(id, kNN.toSortedArrayList());
      }
      if(getLogger().isDebugging()) {
        if(cache.size() > 0) {
          getLogger().warning("Cache should be empty after each run, but still has " + cache.size() + " elements.");
        }
      }
      if(progress != null) {
        progress.incrementProcessed(getLogger());
      }
    }
    if(progress != null) {
      progress.ensureCompleted(getLogger());
    }
    if(getLogger().isVerbose()) {
      getLogger().verbose("Average page size = " + pagesize.getMean() + " +- " + pagesize.getSampleStddev());
      getLogger().verbose("On average, " + ksize.getMean() + " +- " + ksize.getSampleStddev() + " neighbors returned.");
    }
  }
View Full Code Here

  @Override
  protected void preprocess() {
    DistanceQuery<O, D> distanceQuery = relation.getDatabase().getDistanceQuery(relation, distanceFunction);
    storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, List.class);
    MeanVariance ksize = new MeanVariance();
    if(logger.isVerbose()) {
      logger.verbose("Approximating nearest neighbor lists to database objects");
    }

    ArrayDBIDs aids = DBIDUtil.ensureArray(relation.getDBIDs());
    int minsize = (int) Math.floor(aids.size() / partitions);

    FiniteProgress progress = logger.isVerbose() ? new FiniteProgress("Processing partitions.", partitions, logger) : null;
    for(int part = 0; part < partitions; part++) {
      int size = (partitions * minsize + part >= aids.size()) ? minsize : minsize + 1;
      // Collect the ids in this node.
      ArrayModifiableDBIDs ids = DBIDUtil.newArray(size);
      for(int i = 0; i < size; i++) {
        assert (size * partitions + part < aids.size());
        ids.add(aids.get(i * partitions + part));
      }
      HashMap<DBIDPair, D> cache = new HashMap<DBIDPair, D>(size * size * 3 / 8);
      for(DBID id : ids) {
        KNNHeap<D> kNN = new KNNHeap<D>(k, distanceQuery.infiniteDistance());
        for(DBID id2 : ids) {
          DBIDPair key = DBIDUtil.newPair(id, id2);
          D d = cache.remove(key);
          if(d != null) {
            // consume the previous result.
            kNN.add(d, id2);
          }
          else {
            // compute new and store the previous result.
            d = distanceQuery.distance(id, id2);
            kNN.add(d, id2);
            // put it into the cache, but with the keys reversed
            key = DBIDUtil.newPair(id2, id);
            cache.put(key, d);
          }
        }
        ksize.put(kNN.size());
        storage.put(id, kNN.toSortedArrayList());
      }
      if(logger.isDebugging()) {
        if(cache.size() > 0) {
          logger.warning("Cache should be empty after each run, but still has " + cache.size() + " elements.");
        }
      }
      if(progress != null) {
        progress.incrementProcessed(logger);
      }
    }
    if(progress != null) {
      progress.ensureCompleted(logger);
    }
    if(logger.isVerbose()) {
      logger.verbose("On average, " + ksize.getMean() + " +- " + ksize.getSampleStddev() + " neighbors returned.");
    }
  }
View Full Code Here

TOP

Related Classes of de.lmu.ifi.dbs.elki.math.MeanVariance

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.