Package de.lmu.ifi.dbs.elki.database.ids

Examples of de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs
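
Before the excerpts, a minimal sketch of the ModifiableDBIDs API as it appears in the code below. This block is illustrative rather than taken from the indexed sources; someID is a hypothetical placeholder, and only operations that actually occur in the excerpts (DBIDUtil.newHashSet, DBIDUtil.newArray, add, addDBIDs, contains, remove, size, iteration) are used.

  import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
  import de.lmu.ifi.dbs.elki.database.ids.DBID;
  import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
  import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;

  // Hash-based set: fast contains(), no guaranteed iteration order.
  ModifiableDBIDs set = DBIDUtil.newHashSet();
  // Array-backed collection: preserves insertion order.
  ArrayModifiableDBIDs list = DBIDUtil.newArray();

  set.add(someID);           // add a single DBID (someID is hypothetical)
  list.addDBIDs(set);        // bulk-add another DBIDs collection
  if(set.contains(someID)) {
    set.remove(someID);      // remove a single id again
  }
  for(DBID id : list) {      // iterate, as the excerpts below do
    // ...
  }
  int n = set.size();        // number of ids in the collection

The first excerpt, from ELKI's PROCLUS implementation (note the PROCLUSCluster type), collects the medoids whose clusters are too small: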


  /**
   * Computes the bad medoids, i.e. the medoids whose clusters contain fewer
   * objects than the given threshold.
   *
   * @param clusters the clusters
   * @param threshold the threshold
   * @return the bad medoids
   */
  private ModifiableDBIDs computeBadMedoids(Map<DBID, PROCLUSCluster> clusters, int threshold) {
    ModifiableDBIDs badMedoids = DBIDUtil.newHashSet();
    for(DBID m_i : clusters.keySet()) {
      PROCLUSCluster c_i = clusters.get(m_i);
      if(c_i.objectIDs.size() < threshold) {
        badMedoids.add(m_i);
      }
    }
    return badMedoids;
  }
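
A hedged usage sketch of the result; medoids is a hypothetical collection of the current medoid DBIDs, and only contains() from the excerpt above is assumed:

  ModifiableDBIDs bad = computeBadMedoids(clusters, threshold);
  for(DBID m : medoids) {
    if(bad.contains(m)) {
      // this medoid's cluster was too small; replace it in the next iteration
    }
  }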
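
From ELKI's CLIQUE implementation: each unassigned dense unit starts a new cluster, whose members are collected into a fresh ModifiableDBIDs by a depth-first search: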


  public List<Pair<Subspace<V>, ModifiableDBIDs>> determineClusters() {
    List<Pair<Subspace<V>, ModifiableDBIDs>> clusters = new ArrayList<Pair<Subspace<V>, ModifiableDBIDs>>();

    for(CLIQUEUnit<V> unit : getDenseUnits()) {
      if(!unit.isAssigned()) {
        ModifiableDBIDs cluster = DBIDUtil.newHashSet();
        CLIQUESubspace<V> model = new CLIQUESubspace<V>(getDimensions());
        clusters.add(new Pair<Subspace<V>, ModifiableDBIDs>(model, cluster));
        dfs(unit, cluster, model);
      }
    }
    return clusters;
  }
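
From a step that partitions objects by local correlation dimensionality (a COPAC-style preprocessing stage, inferred from the corrDim keys); each partition is held in a ModifiableDBIDs: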

    if(partitionProgress != null) {
      partitionProgress.ensureCompleted(logger);
    }
    if(logger.isVerbose()) {
      for(Integer corrDim : partitionMap.keySet()) {
        ModifiableDBIDs list = partitionMap.get(corrDim);
        logger.verbose("Partition [corrDim = " + corrDim + "]: " + list.size() + " objects.");
      }
    }

    // convert for partition algorithm.
    // TODO: do this with DynamicDBIDs instead
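
A minimal sketch of the conversion the comment above refers to, assuming only the partitionMap shown; the DynamicDBIDs alternative from the TODO is not sketched:

  List<DBIDs> partitions = new ArrayList<DBIDs>();
  for(Integer corrDim : partitionMap.keySet()) {
    // hand each partition onward through the read-only DBIDs interface
    partitions.add(partitionMap.get(corrDim));
  }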
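
From ELKI's LMCLUS implementation (named in the Clustering title below): a hash set of still-unclustered objects shrinks as linear-manifold clusters are split off: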

  /**
   * Runs the LMCLUS algorithm on the given database.
   *
   * @param database the database to run on
   * @param relation the relation of vectors to cluster
   * @return the clustering result
   * @throws UnableToComplyException on errors
   */
  public Clustering<Model> run(Database database, Relation<NumberVector<?, ?>> relation) throws UnableToComplyException {
    Clustering<Model> ret = new Clustering<Model>("LMCLUS Clustering", "lmclus-clustering");
    FiniteProgress progress = logger.isVerbose() ? new FiniteProgress("Clustered objects", relation.size(), logger) : null;
    IndefiniteProgress cprogress = logger.isVerbose() ? new IndefiniteProgress("Clusters found", logger) : null;
    ModifiableDBIDs unclustered = DBIDUtil.newHashSet(relation.getDBIDs());

    final int maxdim = Math.min(maxLMDim, DatabaseUtil.dimensionality(relation));
    int cnum = 0;
    while(unclustered.size() > minsize) {
      DBIDs current = unclustered;
      int lmDim = 1;
      for(int k = 1; k <= maxdim; k++) {
        // Implementation note: this while loop is from the original publication
        // and the published LMCLUS source code. It doesn't make sense to me -
        // it is lacking a stop criterion other than "cluster is too small" and
        // "cluster is inseparable"! Additionally, there is good criterion for
        // stopping at the appropriate dimensionality either.
        while(true) {
          Separation separation = findSeparation(relation, current, k);
          // logger.verbose("k: " + k + " goodness: " + separation.goodness +
          // " threshold: " + separation.threshold);
          if(separation.goodness <= sensitivityThreshold) {
            break;
          }
          ModifiableDBIDs subset = DBIDUtil.newArray(current.size());
          for(DBID id : current) {
            if(deviation(relation.get(id).getColumnVector().minusEquals(separation.originV), separation.basis) < separation.threshold) {
              subset.add(id);
            }
          }
          // logger.verbose("size:"+subset.size());
          if(subset.size() < minsize) {
            break;
          }
          current = subset;
          lmDim = k;
          // System.out.println("Partition: " + subset.size());
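
From a SLINK-style single-link clustering loop (inferred from the step1..step4 calls and the pointer representation used in the following excerpts); note the comment on why processedIDs must be an array: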

    // Temporary storage for m.
    WritableDataStore<D> m = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, distCls);

    FiniteProgress progress = logger.isVerbose() ? new FiniteProgress("Clustering", relation.size(), logger) : null;
    // has to be an array for monotonicity reasons!
    ModifiableDBIDs processedIDs = DBIDUtil.newArray(relation.size());

    // apply the algorithm
    for(DBID id : relation.iterDBIDs()) {
      step1(id);
      step2(id, processedIDs, distQuery, m);
      step3(id, processedIDs, m);
      step4(id, processedIDs);

      processedIDs.add(id);

      if(progress != null) {
        progress.incrementProcessed(logger);
      }
    }
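
The "monotonicity" comment above is why DBIDUtil.newArray() is chosen over newHashSet() here. A short sketch of the trade-off; ids stands in for any DBIDs source:

  // An array-backed ModifiableDBIDs preserves insertion order, so it
  // enumerates objects exactly in the order they were processed.
  ArrayModifiableDBIDs ordered = DBIDUtil.newArray();
  // A hash set gives fast contains() but no stable iteration order.
  ModifiableDBIDs members = DBIDUtil.newHashSet();
  for(DBID id : ids) {
    ordered.add(id);   // order preserved
    members.add(id);   // O(1) expected membership tests
  }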
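
Extracting flat clusters from the pointer representation (pi, lambda): each object is grouped under the last object of its cluster, with the ModifiableDBIDs created lazily: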

    // extract the child clusters
    Map<DBID, ModifiableDBIDs> cluster_ids = new HashMap<DBID, ModifiableDBIDs>();
    Map<DBID, D> cluster_distances = new HashMap<DBID, D>();
    for(DBID id : ids) {
      DBID lastObjectInCluster = lastObjectInCluster(id, stopdist, pi, lambda);
      ModifiableDBIDs cluster = cluster_ids.get(lastObjectInCluster);
      if(cluster == null) {
        cluster = DBIDUtil.newArray();
        cluster_ids.put(lastObjectInCluster, cluster);
      }
      cluster.add(id);

      D lambda_id = lambda.get(id);
      if(stopdist != null && lambda_id.compareTo(stopdist) <= 0 && (cluster_distances.get(lastObjectInCluster) == null || lambda_id.compareTo(cluster_distances.get(lastObjectInCluster)) > 0)) {
        cluster_distances.put(lastObjectInCluster, lambda_id);
      }
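
Building a cluster hierarchy from the same pointer representation; member sets are merged into their destination's ModifiableDBIDs as the distance levels are processed: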

    for(DBID cur : order) {
      DBID dest = pi.get(cur);
      D l = lambda.get(cur);
      // logger.debugFine("DBID " + cur.toString() + " dist: " + l.toString());
      if(stopdist != null && stopdist.compareTo(l) > 0) {
        ModifiableDBIDs curset = cids.remove(cur);
        ModifiableDBIDs destset = cids.get(dest);
        if(destset == null) {
          if(curset != null) {
            destset = curset;
          }
          else {
            destset = DBIDUtil.newHashSet();
            destset.add(cur);
          }
          destset.add(dest);
          cids.put(dest, destset);
        }
        else {
          if(curset != null) {
            destset.addDBIDs(curset);
          }
          else {
            destset.add(cur);
          }
        }
        curdist = l;
      }
      else {
        if(curdist == null || l.compareTo(curdist) > 0) {
          // New distance level reached. Post-process the current objects
          for(Entry<DBID, ModifiableDBIDs> ent : cids.entrySet()) {
            DBID key = ent.getKey();
            ModifiableDBIDs clusids = ent.getValue();
            // Make a new cluster
            String cname = "Cluster_" + key.toString() + "_" + curdist.toString();
            Cluster<Model> cluster = new Cluster<Model>(cname, clusids, ClusterModel.CLUSTER, hier);
            // Collect child clusters and clean up the cluster ids, keeping only
            // "new" objects.
            Iterator<DBID> iter = clusids.iterator();
            while(iter.hasNext()) {
              DBID child = iter.next();
              Cluster<Model> chiclus = clusters.get(child);
              if(chiclus != null) {
                hier.add(cluster, chiclus);
                clusters.remove(child);
                iter.remove();
              }
            }
            clusters.put(key, cluster);
          }
          if(logger.isDebuggingFine()) {
            StringBuffer buf = new StringBuffer();
            buf.append("Number of clusters at depth ");
            buf.append((curdist != null ? curdist.toString() : "null"));
            buf.append(": ").append(clusters.size()).append(" ");
            buf.append("last-objects:");
            for(DBID id : clusters.keySet()) {
              buf.append(" ").append(id.toString());
            }
            logger.debugFine(buf.toString());
          }
          cids.clear();
          curdist = l;
        }
        // Add the current object to the destination's cluster
        {
          ModifiableDBIDs destset = cids.get(dest);
          if(destset == null) {
            destset = DBIDUtil.newHashSet();
            cids.put(dest, destset);
            destset.add(dest);
          }
          destset.add(cur);
        }
      }
      // Update the progress counter
      if(progress != null) {
        progress.incrementProcessed(logger);
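
From a shared-nearest-neighbor clustering routine (note the findSNNNeighbors call); the excerpt begins mid-method. Ids migrate between the noise set and the growing current cluster: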

      }
      return;
    }

    // try to expand the cluster
    ModifiableDBIDs currentCluster = DBIDUtil.newArray();
    for(DBID seed : seeds) {
      if(!processedIDs.contains(seed)) {
        currentCluster.add(seed);
        processedIDs.add(seed);
      }
      else if(noise.contains(seed)) {
        currentCluster.add(seed);
        noise.remove(seed);
      }
    }

    while(seeds.size() > 0) {
      DBID o = seeds.remove(seeds.size() - 1);
      ArrayModifiableDBIDs neighborhood = findSNNNeighbors(snnInstance, o);

      if(neighborhood.size() >= minpts) {
        for(DBID p : neighborhood) {
          boolean inNoise = noise.contains(p);
          boolean unclassified = !processedIDs.contains(p);
          if(inNoise || unclassified) {
            if(unclassified) {
              seeds.add(p);
            }
            currentCluster.add(p);
            processedIDs.add(p);
            if(inNoise) {
              noise.remove(p);
            }
          }
        }
      }

      if(objprog != null && clusprog != null) {
        objprog.setProcessed(processedIDs.size(), logger);
        int numClusters = currentCluster.size() > minpts ? resultList.size() + 1 : resultList.size();
        clusprog.setProcessed(numClusters, logger);
      }

      if(processedIDs.size() == snnInstance.getRelation().size() && noise.size() == 0) {
        break;
      }
    }
    if(currentCluster.size() >= minpts) {
      resultList.add(currentCluster);
    }
    else {
      for(DBID id : currentCluster) {
        noise.add(id);
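
A similar cluster expansion, here additionally constrained by local correlation dimensionality (4C-style, inferred from the getCorrelationDimension() checks); this excerpt also begins mid-method: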

      }
      return;
    }

    // try to expand the cluster
    ModifiableDBIDs currentCluster = DBIDUtil.newArray();
    for(DistanceResultPair<DoubleDistance> seed : seeds) {
      DBID nextID = seed.getDBID();

      Integer nextID_corrDim = distFunc.getIndex().getLocalProjection(nextID).getCorrelationDimension();
      // nextID is not reachable from start object
      if(nextID_corrDim > lambda) {
        continue;
      }

      if(!processedIDs.contains(nextID)) {
        currentCluster.add(nextID);
        processedIDs.add(nextID);
      }
      else if(noise.contains(nextID)) {
        currentCluster.add(nextID);
        noise.remove(nextID);
      }
    }
    seeds.remove(0);

    while(seeds.size() > 0) {
      DBID q = seeds.remove(0).getDBID();
      Integer corrDim_q = distFunc.getIndex().getLocalProjection(q).getCorrelationDimension();
      // q forms no lambda-dim hyperplane
      if(corrDim_q > lambda) {
        continue;
      }

      List<DistanceResultPair<DoubleDistance>> reachables = rangeQuery.getRangeForDBID(q, epsilon);
      if(reachables.size() > minpts) {
        for(DistanceResultPair<DoubleDistance> r : reachables) {
          Integer corrDim_r = distFunc.getIndex().getLocalProjection(r.getDBID()).getCorrelationDimension();
          // r is not reachable from q
          if(corrDim_r > lambda) {
            continue;
          }

          boolean inNoise = noise.contains(r.getDBID());
          boolean unclassified = !processedIDs.contains(r.getDBID());
          if(inNoise || unclassified) {
            if(unclassified) {
              seeds.add(r);
            }
            currentCluster.add(r.getDBID());
            processedIDs.add(r.getDBID());
            if(inNoise) {
              noise.remove(r.getDBID());
            }
            if(objprog != null && clusprog != null) {
              objprog.setProcessed(processedIDs.size(), getLogger());
              int numClusters = currentCluster.size() > minpts ? resultList.size() + 1 : resultList.size();
              clusprog.setProcessed(numClusters, getLogger());
            }
          }
        }
      }

      /* if(processedIDs.size() == relation.size() && noise.size() == 0) {
        break;
      } */
    }

    if(currentCluster.size() >= minpts) {
      resultList.add(currentCluster);
    }
    else {
      for(DBID id : currentCluster) {
        noise.add(id);
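
From ELKI's INFLO outlier algorithm (named in the comments below): per-object kNN and reverse-kNN sets are materialized as ModifiableDBIDs inside WritableDataStores: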

  @Override
  public OutlierResult run(Database database) throws IllegalStateException {
    Relation<O> relation = database.getRelation(getInputTypeRestriction()[0]);
    DistanceQuery<O, D> distFunc = database.getDistanceQuery(relation, getDistanceFunction());

    ModifiableDBIDs processedIDs = DBIDUtil.newHashSet(relation.size());
    ModifiableDBIDs pruned = DBIDUtil.newHashSet();
    // KNNS
    WritableDataStore<ModifiableDBIDs> knns = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, ModifiableDBIDs.class);
    // RNNS
    WritableDataStore<ModifiableDBIDs> rnns = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, ModifiableDBIDs.class);
    // density
    WritableDoubleDataStore density = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT);
    // init knns and rnns
    for(DBID id : relation.iterDBIDs()) {
      knns.put(id, DBIDUtil.newArray());
      rnns.put(id, DBIDUtil.newArray());
    }

    // TODO: use kNN preprocessor?
    KNNQuery<O, D> knnQuery = database.getKNNQuery(distFunc, k, DatabaseQuery.HINT_HEAVY_USE);

    for(DBID id : relation.iterDBIDs()) {
      // count stays 0 if the object has not been visited yet
      int count = rnns.get(id).size();
      ModifiableDBIDs s;
      if(!processedIDs.contains(id)) {
        // TODO: use exactly k neighbors?
        KNNResult<D> list = knnQuery.getKNNForDBID(id, k);
        knns.get(id).addDBIDs(list.asDBIDs());
        processedIDs.add(id);
        s = knns.get(id);
        density.putDouble(id, 1 / list.get(k - 1).getDistance().doubleValue());

      }
      else {
        s = knns.get(id);
      }
      for(DBID q : s) {
        if(!processedIDs.contains(q)) {
          // TODO: use exactly k neighbors?
          KNNResult<D> listQ = knnQuery.getKNNForDBID(q, k);
          knns.get(q).addDBIDs(listQ.asDBIDs());
          density.putDouble(q, 1 / listQ.getKNNDistance().doubleValue());
          processedIDs.add(q);
        }

        if(knns.get(q).contains(id)) {
          rnns.get(q).add(id);
          rnns.get(id).add(q);
          count++;
        }
      }
      if(count >= s.size() * m) {
        pruned.add(id);
      }
    }

    // Calculate INFLO for each object.
    // If an object was pruned, its INFLO is 1.0.
    DoubleMinMax inflominmax = new DoubleMinMax();
    WritableDoubleDataStore inflos = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
    for(DBID id : relation.iterDBIDs()) {
      if(!pruned.contains(id)) {
        ModifiableDBIDs knn = knns.get(id);
        ModifiableDBIDs rnn = rnns.get(id);

        double denP = density.doubleValue(id);
        knn.addDBIDs(rnn);
        double den = 0;
        for(DBID q : knn) {
          double denQ = density.doubleValue(q);
          den = den + denQ;
        }
        den = den / rnn.size();
        den = den / denP;
        inflos.putDouble(id, den);
        // update minimum and maximum
        inflominmax.put(den);

