Package cc.mallet.cluster

Examples of cc.mallet.cluster.Clustering


 
  public void testEvaluators ()
  {
    InstanceList instances = new InstanceList(new Randoms(1), 100, 2).subList(0,12);
    System.err.println(instances.size() + " instances");
    Clustering truth = generateTruth(instances);
    System.err.println("truth=" + truth);

    Clustering[] predicted = generatePredicted(instances);
    ClusteringEvaluator pweval = new PairF1Evaluator();
    ClusteringEvaluator bceval = new BCubedEvaluator();
View Full Code Here


   
    public Instance pipe (Instance carrier) {
      boolean mergeFirst = false;
     
      AgglomerativeNeighbor neighbor = (AgglomerativeNeighbor)carrier.getData();
      Clustering original = neighbor.getOriginal();
      InstanceList list = original.getInstances();     
      int[] mergedIndices = neighbor.getNewCluster();
      boolean match = true;
      for (int i = 0; i < mergedIndices.length; i++) {
        for (int j = i + 1; j < mergedIndices.length; j++) {
          if ((original.getLabel(mergedIndices[i]) !=
               original.getLabel(mergedIndices[j])) || mergeFirst) {
            FeatureVector fvi = (FeatureVector)list.get(mergedIndices[i]).getData();
            FeatureVector fvj = (FeatureVector)list.get(mergedIndices[j]).getData();
            if (!(fvi.contains("feature0") && fvj.contains("feature0"))) {
              match = false;
              break;             
            }
          }
        }
      }

      PropertyList pl = null;
      if (match)
        pl = PropertyList.add("Match", 1.0, pl);
      else
        pl = PropertyList.add("NoMatch", 1.0, pl);
     
      FeatureVector fv = new FeatureVector ((Alphabet)getDataAlphabet(),
                                            pl, true);
      carrier.setData(fv);

      boolean positive = true;
      for (int i = 0; i < mergedIndices.length; i++) {
        for (int j = i + 1; j < mergedIndices.length; j++) {
          if (original.getLabel(mergedIndices[i]) != original.getLabel(mergedIndices[j])) {
            positive = false;
            break;
          }
        }
      }
View Full Code Here

    super (name);
  }

  private Clustering generateClustering (InstanceList instances) {
    int[] labels = new int[]{0,0,0,1,1,1,2,2,2,2};
    return new Clustering(instances, 3, labels);
  }
View Full Code Here

  public void testEvaluators ()
  {
    Randoms random = new Randoms(1);
    InstanceList instances = new InstanceList(random, 100, 2).subList(0,10);
    System.err.println(instances.size() + " instances");
    Clustering clustering = generateClustering(instances);
    System.err.println("clustering=" + clustering);

    System.err.println("ClusterSampleIterator");
    NeighborIterator iter = new ClusterSampleIterator(clustering,
                                                      random,
View Full Code Here

    logger.info("number clusterings=" + clusterings.size());

    // Prune clusters based on size.
    if (minClusterSize.value > 1) {
      for (int i = 0; i < clusterings.size(); i++) {
        Clustering clustering = clusterings.get(i);
        InstanceList oldInstances = clustering.getInstances();
        Alphabet alph = oldInstances.getDataAlphabet();
        LabelAlphabet lalph = (LabelAlphabet) oldInstances.getTargetAlphabet();
        if (alph == null) alph = new Alphabet();
        if (lalph == null) lalph = new LabelAlphabet();
        Pipe noop = new Noop(alph, lalph);
        InstanceList newInstances = new InstanceList(noop);
        for (int j = 0; j < oldInstances.size(); j++) {
          int label = clustering.getLabel(j);
          Instance instance = oldInstances.get(j);
          if (clustering.size(label) >= minClusterSize.value)
            newInstances.add(noop.pipe(new Instance(instance.getData(), lalph.lookupLabel(new Integer(label)), instance.getName(), instance.getSource())));
        }
        clusterings.set(i, createSmallerClustering(newInstances));
      }
      if (outputPrefixFile.value != null) {
        try {
          ObjectOutputStream oos =
            new ObjectOutputStream(new FileOutputStream(outputPrefixFile.value));
          oos.writeObject(clusterings);
          oos.close();
        } catch (Exception e) {
          logger.warning("Exception writing clustering to file " + outputPrefixFile.value                        + " " + e);
          e.printStackTrace();
        }
      }
    }
   
   
    // Split into training/testing
    if (trainingProportion.value > 0) {
      if (clusterings.size() > 1)
        throw new IllegalArgumentException("Expect one clustering to do train/test split, not " + clusterings.size());
      Clustering clustering = clusterings.get(0);
      int targetTrainSize = (int)(trainingProportion.value * clustering.getNumInstances());
      TIntHashSet clustersSampled = new TIntHashSet();
      Randoms random = new Randoms(123);
      LabelAlphabet lalph = new LabelAlphabet();
      InstanceList trainingInstances = new InstanceList(new Noop(null, lalph));
      while (trainingInstances.size() < targetTrainSize) {
        int cluster = random.nextInt(clustering.getNumClusters());
        if (!clustersSampled.contains(cluster)) {
          clustersSampled.add(cluster);
          InstanceList instances = clustering.getCluster(cluster);
          for (int i = 0; i < instances.size(); i++) {
            Instance inst = instances.get(i);
            trainingInstances.add(new Instance(inst.getData(), lalph.lookupLabel(new Integer(cluster)), inst.getName(), inst.getSource()));
          }
        }
      }
      trainingInstances.shuffle(random);
      Clustering trainingClustering = createSmallerClustering(trainingInstances);
     
      InstanceList testingInstances = new InstanceList(null, lalph);
      for (int i = 0; i < clustering.getNumClusters(); i++) {
        if (!clustersSampled.contains(i)) {
          InstanceList instances = clustering.getCluster(i);
          for (int j = 0; j < instances.size(); j++) {
            Instance inst = instances.get(j);
            testingInstances.add(new Instance(inst.getData(), lalph.lookupLabel(new Integer(i)), inst.getName(), inst.getSource()));
          }         
        }
      }
      testingInstances.shuffle(random);
      Clustering testingClustering = createSmallerClustering(testingInstances);
      logger.info(outputPrefixFile.value + ".train : " + trainingClustering.getNumClusters() + " objects");
      logger.info(outputPrefixFile.value + ".test : " + testingClustering.getNumClusters() + " objects");
      if (outputPrefixFile.value != null) {
        try {
          ObjectOutputStream oos =
            new ObjectOutputStream(new FileOutputStream(new File(outputPrefixFile.value + ".train")));
          oos.writeObject(new Clusterings(new Clustering[]{trainingClustering}));
View Full Code Here

     
    }
  }

  private static Clustering createSmallerClustering (InstanceList instances) {
    Clustering c = ClusterUtils.createSingletonClustering(instances);
    return ClusterUtils.mergeInstancesWithSameLabel(c);
  }
View Full Code Here

      e.printStackTrace();
    }

    if (printOption.value) {
      for (int i = 0; i < clusterings.size(); i++) {
        Clustering c = clusterings.get(i);
        for (int j = 0; j < c.getNumClusters(); j++) {
          InstanceList cluster = c.getCluster(j);
          for (int k = 0; k < cluster.size(); k++) {
            System.out.println("clustering " + i + " cluster " + j + " element " + k + " " + cluster.get(k).getData());
          }
          System.out.println();
        }
      }
    }
    logger.info("number clusterings=" + clusterings.size());

    int totalInstances = 0;
    int totalClusters = 0;

    for (int i = 0; i < clusterings.size(); i++) {
      Clustering c = clusterings.get(i);
      totalClusters += c.getNumClusters();
      totalInstances += c.getNumInstances();
    }
    logger.info("total instances=" + totalInstances);
    logger.info("total clusters=" + totalClusters);
    logger.info("instances per clustering=" + (double) totalInstances
                / clusterings.size());
View Full Code Here

                        new Integer(label), record.toString(),
                        record.toString()));
        }
      }
      clusterings[i] =
          new Clustering(instances, subdirs.length, labels.toNativeArray());
    }

    logger.info("\nread " + fi + " objects in " + clusterings.length + " clusterings.");
    try {
      ObjectOutputStream oos =
View Full Code Here

      evaluator = new ClusteringEvaluators(
          new ClusteringEvaluator[] { new BCubedEvaluator(),
              new PairF1Evaluator(), new MUCEvaluator(), new AccuracyEvaluator() });
    ArrayList<Clustering> predictions = new ArrayList<Clustering>();
    for (int i = 0; i < testing.size(); i++) {
      Clustering clustering = testing.get(i);
      Clustering predicted = clusterer.cluster(clustering.getInstances());
      predictions.add(predicted);
      logger.info(evaluator.evaluate(clustering, predicted));
    }
    logger.info(evaluator.evaluateTotals());
   
View Full Code Here

    }

    public Instance pipe(Instance carrier) {
      AgglomerativeNeighbor neighbor = (AgglomerativeNeighbor) carrier
          .getData();
      Clustering original = neighbor.getOriginal();
      int[] cluster1 = neighbor.getOldClusters()[0];
      int[] cluster2 = neighbor.getOldClusters()[1];
      InstanceList list = original.getInstances();
      int[] mergedIndices = neighbor.getNewCluster();
      Record[] records = array2Records(mergedIndices, list);
      Alphabet fieldAlph = records[0].fieldAlphabet();
      Alphabet valueAlph = records[0].valueAlphabet();

      PropertyList features = null;
      features = addExactMatch(records, fieldAlph, valueAlph, features);
      features = addApproxMatch(records, fieldAlph, valueAlph, features);
      features = addSubstringMatch(records, fieldAlph, valueAlph, features);
      carrier
          .setData(new FeatureVector(getDataAlphabet(), features,
              true));

      LabelAlphabet ldict = (LabelAlphabet) getTargetAlphabet();
      String label = (original.getLabel(cluster1[0]) == original
          .getLabel(cluster2[0])) ? "YES" : "NO";
      carrier.setTarget(ldict.lookupLabel(label));     
      return carrier;
    }
View Full Code Here

TOP

Related Classes of cc.mallet.cluster.Clustering

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.