Package cc.mallet.cluster

Examples of cc.mallet.cluster.Clusterer


    // Build an InfoGain over the training instance list and print it.
    InfoGain ig = new InfoGain(trainList);
    ig.print();

    // Alternative clusterer kept for reference (disabled):
//     Clusterer clusterer = new GreedyAgglomerative(training.getInstances().getPipe(),
//                                                   eval, 0.5);
    // Construct the clusterer from the training instances' pipe and the
    // evaluator `eval`; the java.util.Random is seeded with 1.
    // NOTE(review): the meaning of the 0.5 and false arguments is not visible
    // here — confirm against the GreedyAgglomerativeByDensity constructor.
    Clusterer clusterer = new GreedyAgglomerativeByDensity(training.getInstances().getPipe(),
                                                           eval, 0.5, false,
                                                           new java.util.Random(1));

    // TEST
    // Obtain a sample clustering over the same alphabet, then cluster its
    // instances with the clusterer built above.
    Clustering testing = sampleClustering(alphabet);   
    InstanceList testList = testing.getInstances();
    Clustering predictedClusters = clusterer.cluster(testList);     

    // EVALUATE
    // Announce on stderr which clusterer is being evaluated.
    System.err.println("\n\nEvaluating System: " + clusterer);
    ClusteringEvaluators evaluators = new ClusteringEvaluators(new ClusteringEvaluator[]{
        new BCubedEvaluator(),
View Full Code Here


    CommandOption.process(Clusterings2Clusterer.class, args);

    // TRAIN

    Randoms random = new Randoms(123);
    Clusterer clusterer = null;
    if (!loadClusterer.value.exists()) {
      Clusterings training = readClusterings(trainingFile.value);

      Alphabet fieldAlphabet = ((Record) training.get(0).getInstances()
          .get(0).getData()).fieldAlphabet();

      Pipe pipe = new ClusteringPipe(string2ints(exactMatchFields.value, fieldAlphabet),
                                 string2ints(approxMatchFields.value, fieldAlphabet),
                                 string2ints(substringMatchFields.value, fieldAlphabet));

      InstanceList trainingInstances = new InstanceList(pipe);
      for (int i = 0; i < training.size(); i++) {
        PairSampleIterator iterator = new PairSampleIterator(training
            .get(i), random, 0.5, training.get(i).getNumInstances());
        while(iterator.hasNext()) {
          Instance inst = iterator.next();
          trainingInstances.add(pipe.pipe(inst));
        }
      }
      logger.info("generated " + trainingInstances.size()
          + " training instances");
      Classifier classifier = new MaxEntTrainer().train(trainingInstances);
      logger.info("InfoGain:\n");
      new InfoGain(trainingInstances).printByRank(System.out);
      logger.info("pairwise training accuracy="
          + new Trial(classifier, trainingInstances).getAccuracy());
      NeighborEvaluator neval = new PairwiseEvaluator(classifier, "YES",
          new PairwiseEvaluator.Average(), true);       
      clusterer = new GreedyAgglomerativeByDensity(
          training.get(0).getInstances().getPipe(), neval, 0.5, false,
          random);
      training = null;
      trainingInstances = null;
    } else {
      ObjectInputStream ois = new ObjectInputStream(new FileInputStream(loadClusterer.value));
      clusterer = (Clusterer) ois.readObject();
    }

    // TEST

    Clusterings testing = readClusterings(testingFile.value);
    ClusteringEvaluator evaluator = (ClusteringEvaluator) clusteringEvaluatorOption.value;
    if (evaluator == null)
      evaluator = new ClusteringEvaluators(
          new ClusteringEvaluator[] { new BCubedEvaluator(),
              new PairF1Evaluator(), new MUCEvaluator(), new AccuracyEvaluator() });
    ArrayList<Clustering> predictions = new ArrayList<Clustering>();
    for (int i = 0; i < testing.size(); i++) {
      Clustering clustering = testing.get(i);
      Clustering predicted = clusterer.cluster(clustering.getInstances());
      predictions.add(predicted);
      logger.info(evaluator.evaluate(clustering, predicted));
    }
    logger.info(evaluator.evaluateTotals());
   
View Full Code Here

TOP

Related Classes of cc.mallet.cluster.Clusterer

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Corporation. Contact coftware#gmail.com.