    InstanceList oldInstances = clustering.getInstances();
    Alphabet alph = oldInstances.getDataAlphabet();
    LabelAlphabet lalph = (LabelAlphabet) oldInstances.getTargetAlphabet();
    if (alph == null) alph = new Alphabet();
    if (lalph == null) lalph = new LabelAlphabet();
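    // Noop is a pass-through pipe: it leaves instances unchanged and serves only to
    // associate the data and label alphabets with the new InstanceList.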
    Pipe noop = new Noop(alph, lalph);
    InstanceList newInstances = new InstanceList(noop);
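    // Keep only instances whose cluster meets the minimum size threshold,
    // relabeling each surviving instance with its cluster id.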
    for (int j = 0; j < oldInstances.size(); j++) {
      int label = clustering.getLabel(j);
      Instance instance = oldInstances.get(j);
      if (clustering.size(label) >= minClusterSize.value)
        newInstances.add(noop.pipe(new Instance(instance.getData(),
            lalph.lookupLabel(Integer.valueOf(label)), instance.getName(), instance.getSource())));
    }
    clusterings.set(i, createSmallerClustering(newInstances));
  }
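  // Serialize the pruned clusterings if an output file was requested.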
  if (outputPrefixFile.value != null) {
    // Try-with-resources ensures the stream is closed even if writeObject throws.
    try (ObjectOutputStream oos =
             new ObjectOutputStream(new FileOutputStream(outputPrefixFile.value))) {
      oos.writeObject(clusterings);
    } catch (Exception e) {
      logger.warning("Exception writing clusterings to file " + outputPrefixFile.value + " " + e);
      e.printStackTrace();
    }
  }
}
// Split into training/testing by sampling whole clusters, so no cluster is divided between the two sets.
if (trainingProportion.value > 0) {
  if (clusterings.size() > 1)
    throw new IllegalArgumentException("Expect one clustering to do train/test split, not " + clusterings.size());
  Clustering clustering = clusterings.get(0);
  int targetTrainSize = (int) (trainingProportion.value * clustering.getNumInstances());
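  // Trove's TIntHashSet tracks which clusters have already been sampled, without boxing ints;
  // the fixed seed (123) below makes the train/test split reproducible across runs.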
  TIntHashSet clustersSampled = new TIntHashSet();
  Randoms random = new Randoms(123);
  LabelAlphabet lalph = new LabelAlphabet();
  InstanceList trainingInstances = new InstanceList(new Noop(null, lalph));
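  // Sample whole clusters without replacement until the training split reaches the target size;
  // since every instance belongs to some cluster, the loop terminates for any proportion <= 1.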
  while (trainingInstances.size() < targetTrainSize) {
    int cluster = random.nextInt(clustering.getNumClusters());
    if (!clustersSampled.contains(cluster)) {
      clustersSampled.add(cluster);
      InstanceList instances = clustering.getCluster(cluster);