Package cc.mallet.types

Examples of cc.mallet.types.InstanceList


                if (this.i.hasNext ()) {
                    return this.i.next ();
                }
               
                for (this.index++; this.index < lists.length; this.index++) {
                    final InstanceList list = lists[this.index];
                    if (list != null && lists[this.index].size () > 0) {
                        this.i = lists[this.index].iterator ();
                        return this.i.next ();
                    }
                }
View Full Code Here


    timing.tick ("Training");

    FileUtils.writeGzippedObject (new File (outputPrefix.value, "extor.ser.gz"), extor);
    timing.tick ("Serializing");

    InstanceList testing = trainer.getTestingData ();
    if (testing != null) {
      eval.test (extor.getAcrf (), testing, "Final results");
    }

    if ((extractionEval != null) && (testing != null)) {
View Full Code Here

  }

  protected void setupData ()
  {
    Timing timing = new Timing ();
    training = new InstanceList (featurePipe);
    training.addThruPipe (new PipedIterator (trainIterator, tokPipe));
    if (trainingPct > 0) training = subsetData (training, trainingPct);

    if (testIterator != null) {
      testing = new InstanceList (featurePipe);
      testing.addThruPipe (new PipedIterator (testIterator, tokPipe));
      if (testingPct > 0) testing = subsetData (testing, trainingPct);
    }

    timing.tick ("Data loading");
View Full Code Here

    Pipe p = new SerialPipes (new Pipe[]  {
      new TokenSequence2FeatureSequence (),
      new FeatureSequence2FeatureVector (),
      new Target2Label()});

    double testAcc1 = testRandomTrainedOn (new InstanceList (p));
    double testAcc2 = testRandomTrainedOn (new PagedInstanceList (p, 700, 200, new File(".")));
    assertEquals (testAcc1, testAcc2, 0.01);
  }
View Full Code Here

    Randoms r = new Randoms (1);
    Iterator<Instance> iter = new RandomTokenSequenceIterator (r,  new Dirichlet(fd, 2.0),
          30, 0, 10, 200, classNames);
    training.addThruPipe (iter);

    InstanceList testing = new InstanceList (training.getPipe ());
    testing.addThruPipe (new RandomTokenSequenceIterator (r,  new Dirichlet(fd, 2.0),
          30, 0, 10, 200, classNames));

    System.out.println ("Training set size = "+training.size());
    System.out.println ("Testing set size = "+testing.size());

    Classifier classifier = trainer.train (training);

    System.out.println ("Accuracy on training set:");
    System.out.println (classifier.getClass().getName()
View Full Code Here

  {
    Pipe pipe = TestMEMM.makeSpacePredictionPipe ();
    String[] data0 = { TestCRF.data[0] };
    String[] data1 = TestCRF.data;

    InstanceList training = new InstanceList (pipe);
    training.addThruPipe (new ArrayIterator (data0));
    InstanceList testing = new InstanceList (pipe);
    testing.addThruPipe (new ArrayIterator (data1));

    CRF crf = new CRF (pipe, null);
    crf.addFullyConnectedStatesForLabels ();
    CRFTrainerByLabelLikelihood crft = new CRFTrainerByLabelLikelihood (crf);
    TokenAccuracyEvaluator eval = new TokenAccuracyEvaluator (new InstanceList[] {training, testing}, new String[] {"Training", "Testing"});
    for (int i = 0; i < 5; i++) {
      crft.train (training, 1);
      eval.evaluate(crft);
    }

    CRFExtractor extor = hackCrfExtor (crf);
    Extraction e1 = extor.extract (new ArrayIterator (data1));

    Pipe pipe2 = TestMEMM.makeSpacePredictionPipe ();
    InstanceList training2 = new InstanceList (pipe2);
    training2.addThruPipe (new ArrayIterator (data0));
    InstanceList testing2 = new InstanceList (pipe2);
    testing2.addThruPipe (new ArrayIterator (data1));

    MEMM memm = new MEMM (pipe2, null);
    memm.addFullyConnectedStatesForLabels ();
    MEMMTrainer memmt = new MEMMTrainer (memm);
    TransducerEvaluator memmeval = new TokenAccuracyEvaluator (new InstanceList[] {training2, testing2}, new String[] {"Training2", "Testing2"});
View Full Code Here

    Reader reader = new StringReader (input);
    ParenGroupIterator it = new ParenGroupIterator (reader);
    Pipe pipe = new Noop();
    pipe.setTargetProcessing (false);

    InstanceList lst = new InstanceList (pipe);
    lst.addThruPipe (it);

    assertEquals (3, lst.size());
    assertEquals ("(a (b c) ((d))  )", lst.get(0).getData());
    assertEquals ("(3\n 4)", lst.get(1).getData());
    assertEquals ("(  6)", lst.get(2).getData());
  }
View Full Code Here

    Pipe pipe = new SerialPipes (new Pipe[] {
        new GenericAcrfData2TokenSequence (2),
        new TokenSequence2FeatureVectorSequence (true, true),
    });

    InstanceList training = new InstanceList (pipe);
    training.addThruPipe (new LineGroupIterator (new FileReader (trainFile),
                                         Pattern.compile ("\\s*"),
                                         true));

    InstanceList testing = new InstanceList (pipe);
    training.addThruPipe (new LineGroupIterator (new FileReader (testFile),
                                         Pattern.compile ("\\s*"),
                                         true));

    ACRF.Template[] tmpls = new ACRF.Template[] {
View Full Code Here

      Pipe pipe = new ClusteringPipe(string2ints(exactMatchFields.value, fieldAlphabet),
                                 string2ints(approxMatchFields.value, fieldAlphabet),
                                 string2ints(substringMatchFields.value, fieldAlphabet));

      InstanceList trainingInstances = new InstanceList(pipe);
      for (int i = 0; i < training.size(); i++) {
        PairSampleIterator iterator = new PairSampleIterator(training
            .get(i), random, 0.5, training.get(i).getNumInstances());
        while(iterator.hasNext()) {
          Instance inst = iterator.next();
          trainingInstances.add(pipe.pipe(inst));
        }
      }
      logger.info("generated " + trainingInstances.size()
          + " training instances");
      Classifier classifier = new MaxEntTrainer().train(trainingInstances);
      logger.info("InfoGain:\n");
      new InfoGain(trainingInstances).printByRank(System.out);
      logger.info("pairwise training accuracy="
View Full Code Here

      AgglomerativeNeighbor neighbor = (AgglomerativeNeighbor) carrier
          .getData();
      Clustering original = neighbor.getOriginal();
      int[] cluster1 = neighbor.getOldClusters()[0];
      int[] cluster2 = neighbor.getOldClusters()[1];
      InstanceList list = original.getInstances();
      int[] mergedIndices = neighbor.getNewCluster();
      Record[] records = array2Records(mergedIndices, list);
      Alphabet fieldAlph = records[0].fieldAlphabet();
      Alphabet valueAlph = records[0].valueAlphabet();
View Full Code Here

TOP

Related Classes of cc.mallet.types.InstanceList

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.