Package cc.mallet.pipe.iterator

Examples of cc.mallet.pipe.iterator.FileIterator


          new TokenSequenceRemoveStopwords(),
          new TokenSequence2FeatureSequence(),
          new FeatureSequence2FeatureVector() });

    InstanceList instList = new InstanceList(instPipe);
    instList.addThruPipe(new
        FileIterator(directories, FileIterator.STARTING_DIRECTORIES));

    System.out.println("Training 1");
    NaiveBayesTrainer trainer = new NaiveBayesTrainer();
    NaiveBayes classifier = trainer.trainIncremental(instList);

    //instList.getDataAlphabet().stopGrowth();

    // incrementally train...
    String[] t2directories = {
        "src/cc/mallet/classify/tests/NaiveBayesData/learn/b"
    };

    System.out.println("data alphabet size " + instList.getDataAlphabet().size());
    System.out.println("target alphabet size " + instList.getTargetAlphabet().size());
    InstanceList instList2 = new InstanceList(instPipe);
    instList2.addThruPipe(new
        FileIterator(t2directories, FileIterator.STARTING_DIRECTORIES));

    System.out.println("Training 2");

    System.out.println("data alphabet size " + instList2.getDataAlphabet().size());
View Full Code Here


          new TokenSequenceRemoveStopwords(),
          new TokenSequence2FeatureSequence(),
          new FeatureSequence2FeatureVector() });

    InstanceList instList = new InstanceList(instPipe);
    instList.addThruPipe(new
        FileIterator(directories, FileIterator.STARTING_DIRECTORIES));

    System.out.println("Training 1");
    NaiveBayesTrainer trainer = new NaiveBayesTrainer();
    NaiveBayes classifier = (NaiveBayes) trainer.trainIncremental(instList);

    Classification initialClassification = classifier.classify("Hello Everybody");
    Classification initial2Classification = classifier.classify("Goodbye now");
    System.out.println("Initial Classification = ");
    initialClassification.print();
    initial2Classification.print();
    System.out.println("data alphabet " + classifier.getAlphabet());
    System.out.println("label alphabet " + classifier.getLabelAlphabet());


    // incrementally train...
    String[] t2directories = {
        "src/cc/mallet/classify/tests/NaiveBayesData/learn/b"
    };

    System.out.println("data alphabet size " + instList.getDataAlphabet().size());
    System.out.println("target alphabet size " + instList.getTargetAlphabet().size());
    InstanceList instList2 = new InstanceList(instPipe);
    instList2.addThruPipe(new
        FileIterator(t2directories, FileIterator.STARTING_DIRECTORIES));

    System.out.println("Training 2");

    System.out.println("data alphabet size " + instList2.getDataAlphabet().size());
View Full Code Here

          new TokenSequenceRemoveStopwords(),
          new TokenSequence2FeatureSequence(),
          new FeatureSequence2FeatureVector() });

    InstanceList instList = new InstanceList(instPipe);
    instList.addThruPipe(new
        FileIterator(directories, FileIterator.STARTING_DIRECTORIES));

    System.out.println("Training 1");
    NaiveBayesTrainer trainer = new NaiveBayesTrainer();
    NaiveBayes classifier = (NaiveBayes) trainer.trainIncremental(instList);

    Classification initialClassification = classifier.classify("Hello Everybody");
    Classification initial2Classification = classifier.classify("Goodbye now");
    System.out.println("Initial Classification = ");
    initialClassification.print();
    initial2Classification.print();
    System.out.println("data alphabet " + classifier.getAlphabet());
    System.out.println("label alphabet " + classifier.getLabelAlphabet());


    // test
    String[] t2directories = {
        "src/cc/mallet/classify/tests/NaiveBayesData/learn/b"
    };

    System.out.println("data alphabet size " + instList.getDataAlphabet().size());
    System.out.println("target alphabet size " + instList.getTargetAlphabet().size());
    InstanceList instList2 = new InstanceList(instPipe);
    instList2.addThruPipe(new
        FileIterator(t2directories, FileIterator.STARTING_DIRECTORIES, true));

    System.out.println("Training 2");

    System.out.println("data alphabet size " + instList2.getDataAlphabet().size());
View Full Code Here

      File[] subdirs = getSubDirs(directory);
      Alphabet clusterAlph = new Alphabet();
      InstanceList instances = new InstanceList(new Noop());
      TIntArrayList labels = new TIntArrayList();
      for (int j = 0; j < subdirs.length; j++) {
        ArrayList<File> records = new FileIterator(subdirs[j]).getFileArray();
        int label = clusterAlph.lookupIndex(subdirs[j].toString());
        for (int k = 0; k < records.size(); k++) {
          if (fi % 100 == 0) System.out.print(fi);
          else if (fi % 10 == 0) System.out.print(".");
          if (fi % 1000 == 0 && fi > 0) System.out.println();
View Full Code Here

  public static void main(String[] args) {
    String htmldir = args[0];
    Pipe pipe = new SerialPipes(new Pipe[] { new Input2CharSequence(),
        new CharSequenceRemoveHTML() });
    InstanceList list = new InstanceList(pipe);
    list.addThruPipe(new FileIterator(htmldir, FileIterator.STARTING_DIRECTORIES));

    for (int index = 0; index < list.size(); index++) {
      Instance inst = list.get(index);
      System.err.println(inst.getData());
    }
View Full Code Here

TOP

Related Classes of cc.mallet.pipe.iterator.FileIterator

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.