Package cc.mallet.pipe.iterator

Examples of cc.mallet.pipe.iterator.LineGroupIterator


    Pipe pipe = new SerialPipes (new Pipe[] {
        basePipe,
        new TokenSequence2FeatureVectorSequence (true, true),
    });

    Iterator<Instance> trainSource = new LineGroupIterator (new FileReader (trainFile.value), Pattern.compile ("^\\s*$"), true);
    Iterator<Instance> testSource;
    if (testFile.wasInvoked ()) {
      testSource = new LineGroupIterator (new FileReader (testFile.value), Pattern.compile ("^\\s*$"), true);
    } else {
      testSource = null;
    }

    InstanceList training = new InstanceList (pipe);
View Full Code Here


  public void testFromSerialization () throws IOException, ClassNotFoundException
  {
    Pipe p = new GenericAcrfData2TokenSequence ();
    InstanceList training = new InstanceList (p);
    training.addThruPipe (new LineGroupIterator (new StringReader (sampleData), Pattern.compile ("^$"), true));

    Pipe p2 = (Pipe) TestSerializable.cloneViaSerialization (p);

    InstanceList l1 = new InstanceList (p);
    l1.addThruPipe (new LineGroupIterator (new StringReader (sampleData2), Pattern.compile ("^$"), true));
    InstanceList l2 = new InstanceList (p2);
    l2.addThruPipe (new LineGroupIterator (new StringReader (sampleData2), Pattern.compile ("^$"), true));

    // the readResolve alphabet thing doesn't kick in on first deserialization
    assertTrue (p.getTargetAlphabet () != p2.getTargetAlphabet ());

    assertEquals (1, l1.size ());
View Full Code Here

  public void testFixedNumLabels () throws IOException, ClassNotFoundException
  {
    Pipe p = new GenericAcrfData2TokenSequence (2);
    InstanceList training = new InstanceList (p);
    training.addThruPipe (new LineGroupIterator (new StringReader (sampleFixedData), Pattern.compile ("^$"), true));

    assertEquals (1, training.size ());

    Instance inst1 = training.get (0);
    LabelsSequence ls1 = (LabelsSequence) inst1.getTarget ();
View Full Code Here

  {
    GenericAcrfData2TokenSequence p = new GenericAcrfData2TokenSequence (2);
    p.setLabelsAtEnd (true);

    InstanceList training = new InstanceList (p);
    training.addThruPipe (new LineGroupIterator (new StringReader (labelsAtEndData), Pattern.compile ("^$"), true));

    assertEquals (1, training.size ());

    Instance inst1 = training.get (0);
    StringTokenization toks = (StringTokenization) inst1.getData ();
View Full Code Here

    GenericAcrfData2TokenSequence p = new GenericAcrfData2TokenSequence (2);
    p.setFeaturesIncludeToken(false);
    p.setIncludeTokenText(false);

    InstanceList training = new InstanceList (p);
    training.addThruPipe (new LineGroupIterator (new StringReader (sampleFixedData), Pattern.compile ("^$"), true));

    assertEquals (1, training.size ());

    Instance inst1 = training.get (0);
View Full Code Here

        new TokenSequenceParseFeatureString(false), new TokenText(),
        new TokenSequence2FeatureVectorSequence(true, false),
        new Target2LabelSequence(), new PrintInputAndTarget(), });

    InstanceList data = new InstanceList(p);
    data.addThruPipe(new LineGroupIterator(new StringReader(toy), Pattern
        .compile("\n"), true));

    CRF crf = new CRF(p, null);
    crf.print();
    crf.addStatesForLabelsConnectedAsIn(data);
View Full Code Here

    if (trainOption.value)
    {
      p.setTargetProcessing(true);
      trainingData = new InstanceList(p);
      trainingData.addThruPipe(
          new LineGroupIterator(trainingFile,
            Pattern.compile("^\\s*$"), true));
      logger.info
        ("Number of features in training data: "+p.getDataAlphabet().size());
      if (testOption.value != null)
      {
        if (testFile != null)
        {
          testData = new InstanceList(p);
          testData.addThruPipe(
              new LineGroupIterator(testFile,
                Pattern.compile("^\\s*$"), true));
        } else
        {
          Random r = new Random (randomSeedOption.value);
          InstanceList[] trainingLists =
            trainingData.split(
                r, new double[] {trainingFractionOption.value,
                  1-trainingFractionOption.value});
          trainingData = trainingLists[0];
          testData = trainingLists[1];
        }
      }
    } else if (testOption.value != null)
    {
      p.setTargetProcessing(true);
      testData = new InstanceList(p);
      testData.addThruPipe(
          new LineGroupIterator(testFile,
            Pattern.compile("^\\s*$"), true));
    } else
    {
      p.setTargetProcessing(false);
      testData = new InstanceList(p);
      testData.addThruPipe(
          new LineGroupIterator(testFile,
            Pattern.compile("^\\s*$"), true));
    }
    logger.info ("Number of predicates: "+p.getDataAlphabet().size());
   
   
View Full Code Here


    if (trainOption.value) {
      p.setTargetProcessing(true);
      trainingData = new InstanceList(p);
      trainingData.addThruPipe(new LineGroupIterator(trainingFile,
                               Pattern.compile("^\\s*$"), true));

      logger.info("Number of features in training data: "+p.getDataAlphabet().size());

      if (testOption.value != null) {
        if (testFile != null) {
          testData = new InstanceList(p);
          testData.addThruPipe(new LineGroupIterator(testFile,
                                 Pattern.compile("^\\s*$"), true));
        }
        else {
          Random r = new Random (randomSeedOption.value);
          InstanceList[] trainingLists =
            trainingData.split(r, new double[] {trainingFractionOption.value,
                              1 - trainingFractionOption.value});
          trainingData = trainingLists[0];
          testData = trainingLists[1];
        }
      }
    }
    else if (testOption.value != null) {
      p.setTargetProcessing(true);
      testData = new InstanceList(p);
      testData.addThruPipe(new LineGroupIterator(testFile,
                             Pattern.compile("^\\s*$"), true));
    }
    else {
        p.setTargetProcessing(false);
        testData = new InstanceList(p);
        testData.addThruPipe(
                   new LineGroupIterator(testFile,
                               Pattern.compile("^\\s*$"), true));
    }
    logger.info ("Number of predicates: "+p.getDataAlphabet().size());
   
   
View Full Code Here

  private static Iterator<Instance> constructIterator (File trainFile, File dataDir, boolean isList) throws IOException
  {
    if (isList) {
      return new FileListIterator (trainFile, dataDir, null, null, true);
    } else {
      return new LineGroupIterator (new FileReader (trainFile), Pattern.compile ("^\\s*$"), true);
    }
  }
View Full Code Here

        new GenericAcrfData2TokenSequence (2),
        new TokenSequence2FeatureVectorSequence (true, true),
    });

    InstanceList training = new InstanceList (pipe);
    training.addThruPipe (new LineGroupIterator (new FileReader (trainFile),
                                         Pattern.compile ("\\s*"),
                                         true));

    InstanceList testing = new InstanceList (pipe);
    training.addThruPipe (new LineGroupIterator (new FileReader (testFile),
                                         Pattern.compile ("\\s*"),
                                         true));

    ACRF.Template[] tmpls = new ACRF.Template[] {
            new ACRF.BigramTemplate (0),
View Full Code Here

TOP

Related Classes of cc.mallet.pipe.iterator.LineGroupIterator

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.