Package cc.mallet.pipe

Examples of cc.mallet.pipe.Pipe


  // NOTE(review): the method header is not part of this excerpt; a caller elsewhere in
  // this file invokes hackCrfExtor (crf), so this is presumably that method's body — confirm.
  // Builds a CRFExtractor for the given crf, pairing it with a three-stage SerialPipes
  // assembled from the crf's own input pipe.
  {
    Pipe[] newPipes = new Pipe [3];

    // The cast requires the crf's input pipe to be a SerialPipes; anything else
    // fails here with a ClassCastException.
    SerialPipes pipes = (SerialPipes) crf.getInputPipe ();
    for (int i = 0; i < 3; i++) {
      // NOTE(review): index 0 is read on every iteration. The removePipe call that
      // would have advanced the list is commented out below, so all three slots
      // presumably end up holding the same first pipe — verify whether getPipe (i)
      // was intended before relying on this helper.
      Pipe p0 = pipes.getPipe (0);
      //pipes.removePipe (0);  TODO Fix me
      //p0.setParent (null);
      newPipes[i] = p0;
    }

    // Wrap the collected stages in a single serial pipe.
    Pipe tokPipe = new SerialPipes (newPipes);

    // The extractor is given both the crf and the pipe built above.
    CRFExtractor extor = new CRFExtractor (crf, (Pipe)tokPipe);
    return extor;
  }
View Full Code Here


  }


  public void testDualSpaceViewer () throws IOException
  {
    Pipe pipe = TestMEMM.makeSpacePredictionPipe ();
    String[] data0 = { TestCRF.data[0] };
    String[] data1 = TestCRF.data;

    InstanceList training = new InstanceList (pipe);
    training.addThruPipe (new ArrayIterator (data0));
    InstanceList testing = new InstanceList (pipe);
    testing.addThruPipe (new ArrayIterator (data1));

    CRF crf = new CRF (pipe, null);
    crf.addFullyConnectedStatesForLabels ();
    CRFTrainerByLabelLikelihood crft = new CRFTrainerByLabelLikelihood (crf);
    TokenAccuracyEvaluator eval = new TokenAccuracyEvaluator (new InstanceList[] {training, testing}, new String[] {"Training", "Testing"});
    for (int i = 0; i < 5; i++) {
      crft.train (training, 1);
      eval.evaluate(crft);
    }

    CRFExtractor extor = hackCrfExtor (crf);
    Extraction e1 = extor.extract (new ArrayIterator (data1));

    Pipe pipe2 = TestMEMM.makeSpacePredictionPipe ();
    InstanceList training2 = new InstanceList (pipe2);
    training2.addThruPipe (new ArrayIterator (data0));
    InstanceList testing2 = new InstanceList (pipe2);
    testing2.addThruPipe (new ArrayIterator (data1));
View Full Code Here

  // Shared test fixture: two newline-separated lines, each made of three
  // space-separated tokens. Tests pass it as the data of an Instance in order to
  // initialize a pipe's dictionary.
  private static String data = "f1 f2 CL1\nf1 f3 CL2";

  public void testPipesAreStupid ()
  {
    Pipe p1 = new StupidPipe ();
    Pipe p2 = new SimpleTaggerSentence2TokenSequence ();
    // initialize p2's dict
    p2.instanceFrom(new Instance (data, null, null, null));

    Pipe serial = new SerialPipes (new Pipe[] { p1, p2 });
    try {
      serial.getDataAlphabet ();
      assertTrue ("Test failed: Should have generated exception.", false);
    } catch (IllegalStateException e) {}
  }
View Full Code Here

    } catch (IllegalStateException e) {}
  }

  public void testConcatenatePipes ()
  {
    Pipe p1 = new StupidPipe ();
    Pipe p2 = new SimpleTagger.SimpleTaggerSentence2FeatureVectorSequence ();
    // initialize p2's dict
    p2.instanceFrom(new Instance (data, null, null, null));

    assertEquals (3, p2.getDataAlphabet ().size());

    Pipe serial = PipeUtils.concatenatePipes (p1, p2);
    Alphabet dict = serial.getDataAlphabet ();

    assertEquals (3, dict.size ());
    assertTrue (dict == p2.getDataAlphabet ());
  }
View Full Code Here

    // if "train" is run more than once,
    // we will be reinitializing the weights
    // TODO: provide method to save weights
    trainingList.getDataAlphabet().stopGrowth();
    trainingList.getTargetAlphabet().stopGrowth();
    Pipe dataPipe = trainingList.getPipe ();
    Alphabet dict = (Alphabet) trainingList.getDataAlphabet ();
    int numLabels = trainingList.getTargetAlphabet().size();
    int numFeats = dict.size();
    this.theta =  numFeats * this.nfactor;
    this.weights = new double [numLabels][numFeats];
View Full Code Here

    LabelSequence lbls = new LabelSequence (ld, new int [] { 0, 2, 0, 1});

    FeatureVectorSequence fvs = new FeatureVectorSequence (vecs);
    StringWriter sw = new StringWriter ();
    PrintWriter w = new PrintWriter (sw);
    Pipe p = new SequencePrintingPipe (w);

    // pipe the instance
    p.instanceFrom(new Instance (fvs, lbls, null, null));

    // Do a second one
    FeatureVectorSequence fvs2 = new FeatureVectorSequence (new FeatureVector[] {
      new FeatureVector (dict, new int[] { 1 }),
      new FeatureVector (dict, new int[] { 0 }),
    });
    LabelSequence lbls2 = new LabelSequence (ld, new int[] { 2, 1 });
    p.instanceFrom(new Instance (fvs2, lbls2, null, null));

    w.close();

    assertEquals ("LABEL0 feature0 feature1\n" +
            "LABEL2 feature0 feature2\n" +
View Full Code Here

  // Single-element test fixture: one string holding six newline-separated tokens.
  private static String[] doc1 =  { "Meet\nme\nat\n4\nPM\ntomorrow" };

  public static void testMultiTag ()
  {
    Pipe mtPipe = new SerialPipes (new Pipe[] {
            new SimpleTaggerSentence2TokenSequence (),
            new TokenText (),
            new RegexMatches ("digits", Pattern.compile ("[0-9]+")),
            new RegexMatches ("ampm", Pattern.compile ("[aApP][mM]")),
            new OffsetFeatureConjunction ("time",
                    new String[] { "digits", "ampm" },
                    new int[] { 0, 1 },
                    true),
            new PrintInputAndTarget (),
    });
    Pipe noMtPipe = new SerialPipes (new Pipe[] {
            new SimpleTaggerSentence2TokenSequence (),
            new TokenText (),
            new RegexMatches ("digits", Pattern.compile ("[0-9]+")),
            new RegexMatches ("ampm", Pattern.compile ("[aApP][mM]")),
            new OffsetFeatureConjunction ("time",
View Full Code Here

    assertEquals (0.0, noMtTs.get (4).getFeatureValue ("time"), 1e-15);
  }

  public static void testMultiTagSerialization () throws IOException, ClassNotFoundException
  {
    Pipe origPipe = new SerialPipes (new Pipe[] {
            new SimpleTaggerSentence2TokenSequence (),
            new TokenText (),
            new RegexMatches ("digits", Pattern.compile ("[0-9]+")),
            new RegexMatches ("ampm", Pattern.compile ("[aApP][mM]")),
            new OffsetFeatureConjunction ("time",
                    new String[] { "digits", "ampm" },
                    new int[] { 0, 1 },
                    true),
            new PrintInputAndTarget (),
    });

    Pipe mtPipe = (Pipe) TestSerializable.cloneViaSerialization (origPipe);
    InstanceList mtLst = new InstanceList (mtPipe);
    mtLst.addThruPipe (new ArrayIterator (doc1));
    Instance mtInst = mtLst.get (0);
    TokenSequence mtTs = (TokenSequence) mtInst.getData ();
    assertEquals (6, mtTs.size ());
View Full Code Here

        InstanceList oldInstances = clustering.getInstances();
        Alphabet alph = oldInstances.getDataAlphabet();
        LabelAlphabet lalph = (LabelAlphabet) oldInstances.getTargetAlphabet();
        if (alph == null) alph = new Alphabet();
        if (lalph == null) lalph = new LabelAlphabet();
        Pipe noop = new Noop(alph, lalph);
        InstanceList newInstances = new InstanceList(noop);
        for (int j = 0; j < oldInstances.size(); j++) {
          int label = clustering.getLabel(j);
          Instance instance = oldInstances.get(j);
          if (clustering.size(label) >= minClusterSize.value)
            newInstances.add(noop.pipe(new Instance(instance.getData(), lalph.lookupLabel(new Integer(label)), instance.getName(), instance.getSource())));
        }
        clusterings.set(i, createSmallerClustering(newInstances));
      }
      if (outputPrefixFile.value != null) {
        try {
View Full Code Here

  public static Pipe concatenatePipes (Pipe p1, Pipe p2)
  {
    Alphabet dataDict = combinedDataDicts (p1, p2);
    Alphabet targetDict = combinedTargetDicts (p1, p2);
    Pipe ret = new SerialPipes (new Pipe[] { p1, p2 });

    if (dataDict != null) ret.dataAlphabetResolved = true;
    if (targetDict != null) ret.targetAlphabetResolved = true;
   
    ret.dataAlphabet = dataDict;
View Full Code Here

TOP

Related Classes of cc.mallet.pipe.Pipe

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.