Package cc.mallet.pipe

Examples of cc.mallet.pipe.Pipe


    TestOptimizable.testGetSetParameters(omemm);
  }

  public void testSpaceMaximizable ()
  {
    Pipe p = makeSpacePredictionPipe ();
    InstanceList training = new InstanceList (p);
//    String[] data = { TestMEMM.data[0], }; // TestMEMM.data[1], TestMEMM.data[2], TestMEMM.data[3], };
//    String[] data = { "ab" };
    training.addThruPipe (new ArrayIterator (data));
View Full Code Here


    TestOptimizable.testValueAndGradient (mcrf);
  }

  public void testSpaceSerializable () throws IOException, ClassNotFoundException
  {
    Pipe p = makeSpacePredictionPipe ();
    InstanceList training = new InstanceList (p);
    training.addThruPipe (new ArrayIterator (data));

    MEMM memm = new MEMM (p, null);
    memm.addFullyConnectedStatesForLabels ();
View Full Code Here

  }


  public void doTestSpacePrediction(boolean testValueAndGradient)
  {
    Pipe p = makeSpacePredictionPipe ();
    Pipe p2 = new TestMEMM2String();

    InstanceList instances = new InstanceList(p);
    instances.addThruPipe(new ArrayIterator(data));
    InstanceList[] lists = instances.split(new double[]{.5, .5});
    MEMM memm = new MEMM(p, p2);
View Full Code Here

  public void doTestSpacePrediction(boolean testValueAndGradient,
                                    boolean useSaved,
                                    boolean useSparseWeights)
  {
    Pipe p = makeSpacePredictionPipe ();

    MEMM savedCRF;
    File f = new File("TestObject.obj");
    InstanceList instances = new InstanceList(p);
    instances.addThruPipe(new ArrayIterator(data));
    InstanceList[] lists = instances.split(new double[]{.5, .5});
    MEMM crf = new MEMM(p.getDataAlphabet(), p.getTargetAlphabet());
    crf.addFullyConnectedStatesForLabels();
    if (useSparseWeights)
      crf.setWeightsDimensionAsIn(lists[0]);
    else
      crf.setWeightsDimensionDensely();
View Full Code Here

  }


  public static  Pipe makeSpacePredictionPipe ()
  {
    Pipe p = new SerialPipes(new Pipe[]{
      new CharSequence2TokenSequence("."),
      new TokenSequenceLowercase(),
      new TestMEMMTokenSequenceRemoveSpaces(),
      new TokenText(),
      new OffsetConjunctions(true,
View Full Code Here

  }


  public void disabledtestAddOrderNStates ()
  {
    Pipe p = makeSpacePredictionPipe ();

    InstanceList instances = new InstanceList (p);
    instances.addThruPipe (new ArrayIterator(data));
    InstanceList[] lists = instances.split (new java.util.Random (678), new double[]{.5, .5});

    // Compare 3 CRFs trained with addOrderNStates, and make sure
    // that having more features leads to a higher likelihood

    MEMM crf1 = new MEMM(p.getDataAlphabet(), p.getTargetAlphabet());
    crf1.addOrderNStates (lists [0],
                         new int[] { 1, },
                         new boolean[] { false, },
                         "START",
                         null,
                         null,
                         false);
    crf1.setWeightsDimensionAsIn(lists[0]);
    MEMMTrainer memmt1 = new MEMMTrainer (crf1);
    memmt1.train(lists [0]);


    MEMM crf2 = new MEMM(p.getDataAlphabet(), p.getTargetAlphabet());
    crf2.addOrderNStates (lists [0],
                           new int[] { 1, 2, },
                           new boolean[] { false, true },
                           "START",
                           null,
                           null,
                           false);
    crf2.setWeightsDimensionAsIn(lists[0]);
    MEMMTrainer memmt2 = new MEMMTrainer (crf2);
    memmt2.train(lists [0]);


    MEMM crf3 = new MEMM(p.getDataAlphabet(), p.getTargetAlphabet());
    crf3.addOrderNStates (lists [0],
                         new int[] { 1, 2, },
                         new boolean[] { false, false },
                         "START",
                         null,
View Full Code Here

    doTestSpacePrediction(false, true, false);
  }

  public void disabledtestPrint ()
  {
    Pipe p = new SerialPipes (new Pipe[] {
       new CharSequence2TokenSequence("."),
       new TokenText(),
       new TestMEMM.TestMEMMTokenSequenceRemoveSpaces(),
       new TokenSequence2FeatureVectorSequence(),
       new PrintInputAndTarget(),
View Full Code Here

        constraintsFile = new FileReader(new File(args[restArgs+1]));
      }
    } else
      testFile = new FileReader(new File(args[restArgs]));

    Pipe p = null;
    CRF crf = null;
    TransducerEvaluator eval = null;
    if (continueTrainingOption.value || !trainOption.value) {
      if (modelOption.value == null)
      {
        commandOptions.printUsage(true);
        throw new IllegalArgumentException("Missing model file option");
      }
      ObjectInputStream s =
        new ObjectInputStream(new FileInputStream(modelOption.value));
      crf = (CRF) s.readObject();
      s.close();
      p = crf.getInputPipe();
    }
    else {
      p = new SimpleTaggerSentence2FeatureVectorSequence();
      p.getTargetAlphabet().lookupIndex(defaultOption.value);
    }


    if (trainOption.value)
    {
      p.setTargetProcessing(true);
      trainingData = new InstanceList(p);
      trainingData.addThruPipe(
          new LineGroupIterator(trainingFile,
            Pattern.compile("^\\s*$"), true));
      logger.info
        ("Number of features in training data: "+p.getDataAlphabet().size());
      if (testOption.value != null)
      {
        if (testFile != null)
        {
          testData = new InstanceList(p);
          testData.addThruPipe(
              new LineGroupIterator(testFile,
                Pattern.compile("^\\s*$"), true));
        } else
        {
          Random r = new Random (randomSeedOption.value);
          InstanceList[] trainingLists =
            trainingData.split(
                r, new double[] {trainingFractionOption.value,
                  1-trainingFractionOption.value});
          trainingData = trainingLists[0];
          testData = trainingLists[1];
        }
      }
    } else if (testOption.value != null)
    {
      p.setTargetProcessing(true);
      testData = new InstanceList(p);
      testData.addThruPipe(
          new LineGroupIterator(testFile,
            Pattern.compile("^\\s*$"), true));
    } else
    {
      p.setTargetProcessing(false);
      testData = new InstanceList(p);
      testData.addThruPipe(
          new LineGroupIterator(testFile,
            Pattern.compile("^\\s*$"), true));
    }
    logger.info ("Number of predicates: "+p.getDataAlphabet().size());
   
   
    if (testOption.value != null)
    {
      if (testOption.value.startsWith("lab"))
        eval = new TokenAccuracyEvaluator(new InstanceList[] {trainingData, testData}, new String[] {"Training", "Testing"});
      else if (testOption.value.startsWith("seg="))
      {
        String[] pairs = testOption.value.substring(4).split(",");
        if (pairs.length < 1)
        {
          commandOptions.printUsage(true);
          throw new IllegalArgumentException(
              "Missing segment start/continue labels: " + testOption.value);
        }
        String startTags[] = new String[pairs.length];
        String continueTags[] = new String[pairs.length];
        for (int i = 0; i < pairs.length; i++)
        {
          String[] pair = pairs[i].split("\\.");
          if (pair.length != 2)
          {
            commandOptions.printUsage(true);
            throw new
              IllegalArgumentException(
                  "Incorrectly-specified segment start and end labels: " +
                  pairs[i]);
          }
          startTags[i] = pair[0];
          continueTags[i] = pair[1];
        }
        eval = new MultiSegmentationEvaluator(new InstanceList[] {trainingData, testData}, new String[] {"Training", "Testing"},
            startTags, continueTags);
      }
      else
      {
        commandOptions.printUsage(true);
        throw new IllegalArgumentException("Invalid test option: " +
            testOption.value);
      }
    }
   
   
   
    if (p.isTargetProcessing())
    {
      Alphabet targets = p.getTargetAlphabet();
      StringBuffer buf = new StringBuffer("Labels:");
      for (int i = 0; i < targets.size(); i++)
        buf.append(" ").append(targets.lookupObject(i).toString());
      logger.info(buf.toString());
    }
View Full Code Here

    }
    else {
      testFile = new FileReader(new File(args[restArgs]));
    }

    Pipe p = null;
    CRF crf = null;
    TransducerEvaluator eval = null;

    if (continueTrainingOption.value || !trainOption.value) {
      if (modelOption.value == null) {
        commandOptions.printUsage(true);
        throw new IllegalArgumentException("Missing model file option");
      }
      ObjectInputStream s =
        new ObjectInputStream(new FileInputStream(modelOption.value));
      crf = (CRF) s.readObject();
      s.close();
      p = crf.getInputPipe();
    }
    else {
      p = new SimpleTaggerSentence2FeatureVectorSequence();
      p.getTargetAlphabet().lookupIndex(defaultOption.value);
    }


    if (trainOption.value) {
      p.setTargetProcessing(true);
      trainingData = new InstanceList(p);
      trainingData.addThruPipe(new LineGroupIterator(trainingFile,
                               Pattern.compile("^\\s*$"), true));

      logger.info("Number of features in training data: "+p.getDataAlphabet().size());

      if (testOption.value != null) {
        if (testFile != null) {
          testData = new InstanceList(p);
          testData.addThruPipe(new LineGroupIterator(testFile,
                                 Pattern.compile("^\\s*$"), true));
        }
        else {
          Random r = new Random (randomSeedOption.value);
          InstanceList[] trainingLists =
            trainingData.split(r, new double[] {trainingFractionOption.value,
                              1 - trainingFractionOption.value});
          trainingData = trainingLists[0];
          testData = trainingLists[1];
        }
      }
    }
    else if (testOption.value != null) {
      p.setTargetProcessing(true);
      testData = new InstanceList(p);
      testData.addThruPipe(new LineGroupIterator(testFile,
                             Pattern.compile("^\\s*$"), true));
    }
    else {
        p.setTargetProcessing(false);
        testData = new InstanceList(p);
        testData.addThruPipe(
                   new LineGroupIterator(testFile,
                               Pattern.compile("^\\s*$"), true));
    }
    logger.info ("Number of predicates: "+p.getDataAlphabet().size());
   
   
    if (testOption.value != null) {
      if (testOption.value.startsWith("lab")) {
          eval = new TokenAccuracyEvaluator(new InstanceList[] {trainingData, testData}, new String[] {"Training", "Testing"});
      }
      else if (testOption.value.startsWith("seg=")) {
        String[] pairs = testOption.value.substring(4).split(",");
        if (pairs.length < 1) {
          commandOptions.printUsage(true);
          throw new IllegalArgumentException
            ("Missing segment start/continue labels: " + testOption.value);
        }
        String startTags[] = new String[pairs.length];
        String continueTags[] = new String[pairs.length];

        for (int i = 0; i < pairs.length; i++) {
          String[] pair = pairs[i].split("\\.");
          if (pair.length != 2) {
            commandOptions.printUsage(true);
            throw new
              IllegalArgumentException
              ("Incorrectly-specified segment start and end labels: " + pairs[i]);
          }
          startTags[i] = pair[0];
          continueTags[i] = pair[1];
        }
        eval = new MultiSegmentationEvaluator(new InstanceList[] {trainingData, testData}, new String[] {"Training", "Testing"},
                            startTags, continueTags);
      }
      else {
          commandOptions.printUsage(true);
          throw new IllegalArgumentException("Invalid test option: " +
                             testOption.value);
      }
    }


   
    if (p.isTargetProcessing()) {
      Alphabet targets = p.getTargetAlphabet();
      StringBuffer buf = new StringBuffer("Labels:");
      for (int i = 0; i < targets.size(); i++)
        buf.append(" ").append(targets.lookupObject(i).toString());
      logger.info(buf.toString());
    }
View Full Code Here

  {
    return pipes.size();
  }

  public Pipe getPipe (int index) {
    Pipe retPipe = null;
    try {
      retPipe = pipes.get(index);
    }
    catch (Exception e) {
      System.err.println("Error getting pipe. Index = " + index + ".  " + e.getMessage());
View Full Code Here

TOP

Related Classes of cc.mallet.pipe.Pipe

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.