Package cc.mallet.types

Examples of cc.mallet.types.InstanceList


      new FeatureVector(crf.getInputAlphabet(), new int[]{1, 2, 3}, new double[]{1, 1, 1}),
      new FeatureVector(crf.getInputAlphabet(), new int[]{1, 2, 3}, new double[]{1, 1, 1}),
      new FeatureVector(crf.getInputAlphabet(), new int[]{1, 2, 3}, new double[]{1, 1, 1}),
    });
    FeatureSequence ss = new FeatureSequence(crf.getOutputAlphabet(), new int[]{0, 1, 2, 3});
    InstanceList ilist = new InstanceList(null);
    ilist.add(fvs, ss, null, null);

    crf.addFullyConnectedStates(stateNames);

    try {
      ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(f));
View Full Code Here


  public void doTestSpacePrediction(boolean testValueAndGradient)
  {
    Pipe p = makeSpacePredictionPipe ();
    Pipe p2 = new TestMEMM2String();

    InstanceList instances = new InstanceList(p);
    instances.addThruPipe(new ArrayIterator(data));
    InstanceList[] lists = instances.split(new double[]{.5, .5});
    MEMM memm = new MEMM(p, p2);
    memm.addFullyConnectedStatesForLabels();
    memm.setWeightsDimensionAsIn(lists[0]);
   
    MEMMTrainer memmt = new MEMMTrainer (memm);
View Full Code Here

  {
    Pipe p = makeSpacePredictionPipe ();

    MEMM savedCRF;
    File f = new File("TestObject.obj");
    InstanceList instances = new InstanceList(p);
    instances.addThruPipe(new ArrayIterator(data));
    InstanceList[] lists = instances.split(new double[]{.5, .5});
    MEMM crf = new MEMM(p.getDataAlphabet(), p.getTargetAlphabet());
    crf.addFullyConnectedStatesForLabels();
    if (useSparseWeights)
      crf.setWeightsDimensionAsIn(lists[0]);
    else
View Full Code Here

  public void disabledtestAddOrderNStates ()
  {
    Pipe p = makeSpacePredictionPipe ();

    InstanceList instances = new InstanceList (p);
    instances.addThruPipe (new ArrayIterator(data));
    InstanceList[] lists = instances.split (new java.util.Random (678), new double[]{.5, .5});

    // Compare 3 CRFs trained with addOrderNStates, and make sure
    // that having more features leads to a higher likelihood

    MEMM crf1 = new MEMM(p.getDataAlphabet(), p.getTargetAlphabet());
View Full Code Here

       new TokenText(),
       new TestMEMM.TestMEMMTokenSequenceRemoveSpaces(),
       new TokenSequence2FeatureVectorSequence(),
       new PrintInputAndTarget(),
    });
    InstanceList one = new InstanceList (p);
    String[] data = new String[] { "ABCDE", };
    one.addThruPipe (new ArrayIterator (data));
    MEMM crf = new MEMM (p, null);
    crf.addFullyConnectedStatesForLabels();
    crf.setWeightsDimensionAsIn (one);
    MEMMTrainer memmt = new MEMMTrainer (crf);
    MEMMTrainer.MEMMOptimizableByLabelLikelihood mcrf = memmt.getOptimizableMEMM(one);
View Full Code Here

  {
    long startTime = System.currentTimeMillis();
   
    Reader trainingFile = null, testFile = null;
    Reader constraintsFile = null;
    InstanceList trainingData = null, testData = null;
    int restArgs = commandOptions.processOptions(args);
    if (restArgs == args.length)
    {
      commandOptions.printUsage(true);
      throw new IllegalArgumentException("Missing data file(s)");
    }
    if (trainOption.value)
    {
      trainingFile = new FileReader(new File(args[restArgs]));
      if (testOption.value != null) {
        testFile = new FileReader(new File(args[restArgs+1]));
        constraintsFile = new FileReader(new File(args[restArgs+2]));
      }
      else {
        constraintsFile = new FileReader(new File(args[restArgs+1]));
      }
    } else
      testFile = new FileReader(new File(args[restArgs]));

    Pipe p = null;
    CRF crf = null;
    TransducerEvaluator eval = null;
    if (continueTrainingOption.value || !trainOption.value) {
      if (modelOption.value == null)
      {
        commandOptions.printUsage(true);
        throw new IllegalArgumentException("Missing model file option");
      }
      ObjectInputStream s =
        new ObjectInputStream(new FileInputStream(modelOption.value));
      crf = (CRF) s.readObject();
      s.close();
      p = crf.getInputPipe();
    }
    else {
      p = new SimpleTaggerSentence2FeatureVectorSequence();
      p.getTargetAlphabet().lookupIndex(defaultOption.value);
    }


    if (trainOption.value)
    {
      p.setTargetProcessing(true);
      trainingData = new InstanceList(p);
      trainingData.addThruPipe(
          new LineGroupIterator(trainingFile,
            Pattern.compile("^\\s*$"), true));
      logger.info
        ("Number of features in training data: "+p.getDataAlphabet().size());
      if (testOption.value != null)
      {
        if (testFile != null)
        {
          testData = new InstanceList(p);
          testData.addThruPipe(
              new LineGroupIterator(testFile,
                Pattern.compile("^\\s*$"), true));
        } else
        {
          Random r = new Random (randomSeedOption.value);
          InstanceList[] trainingLists =
            trainingData.split(
                r, new double[] {trainingFractionOption.value,
                  1-trainingFractionOption.value});
          trainingData = trainingLists[0];
          testData = trainingLists[1];
        }
      }
    } else if (testOption.value != null)
    {
      p.setTargetProcessing(true);
      testData = new InstanceList(p);
      testData.addThruPipe(
          new LineGroupIterator(testFile,
            Pattern.compile("^\\s*$"), true));
    } else
    {
      p.setTargetProcessing(false);
      testData = new InstanceList(p);
      testData.addThruPipe(
          new LineGroupIterator(testFile,
            Pattern.compile("^\\s*$"), true));
    }
    logger.info ("Number of predicates: "+p.getDataAlphabet().size());
View Full Code Here

  public static void main(String[] args) {
    String htmldir = args[0];
    Pipe pipe = new SerialPipes(new Pipe[] { new Input2CharSequence(),
        new CharSequenceRemoveHTML() });
    InstanceList list = new InstanceList(pipe);
    list.addThruPipe(new FileIterator(htmldir, FileIterator.STARTING_DIRECTORIES));

    for (int index = 0; index < list.size(); index++) {
      Instance inst = list.get(index);
      System.err.println(inst.getData());
    }

  }
View Full Code Here

   * @exception Exception if an error occurs
   */
  public static void main (String[] args) throws Exception {

    Reader trainingFile = null, testFile = null;
    InstanceList trainingData = null, testData = null;
    int numEvaluations = 0;
    int iterationsBetweenEvals = 16;
    int restArgs = commandOptions.processOptions(args);

    if (restArgs == args.length) {
      commandOptions.printUsage(true);
      throw new IllegalArgumentException("Missing data file(s)");
    }

    if (trainOption.value) {
      trainingFile = new FileReader(new File(args[restArgs]));
      if (testOption.value != null && restArgs < args.length - 1) {
        testFile = new FileReader(new File(args[restArgs+1]));
      }
    }
    else {
      testFile = new FileReader(new File(args[restArgs]));
    }

    Pipe p = null;
    CRF crf = null;
    TransducerEvaluator eval = null;

    if (continueTrainingOption.value || !trainOption.value) {
      if (modelOption.value == null) {
        commandOptions.printUsage(true);
        throw new IllegalArgumentException("Missing model file option");
      }
      ObjectInputStream s =
        new ObjectInputStream(new FileInputStream(modelOption.value));
      crf = (CRF) s.readObject();
      s.close();
      p = crf.getInputPipe();
    }
    else {
      p = new SimpleTaggerSentence2FeatureVectorSequence();
      p.getTargetAlphabet().lookupIndex(defaultOption.value);
    }


    if (trainOption.value) {
      p.setTargetProcessing(true);
      trainingData = new InstanceList(p);
      trainingData.addThruPipe(new LineGroupIterator(trainingFile,
                               Pattern.compile("^\\s*$"), true));

      logger.info("Number of features in training data: "+p.getDataAlphabet().size());

      if (testOption.value != null) {
        if (testFile != null) {
          testData = new InstanceList(p);
          testData.addThruPipe(new LineGroupIterator(testFile,
                                 Pattern.compile("^\\s*$"), true));
        }
        else {
          Random r = new Random (randomSeedOption.value);
          InstanceList[] trainingLists =
            trainingData.split(r, new double[] {trainingFractionOption.value,
                              1 - trainingFractionOption.value});
          trainingData = trainingLists[0];
          testData = trainingLists[1];
        }
      }
    }
    else if (testOption.value != null) {
      p.setTargetProcessing(true);
      testData = new InstanceList(p);
      testData.addThruPipe(new LineGroupIterator(testFile,
                             Pattern.compile("^\\s*$"), true));
    }
    else {
        p.setTargetProcessing(false);
        testData = new InstanceList(p);
        testData.addThruPipe(
                   new LineGroupIterator(testFile,
                               Pattern.compile("^\\s*$"), true));
    }
    logger.info ("Number of predicates: "+p.getDataAlphabet().size());
View Full Code Here

            if (tmp.lists.length != this.lists.length) {
                return false;
            }
           
            for (int i = 0; i < this.lists.length; i++) {
                InstanceList thisList = this.lists[i];
                InstanceList tmpList = tmp.lists[i];

                if (thisList == null && tmpList != null) {
                    return false;
                } else if (!thisList.equals (tmpList)) {
                    return false;
View Full Code Here

                if (this.i.hasNext ()) {
                    return true;
                }

                for (int tmpIndex = this.index + 1; tmpIndex < lists.length; tmpIndex++) {
                    final InstanceList list = lists[tmpIndex];
                    if (list != null && lists[tmpIndex].size () > 0) {
                        return true;
                    }
                }
            }
View Full Code Here

TOP

Related Classes of cc.mallet.types.InstanceList

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.