Examples of cc.mallet.types.FeatureSequence

cc.mallet.types.FeatureSequence
An implementation of {@link Sequence} that ensures that every object in the sequence has the same class. Feature sequences are mutable, and will expand as new objects are added. @author Andrew McCallum mccallum@cs.umass.edu

      if (false) {
        // This method not yet obeying its last "false" argument, and must be for this to work
        //sampleTopicsForOneDoc((FeatureSequence)instance.getData(), topicSequence, false, false);
      } else {
        Randoms r = new Randoms();
        FeatureSequence fs = (FeatureSequence) instance.getData();
        int[] topics = topicSequence.getFeatures();
        for (int i = 0; i < topics.length; i++) {
          int type = fs.getIndexAtPosition(i);
          topics[i] = r.nextInt(numTopics);
          typeTopicCounts[type].adjustOrPutValue(topics[i], 1, 1);
            tokensPerTopic[topics[i]]++;
        }
      }
      topicSequences.add (topicSequence);
    }


    //construct test
    assert (testing.size() == topicSequences.size());
    for (int i = 0; i < testing.size(); i++) {
      Topication t = new Topication (testing.get(i), this, topicSequences.get(i));
      test.add (t);
    }


    long startTime = System.currentTimeMillis();
    //loop
    int iter = 0;
    for ( ; iter <= maxIteration; iter++) {
      if(iter%100==0)
      {
        System.out.print("Iteration: " + iter);
        System.out.println();
      }
      int numDocs = test.size(); // TODO
      for (int di = 0; di < numDocs; di++) {
        FeatureSequence tokenSequence = (FeatureSequence) test.get(di).instance.getData();
        LabelSequence topicSequence = test.get(di).topicSequence;
        sampleTopicsForOneTestDocAll (tokenSequence, topicSequence);
      }
    }

View Full Code Here

      if (false) {
        // This method not yet obeying its last "false" argument, and must be for this to work
        //sampleTopicsForOneDoc((FeatureSequence)instance.getData(), topicSequence, false, false);
      } else {
        Randoms r = new Randoms();
        FeatureSequence fs = (FeatureSequence) instance.getData();
        int[] topics = topicSequence.getFeatures();
        for (int i = 0; i < topics.length; i++) {
          int type = fs.getIndexAtPosition(i);
          topics[i] = r.nextInt(numTopics);
        /*  if(typeTopicCounts[type].size() != 0) {
            topics[i] = r.nextInt(numTopics);
          } else {
            topics[i] = -1;  // for unseen words
          }*/
        }
      }
      topicSequences.add (topicSequence);
    }


    //construct test
    assert (testing.size() == topicSequences.size());
    for (int i = 0; i < testing.size(); i++) {
      Topication t = new Topication (testing.get(i), this, topicSequences.get(i));
      test.add (t);
      // Include sufficient statistics for this one doc
      // add count on new data to n[k][w] and n[k][*]
      // pay attention to unseen words
      FeatureSequence tokenSequence = (FeatureSequence) t.instance.getData();
      LabelSequence topicSequence = t.topicSequence;
      for (int pi = 0; pi < topicSequence.getLength(); pi++) {
        int topic = topicSequence.getIndexAtPosition(pi);
        int type = tokenSequence.getIndexAtPosition(pi);
        if(topic != -1) // type seen in training
        {
          typeTopicCounts[type].adjustOrPutValue(topic, 1, 1);
            tokensPerTopic[topic]++;
        }
      }
    }


    long startTime = System.currentTimeMillis();
    //loop
    int iter = 0;
    for ( ; iter <= maxIteration; iter++) {
      if(iter%100==0)
      {
        System.out.print("Iteration: " + iter);
        System.out.println();
      }
      int numDocs = test.size(); // TODO
      for (int di = 0; di < numDocs; di++) {
        FeatureSequence tokenSequence = (FeatureSequence) test.get(di).instance.getData();
        LabelSequence topicSequence = test.get(di).topicSequence;
        sampleTopicsForOneTestDoc (tokenSequence, topicSequence);
      }
    }

View Full Code Here

      if (false) {
        // This method not yet obeying its last "false" argument, and must be for this to work
        //sampleTopicsForOneDoc((FeatureSequence)instance.getData(), topicSequence, false, false);
      } else {
        Randoms r = new Randoms();
        FeatureSequence fs = (FeatureSequence) instance.getData();
        int[] topics = topicSequence.getFeatures();
        for (int i = 0; i < topics.length; i++) {
          int type = fs.getIndexAtPosition(i);
          topics[i] = r.nextInt(numTopics);
          typeTopicCounts[type].adjustOrPutValue(topics[i], 1, 1);
          tokensPerTopic[topics[i]]++;
        /*  if(typeTopicCounts[type].size() != 0) {
            topics[i] = r.nextInt(numTopics);
            typeTopicCounts[type].adjustOrPutValue(topics[i], 1, 1);
            tokensPerTopic[topics[i]]++;
          } else {
            topics[i] = -1;  // for unseen words
          }*/
        }
      }
      topicSequences.add (topicSequence);
    }


    //construct test
    assert (testing.size() == topicSequences.size());
    for (int i = 0; i < testing.size(); i++) {
      Topication t = new Topication (testing.get(i), this, topicSequences.get(i));
      test.add (t);
    }


    long startTime = System.currentTimeMillis();
    //loop
    int iter = 0;
    int numDocs = test.size(); // TODO
    for (int di = 0; di < numDocs; di++) {
      iter = 0;
      FeatureSequence tokenSequence = (FeatureSequence) test.get(di).instance.getData();
      LabelSequence topicSequence = test.get(di).topicSequence;
      for( ; iter <= maxIteration; iter++) {
        sampleTopicsForOneTestDoc (tokenSequence, topicSequence);
      }
      if(di%100==0)

View Full Code Here

      if (false) {
        // This method not yet obeying its last "false" argument, and must be for this to work
        //sampleTopicsForOneDoc((FeatureSequence)instance.getData(), topicSequence, false, false);
      } else {
        Randoms r = new Randoms();
        FeatureSequence fs = (FeatureSequence) instance.getData();
        int[] topics = topicSequence.getFeatures();
        for (int i = 0; i < topics.length; i++) {
          int type = fs.getIndexAtPosition(i);
          topics[i] = r.nextInt(numTopics);
        }
      }
      topicSequences.add (topicSequence);
    }


    //construct test
    assert (testing.size() == topicSequences.size());
    for (int i = 0; i < testing.size(); i++) {
      Topication t = new Topication (testing.get(i), this, topicSequences.get(i));
      test.add (t);
      // Include sufficient statistics for this one doc
      // add count on new data to n[k][w] and n[k][*]
      // pay attention to unseen words
      FeatureSequence tokenSequence = (FeatureSequence) t.instance.getData();
      LabelSequence topicSequence = t.topicSequence;
      for (int pi = 0; pi < topicSequence.getLength(); pi++) {
        int topic = topicSequence.getIndexAtPosition(pi);
        int type = tokenSequence.getIndexAtPosition(pi);
        if(topic != -1) // type seen in training
        {
          typeTopicCounts[type].adjustOrPutValue(topic, 1, 1);
            tokensPerTopic[topic]++;
        }
      }
    }


    long startTime = System.currentTimeMillis();
    //loop
    int iter = 0;
    for ( ; iter <= maxIteration; iter++) {
      if(iter%100==0)
      {
        System.out.print("Iteration: " + iter);
        System.out.println();
      }
      int numDocs = test.size(); // TODO
      for (int di = 0; di < numDocs; di++) {
        FeatureVector fvTheta = (FeatureVector) theta.get(di).getData();
        double[] topicDistribution = fvTheta.getValues();
        FeatureSequence tokenSequence = (FeatureSequence) test.get(di).instance.getData();
        LabelSequence topicSequence = test.get(di).topicSequence;
        sampleTopicsForOneDocWithTheta (tokenSequence, topicSequence, topicDistribution);
      }
    }

View Full Code Here

  public void printState (ArrayList<Topication> dataset, PrintStream out) {


    out.println ("#doc source pos typeindex type topic");


    for (int di = 0; di < dataset.size(); di++) {
      FeatureSequence tokenSequence =  (FeatureSequence) dataset.get(di).instance.getData();
      LabelSequence topicSequence =  dataset.get(di).topicSequence;


      String source = "NA";
      if (dataset.get(di).instance.getSource() != null) {
        source = dataset.get(di).instance.getSource().toString();
      }


      for (int pi = 0; pi < topicSequence.getLength(); pi++) {
        int type = tokenSequence.getIndexAtPosition(pi);
        int topic = topicSequence.getIndexAtPosition(pi);
        out.print(di); out.print(' ');
        out.print(source); out.print(' ');
        out.print(pi); out.print(' ');
        out.print(type); out.print(' ');

View Full Code Here

  public FeatureSequence randomFeatureSequence (Randoms r, int length)
  {
    if (! (dictionary instanceof Alphabet))
      throw new UnsupportedOperationException
        ("Multinomial's dictionary must be a Alphabet");
    FeatureSequence fs = new FeatureSequence ((Alphabet)dictionary, length);
    while (length-- > 0)
      fs.add (randomIndex (r));
    return fs;
  }

View Full Code Here

  }
  
  
  public Instance pipe (Instance carrier)
  {
    FeatureSequence fs = (FeatureSequence) carrier.getData();
    carrier.setData(new FeatureVector (fs, binary));
    return carrier;
  }

View Full Code Here

    }
  }*/
  
  public Instance pipe (Instance carrier)
  {
    FeatureSequence fseq = (FeatureSequence) carrier.getData();
    FeatureSequence ret =
      new FeatureSequence ((Alphabet)getDataAlphabet());
    int i,j, curLen;
    curLen=fseq.getLength();
    //first add fseq to ret
    for(i = 0; i < curLen; i++) {
      ret.add(fseq.getObjectAtPosition(i));
    }
    //second word co-occurrence
    int pre, cur;
    Object coO;
    for(i = 0; i < curLen-1; i++) {
      for(j = i + 1; j < curLen; j++) {
        pre = fseq.getIndexAtPosition(i);
        cur = fseq.getIndexAtPosition(j);
        coO = pre + "_" + cur;
        ret.add(coO);
      }
    }
    if(carrier.isLocked()) {
      carrier.unLock();
    }

View Full Code Here

      new FeatureVector(crf.getInputAlphabet(), new int[]{1, 2, 3}, new double[]{1, 1, 1}),
      new FeatureVector(crf.getInputAlphabet(), new int[]{1, 2, 3}, new double[]{1, 1, 1}),
      new FeatureVector(crf.getInputAlphabet(), new int[]{1, 2, 3}, new double[]{1, 1, 1}),
      new FeatureVector(crf.getInputAlphabet(), new int[]{1, 2, 3}, new double[]{1, 1, 1}),
    });
    FeatureSequence ss = new FeatureSequence(crf.getOutputAlphabet(), new int[]{0, 1, 2, 3});
    InstanceList ilist = new InstanceList(null);
    ilist.add(fvs, ss, null, null);


    crf.addFullyConnectedStates(stateNames);

View Full Code Here

    assert (constraints.structureMatches(crf.parameters));
    constraints.zero();


    for (Instance instance : ilist) {
      FeatureVectorSequence input = (FeatureVectorSequence) instance.getData();
      FeatureSequence output = (FeatureSequence) instance.getTarget();
      double instanceWeight = ilist.getInstanceWeight(instance);
      Transducer.Incrementor incrementor =
        instanceWeight == 1.0 ? constraints.new Incrementor()
      : constraints.new WeightedIncrementor(instanceWeight);
        new SumLatticeDefault (this.crf, input, output, incrementor);

View Full Code Here

0 1 2 3

TOP

Related Classes of cc.mallet.types.FeatureSequence

cc.mallet.fst.CRF

cc.mallet.fst.CRFOptimizableByBatchLabelLikelihood

cc.mallet.fst.CRFOptimizableByLabelLikelihood

cc.mallet.fst.HMM

cc.mallet.fst.HMMTrainerByLikelihood

cc.mallet.fst.MEMMTrainer

cc.mallet.fst.tests.TestCRF

cc.mallet.fst.tests.TestCRF$TestCRFTokenSequenceRemoveSpaces

cc.mallet.fst.tests.TestMEMM

cc.mallet.pipe.FeatureSequence2FeatureVector

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.