// --- Per-instance contribution to the objective (negative log-likelihood) and gradient ---
// NOTE(review): this is the interior of a larger method; the enclosing definition
// begins before and continues after this excerpt.
double instanceWeight = trainingList.getInstanceWeight(instance);
Labeling labeling = instance.getLabeling ();
//System.out.println("L Now "+inputAlphabet.size()+" regular features.");
// Fill 'scores' with the current model's class-probability estimates for this instance.
this.theClassifier.getClassificationScores (instance, scores);
FeatureVector fv = (FeatureVector) instance.getData ();
// Index of the true (best-scoring in the labeling) label for this instance.
int li = labeling.getBestIndex();
// Weighted negative log-likelihood of the true label under the current model.
value = - (instanceWeight * Math.log (scores[li]));
if(Double.isNaN(value)) {
// NOTE(review): the message literal is missing a leading space — it renders as
// "<name>has NaN value". Consider changing the literal to " has NaN value. ..."
// (not changed here: string literals are runtime behavior).
logger.fine ("MCMaxEntTrainer: Instance " + instance.getName() +
"has NaN value. log(scores)= " + Math.log(scores[li]) +
" scores = " + scores[li] +
" has instance weight = " + instanceWeight);
}
if (Double.isInfinite(value)) {
// value is +Infinity when scores[li] == 0 (log of zero probability).
logger.warning ("Instance "+instance.getSource() + " has infinite value; skipping value and gradient");
// NOTE(review): despite the "skipping" message, "cachedValue -= value" leaves
// cachedValue infinite and "return -value" returns -Infinity. The commented-out
// "continue" below suggests this code once lived directly inside a loop and the
// early return was a later adaptation — confirm the intent against the enclosing
// method before relying on this branch.
cachedValue -= value;
cachedValueStale = false;
return -value;
// continue;
}
cachedValue += value;
// Gradient accumulation: loop over classes and their probabilities. The model
// expectation of each feature is sum_c p(c|x) * f(x); it enters the gradient
// with a negative sign, weighted by instanceWeight.
for (int si = 0; si < scores.length; si++) {
if (scores[si] == 0) continue;
assert (!Double.isInfinite(scores[si]));
// Accumulate the current classifier's expectation of the feature-vector
// counts for this class label (the expectation is over class labels,
// not over the feature vector).
MatrixOps.rowPlusEquals (cachedGradient, numFeatures,
si, fv, -instanceWeight * scores[si]);
// The default (bias) feature implicitly fires with value 1 for every class.
cachedGradient[numFeatures*si + defaultFeatureIndex] += (-instanceWeight * scores[si]);
}
// Multiconditional training adds a generative term: an accumulated expectation
// over features in addition to the discriminative term above.
if (usingMultiConditionalTraining) {
// need something analogous to this
// this.theClassifier.getClassificationScores (instance, scores);
// this.theClassifier.getFeatureDistributions (instance,
// Note: li is the "label" for this instance
// Total feature mass of the instance — the document's token count when
// features are word counts. (Not used in this excerpt; presumably consumed
// further down in the enclosing method — TODO confirm.)
double Ncounts = MatrixOps.sum(fv);
// Additional term of the -log-probability: dot product of the feature vector
// with the per-class feature vector lprobs[li] (presumably holds
// log p(feature | class li) — verify against the enclosing class).
cachedValue -= (instanceWeight * fv.dotProduct(lprobs[li]));
// Model expectation over features for the given class.
// (This loop continues beyond the visible excerpt.)
for (int fi = 0; fi < numFeatures; fi++) {
//if(parameters[numFeatures*li + fi] != 0) {