Package cc.mallet.types

Examples of cc.mallet.types.FeatureVector
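
A FeatureVector is MALLET's sparse vector type: it pairs a feature dictionary (an Alphabet) with parallel arrays of feature indices and values, and the examples below all walk it through numLocations(), indexAtLocation(), and valueAtLocation(). As a minimal orientation sketch (the feature names here are invented for illustration):

    import cc.mallet.types.Alphabet;
    import cc.mallet.types.FeatureVector;

    public class FeatureVectorDemo {
      public static void main (String[] args) {
        // A shared dictionary mapping feature objects to integer indices.
        Alphabet dict = new Alphabet ();
        int[] indices = { dict.lookupIndex ("color=red"),    // hypothetical feature names
                          dict.lookupIndex ("shape=round") };
        double[] values = { 1.0, 2.5 };
        FeatureVector fv = new FeatureVector (dict, indices, values);

        // Iterate only the non-zero (index, value) pairs, as the examples below do.
        for (int loc = 0; loc < fv.numLocations (); loc++) {
          int fi = fv.indexAtLocation (loc);
          System.out.println (dict.lookupObject (fi) + " = " + fv.valueAtLocation (loc));
        }
      }
    }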


    // From a maximum-entropy-style classifier's classify(): score each class
    // by a sparse dot product between the instance's FeatureVector and the
    // per-class weight vectors.
    public Classification classify (Instance instance)
    {
        int numClasses = getLabelAlphabet().size();
        int numFeats = getAlphabet().size();
        double[] scores = new double[numClasses];
        FeatureVector fv = (FeatureVector) instance.getData ();

        // Make sure the feature vector's feature dictionary matches
        // what we are expecting from our data pipe (and thus our notion
        // of feature probabilities).
        assert (instancePipe == null || fv.getAlphabet () == this.instancePipe.getDataAlphabet ());
        int fvisize = fv.numLocations();

        // Take dot products
        double sum = 0;
        for (int ci = 0; ci < numClasses; ci++) {
            for (int fvi = 0; fvi < fvisize; fvi++) {
                int fi = fv.indexAtLocation (fvi);
                double vi = fv.valueAtLocation (fvi);
                // Skip features added to the alphabet after training,
                // which have no learned weight.
                if (m_weights[ci].length > fi) {
                    scores[ci] += vi * m_weights[ci][fi];
                    sum += vi * m_weights[ci][fi];
                }
            }
        }
        // ... (score normalization and construction of the returned
        //      Classification follow in the full source)
    }
 


    // From a trainer's value computation: get the classifier's per-label
    // scores for each training instance, then log-normalize them.
    for (int ii = 0; ii < trainingData.size(); ii++) {
      double[] scores = new double[numLabels];
      Instance instance = trainingData.get(ii);
      FeatureVector input = (FeatureVector) instance.getData ();
      double instanceWeight = trainingData.getInstanceWeight(ii);

      classifier.getClassificationScores(instance, scores);
      // logZ will accumulate the log-normalizer over the scores.
      double logZ = Double.NEGATIVE_INFINITY;
     
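The excerpt stops right after logZ is initialized to negative infinity. What typically follows is a numerically stable log-add over the per-label scores (MALLET provides a pairwise helper, cc.mallet.util.Maths.sumLogProb); a sketch, assuming scores holds log-domain values at this point:

      // Fold each log-domain score into logZ: logZ = log(exp(logZ) + exp(score)).
      for (int li = 0; li < numLabels; li++) {
        double a = Math.max (logZ, scores[li]);
        double b = Math.min (logZ, scores[li]);
        // When a is -infinity both terms are, and the sum stays -infinity.
        logZ = (a == Double.NEGATIVE_INFINITY) ? a : a + Math.log1p (Math.exp (b - a));
      }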

      logger.fine("Number of instances in training list = " + trainingList.size());
      for (Instance inst : trainingList) {
        double instanceWeight = trainingList.getInstanceWeight(inst);
        Labeling labeling = inst.getLabeling ();
        //logger.fine ("Instance "+ii+" labeling="+labeling);
        FeatureVector fv = (FeatureVector) inst.getData ();
        Alphabet fdict = fv.getAlphabet();
        assert (fv.getAlphabet() == fd);
        int li = labeling.getBestIndex();
        // The "2*" below is because there is one copy for the p(y|x)and another for the p(x|y).
        MatrixOps.rowPlusEquals (constraints, numFeatures, li, fv, 2*instanceWeight);
        // For the default feature, whose weight is 1.0
        assert(!Double.isNaN(instanceWeight)) : "instanceWeight is NaN";
        assert(!Double.isNaN(li)) : "bestIndex is NaN";
        boolean hasNaN = false;
        for(int i = 0; i < fv.numLocations(); i++) {
          if(Double.isNaN(fv.valueAtLocation(i))) {
            logger.info("NaN for feature " + fdict.lookupObject(fv.indexAtLocation(i)).toString());
            hasNaN = true;
          }
        }
        if(hasNaN)
          logger.info("NaN in instance: " + inst.getName());

          // From MCMaxEntTrainer: accumulate the negative log-likelihood and
          // its gradient for one training instance.
          double instanceWeight = trainingList.getInstanceWeight(instance);
          Labeling labeling = instance.getLabeling ();

          this.theClassifier.getClassificationScores (instance, scores);
          FeatureVector fv = (FeatureVector) instance.getData ();
          int li = labeling.getBestIndex();
          value = - (instanceWeight * Math.log (scores[li]));
          if (Double.isNaN(value)) {
            logger.fine ("MCMaxEntTrainer: Instance " + instance.getName() +
                         " has NaN value. log(scores) = " + Math.log(scores[li]) +
                         " scores = " + scores[li] +
                         " instance weight = " + instanceWeight);
          }
          if (Double.isInfinite(value)) {
            logger.warning ("Instance " + instance.getSource() + " has infinite value; skipping value and gradient");
            cachedValue -= value;
            cachedValueStale = false;
            return -value;
          }
          cachedValue += value;

          // Loop over classes and their scores: each gradient row is
          // decremented by the feature vector scaled by the predicted
          // class probability.
          for (int si = 0; si < scores.length; si++) {
            if (scores[si] == 0) continue;
            assert (!Double.isInfinite(scores[si]));
            // Accumulate the current classifier's expectation of the
            // feature-vector counts for this class label.
            MatrixOps.rowPlusEquals (cachedGradient, numFeatures,
                                     si, fv, -instanceWeight * scores[si]);
            // The default feature's value is always 1.0.
            cachedGradient[numFeatures*si + defaultFeatureIndex] += (-instanceWeight * scores[si]);
          }

          // Multiconditional training adds a term for the accumulated
          // expectation over p(x|y); li is the label index for this instance.
          if (usingMultiConditionalTraining) {
            // The sum of the feature vector is the document's token count
            // when raw counts are used as input.
            double Ncounts = MatrixOps.sum(fv);

            // Additional term for the negative log probability: the dot
            // product of the feature vector and the per-class
            // log-probability vector.
            cachedValue -= (instanceWeight * fv.dotProduct(lprobs[li]));

            // Model expectation over features for the given class.
            for (int fi = 0; fi < numFeatures; fi++) {
              // ... (per-feature accumulation continues in the full source)
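For reference, the gradient being accumulated above is the standard maximum-entropy one: for the loss -log p(y|x), the partial derivative with respect to weight w[y',f] is (p(y'|x) - 1[y'=y]) * x_f. That is why each class row is decremented by the feature vector scaled by instanceWeight * scores[si] here, while the matching empirical term comes from the constraints computed elsewhere.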

  // Convert this object's Labeling into a new Instance whose data is a
  // FeatureVector with the same sparse indices and values.
  public Instance toInstance() {
    double[] values = new double[labeling.numLocations()];
    int[] indices = new int[labeling.numLocations()];
    // Copy the sparse (index, value) pairs out of the labeling.
    for (int i = 0; i < labeling.numLocations(); i++) {
      indices[i] = labeling.indexAtLocation(i);
      values[i] = labeling.valueAtLocation(i);
    }
    FeatureVector fv = new FeatureVector(labeling.getAlphabet(), indices, values);
    return new Instance(fv, null, null, instance.getSource());
  }

    // Generate a random Instance: sample a FeatureVector of roughly
    // featureVectorSizePoissonLambda features from the current class centroid.
    URI uri = null;
    try { uri = new URI ("random:" + classNames[currentClassIndex] + "/" + currentInstanceIndex); }
    catch (Exception e) { e.printStackTrace(); throw new IllegalStateException (); }
    // Sampling the size from a Poisson (r.nextPoisson(featureVectorSizePoissonLambda))
    // produced small numbers, so the lambda is used directly.
    int randomSize = (int) featureVectorSizePoissonLambda;
    FeatureVector fv = classCentroid[currentClassIndex].randomFeatureVector (r, randomSize);
    currentInstanceIndex--;
    return new Instance (fv, classNames[currentClassIndex], uri, null);
  }

        System.out.println("Iteration: " + iter);
      }
      // Read each document's topic distribution (theta) out of a FeatureVector
      // and resample the document's topic assignments against it.
      int numDocs = test.size(); // TODO
      for (int di = 0; di < numDocs; di++) {
        FeatureVector fvTheta = (FeatureVector) theta.get(di).getData();
        double[] topicDistribution = fvTheta.getValues();
        FeatureSequence tokenSequence = (FeatureSequence) test.get(di).instance.getData();
        LabelSequence topicSequence = test.get(di).topicSequence;
        sampleTopicsForOneDocWithTheta (tokenSequence, topicSequence, topicDistribution);
      }
    }

    }

    // Print each topic's per-word probabilities, smoothed by beta, from the
    // word-count FeatureVector for that topic.
    for (int ti = 0; ti < numTopics; ti++) {
      pw.println("Topic\t" + ti);
      FeatureCounter counter = wordCountsPerTopic[ti];
      FeatureVector fv = counter.toFeatureVector();
      for (int pos = 0; pos < fv.numLocations(); pos++) {
        int fi = fv.indexAtLocation(pos);
        String word = (String) alphabet.lookupObject(fi);
        int count = (int) fv.valueAtLocation(pos);
        double prob = (count + beta) / (tokensPerTopic[ti] + betaSum);
        pw.println(word + "\t" + prob);
      }
      pw.println();


  // Find examples that contain constrained input features: the returned
  // BitSet has a bit set for each instance touching at least one constraint.
  public BitSet preProcess(InstanceList data) {
    int ii = 0;
    int fi;
    FeatureVector fv;
    BitSet bitSet = new BitSet(data.size());
    for (Instance instance : data) {
      FeatureVectorSequence fvs = (FeatureVectorSequence) instance.getData();
      for (int ip = 0; ip < fvs.size(); ip++) {
        fv = fvs.get(ip);
        for (int loc = 0; loc < fv.numLocations(); loc++) {
          fi = fv.indexAtLocation(loc);
          if (constraints.containsKey(fi)) {
            constraints.get(fi).count += 1;
            bitSet.set(ii);
          }
        }
        // The default feature is indexed one past the last real feature
        // (at the alphabet size), so it is checked separately.
        if (constraints.containsKey(fv.getAlphabet().size())) {
          bitSet.set(ii);
          constraints.get(fv.getAlphabet().size()).count += 1;
        }
      }

      ii++;
    }
    return bitSet;
  }
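The BitSet that preProcess returns can be walked with java.util.BitSet.nextSetBit to visit only the flagged instances; a minimal usage sketch (the variable names are placeholders):

    // Hypothetical usage: iterate only instances containing constrained features.
    BitSet constrained = preProcess (trainingData);
    for (int i = constrained.nextSetBit (0); i >= 0; i = constrained.nextSetBit (i + 1)) {
      Instance inst = trainingData.get (i);   // an instance with constrained features
      // ... apply the constraint-based update to inst ...
    }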
