Package cc.mallet.types

Examples of cc.mallet.types.Instance


      if (ilist == null)
        throw new IllegalStateException ("Frozen.  Cannot split.");
      InstanceList ilist0 = new InstanceList (ilist.getPipe());
      InstanceList ilist1 = new InstanceList (ilist.getPipe());
      for (int i = 0; i < ilist.size(); i++) {
        Instance instance = ilist.get(i);
        FeatureVector fv = (FeatureVector) instance.getData ();
        // xxx What test should this be?  What to do with negative values?
          // Whatever is decided here should also go in InfoGain.calcInfoGains()
        if (fv.value (featureIndex) != 0) {
          //System.out.println ("list1 add "+instance.getUri()+" weight="+ilist.getInstanceWeight(i));
          ilist1.add (instance, ilist.getInstanceWeight(i));
View Full Code Here


      MatrixOps.expNormalize(scores);
  
      entropy += Maths.getEntropy(scores);

      LabelVector lv = new LabelVector((LabelAlphabet)data.getTargetAlphabet(), scores);
      Instance instance = new Instance(data.get(ii).getData(),lv,null,null);
      dataLabeled.add(instance);
    }
   
    // train supervised
    MaxEntOptimizableByLabelDistribution opt = new  MaxEntOptimizableByLabelDistribution(dataLabeled,p);
View Full Code Here

    double[] results = new double[numLabels];
    for (int iter = 0; iter < m_maxIterations; iter++) {

      // loop through all instances
      for (int ii = 0; ii < trainingList.size(); ii++) {
        Instance inst = trainingList.get(ii);
        Labeling labeling = inst.getLabeling ();
        FeatureVector fv = (FeatureVector) inst.getData();
        int fvisize = fv.numLocations();
        int correctIndex = labeling.getBestIndex();
        Arrays.fill(results, 0);

        // compute dot(x, wi) for each class i
View Full Code Here

        if (testSequence.size() < 1) {
            return new Pair<Double, Sequence<?>>(-1.0, null);
        }

        Instance inst = testSequence.get(0);
        input = (Sequence<?>) inst.getData();

        output = crf.transduce(input);
        conf = crf_estimator.estimateConfidenceFor(inst, startTags, inTags);

        return new Pair<Double, Sequence<?>>(conf, output);
View Full Code Here

  private boolean[][] labelConnectionsIn (InstanceList trainingSet, String start)
  {
    int numLabels = outputAlphabet.size();
    boolean[][] connections = new boolean[numLabels][numLabels];
    for (int i = 0; i < trainingSet.size(); i++) {
      Instance instance = trainingSet.get(i);
      FeatureSequence output = (FeatureSequence) instance.getTarget();
      for (int j = 1; j < output.size(); j++) {
        int sourceIndex = outputAlphabet.lookupIndex (output.get(j-1));
        int destIndex = outputAlphabet.lookupIndex (output.get(j));
        assert (sourceIndex >= 0 && destIndex >= 0);
        connections[sourceIndex][destIndex] = true;
View Full Code Here

    for (int i = 0; i < parameters.weights.length; i++)
      for (int j = parameters.weights[i].numLocations()-1; j >= 0; j--)
        weightsPresent[i].set (parameters.weights[i].indexAtLocation(j));
    // Put in the weights in the training set
    for (int i = 0; i < trainingData.size(); i++) {
      Instance instance = trainingData.get(i);
      FeatureVectorSequence input = (FeatureVectorSequence) instance.getData();
      FeatureSequence output = (FeatureSequence) instance.getTarget();
      // gsc: trainingData can have unlabeled instances as well
      if (output != null && output.size() > 0) {
        // Do it for the paths consistent with the labels...
        sumLatticeFactory.newSumLattice (this, input, output, new Transducer.Incrementor() {
          public void incrementTransition (Transducer.TransitionIterator ti, double count) {
View Full Code Here

      FeatureInducer klfi = (FeatureInducer)featureInducers.get(i);
      klfi.induceFeaturesFor (testing, false, false);
    }
    Sequence[] ret = new Sequence[testing.size()];
    for (int i = 0; i < testing.size(); i++) {
      Instance instance = testing.get(i);
      Sequence input = (Sequence) instance.getData();
      Sequence trueOutput = (Sequence) instance.getTarget();
      assert (input.size() == trueOutput.size());
      Sequence predOutput = new MaxLatticeDefault(this, input).bestOutputSequence();
      assert (predOutput.size() == trueOutput.size());
      ret[i] = predOutput;
    }
View Full Code Here

        new SGML2TokenSequence()
//        new SGML2TokenSequence (new CharSequenceLexer (Pattern.compile (".")), "O")
        });

      for (int i = 0; i < args.length; i++) {
        Instance carrier = p.instanceFrom(new Instance (new File(args[i]), null, null, null));
        TokenSequence data = (TokenSequence) carrier.getData();
        TokenSequence target = (TokenSequence) carrier.getTarget();
        logger.finer ("===");
        logger.info (args[i]);
        for (int j = 0; j < data.size(); j++)
          logger.info (target.get(j).getText()+" "+data.get(j).getText());
      }
View Full Code Here

    double[][] scores = new double[trainingList.size()][numLabels];
   
    // pass 1: calculate model distribution
    for (int ii = 0; ii < trainingList.size(); ii++) {
      Instance instance = trainingList.get(ii);
      double instanceWeight = trainingList.getInstanceWeight(instance);
     
      // skip if labeled
      if (instance.getTarget() != null) {
        continue;
      }
     
      FeatureVector fv = (FeatureVector) instance.getData();
      classifier.getClassificationScoresWithTemperature(instance, temperature, scores[ii]);
     
      for (int loc = 0; loc < fv.numLocations(); loc++) {
        int featureIndex = fv.indexAtLocation(loc);
        if (constraints.containsKey(featureIndex)) {
          int cIndex = mapping.get(featureIndex);           
          double val;
          if (!useValues) {
            val = 1.;
          }
          else {
            val = fv.valueAtLocation(loc);
          }
          featureCounts[cIndex] += val;
          for (int l = 0; l < numLabels; l++) {
            modelExpectations[cIndex][l] += scores[ii][l] * val * instanceWeight;
          }
        }
      }
     
      // special case of label regularization
      if (constraints.containsKey(defaultFeatureIndex)) {
        int cIndex = mapping.get(defaultFeatureIndex);
        featureCounts[cIndex] += 1;
        for (int l = 0; l < numLabels; l++) {
          modelExpectations[cIndex][l] += scores[ii][l] * instanceWeight;
        }       
      }
    }
   
    double value = 0;
    for (int featureIndex : constraints.keySet()) {
      int cIndex = mapping.get(featureIndex);
      if (featureCounts[cIndex] > 0) {
        for (int label = 0; label < numLabels; label++) {
          double cProb = constraints.get(featureIndex)[label];
          // normalize by count
          modelExpectations[cIndex][label] /= featureCounts[cIndex];
          ratio[cIndex][label] =  cProb / modelExpectations[cIndex][label];
          // add to the cross entropy term
          value += scalingFactor * cProb * Math.log(modelExpectations[cIndex][label]);
          // add to the entropy term
          if (cProb > 0) {
            value -= scalingFactor * cProb * Math.log(cProb);
          }
        }
        assert(Maths.almostEquals(MatrixOps.sum(modelExpectations[cIndex]),1));
      }
    }

    // pass 2: determine per example gradient
    for (int ii = 0; ii < trainingList.size(); ii++) {
      Instance instance = trainingList.get(ii);
     
      // skip if labeled
      if (instance.getTarget() != null) {
        continue;
      }
     
      double instanceWeight = trainingList.getInstanceWeight(instance);
      FeatureVector fv = (FeatureVector) instance.getData();

      for (int loc = 0; loc < fv.numLocations() + 1; loc++) {
        int featureIndex;
        if (loc == fv.numLocations()) {
          featureIndex = defaultFeatureIndex;
View Full Code Here

    Iterator<Integer> keyIter = labeledFeatures.keySet().iterator();
   
    double[][] featureCounts = new double[labeledFeatures.size()][numLabels];
    for (int ii = 0; ii < trainingData.size(); ii++) {
      Instance instance = trainingData.get(ii);
      FeatureVector fv = (FeatureVector)instance.getData();
      Labeling labeling = trainingData.get(ii).getLabeling();
      double[] labelDist = new double[numLabels];
     
      if (labeling == null) {
        labelByVoting(labeledFeatures,instance,labelDist);
View Full Code Here

TOP

Related Classes of cc.mallet.types.Instance

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.