Package cc.mallet.types

Examples of cc.mallet.types.Alphabet


  {
    SparseVector v = makeSparseVectorToN (5);
    AugmentableFeatureVector afv = makeAfv (new int[] { 1, 3 }, true);
    double dp = afv.dotProduct (v);
    assertEquals (4.0, dp, 1e-5);
    new AugmentableFeatureVector (new Alphabet(), true);
  }
View Full Code Here


    assertEquals (7.0, dp, 1e-5);
  }

  private AugmentableFeatureVector makeAfv (int[] ints, boolean binary)
  {
    AugmentableFeatureVector afv = new AugmentableFeatureVector (new Alphabet(), binary);
    for (int i = 0; i < ints.length; i++) {
      int idx = ints[i];
      afv.add (idx, 1.0);
    }
    return afv;
View Full Code Here

  private static Logger logger = MalletLogger.getLogger(Array2FeatureVector.class.getName());

  public Array2FeatureVector(int capacity) {

    this.dataAlphabet = new Alphabet(capacity);

  }
View Full Code Here

    return new SparseVector (vals);
  }

  public void testAddWithPrefix ()
  {
    Alphabet dict = new Alphabet ();
    dict.lookupIndex ("ZERO");
    dict.lookupIndex ("ONE");
    dict.lookupIndex ("TWO");
    dict.lookupIndex ("THREE");

    FeatureVector fv = new FeatureVector (dict, new int[] { 1,3 });

    AugmentableFeatureVector afv = new AugmentableFeatureVector (new Alphabet (), true);
    afv.add (fv, "O:");

    assertEquals (4, dict.size());
    assertEquals (2, afv.getAlphabet ().size());
    assertEquals ("O:ONE\nO:THREE\n", afv.toString ());
  }
View Full Code Here

    public double [] initialWeights; // indexed by state index
    public double [] finalWeights; // indexed by state index
   
    /** Construct a new empty Factors with a new empty weightsAlphabet, 0-length initialWeights and finalWeights, and the other arrays null. */
    public Factors () {
      weightAlphabet = new Alphabet();
      initialWeights = new double[0];
      finalWeights = new double[0];
      // Leave the rest as null.  They will get set later by addState() and addWeight()
      // Alternatively, we could create zero-length arrays
    }
View Full Code Here

    // we will be reinitializing the weights
    // TODO: provide method to save weights
    trainingList.getDataAlphabet().stopGrowth();
    trainingList.getTargetAlphabet().stopGrowth();
    Pipe dataPipe = trainingList.getPipe ();
    Alphabet dict = (Alphabet) trainingList.getDataAlphabet ();
    int numLabels = trainingList.getTargetAlphabet().size();
    int numFeats = dict.size();
    this.theta =  numFeats * this.nfactor;
    this.weights = new double [numLabels][numFeats];
    // init weights to 1
    for(int i=0; i<numLabels; i++)
      for(int j=0; j<numFeats; j++)
View Full Code Here

  }

  public MaxEntOptimizableByLabelLikelihood (InstanceList trainingSet, MaxEnt initialClassifier)
  {
    this.trainingList = trainingSet;
    Alphabet fd = trainingSet.getDataAlphabet();
    LabelAlphabet ld = (LabelAlphabet) trainingSet.getTargetAlphabet();
    // Don't fd.stopGrowth, because someone might want to do feature induction
    ld.stopGrowth();
    // Add one feature for the "default feature".
    this.numLabels = ld.size();
    this.numFeatures = fd.size() + 1;
    this.defaultFeatureIndex = numFeatures-1;
    this.parameters = new double [numLabels * numFeatures];
    this.constraints = new double [numLabels * numFeatures];
    this.cachedGradient = new double [numLabels * numFeatures];
    Arrays.fill (parameters, 0.0);
    Arrays.fill (constraints, 0.0);
    Arrays.fill (cachedGradient, 0.0);
    this.featureSelection = trainingSet.getFeatureSelection();
    this.perLabelFeatureSelection = trainingSet.getPerLabelFeatureSelection();
    // Add the default feature index to the selection
    if (featureSelection != null)
      featureSelection.add (defaultFeatureIndex);
    if (perLabelFeatureSelection != null)
      for (int i = 0; i < perLabelFeatureSelection.length; i++)
        perLabelFeatureSelection[i].add (defaultFeatureIndex);
    // xxx Later change this to allow both to be set, but select which one to use by a boolean flag?
    assert (featureSelection == null || perLabelFeatureSelection == null);
    if (initialClassifier != null) {
      this.theClassifier = initialClassifier;
      this.parameters = theClassifier.parameters;
      this.featureSelection = theClassifier.featureSelection;
      this.perLabelFeatureSelection = theClassifier.perClassFeatureSelection;
      this.defaultFeatureIndex = theClassifier.defaultFeatureIndex;
      assert (initialClassifier.getInstancePipe() == trainingSet.getPipe());
    }
    else if (this.theClassifier == null) {
      this.theClassifier = new MaxEnt (trainingSet.getPipe(), parameters, featureSelection, perLabelFeatureSelection);
    }
    cachedValueStale = true;
    cachedGradientStale = true;

    // Initialize the constraints
    logger.fine("Number of instances in training list = " + trainingList.size());
    for (Instance inst : trainingList) {
      double instanceWeight = trainingList.getInstanceWeight(inst);
      Labeling labeling = inst.getLabeling ();
      if (labeling == null)
        continue;
      //logger.fine ("Instance "+ii+" labeling="+labeling);
      FeatureVector fv = (FeatureVector) inst.getData ();
      Alphabet fdict = fv.getAlphabet();
      assert (fv.getAlphabet() == fd);
      int li = labeling.getBestIndex();
      MatrixOps.rowPlusEquals (constraints, numFeatures, li, fv, instanceWeight);
      // For the default feature, whose weight is 1.0
      assert(!Double.isNaN(instanceWeight)) : "instanceWeight is NaN";
      assert(!Double.isNaN(li)) : "bestIndex is NaN";
      boolean hasNaN = false;
      for (int i = 0; i < fv.numLocations(); i++) {
        if(Double.isNaN(fv.valueAtLocation(i))) {
          logger.info("NaN for feature " + fdict.lookupObject(fv.indexAtLocation(i)).toString());
          hasNaN = true;
        }
      }
      if (hasNaN)
        logger.info("NaN in instance: " + inst.getName());
View Full Code Here

    return new LabelVector(labelAlphabet, allScores);
  }
 
  public void print ()
  {   
    final Alphabet dict = getAlphabet();
    final LabelAlphabet labelDict = (LabelAlphabet)getLabelAlphabet();
   
    int numFeatures = dict.size() + 1;
    int numLabels = labelDict.size();
   
    // Include the feature weights according to each label
    //for (int li = 0; li < numLabels; li++) {
    System.out.println ("FEATURES FOR CLASS "+labelDict.lookupObject (0));
    System.out.println (" <default> "+parameters [defaultFeatureIndex]);
    for (int i = 0; i < defaultFeatureIndex; i++) {
      Object name = dict.lookupObject (i);
      double weight = parameters [i];
      System.out.println (" "+name+" "+weight);
    }   
  } 
View Full Code Here

                        scores));
    }

  public void print ()
  {   
    final Alphabet dict = getAlphabet();
    final LabelAlphabet labelDict = getLabelAlphabet();
       
    int numFeatures = dict.size() + 1;
    int numLabels = labelDict.size();
   
     // Include the feature weights according to each label
     for (int li = 0; li < numLabels; li++) {
       System.out.println ("FEATURES FOR CLASS "+labelDict.lookupObject (li));
       System.out.println (" <default> "+parameters [li*numFeatures + defaultFeatureIndex]);
       for (int i = 0; i < defaultFeatureIndex; i++) {
         Object name = dict.lookupObject (i);
              double weight = parameters [li*numFeatures + i];
         System.out.println (" "+name+" "+weight);
       }
     }
  }
View Full Code Here

    public MaximizableTrainer (){}

    public MaximizableTrainer (InstanceList ilist, RankMaxEnt initialClassifier)
    {
      this.trainingList = ilist;
      Alphabet fd = ilist.getDataAlphabet();
      LabelAlphabet ld = (LabelAlphabet) ilist.getTargetAlphabet();
      // Don't fd.stopGrowth, because someone might want to do feature induction
      //ld.stopGrowth();
      // Add one feature for the "default feature".
      // assume underlying Instances are binary
      //this.numLabels = underlyingLabelAlphabet.size();
      // xxx
      this.numLabels = 2;

      this.numFeatures = fd.size() + 1;
      this.defaultFeatureIndex = numFeatures-1;
      this.parameters = new double [numLabels * numFeatures];
      this.constraints = new double [numLabels * numFeatures];
      this.cachedGradient = new double [numLabels * numFeatures];
      Arrays.fill (parameters, 0.0);
      Arrays.fill (constraints, 0.0);
      Arrays.fill (cachedGradient, 0.0);
      this.featureSelection = ilist.getFeatureSelection();
      this.perLabelFeatureSelection = ilist.getPerLabelFeatureSelection();
      // Add the default feature index to the selection
      if (featureSelection != null)
        featureSelection.add (defaultFeatureIndex);
      if (perLabelFeatureSelection != null)
        for (int i = 0; i < perLabelFeatureSelection.length; i++)
          perLabelFeatureSelection[i].add (defaultFeatureIndex);
      // xxx Later change this to allow both to be set, but select which one to use by a boolean flag?
      assert (featureSelection == null || perLabelFeatureSelection == null);
      if (initialClassifier != null) {       
        this.theClassifier = initialClassifier;
        this.parameters = theClassifier.parameters;
        this.featureSelection = theClassifier.featureSelection;
        this.perLabelFeatureSelection = theClassifier.perClassFeatureSelection;
        this.defaultFeatureIndex = theClassifier.defaultFeatureIndex;
        assert (initialClassifier.getInstancePipe() == ilist.getPipe());
      }
      else if (this.theClassifier == null) {
        this.theClassifier = new RankMaxEnt (ilist.getPipe(), parameters, featureSelection, perLabelFeatureSelection);
      }
      cachedValueStale = true;
      cachedGradientStale = true;

      // Initialize the constraints, using only the constraints from
      // the "positive" instance
      Iterator<Instance> iter = trainingList.iterator ();
      logger.fine("Number of instances in training list = " + trainingList.size());
      while (iter.hasNext()) {
        Instance instance = iter.next();
        double instanceWeight = trainingList.getInstanceWeight(instance);
        FeatureVectorSequence fvs = (FeatureVectorSequence) instance.getData();
        // label of best instance in subList
        Object target = instance.getTarget();
        Label label = null;
        if (target instanceof Labels)
          label = ((Labels)target).get(0);
        else label = (Label)target;
        int positiveIndex =
          Integer.valueOf(label.getBestLabel().getEntry().toString()).intValue();
        if (positiveIndex == -1) { // invalid instance
          logger.warning("True label is -1. Skipping...");
           continue;
        }
        FeatureVector fv = (FeatureVector)fvs.get(positiveIndex);
        Alphabet fdict = fv.getAlphabet();
        assert (fv.getAlphabet() == fd);

        // xxx ensure dimensionality of constraints correct
        MatrixOps.rowPlusEquals (constraints, numFeatures, 0, fv, instanceWeight);

        // For the default feature, whose weight is 1.0
        assert(!Double.isNaN(instanceWeight)) : "instanceWeight is NaN";
        //assert(!Double.isNaN(li)) : "bestIndex is NaN";
        boolean hasNaN = false;
        for(int i = 0; i < fv.numLocations(); i++) {
          if(Double.isNaN(fv.valueAtLocation(i))) {
            logger.info("NaN for feature " + fdict.lookupObject(fv.indexAtLocation(i)).toString());
            hasNaN = true;
          }
        }
        if(hasNaN)
          logger.info("NaN in instance: " + instance.getName());
View Full Code Here

TOP

Related Classes of cc.mallet.types.Alphabet

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.