public MaximizableTrainer (){}
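/**
 * Constructs a trainer over the given training list. If initialClassifier
 * is non-null, its parameters and feature selections are reused (shared,
 * not copied) as the starting point for training.
 */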
public MaximizableTrainer (InstanceList ilist, RankMaxEnt initialClassifier)
{
this.trainingList = ilist;
Alphabet fd = ilist.getDataAlphabet();
LabelAlphabet ld = (LabelAlphabet) ilist.getTargetAlphabet();
// Don't call fd.stopGrowth(), because someone might want to do feature induction;
// the label alphabet is likewise left growable.
// One extra feature slot is reserved below for the "default feature".
// Ranking is treated as binary classification of each sub-instance
// ("positive" vs. not), so two labels suffice regardless of the size
// of the underlying label alphabet.
this.numLabels = 2;
this.numFeatures = fd.size() + 1;
this.defaultFeatureIndex = numFeatures-1;
this.parameters = new double [numLabels * numFeatures];
this.constraints = new double [numLabels * numFeatures];
this.cachedGradient = new double [numLabels * numFeatures];
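// parameters, constraints, and cachedGradient are flat [numLabels x numFeatures]
// matrices, indexed as [label * numFeatures + featureIndex].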
// (new double[] arrays are zero-initialized in Java, so no explicit fill is needed)
this.featureSelection = ilist.getFeatureSelection();
this.perLabelFeatureSelection = ilist.getPerLabelFeatureSelection();
// Add the default feature index to the selection
if (featureSelection != null)
featureSelection.add (defaultFeatureIndex);
if (perLabelFeatureSelection != null)
for (int i = 0; i < perLabelFeatureSelection.length; i++)
perLabelFeatureSelection[i].add (defaultFeatureIndex);
// Only one kind of feature selection may be active at a time.
// TODO: allow both to be set, selecting which one to use via a boolean flag.
assert (featureSelection == null || perLabelFeatureSelection == null);
if (initialClassifier != null) {
this.theClassifier = initialClassifier;
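// Share (rather than copy) the classifier's state so that optimization
// updates the classifier's parameters in place.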
this.parameters = theClassifier.parameters;
this.featureSelection = theClassifier.featureSelection;
this.perLabelFeatureSelection = theClassifier.perClassFeatureSelection;
this.defaultFeatureIndex = theClassifier.defaultFeatureIndex;
assert (initialClassifier.getInstancePipe() == ilist.getPipe());
}
else if (this.theClassifier == null) {
this.theClassifier = new RankMaxEnt (ilist.getPipe(), parameters, featureSelection, perLabelFeatureSelection);
}
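// Mark the cached value and gradient as stale so they are recomputed on first use.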
cachedValueStale = true;
cachedGradientStale = true;
// Initialize the constraints (empirical feature expectations), using only
// the features of the "positive" (best-ranked) sub-instance of each instance.
Iterator<Instance> iter = trainingList.iterator ();
logger.fine("Number of instances in training list = " + trainingList.size());
while (iter.hasNext()) {
Instance instance = iter.next();
double instanceWeight = trainingList.getInstanceWeight(instance);
FeatureVectorSequence fvs = (FeatureVectorSequence) instance.getData();
// The target holds the index of the best ("positive") sub-instance in the sequence.
Object target = instance.getTarget();
Label label;
if (target instanceof Labels)
label = ((Labels) target).get(0);
else
label = (Label) target;
int positiveIndex = Integer.parseInt(label.getBestLabel().getEntry().toString());
if (positiveIndex == -1) { // invalid instance
logger.warning("True label is -1. Skipping...");
continue;
}
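// Fetch the feature vector of the positive sub-instance.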
FeatureVector fv = (FeatureVector) fvs.get(positiveIndex);
Alphabet fdict = fv.getAlphabet();
assert (fdict == fd);
// Accumulate fv * instanceWeight into row 0 of constraints; row 0 is the
// "positive" label's row in the flat [numLabels x numFeatures] layout.
MatrixOps.rowPlusEquals (constraints, numFeatures, 0, fv, instanceWeight);
// (The default feature, whose weight is always 1.0, contributes its constraint separately.)
assert (!Double.isNaN(instanceWeight)) : "instanceWeight is NaN";
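// Scan the positive feature vector for NaN values and log any offending features.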
boolean hasNaN = false;
for (int i = 0; i < fv.numLocations(); i++) {
if (Double.isNaN(fv.valueAtLocation(i))) {
logger.info("NaN for feature " + fdict.lookupObject(fv.indexAtLocation(i)).toString());
hasNaN = true;
}
}
if (hasNaN)
logger.info("NaN in instance: " + instance.getName());