Examples of cc.mallet.types.LabelAlphabet

cc.mallet.types.LabelAlphabet
A mapping from arbitrary objects (usually Strings) to integers (and corresponding Label objects) and back. @author Andrew McCallum mccallum@cs.umass.edu

  public Instance pipe (Instance carrier)
  {
    if (carrier.getTarget() != null) {
      if (carrier.getTarget() instanceof Label)
        throw new IllegalArgumentException ("Already a label.");
      LabelAlphabet ldict = (LabelAlphabet) getTargetAlphabet();
      carrier.setTarget(ldict.lookupLabel (carrier.getTarget()));
    }
    return carrier;
  }

View Full Code Here


  public static class TestMEMMTokenSequenceRemoveSpaces extends Pipe implements Serializable {


    /**
     * Creates the pipe, handing the superclass constructor <code>null</code>
     * as its first argument and a freshly created {@link LabelAlphabet} as
     * its second.
     */
    // NOTE(review): presumably the first argument is the data alphabet and the
    // second the target alphabet -- confirm against the Pipe constructor.
    public TestMEMMTokenSequenceRemoveSpaces()
    {
      super(null, new LabelAlphabet());
    }

View Full Code Here


    public Instance pipe(Instance carrier)
    {
      StringTokenization ts =  (StringTokenization) carrier.getData();
      StringTokenization newTs = new StringTokenization((CharSequence) ts.getDocument ());
      final LabelAlphabet dict = (LabelAlphabet) getTargetAlphabet();
      LabelSequence labelSeq = new LabelSequence(dict);
      Label start = dict.lookupLabel ("start");
      Label notstart = dict.lookupLabel ("notstart");


      boolean lastWasSpace = true;
      StringBuffer sb = new StringBuffer();
      for (int i = 0; i < ts.size(); i++) {
        StringSpan t = (StringSpan) ts.getSpan(i);

View Full Code Here

    /**
     * Creates a new
     * <code>SimpleTaggerSentence2FeatureVectorSequence</code> instance.
     *
     * <p>The superclass constructor receives a freshly created
     * {@link Alphabet} and a freshly created {@link LabelAlphabet}, so each
     * instance built this way starts with its own empty pair.
     */
    // NOTE(review): presumably these are the data alphabet and the target
    // alphabet respectively -- confirm against the Pipe constructor.
    public SimpleTaggerSentence2FeatureVectorSequence () {
      super (new Alphabet(), new LabelAlphabet());
    }

View Full Code Here


    public Instance pipe (Instance carrier) {


      Object inputData = carrier.getData();
      Alphabet features = getDataAlphabet();
      LabelAlphabet labels;
      LabelSequence target = null;
      String [][] tokens;


      if (inputData instanceof String) {
        tokens = parseSentence((String)inputData);

View Full Code Here

public class SvmLight2FeatureVectorAndLabel extends Pipe {


  private static final long serialVersionUID = 1L;
  
  /**
   * Creates a new <code>SvmLight2FeatureVectorAndLabel</code> pipe, passing a
   * freshly created {@link Alphabet} and a freshly created
   * {@link LabelAlphabet} to the superclass constructor.
   */
  // NOTE(review): presumably these are the data alphabet and the target
  // alphabet respectively -- confirm against the Pipe constructor.
  public SvmLight2FeatureVectorAndLabel () {
    super (new Alphabet(), new LabelAlphabet());
  }

View Full Code Here

    return new TestSuite (TestDocumentExtraction.class);
  }




  public void testToXml () {
    LabelAlphabet dict = new LabelAlphabet ();
    String document = "the quick brown fox leapt over the lazy dog";
    StringTokenization toks = new StringTokenization (document, new CharSequenceLexer ());


    Label O = dict.lookupLabel ("O");
    Label ANML = dict.lookupLabel ("ANIMAL");
    Label VB = dict.lookupLabel ("VERB");
    LabelSequence tags = new LabelSequence (new Label[] { O, ANML, ANML, ANML, VB, O, O, ANML, ANML });


    DocumentExtraction extr = new DocumentExtraction ("Test", dict, toks, tags, "O");
    String actualXml = extr.toXmlString();
    String expectedXml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n" +

View Full Code Here

            "<doc>the <ANIMAL>quick brown fox </ANIMAL><VERB>leapt </VERB>over the <ANIMAL>lazy dog</ANIMAL></doc>\r\n";
    assertEquals (expectedXml, actualXml);
  }


   public void testToXmlBIO () {
    LabelAlphabet dict = new LabelAlphabet ();
    String document = "the quick brown fox leapt over the lazy dog";
    StringTokenization toks = new StringTokenization (document, new CharSequenceLexer ());


    Label O = dict.lookupLabel ("O");
    Label BANML = dict.lookupLabel ("B-ANIMAL");
    Label ANML = dict.lookupLabel ("ANIMAL");
    Label BVB = dict.lookupLabel ("B-VERB");
    Label VB = dict.lookupLabel ("I-VERB");
    LabelSequence tags = new LabelSequence (new Label[] { O, BANML, ANML, BANML, BVB, VB, O, ANML, ANML });


    DocumentExtraction extr = new DocumentExtraction ("Test", dict, toks, tags, null, "O", new BIOTokenizationFilter());
    String actualXml = extr.toXmlString();
    String expectedXml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n" +

View Full Code Here

    assertEquals (expectedXml, actualXml);
  }


  public void testNestedToXML ()
  {
    LabelAlphabet dict = new LabelAlphabet ();
    String document = "the quick brown fox leapt over the lazy dog";
    StringTokenization toks = new StringTokenization (document, new CharSequenceLexer ());


    Label O = dict.lookupLabel ("O");
    Label ANML = dict.lookupLabel ("ANIMAL");
    Label VB = dict.lookupLabel ("VERB");
    Label JJ = dict.lookupLabel ("ADJ");
    Label MAMMAL = dict.lookupLabel ("MAMMAL");


    LabelSequence tags = new LabelSequence (new Label[] { O, ANML, ANML, ANML, VB, O, ANML, ANML, ANML });


    LabeledSpans spans = new DefaultTokenizationFilter ().constructLabeledSpans (dict, document, O, toks, tags);

View Full Code Here


  }


  public void testNestedXMLTokenizationFilter ()
  {
    LabelAlphabet dict = new LabelAlphabet ();
    String document = "the quick brown fox leapt over the lazy dog";
    StringTokenization toks = new StringTokenization (document, new CharSequenceLexer ());


    Label O = dict.lookupLabel ("O");
    Label ANML = dict.lookupLabel ("ANIMAL");
    Label ANML_MAMM = dict.lookupLabel ("ANIMAL|MAMMAL");
    Label VB = dict.lookupLabel ("VERB");
    Label ANML_JJ = dict.lookupLabel ("ANIMAL|ADJ");
    Label ANML_JJ_MAMM = dict.lookupLabel ("ANIMAL|ADJ|MAMMAL");


    LabelSequence tags = new LabelSequence (new Label[] { O, ANML, ANML, ANML_MAMM, VB, O, ANML, ANML_JJ, ANML_JJ_MAMM });
    DocumentExtraction extr = new DocumentExtraction ("Test", dict, toks, tags, null, "O", new HierarchicalTokenizationFilter ());


    String actualXml = extr.toXmlString();

View Full Code Here

0 1 2 3 4 5

TOP

Related Classes of cc.mallet.types.LabelAlphabet

cc.mallet.classify.evaluate.ConfusionMatrix

cc.mallet.classify.MaxEnt

cc.mallet.classify.MaxEntOptimizableByLabelDistribution

cc.mallet.classify.MaxEntOptimizableByLabelLikelihood

cc.mallet.classify.MCMaxEnt

cc.mallet.classify.MCMaxEntTrainer$MaximizableTrainer

cc.mallet.classify.RankMaxEnt

cc.mallet.classify.RankMaxEntTrainer$MaximizableTrainer

cc.mallet.cluster.examples.FirstOrderClusterExample$OverlappingFeaturePipe

cc.mallet.cluster.tui.Clusterings2Clusterer$ClusteringPipe

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.