Package cc.mallet.types

Examples of cc.mallet.types.LabelAlphabet


  public Instance pipe (Instance carrier)
  {
    if (carrier.getTarget() != null) {
      if (carrier.getTarget() instanceof Label)
        throw new IllegalArgumentException ("Already a label.");
      LabelAlphabet ldict = (LabelAlphabet) getTargetAlphabet();
      carrier.setTarget(ldict.lookupLabel (carrier.getTarget()));
    }
    return carrier;
  }
View Full Code Here


  public static class TestMEMMTokenSequenceRemoveSpaces extends Pipe implements Serializable {

    public TestMEMMTokenSequenceRemoveSpaces()
    {
      super(null, new LabelAlphabet());
    }
View Full Code Here

    public Instance pipe(Instance carrier)
    {
      StringTokenization ts =  (StringTokenization) carrier.getData();
      StringTokenization newTs = new StringTokenization((CharSequence) ts.getDocument ());
      final LabelAlphabet dict = (LabelAlphabet) getTargetAlphabet();
      LabelSequence labelSeq = new LabelSequence(dict);
      Label start = dict.lookupLabel ("start");
      Label notstart = dict.lookupLabel ("notstart");

      boolean lastWasSpace = true;
      StringBuffer sb = new StringBuffer();
      for (int i = 0; i < ts.size(); i++) {
        StringSpan t = (StringSpan) ts.getSpan(i);
View Full Code Here

    /**
     * Creates a new
     * <code>SimpleTaggerSentence2FeatureVectorSequence</code> instance.
     */
    public SimpleTaggerSentence2FeatureVectorSequence () {
      super (new Alphabet(), new LabelAlphabet());
    }
View Full Code Here

    public Instance pipe (Instance carrier) {

      Object inputData = carrier.getData();
      Alphabet features = getDataAlphabet();
      LabelAlphabet labels;
      LabelSequence target = null;
      String [][] tokens;

      if (inputData instanceof String) {
        tokens = parseSentence((String)inputData);
View Full Code Here

public class SvmLight2FeatureVectorAndLabel extends Pipe {

  private static final long serialVersionUID = 1L;
 
  public SvmLight2FeatureVectorAndLabel () {
    super (new Alphabet(), new LabelAlphabet());
  }
View Full Code Here

    return new TestSuite (TestDocumentExtraction.class);
  }


  public void testToXml () {
    LabelAlphabet dict = new LabelAlphabet ();
    String document = "the quick brown fox leapt over the lazy dog";
    StringTokenization toks = new StringTokenization (document, new CharSequenceLexer ());

    Label O = dict.lookupLabel ("O");
    Label ANML = dict.lookupLabel ("ANIMAL");
    Label VB = dict.lookupLabel ("VERB");
    LabelSequence tags = new LabelSequence (new Label[] { O, ANML, ANML, ANML, VB, O, O, ANML, ANML });

    DocumentExtraction extr = new DocumentExtraction ("Test", dict, toks, tags, "O");
    String actualXml = extr.toXmlString();
    String expectedXml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n" +
View Full Code Here

            "<doc>the <ANIMAL>quick brown fox </ANIMAL><VERB>leapt </VERB>over the <ANIMAL>lazy dog</ANIMAL></doc>\r\n";
    assertEquals (expectedXml, actualXml);
  }

   public void testToXmlBIO () {
    LabelAlphabet dict = new LabelAlphabet ();
    String document = "the quick brown fox leapt over the lazy dog";
    StringTokenization toks = new StringTokenization (document, new CharSequenceLexer ());

    Label O = dict.lookupLabel ("O");
    Label BANML = dict.lookupLabel ("B-ANIMAL");
    Label ANML = dict.lookupLabel ("ANIMAL");
    Label BVB = dict.lookupLabel ("B-VERB");
    Label VB = dict.lookupLabel ("I-VERB");
    LabelSequence tags = new LabelSequence (new Label[] { O, BANML, ANML, BANML, BVB, VB, O, ANML, ANML });

    DocumentExtraction extr = new DocumentExtraction ("Test", dict, toks, tags, null, "O", new BIOTokenizationFilter());
    String actualXml = extr.toXmlString();
    String expectedXml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n" +
View Full Code Here

    assertEquals (expectedXml, actualXml);
  }

  public void testNestedToXML ()
  {
    LabelAlphabet dict = new LabelAlphabet ();
    String document = "the quick brown fox leapt over the lazy dog";
    StringTokenization toks = new StringTokenization (document, new CharSequenceLexer ());

    Label O = dict.lookupLabel ("O");
    Label ANML = dict.lookupLabel ("ANIMAL");
    Label VB = dict.lookupLabel ("VERB");
    Label JJ = dict.lookupLabel ("ADJ");
    Label MAMMAL = dict.lookupLabel ("MAMMAL");

    LabelSequence tags = new LabelSequence (new Label[] { O, ANML, ANML, ANML, VB, O, ANML, ANML, ANML });

    LabeledSpans spans = new DefaultTokenizationFilter ().constructLabeledSpans (dict, document, O, toks, tags);
View Full Code Here

  }

  public void testNestedXMLTokenizationFilter ()
  {
    LabelAlphabet dict = new LabelAlphabet ();
    String document = "the quick brown fox leapt over the lazy dog";
    StringTokenization toks = new StringTokenization (document, new CharSequenceLexer ());

    Label O = dict.lookupLabel ("O");
    Label ANML = dict.lookupLabel ("ANIMAL");
    Label ANML_MAMM = dict.lookupLabel ("ANIMAL|MAMMAL");
    Label VB = dict.lookupLabel ("VERB");
    Label ANML_JJ = dict.lookupLabel ("ANIMAL|ADJ");
    Label ANML_JJ_MAMM = dict.lookupLabel ("ANIMAL|ADJ|MAMMAL");

    LabelSequence tags = new LabelSequence (new Label[] { O, ANML, ANML, ANML_MAMM, VB, O, ANML, ANML_JJ, ANML_JJ_MAMM });
    DocumentExtraction extr = new DocumentExtraction ("Test", dict, toks, tags, null, "O", new HierarchicalTokenizationFilter ());

    String actualXml = extr.toXmlString();
View Full Code Here

TOP

Related Classes of cc.mallet.types.LabelAlphabet

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.