"<doc>the <ANIMAL>quick brown fox </ANIMAL><VERB>leapt </VERB>over the <ANIMAL>lazy dog</ANIMAL></doc>\r\n";
assertEquals (expectedXml, actualXml);
}
public void testToXmlBIO () {
LabelAlphabet dict = new LabelAlphabet ();
String document = "the quick brown fox leapt over the lazy dog";
StringTokenization toks = new StringTokenization (document, new CharSequenceLexer ());
Label O = dict.lookupLabel ("O");
Label BANML = dict.lookupLabel ("B-ANIMAL");
Label ANML = dict.lookupLabel ("ANIMAL");
Label BVB = dict.lookupLabel ("B-VERB");
Label VB = dict.lookupLabel ("I-VERB");
LabelSequence tags = new LabelSequence (new Label[] { O, BANML, ANML, BANML, BVB, VB, O, ANML, ANML });
DocumentExtraction extr = new DocumentExtraction ("Test", dict, toks, tags, null, "O", new BIOTokenizationFilter());
String actualXml = extr.toXmlString();
String expectedXml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n" +