public void testLinks() throws Exception {
String test = "[http://lucene.apache.org/java/docs/index.html#news here] [http://lucene.apache.org/java/docs/index.html?b=c here] [https://lucene.apache.org/java/docs/index.html?b=c here]";
WikipediaTokenizer tf = new WikipediaTokenizer(new StringReader(test));
TermAttribute termAtt = tf.addAttribute(TermAttribute.class);
TypeAttribute typeAtt = tf.addAttribute(TypeAttribute.class);
assertTrue(tf.incrementToken());
assertTrue(termAtt.term() + " is not equal to " + "http://lucene.apache.org/java/docs/index.html#news",
termAtt.term().equals("http://lucene.apache.org/java/docs/index.html#news") == true);
assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.EXTERNAL_LINK_URL, typeAtt.type().equals(WikipediaTokenizer.EXTERNAL_LINK_URL) == true);
tf.incrementToken();//skip here
assertTrue(tf.incrementToken());
assertTrue(termAtt.term() + " is not equal to " + "http://lucene.apache.org/java/docs/index.html?b=c",
termAtt.term().equals("http://lucene.apache.org/java/docs/index.html?b=c") == true);
assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.EXTERNAL_LINK_URL, typeAtt.type().equals(WikipediaTokenizer.EXTERNAL_LINK_URL) == true);
tf.incrementToken();//skip here
assertTrue(tf.incrementToken());
assertTrue(termAtt.term() + " is not equal to " + "https://lucene.apache.org/java/docs/index.html?b=c",
termAtt.term().equals("https://lucene.apache.org/java/docs/index.html?b=c") == true);
assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.EXTERNAL_LINK_URL, typeAtt.type().equals(WikipediaTokenizer.EXTERNAL_LINK_URL) == true);
assertTrue(tf.incrementToken());
assertFalse(tf.incrementToken());
}