public class WikipediaArticleReaderTest {
@Test
public void testParsing() throws UnsupportedEncodingException, FileNotFoundException, IOException, SAXException {
URL u = this.getClass().getResource("/en/mercedes.xml");
WikipediaArticleReader wap = new WikipediaArticleReader(u.getFile(),"/tmp/mercedes.json.gz", Language.EN);
wap.start();
String json = IOUtils.getFileAsUTF8String("/tmp/mercedes.json.gz");
Article a = Article.fromJson(json);
assertTrue(a.getCleanText().startsWith("Mercedes-Benz"));
assertEquals(15, a.getCategories().size());