package xml;
import org.junit.Test;
import javax.xml.stream.EventFilter;
import javax.xml.stream.XMLEventReader;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.events.EndElement;
import javax.xml.stream.events.StartElement;
import javax.xml.stream.events.XMLEvent;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintWriter;
import java.io.StringWriter;
/**
 * Tests that read a sample XML file through the StAX event API
 * ({@link XMLEventReader}), once restricted to a single element by a stateful
 * {@link EventFilter} and once unfiltered.
 */
public class ReadXmlTest {

    /** Location of the sample XML file parsed by both tests, relative to the working directory. */
    private static final String SAMPLE_XML = "src/main/resources/nlwiki-latest-pages-articles-short.xml";

    /**
     * Prints every {@code title} element of the sample file to standard output,
     * one "part: ..." line per element.
     *
     * @throws Exception if the file cannot be opened or parsed
     */
    @Test
    public void readXmlPart() throws Exception {
        XMLInputFactory xmlInputFactory = newInputFactory();
        // A byte stream (rather than a Reader) lets the parser pick the character
        // encoding itself instead of relying on the platform default charset.
        InputStream input = new FileInputStream(SAMPLE_XML);
        try {
            MyEventFilter eventFilter = createEventFilter("title");
            XMLEventReader xmlEventReader = xmlInputFactory.createFilteredReader(
                    xmlInputFactory.createXMLEventReader(input), eventFilter);
            try {
                while (xmlEventReader.hasNext()) {
                    String xml = readPart(xmlEventReader, eventFilter);
                    System.out.println("part: " + xml);
                }
            } finally {
                xmlEventReader.close();
            }
        } finally {
            // XMLEventReader.close() does not close the underlying input source.
            input.close();
        }
    }

    /**
     * Serializes events from the reader for as long as the filter reports that it
     * is inside the tracked element.
     *
     * <p>The loop is driven by the filter's state, not by {@code hasNext()}: the
     * caller is expected to have called {@code hasNext()} on the filtered reader
     * first, which is what lets the filter see the opening tag and flip its state.
     *
     * @param xmlEventReader filtered reader that was created with {@code filter}
     * @param filter         the filter instance installed on {@code xmlEventReader}
     * @return the written form of the events read; empty if the filter was not
     *         inside the tracked element on entry
     * @throws XMLStreamException if reading or serializing an event fails
     */
    private String readPart(XMLEventReader xmlEventReader, MyEventFilter filter) throws XMLStreamException {
        StringWriter writer = new StringWriter();
        while (filter.isStartFound()) {
            XMLEvent event = xmlEventReader.nextEvent();
            event.writeAsEncodedUnicode(writer);
        }
        return writer.toString();
    }

    /**
     * Creates a filter that accepts only the events of elements named {@code tag}.
     *
     * @param tag local name of the element to keep
     * @return a new, not yet triggered filter
     */
    private MyEventFilter createEventFilter(final String tag) {
        return new MyEventFilter(tag);
    }

    /**
     * Creates the input factory shared by the tests, with namespace processing
     * switched off.
     */
    private static XMLInputFactory newInputFactory() {
        XMLInputFactory factory = XMLInputFactory.newFactory();
        factory.setProperty(XMLInputFactory.IS_NAMESPACE_AWARE, Boolean.FALSE);
        return factory;
    }

    /**
     * Stateful filter that accepts every event from the opening tag of the
     * configured element up to and including its matching closing tag, and
     * rejects everything else.
     *
     * <p>NOTE(review): because {@link #accept(XMLEvent)} mutates state, the result
     * depends on how often the filtered reader presents each event. Filtered
     * readers may consult the filter from {@code hasNext()}/{@code peek()} as well
     * as from {@code nextEvent()}; if the closing tag were shown twice, the second
     * call would reject it. {@code readPart} avoids {@code hasNext()} inside an
     * element for that reason - confirm before changing either side.
     */
    private static class MyEventFilter implements EventFilter {

        /** Local name of the element being extracted. */
        private final String tag;

        /** True while the most recently seen event lies inside the tracked element. */
        private boolean startFound = false;

        public MyEventFilter(String tag) {
            this.tag = tag;
        }

        /**
         * @return whether the filter is currently inside the tracked element, i.e.
         *         the opening tag has been seen and the closing tag has not
         */
        public boolean isStartFound() {
            return startFound;
        }

        private boolean isTag(StartElement element) {
            return tag.equals(element.getName().getLocalPart());
        }

        private boolean isTag(EndElement element) {
            return tag.equals(element.getName().getLocalPart());
        }

        /**
         * Accepts the event if it belongs to the tracked element.
         *
         * @param xmlEvent the event offered by the filtered reader
         * @return {@code true} for the opening tag, everything after it, and the
         *         closing tag; {@code false} otherwise
         */
        @Override
        public boolean accept(XMLEvent xmlEvent) {
            if (xmlEvent.isStartElement() && isTag(xmlEvent.asStartElement())) {
                startFound = true;
            }
            // Capture the decision before the reset below so that the closing tag
            // itself is still accepted.
            boolean accept = startFound;
            // Leave the element once its closing tag has been seen.
            if (startFound && xmlEvent.isEndElement() && isTag(xmlEvent.asEndElement())) {
                startFound = false;
            }
            return accept;
        }
    }

    /**
     * Copies the whole sample file to standard error through a filtered reader
     * whose filter accepts every event.
     *
     * @throws Exception if the file cannot be opened or parsed
     */
    @Test
    public void readWithStax() throws Exception {
        XMLInputFactory xmlInputFactory = newInputFactory();
        // A byte stream (rather than a Reader) lets the parser pick the character
        // encoding itself instead of relying on the platform default charset.
        InputStream input = new FileInputStream(SAMPLE_XML);
        try {
            XMLEventReader xmlEventReader = xmlInputFactory.createFilteredReader(
                    xmlInputFactory.createXMLEventReader(input), new EventFilter() {
                        @Override
                        public boolean accept(XMLEvent xmlEvent) {
                            return true;
                        }
                    });
            try {
                PrintWriter printWriter = new PrintWriter(System.err);
                while (xmlEventReader.hasNext()) {
                    XMLEvent event = xmlEventReader.nextEvent();
                    event.writeAsEncodedUnicode(printWriter);
                }
                // Flush only: closing the writer would also close System.err for
                // everything else running in this JVM.
                printWriter.flush();
            } finally {
                xmlEventReader.close();
            }
        } finally {
            // XMLEventReader.close() does not close the underlying input source.
            input.close();
        }
    }
}