package fueltrack.server.motormouth;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.List;
import javax.xml.transform.Source;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.TransformerFactoryConfigurationError;
import javax.xml.transform.dom.DOMResult;
import javax.xml.transform.sax.SAXSource;
import javax.xml.transform.stream.StreamSource;
import org.ccil.cowan.tagsoup.Parser;
import org.w3c.dom.DOMException;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import fueltrack.server.motormouth.model.FuelPriceRecord;
/**
 * Static helpers that extract data from HTML documents.
 *
 * <p>Input is read through the TagSoup {@link Parser}. Both methods consume the supplied
 * {@link Reader} but do not close it; that remains the caller's responsibility.
 */
public class HTMLParser {

    /** Classpath location of the XSLT stylesheet applied to the parsed HTML. */
    private static final String STYLESHEET_RESOURCE = "fueltrack/server/motormouth/TransformHTML.xsl";

    /** {@link SimpleDateFormat} pattern used to parse the {@code collectedDate} attribute. */
    private static final String COLLECTED_DATE_PATTERN = "dd/MM/yy h:mm a";

    /**
     * Parses an HTML page into fuel price records.
     *
     * <p>The HTML is run through the {@code TransformHTML.xsl} stylesheet. Each child of the
     * resulting root node is read as one record, using its {@code name}, {@code address},
     * {@code postCode}, {@code suburb}, {@code collectedDate} and {@code price} attributes.
     * Parsing is best-effort: a child that is not an element, lacks one of those attributes,
     * or carries a value that cannot be parsed is skipped rather than failing the whole call.
     *
     * @param htmlInputReader source of the HTML to parse
     * @return the records that could be parsed, in document order; empty if there were none
     * @throws IOException if reading the input fails
     * @throws SAXException if the HTML parser reports an error
     * @throws TransformerException if the stylesheet is missing from the classpath or the
     *         transformation fails
     * @throws TransformerFactoryConfigurationError if no transformer factory can be created
     */
    public static List<FuelPriceRecord> parseHTML(Reader htmlInputReader) throws IOException, SAXException, TransformerException, TransformerFactoryConfigurationError {
        XMLReader reader = new Parser();
        Source source = new SAXSource(reader, new InputSource(htmlInputReader));
        DOMResult transformed = new DOMResult();

        InputStream stylesheet = HTMLParser.class.getClassLoader().getResourceAsStream(STYLESHEET_RESOURCE);
        if (stylesheet == null) {
            throw new TransformerException("Stylesheet not found on classpath: " + STYLESHEET_RESOURCE);
        }
        try {
            TransformerFactory.newInstance().newTransformer(new StreamSource(stylesheet)).transform(source, transformed);
        } finally {
            try {
                stylesheet.close();
            } catch (IOException ignored) {
                // Failing to close a read-only classpath stream must not mask the real outcome.
            }
        }

        List<FuelPriceRecord> results = new ArrayList<FuelPriceRecord>();
        Node document = transformed.getNode();
        Node root = (document == null) ? null : document.getFirstChild();
        if (root == null) {
            // The transformation produced no output, so there is nothing to read.
            return results;
        }

        // NOTE(review): the "a" (AM/PM) marker is parsed using the JVM default locale;
        // confirm this matches the text produced by the source page on all deployments.
        SimpleDateFormat format = new SimpleDateFormat(COLLECTED_DATE_PATTERN);
        NodeList recordNodes = root.getChildNodes();
        for (int i = 0; i < recordNodes.getLength(); i++) {
            FuelPriceRecord parsed = toRecord(recordNodes.item(i).getAttributes(), format);
            if (parsed != null) {
                results.add(parsed);
            }
        }
        return results;
    }

    /**
     * Builds one record from a node's attributes.
     *
     * @param attributes attribute map of a candidate record node; {@code null} for nodes
     *        that are not elements
     * @param format formatter used to parse the {@code collectedDate} attribute
     * @return the record, or {@code null} if the node is not a usable record
     */
    private static FuelPriceRecord toRecord(NamedNodeMap attributes, SimpleDateFormat format) {
        if (attributes == null) {
            // Text, comment and other non-element nodes have no attributes.
            return null;
        }
        try {
            String name = attributeValue(attributes, "name");
            String address = attributeValue(attributes, "address");
            String postCode = attributeValue(attributes, "postCode");
            String suburb = attributeValue(attributes, "suburb");
            String collectedDate = attributeValue(attributes, "collectedDate");
            String price = attributeValue(attributes, "price");
            if (name == null || address == null || postCode == null
                    || suburb == null || collectedDate == null || price == null) {
                return null;
            }
            return new FuelPriceRecord(
                    name,
                    address,
                    Integer.valueOf(postCode),
                    suburb,
                    format.parse(collectedDate),
                    // Drop non-breaking spaces and the decimal point so the digits parse as one integer.
                    Integer.valueOf(price.replace("\u00A0", "").replace(".", "")));
        } catch (NumberFormatException ignored) {
            // Post code or price is not numeric: skip this record.
            return null;
        } catch (DOMException ignored) {
            // Attribute value could not be read: skip this record.
            return null;
        } catch (ParseException ignored) {
            // Collected date does not match the expected pattern: skip this record.
            return null;
        }
    }

    /**
     * Returns the value of the named attribute, or {@code null} if it is absent.
     */
    private static String attributeValue(NamedNodeMap attributes, String name) {
        Node item = attributes.getNamedItem(name);
        return (item == null) ? null : item.getNodeValue();
    }

    /**
     * Finds the first {@code <input>} element whose {@code name} attribute equals
     * {@code tagName} (ignoring case) and returns its {@code value} attribute.
     *
     * <p>Parsing stops as soon as the element is found, so the rest of the input is not read.
     *
     * @param htmlInputReader source of the HTML to scan
     * @param tagName value of the {@code name} attribute to look for
     * @return {@code null} if {@code tagName} is {@code null} or empty; otherwise the
     *         {@code value} of the matching element, or the empty string if no element
     *         matches or the matching element has no {@code value} attribute
     * @throws IOException if reading the input fails
     * @throws SAXException if the HTML parser reports an error
     */
    public static String parseHTMLInputTagValue(Reader htmlInputReader, final String tagName) throws IOException, SAXException {
        if (tagName == null || tagName.length() <= 0) {
            return null;
        }
        final StringBuilder builder = new StringBuilder();
        XMLReader reader = new Parser();
        reader.setContentHandler(new DefaultHandler() {
            @Override
            public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
                if ("input".equalsIgnoreCase(localName)
                        && tagName.equalsIgnoreCase(attributes.getValue("name"))) {
                    String value = attributes.getValue("value");
                    if (value != null) {
                        // Appending a null String would add the literal text "null".
                        builder.append(value);
                    }
                    throw new SAXTerminateProcessing();
                }
            }
        });
        try {
            reader.parse(new InputSource(htmlInputReader));
        } catch (SAXTerminateProcessing expected) {
            // Deliberate early exit: the handler found the element it was looking for.
        }
        return builder.toString();
    }

    /**
     * Thrown by the content handler to abort parsing once the wanted element has been found.
     */
    private static class SAXTerminateProcessing extends SAXException {
        private static final long serialVersionUID = -8372863640574614437L;
    }
}