Package fueltrack.server.motormouth

Source Code of fueltrack.server.motormouth.HTMLParser$SAXTerminateProcessing

package fueltrack.server.motormouth;

import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;

import javax.xml.transform.Source;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.TransformerFactoryConfigurationError;
import javax.xml.transform.dom.DOMResult;
import javax.xml.transform.sax.SAXSource;
import javax.xml.transform.stream.StreamSource;

import org.ccil.cowan.tagsoup.Parser;
import org.w3c.dom.DOMException;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;

import fueltrack.server.motormouth.model.FuelPriceRecord;

public class HTMLParser {
  public static List<FuelPriceRecord> parseHTML(Reader htmlInputReader) throws IOException, SAXException, TransformerException, TransformerFactoryConfigurationError {
    SimpleDateFormat format = new SimpleDateFormat("dd/MM/yy h:mm a");
    XMLReader reader = new Parser();
   
    Source s = new SAXSource(reader, new InputSource(htmlInputReader));
    DOMResult r = new DOMResult();
   
    TransformerFactory.newInstance().newTransformer(new StreamSource(HTMLParser.class.getClassLoader().getResourceAsStream("fueltrack/server/motormouth/TransformHTML.xsl"))).transform(s, r);
   
    List<FuelPriceRecord> results = new ArrayList<FuelPriceRecord>();
   
    NodeList recordNodes = r.getNode().getFirstChild().getChildNodes();
    for (int i = 0; i < recordNodes.getLength(); i++) {
      NamedNodeMap attributes = recordNodes.item(i).getAttributes();
     
      try {
        results.add(new FuelPriceRecord(
            attributes.getNamedItem("name").getNodeValue(),
            attributes.getNamedItem("address").getNodeValue(),
            Integer.valueOf(attributes.getNamedItem("postCode").getNodeValue()),
            attributes.getNamedItem("suburb").getNodeValue(),
            format.parse(attributes.getNamedItem("collectedDate").getNodeValue()),
            Integer.valueOf(attributes.getNamedItem("price").getNodeValue().replace("\u00A0", "").replace(".", ""))
          ));
      } catch (NumberFormatException e) {
      } catch (DOMException e) {
      } catch (ParseException e) {
      }
    }
   
    return results;
  }
 
  public static String parseHTMLInputTagValue(Reader htmlInputReader, final String tagName) throws IOException, SAXException {
    if (tagName == null || tagName.length() <= 0) {
      return null;
    }
   
    final StringBuilder builder = new StringBuilder();
   
    XMLReader reader = new Parser();
   
    reader.setContentHandler(new DefaultHandler() {
      @Override
      public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
        super.startElement(uri, localName, qName, attributes);
       
        if ("input".equalsIgnoreCase(localName)) {
          if (tagName.equalsIgnoreCase(attributes.getValue("name"))) {
            builder.append(attributes.getValue("value"));
            throw new SAXTerminateProcessing();
          }
        }
      }
    });
   
    try {
      reader.parse(new InputSource(htmlInputReader));
    } catch (SAXTerminateProcessing e) {}
   
    return builder.toString();
  }
 
  private static class SAXTerminateProcessing extends SAXException {
    private static final long serialVersionUID = -8372863640574614437L;
  }
}
TOP

Related Classes of fueltrack.server.motormouth.HTMLParser$SAXTerminateProcessing

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.