/*
 * Created on Mar 2, 2005
 *
 * TODO To change the template for this generated file go to
 * Window - Preferences - Java - Code Style - Code Templates
 */
package intrade.utils;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.io.StringWriter;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.StringTokenizer;
import java.util.TreeSet;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.FactoryConfigurationError;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.Result;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.w3c.dom.Document;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import com.google.appengine.api.urlfetch.HTTPResponse;
import com.google.appengine.api.urlfetch.URLFetchService;
import com.google.appengine.api.urlfetch.URLFetchServiceFactory;
/**
 * @author Panos Ipeirotis
 *
 * TODO To change the template for this generated type comment go to
 * Window - Preferences - Java - Code Style - Code Templates
 */
public class Utilities {
private static final char c[] = { '<', '>', '&', '\"' };
private static final String expansion[] = { "<", ">", "&", """ };
public static Document getThrottledURL(String url) throws FactoryConfigurationError {
byte[] page = null;
boolean done = false;
int trial = 0;
do {
page = Utilities.getFile(url);
if (page == null && trial < 3) {
} else {
done = true;
} while (!done);
Document d;
if (page == null) {
System.out.println("Error:" + url);
// Utilities.sleep(5);
d = null;
} else {
d = Utilities.getXMLFromString(page);
return d;
public static String cleanLine(String line) {
StringBuffer buffer = new StringBuffer();
for (int i = 0; i < line.length(); i++) {
char c1 = line.charAt(i);
if (c1 < 128 && Character.isLetter(c1)) {
} else {
buffer.append(' ');
return buffer.toString().toLowerCase();
public static String cleanForXML(String line) {
StringBuffer buffer = new StringBuffer();
for (int i = 0; i < line.length(); i++) {
char c1 = line.charAt(i);
if (c1 < 128 && Character.isLetter(c1)) {
} else {
return buffer.toString();
public static TreeSet<String> getWords(String TextFile) {
TreeSet<String> result = new TreeSet<String>();
StringTokenizer st = new StringTokenizer(TextFile);
while (st.hasMoreTokens()) {
return result;
public static byte[] getFile(String URLName) {
try {
URL url = new URL(URLName);
URLFetchService u = URLFetchServiceFactory.getURLFetchService();
HTTPResponse r = u.fetch(url);
return r.getContent();
} catch (MalformedURLException e) {
System.err.println("Malformed URL:" + URLName);
return null;
} catch (IOException e) {
System.err.println("I/O exception:" + e.getMessage());
return null;
} catch (com.google.appengine.api.urlfetch.ResponseTooLargeException e) {
System.err.println("Response Too Large Exception:" + e.getMessage());
return null;
} catch (com.google.apphosting.api.DeadlineExceededException e) {
return null;
public static String getPage(String URLName) {
StringBuffer buffer = new StringBuffer();
try {
URL url = new URL(URLName);
HttpURLConnection connection = (HttpURLConnection) url.openConnection();
BufferedReader reader = new BufferedReader(new InputStreamReader(connection.getInputStream()));
String line;
while ((line = reader.readLine()) != null) {
} catch (MalformedURLException e) {
System.err.println("Malformed URL:" + URLName);
return null;
} catch (IOException e) {
System.err.println("I/O exception:" + e.getMessage());
return null;
} catch (com.google.appengine.api.urlfetch.ResponseTooLargeException e) {
System.err.println("Response Too Large Exception:" + e.getMessage());
return null;
return buffer.toString();
public static void sleep(int secs) {
try {
Thread.sleep(secs * 1000);
} catch (InterruptedException e) {
* Helper function. It reads an XML file and returns the in-memory
* representation
* It accepts only valid documents
* @param file
* @return the XML in-memory representation of the string
* @throws FactoryConfigurationError
public static Document getXMLFromString(byte[] file) throws FactoryConfigurationError {
Document MIQuery = null;
try {
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
// factory.setValidating(true);
// Amazon does not put a DTD
DocumentBuilder builder = factory.newDocumentBuilder();
builder.setErrorHandler(new org.xml.sax.ErrorHandler() {
// ignore fatal errors (an exception is guaranteed)
public void fatalError(SAXParseException exception) {
System.out.println("** Error" + ", line " + exception.getLineNumber() + ", uri " + exception.getSystemId());
System.out.println(" " + exception.getMessage());
// treat validation errors as fatal
public void error(SAXParseException e) throws SAXParseException {
System.out.println("** Error" + ", line " + e.getLineNumber() + ", uri " + e.getSystemId());
System.out.println(" " + e.getMessage());
throw e;
// dump warnings too
public void warning(SAXParseException err) {
System.out.println("** Warning" + ", line " + err.getLineNumber() + ", uri " + err.getSystemId());
System.out.println(" " + err.getMessage());
InputSource inputSource = new InputSource(new StringReader(new String(file)));
MIQuery = builder.parse(inputSource);
} catch (SAXException sxe) {
// Error generated during parsing
Exception x = sxe;
if (sxe.getException() != null)
x = sxe.getException();
} catch (ParserConfigurationException pce) {
// Parser with specified options can't be built
} catch (IOException ioe) {
// I/O error
} catch (FactoryConfigurationError fce) {
// Factory configuration error
return MIQuery;
* Helper function. It reads an XML file and returns the in-memory
* representation
* It accepts only valid documents
* @param MIxmlQuery
* @return an empty XML document
* @throws FactoryConfigurationError
public static Document getXML() throws FactoryConfigurationError {
Document MIQuery = null;
try {
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
// factory.setValidating(true);
// Amazon does not put a DTD
DocumentBuilder builder = factory.newDocumentBuilder();
builder.setErrorHandler(new org.xml.sax.ErrorHandler() {
// ignore fatal errors (an exception is guaranteed)
public void fatalError(SAXParseException exception) {
System.out.println("** Error" + ", line " + exception.getLineNumber() + ", uri " + exception.getSystemId());
System.out.println(" " + exception.getMessage());
// treat validation errors as fatal
public void error(SAXParseException e) throws SAXParseException {
System.out.println("** Error" + ", line " + e.getLineNumber() + ", uri " + e.getSystemId());
System.out.println(" " + e.getMessage());
throw e;
// dump warnings too
public void warning(SAXParseException err) {
System.out.println("** Warning" + ", line " + err.getLineNumber() + ", uri " + err.getSystemId());
System.out.println(" " + err.getMessage());
MIQuery = builder.newDocument();
} catch (ParserConfigurationException pce) {
// Parser with specified options can't be built
} catch (FactoryConfigurationError fce) {
// Factory configuration error
return MIQuery;
// This method writes a DOM document to a file
public static void writeXmlFile(Document doc, String filename) {
try {
// Prepare the DOM document for writing
Source source = new DOMSource(doc);
// Prepare the output file
File file = new File(filename);
Result result = new StreamResult(file);
// Write the DOM document to the file
Transformer xformer = TransformerFactory.newInstance().newTransformer();
xformer.transform(source, result);
} catch (TransformerConfigurationException e) {
} catch (TransformerException e) {
// This method writes a DOM document to a file
public static void writeXmlFile(Document doc, File file) {
try {
// Prepare the DOM document for writing
Source source = new DOMSource(doc);
// Prepare the output file
Result result = new StreamResult(file);
// Write the DOM document to the file
Transformer xformer = TransformerFactory.newInstance().newTransformer();
xformer.transform(source, result);
} catch (TransformerConfigurationException e) {
} catch (TransformerException e) {
// This method retuns an XML string from the DOM document
public static String writeXmlString(Document doc) {
StringWriter sw = new StringWriter();
try {
// Prepare the DOM document for writing
Source source = new DOMSource(doc);
Result result = new StreamResult(sw);
// Write the DOM document to the file
Transformer xformer = TransformerFactory.newInstance().newTransformer();
xformer.transform(source, result);
} catch (TransformerConfigurationException e) {
} catch (TransformerException e) {
return sw.toString();
public static String httpget(String url) {
try {
StringBuffer sb = new StringBuffer();
URL href = new URL(url);
HttpURLConnection hc = (HttpURLConnection) href.openConnection();
String ua = "Mozilla/4.0 (compatible; MSIE 6.0; WINDOWS; .NET CLR 1.1.4322)";
hc.setRequestProperty("user-agent", ua);
InputStream is = hc.getInputStream();
int i;
while ((i = is.read()) != -1) {
char c1 = (char) i;
return new String(sb);
} catch (Exception e) {
return null;
public static String HTMLEncode(String s) {
StringBuffer st = new StringBuffer();
for (int i = 0; i < s.length(); i++) {
boolean copy = true;
char ch = s.charAt(i);
for (int j = 0; j < c.length; j++) {
if (c[j] == ch) {
copy = false;
if (copy)
return st.toString();