/*
* @(#)StandardURLReader.java 11/12/2004
*
* Copyright (c) 2004, 2005 jASEN.org
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the distribution.
*
* 3. The names of the authors may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* 4. Any modification or additions to the software must be contributed back
* to the project.
*
* 5. Any investigation or reverse engineering of source code or binary to
* enable emails to bypass the filters, and hence inflict spam and or viruses
* onto users who use or do not use jASEN could subject the perpetrator to
* criminal and or civil liability.
*
* THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESSED OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JASEN.ORG,
* OR ANY CONTRIBUTORS TO THIS SOFTWARE BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
* OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
package org.jasen.net;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;
import org.jasen.error.JasenException;
import org.jasen.interfaces.URLReader;
import org.jasen.io.NonBlockingStreamReader;
/**
 * <P>
 * Extracts the content from a remote web server for the purposes of analysis.
 * </P>
 * <P>
 * The downloaded bytes are decoded with the charset declared in the content
 * type reported for the URL when that charset is supported by the running
 * JVM; otherwise the platform default charset is used.
 * </P>
 * @author Jason Polites
 */
public class StandardURLReader implements URLReader {

    /** Prefix of the content-type parameter that names the character set. */
    private static final String CHARSET_PARAM = "charset=";

    /** Size (in bytes) of the buffer handed to the stream reader. */
    private int readBufferSize = 2048;

    /** Time (in milliseconds) handed to the stream reader as its timeout. */
    private long readTimeout = 5000L; // 5 seconds

    /**
     * Downloads the content behind the given url and returns it as text.
     *
     * @param url The location to read from.
     * @return The content of the url decoded to a String.
     * @throws JasenException If an IOException occurs while opening or reading the url.
     * @see org.jasen.interfaces.URLReader#readURL(java.net.URL)
     */
    public String readURL(URL url) throws JasenException {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        InputStream in = null;

        try {
            // Same as url.openStream(), but keeps the connection so the
            // declared content type can be inspected afterwards.
            URLConnection connection = url.openConnection();
            in = connection.getInputStream();

            NonBlockingStreamReader reader = new NonBlockingStreamReader();
            reader.read(in, out, readBufferSize, readTimeout, null);

            byte[] content = out.toByteArray();

            // Queried only after the body has been consumed so that any
            // content sniffing done by the connection cannot disturb the data.
            String charset = charsetOf(connection.getContentType());

            if (charset != null) {
                return new String(content, charset);
            }

            // No usable charset declared: decode with the platform default.
            return new String(content);
        }
        catch (IOException e) {
            throw new JasenException(e);
        }
        finally {
            if (in != null) {
                try {
                    in.close();
                }
                catch (IOException ignore) {
                    // Best effort: a failure to close must not mask the
                    // result or the original exception.
                }
            }
        }
    }

    /**
     * Extracts the charset name from a content type value such as
     * <code>text/html; charset=UTF-8</code>.
     *
     * @param contentType The content type value, may be null.
     * @return The declared charset name, or null if none is declared or the
     *         declared name is illegal or not supported by this JVM.
     */
    private static String charsetOf(String contentType) {
        if (contentType == null) {
            return null;
        }

        String[] params = contentType.split(";");

        // Index 0 is the media type itself; parameters start at index 1.
        for (int i = 1; i < params.length; i++) {
            String param = params[i].trim();

            if (!param.regionMatches(true, 0, CHARSET_PARAM, 0, CHARSET_PARAM.length())) {
                continue;
            }

            String name = param.substring(CHARSET_PARAM.length()).trim();

            // The value may be quoted, e.g. charset="utf-8".
            int last = name.length() - 1;
            if (last > 0) {
                char first = name.charAt(0);
                if ((first == '"' || first == '\'') && name.charAt(last) == first) {
                    name = name.substring(1, last).trim();
                }
            }

            try {
                if (name.length() > 0 && Charset.isSupported(name)) {
                    return name;
                }
            }
            catch (IllegalArgumentException ignore) {
                // Syntactically illegal charset name: treat as undeclared.
            }

            return null;
        }

        return null;
    }

    /**
     * @return Returns the size (in bytes) of the buffer used when reading url data.
     */
    public int getReadBufferSize() {
        return readBufferSize;
    }

    /**
     * @param readBufferSize The size (in bytes) of the buffer used when reading url data.
     */
    public void setReadBufferSize(int readBufferSize) {
        this.readBufferSize = readBufferSize;
    }

    /**
     * @return Returns the time (in milliseconds) to wait for data from the url stream until reading is abnormally aborted.
     */
    public long getReadTimeout() {
        return readTimeout;
    }

    /**
     * @param readTimeout The time (in milliseconds) to wait for data from the url stream until reading is abnormally aborted.
     */
    public void setReadTimeout(long readTimeout) {
        this.readTimeout = readTimeout;
    }
}