public class OReillyNewsGrabber implements NewsGrabber {
public String getContent(String link) throws IOException {
System.out.println("link " + link);
URL url = new URL(link);
InputStream in = new BufferedInputStream(url.openStream());
in = new IncludeFilterInputStream(in, "/lpt", ">");
StringBuffer text = new StringBuffer();
int b;
try {
while ((b = in.read()) != -1 && b != '\"') {
text.append((char) b);
}
} finally {
in.close();
}
url = new URL(url, "/lpt" + text);
text = new StringBuffer();
// text.append(url.toExternalForm()+"</br>");
// text.append(absoluteURL+"</br>");
// text.append(relativeURL+"</br>");
in = new BufferedInputStream(url.openStream());
in = new IncludeFilterInputStream(in, "<HTML>", "</HTML>");
in = new ExcludeFilterInputStream(in, "<HEAD", "/HEAD>");
in = new ExcludeFilterInputStream(in, "<BODY", ">");
in = new ExcludeFilterInputStream(in, "</BODY", ">");
in = new ExcludeFilterInputStream(in, "<!--", "-->");
in = new ExcludeFilterInputStream(in, "<SCRIPT", "</SCRIPT>");