package org.jsoup.integration;
import org.jsoup.HttpStatusException;
import org.jsoup.UnsupportedMimeTypeException;
import org.junit.Test;
import org.junit.Ignore;
import static org.junit.Assert.*;
import org.jsoup.nodes.Document;
import org.jsoup.Jsoup;
import org.jsoup.Connection;
import java.net.MalformedURLException;
import java.net.URL;
import java.io.IOException;
import java.util.Map;
/**
Tests the URL connection. Not enabled by default, so tests don't require network connection.
@author Jonathan Hedley, jonathan@hedley.net */
@Ignore // ignored by default so tests don't require network access. comment out to enable.
public class UrlConnectTest {
private static String echoURL = "http://direct.infohound.net/tools/q.pl";
@Test
public void fetchURl() throws IOException {
String url = "http://www.google.com"; // no trailing / to force redir
Document doc = Jsoup.parse(new URL(url), 10*1000);
assertTrue(doc.title().contains("Google"));
}
@Test
public void fetchBaidu() throws IOException {
Connection.Response res = Jsoup.connect("http://www.baidu.com/").timeout(10*1000).execute();
Document doc = res.parse();
assertEquals("GBK", doc.outputSettings().charset().displayName());
assertEquals("GBK", res.charset());
assert(res.hasCookie("BAIDUID"));
assertEquals("text/html;charset=gbk", res.contentType());
}
@Test
public void exceptOnUnknownContentType() {
String url = "http://jsoup.org/rez/osi_logo.png"; // not text/* but image/png, should throw
boolean threw = false;
try {
Document doc = Jsoup.parse(new URL(url), 3000);
} catch (UnsupportedMimeTypeException e) {
threw = true;
assertEquals("org.jsoup.UnsupportedMimeTypeException: Unhandled content type. Must be text/*, application/xml, or application/xhtml+xml. Mimetype=image/png, URL=http://jsoup.org/rez/osi_logo.png", e.toString());
assertEquals(url, e.getUrl());
assertEquals("image/png", e.getMimeType());
} catch (IOException e) {
}
assertTrue(threw);
}
@Test
public void exceptOnUnsupportedProtocol(){
String url = "file://etc/passwd";
boolean threw = false;
try {
Document doc = Jsoup.connect(url).get();
} catch (MalformedURLException e) {
threw = true;
assertEquals("java.net.MalformedURLException: Only http & https protocols supported", e.toString());
} catch (IOException e) {
}
assertTrue(threw);
}
@Test
public void ignoresContentTypeIfSoConfigured() throws IOException {
Document doc = Jsoup.connect("http://jsoup.org/rez/osi_logo.png").ignoreContentType(true).get();
assertEquals("", doc.title()); // this will cause an ugly parse tree
}
@Test
public void doesPost() throws IOException {
Document doc = Jsoup.connect(echoURL)
.data("uname", "Jsoup", "uname", "Jonathan", "百", "度一下")
.cookie("auth", "token")
.post();
assertEquals("POST", ihVal("REQUEST_METHOD", doc));
//assertEquals("gzip", ihVal("HTTP_ACCEPT_ENCODING", doc)); // current proxy removes gzip on post
assertEquals("auth=token", ihVal("HTTP_COOKIE", doc));
assertEquals("度一下", ihVal("百", doc));
assertEquals("Jsoup, Jonathan", ihVal("uname", doc));
}
@Test
public void doesGet() throws IOException {
Connection con = Jsoup.connect(echoURL + "?what=the")
.userAgent("Mozilla")
.referrer("http://example.com")
.data("what", "about & me?");
Document doc = con.get();
assertEquals("what=the&what=about+%26+me%3F", ihVal("QUERY_STRING", doc));
assertEquals("the, about & me?", ihVal("what", doc));
assertEquals("Mozilla", ihVal("HTTP_USER_AGENT", doc));
assertEquals("http://example.com", ihVal("HTTP_REFERER", doc));
}
private static String ihVal(String key, Document doc) {
return doc.select("th:contains("+key+") + td").first().text();
}
@Test
public void followsTempRedirect() throws IOException {
Connection con = Jsoup.connect("http://direct.infohound.net/tools/302.pl"); // http://jsoup.org
Document doc = con.get();
assertTrue(doc.title().contains("jsoup"));
}
@Test
public void postRedirectsFetchWithGet() throws IOException {
Connection con = Jsoup.connect("http://direct.infohound.net/tools/302.pl")
.data("Argument", "Riposte")
.method(Connection.Method.POST);
Connection.Response res = con.execute();
assertEquals("http://jsoup.org", res.url().toExternalForm());
assertEquals(Connection.Method.GET, res.method());
}
@Test
public void followsRedirectToHttps() throws IOException {
Connection con = Jsoup.connect("http://direct.infohound.net/tools/302-secure.pl"); // https://www.google.com
con.data("id", "5");
Document doc = con.get();
assertTrue(doc.title().contains("Google"));
}
@Test
public void followsRelativeRedirect() throws IOException {
Connection con = Jsoup.connect("http://direct.infohound.net/tools/302-rel.pl"); // to ./ - /tools/
Document doc = con.post();
assertTrue(doc.title().contains("HTML Tidy Online"));
}
@Test
public void throwsExceptionOnError() {
String url = "http://direct.infohound.net/tools/404";
Connection con = Jsoup.connect(url);
boolean threw = false;
try {
Document doc = con.get();
} catch (HttpStatusException e) {
threw = true;
assertEquals("org.jsoup.HttpStatusException: HTTP error fetching URL. Status=404, URL=http://direct.infohound.net/tools/404", e.toString());
assertEquals(url, e.getUrl());
assertEquals(404, e.getStatusCode());
} catch (IOException e) {
}
assertTrue(threw);
}
@Test
public void ignoresExceptionIfSoConfigured() throws IOException {
Connection con = Jsoup.connect("http://direct.infohound.net/tools/404").ignoreHttpErrors(true);
Connection.Response res = con.execute();
Document doc = res.parse();
assertEquals(404, res.statusCode());
assertEquals("404 Not Found", doc.select("h1").first().text());
}
@Test
public void doesntRedirectIfSoConfigured() throws IOException {
Connection con = Jsoup.connect("http://direct.infohound.net/tools/302.pl").followRedirects(false);
Connection.Response res = con.execute();
assertEquals(302, res.statusCode());
assertEquals("http://jsoup.org", res.header("Location"));
}
@Test
public void redirectsResponseCookieToNextResponse() throws IOException {
Connection con = Jsoup.connect("http://direct.infohound.net/tools/302-cookie.pl");
Connection.Response res = con.execute();
assertEquals("asdfg123", res.cookie("token")); // confirms that cookies set on 1st hit are presented in final result
Document doc = res.parse();
assertEquals("uid=jhy; token=asdfg123", ihVal("HTTP_COOKIE", doc)); // confirms that redirected hit saw cookie
}
@Test
public void maximumRedirects() {
boolean threw = false;
try {
Document doc = Jsoup.connect("http://direct.infohound.net/tools/loop.pl").get();
} catch (IOException e) {
assertTrue(e.getMessage().contains("Too many redirects"));
threw = true;
}
assertTrue(threw);
}
@Test
public void multiCookieSet() throws IOException {
Connection con = Jsoup.connect("http://direct.infohound.net/tools/302-cookie.pl");
Connection.Response res = con.execute();
// test cookies set by redirect:
Map<String, String> cookies = res.cookies();
assertEquals("asdfg123", cookies.get("token"));
assertEquals("jhy", cookies.get("uid"));
// send those cookies into the echo URL by map:
Document doc = Jsoup.connect(echoURL).cookies(cookies).get();
assertEquals("uid=jhy; token=asdfg123", ihVal("HTTP_COOKIE", doc));
}
@Test
public void handlesDodgyCharset() throws IOException {
// tests that when we get back "UFT8", that it is recognised as unsupported, and falls back to default instead
String url = "http://direct.infohound.net/tools/bad-charset.pl";
Connection.Response res = Jsoup.connect(url).execute();
assertEquals("text/html; charset=UFT8", res.header("Content-Type")); // from the header
assertEquals(null, res.charset()); // tried to get from header, not supported, so returns null
Document doc = res.parse(); // would throw an error if charset unsupported
assertTrue(doc.text().contains("Hello!"));
assertEquals("UTF-8", res.charset()); // set from default on parse
}
@Test
public void maxBodySize() throws IOException {
String url = "http://direct.infohound.net/tools/large.html"; // 280 K
Connection.Response defaultRes = Jsoup.connect(url).execute();
Connection.Response smallRes = Jsoup.connect(url).maxBodySize(50 * 1024).execute(); // crops
Connection.Response mediumRes = Jsoup.connect(url).maxBodySize(200 * 1024).execute(); // crops
Connection.Response largeRes = Jsoup.connect(url).maxBodySize(300 * 1024).execute(); // does not crop
Connection.Response unlimitedRes = Jsoup.connect(url).maxBodySize(0).execute();
int actualString = 280735;
assertEquals(actualString, defaultRes.body().length());
assertEquals(50 * 1024, smallRes.body().length());
assertEquals(200 * 1024, mediumRes.body().length());
assertEquals(actualString, largeRes.body().length());
assertEquals(actualString, unlimitedRes.body().length());
int actualDocText = 269541;
assertEquals(actualDocText, defaultRes.parse().text().length());
assertEquals(49165, smallRes.parse().text().length());
assertEquals(196577, mediumRes.parse().text().length());
assertEquals(actualDocText, largeRes.parse().text().length());
assertEquals(actualDocText, unlimitedRes.parse().text().length());
}
}