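* @param o the object to parse; must be a {@code URL}, otherwise an error is logged
*          and {@code null} is returned
* @return the parse result for the page at the given URL, or {@code null} if
*         {@code o} is not a {@code URL} or an {@code IOException} occurs while
*         executing the HTTP request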
*/
public ParsedObject parseObject(Object o)
{
ParsedObject result = new BaseParsedObject();
if ((o instanceof URL) == false)
{
logger.error("URLToDocHandler: invalid object type: " + o);
return null;
}
URL pageToAdd = (URL) o;
HttpClient client = new HttpClient();
client.startSession(pageToAdd);
GetMethod method = new GetMethod(pageToAdd.getPath());
method.setFollowRedirects(true);
int statusCode = -1;
int attempt = 0;
// We will retry up to 3 times.
while (statusCode == -1 && attempt < 3)
{
try
{
// execute the method.
client.executeMethod(method);
statusCode = method.getStatusCode();
if (logger.isDebugEnabled())
{
logger.debug("URL = " + pageToAdd.toString() + "Status code = " + statusCode);
}
}
catch (HttpException e)
{
// We will retry
}
catch (IOException e)
{
return null;
}
}
// Check that we didn't run out of retries.
if (statusCode != -1)
{
String content = null;
try
{
content = method.getDataAsString();
}
catch (IOException ioe)
{
logger.error("Getting content for " + pageToAdd.toString(), ioe);
}
if (content != null)
{
try
{
result.setKey(java.net.URLEncoder.encode(pageToAdd.toString()));
result.setType(ParsedObject.OBJECT_TYPE_URL);
// TODO: We should extract the <title> tag here.
result.setTitle(pageToAdd.toString());
result.setContent(content);
result.setDescription("");
result.setLanguage("");
result.setURL(pageToAdd);
result.setClassName(o.getClass().getName());
logger.info("Parsed '" + pageToAdd.toString() + "'");
}
catch (Exception e)
{
e.printStackTrace();