Package com.ontometrics.scraper.extraction

Source Code of com.ontometrics.scraper.extraction.HttpClientExtractor

package com.ontometrics.scraper.extraction;

import java.io.IOException;
import java.net.URL;

import net.htmlparser.jericho.Source;

import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.HttpClient;
import org.apache.http.client.ResponseHandler;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.BasicResponseHandler;
import org.apache.http.impl.client.DefaultHttpClient;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class HttpClientExtractor implements SourceExtractor {

  private Logger log = LoggerFactory.getLogger(HttpClientExtractor.class);

  private HttpClient client;

  private ResponseHandler<String> responseHandler;

  public HttpClientExtractor() {
    responseHandler = new BasicResponseHandler();
    client = new DefaultHttpClient();

  }

  @Override
  public Source getSource(URL url) {
    String responseBody = null;
    try {
      HttpGet get = new HttpGet(url.toString());
      responseBody = client.execute(get, responseHandler);
    } catch (ClientProtocolException e) {
      log.error("Error getting source in session mode: {}", e);
    } catch (IOException e) {
      log.error("Error getting source in session mode: {}", e);
    }

    Source source = new Source(responseBody);
    return source;
  }
 
  public void close() {
    log.debug("CALLING CLOSE IN HTTP CLIENT EXTRACTOR");
    client.getConnectionManager().shutdown();
  }
}
TOP

Related Classes of com.ontometrics.scraper.extraction.HttpClientExtractor

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.