Package org.wikipediacleaner.api

Source Code of org.wikipediacleaner.api.HttpUtils

/*
*  WPCleaner: A tool to help on Wikipedia maintenance tasks.
*  Copyright (C) 2013  Nicolas Vervelle
*
*  See README.txt file for licensing information.
*/

package org.wikipediacleaner.api;

import java.io.ByteArrayOutputStream;
import java.io.UnsupportedEncodingException;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CodingErrorAction;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import org.apache.commons.httpclient.HttpMethod;
import org.apache.commons.httpclient.NameValuePair;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.methods.PostMethod;
import org.wikipediacleaner.api.request.ApiLoginRequest;
import org.wikipediacleaner.utils.Configuration;
import org.wikipediacleaner.utils.ConfigurationValueBoolean;


/**
* Utilities class for HTTP requests.
*/
public class HttpUtils {

  // ==========================================================================
  // Configuration
  // ==========================================================================

  /**
   * Flag for tracing secret keys.
   */
  private static boolean DEBUG_SECRET_KEYS = false;

  /**
   * Flag for tracing time.
   */
  private static boolean DEBUG_TIME = false;

  /**
   * Flag for tracing URL.
   */
  private static boolean DEBUG_URL = true;

  /**
   * Update configuration.
   */
  public static void updateConfiguration() {
    Configuration config = Configuration.getConfiguration();
    DEBUG_TIME = config.getBoolean(
        null, ConfigurationValueBoolean.DEBUG_TIME);
    DEBUG_URL = config.getBoolean(
        null, ConfigurationValueBoolean.DEBUG_URL);
  }

  // ==========================================================================
  // HTTP methods
  // ==========================================================================

  /**
   * Create an HttpMethod.
   *
   * @param url URL of the request.
   * @param properties Properties to add to the request.
   * @param canUseGetMethod Flag indicating if a GET method can be used.
   * @return HttpMethod.
   */
  public static HttpMethod createHttpMethod(
      String url,
      Map<String, String> properties,
      boolean canUseGetMethod) {
    if (canUseGetMethod) {
      return createHttpGetMethod(url, properties);
    }
    return createHttpPostMethod(url, properties);
  }

  /**
   * Create an HTTP POST Method.
   *
   * @param url URL of the request.
   * @param properties Properties to drive the API.
   * @return POST Method
   */
  private static PostMethod createHttpPostMethod(
      String url,
      Map<String, String> properties) {
    StringBuilder debugUrl = (DEBUG_URL) ? new StringBuilder("POST " + url) : null;
    PostMethod method = new PostMethod(url);
    method.getParams().setContentCharset("UTF-8");
    method.setRequestHeader("Accept-Encoding", "gzip");
    if (properties != null) {
      boolean first = true;
      Iterator<Map.Entry<String, String>> iter = properties.entrySet().iterator();
      while (iter.hasNext()) {
        Map.Entry<String, String> property = iter.next();
        String key = property.getKey();
        String value = property.getValue();
        method.addParameter(key, value);
        if (DEBUG_URL &&
            (debugUrl != null) &&
            (DEBUG_SECRET_KEYS || !isSecretKey(key))) {
          int start = 0;
          while ((start < value.length()) && Character.isWhitespace(value.charAt(start))) {
            start++;
          }
          if (value.indexOf('\n', start) > 0) {
            value = value.substring(start, value.indexOf('\n', start)) + "...";
          }
          debugUrl.append(
              (first ? "?" : "&") +
              key + "=" +
              (isSecretKey(key) ? "XXXXX" : value));
          first = false;
        }
      }
      if (DEBUG_URL && (debugUrl != null)) {
        debugText(debugUrl.toString());
      }
    }
    return method;
  }

  /**
   * Create an HTTP GET Method.
   *
   * @param url URL of the request.
   * @param properties Properties to drive the API.
   * @return GET Method
   */
  private static GetMethod createHttpGetMethod(
      String url,
      Map<String, String> properties) {

    // Initialize GET Method
    GetMethod method = new GetMethod(url);
    method.getParams().setContentCharset("UTF-8");
    method.setRequestHeader("Accept-Encoding", "gzip");

    // Manager query string
    StringBuilder debugUrl = (DEBUG_URL) ? new StringBuilder("GET  " + url) : null;
    List<NameValuePair> params = new ArrayList<NameValuePair>();
    if (properties != null) {
      boolean first = true;
      Iterator<Map.Entry<String, String>> iter = properties.entrySet().iterator();
      while (iter.hasNext()) {
        Map.Entry<String, String> property = iter.next();
        String key = property.getKey();
        String value = property.getValue();
        params.add(new NameValuePair(key, value));
        if (DEBUG_URL &&
            (debugUrl != null) &&
            (DEBUG_SECRET_KEYS || !isSecretKey(key))) {
          int start = 0;
          while ((start < value.length()) && Character.isWhitespace(value.charAt(start))) {
            start++;
          }
          if (value.indexOf('\n', start) > 0) {
            value = value.substring(start, value.indexOf('\n', start)) + "...";
          }
          debugUrl.append(
              (first ? "?" : "&") +
              key + "=" +
              (isSecretKey(key) ? "XXXXX" : value));
        }
        first = false;
      }
      if (DEBUG_URL && (debugUrl != null)) {
        debugText(debugUrl.toString());
      }
    }
    NameValuePair[] tmpParams = new NameValuePair[params.size()];
    method.setQueryString(params.toArray(tmpParams));

    return method;
  }

  /**
   * @param key Key.
   * @return True if the value for the key should be kept secret.
   */
  private static boolean isSecretKey(String key) {
    boolean result = false;
    result |= ApiLoginRequest.PROPERTY_PASSWORD.equals(key);
    result |= ApiLoginRequest.PROPERTY_TOKEN.equals(key);
    result |= ApiLoginRequest.PROPERTY_USER_NAME.equals(key);
    result |= ApiLoginRequest.PROPERTY_USER_ID.equals(key);
    return result;
  }

  /**
   * Debug text.
   *
   * @param text Text to add to debug.
   */
  private static void debugText(String text) {
    if (DEBUG_TIME) {
      System.out.println("" + System.currentTimeMillis() + ": " + text);
    } else {
      System.out.println(text);
    }
  }

  // ==========================================================================
  // Configuration
  // ==========================================================================

  private static Charset utf8Charset = null;
  private static Charset iso88591Charset = null;

  static {
    utf8Charset = Charset.forName("UTF8");
    iso88591Charset = Charset.forName("ISO-8859-1");
  }

  /**
   * Append bytes of a String to a buffer.
   *
   * @param buf Byte buffer.
   * @param data String.
   * @throws UnsupportedEncodingException
   */
  private static void appendBytes(ByteArrayOutputStream buf, String data) throws UnsupportedEncodingException {
    byte[] b = data.getBytes("UTF8");
    buf.write(b, 0, b.length);
  }

  /**
   * Parse an encoded string into a byte array.
   *
   * @param segment String
   * @return Byte array.
   * @throws UnsupportedEncodingException
   */
  private static byte[] parseEncodedString(String segment) throws UnsupportedEncodingException {
    ByteArrayOutputStream buf = new ByteArrayOutputStream(segment.length());
    int last = 0;
    int index = 0;
    while (index < segment.length()) {
      if (segment.charAt(index) == '%') {
        appendBytes(buf, segment.substring(last, index));
        if ((index < segment.length() + 2) &&
            ("ABCDEFabcdef0123456789".indexOf(segment.charAt(index + 1)) >= 0) &&
            ("ABCDEFabcdef0123456789".indexOf(segment.charAt(index + 2)) >= 0)) {
          buf.write((byte) Integer.parseInt(segment.substring(index + 1, index + 3), 16));
          index += 3;
        } else if ((index < segment.length() + 1) &&
                   (segment.charAt(index + 1) == '%')) {
          buf.write((byte) '%');
          index += 2;
        } else {
          buf.write((byte) '%');
          index++;
        }
        last = index;
      } else {
        index++;
      }
    }
    appendBytes(buf, segment.substring(last));
    return buf.toByteArray();
  }

  /**
   * Parse an encoded string, trying several characters sets.
   *
   * @param segment String to parse.
   * @param encodings Characters sets.
   * @return Decoded string.
   */
  private static String parseEncodedString(String segment, Charset... encodings) {
    if ((segment == null) || (segment.indexOf('%') < 0)) {
      return segment;
    }
    try {
      byte[] data = parseEncodedString(segment);
      for (Charset encoding : encodings) {
        try {
          if (encoding != null) {
            return encoding.newDecoder().
                onMalformedInput(CodingErrorAction.REPORT).
                decode(ByteBuffer.wrap(data)).toString();
          }
        } catch (CharacterCodingException e) {
          // Incorrect encoding, try next one
        }
      }
    } catch (UnsupportedEncodingException e) {
      // Nothing to do
    }
    return segment;
  }

  /**
   * Find if a given URL is for an article.
   *
   * @param url URL.
   * @param base Base URL.
   * @return Article name or null if it doesn't match an article.
   */
  public static String getArticleFromUrl(String url, String base) {
    if ((url == null) || (base == null)) {
      return null;
    }

    // Cleanup URL
    while (url.endsWith("|")) {
      url = url.substring(0, url.length() - 1);
    }

    // Create URI
    URI uri = null;
    try {
      uri = new URI(url);
    } catch (URISyntaxException e) {
      return null;
    }

    // Various checks
    if (!uri.isAbsolute() || uri.isOpaque()) {
      return null;
    }

    // Check scheme
    String scheme = uri.getScheme();
    if (scheme == null) {
      return null;
    }
    if (!scheme.equalsIgnoreCase("http") &&
        !scheme.equalsIgnoreCase("https")) {
      return null;
    }

    // Build decoded parts
    StringBuilder details = new StringBuilder();
    details.append("//");
    details.append(uri.getAuthority());
    details.append(parseEncodedString(uri.getRawPath(), utf8Charset, iso88591Charset));
    if (uri.getQuery() != null) {
      details.append("?");
      details.append(uri.getQuery());
    }
    if (uri.getFragment() != null) {
      details.append("#");
      details.append(uri.getFragment());
    }

    // Check that URL starts correctly
    int paramIndex = base.indexOf("$1");
    if (paramIndex < 0) {
      return null;
    }
    String detailsStr = details.toString();
    if (!detailsStr.startsWith(base.substring(0, paramIndex))) {
      return null;
    }

    // Check that URL ends correctly
    String result = null;
    if (paramIndex + 2 >= base.length()) {
      result = detailsStr.substring(paramIndex);
    } else if (!detailsStr.endsWith(base.substring(paramIndex + 2))) {
      return null;
    } else {
      result = detailsStr.substring(paramIndex, details.length() - base.length() + 2 + paramIndex);
    }
    if (result != null) {
      result = result.replaceAll("\\_", " ");
      if (result.endsWith("/")) {
        result = result.substring(0, result.length() - 1);
      }
    }
    return result;
  }
}
TOP

Related Classes of org.wikipediacleaner.api.HttpUtils

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.