/**
* Copyright (C) 2010 Peter Karich <jetwick_@_pannous_._info>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.jetwick.ese.util;
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.StringReader;
import java.io.UnsupportedEncodingException;
import java.net.CookieHandler;
import java.net.CookieManager;
import java.net.CookiePolicy;
import java.net.HttpURLConnection;
import java.net.Proxy;
import java.net.URL;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TimeZone;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
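/**
* Collection of static helper methods: file and stream reading, XML parsing
* and serialization, date formatting, URL and link building, and bit/byte
* conversions.
*
* TODO: currently only tested indirectly, through the methods in YTweetTable!
*
* @author Peter Karich, peat_hal 'at' users 'dot' sourceforge 'dot' net
*/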
public class Helper {
/** Class-wide SLF4J logger. */
private static Logger logger = LoggerFactory.getLogger(Helper.class);
/** URL scheme prefix that {@link #extractDomain(String)} strips from its input. */
public static final String HTTP = "http://";
/** Secure URL scheme prefix that {@link #extractDomain(String)} strips from its input. */
public static final String HTTPS = "https://";
/** Base URL prepended by the Twitter link/href builders of this class. */
public static final String TURL = "http://twitter.com";
// NOTE(review): empty and not read anywhere in this class; purpose unknown -- confirm against callers.
public static final String JURL = "";
/** Charset name handed to the URL encoder/decoder and to {@link #readFile(String)}. */
public static final String UTF8 = "UTF8";
/** SimpleDateFormat pattern used by {@link #createLocalFormat()}; the trailing Z is a literal character. */
private static final String localDateTimeFormatString = "yyyy-MM-dd'T'HH:mm:ss.S'Z'";
/** SimpleDateFormat pattern used by {@link #createSimpleFormat()}. */
private static final String simpleDateString = "HH:mm yyyy-MM-dd";
/**
 * Creates a new formatter for the local date-time pattern of this class,
 * with its time zone fixed to UTC.
 *
 * @return a fresh formatter instance on every call
 */
public static DateFormat createLocalFormat() {
    SimpleDateFormat format = new SimpleDateFormat(localDateTimeFormatString);
    TimeZone utc = TimeZone.getTimeZone("UTC");
    format.setTimeZone(utc);
    return format;
}
/**
 * Creates a new formatter for the short "time then date" pattern of this
 * class, with its time zone fixed to UTC.
 *
 * @return a fresh formatter instance on every call
 */
public static DateFormat createSimpleFormat() {
    SimpleDateFormat format = new SimpleDateFormat(simpleDateString);
    TimeZone utc = TimeZone.getTimeZone("UTC");
    format.setTimeZone(utc);
    return format;
}
/**
 * Opens the given file for buffered reading, decoding its bytes as UTF-8.
 * The caller is responsible for closing the returned reader.
 *
 * @param file the file to read
 * @return a buffered reader over the file content
 * @throws FileNotFoundException if the file cannot be opened for reading
 * @throws UnsupportedEncodingException if UTF-8 is not supported
 */
public static BufferedReader createBuffReader(File file) throws FileNotFoundException, UnsupportedEncodingException {
    FileInputStream rawBytes = new FileInputStream(file);
    InputStreamReader decoder = new InputStreamReader(rawBytes, "UTF-8");
    return new BufferedReader(decoder);
}
/**
 * Wraps the given stream in a buffered reader that decodes its bytes as UTF-8.
 * The caller is responsible for closing the returned reader.
 *
 * @param is the byte stream to read from
 * @return a buffered reader over the stream
 * @throws FileNotFoundException declared for symmetry with the file overload
 * @throws UnsupportedEncodingException if UTF-8 is not supported
 */
public static BufferedReader createBuffReader(InputStream is) throws FileNotFoundException, UnsupportedEncodingException {
    InputStreamReader decoder = new InputStreamReader(is, "UTF-8");
    return new BufferedReader(decoder);
}
/**
 * Opens a classpath resource for buffered reading, decoding it as UTF-8.
 * The resource is looked up relative to this class.
 *
 * @param file the resource name as accepted by {@link Class#getResourceAsStream(String)}
 * @return a buffered reader over the resource
 * @throws UnsupportedEncodingException if UTF-8 is not supported
 */
public static BufferedReader createBuffReaderCP(String file) throws UnsupportedEncodingException {
    InputStream resource = Helper.class.getResourceAsStream(file);
    InputStreamReader decoder = new InputStreamReader(resource, "UTF-8");
    return new BufferedReader(decoder);
}
/**
 * Opens the given file for buffered writing, encoding characters as UTF-8.
 * The caller is responsible for closing the returned writer.
 *
 * @param file the file to write
 * @return a buffered writer onto the file
 * @throws FileNotFoundException if the file cannot be opened for writing
 * @throws UnsupportedEncodingException if UTF-8 is not supported
 */
public static BufferedWriter createBuffWriter(File file) throws FileNotFoundException, UnsupportedEncodingException {
    FileOutputStream rawBytes = new FileOutputStream(file);
    OutputStreamWriter encoder = new OutputStreamWriter(rawBytes, "UTF-8");
    return new BufferedWriter(encoder);
}
/**
 * Reads all lines of the file at the given path, decoding it with the
 * charset named by {@link #UTF8}.
 *
 * @param file path of the file to read
 * @return the lines of the file, without line terminators
 * @throws IOException if the file cannot be opened or read
 */
public static List<String> readFile(String file) throws IOException {
    FileInputStream rawBytes = new FileInputStream(file);
    Reader decoder = new InputStreamReader(rawBytes, UTF8);
    return readFile(decoder);
}
/**
 * Reads all lines from the given reader and closes it afterwards.
 * The reader is closed even when reading fails.
 *
 * @param simpleReader the character source; it is wrapped in a BufferedReader
 * @return the lines in reading order, without line terminators
 * @throws IOException if reading or closing fails
 */
public static List<String> readFile(Reader simpleReader) throws IOException {
    BufferedReader reader = new BufferedReader(simpleReader);
    try {
        List<String> res = new ArrayList<String>();
        String line;
        while ((line = reader.readLine()) != null) {
            res.add(line);
        }
        return res;
    } finally {
        // release the underlying resource on the failure path too
        reader.close();
    }
}
/**
 * Returns a new DOM parser that is neither validating nor namespace aware.
 *
 * NOTE(review): no hardening against external entities or DTDs is
 * configured here -- confirm that this is acceptable for all callers.
 *
 * @return a freshly created document builder
 * @throws ParserConfigurationException if the factory cannot create a builder
 */
public static DocumentBuilder newDocumentBuilder()
        throws ParserConfigurationException {
    DocumentBuilderFactory domFactory = DocumentBuilderFactory.newInstance();
    domFactory.setNamespaceAware(false);
    // validation stays switched off; the original author suspected that
    // enabling it makes parsing slower
    domFactory.setValidating(false);
    return domFactory.newDocumentBuilder();
}
/**
 * Parses the given XML text into a DOM document.
 *
 * The text is handed to the parser as a character stream. Converting it to
 * bytes with the platform default charset first would corrupt non-ASCII
 * characters whenever that charset differs from the encoding the parser
 * assumes.
 *
 * @param xmlString the XML text to parse
 * @return the parsed document
 * @throws SAXException if the text is not well-formed XML
 * @throws IOException if reading the text fails
 * @throws ParserConfigurationException if no parser can be created
 */
public static Document getAsDocument(String xmlString) throws SAXException,
        IOException, ParserConfigurationException {
    InputSource source = new InputSource(new StringReader(xmlString));
    return newDocumentBuilder().parse(source);
}
/**
 * Serializes a DOM node to an XML string using UTF-8 as the output encoding.
 *
 * @param node the node (or whole document) to serialize
 * @param prettyXml if true the transformer is asked to indent the output
 * @return the serialized XML
 * @throws TransformerException if the transformation fails
 * @throws UnsupportedEncodingException if UTF-8 is not supported
 */
public static String getDocumentAsString(Node node, boolean prettyXml)
        throws TransformerException, UnsupportedEncodingException {
    final String encoding = "UTF-8";
    Transformer serializer = TransformerFactory.newInstance().newTransformer();
    serializer.setOutputProperty(OutputKeys.ENCODING, encoding);
    if (prettyXml) {
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
    }
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    StreamResult target = new StreamResult(buffer);
    serializer.transform(new DOMSource(node), target);
    return buffer.toString(encoding);
}
/**
 * Removes duplicate elements from the given list in place, keeping the
 * first occurrence of every element and the original order.
 *
 * @param list the list to de-duplicate; it must support clear() and addAll()
 */
public static <T> void removeDuplicates(List<T> list) {
    // a LinkedHashSet drops duplicates but remembers insertion order
    Set<T> unique = new LinkedHashSet<T>(list);
    list.clear();
    list.addAll(unique);
}
/**
 * Builds the path of a file inside the ".es-example" directory below the
 * user's home directory. The directory is created if it does not exist;
 * the result of that attempt is not checked.
 *
 * @param str the file name to append
 * @return the path of the file below the application directory
 */
public static String getFileUnderHome(String str) {
    String appHome = System.getProperty("user.home") + File.separatorChar + ".es-example";
    File appDir = new File(appHome);
    if (!appDir.exists()) {
        appDir.mkdir();
    }
    return appHome + File.separatorChar + str;
}
/**
 * Formats the date with the formatter from {@link #createSimpleFormat()}.
 *
 * @param date the date to format
 * @return the formatted date
 */
public static String toSimpleDateTime(Date date) {
    DateFormat formatter = createSimpleFormat();
    return formatter.format(date);
}
/**
 * Formats the date with the formatter from {@link #createLocalFormat()}.
 *
 * @param date the date to format
 * @return the formatted date
 */
public static String toLocalDateTime(Date date) {
    DateFormat formatter = createLocalFormat();
    return formatter.format(date);
}
/**
 * Parses a string in the format of {@link #createLocalFormat()}.
 *
 * @param createdAt the text to parse
 * @return the parsed date
 * @throws RuntimeException wrapping the ParseException if the text does not match the format
 */
public static Date toDate(String createdAt) {
    DateFormat parser = createLocalFormat();
    try {
        return parser.parse(createdAt);
    } catch (ParseException ex) {
        throw new RuntimeException(ex);
    }
}
/**
 * Null-tolerant variant of {@link #toDate(String)}.
 *
 * @param string the text to parse, may be null
 * @return null for null input, otherwise the parsed date
 */
public static Date toDateNoNPE(String string) {
    return string == null ? null : toDate(string);
}
/**
 * Removes the bold tags (upper and lower case, opening and closing) from
 * the given text.
 *
 * @param str the highlighted text
 * @return the text without the bold tags
 */
public static String stripOutLuceneHighlighting(String str) {
    String[] markers = {"<B>", "</B>", "<b>", "</b>"};
    String cleaned = str;
    for (String marker : markers) {
        cleaned = cleaned.replace(marker, "");
    }
    return cleaned;
}
/**
 * Moves an '@' that directly precedes an opening upper-case bold tag to
 * just behind that tag.
 *
 * @param str the text to fix
 * @return the text with every "@" + bold-tag pair swapped
 */
public static String fixForUserInput(String str) {
    return str.replace("@<B>", "<B>@");
}
/**
 * Builds the URL of a single status below {@link #TURL}.
 *
 * @param user the screen name used as first path segment
 * @param id the status id
 * @return the status URL
 */
public static String toTwitterHref(String user, long id) {
    StringBuilder href = new StringBuilder(TURL);
    href.append("/").append(user);
    href.append("/status/").append(id);
    return href.toString();
}
/**
 * Builds a URL below {@link #TURL} that carries the given text as the
 * "status" query parameter. The text is appended as is, without encoding.
 *
 * @param txt the status text
 * @return the status URL
 */
public static String toTwitterStatus(String txt) {
    StringBuilder href = new StringBuilder(TURL);
    href.append("?status=").append(txt);
    return href.toString();
}
/**
 * Builds a reply URL whose status text is "@user " (not URL-encoded).
 *
 * @param user the user to reply to
 * @param tweetId id of the status being replied to, may be null
 * @return the reply URL
 */
public static String toReplyHref(String user, Long tweetId) {
    String mention = "@" + user + " ";
    return toReplyStatusHref(mention, user, tweetId, false);
}
/**
 * Builds a status URL below {@link #TURL} with optional reply parameters.
 *
 * @param status the status text
 * @param user value of the "in_reply_to" parameter; omitted when null
 * @param tweetId value of the "in_reply_to_status_id" parameter; omitted when null
 * @param encode if true the status text is passed through {@link #twitterUrlEncode(String)}
 * @return the assembled URL
 */
public static String toReplyStatusHref(String status, String user, Long tweetId, boolean encode) {
    String text = encode ? twitterUrlEncode(status) : status;
    StringBuilder href = new StringBuilder(TURL);
    href.append("?status=").append(text);
    if (tweetId != null) {
        href.append("&in_reply_to_status_id=").append(tweetId);
    }
    if (user != null) {
        href.append("&in_reply_to=").append(user);
    }
    return href.toString();
}
/**
 * Builds an external HTML link to a path below {@link #TURL}.
 *
 * @param title the link title
 * @param url the path appended after the base URL and a slash
 * @return the anchor element created by {@link #toLink(String, String)}
 */
public static String toTwitterLink(String title, String url) {
    String absoluteUrl = TURL + "/" + url;
    return toLink(title, absoluteUrl);
}
/**
 * Builds an HTML anchor with the CSS class "i-tw-link". A url starting
 * with "www." gets the http scheme prepended. Title and url are inserted
 * without HTML escaping.
 *
 * @param title the link text
 * @param url the link target
 * @return the anchor element as a string
 */
public static String toInternLink(String title, String url) {
    String href = url.startsWith("www.") ? "http://" + url : url;
    StringBuilder anchor = new StringBuilder("<a class=\"i-tw-link\" href=\"");
    anchor.append(href).append("\">");
    anchor.append(title).append("</a>");
    return anchor.toString();
}
/**
 * Builds an HTML anchor with the CSS class "ex-tw-link" that opens in a new
 * window. A url starting with "www." gets the http scheme prepended. The
 * full title goes into the title attribute; the visible text is cut to
 * 47 characters plus "..." when the title is longer than 50 characters.
 * Title and url are inserted without HTML escaping.
 *
 * @param title the link title
 * @param url the link target
 * @return the anchor element as a string
 */
public static String toLink(String title, String url) {
    String href = url.startsWith("www.") ? "http://" + url : url;
    String label = title.length() > 50 ? title.substring(0, 47) + "..." : title;
    StringBuilder anchor = new StringBuilder();
    anchor.append("<a title=\"").append(title);
    anchor.append("\" class=\"ex-tw-link\" target=\"_blank\" href=\"").append(href);
    anchor.append("\">").append(label).append("</a>");
    return anchor.toString();
}
/**
 * Returns a new date that lies the given number of 24-hour days after the
 * given date (or before it, for a negative value).
 *
 * @param date the start date; it is not modified
 * @param days number of days to add, may be negative
 * @return a new date shifted by {@code days * 24h}
 */
public static Date plusDays(Date date, int days) {
    // compute in long: with int arithmetic the product overflows from 25 days on
    final long millisPerDay = 24L * 3600L * 1000L;
    return new Date(date.getTime() + days * millisPerDay);
}
/**
 * Parses command line arguments of the form {@code key=value},
 * {@code -key=value} or {@code --key=value} into an ordered map.
 *
 * The argument is split at the FIRST '=' only, so values may themselves
 * contain '=' (URLs with query strings, base64 padding, ...). Arguments
 * without '=' or with nothing after the '=' are skipped. Up to two leading
 * dashes are removed from the key. A later argument overwrites an earlier
 * one with the same key.
 *
 * @param args the raw arguments
 * @return the key/value pairs in argument order
 */
public static Map<String, String> parseArguments(String[] args) {
    Map<String, String> map = new LinkedHashMap<String, String>();
    for (String arg : args) {
        int separator = arg.indexOf('=');
        // no '=' at all, or no value behind it
        if (separator < 0 || separator == arg.length() - 1)
            continue;

        String key = arg.substring(0, separator);
        // accept both "-key" and "--key"
        for (int dashes = 0; dashes < 2 && key.startsWith("-"); dashes++) {
            key = key.substring(1);
        }
        map.put(key, arg.substring(separator + 1));
    }
    return map;
}
/**
 * URL-encodes the text with the charset named by {@link #UTF8}.
 *
 * @param str the text to encode
 * @return the encoded text, or the unchanged input if the charset is not supported
 */
public static String urlEncode(String str) {
    String encoded;
    try {
        encoded = URLEncoder.encode(str, UTF8);
    } catch (UnsupportedEncodingException ex) {
        encoded = str;
    }
    return encoded;
}
/**
 * URL-decodes the text with the charset named by {@link #UTF8}.
 *
 * @param str the text to decode
 * @return the decoded text, or the unchanged input if the charset is not supported
 */
public static String urlDecode(String str) {
    String decoded;
    try {
        decoded = URLDecoder.decode(str, UTF8);
    } catch (UnsupportedEncodingException ex) {
        decoded = str;
    }
    return decoded;
}
/**
 * URL-encodes the text like {@link #urlEncode(String)} but represents a
 * space as "%20" instead of "+".
 *
 * @param str the text to encode
 * @return the encoded text without any '+'
 */
public static String twitterUrlEncode(String str) {
    String encoded = urlEncode(str);
    return encoded.replace("+", "%20");
}
/**
 * Removes all characters which are not allowed in XML 1.0 documents.
 *
 * The text is inspected per Unicode code point: tab, line feed, carriage
 * return, U+0020-U+D7FF, U+E000-U+FFFD and the supplementary range
 * U+10000-U+10FFFF are kept. Looking at single chars instead would throw
 * away every surrogate and with it all valid supplementary characters.
 * Unpaired surrogates are still removed.
 *
 * Based on
 * http://stackoverflow.com/questions/20762/how-do-you-remove-invalid-hexadecimal-characters-from-an-xml-based-data-source-pr
 *
 * @param inputStr the text to filter, may be null
 * @return the filtered text, or null for null input
 */
public static String xmlCharacterWhitelist(String inputStr) {
    if (inputStr == null)
        return null;

    StringBuilder sbOutput = new StringBuilder(inputStr.length());
    int i = 0;
    while (i < inputStr.length()) {
        // an unpaired surrogate is returned as itself and rejected below
        int codePoint = inputStr.codePointAt(i);
        boolean allowed = (codePoint >= 0x0020 && codePoint <= 0xD7FF)
                || (codePoint >= 0xE000 && codePoint <= 0xFFFD)
                || (codePoint >= 0x10000 && codePoint <= 0x10FFFF)
                || codePoint == 0x0009
                || codePoint == 0x000A
                || codePoint == 0x000D;
        if (allowed) {
            sbOutput.appendCodePoint(codePoint);
        }
        i += Character.charCount(codePoint);
    }
    return sbOutput.toString();
}
/**
 * Copies the entries into a new list and sorts it by value in descending
 * order; the given collection is left untouched.
 *
 * @param entrySet the entries to sort
 * @return a new list where the entry with the highest integer value comes first
 */
public static <T> List<Entry<T, Integer>> sort(Collection<Entry<T, Integer>> entrySet) {
    Comparator<Entry<T, Integer>> byValueDescending = new Comparator<Entry<T, Integer>>() {

        @Override
        public int compare(Entry<T, Integer> left, Entry<T, Integer> right) {
            // operands swapped on purpose: bigger values sort to the front
            return right.getValue().compareTo(left.getValue());
        }
    };
    List<Entry<T, Integer>> ordered = new ArrayList<Entry<T, Integer>>(entrySet);
    Collections.sort(ordered, byValueDescending);
    return ordered;
}
/**
 * Copies the entries into a new list and sorts it by value in descending
 * order; the given collection is left untouched.
 *
 * @param entrySet the entries to sort
 * @return a new list where the entry with the highest long value comes first
 */
public static <T> List<Entry<T, Long>> sortLong(Collection<Entry<T, Long>> entrySet) {
    Comparator<Entry<T, Long>> byValueDescending = new Comparator<Entry<T, Long>>() {

        @Override
        public int compare(Entry<T, Long> left, Entry<T, Long> right) {
            // operands swapped on purpose: bigger values sort to the front
            return right.getValue().compareTo(left.getValue());
        }
    };
    List<Entry<T, Long>> ordered = new ArrayList<Entry<T, Long>>(entrySet);
    Collections.sort(ordered, byValueDescending);
    return ordered;
}
/**
 * Creates a not yet connected HTTP connection for the given URL that
 * bypasses any proxy, does not follow redirects and uses the HEAD request
 * method, because callers only care about status and headers, not the body
 * (see http://java.sun.com/developer/JDCTechTips/2003/tt0422.html).
 *
 * @param urlAsString the URL to open
 * @return the configured connection
 * @throws Exception if the URL is malformed or the connection cannot be created
 */
public static HttpURLConnection getHttpURLConnection(String urlAsString) throws Exception {
    URL target = new URL(urlAsString);
    // going through a proxy may increase latency
    HttpURLConnection connection = (HttpURLConnection) target.openConnection(Proxy.NO_PROXY);
    connection.setRequestMethod("HEAD");
    // redirects are evaluated by the caller
    connection.setInstanceFollowRedirects(false);
    return connection;
}
/**
 * Extracts the domain from a URL: a leading scheme ({@link #HTTP} or
 * {@link #HTTPS}) and a leading "www." are removed and everything from the
 * first '/' on is cut off. If there is no '/', the text is cut at the
 * first space instead, so trailing words are not returned as part of the
 * domain.
 *
 * URL shorteners seem to have a domain of at most 11 characters (the
 * longest seen was tinyurl.com, the shortest t.co).
 *
 * @param url the URL, with or without scheme
 * @return the domain, or an empty string if the remainder contains no '.'
 *         or is shorter than 4 characters
 */
public static String extractDomain(String url) {
    if (url.startsWith(HTTP))
        url = url.substring(HTTP.length());

    if (url.startsWith(HTTPS))
        url = url.substring(HTTPS.length());

    int index = url.indexOf("/");
    if (index < 0) {
        // no path: the domain ends at the first space or at the end of the text
        int space = url.indexOf(" ");
        index = space >= 0 ? space : url.length();
    }

    String domain = url.substring(0, index);
    if (domain.startsWith("www."))
        domain = domain.substring(4);

    // a usable domain needs a dot and at least 4 characters (like "t.co")
    index = domain.indexOf(".");
    if (index < 0 || domain.length() < 4)
        return "";

    return domain;
}
/**
 * Resolves a (shortened) URL with a single HEAD request that does not
 * follow redirects.
 *
 * On some devices we have to hack:
 * http://developers.sun.com/mobility/reference/techart/design_guidelines/http_redirection.html
 *
 * @param urlAsString the URL to resolve
 * @param timeout connect and read timeout in milliseconds
 * @return the same url if the response code is 200; the value of the
 *         Location header (spaces replaced by '+') if the response code is
 *         301; an empty string in every other case, including any failure
 *         or timeout
 */
public static String getResolvedUrl(String urlAsString, int timeout) {
    try {
        HttpURLConnection hConn = getHttpURLConnection(urlAsString);
        // default is 0 => infinity waiting
        hConn.setConnectTimeout(timeout);
        hConn.setReadTimeout(timeout);
        hConn.connect();
        int responseCode = hConn.getResponseCode();
        hConn.getInputStream().close();
        if (responseCode == HttpURLConnection.HTTP_OK)
            return urlAsString;

        String loc = hConn.getHeaderField("Location");
        if (responseCode == HttpURLConnection.HTTP_MOVED_PERM && loc != null)
            return loc.replaceAll(" ", "+");
    } catch (Exception ex) {
        // resolving is best effort; keep a trace of the failure
        logger.debug("Could not resolve url " + urlAsString, ex);
    }
    return "";
}
/** Start of a description meta tag up to and including the opening quote of its content attribute. */
final static String DESCRIPTION = "<meta name=\"description\" content=\"";
/** Same as {@link #DESCRIPTION} but with a capitalized name value; both constants have the same length. */
final static String DESCRIPTION2 = "<meta name=\"Description\" content=\"";
/**
 * Returns title and description of a specified string (as byte array).
 *
 * The bytes are first decoded with the default charset. If the text
 * contains "charset=" followed by a name that ends at a double quote, the
 * bytes are decoded again with that charset. The title is the text between
 * the end of the opening title tag and the first closing title tag; the
 * description is the content attribute of a description meta tag.
 *
 * @param arr the raw bytes of (the beginning of) an HTML page
 * @return a two element array {title, description}; title and description
 *         are empty strings if the title cannot be found, the description
 *         alone is empty if only the meta tag is missing
 */
public static String[] getUrlInfosFromText(byte[] arr) {
// NOTE(review): decodes with the default charset, so this first pass depends on the runtime configuration
String res = new String(arr);
int index = getStartTitleEndPos(res);
if (index < 0)
return new String[]{"", ""};
int encIndex = res.indexOf("charset=");
if (encIndex > 0) {
// 8 is the length of "charset="
int lastEncIndex = res.indexOf("\"", encIndex + 8);
// if we have charset="something": a quote directly follows the '=', so
// shift the start by one and look for the closing quote instead
if (lastEncIndex == encIndex + 8)
lastEncIndex = res.indexOf("\"", ++encIndex + 8);
if (lastEncIndex > encIndex + 8) {
String encoding = res.substring(encIndex + 8, lastEncIndex);
try {
// decode again with the declared charset and locate the title in the new text
res = new String(arr, encoding);
index = getStartTitleEndPos(res);
if (index < 0)
return new String[]{"", ""};
} catch (Exception ex) {
// unknown or unsupported charset name: continue with the first decoding
}
}
}
int lastIndex = res.indexOf("</title>");
// no closing tag, or it lies before the start of the title text
if (lastIndex <= index)
return new String[]{"", ""};
String title = res.substring(index, lastIndex);
index = res.indexOf(DESCRIPTION);
if (index < 0)
index = res.indexOf(DESCRIPTION2);
// closing quote of the content attribute; DESCRIPTION.length() is also
// used when DESCRIPTION2 matched, both strings have the same length
lastIndex = res.indexOf("\"", index + DESCRIPTION.length());
if (index < 0 || lastIndex < 0)
return new String[]{title, ""};
index += DESCRIPTION.length();
return new String[]{title, res.substring(index, lastIndex)};
}
/**
 * Fetches the first 4096 bytes of the given URL (bypassing any proxy, with
 * a browser-like User-Agent) and extracts title and description from them
 * via {@link #getUrlInfosFromText(byte[])}.
 *
 * The buffer is filled with repeated reads because a single read may
 * return fewer bytes than are available later; the stream is always closed.
 *
 * @param urlAsString the URL to fetch
 * @param timeout connect and read timeout in milliseconds
 * @return a two element array {title, description}; both are empty strings
 *         if the page cannot be fetched
 */
public static String[] getUrlInfos(String urlAsString, int timeout) {
    InputStream in = null;
    try {
        URL url = new URL(urlAsString);
        // using proxy may increase latency
        HttpURLConnection hConn = (HttpURLConnection) url.openConnection(Proxy.NO_PROXY);
        hConn.setRequestProperty("User-Agent", "Mozilla/5.0 Gecko/20100915 Firefox/3.6.10");
        hConn.setConnectTimeout(timeout);
        hConn.setReadTimeout(timeout);
        // default length of bufferedinputstream is 8k
        byte[] arr = new byte[4096];
        in = new BufferedInputStream(hConn.getInputStream(), arr.length);
        int filled = 0;
        try {
            int count;
            while (filled < arr.length
                    && (count = in.read(arr, filled, arr.length - filled)) != -1) {
                filled += count;
            }
        } catch (IOException ex) {
            // a failure after some data arrived still leaves usable text
            if (filled == 0)
                throw ex;
        }
        return getUrlInfosFromText(arr);
    } catch (Exception ex) {
        // fetching is best effort; keep a trace of the failure
        logger.debug("Could not fetch infos of url " + urlAsString, ex);
    } finally {
        if (in != null) {
            try {
                in.close();
            } catch (IOException ignored) {
                // nothing sensible to do if closing fails
            }
        }
    }
    return new String[]{"", ""};
}
/**
 * Tries to get the title of the page behind the specified url.
 *
 * @param urlAsString the URL to fetch
 * @param timeout timeout handed on to {@link #getUrlInfos(String, int)}
 * @return the first element returned by {@link #getUrlInfos(String, int)};
 *         an empty string if fetching failed
 */
public static String getUrlTitle(String urlAsString, int timeout) {
    String[] titleAndDescription = getUrlInfos(urlAsString, timeout);
    return titleAndDescription[0];
}
/**
 * Finds the position right behind the opening title tag, i.e. where the
 * title text starts. A plain opening tag is preferred; otherwise an opening
 * tag with attributes is searched.
 *
 * @param res the HTML text
 * @return the index of the first character after the opening title tag,
 *         or -1 if no (complete) opening title tag is found
 */
public static int getStartTitleEndPos(String res) {
    final String plainTag = "<title>";
    int plainStart = res.indexOf(plainTag);
    if (plainStart >= 0)
        return plainStart + plainTag.length();

    int attributedStart = res.indexOf("<title ");
    if (attributedStart < 0)
        return -1;

    int tagEnd = res.indexOf(">", attributedStart);
    return tagEnd >= 0 ? tagEnd + 1 : tagEnd;
}
/**
 * Fetches the given URL and parses the response body as XML.
 * The connection is opened with the default proxy settings; the response
 * stream is closed after parsing, also when parsing fails.
 *
 * @param urlAsString the URL to fetch
 * @param timeout connect and read timeout in milliseconds
 * @return the parsed document
 * @throws Exception if the URL is malformed, the request fails or the
 *         response is not well-formed XML
 */
public static Document readUrlAsDocument(String urlAsString, int timeout) throws Exception {
    URL url = new URL(urlAsString);
    HttpURLConnection hConn = (HttpURLConnection) url.openConnection();
    hConn.setReadTimeout(timeout);
    hConn.setConnectTimeout(timeout);
    InputStream in = hConn.getInputStream();
    try {
        return newDocumentBuilder().parse(in);
    } finally {
        try {
            in.close();
        } catch (IOException ignored) {
            // the parse result (or its exception) is what matters
        }
    }
}
/**
 * Fetches the given URL and returns the response body as text, read via
 * {@link #readInputStream(InputStream)}. The connection is opened with the
 * default proxy settings.
 *
 * @param urlAsString the URL to fetch
 * @param timeout connect and read timeout in milliseconds
 * @return the response body
 * @throws IOException if the URL is malformed or the request fails
 */
public static String readUrl(String urlAsString, int timeout) throws IOException {
    HttpURLConnection connection = (HttpURLConnection) new URL(urlAsString).openConnection();
    connection.setReadTimeout(timeout);
    connection.setConnectTimeout(timeout);
    InputStream body = connection.getInputStream();
    return readInputStream(body);
}
/**
 * Reads the whole stream as UTF-8 text and closes it afterwards, also when
 * reading fails. Every line of the input ends with a single '\n' in the
 * result, whatever line terminator the input used.
 *
 * @param is the stream to read
 * @return the text of the stream
 * @throws IllegalArgumentException if the stream is null
 * @throws IOException if reading or closing fails
 */
public static String readInputStream(InputStream is) throws IOException {
    if (is == null)
        throw new IllegalArgumentException("stream mustn't be null!");

    BufferedReader bufReader = new BufferedReader(new InputStreamReader(is, "UTF8"));
    try {
        StringBuilder sb = new StringBuilder();
        String line;
        while ((line = bufReader.readLine()) != null) {
            sb.append(line);
            sb.append('\n');
        }
        return sb.toString();
    } finally {
        // release the stream on the failure path too
        bufReader.close();
    }
}
/**
 * Collapses every run of whitespace characters into a single space and
 * then trims the result.
 *
 * @param str the text to normalize
 * @return the normalized text
 * @see http://www.rgagnon.com/javadetails/java-0352.html
 */
public static String trimAll(String str) {
    String collapsed = str.replaceAll("\\s+", " ");
    return collapsed.trim();
}
/**
 * Replaces every line feed character with a space. Carriage returns are
 * left untouched.
 *
 * @param str the text to process
 * @return the text without line feeds
 */
public static String trimNL(String str) {
    return str.replace('\n', ' ');
}
/**
 * Decodes or removes the {@code &...;} sequences of the given text.
 *
 * Every sequence that starts with '&' and ends with the next ';' is
 * replaced: if only decimal digits stand in between, by the character
 * with that code; otherwise (named entities, '#' forms, malformed input)
 * by a single space. The text in front of a sequence is always kept; it
 * used to be lost when the sequence was decoded to a character.
 *
 * The following method was taken from suns Decoder and stands under CDDL.
 * Look here for a converter:
 * http://gmailassistant.sourceforge.net/src/org/freeshell/zs/common/HtmlManipulator.java.html
 *
 * @param s the text to decode
 * @return the decoded text; the unchanged input if it contains no such sequence
 */
public static String htmlEntityDecode(String s) {
    StringBuilder sb = new StringBuilder();
    int pos = 0;
    int amp;
    int semi = -1;
    while ((amp = s.indexOf('&', pos)) != -1 && (semi = s.indexOf(';', amp)) != -1) {
        // keep the plain text in front of the entity
        sb.append(s, pos, amp);

        // collect the decimal code between '&' and ';'
        int n = -1;
        int i = amp + 1;
        for (; i < semi; ++i) {
            char c = s.charAt(i);
            if (c < '0' || c > '9')
                break;
            n = (n == -1 ? 0 : n * 10) + (c - '0');
        }

        if (i == semi && n != -1) {
            sb.append((char) n);
        } else {
            // not a purely numeric entity: drop it and leave a space behind
            sb.append(' ');
        }
        // continue behind the ';'
        pos = semi + 1;
    }
    if (sb.length() == 0)
        return s;

    sb.append(s, pos, s.length());
    return sb.toString();
}
/**
 * Installs a JVM-wide default cookie handler that accepts all cookies.
 *
 * @see http://blogs.sun.com/CoreJavaTechTips/entry/cookie_handling_in_java_se
 */
public static void enableCookieMgmt() {
    // a null store makes the manager use its default in-memory cookie store
    CookieManager acceptAll = new CookieManager(null, CookiePolicy.ACCEPT_ALL);
    CookieHandler.setDefault(acceptAll);
}
/**
 * Sets the system property "http.agent" to the empty string.
 *
 * @see http://stackoverflow.com/questions/2529682/setting-user-agent-of-a-java-urlconnection
 */
public static void enableUserAgentOverwrite() {
    final String agentProperty = "http.agent";
    System.setProperty(agentProperty, "");
}
/**
 * Converts a string of at most eight '0'/'1' characters into a byte; the
 * first character is the most significant bit.
 *
 * @param str the bit string, e.g. "101" for 5
 * @return the byte value; 0 for the empty string
 * @throws UnsupportedOperationException if the string is longer than 8
 *         characters or contains a character other than '0' and '1'
 */
public static byte bitString2byte(String str) {
    if (str.length() > 8)
        throw new UnsupportedOperationException("string length may be max 8");

    int bits = 0;
    for (char digit : str.toCharArray()) {
        bits <<= 1;
        switch (digit) {
            case '1':
                bits |= 1;
                break;
            case '0':
                break;
            default:
                throw new UnsupportedOperationException("string may contain only 1 or 0");
        }
    }
    return (byte) bits;
}
/**
 * Converts a byte into its eight character bit string, most significant
 * bit first.
 *
 * @param b the byte to convert
 * @return a string of exactly eight '0'/'1' characters
 */
public static String byte2bitString(byte b) {
    StringBuilder bits = new StringBuilder(8);
    // walk from the highest to the lowest of the eight bits
    for (int shift = 7; shift >= 0; shift--) {
        boolean set = ((b >> shift) & 0x01) == 1;
        bits.append(set ? '1' : '0');
    }
    return bits.toString();
}
/**
 * Packs up to eight bytes into a long. The byte at index 0 becomes the
 * least significant byte of the result.
 *
 * @param signature the bytes to pack, at most 8
 * @return the packed value; 0 for an empty array
 * @throws UnsupportedOperationException if the array is longer than 8 bytes
 */
public static long byteArray2long(byte[] signature) {
    if (signature.length > 8)
        throw new UnsupportedOperationException("Cannot lossless convert byte array into long if length is greater than 8");

    long val = 0;
    for (int i = signature.length - 1; i >= 0; i--) {
        val = val << 8;
        // mask to 0..255: a negative byte would otherwise be sign-extended
        // and overwrite all higher-order bytes with ones
        val |= (signature[i] & 0xFFL);
    }
    return val;
}
/**
 * Copies the strings of the collection into a new array, in the iteration
 * order of the collection.
 *
 * @param coll the collection to copy
 * @return a new array with the elements of the collection
 */
public static String[] toStringArray(Collection<String> coll) {
    String[] target = new String[coll.size()];
    return coll.toArray(target);
}
/**
 * Checks whether the string is null or has no characters. Whitespace
 * counts as content.
 *
 * @param str the string to check, may be null
 * @return true if the string is null or of length 0
 */
public static boolean isEmpty(String str) {
    if (str == null)
        return true;

    return str.length() == 0;
}
}