package com.tubeonfire.util;
import com.cybozu.labs.langdetect.Detector;
import com.cybozu.labs.langdetect.DetectorFactory;
import com.cybozu.labs.langdetect.LangDetectException;
import com.google.appengine.api.datastore.Text;
import java.util.Locale;
import java.util.regex.Pattern;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
public class StringHelper {
private static boolean isLoadLanguageProfile = false;
public static String replace(String str) {
str = str.replaceAll("[\\'\\/]+", "");
str = str
.replaceAll(
"[-\\!\"\\@\\#\\,\\.\\$%\\^&\\*\\(\\)_\\+\\=\\?\\;\\:\\~\\`\\{\\}\\[\\]\\|\\\\]+",
"-");
str = str.replaceAll(" ", "-");
str = str.replaceAll("[-]+", "-");
str = str.replaceAll("[\\W]+", "-");
str = str.replaceAll("^-", "");
str = str.replaceAll("-$", "");
str = str.toLowerCase();
return str;
}
public static String replaceSpace(String str) {
str = str.replaceAll("[\\'\\/]+", "");
str = str
.replaceAll(
"[-\\!\"\\@\\#\\,\\.\\$%\\^&\\*\\(\\)_\\+\\=\\?\\;\\:\\~\\`\\{\\}\\[\\]\\|\\\\]+",
"-");
str = str.replaceAll(" ", "-");
str = str.replaceAll("[-]+", "-");
str = str.replaceAll("^-", "");
str = str.replaceAll("-$", "");
str = str.toLowerCase();
return str;
}
public static String replaceVietnamese(String str) {
String[] aArray = { "ấ", "ầ", "ẩ", "ẫ", "ậ", "Ấ", "Ầ", "Ẩ", "Ẫ", "Ậ",
"ắ", "ằ", "ẳ", "ẵ", "ặ", "Ắ", "Ằ", "Ẳ", "Ẵ", "Ặ", "á", "à",
"ả", "ã", "ạ", "â", "ă", "Á", "À", "Ả", "Ã", "Ạ", "Â", "Ă" };
String[] eArray = { "ế", "ề", "ể", "ễ", "ệ", "Ế", "Ề", "Ể", "Ễ", "Ệ",
"é", "è", "ẻ", "ẽ", "ẹ", "ê", "É", "È", "Ẻ", "Ẽ", "Ẹ", "Ê" };
String[] iArray = { "í", "ì", "ỉ", "ĩ", "ị", "Í", "Ì", "Ỉ", "Ĩ", "Ị" };
String[] oArray = { "ố", "ồ", "ổ", "ỗ", "ộ", "Ố", "Ồ", "Ổ", "Ô", "Ộ",
"ớ", "ờ", "ở", "ỡ", "ợ", "Ớ", "Ờ", "Ở", "Ỡ", "Ợ", "ó", "ò",
"ỏ", "õ", "ọ", "ô", "ơ", "Ó", "Ò", "Ỏ", "Õ", "Ọ", "Ô", "Ơ" };
String[] uArray = { "ứ", "ừ", "ử", "ữ", "ự", "Ứ", "Ừ", "Ử", "Ữ", "Ự",
"ú", "ù", "ủ", "ũ", "ụ", "ư", "Ú", "Ù", "Ủ", "Ũ", "Ụ", "Ư" };
String[] yArray = { "ý", "ỳ", "ỷ", "ỹ", "ỵ", "Ý", "Ỳ", "Ỷ", "Ỹ", "Ỵ" };
String[] dArray = { "đ", "Đ" };
for (String item : aArray) {
str = str.replaceAll(item, "a");
}
for (String item : eArray) {
str = str.replaceAll(item, "e");
}
for (String item : iArray) {
str = str.replaceAll(item, "i");
}
for (String item : oArray) {
str = str.replaceAll(item, "o");
}
for (String item : uArray) {
str = str.replaceAll(item, "u");
}
for (String item : yArray) {
str = str.replaceAll(item, "y");
}
for (String item : dArray) {
str = str.replaceAll(item, "d");
}
str = str.replaceAll("[\\'\\/]+", "");
str = str
.replaceAll(
"[-\\!\"\\@\\#\\,\\.\\$%\\^&\\*\\(\\)_\\+\\=\\?\\;\\:\\~\\`\\{\\}\\[\\]\\|\\\\]+",
"-");
str = str.replaceAll(" ", "-");
str = str.replaceAll("[-]+", "-");
str = str.replaceAll("[\\W]+", "-");
str = str.replaceAll("^-", "");
str = str.replaceAll("-$", "");
str = str.toLowerCase();
return str;
}
public static String tag(String str) {
str = str.replaceAll("\\#", "Sharp");
str = str.replaceAll("\\+", "Plus");
str = str.replaceAll("[^.\\w]+", "-");
str = str.replaceAll("^-", "");
str = str.replaceAll("-$", "");
str = str.toLowerCase();
return str;
}
public static String remove(String str) {
str = str.replaceAll("[\'\"]+", """);
return str;
}
public static boolean StringIsNullOrEmpty(String string) {
if (string == null || string.equals("")) {
return true;
}
return false;
}
public static String getAliasByLanguage(String title) {
String alias = "";
String lang = "";
try {
if (!isLoadLanguageProfile) {
DetectorFactory.loadProfile("language_detect");
isLoadLanguageProfile = true;
}
Detector detector = DetectorFactory.create();
detector.append(title);
lang = detector.detect();
} catch (LangDetectException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
int type = 1;
if (lang.equalsIgnoreCase("en")) {
type = 1;
}
if (lang.equalsIgnoreCase("es")) {
type = 1;
}
if (lang.equalsIgnoreCase("zh-cn")) {
type = 2;
}
if (lang.equalsIgnoreCase("zh-tw")) {
type = 2;
}
if (lang.equalsIgnoreCase("ko")) {
type = 2;
}
if (lang.equalsIgnoreCase("ja")) {
type = 2;
}
if (lang.equalsIgnoreCase("vi")) {
type = 3;
}
if (type == 1) {
alias = replace(title);
} else if (type == 2) {
alias = replaceSpace(title);
} else if (type == 3) {
alias = replaceVietnamese(title);
}
if (alias.length() < 8) {
return title.replaceAll(" ", "-");
} else {
return alias;
}
}
public static void main(String[] args) {
System.out.println(StringHelper
.getAliasByLanguage("[中字] 百分滿分13 - Beast 耀燮 & T-ara 智妍很好笑"));
}
}